<?php

require_once('/app/vendor/autoload.php');

use DiDom\Document;

/**
 * Fetch a page (or a directly linked PDF) using the retrieval strategy
 * selected by the flag arguments.
 *
 * The flags are compared loosely against the string 'true'.
 *
 * @param mixed $link          URL to fetch.
 * @param mixed $attribute     Not used by this function.
 * @param mixed $name          Source name; used by the helpers for file names and logging.
 * @param mixed $loadTime      Seconds to wait after navigation (WebDriver strategy only).
 * @param mixed $webDriver     'true' selects the WebDriver strategy, anything else wget.
 * @param mixed $pdfDirectLink 'true' selects the direct PDF download strategy.
 *
 * @return mixed Whatever the selected helper returns: getDataFromPagePdfLink(),
 *               getDataFromPageWebDriver() or getDataFromPageWget().
 */
function getDataFromPage($link, $attribute, $name, $loadTime, $webDriver, $pdfDirectLink)
{
    // A direct PDF link takes precedence over both HTML strategies.
    if ($pdfDirectLink == 'true') {
        echo ('Getting PDF from direct link...' . PHP_EOL);
        return getDataFromPagePdfLink($link, $name);
    }

    // Plain download unless the WebDriver strategy was requested.
    if ($webDriver != 'true') {
        echo ('Getting data from page using wget...' . PHP_EOL);
        return getDataFromPageWget($link, $name);
    }

    echo ('Getting data from page using WebDriver...' . PHP_EOL);
    return getDataFromPageWebDriver($link, $name, $loadTime);
}

/**
 * Download a PDF from a direct link into the tmp directory and return its
 * MD5 checksum.
 *
 * @param string $link URL of the PDF.
 * @param string $name Base file name (without extension) for the tmp copy.
 *
 * @return string|false MD5 hash of the downloaded file, or false when the
 *                      tmp directory cannot be created, the download fails,
 *                      or the file cannot be hashed.
 */
function getDataFromPagePdfLink($link, $name)
{
    echo ('Downloading PDF from direct link...' . PHP_EOL);

    $tmpDir = '/app/rate-guides/excel/tmp/';

    // The second is_dir() covers another process creating the directory
    // between our check and our mkdir().
    if (!is_dir($tmpDir) && !mkdir($tmpDir, 0777, true) && !is_dir($tmpDir)) {
        error_log("Unable to create tmp directory $tmpDir for '$name'.");
        return false;
    }

    $outputFile = $tmpDir . $name . '.pdf';

    if (!downloadWithFallback($link, $outputFile)) {
        return false;
    }

    // Only report the download once it is known to have succeeded.
    echo ('Downloaded tmp  PDF to ' . $outputFile . PHP_EOL);

    $hash = md5_file($outputFile);

    echo ('Waiting for download to complete...' . PHP_EOL);

    return $hash;
}

/**
 * Download a page into the tmp directory via downloadWithFallback() and
 * return the saved file's contents.
 *
 * @param string $link URL of the page.
 * @param string $name Base file name (without extension) for the tmp copy.
 *
 * @return string|false File contents, or false when the download fails
 *                      (file_get_contents() may also yield false).
 */
function getDataFromPageWget($link, $name)
{
    $tmpDir = '/app/rate-guides/excel/tmp/';

    if (!file_exists($tmpDir)) {
        mkdir($tmpDir, 0777, true);
    }

    // The page is saved as <name>.htm inside the tmp directory.
    $targetFile = $tmpDir . $name . '.htm';

    if (!downloadWithFallback($link, $targetFile)) {
        return false;
    }

    $contents = file_get_contents($targetFile);
    echo ('Waiting for download to complete...' . PHP_EOL);

    return $contents;
}

/**
 * Load a page in headless Chrome through the remote WebDriver endpoint and
 * return the page source.
 *
 * The session is created outside the try block, so a failure to create it
 * propagates to the caller. Once created, the session is always quit.
 *
 * @param string $link     URL to open.
 * @param string $name     Source name, used only in log messages.
 * @param int    $loadTime Seconds to sleep after navigation before reading the source.
 *
 * @return string Page source, or '' when navigation or reading the source throws.
 */
function getDataFromPageWebDriver($link, $name, $loadTime)
{
    $chromeOptions = new \Facebook\WebDriver\Chrome\ChromeOptions();
    $chromeOptions->addArguments(['--headless', '--disable-gpu', '--no-sandbox']);

    $caps = \Facebook\WebDriver\Remote\DesiredCapabilities::chrome();
    $caps->setCapability(\Facebook\WebDriver\Chrome\ChromeOptions::CAPABILITY, $chromeOptions);

    $browser = \Facebook\WebDriver\Remote\RemoteWebDriver::create('http://selenium:4444/wd/hub', $caps);

    try {
        $browser->get($link);

        // Fixed wait after navigation before the source is captured.
        sleep($loadTime);

        return $browser->getPageSource();
    } catch (\DiDom\Exceptions\InvalidSelectorException $selectorError) {
        error_log("InvalidSelectorException for '$name' at $link: " . $selectorError->getMessage());
        return '';
    } catch (\Exception $failure) {
        error_log("Error fetching '$name' at $link: " . $failure->getMessage());
        return '';
    } finally {
        // Runs on success and on failure so the remote session is released.
        $browser->quit();
    }
}

/**
 * Return the href of the first element matching '[onclick], a[href]' in the
 * given HTML, prefixed with the origin of $url when it is not already an
 * absolute http(s) URL.
 *
 * @param string $html HTML to search.
 * @param string $url  URL whose scheme, host and port are used as the prefix.
 *
 * @return string The link; '' when $html is empty or nothing matches. If the
 *                first match has no href (e.g. an onclick-only element) the
 *                bare origin is returned.
 */
function getLinkFromHtml($html, $url)
{
    // Nothing to parse: same empty-input convention as getSnippetFromHTML().
    if (!is_string($html) || trim($html) === '') {
        return '';
    }

    $document = new Document();
    $document->loadHtml($html);

    // Build "scheme://host[:port]" and fall back to '' when $url lacks a
    // scheme or host instead of reading undefined array keys.
    $domain = '';
    $urlParts = parse_url($url);
    if (is_array($urlParts) && isset($urlParts['scheme'], $urlParts['host'])) {
        $domain = $urlParts['scheme'] . '://' . $urlParts['host'];
        if (isset($urlParts['port'])) {
            $domain .= ':' . $urlParts['port'];
        }
    }

    foreach ($document->find('[onclick], a[href]') as $element) {
        // attr() yields null for elements without an href; normalise to ''.
        $link = (string) $element->attr('href');

        // Only a leading http:// or https:// marks the link as absolute. A
        // relative link that merely contains "http" (for example in a query
        // string) still needs the prefix.
        if (preg_match('#^https?://#i', $link) === 1) {
            return $link;
        }

        return $domain . $link;
    }

    return '';
}

/**
 * Extract the HTML of every element matching a selector.
 *
 * @param string $attribute Selector passed to Document::find().
 * @param string $name      Source name, used only in console and log messages.
 * @param string $html      HTML to search.
 *
 * @return string HTML of each match, each followed by "\n"; '' when the
 *                selector or HTML is empty, the selector is invalid, or
 *                nothing matches.
 */
function getSnippetFromHTML($attribute, $name, $html)
{
    echo ('Grabbing data from ' . $name . '...' . PHP_EOL);

    $selector = trim($attribute);
    if (empty($selector)) {
        error_log("Empty selector provided for '$name' — skipping.");
        return '';
    }

    $markup = trim($html);
    if (empty($markup)) {
        error_log("Empty HTML provided for '$name' — skipping.");
        return '';
    }

    // The untrimmed inputs are what gets parsed and queried.
    $dom = new Document();
    $dom->loadHtml($html);

    try {
        $matches = $dom->find($attribute);
    } catch (\DiDom\Exceptions\InvalidSelectorException $selectorError) {
        error_log("Invalid selector '$attribute' for '$name': " . $selectorError->getMessage());
        return '';
    }

    $fragments = [];
    foreach ($matches as $match) {
        $fragments[] = $match->html();
    }

    // Each fragment is newline-terminated; no matches yields ''.
    return $fragments ? implode("\n", $fragments) . "\n" : '';
}

/**
 * Report whether two snippets (or checksums) differ.
 *
 * Scalar and null values are compared as strings, byte for byte. A loose
 * comparison would treat numeric-looking strings as numbers, so '5.50' vs
 * '5.5', '1e3' vs '1000', or two MD5 hashes of the form '0e<digits>' would
 * be reported as unchanged.
 *
 * @param mixed $html    Previously stored value.
 * @param mixed $newHtml Freshly fetched value.
 *
 * @return bool True when the values differ.
 */
function checkIfSnippetHasChanged($html, $newHtml)
{
    $oldIsTextLike = is_scalar($html) || $html === null;
    $newIsTextLike = is_scalar($newHtml) || $newHtml === null;

    if ($oldIsTextLike && $newIsTextLike) {
        // The casts keep null, false and '' equivalent, as they were before.
        return (string) $html !== (string) $newHtml;
    }

    // Non-scalar inputs cannot be cast to string; keep the loose comparison.
    return $html != $newHtml;
}

/**
 * Rotate the stored copies of a source after a change was detected:
 * old -> archive (timestamped), current -> old, tmp -> current.
 *
 * Each step is skipped when its source file does not exist, and a "Moved"
 * message is printed only when rename() actually succeeded.
 *
 * @param string $name     Base file name (without extension).
 * @param string $fileType 'pdf' selects the .pdf extension; anything else .htm.
 * @param mixed  $html     Not used by this function.
 * @param mixed  $newHtml  Not used by this function.
 *
 * @return void
 */
function moveToOld($name, $fileType, $html, $newHtml)
{
    $baseDir = '/app/rate-guides/excel/';
    $oldDir = $baseDir . 'old/';
    $archiveDir = $baseDir . 'archive/';
    $extension = ($fileType == 'pdf') ? '.pdf' : '.htm';

    $tmpPath = $baseDir . 'tmp/' . $name . $extension;
    $oldPath = $oldDir . $name . $extension;
    $archivePath = $archiveDir . $name . ' - ' . date('Y-m-d_H-i-s') . $extension;
    $newPath = $baseDir . $name . $extension;

    // rename() does not create missing target directories.
    foreach ([$oldDir, $archiveDir] as $dir) {
        if (!is_dir($dir) && !mkdir($dir, 0777, true) && !is_dir($dir)) {
            error_log("Unable to create directory $dir for '$name'.");
        }
    }

    // Order matters: each step frees the destination of the next one.
    $moves = [
        ['old', $oldPath, $archivePath],
        ['new', $newPath, $oldPath],
        ['tmp', $tmpPath, $newPath],
    ];

    foreach ($moves as $move) {
        list($label, $from, $to) = $move;

        if (!file_exists($from)) {
            // Expected on the first run for a source; nothing to rotate yet.
            echo ('Skipped ' . $label . ' file ' . $from . ' (not found)' . PHP_EOL);
            continue;
        }

        if (!rename($from, $to)) {
            error_log("Failed to move $label file $from to $to");
            continue;
        }

        echo ('Moved ' . $label . ' file ' . $from . ' to ' . $to . PHP_EOL);
    }
}
// {

//     $tmpPath = '/app/rate-guides/excel/tmp/' . $name . '.pdf';
//     $oldPath = '/app/rate-guides/excel/old/' . $name . '.pdf';
//     $archivePath = '/app/rate-guides/excel/archive/' . $name . date('Y-m-d_H-i-s') . '.pdf';
//     $newPath = '/app/rate-guides/excel/' . $name . '.pdf';

//     rename($oldPath, $archivePath);
//     rename($newPath, $oldPath);
//     rename($tmpPath, $newPath);

// }

/**
 * Normalise HTML so that volatile markup does not register as a content change.
 *
 * Runs of whitespace are collapsed to a single space, then ids, classes,
 * inline styles, <style> and <svg> elements and a few framework-generated
 * attributes are removed, '&#13;' entities are dropped and the result is trimmed.
 *
 * NOTE(review): the whitespace collapse runs first, so no "\r" or "\n" is
 * left afterwards. The patterns that need a line break (/ID, /FontName,
 * /BaseFont, \r\n, \r) therefore cannot match. They are kept in their
 * original order so the output is unchanged.
 *
 * @param string $html Raw HTML.
 *
 * @return string Normalised HTML.
 */
function cleanUpHTML($html)
{
    // Pattern => replacement, applied strictly in this order.
    $rules = [
        '/\s+/' => ' ',
        '/^\/ID .*$(?:\r?\n|\r).*$/m' => '',
        '/\s*id="[^"]*"/' => '',
        '/^\/FontName.*\n/m' => '',
        '/^\/BaseFont.*\n/m' => '',
        '/class="[^"]*"/' => '',
        '/data-unique_identifier="[^"]*"/' => '',
        '/<style\b[^>]*>(.*?)<\/style>/is' => '',
        '/\s*style\s*=\s*"(?:[^"]*)"/i' => '',
        '/<svg\b[^>]*>(.*?)<\/svg>/is' => '',
        '/_ngcontent-ng-c\d+=\"\"/' => '',
        '/aria-describedby="[^"]*"/' => '',
        '/id="tooltip-container-[^"]*"/' => '',
        '/\r\n/' => '',
        '/\r/' => '',
    ];

    foreach ($rules as $pattern => $replacement) {
        $html = preg_replace($pattern, $replacement, $html);
    }

    return trim(str_replace('&#13;', '', $html));
}

/**
 * Download a URL to a file with wget, retrying with a different User-Agent
 * header after each failed attempt.
 *
 * @param string $link       URL to download.
 * @param string $outputFile Destination path (passed to wget -O).
 *
 * @return bool True as soon as one attempt exits with code 0, false when
 *              every user agent failed.
 */
function downloadWithFallback($link, $outputFile) {
    $userAgents = [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_4_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.1.15',
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/124.0.0.0 Safari/537.36',
    ];
    $lastIndex = count($userAgents) - 1;

    foreach ($userAgents as $index => $userAgent) {
        $command = 'wget --user-agent=' . escapeshellarg($userAgent) .
                   ' -O ' . escapeshellarg($outputFile) .
                   ' ' . escapeshellarg($link);

        echo "Attempt " . ($index + 1) . " with agent: $userAgent" . PHP_EOL;
        echo $command . PHP_EOL;

        // exec() appends to an existing array, so start each attempt clean.
        $output = [];
        $returnCode = null;
        exec($command, $output, $returnCode);

        if ($returnCode === 0) {
            echo "Success on attempt " . ($index + 1) . PHP_EOL;
            return true;
        }

        echo "Attempt " . ($index + 1) . " failed (exit code: $returnCode)" . PHP_EOL;

        // Back off only when another attempt follows; after the last user
        // agent there is nothing to wait for.
        if ($index < $lastIndex) {
            $pause = rand(2, 10);
            echo "Sleeping for $pause seconds before next attempt..." . PHP_EOL;
            sleep($pause);
        }
    }

    echo "All user agents failed." . PHP_EOL;
    return false;
}


/**
 * Create a high-priority task page, due today, in the Notion task database.
 *
 * When fetchLenderPageIdFromNotion() finds a page for the lender, the task
 * is linked to it through the 'Lender Details' relation.
 *
 * Terminates the script with die() when the request cannot be sent or the
 * API answers with an HTTP status >= 400.
 *
 * @param string      $name        Task title.
 * @param string      $link        Value for the 'URL' property.
 * @param mixed       $description Not used by this function.
 * @param int|string  $lenderId    Lender identifier; converted with intval().
 * @param string|null $method      Value for the 'Method' select; 'Unknown' when null.
 * @param string      $status      Name of the Notion status option.
 *
 * @return void
 */
function addNotionTask($name, $link, $description, $lenderId, $method, $status)
{
    // NOTE(review): the fallback literal is a credential committed to source
    // control. Prefer supplying NOTION_API_TOKEN through the environment;
    // the literal is kept only so existing deployments keep working and
    // should be rotated and removed.
    $notionToken = getenv('NOTION_API_TOKEN') ?: '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';

    $propertiesPayload = [
        'Name' => [
            'title' => [
                [
                    'text' => [
                        'content' => $name
                    ]
                ]
            ]
        ],
        'Status' => [
            'status' => [
                'name' => $status
            ]
        ],
        'Due date' => [
            'date' => [
                'start' => date('Y-m-d', strtotime('today'))
            ]
        ],
        'Priority' => [
            'select' => [
                'name' => 'High'
            ]
        ],
        'URL' => [
            'url' => $link
        ],
        'Lender ID' => [
            'number' => intval($lenderId)
        ],
        'Method' => [
            'select' => [
                'name' => $method ?? 'Unknown'
            ]
        ],
    ];

    // The relation is optional: the task is still created without it.
    $lenderPageId = fetchLenderPageIdFromNotion($lenderId);

    if ($lenderPageId) {
        $propertiesPayload['Lender Details'] = [
            'relation' => [
                [
                    'id' => $lenderPageId
                ]
            ]
        ];
    }

    $curl = curl_init();

    curl_setopt_array($curl, [
        CURLOPT_URL => "https://api.notion.com/v1/pages",
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_ENCODING => "",
        CURLOPT_MAXREDIRS => 10,
        CURLOPT_TIMEOUT => 30,
        CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
        CURLOPT_CUSTOMREQUEST => "POST",
        CURLOPT_POSTFIELDS => json_encode([
            'parent' => [
                'database_id' => '1b2a56fd826f8033858fc60873ed7b1d'
            ],
            'properties' => $propertiesPayload,
        ]),
        CURLOPT_HTTPHEADER => [
            "Authorization: Bearer " . $notionToken,
            "Content-Type: application/json",
            "Notion-Version: 2022-06-28"
        ],
    ]);

    $response = curl_exec($curl);
    $httpCode = curl_getinfo($curl, CURLINFO_HTTP_CODE);
    $curlError = curl_error($curl);

    curl_close($curl);

    // Transport-level failure (DNS, TLS, timeout, ...).
    if ($curlError) {
        error_log("cURL Error: " . $curlError);
        die("Error: Unable to send request. Check logs for details.");
    }

    $responseData = json_decode($response, true);

    // The API answered, but with an error status.
    if ($httpCode >= 400) {
        error_log("Notion API Error: " . json_encode($responseData));
        die("Error: Notion API request failed with HTTP code $httpCode. Check logs for details.");
    }

    echo ('Notion task added...' . PHP_EOL);
}

/**
 * Look up the Notion page id of a lender by its numeric 'Lender ID' property.
 *
 * @param int|string $lenderId Lender identifier; converted with intval().
 *
 * @return string|null Id of the first result, or null when the request
 *                     fails, the API returns an HTTP status >= 400, or no
 *                     page matches.
 */
function fetchLenderPageIdFromNotion($lenderId)
{
    // NOTE(review): the fallback literal is a credential committed to source
    // control. Prefer supplying NOTION_API_TOKEN through the environment;
    // the literal is kept only so existing deployments keep working and
    // should be rotated and removed.
    $notionToken = getenv('NOTION_API_TOKEN') ?: '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';

    $curl = curl_init();

    curl_setopt_array($curl, [
        CURLOPT_URL => "https://api.notion.com/v1/databases/1f4a56fd826f8054a8f4fba5f1ad0322/query",
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_ENCODING => "",
        CURLOPT_MAXREDIRS => 10,
        CURLOPT_TIMEOUT => 30,
        CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
        CURLOPT_CUSTOMREQUEST => "POST",
        CURLOPT_POSTFIELDS => json_encode([
            'filter' => [
                'property' => 'Lender ID',
                'number' => [
                    'equals' => intval($lenderId)
                ]
            ],
        ]),
        CURLOPT_HTTPHEADER => [
            "Authorization: Bearer " . $notionToken,
            "Content-Type: application/json",
            "Notion-Version: 2022-06-28"
        ],
    ]);

    $response = curl_exec($curl);
    $httpCode = curl_getinfo($curl, CURLINFO_HTTP_CODE);
    $curlError = curl_error($curl);

    curl_close($curl);

    // Unlike addNotionTask(), a failed lookup is not fatal: the caller just
    // gets null back.
    if ($curlError) {
        error_log("cURL Error: " . $curlError);
        return null;
    }

    $responseData = json_decode($response, true);

    if ($httpCode >= 400) {
        error_log("Notion API Error: " . json_encode($responseData));
        return null;
    }

    // Only the first match is used; ?? also covers an empty or malformed body.
    return $responseData['results'][0]['id'] ?? null;
}

?>