";

/**
 * First step - initializing the API
 */
$res = ApiSCSE::init(API_USER, API_KEY);
if (!$res) {
    echo "Error during API initialization: ";
    die(ApiSCSE::getError() . "\n");
}

/**
 * Second step - testing license state
 *
 * Prints the license name, the wallet balance and a summary of existing jobs,
 * then aborts when the wallet holds fewer than 0.6 Credits.
 */
$license = ApiSCSE::getLicense();
$credits = ApiSCSE::getCredits();
$jobs = ApiSCSE::getJobs();
$jobs_unfinished = 0;
$jobs_finished = 0;
$jobs_running = 0;
// A failed getJobs() call is counted as "no jobs" instead of iterating a non-array.
foreach ((is_array($jobs) ? $jobs : []) as $job) {
    // Loose comparisons are kept on purpose: the value types returned by the API are not visible here.
    if ($job['progress'] == 100) {
        $jobs_finished++;
    } elseif ($job['start'] == 1) {
        $jobs_running++;
    } else {
        $jobs_unfinished++;
    }
}
echo "SCSE license active: {$license['name']}\n";
echo "SCSE wallet: {$credits['credit']} Credits\n";
echo "Jobs: $jobs_unfinished unfinished jobs, $jobs_running running jobs, $jobs_finished finished jobs\n";
if ((float)$credits['credit'] < 0.6) {
    die("Insufficient Credits in wallet\n");
}
echo "\n";

/**
 * Third step - creating a scrape job, unless a job named JOBNAME already exists
 */
$job = ApiSCSE::getJob(JOBNAME);
if (!$job) {
    echo "Creating job " . JOBNAME . ": ";
    $job = ApiSCSE::createJob(JOBNAME, LANGUAGE, COUNTRY, RESULTS_PER_KEYWORD, SPEED);
    if ($job) {
        echo "success\n";
    } else {
        echo "failure\n";
        if (ApiSCSE::getLastResponse()->has_exception) {
            var_dump(ApiSCSE::getLastResponse()->exceptions);
        }
        die();
    }
}

/**
 * Fourth step - populating the job with keywords, using replace; skipped if the job is already running
 */
if ($job['start'] == 0) {
    // Keywords are read one per line from KEYWORD_FILE; an inline list such as
    // ['one', 'two', 'three'] could be used instead.
    if (!file_exists(KEYWORD_FILE)) {
        die("File " . KEYWORD_FILE . " does not exist\n");
    }
    $keywords = file(KEYWORD_FILE, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
    if ($keywords === false) {
        // file() returns false when the file exists but cannot be read.
        die("File " . KEYWORD_FILE . " could not be read\n");
    }
    $res = ApiSCSE::modify_job(JOBNAME, $keywords, ApiSCSE::MODE_REPLACE, LANGUAGE, COUNTRY, RESULTS_PER_KEYWORD, SPEED);
    if (!$res) {
        echo "Modification of Job failed\n";
        if (ApiSCSE::getLastResponse()->has_exception) {
            var_dump(ApiSCSE::getLastResponse()->exceptions);
        }
        die();
    }
    echo "Job starting cost: {$res['credit_cost']} Credits\n";
}

/**
 * Fifth step - starting the scrape job
 *
 * start_job() is called unconditionally; a failure that carries error id 10008
 * is reported as "already started" and is not fatal.
 */
$res = ApiSCSE::start_job(JOBNAME);
if (!$res) {
    $started = 0;
    $response = ApiSCSE::getLastResponse();
    // Same guard as in the previous steps: only read the exception list when one is flagged.
    if ($response->has_exception) {
        foreach ($response->exceptions as $exception) {
            if ($exception->error_id == 10008) {
                $started = 1;
                break;
            }
        }
    }
    if (!$started) {
        echo "Job " . JOBNAME . " could not be started\n";
        die();
    }
    echo "Job " . JOBNAME . " was already started\n";
} else {
    echo "Job " . JOBNAME . " has been started\n";
}

/**
 * Final step - downloading result pages until the job is finished
 *
 * Depending on priority the scraping job will finish sooner or later. The system is capable of scraping
 * many thousands of keywords per minute at high priorities.
 * Depending on the current state of the scraping backend, a started scraping job can have a delay of up to
 * 2 minutes before the first result data is received.
 * There are advanced techniques to avoid this delay; please contact our customer service if needed.
 */
$latest_page = 0;
$job = ApiSCSE::getJob(JOBNAME);
if (!$job) {
    echo "Job " . JOBNAME . " could not be read\n";
    die();
}

/**
 * Since January 2018 a new EXPORT method is available:
 * $res = ApiSCSE::Export(['jobname'=>$jobname],NULL,function ($array,$line){ var_dump($array); var_dump($line); });
 * This function allows a full export of all data without pagination. It streams the data into a callback,
 * which allows efficient handling of results without high memory requirements.
 */
$original_estimated_hours = $job['estimated_hours_left'];
$start_time = time();
$end_time = $start_time + $original_estimated_hours * 3600 * 3; // timeout: 3 times the total estimation
$total_organic = 0;
$total_creative = 0;
$total_pages = null; // unknown until the first successful poll

/**
 * Prints every entry of one result list (organic results or advertisements)
 * together with its sitelinks, if any.
 *
 * @param array $listings entries with 'url', 'title', 'description' and optionally 'sitelinks'
 */
$print_listings = function ($listings) {
    foreach ($listings as $list) {
        echo "\tURL: {$list['url']}\n";
        echo "\tTitle: {$list['title']}\n";
        echo "\tDescription: {$list['description']}\n";
        // !empty() instead of count(): a missing or null 'sitelinks' entry must not raise an error.
        if (!empty($list['sitelinks'])) {
            foreach ($list['sitelinks'] as $sitelink) {
                echo "\t\tURL: {$sitelink['url']}\n";
                echo "\t\tTitle: {$sitelink['title']}\n";
                echo "\t\tDescription: {$sitelink['description']}\n";
            }
        }
        echo "\n";
    }
};

// The body always runs once, so an already finished job (estimation 0) is still read out;
// after that, polling continues until the timeout is reached.
do {
    echo "Waiting for results, estimated time left: {$job['estimated_hours_left']} hours ; Progress: {$job['progress']}%..\n";
    if (MODE == 'http') {
        flush();
    }
    sleep(5);

    $polled_job = ApiSCSE::getJob(JOBNAME);
    if (!$polled_job) {
        // Keep the last known job state; a failed poll must not be mistaken for "0 of 0 pages done".
        echo "Job state could not be read, retrying\n";
        continue;
    }
    $job = $polled_job;
    $total_pages = $job['total_pages'];
    $result_pages = $job['pages'];

    while ($result_pages > $latest_page) {
        $page_number = $latest_page + 1;
        echo "Downloading new result page " . $page_number . "\n";
        $results = ApiSCSE::getResults(JOBNAME, $page_number);
        if (!$results) {
            // Do not advance the page counter; the same page is requested again on the next poll.
            echo "Download of result page $page_number failed, retrying\n";
            break;
        }
        $keyword_results = isset($results['keyword_results']) ? $results['keyword_results'] : [];
        foreach ($keyword_results as $result) {
            echo "Keyword: \"{$result['keyword']}\" - Organic results: {$result['count_organic']}, Creative results: {$result['count_creative']}\n";
            $total_organic += (int)$result['count_organic'];
            $total_creative += (int)$result['count_creative'];

            echo "Organic results:\n";
            if (!empty($result['results_organic'])) {
                $print_listings($result['results_organic']);
            }

            // 'count_creative' is the counter printed above; it decides whether the ads section is shown.
            if ((int)$result['count_creative'] > 0 && !empty($result['results_creative'])) {
                echo "Advertisements:\n";
                $print_listings($result['results_creative']);
            }
            echo "\n";
        }
        $latest_page++;
    }

    // Finished once every page announced by the job has been downloaded.
    if (is_numeric($total_pages) && (int)$total_pages === $latest_page) {
        break;
    }
} while (time() < $end_time);
/**
 * Summary - reports how many pages and results were received and whether the
 * job completed or the wait timed out.
 */
$hours_passed = number_format((time() - $start_time) / 3600, 2);

// The download loop stops either because all pages were fetched or because the timeout was hit.
// Completion is decided by the page counters, not by the clock: a job that finishes on the very
// last poll is still a success, and an unfinished job must never be reported as successful.
$completed = isset($total_pages) && is_numeric($total_pages) && (int)$total_pages === (int)$latest_page;

echo "\n";
if ($completed) {
    echo "Scraping of $latest_page pages finished successfully\n";
}
echo "Received $total_organic organic results and $total_creative ads\n";
echo "Estimated time: $original_estimated_hours hours, Real time: $hours_passed hours\n";
if (!$completed) {
    echo "ERROR: Timeout while waiting for results\n";
}
if (MODE == 'cli') {
    echo "";
}