Skip to content

Commit

Permalink
Update scraper.php
Browse files Browse the repository at this point in the history
  • Loading branch information
vikash12345 committed Oct 31, 2017
1 parent ce95e7e commit c8cdba3
Showing 1 changed file with 10 additions and 31 deletions.
41 changes: 10 additions & 31 deletions scraper.php
Expand Up @@ -3,39 +3,18 @@
// including some code snippets below that you should find helpful
require 'scraperwiki.php';
require 'scraperwiki/simple_html_dom.php';
// Default HTTP request headers. dlPage() pulls this array in via `global` and
// passes it to cURL as CURLOPT_HTTPHEADER, so the (misspelled) variable name
// must stay in sync with that function.
$cHeadres = [
    'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language: en-US,en;q=0.5',
    'Connection: Keep-Alive',
    'Pragma: no-cache',
    'Cache-Control: no-cache',
];
/**
 * Download a page with cURL and parse the response into a simple_html_dom document.
 *
 * Request headers come from the global $cHeadres array when it is a non-empty
 * array. Cookies are read from and written back to "cookies.txt", redirects
 * are followed, and the connection attempt is limited to 30 seconds.
 *
 * @param string $link URL of the page to fetch.
 * @return simple_html_dom|null The parsed document, or null when the cURL
 *                              handle cannot be created or the transfer fails.
 */
function dlPage($link) {
    global $cHeadres;

    $ch = curl_init();
    if (!$ch) {
        // Make the failure result explicit instead of falling off the end.
        return null;
    }

    $options = array(
        CURLOPT_URL            => $link,
        CURLOPT_CONNECTTIMEOUT => 30,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_COOKIEFILE     => 'cookies.txt',
        CURLOPT_COOKIEJAR      => 'cookies.txt',
        // NOTE(review): peer verification is disabled, so the server's TLS
        // certificate is not checked. Kept as-is to avoid changing which
        // requests succeed; enable it if the target's certificate is valid.
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_HEADER         => false,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_USERAGENT      => "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
    );

    // Only hand headers to cURL when the global actually holds a usable list;
    // an undefined or non-array value would otherwise trigger a cURL error.
    if (is_array($cHeadres) && count($cHeadres) > 0) {
        $options[CURLOPT_HTTPHEADER] = $cHeadres;
    }

    curl_setopt_array($ch, $options);

    $str = curl_exec($ch);
    // The error text must be read before the handle is closed.
    $error = ($str === false) ? curl_error($ch) : '';
    curl_close($ch);

    if ($str === false) {
        // Do not feed `false` into the HTML parser; report the failure instead.
        error_log('dlPage: request for ' . $link . ' failed: ' . $error);
        return null;
    }

    $dom = new simple_html_dom();
    $dom->load($str);
    return $dom;
}

// Fetch the "find classmate" listing for the letter "a" and print each page.
// The trailing URL segment takes the values 0 and 16 (steps of 16 while below
// 32); presumably it is a record offset with 16 entries per page — confirm
// against the site.
for ($page = 0; $page < 32; $page += 16) {
    $link = 'https://alumni.iba.edu.pk/alumni/findclassmate/a/' . $page;

    // dlPage() performs the HTTP request and returns the parsed document.
    // Calling simple_html_dom::load() with the URL would only parse the URL
    // text itself as markup, so the download has to go through dlPage().
    $maincode = dlPage($link);
    if ($maincode === null) {
        error_log('Skipping ' . $link . ': page could not be downloaded');
        continue;
    }

    echo $maincode;

    // TODO: extract the rows of interest, e.g. $maincode->find("table.list").
}
//
// // Read in a page
Expand Down

0 comments on commit c8cdba3

Please sign in to comment.