/
scraper.php
39 lines (34 loc) · 1.49 KB
/
scraper.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
<?php
// Scraper for morph.io (https://morph.io).
//
// For every search prefix in $Alpha this script:
//   1. fetches page 1 of the globalcontact.com USBIZ directory search,
//   2. reads the total page count from the href of the 11th pagination anchor
//      (the number after the last "=" in that href),
//   3. fetches each results page and prints the href of the first
//      "View Company Profile ..." anchor found in each results container.
//
// Output: one link per line on stdout. Nothing is saved to the scraperwiki
// datastore by this script.
require 'scraperwiki.php';
require 'scraperwiki/simple_html_dom.php';

// Search prefixes to crawl. Only '1' is active; the commented-out line below
// is the full a-z list.
$Alpha=array('1');
//$Alpha=array('a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z');

foreach ($Alpha as $prefix)
{
    // Everything except the page number; the number is appended per request.
    $searchUrl = 'http://globalcontact.com/gc/directory/search.php?table=USBIZ&company='.$prefix.'&search=&search_sic=&page=';

    $html = file_get_html($searchUrl.'1');
    sleep(5); // pause after the first request for each prefix
    if (!$html)
    {
        // First page could not be loaded; nothing to paginate for this prefix.
        continue;
    }

    // Determine how many result pages exist. If the pagination anchor is
    // missing or its href does not end in "=<digits>", fall back to a single
    // page instead of dereferencing null or looping on a non-numeric bound.
    $pages = 1;
    $link = $html->find("/html/body/center/table/tbody/tr[2]/td[2]/div/div[2]/table/tbody/tr/td[2]/div/div[4]/center/div/a[11]", 0);
    if ($link && preg_match('/=(\d+)\s*$/', (string) $link->href, $match))
    {
        $pages = (int) $match[1];
    }

    // Release the parsed DOM before fetching more pages.
    $html->clear();
    unset($html);

    for ($page = 1; $page <= $pages; $page++)
    {
        $mainpage = file_get_html($searchUrl.$page);
        if (!$mainpage)
        {
            // Skip pages that fail to load and carry on with the next one.
            continue;
        }

        foreach ($mainpage->find("/html/body/center/table/tbody/tr[2]/td[2]/div/div[2]/table/tbody/tr/td[2]/div/div[4]") as $element)
        {
            // NOTE(review): index 0 means only the first matching anchor per
            // container is printed - confirm that this is intended.
            $profileLink = $element->find("//td/a[plaintext^=View Company Profile: Additional Detail of Product and Services]", 0);
            if ($profileLink)
            {
                echo $profileLink->href."\n";
            }
        }

        // Free this page's DOM so memory does not grow with the page count.
        $mainpage->clear();
        unset($mainpage);
    }
}