Skip to content
Permalink
Browse files

Testing one single id updation

  • Loading branch information
vasantv committed May 30, 2015
1 parent c80ddfe commit a512eacaccddcbe7996cec7c3ced32bddd2b80aa
Showing with 45 additions and 10 deletions.
  1. +45 −10 scraper.php
@@ -2,20 +2,55 @@
// This is a template for a PHP scraper on morph.io (https://morph.io)
// including some code snippets below that you should find helpful

// require 'scraperwiki.php';
// require 'scraperwiki/simple_html_dom.php';
require 'scraperwiki.php';
require 'scraperwiki/simple_html_dom.php';
//
// // Read in a page
// $html = scraperwiki::scrape("http://foo.com");
//
// // Find something on the page using css selectors
// $dom = new simple_html_dom();
// $dom->load($html);

$MAX_ID = 3; // highest doctor ID to fetch; raise to cover the required range

// Maps each column saved to the "indian_doctors" table to the id attribute of
// the <span> that holds its value on the details page.
// NOTE(review): the original also read span#Univ but never saved it; add a
// column here if the university should be stored.
$FIELD_SPANS = array(
    'name'                => 'Name',
    'fathers_name'        => 'FatherName',
    'date_of_birth'       => 'DOB',
    'information_year'    => 'lbl_Info',
    'registration_number' => 'Regis_no',
    'date_of_reg'         => 'Date_Reg',
    'council'             => 'Lbl_Council',
    'qualifications'      => 'Qual',
    'qualification_year'  => 'QualYear',
    'permanent_address'   => 'Address',
);

/** Loop over the doctor IDs 1..$MAX_ID, scraping and saving one record each. **/
// The condition must test $id (the loop counter); the previous `$i` was an
// undefined variable, which made the condition always true.
for ($id = 1; $id <= $MAX_ID; $id++) {
    // Read in the MCI details page for this doctor.
    $html = scraperwiki::scrape("http://www.mciindia.org/ViewDetails.aspx?ID=" . $id);

    // Skip IDs whose page could not be fetched instead of saving an empty row.
    if (!is_string($html) || $html === '') {
        continue;
    }

    $dom = new simple_html_dom();
    $dom->load($html);

    // Build a fresh record per iteration so values never leak between doctors.
    $record = array('mci_snum' => $id);
    foreach ($FIELD_SPANS as $column => $spanId) {
        // find() without an index returns an array of nodes; index 0 asks for
        // the first match only, which is null when the span is absent.
        $node = $dom->find('span[id="' . $spanId . '"]', 0);
        $record[$column] = ($node === null) ? '' : trim($node->plaintext);
    }

    // Values are passed as data, not spliced into SQL text, so no manual
    // escaping is applied (sqlite_escape_string() is also gone since PHP 5.4).
    scraperwiki::save_sqlite(
        array('mci_snum', 'registration_number'),
        $record,
        "indian_doctors"
    );

    // Release the parsed DOM before the next page; clear() is the library's
    // explicit cleanup call, rather than invoking __destruct() by hand.
    $dom->clear();
    unset($dom);
}
// // An arbitrary query against the database
// scraperwiki::select("* from data where 'name'='peter'")

0 comments on commit a512eac

Please sign in to comment.
You can’t perform that action at this time.