Skip to content

Commit

Permalink
Update scraper.py
Browse files (browse the repository at this point in the history)
Try to eat all unprintable whitespace from cg
  • Loading branch information
zymurgic committed Mar 11, 2014
1 parent 9424d1e commit 149e57b
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions scraper.py
Expand Up @@ -45,7 +45,7 @@ def trawlhtml (url):
tds = tr.cssselect("td")
if len(tds)>1:
trimmed = tds[0].text_content().replace("(e)","").replace("(f)","").replace(" ","")
trimmedcg = tds[1].text_content().replace(" ","").replace("3GV","")
trimmedcg = ''.join(tds[1].text_content().replace(" ","").replace("3GV","").split())
if (trimmed.isdigit() and len(trimmed)>3):
data = {
'cns' : trimmed,
Expand All @@ -59,7 +59,7 @@ def trawlhtml (url):

for t in s.findAll('text'):
if (t.text != " ") and (t.text != "Code") and (t.text !='Type of Call') and (len(t.text)<13):
trimmed = t.text.replace(" ","").replace("3GV","")
trimmed = ''.join(t.text.replace(" ","").replace("3GV","").split())
# if trimmed.startswith("0") and trimmed.isdigit():
if trimmed.isdigit() and (len(trimmed)>3):
cns=trimmed
Expand Down

0 comments on commit 149e57b

Please sign in to comment.