Skip to content
This repository has been archived by the owner on Apr 24, 2020. It is now read-only.

Commit

Permalink
args mismatch fix
Browse files: browse the repository at this point in the history
  • Loading branch information
nova77 committed Nov 22, 2011
1 parent 99a22ff commit 471e591
Showing 1 changed file with 1 addition and 4 deletions.
5 changes: 1 addition & 4 deletions scanLinks.py
Expand Up @@ -122,16 +122,13 @@ def recordArticle(pageDoc):
print "scanLinks.py file1.gz file2.gz ... > links.txt"
sys.exit(1)

-print >>sys.stderr, "Creating outgoing list.."
-outgoing = {}

for fname in sys.argv[1:]:
print >>sys.stderr, " -> Processing file", fname
#f = Popen(['zcat', fname], stdout=PIPE) # much faster than python gzip
f = Popen(['pigz', '-d', '-c', fname], stdout=PIPE) # even faster

for doc in xmlwikiprep.read(f.stdout, set(['text'])):
-recordArticle(doc, outgoing)
+recordArticle(doc)

if nsBuflen > 0:
cursor.executemany("""
Expand Down

0 comments on commit 471e591

Please sign in to comment.