Skip to content

Commit

Permalink
Merge pull request #9 from sheetal-158/master
Browse files Browse the repository at this point in the history
changed to python 3.0
  • Loading branch information
xiaohuiyan committed Mar 29, 2017
2 parents 95b33f7 + ff4feb3 commit a29cf8c
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 20 deletions.
26 changes: 13 additions & 13 deletions script/indexDocs.py
Expand Up @@ -6,37 +6,37 @@
# Global vocabulary: maps each distinct word to its integer id. Ids are
# assigned densely (0, 1, 2, ...) in order of first appearance, and the table
# is shared by indexFile() (which fills it) and write_w2id() (which dumps it).
w2id = {}


def indexFile(pt, res_pt):
    """Replace every word of every document in *pt* by its word id.

    Each line of *pt* is one document made of whitespace-separated words.
    Words not yet present in the module-level ``w2id`` table are given the
    next free id. For every input line, one line of space-separated ids is
    written to *res_pt* (an empty document yields an empty line).

    Args:
        pt: path of the input document file.
        res_pt: path of the output file; it is created or truncated.
    """
    print('index file: ' + str(pt))
    # Context managers guarantee both files are flushed and closed, even if
    # reading or writing fails part-way through.
    with open(res_pt, 'w') as wf, open(pt) as docs:
        for line in docs:
            ws = line.strip().split()
            for w in ws:
                if w not in w2id:
                    # len(w2id) is the next unused id because ids are dense.
                    w2id[w] = len(w2id)
            wf.write(' '.join(str(w2id[w]) for w in ws) + '\n')
    print('write file: ' + str(res_pt))


def write_w2id(res_pt):
    """Write the vocabulary held in ``w2id`` to *res_pt*.

    One line is written per word, in ascending id order, formatted as
    ``"<wordId>\\t<word>"``.

    Args:
        res_pt: path of the output vocabulary file; it is created or
            truncated.
    """
    print('write:' + str(res_pt))
    with open(res_pt, 'w') as wf:
        for w, wid in sorted(w2id.items(), key=lambda d: d[1]):
            wf.write('%d\t%s\n' % (wid, w))


if __name__ == '__main__':
    if len(sys.argv) < 4:
        print('Usage: python %s <doc_pt> <dwid_pt> <voca_pt>' % sys.argv[0])
        print('\tdoc_pt input docs to be indexed, each line is a doc with the format "word word ..."')
        print('\tdwid_pt output docs after indexing, each line is a doc with the format "wordId wordId..."')
        print('\tvoca_pt output vocabulary file, each line is a word with the format "wordId word"')
        # sys.exit is always available; the bare exit() helper is only
        # injected by the site module.
        sys.exit(1)

    doc_pt = sys.argv[1]
    dwid_pt = sys.argv[2]
    voca_pt = sys.argv[3]
    indexFile(doc_pt, dwid_pt)
    print('n(w)=' + str(len(w2id)))
    write_w2id(voca_pt)
14 changes: 7 additions & 7 deletions script/topicDisplay.py
Expand Up @@ -30,24 +30,24 @@ def dispTopics(pt, voca, pz):
topics.append((pz[k], tmps))
k += 1

print 'p(z)\t\tTop words'
print('p(z)\t\tTop words')
for pz, s in sorted(topics, reverse=True):
print '%f\t%s' % (pz, s)
print('%f\t%s' % (pz, s))

if __name__ == '__main__':
if len(sys.argv) < 4:
print 'Usage: python %s <model_dir> <K> <voca_pt>' % sys.argv[0]
print '\tmodel_dir the output dir of BTM'
print '\tK the number of topics'
print '\tvoca_pt the vocabulary file'
print('Usage: python %s <model_dir> <K> <voca_pt>' % sys.argv[0])
print('\tmodel_dir the output dir of BTM')
print('\tK the number of topics')
print('\tvoca_pt the vocabulary file')
exit(1)

model_dir = sys.argv[1]
K = int(sys.argv[2])
voca_pt = sys.argv[3]
voca = read_voca(voca_pt)
W = len(voca)
print 'K:%d, n(W):%d' % (K, W)
print('K:%d, n(W):%d' % (K, W))

pz_pt = model_dir + 'k%d.pz' % K
pz = read_pz(pz_pt)
Expand Down

0 comments on commit a29cf8c

Please sign in to comment.