In [6]:
!pip3 install nltk
!python3 -m nltk.downloader wordnet punkt

import numpy as np
import pandas as pd

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.model_selection import KFold, train_test_split
from util import load_json

seed = 17

Collecting nltk
Installing collected packages: nltk
Successfully installed nltk-3.4.3
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/williamzou/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/williamzou/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [10]:
articles = [article for article in load_json('processed_data_aminer.json') if len(article['references']) > 15][:4000]
articles_df = pd.DataFrame.from_dict(articles)
articles_df['title'] = articles_df['title'].str.lower().fillna('')
articles_train_df, articles_test_df = train_test_split(articles_df, test_size=0.001, random_state=17)

In [11]:
count_vectorizer = CountVectorizer()
corpus = [article['abstract'] for _, article in articles_train_df.iterrows()]
count_vectorizer.fit(corpus)

step_size = 1000
count_cols = ['vocab_{}'.format(word) for word in count_vectorizer.get_feature_names()]
counts_df = pd.DataFrame(columns=count_cols)
for i in range(0, len(articles_train_df), step_size):
  corpus = [article['abstract'] for _, article in articles_train_df.iloc[[k for k in range(i, min(i+step_size+1, len(articles_train_df)))]].iterrows()]
  counts = count_vectorizer.trnansform(corpus)
  step_counts_df = pd.DataFrame(counts.todense(), columns=count_cols)
  counts_df = pd.concat([counts_df, step_counts_df], ignore_index=True)

In [15]:
len(counts_df.columns)

18878

In [16]:
train_counts_df = pd.concat([articles_train_df, counts_df], axis=1)
train_counts_no_nans_df = train_counts_df[count_cols].fillna(0)

In [17]:
def get_test_score(model, train_df, test_df, scores, top_k=200):
  # check if top_k vectors with highest cosine similarity have one of the cited papers
  score = 0
  num_examples = len(test_df)
  score_indices = np.argsort(scores, axis=1)
  for i, (_, row) in enumerate(test_df.iterrows()):
    citations = row['references']
    if type(citations) == list:
      top_k_docs = score_indices[i][::-1][:top_k]
      top_k_docs_ids = train_df.iloc[top_k_docs]['id'].tolist()
      if any(citation in top_k_docs_ids for citation in citations):
        score += 1
      elif not any(train_df['id'].isin(citations)):
        num_examples -= 1
    else:
      num_examples -= 1
    
    print(i, num_examples, score, score / num_examples)

  return 0

In [18]:
n_splits = 2
kf = KFold(n_splits=n_splits, random_state=seed)

# grid search for best n_components
scores = []
base_n_components = 8
for n in range(base_n_components, 10):
  fold_scores = []
  for i, (train_index, test_index) in enumerate(kf.split(train_counts_no_nans_df)):
    train_df = train_counts_no_nans_df.iloc[train_index]
    validate_df = train_counts_no_nans_df.iloc[test_index]
    lda = LatentDirichletAllocation(n_components=n, random_state=seed)
    lda.fit(train_df[count_cols])
    X = lda.transform(train_counts_no_nans_df[count_cols])
    X_validate = lda.transform(validate_df[count_cols])
    score = get_test_score(lda, train_counts_df, train_counts_df.iloc[test_index], np.matmul(X_validate, X.T))
    fold_scores += [score]
    print(score)
   
  scores += [sum(fold_scores) / len(fold_scores)]

best_n_components = base_n_components + scores.index(max(scores))

0 1999 0 0.0
1 1998 0 0.0
2 1997 0 0.0
3 1996 0 0.0
4 1995 0 0.0
5 1994 0 0.0
6 1993 0 0.0
7 1992 0 0.0
8 1991 0 0.0
9 1990 0 0.0
10 1989 0 0.0
11 1988 0 0.0
12 1987 0 0.0
13 1986 0 0.0
14 1985 0 0.0
15 1984 0 0.0
16 1983 0 0.0
17 1982 0 0.0
18 1981 0 0.0
19 1980 0 0.0
20 1979 0 0.0
21 1979 0 0.0
22 1978 0 0.0
23 1977 0 0.0
24 1976 0 0.0
25 1976 0 0.0
26 1975 0 0.0
27 1974 0 0.0
28 1973 0 0.0
29 1972 0 0.0
30 1971 0 0.0
31 1970 0 0.0
32 1969 0 0.0
33 1969 0 0.0
34 1968 0 0.0
35 1968 0 0.0
36 1967 0 0.0
37 1966 0 0.0
38 1965 0 0.0
39 1964 0 0.0
40 1963 0 0.0
41 1962 0 0.0
42 1961 0 0.0
43 1960 0 0.0
44 1959 0 0.0
45 1958 0 0.0
46 1957 0 0.0
47 1957 0 0.0
48 1956 0 0.0
49 1955 0 0.0
50 1954 0 0.0
51 1953 0 0.0
52 1952 0 0.0
53 1951 0 0.0
54 1950 0 0.0
55 1949 0 0.0
56 1948 0 0.0
57 1947 0 0.0
58 1946 0 0.0
59 1945 0 0.0
60 1944 0 0.0
61 1943 0 0.0
62 1942 0 0.0
63 1941 0 0.0
64 1940 0 0.0
65 1939 0 0.0
66 1938 0 0.0
67 1937 0 0.0
68 1936 0 0.0
69 1935 0 0.0
70 1934 0 0.0
71 1933 0 0.0
72

400 1618 1 0.0006180469715698393
401 1617 1 0.0006184291898577613
402 1616 1 0.0006188118811881188
403 1615 1 0.0006191950464396285
404 1614 1 0.0006195786864931846
405 1613 1 0.0006199628022318661
406 1612 1 0.0006203473945409429
407 1611 1 0.0006207324643078833
408 1610 1 0.0006211180124223603
409 1609 1 0.0006215040397762585
410 1608 1 0.0006218905472636816
411 1607 1 0.0006222775357809583
412 1606 1 0.0006226650062266501
413 1605 1 0.0006230529595015577
414 1604 1 0.0006234413965087282
415 1604 1 0.0006234413965087282
416 1603 1 0.0006238303181534623
417 1603 1 0.0006238303181534623
418 1602 1 0.0006242197253433209
419 1601 1 0.0006246096189881324
420 1600 1 0.000625
421 1599 1 0.0006253908692933083
422 1598 1 0.0006257822277847309
423 1597 1 0.0006261740763932373
424 1596 1 0.0006265664160401002
425 1595 1 0.0006269592476489029
426 1594 1 0.0006273525721455458
427 1593 1 0.0006277463904582549
428 1592 1 0.000628140703517588
429 1591 1 0.0006285355122564425
430 1590 1 0.00062893081

650 1378 1 0.000725689404934688
651 1377 1 0.0007262164124909223
652 1376 1 0.0007267441860465116
653 1375 1 0.0007272727272727272
654 1374 1 0.000727802037845706
655 1373 1 0.0007283321194464676
656 1372 1 0.0007288629737609329
657 1371 1 0.0007293946024799417
658 1370 1 0.00072992700729927
659 1369 1 0.0007304601899196494
660 1368 1 0.0007309941520467836
661 1367 1 0.000731528895391368
662 1367 1 0.000731528895391368
663 1366 1 0.0007320644216691069
664 1365 1 0.0007326007326007326
665 1364 1 0.0007331378299120235
666 1364 1 0.0007331378299120235
667 1363 1 0.0007336757153338225
668 1362 1 0.0007342143906020558
669 1361 1 0.0007347538574577516
670 1360 1 0.0007352941176470588
671 1359 1 0.0007358351729212656
672 1358 1 0.0007363770250368188
673 1357 1 0.0007369196757553427
674 1356 1 0.0007374631268436578
675 1355 1 0.0007380073800738007
676 1354 1 0.0007385524372230429
677 1353 1 0.0007390983000739098
678 1352 1 0.0007396449704142012
679 1351 1 0.0007401924500370096
680 1350 1 0.000

902 1132 1 0.0008833922261484099
903 1131 1 0.0008841732979664014
904 1130 1 0.0008849557522123894
905 1129 1 0.0008857395925597874
906 1128 1 0.0008865248226950354
907 1128 1 0.0008865248226950354
908 1127 1 0.0008873114463176575
909 1126 1 0.0008880994671403197
910 1125 1 0.0008888888888888889
911 1124 1 0.0008896797153024911
912 1123 1 0.0008904719501335708
913 1122 1 0.00089126559714795
914 1121 1 0.0008920606601248885
915 1120 1 0.0008928571428571428
916 1119 1 0.0008936550491510277
917 1118 1 0.0008944543828264759
918 1117 1 0.0008952551477170994
919 1116 1 0.0008960573476702509
920 1115 1 0.0008968609865470852
921 1114 1 0.0008976660682226212
922 1113 1 0.0008984725965858042
923 1112 1 0.0008992805755395684
924 1111 1 0.0009000900090009
925 1111 1 0.0009000900090009
926 1110 1 0.0009009009009009009
927 1109 1 0.0009017132551848512
928 1108 1 0.0009025270758122744
929 1107 1 0.0009033423667570009
930 1106 1 0.0009041591320072332
931 1105 1 0.0009049773755656109
932 1104 1 0.00090

1154 897 1 0.0011148272017837235
1155 896 1 0.0011160714285714285
1156 895 1 0.0011173184357541898
1157 894 1 0.0011185682326621924
1158 894 1 0.0011185682326621924
1159 893 1 0.0011198208286674132
1160 892 1 0.0011210762331838565
1161 891 1 0.001122334455667789
1162 890 1 0.0011235955056179776
1163 889 1 0.0011248593925759281
1164 888 1 0.0011261261261261261
1165 887 1 0.0011273957158962795
1166 886 1 0.001128668171557562
1167 885 1 0.0011299435028248588
1168 884 1 0.0011312217194570137
1169 883 1 0.0011325028312570782
1170 882 1 0.0011337868480725624
1171 881 1 0.0011350737797956867
1172 880 1 0.0011363636363636363
1173 879 1 0.0011376564277588168
1174 878 1 0.0011389521640091116
1175 877 1 0.0011402508551881414
1176 876 1 0.001141552511415525
1177 875 1 0.001142857142857143
1178 874 1 0.0011441647597254005
1179 873 1 0.001145475372279496
1180 872 1 0.0011467889908256881
1181 871 1 0.001148105625717566
1182 870 1 0.0011494252873563218
1183 869 1 0.0011507479861910242
1184 868 1 0.001

1406 656 1 0.001524390243902439
1407 655 1 0.0015267175572519084
1408 654 1 0.0015290519877675841
1409 653 1 0.0015313935681470138
1410 652 1 0.0015337423312883436
1411 651 1 0.0015360983102918587
1412 650 1 0.0015384615384615385
1413 649 1 0.0015408320493066256
1414 648 1 0.0015432098765432098
1415 647 1 0.0015455950540958269
1416 646 1 0.0015479876160990713
1417 645 1 0.0015503875968992248
1418 644 1 0.0015527950310559005
1419 643 1 0.0015552099533437014
1420 642 1 0.001557632398753894
1421 641 1 0.0015600624024961
1422 640 1 0.0015625
1423 639 1 0.001564945226917058
1424 638 1 0.001567398119122257
1425 637 1 0.0015698587127158557
1426 636 1 0.0015723270440251573
1427 635 1 0.0015748031496062992
1428 634 1 0.0015772870662460567
1429 633 1 0.001579778830963665
1430 632 1 0.0015822784810126582
1431 631 1 0.001584786053882726
1432 630 1 0.0015873015873015873
1433 629 1 0.001589825119236884
1434 628 1 0.0015923566878980893
1435 627 1 0.001594896331738437
1436 627 1 0.001594896331738437
1

1661 410 1 0.0024390243902439024
1662 409 1 0.0024449877750611247
1663 408 1 0.0024509803921568627
1664 407 1 0.002457002457002457
1665 406 1 0.0024630541871921183
1666 405 1 0.0024691358024691358
1667 404 1 0.0024752475247524753
1668 404 1 0.0024752475247524753
1669 403 1 0.0024813895781637717
1670 402 1 0.0024875621890547263
1671 401 1 0.0024937655860349127
1672 400 1 0.0025
1673 399 1 0.002506265664160401
1674 398 1 0.002512562814070352
1675 397 1 0.0025188916876574307
1676 396 1 0.0025252525252525255
1677 395 1 0.002531645569620253
1678 394 1 0.0025380710659898475
1679 393 1 0.002544529262086514
1680 392 1 0.002551020408163265
1681 391 1 0.0025575447570332483
1682 390 1 0.002564102564102564
1683 389 1 0.002570694087403599
1684 388 1 0.002577319587628866
1685 387 1 0.002583979328165375
1686 386 1 0.0025906735751295338
1687 385 1 0.0025974025974025974
1688 384 1 0.0026041666666666665
1689 383 1 0.0026109660574412533
1690 383 1 0.0026109660574412533
1691 382 1 0.002617801047120419
169

1917 168 2 0.011904761904761904
1918 167 2 0.011976047904191617
1919 166 2 0.012048192771084338
1920 165 2 0.012121212121212121
1921 164 2 0.012195121951219513
1922 163 2 0.012269938650306749
1923 162 2 0.012345679012345678
1924 161 2 0.012422360248447204
1925 160 2 0.0125
1926 159 2 0.012578616352201259
1927 158 2 0.012658227848101266
1928 157 2 0.012738853503184714
1929 156 2 0.01282051282051282
1930 155 2 0.012903225806451613
1931 154 2 0.012987012987012988
1932 153 2 0.013071895424836602
1933 153 3 0.0196078431372549
1934 152 3 0.019736842105263157
1935 151 3 0.019867549668874173
1936 150 3 0.02
1937 149 3 0.020134228187919462
1938 148 3 0.02027027027027027
1939 147 3 0.02040816326530612
1940 146 3 0.02054794520547945
1941 145 3 0.020689655172413793
1942 144 3 0.020833333333333332
1943 143 3 0.02097902097902098
1944 142 3 0.02112676056338028
1945 141 3 0.02127659574468085
1946 140 3 0.02142857142857143
1947 139 3 0.02158273381294964
1948 138 3 0.021739130434782608
1949 137 3 0.0218

267 1745 1 0.0005730659025787965
268 1744 1 0.0005733944954128441
269 1743 1 0.0005737234652897303
270 1742 1 0.000574052812858783
271 1741 1 0.0005743825387708214
272 1740 1 0.0005747126436781609
273 1739 1 0.0005750431282346176
274 1738 1 0.0005753739930955121
275 1737 1 0.0005757052389176742
276 1736 1 0.000576036866359447
277 1735 1 0.0005763688760806917
278 1735 1 0.0005763688760806917
279 1734 1 0.0005767012687427913
280 1733 1 0.0005770340450086555
281 1732 1 0.0005773672055427252
282 1731 1 0.0005777007510109763
283 1730 1 0.0005780346820809249
284 1729 1 0.000578368999421631
285 1728 1 0.0005787037037037037
286 1727 1 0.0005790387955993051
287 1726 1 0.0005793742757821553
288 1725 1 0.0005797101449275362
289 1724 1 0.000580046403712297
290 1723 1 0.0005803830528148578
291 1722 1 0.0005807200929152149
292 1721 1 0.0005810575246949448
293 1720 1 0.0005813953488372093
294 1719 1 0.0005817335660267597
295 1718 1 0.0005820721769499418
296 1717 1 0.0005824111822947001
297 1716 1 0.0

518 1509 1 0.0006626905235255136
519 1508 1 0.000663129973474801
520 1507 1 0.0006635700066357001
521 1506 1 0.0006640106241699867
522 1505 1 0.000664451827242525
523 1504 1 0.0006648936170212766
524 1503 1 0.0006653359946773121
525 1502 1 0.0006657789613848203
526 1502 1 0.0006657789613848203
527 1501 1 0.0006662225183211193
528 1500 1 0.0006666666666666666
529 1499 1 0.00066711140760507
530 1498 1 0.0006675567423230974
531 1498 1 0.0006675567423230974
532 1497 1 0.0006680026720106881
533 1496 1 0.0006684491978609625
534 1495 1 0.0006688963210702341
535 1494 1 0.0006693440428380187
536 1493 1 0.0006697923643670462
537 1492 1 0.0006702412868632708
538 1491 1 0.0006706908115358819
539 1490 1 0.0006711409395973154
540 1489 1 0.000671591672263264
541 1488 1 0.0006720430107526882
542 1487 1 0.0006724949562878278
543 1486 1 0.0006729475100942127
544 1485 1 0.0006734006734006734
545 1484 1 0.0006738544474393531
546 1483 1 0.0006743088334457181
547 1482 1 0.0006747638326585695
548 1481 1 0.00

770 1267 1 0.0007892659826361484
771 1266 1 0.0007898894154818325
772 1265 1 0.0007905138339920949
773 1264 1 0.0007911392405063291
774 1263 1 0.000791765637371338
775 1262 1 0.000792393026941363
776 1261 1 0.0007930214115781126
777 1260 1 0.0007936507936507937
778 1259 1 0.0007942811755361397
779 1258 1 0.000794912559618442
780 1257 1 0.0007955449482895784
781 1256 1 0.0007961783439490446
782 1255 1 0.0007968127490039841
783 1254 1 0.0007974481658692185
784 1253 1 0.0007980845969672786
785 1252 1 0.0007987220447284345
786 1251 1 0.0007993605115907274
787 1250 1 0.0008
788 1249 1 0.0008006405124099279
789 1248 1 0.0008012820512820513
790 1247 1 0.0008019246190858059
791 1246 1 0.0008025682182985554
792 1245 1 0.0008032128514056225
793 1244 1 0.0008038585209003215
794 1243 1 0.0008045052292839903
795 1242 1 0.0008051529790660225
796 1242 1 0.0008051529790660225
797 1241 1 0.0008058017727639
798 1240 1 0.0008064516129032258
799 1239 1 0.0008071025020177562
800 1238 1 0.000807754442649434

1023 1027 2 0.0019474196689386564
1024 1026 2 0.001949317738791423
1025 1025 2 0.001951219512195122
1026 1024 2 0.001953125
1027 1023 2 0.0019550342130987292
1028 1022 2 0.0019569471624266144
1029 1021 2 0.0019588638589618022
1030 1020 2 0.00196078431372549
1031 1019 2 0.001962708537782139
1032 1018 2 0.0019646365422396855
1033 1017 2 0.0019665683382497543
1034 1016 2 0.001968503937007874
1035 1015 2 0.0019704433497536944
1036 1014 2 0.0019723865877712033
1037 1013 2 0.0019743336623889436
1038 1012 2 0.001976284584980237
1039 1011 2 0.0019782393669634025
1040 1010 2 0.0019801980198019802
1041 1009 2 0.0019821605550049554
1042 1008 2 0.001984126984126984
1043 1007 2 0.0019860973187686196
1044 1006 2 0.0019880715705765406
1045 1005 2 0.001990049751243781
1046 1004 2 0.00199203187250996
1047 1003 2 0.0019940179461615153
1048 1002 2 0.001996007984031936
1049 1001 2 0.001998001998001998
1050 1000 2 0.002
1051 999 2 0.002002002002002002
1052 998 2 0.002004008016032064
1053 997 2 0.0020060180

1278 781 3 0.0038412291933418692
1279 780 3 0.0038461538461538464
1280 779 3 0.0038510911424903724
1281 778 3 0.0038560411311053984
1282 777 3 0.003861003861003861
1283 776 3 0.003865979381443299
1284 776 4 0.005154639175257732
1285 775 4 0.005161290322580645
1286 774 4 0.00516795865633075
1287 773 4 0.00517464424320828
1288 772 4 0.0051813471502590676
1289 771 4 0.005188067444876783
1290 770 4 0.005194805194805195
1291 769 4 0.005201560468140442
1292 768 4 0.005208333333333333
1293 767 4 0.005215123859191656
1294 766 4 0.005221932114882507
1295 765 4 0.00522875816993464
1296 764 4 0.005235602094240838
1297 763 4 0.005242463958060288
1298 762 4 0.005249343832020997
1299 761 4 0.005256241787122208
1300 760 4 0.005263157894736842
1301 759 4 0.005270092226613966
1302 758 4 0.005277044854881266
1303 757 4 0.005284015852047556
1304 756 4 0.005291005291005291
1305 755 4 0.005298013245033113
1306 754 4 0.005305039787798408
1307 753 4 0.005312084993359893
1308 752 4 0.005319148936170213
1309 7

1538 533 5 0.009380863039399626
1539 532 5 0.009398496240601503
1540 531 5 0.009416195856873822
1541 530 5 0.009433962264150943
1542 529 5 0.00945179584120983
1543 528 5 0.00946969696969697
1544 527 5 0.009487666034155597
1545 526 5 0.009505703422053232
1546 525 5 0.009523809523809525
1547 524 5 0.009541984732824428
1548 523 5 0.009560229445506692
1549 522 5 0.009578544061302681
1550 521 5 0.009596928982725527
1551 520 5 0.009615384615384616
1552 519 5 0.009633911368015413
1553 518 5 0.009652509652509652
1554 517 5 0.009671179883945842
1555 516 5 0.009689922480620155
1556 515 5 0.009708737864077669
1557 514 5 0.009727626459143969
1558 513 5 0.009746588693957114
1559 512 5 0.009765625
1560 511 5 0.009784735812133072
1561 510 5 0.00980392156862745
1562 509 5 0.009823182711198428
1563 508 5 0.00984251968503937
1564 507 5 0.009861932938856016
1565 506 5 0.009881422924901186
1566 505 5 0.009900990099009901
1567 504 5 0.00992063492063492
1568 503 5 0.009940357852882704
1569 502 5 0.009960159

1800 278 5 0.017985611510791366
1801 278 5 0.017985611510791366
1802 277 5 0.018050541516245487
1803 276 5 0.018115942028985508
1804 275 5 0.01818181818181818
1805 274 5 0.01824817518248175
1806 273 5 0.018315018315018316
1807 272 5 0.01838235294117647
1808 271 5 0.01845018450184502
1809 270 5 0.018518518518518517
1810 269 5 0.01858736059479554
1811 268 5 0.018656716417910446
1812 267 5 0.018726591760299626
1813 266 5 0.018796992481203006
1814 265 5 0.018867924528301886
1815 264 5 0.01893939393939394
1816 263 5 0.019011406844106463
1817 262 5 0.019083969465648856
1818 261 5 0.019157088122605363
1819 260 5 0.019230769230769232
1820 259 5 0.019305019305019305
1821 258 5 0.01937984496124031
1822 257 5 0.019455252918287938
1823 256 5 0.01953125
1824 255 5 0.0196078431372549
1825 255 5 0.0196078431372549
1826 254 5 0.01968503937007874
1827 253 5 0.019762845849802372
1828 252 5 0.01984126984126984
1829 251 5 0.0199203187250996
1830 250 5 0.02
1831 249 5 0.020080321285140562
1832 248 5 0.0201

145 1860 0 0.0
146 1859 0 0.0
147 1858 0 0.0
148 1857 0 0.0
149 1856 0 0.0
150 1855 0 0.0
151 1854 0 0.0
152 1853 0 0.0
153 1852 0 0.0
154 1852 0 0.0
155 1851 0 0.0
156 1850 0 0.0
157 1849 0 0.0
158 1848 0 0.0
159 1847 0 0.0
160 1846 0 0.0
161 1845 0 0.0
162 1844 0 0.0
163 1843 0 0.0
164 1843 0 0.0
165 1842 0 0.0
166 1841 0 0.0
167 1840 0 0.0
168 1839 0 0.0
169 1838 0 0.0
170 1837 0 0.0
171 1836 0 0.0
172 1835 0 0.0
173 1834 0 0.0
174 1833 0 0.0
175 1832 0 0.0
176 1831 0 0.0
177 1831 0 0.0
178 1830 0 0.0
179 1829 0 0.0
180 1828 0 0.0
181 1828 0 0.0
182 1827 0 0.0
183 1826 0 0.0
184 1825 0 0.0
185 1824 0 0.0
186 1823 0 0.0
187 1822 0 0.0
188 1822 0 0.0
189 1821 0 0.0
190 1820 0 0.0
191 1819 0 0.0
192 1818 0 0.0
193 1817 0 0.0
194 1816 0 0.0
195 1815 0 0.0
196 1814 0 0.0
197 1813 0 0.0
198 1812 0 0.0
199 1811 0 0.0
200 1810 0 0.0
201 1810 0 0.0
202 1809 0 0.0
203 1808 0 0.0
204 1807 0 0.0
205 1806 0 0.0
206 1805 0 0.0
207 1804 0 0.0
208 1803 0 0.0
209 1802 0 0.0
210 1801 0 0.0
211 1800 0

454 1568 2 0.0012755102040816326
455 1567 2 0.0012763241863433313
456 1566 2 0.001277139208173691
457 1565 2 0.0012779552715654952
458 1564 2 0.0012787723785166241
459 1564 2 0.0012787723785166241
460 1563 2 0.0012795905310300703
461 1562 2 0.0012804097311139564
462 1561 2 0.0012812299807815502
463 1560 2 0.001282051282051282
464 1559 2 0.0012828736369467607
465 1558 2 0.0012836970474967907
466 1557 2 0.0012845215157353885
467 1556 2 0.0012853470437017994
468 1555 2 0.0012861736334405145
469 1554 2 0.001287001287001287
470 1553 2 0.00128783000643915
471 1552 2 0.001288659793814433
472 1551 2 0.0012894906511927789
473 1550 2 0.0012903225806451613
474 1549 2 0.0012911555842479018
475 1548 2 0.0012919896640826874
476 1547 2 0.001292824822236587
477 1546 2 0.00129366106080207
478 1545 2 0.0012944983818770227
479 1544 2 0.0012953367875647669
480 1543 2 0.0012961762799740765
481 1542 2 0.0012970168612191958
482 1541 2 0.0012978585334198572
483 1540 2 0.0012987012987012987
484 1539 2 0.001299

705 1327 2 0.0015071590052750565
706 1326 2 0.0015082956259426848
707 1325 2 0.0015094339622641509
708 1324 2 0.0015105740181268882
709 1323 2 0.0015117157974300832
710 1322 2 0.0015128593040847202
711 1321 2 0.001514004542013626
712 1320 2 0.0015151515151515152
713 1319 2 0.001516300227445034
714 1318 2 0.0015174506828528073
715 1317 2 0.0015186028853454822
716 1316 2 0.001519756838905775
717 1315 2 0.001520912547528517
718 1314 2 0.0015220700152207
719 1313 2 0.0015232292460015233
720 1312 2 0.001524390243902439
721 1311 2 0.0015255530129672007
722 1310 2 0.0015267175572519084
723 1309 2 0.0015278838808250573
724 1309 2 0.0015278838808250573
725 1308 2 0.0015290519877675841
726 1307 2 0.001530221882172915
727 1306 2 0.0015313935681470138
728 1305 2 0.0015325670498084292
729 1304 2 0.0015337423312883436
730 1303 2 0.0015349194167306216
731 1302 2 0.0015360983102918587
732 1301 2 0.0015372790161414297
733 1300 2 0.0015384615384615385
734 1299 2 0.001539645881447267
735 1298 2 0.0015408

958 1082 2 0.0018484288354898336
959 1081 2 0.0018501387604070306
960 1080 2 0.001851851851851852
961 1079 2 0.0018535681186283596
962 1078 2 0.0018552875695732839
963 1077 2 0.0018570102135561746
964 1076 2 0.0018587360594795538
965 1075 2 0.0018604651162790699
966 1074 2 0.00186219739292365
967 1073 2 0.001863932898415657
968 1072 2 0.0018656716417910447
969 1071 2 0.0018674136321195146
970 1070 2 0.001869158878504673
971 1069 2 0.0018709073900841909
972 1068 2 0.0018726591760299626
973 1067 2 0.0018744142455482662
974 1066 2 0.001876172607879925
975 1065 2 0.0018779342723004694
976 1064 2 0.0018796992481203006
977 1063 2 0.0018814675446848542
978 1062 2 0.0018832391713747645
979 1061 2 0.001885014137606032
980 1060 2 0.0018867924528301887
981 1059 2 0.0018885741265344666
982 1058 2 0.001890359168241966
983 1057 2 0.001892147587511826
984 1056 2 0.001893939393939394
985 1055 2 0.0018957345971563982
986 1054 2 0.0018975332068311196
987 1053 2 0.001899335232668566
988 1052 2 0.00190114

1211 842 3 0.0035629453681710215
1212 841 3 0.0035671819262782403
1213 841 3 0.0035671819262782403
1214 840 3 0.0035714285714285713
1215 839 3 0.003575685339690107
1216 838 3 0.003579952267303103
1217 837 3 0.0035842293906810036
1218 836 3 0.0035885167464114833
1219 835 3 0.003592814371257485
1220 834 3 0.0035971223021582736
1221 833 3 0.003601440576230492
1222 832 3 0.003605769230769231
1223 831 3 0.0036101083032490976
1224 831 3 0.0036101083032490976
1225 830 3 0.0036144578313253013
1226 829 3 0.0036188178528347406
1227 828 3 0.0036231884057971015
1228 827 3 0.0036275695284159614
1229 826 3 0.0036319612590799033
1230 825 3 0.0036363636363636364
1231 824 3 0.0036407766990291263
1232 823 3 0.0036452004860267314
1233 822 3 0.0036496350364963502
1234 821 3 0.0036540803897685747
1235 820 3 0.003658536585365854
1236 819 3 0.003663003663003663
1237 818 3 0.003667481662591687
1238 817 3 0.0036719706242350062
1239 816 3 0.003676470588235294
1240 815 3 0.0036809815950920245
1241 814 3 0.003685

1468 598 3 0.005016722408026756
1469 597 3 0.005025125628140704
1470 596 3 0.0050335570469798654
1471 595 3 0.005042016806722689
1472 594 3 0.005050505050505051
1473 593 3 0.00505902192242833
1474 592 3 0.005067567567567568
1475 591 3 0.005076142131979695
1476 590 3 0.005084745762711864
1477 589 3 0.0050933786078098476
1478 588 3 0.00510204081632653
1479 587 3 0.005110732538330494
1480 586 3 0.005119453924914676
1481 585 3 0.005128205128205128
1482 584 3 0.005136986301369863
1483 583 3 0.005145797598627788
1484 582 3 0.005154639175257732
1485 581 3 0.0051635111876075735
1486 580 3 0.005172413793103448
1487 579 3 0.0051813471502590676
1488 578 3 0.005190311418685121
1489 577 3 0.005199306759098787
1490 576 3 0.005208333333333333
1491 575 3 0.0052173913043478265
1492 574 3 0.005226480836236934
1493 573 3 0.005235602094240838
1494 572 3 0.005244755244755245
1495 571 3 0.005253940455341506
1496 571 3 0.005253940455341506
1497 570 3 0.005263157894736842
1498 569 3 0.005272407732864675
1499 

1727 349 3 0.008595988538681949
1728 348 3 0.008620689655172414
1729 347 3 0.008645533141210375
1730 346 3 0.008670520231213872
1731 345 3 0.008695652173913044
1732 344 3 0.00872093023255814
1733 343 3 0.008746355685131196
1734 343 3 0.008746355685131196
1735 343 3 0.008746355685131196
1736 342 3 0.008771929824561403
1737 341 3 0.008797653958944282
1738 340 3 0.008823529411764706
1739 339 3 0.008849557522123894
1740 338 3 0.008875739644970414
1741 337 3 0.008902077151335312
1742 336 3 0.008928571428571428
1743 335 3 0.008955223880597015
1744 334 3 0.008982035928143712
1745 333 3 0.009009009009009009
1746 332 3 0.009036144578313253
1747 331 3 0.00906344410876133
1748 330 3 0.00909090909090909
1749 329 3 0.00911854103343465
1750 328 3 0.009146341463414634
1751 327 3 0.009174311926605505
1752 326 3 0.009202453987730062
1753 325 3 0.009230769230769232
1754 324 3 0.009259259259259259
1755 323 3 0.009287925696594427
1756 322 3 0.009316770186335404
1757 321 3 0.009345794392523364
1758 320 3 0

1991 96 5 0.052083333333333336
1992 95 5 0.05263157894736842
1993 94 5 0.05319148936170213
1994 93 5 0.053763440860215055
1995 92 5 0.05434782608695652
1996 91 5 0.054945054945054944
1997 90 5 0.05555555555555555
1998 89 5 0.056179775280898875
1999 88 5 0.056818181818181816
0
0 1999 0 0.0
1 1998 0 0.0
2 1997 0 0.0
3 1996 0 0.0
4 1995 0 0.0
5 1994 0 0.0
6 1993 0 0.0
7 1992 0 0.0
8 1991 0 0.0
9 1990 0 0.0
10 1989 0 0.0
11 1988 0 0.0
12 1987 0 0.0
13 1986 0 0.0
14 1985 0 0.0
15 1984 0 0.0
16 1983 0 0.0
17 1982 0 0.0
18 1981 0 0.0
19 1980 0 0.0
20 1979 0 0.0
21 1978 0 0.0
22 1977 0 0.0
23 1976 0 0.0
24 1975 0 0.0
25 1974 0 0.0
26 1973 0 0.0
27 1972 0 0.0
28 1971 0 0.0
29 1970 0 0.0
30 1969 0 0.0
31 1968 0 0.0
32 1967 0 0.0
33 1966 0 0.0
34 1965 0 0.0
35 1964 0 0.0
36 1963 0 0.0
37 1962 0 0.0
38 1961 0 0.0
39 1960 0 0.0
40 1959 0 0.0
41 1958 0 0.0
42 1957 0 0.0
43 1956 0 0.0
44 1955 0 0.0
45 1954 0 0.0
46 1953 0 0.0
47 1952 0 0.0
48 1951 0 0.0
49 1950 0 0.0
50 1949 0 0.0
51 1948 0 0.0
52 19

337 1682 1 0.0005945303210463733
338 1681 1 0.000594883997620464
339 1681 1 0.000594883997620464
340 1680 1 0.0005952380952380953
341 1679 1 0.0005955926146515784
342 1678 1 0.0005959475566150178
343 1677 1 0.0005963029218843172
344 1676 1 0.0005966587112171838
345 1675 1 0.0005970149253731343
346 1674 1 0.0005973715651135006
347 1673 1 0.0005977286312014345
348 1672 1 0.0005980861244019139
349 1671 1 0.0005984440454817474
350 1670 1 0.0005988023952095808
351 1669 1 0.0005991611743559018
352 1668 1 0.0005995203836930455
353 1667 1 0.0005998800239952009
354 1666 1 0.0006002400960384153
355 1665 1 0.0006006006006006006
356 1664 1 0.0006009615384615385
357 1663 1 0.0006013229104028864
358 1662 1 0.0006016847172081829
359 1661 1 0.0006020469596628537
360 1660 1 0.0006024096385542169
361 1659 1 0.0006027727546714888
362 1658 1 0.0006031363088057901
363 1657 1 0.0006035003017501509
364 1656 1 0.0006038647342995169
365 1655 1 0.0006042296072507553
366 1654 1 0.0006045949214026602
367 1653 1 0

589 1441 1 0.0006939625260235947
590 1440 1 0.0006944444444444445
591 1439 1 0.0006949270326615705
592 1438 1 0.0006954102920723226
593 1437 1 0.0006958942240779402
594 1436 1 0.0006963788300835655
595 1435 1 0.0006968641114982578
596 1434 1 0.000697350069735007
597 1433 1 0.0006978367062107466
598 1432 1 0.0006983240223463687
599 1431 1 0.0006988120195667365
600 1430 1 0.0006993006993006993
601 1429 1 0.0006997900629811056
602 1428 1 0.0007002801120448179
603 1427 1 0.000700770847932726
604 1426 1 0.0007012622720897616
605 1425 1 0.0007017543859649122
606 1424 1 0.0007022471910112359
607 1423 1 0.0007027406886858749
608 1422 1 0.0007032348804500703
609 1421 1 0.0007037297677691766
610 1420 1 0.0007042253521126761
611 1420 1 0.0007042253521126761
612 1419 1 0.0007047216349541931
613 1418 1 0.0007052186177715092
614 1417 1 0.0007057163020465773
615 1416 1 0.0007062146892655367
616 1415 1 0.0007067137809187279
617 1414 1 0.0007072135785007072
618 1413 1 0.0007077140835102619
619 1412 1 0

841 1200 2 0.0016666666666666668
842 1199 2 0.0016680567139282735
843 1199 2 0.0016680567139282735
844 1198 2 0.001669449081803005
845 1197 2 0.001670843776106934
846 1196 2 0.0016722408026755853
847 1195 2 0.0016736401673640166
848 1195 2 0.0016736401673640166
849 1194 2 0.0016750418760469012
850 1193 2 0.0016764459346186086
851 1192 2 0.0016778523489932886
852 1191 2 0.0016792611251049538
853 1190 2 0.0016806722689075631
854 1189 2 0.001682085786375105
855 1188 2 0.0016835016835016834
856 1187 2 0.0016849199663016006
857 1186 2 0.0016863406408094434
858 1185 2 0.0016877637130801688
859 1184 2 0.0016891891891891893
860 1183 2 0.0016906170752324597
861 1182 2 0.001692047377326565
862 1181 2 0.001693480101608806
863 1180 2 0.001694915254237288
864 1179 2 0.0016963528413910093
865 1178 2 0.001697792869269949
866 1177 2 0.0016992353440951572
867 1177 2 0.0016992353440951572
868 1176 2 0.0017006802721088435
869 1175 2 0.001702127659574468
870 1174 2 0.0017035775127768314
871 1173 2 0.00170

1094 956 2 0.0020920502092050207
1095 955 2 0.0020942408376963353
1096 954 2 0.0020964360587002098
1097 953 2 0.002098635886673662
1098 952 2 0.0021008403361344537
1099 951 2 0.002103049421661409
1100 950 2 0.002105263157894737
1101 949 2 0.002107481559536354
1102 948 2 0.002109704641350211
1103 948 2 0.002109704641350211
1104 947 2 0.0021119324181626186
1105 946 2 0.0021141649048625794
1106 945 2 0.0021164021164021165
1107 945 2 0.0021164021164021165
1108 945 2 0.0021164021164021165
1109 944 2 0.00211864406779661
1110 943 2 0.0021208907741251328
1111 942 2 0.0021231422505307855
1112 941 2 0.0021253985122210413
1113 940 2 0.002127659574468085
1114 939 2 0.002129925452609159
1115 938 2 0.0021321961620469083
1116 937 2 0.0021344717182497333
1117 936 2 0.002136752136752137
1118 935 2 0.0021390374331550803
1119 934 2 0.0021413276231263384
1120 933 2 0.0021436227224008574
1121 932 2 0.002145922746781116
1122 932 2 0.002145922746781116
1123 931 2 0.0021482277121374865
1124 930 2 0.0021505376

1349 713 4 0.005610098176718092
1350 712 4 0.0056179775280898875
1351 711 4 0.005625879043600563
1352 710 4 0.005633802816901409
1353 710 4 0.005633802816901409
1354 709 4 0.005641748942172073
1355 708 4 0.005649717514124294
1356 707 4 0.005657708628005658
1357 706 4 0.0056657223796034
1358 705 4 0.005673758865248227
1359 704 4 0.005681818181818182
1360 703 4 0.005689900426742532
1361 702 4 0.005698005698005698
1362 701 4 0.005706134094151213
1363 700 4 0.005714285714285714
1364 699 4 0.005722460658082976
1365 698 4 0.0057306590257879654
1366 697 4 0.005738880918220947
1367 696 4 0.005747126436781609
1368 695 4 0.0057553956834532375
1369 694 4 0.005763688760806916
1370 693 4 0.005772005772005772
1371 692 4 0.005780346820809248
1372 691 4 0.005788712011577424
1373 690 4 0.005797101449275362
1374 689 4 0.005805515239477504
1375 688 4 0.005813953488372093
1376 687 4 0.005822416302765648
1377 686 4 0.0058309037900874635
1378 685 4 0.00583941605839416
1379 684 4 0.005847953216374269
1380 68

1610 462 5 0.010822510822510822
1611 461 5 0.010845986984815618
1612 461 5 0.010845986984815618
1613 460 5 0.010869565217391304
1614 460 5 0.010869565217391304
1615 459 5 0.010893246187363835
1616 458 5 0.010917030567685589
1617 457 5 0.010940919037199124
1618 456 5 0.010964912280701754
1619 455 5 0.01098901098901099
1620 454 5 0.011013215859030838
1621 453 5 0.011037527593818985
1622 452 5 0.011061946902654867
1623 451 5 0.011086474501108648
1624 450 5 0.011111111111111112
1625 449 5 0.011135857461024499
1626 448 5 0.011160714285714286
1627 447 5 0.011185682326621925
1628 446 5 0.011210762331838564
1629 445 5 0.011235955056179775
1630 444 5 0.01126126126126126
1631 443 5 0.011286681715575621
1632 442 5 0.011312217194570135
1633 441 5 0.011337868480725623
1634 440 5 0.011363636363636364
1635 439 5 0.011389521640091117
1636 438 5 0.01141552511415525
1637 437 5 0.011441647597254004
1638 436 5 0.011467889908256881
1639 435 5 0.011494252873563218
1640 434 5 0.01152073732718894
1641 433 5 0

1873 209 5 0.023923444976076555
1874 209 5 0.023923444976076555
1875 208 5 0.02403846153846154
1876 207 5 0.024154589371980676
1877 206 5 0.024271844660194174
1878 205 5 0.024390243902439025
1879 204 5 0.024509803921568627
1880 203 5 0.024630541871921183
1881 202 5 0.024752475247524754
1882 201 5 0.024875621890547265
1883 200 5 0.025
1884 199 5 0.02512562814070352
1885 198 5 0.025252525252525252
1886 197 5 0.025380710659898477
1887 196 5 0.025510204081632654
1888 195 5 0.02564102564102564
1889 194 5 0.02577319587628866
1890 193 5 0.025906735751295335
1891 192 5 0.026041666666666668
1892 191 5 0.02617801047120419
1893 190 5 0.02631578947368421
1894 189 5 0.026455026455026454
1895 188 5 0.026595744680851064
1896 187 5 0.026737967914438502
1897 186 5 0.026881720430107527
1898 186 5 0.026881720430107527
1899 185 5 0.02702702702702703
1900 184 5 0.02717391304347826
1901 183 5 0.0273224043715847
1902 182 5 0.027472527472527472
1903 181 5 0.027624309392265192
1904 180 5 0.027777777777777776
1