In [1]:
%matplotlib inline
import re
import pandas as pd
pd.set_option('precision', 2)
pd.set_option('display.max_rows', 600)
from IPython.display import display, HTML, Markdown
import IPython.core.display as core_disp

# merge entire text file & convert line endings + spaces to single line endings
with open('./data/federalist_all.txt', 'r') as file:
    papers = re.sub(r'\n\s*', '\n', file.read())
# remove gutenberg front and end matter
content_str = papers.split('The Federalist Papers')[3].split('End of the Project Gutenberg EBook of')[0]
# divide papers into rows, parse sections into columns
papers_df = pd.DataFrame(re.findall(
    r"""
    FEDERALIST.?\sNo.\s+(?P<no>\d+)             # Find beginning of Paper, record number
    \n(?P<title>[\s\S]*?)(?=\n(?:From|For))     # title is everything up to newline followed by either `For` or `From` - required altering no 58 - no pub included
    \n(?P<pulication>[\s\S]*?)(?=\.).           # publication is everything up to a period
    \n(?P<author>[\s\S]*?)(?=\nTo\s)            # author(s) is everything up to `To`
    \n(?P<addressee>[\s\S]*?)(?=[:.])[:.]       # addressee is everything up to a colon
    \n(?P<body>[\s\S]*?)(?=FEDERALIST|\Z)       # everything up to next paper or end of string recorded as body
    """, content_str, re.VERBOSE))
papers_df.rename({0:'no', 1:'title', 2:'publication', 3:'author', 4:'addressee', 5:'body'}, axis=1, inplace=True)

for number, text in zip(papers_df.no, papers_df.body): 
    with open('./data/bodies/{}.txt'.format(number), 'w') as writefile:
        writefile.write(text)

In [2]:
papers_df

Unnamed: 0,no,title,publication,author,addressee,body
0,1,General Introduction,For the Independent Journal,HAMILTON,To the People of the State of New York,AFTER an unequivocal experience of the ineffic...
1,2,Concerning Dangers from Foreign Force and Infl...,For the Independent Journal,JAY,To the People of the State of New York,WHEN the people of America reflect that they a...
2,3,The Same Subject Continued\n(Concerning Danger...,For the Independent Journal,JAY,To the People of the State of New York,IT IS not a new observation that the people of...
3,4,The Same Subject Continued\n(Concerning Danger...,For the Independent Journal,JAY,To the People of the State of New York,MY LAST paper assigned several reasons why the...
4,5,The Same Subject Continued\n(Concerning Danger...,For the Independent Journal,JAY,To the People of the State of New York,"QUEEN ANNE, in her letter of the 1st July, 170..."
5,6,Concerning Dangers from Dissensions Between th...,For the Independent Journal,HAMILTON,To the People of the State of New York,THE three last numbers of this paper have been...
6,7,The Same Subject Continued\n(Concerning Danger...,For the Independent Journal,HAMILTON,To the People of the State of New York,"IT IS sometimes asked, with an air of seeming ..."
7,8,The Consequences of Hostilities Between the St...,From the New York Packet,"Tuesday, November 20, 1787.\nHAMILTON",To the People of the State of New York,ASSUMING it therefore as an established truth ...
8,9,The Union as a Safeguard Against Domestic Fact...,For the Independent Journal,HAMILTON,To the People of the State of New York,A FIRM Union will be of the utmost moment to t...
9,10,The Same Subject Continued\n(The Union as a Sa...,From the New York Packet,"Friday, November 23, 1787.\nMADISON",To the People of the State of New York,AMONG the numerous advantages promised by a we...


In [3]:
manual_terms = ['authority', 'sovereign', 'tyranny', 'liberty', 'union', 'administer', 'confederacy', 'american states', 'federal', 'common interest', 'military', 'government', 'law', 'jealousy', 'coercion', 'political', 'principle', 'principal', 'sufficient', 'equal']
manual_freq_15 = [7, 5, 0, 0, 12, 3, 7, 0, 5, 2, 2, 15, 6, 2, 2, 7, 7, 2, 0, 0]
manual_freq_18 = [6, 3, 4, 3, 6, 4, 12, 1, 6, 0, 0, 8, 5, 3, 1, 2, 1, 1, 1, 4]
manual_df = pd.DataFrame(zip(manual_terms, manual_freq_15, manual_freq_18))
manual_df.rename({0:'Term', 1:'manual_freq_15', 2:'manual_freq_18'}, axis=1, inplace=True)
manual_df

Unnamed: 0,Term,manual_freq_15,manual_freq_18
0,authority,7,6
1,sovereign,5,3
2,tyranny,0,4
3,liberty,0,3
4,union,12,6
5,administer,3,4
6,confederacy,7,12
7,american states,0,1
8,federal,5,6
9,common interest,2,0


In [4]:
termine15 = pd.read_csv('./data/results/15_termine.csv')
termine15['Term'] = termine15['Term'].str.lower()
termine15.rename({'Rank':'15_termine_rank', 'Score':'15_termine_score'}, axis=1, inplace=True)
termine18 = pd.read_csv('./data/results/18_termine.csv')
termine18['Term'] = termine18['Term'].str.lower()
termine18.rename({'Rank':'18_termine_rank', 'Score':'18_termine_score'}, axis=1, inplace=True)
fivefilters15 = pd.read_csv('./data/results/15_5filters.csv')
fivefilters15['Term'] = fivefilters15['Term'].str.lower()
fivefilters15.rename({'Occurrence':'15_5filters_freq', 'Word count':'15_ngram_length'}, axis=1, inplace=True)
fivefilters18 = pd.read_csv('./data/results/18_5filters.csv')
fivefilters18['Term'] = fivefilters18['Term'].str.lower()
fivefilters18.rename({'Occurrence':'18_5filters_freq', 'Word count':'18_ngram_length'}, axis=1, inplace=True)

df_15 = pd.merge(termine15, fivefilters15, on='Term', how='outer')
df_15_inner = pd.merge(termine15, fivefilters15, on='Term', how='inner')
df_18 = pd.merge(termine18, fivefilters18, on='Term', how='outer')
df_18_inner = pd.merge(termine18, fivefilters18, on='Term', how='inner')
df_term_5filt = pd.merge(df_15, df_18, on='Term', how='inner')

df_full = pd.merge(manual_df, df_15, on='Term', how='left')
df_full = pd.merge(df_full, df_18, on='Term', how='left')


In [5]:
display(termine15)

Unnamed: 0,15_termine_rank,Term,15_termine_score
0,1,common interest,2.0
1,1,united state,2.0
2,1,federal authority,2.0
3,1,respective member,2.0
4,5,awful stand congress,1.58
5,5,prudent man choose,1.58
6,5,federal government thing,1.58
7,5,obvious reason regard,1.58
8,9,active influence,1.0
9,9,political safety,1.0


In [6]:
display(fivefilters15)

Unnamed: 0,Term,15_5filters_freq,15_ngram_length
0,union,12,1
1,government,11,1
2,power,8,1
3,states,8,1
4,men,7,1
5,time,6,1
6,kind,6,1
7,part,6,1
8,authority,6,1
9,interest,5,1


In [7]:
display(termine18)

Unnamed: 0,18_termine_rank,Term,18_termine_score
0,1,amphictyonic confederacy,2.0
1,1,federal council,2.0
2,3,curious political fabric,1.58
3,4,unfaithful part,1.0
4,4,had greece,1.0
5,4,common welfare,1.0
6,4,syrian prince,1.0
7,4,obvious policy,1.0
8,4,contest philip,1.0
9,4,dangerous expedient,1.0


In [8]:
display(fivefilters18)

Unnamed: 0,Term,18_5filters_freq,18_ngram_length
0,cities,15,1
1,members,13,1
2,macedon,11,1
3,achaeans,9,1
4,greece,8,1
5,league,8,1
6,government,7,1
7,union,5,1
8,greece seventy-three years,1,3
9,arts,5,1


In [9]:
display(df_full.style.hide_index())
df_full.to_csv("./data/results/df_full.csv", index=False)

Term,manual_freq_15,manual_freq_18,15_termine_rank,15_termine_score,15_5filters_freq,15_ngram_length,18_termine_rank,18_termine_score,18_5filters_freq,18_ngram_length
authority,7,6,,,6.0,1.0,,,5.0,1.0
sovereign,5,3,,,,,,,,
tyranny,0,4,,,,,,,2.0,1.0
liberty,0,3,,,,,,,2.0,1.0
union,12,6,,,12.0,1.0,,,5.0,1.0
union,12,6,,,12.0,1.0,,,1.0,1.0
administer,3,4,,,,,,,,
confederacy,7,12,,,1.0,1.0,,,5.0,1.0
confederacy,7,12,,,1.0,1.0,,,1.0,1.0
confederacy,7,12,,,1.0,1.0,,,5.0,1.0


In [10]:
display(HTML(df_15_inner.to_html(index=False)))
df_15_inner.to_csv("./data/results/df_15_inner.csv", index=False)

15_termine_rank,Term,15_termine_score,15_5filters_freq,15_ngram_length
9,state authority,1.0,1,2
9,melancholy situation,1.0,1,2
9,waste land,1.0,1,2
9,discretionary superintendence,1.0,1,2
9,confederate government,1.0,1,2
9,epidemical rage,1.0,1,2
9,collective capacities,1.0,1,2
91,depreciate property,0.0,1,2


In [11]:
display(HTML(df_18_inner.to_html(index=False)))
df_18_inner.to_csv("./data/results/df_18_inner.csv", index=False)

18_termine_rank,Term,18_termine_score,18_5filters_freq,18_ngram_length
1,amphictyonic confederacy,2.0,2,2
4,unprovoked attack,1.0,1,2
4,general authority,1.0,1,2
4,achaean republic,1.0,1,2
4,abbe milot,1.0,1,2
4,peloponnesian war,1.0,1,2
4,macedonian power,1.0,1,2
4,achaean league,1.0,1,2
60,macedonian army,0.0,1,2
60,amphictyonic council,0.0,3,2
