# Setup Environment

In [27]:
from working_dir import set_working_dir

In [28]:
# Call the project's working-directory helper; its output below reports the
# resulting current working directory. Presumably this moves to the repository
# root so the relative data paths used later resolve — confirm in working_dir.py.
set_working_dir()

Current working directory: D:\git\CS410-Project


In [29]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell executes, so edits to the project's .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Import Libraries

In [30]:
import pandas as pd
from sentence_ranker import SentenceRanker, Inclusion
from pos_difficulty_ranker import POSDifficultyRanker
from word_frequency import WordFrequencies
from difficulty_ranker import DifficultyRankers
from word_difficulty_ranker import WordDifficultyRanker

In [31]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [32]:
# Widen text columns so the long per-word "Details" strings are not truncated
# when the results frame is displayed.
# Use the fully qualified option name: abbreviated names are resolved by
# pattern matching and can become ambiguous in other pandas versions.
pd.set_option('display.max_colwidth', 400)

# Combined Difficulty
We experiment with a combined difficulty score (word difficulty and POS difficulty combined with equal weights of 0.5 each) and test it on some example sentences.

In [33]:
# Load the word-frequency data from the Excel workbook. The path is relative,
# so it depends on the current working directory.
word_frequencies = WordFrequencies.read_excel(file_path="data/wordFrequency.xlsx")

In [34]:
# Build the two component rankers: one driven by the word-frequency data,
# one driven by part-of-speech tags. Running this cell also triggers an NLTK
# check/download of the averaged_perceptron_tagger package (see output below).
word_difficulty_ranker = WordDifficultyRanker(word_frequencies=word_frequencies)
pos_difficulty_ranker = POSDifficultyRanker()

# Combine both rankers with equal weight (0.5 each) and wrap the combination
# in the sentence-level ranker used by the cells below.
difficulty_rankers = DifficultyRankers(
    rankers=[word_difficulty_ranker, pos_difficulty_ranker],
    weights=[0.5, 0.5],
)
ranker = SentenceRanker(difficulty_rankers)

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\weich\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [35]:
# Sample sentences to rank. All but the last contain the word "about".
# Note: two of them ("don’t", "Don’t") use a typographic apostrophe (’) rather
# than an ASCII one; the strings are kept exactly as written.
examples = [
    "What do you really think about it?",
    "I don’t care about your past.",
    "What do you think about that issue?",
    "He genuinely cares about me.",
    "Do you have any information about the project?",
    "Our house is worth about 150,000 dollars.",
    "Don’t worry too much about unimportant things.",
    "John's mother abandoned him in cold heart after he was born",
]

In [36]:
# Run the combined ranker over every example sentence and keep the result
# object for sorting and tabulation in the following cells.
ranked_sentences = ranker.rank(examples)

In [37]:
# Order the ranked sentences by difficulty. The return value is not captured,
# so this presumably sorts in place — the table displayed further down is in
# ascending "Overall Difficulty" order. NOTE(review): confirm in sentence_ranker.py.
ranked_sentences.sort_by_difficulty()

In [38]:
# Convert the ranked sentences to a DataFrame, requesting every available
# column (Inclusion.All) so the per-ranker scores and details are included.
df = ranked_sentences.to_pandas(inclusion=Inclusion.All)

In [39]:
# Display the full results table (only eight rows, so no need to truncate).
df

Unnamed: 0,Sentence,Word Count,Overall Difficulty,Overall Difficulty Calculation,Word Difficulty,Word Raw Difficulty,Word Difficulty Details,POS Difficulty,POS Raw Difficulty,POS Difficulty Details
0,Do you have any information about the project?,8,0.45,0.1*0.5 + 0.8*0.5,0.1,150.25,"do(28), you(9), have(22), any(108), information(289), about(41), the(1), project(704)",0.8,12.0,"do(VBP:1), you(PRP:1), have(VB:1), any(DT:1), information(NN:1), about(IN:5), the(DT:1), project(NN:1)"
1,I don’t care about your past.,7,0.47,0.07*0.5 + 0.87*0.5,0.07,102.0,"i(7), do(28), not(24), care(254), about(41), your(46), past(314)",0.87,13.0,"i(NNS:1), do(VBP:1), not(RB:3), care(VB:1), about(IN:5), your(PRP$:1), past(NN:1)"
2,"Our house is worth about 150,000 dollars.",6,0.49,0.25*0.5 + 0.73*0.5,0.25,377.17,"our(77), house(176), is(11), worth(847), about(41), dollars(1111)",0.73,11.0,"our(PRP$:1), house(NN:1), is(VBZ:1), worth(JJ:2), about(IN:5), dollars(NNS:1)"
3,What do you think about that issue?,7,0.53,0.06*0.5 + 1.0*0.5,0.06,92.29,"what(29), do(28), you(9), think(75), about(41), that(8), issue(456)",1.0,15.0,"what(WP:5), do(VBP:1), you(PRP:1), think(VB:1), about(IN:5), that(DT:1), issue(NN:1)"
4,What do you really think about it?,7,0.58,0.03*0.5 + 1.13*0.5,0.03,44.29,"what(29), do(28), you(9), really(118), think(75), about(41), it(10)",1.13,17.0,"what(WP:5), do(VBP:1), you(PRP:1), really(RB:3), think(VBP:1), about(IN:5), it(PRP:1)"
5,John's mother abandoned him in cold heart after he was born,12,0.9,0.4*0.5 + 1.4*0.5,0.4,596.25,"john(356), s(410), mother(284), abandoned(3516), him(67), in(6), cold(866), heart(479), after(102), he(15), was(14), born(1040)",1.4,21.0,"john(NN:1), s(NN:1), mother(NN:1), abandoned(VBD:1), him(PRP:1), in(IN:5), cold(JJ:2), heart(NN:1), after(IN:5), he(PRP:1), was(VBD:1), born(VBN:1)"
6,Don’t worry too much about unimportant things.,8,0.92,0.63*0.5 + 1.2*0.5,0.63,947.0,"do(28), not(24), worry(1095), too(129), much(111), about(41), unimportant(6000), things(148)",1.2,18.0,"do(VB:1), not(RB:3), worry(VB:1), too(RB:3), much(JJ:2), about(IN:5), unimportant(JJ:2), things(NNS:1)"
7,He genuinely cares about me.,5,1.06,1.38*0.5 + 0.73*0.5,1.38,2076.6,"he(15), genuinely(6000), cares(4285), about(41), me(42)",0.73,11.0,"he(PRP:1), genuinely(RB:3), cares(VBZ:1), about(IN:5), me(PRP:1)"
