# Classify texts by difficulty

In [None]:
# !pip install textstat
# !pip install wordfreq
# !pip3 install big-phoney
# !pip install pron-difficulty
# !pip install scikit-learn  # needed by pron-difficulty


Package references:

- textstat: https://pypi.org/project/textstat/
- wordfreq: https://pypi.org/project/wordfreq/
- pron-difficulty: https://github.com/federicotorrielli/pron-difficulty
- big-phoney: https://github.com/repp/big-phoney

In [5]:
import textstat
from wordfreq import zipf_frequency
import pandas as pd

## Classify words

In [1]:
# Sample word used to try out the syllable counter.
text = "thorough"

In [5]:
# NOTE(review): this import sits mid-notebook rather than in the import cell near the top.
from big_phoney import BigPhoney
phoney = BigPhoney()
# Print the syllable count big-phoney reports for the sample word in `text`.
print(phoney.count_syllables(text))

2


## Classify texts

In [2]:
# Relative path to the sentence CSV; resolved against the notebook's working directory.
filename = "../sentences/datasets/eng_spa_audio_sentences.csv"
# header=0: the first row of the file supplies the column names.
# NOTE(review): the preview of this frame shows an "Unnamed: 0" column, which suggests
# the CSV was written together with its index; index_col=0 may be intended — confirm.
eng_sen = pd.read_csv(filename, header=0)

In [6]:
# Preview the first 10 rows of the loaded frame.
eng_sen.head(10)

Unnamed: 0,eng_id,eng_sentence,spa_id,spa_sen,audio_id
0,403859,"If I could rearrange the alphabet, I would put...",690143,"Si pudiera reordenar el alfabeto, pondría la T...",1123747
1,403860,I'm not good at multitasking.,1612871,No soy bueno para hacer varias cosas a la vez.,32210
2,414272,Any teacher that can be replaced by a machine ...,627877,Cualquier profesor que pueda ser reemplazado p...,911934
3,618394,No words can express how amazing you are.,1011397,No existen palabras para expresar lo increíble...,906756
4,618396,It's rare to meet nice people like you.,1011395,Es difícil conocer a gente tan agradable como tú.,906757
5,618397,There is something very charming about you.,2945787,Hay algo muy encantador en ti.,911594
6,618401,It's the first time in my life I've felt so co...,618418,Es la primera vez en mi vida que me siento tan...,344960
7,618405,You have very sexy legs.,618465,Tienes piernas muy sexy.,907498
8,4796852,Alex is non-binary.,5990784,Alex es no binarie.,1099772
9,4834651,You smell awful.,9443872,Hueles horrible.,270941


In [3]:
def count_words(text: str) -> int:
    """Return the word count of ``text`` as reported by textstat.

    Delegates to ``textstat.lexicon_count`` with ``removepunct=True``.
    """
    return textstat.lexicon_count(text, removepunct=True)

In [None]:
def get_text_difficulty(
    text: str,
    *,
    min_words: int = 4,
    easy_max_grade: float = 5,
    easy_max_words: int = 6,
    medium_max_grade: float = 8,
    medium_max_words: int = 8,
) -> int:
    """Bucket a sentence into a difficulty level: 0 (easy), 1 (medium) or 2 (hard).

    The decision combines the word count from ``textstat.lexicon_count`` with
    the numeric score from ``textstat.text_standard(..., float_output=True)``.

    Args:
        text: Sentence to classify.
        min_words: Sentences with fewer words than this are always easy.
        easy_max_grade: Exclusive upper bound on the score for "easy".
        easy_max_words: Exclusive upper bound on the word count for "easy".
        medium_max_grade: Exclusive upper bound on the score for "medium".
        medium_max_words: Exclusive upper bound on the word count for "medium".

    Returns:
        0 for easy, 1 for medium, 2 for hard.
    """
    word_count = textstat.lexicon_count(text, removepunct=True)

    # Very short sentences are easy regardless of their score, so decide
    # before computing the readability score at all: the score cannot change
    # the outcome here and is not needed for tiny or empty inputs.
    if word_count < min_words:
        return 0  # Easy

    grade_level_comprehension = textstat.text_standard(text, float_output=True)

    # Both the score and the length must stay under the bucket's limits.
    if grade_level_comprehension < easy_max_grade and word_count < easy_max_words:
        return 0  # Easy
    if grade_level_comprehension < medium_max_grade and word_count < medium_max_words:
        return 1  # Medium
    return 2  # Hard

In [7]:
# Classify every English sentence and store the level (0/1/2) in a new "difficulty" column.
eng_sen["difficulty"] = eng_sen["eng_sentence"].apply(get_text_difficulty)

In [8]:
# Store each English sentence's word count in a new "word_count" column.
eng_sen["word_count"] = eng_sen["eng_sentence"].apply(count_words)

In [14]:
# Inspect the first 60 rows, including the "difficulty" and "word_count" columns.
eng_sen.head(60)

Unnamed: 0,eng_id,eng_sentence,spa_id,spa_sen,audio_id,difficulty,word_count
0,403859,"If I could rearrange the alphabet, I would put...",690143,"Si pudiera reordenar el alfabeto, pondría la T...",1123747,2,13
1,403860,I'm not good at multitasking.,1612871,No soy bueno para hacer varias cosas a la vez.,32210,1,5
2,414272,Any teacher that can be replaced by a machine ...,627877,Cualquier profesor que pueda ser reemplazado p...,911934,2,11
3,618394,No words can express how amazing you are.,1011397,No existen palabras para expresar lo increíble...,906756,2,8
4,618396,It's rare to meet nice people like you.,1011395,Es difícil conocer a gente tan agradable como tú.,906757,2,8
5,618397,There is something very charming about you.,2945787,Hay algo muy encantador en ti.,911594,1,7
6,618401,It's the first time in my life I've felt so co...,618418,Es la primera vez en mi vida que me siento tan...,344960,2,13
7,618405,You have very sexy legs.,618465,Tienes piernas muy sexy.,907498,0,5
8,4796852,Alex is non-binary.,5990784,Alex es no binarie.,1099772,0,3
9,4834651,You smell awful.,9443872,Hueles horrible.,270941,0,3
