In [1]:
import re

def count_syllables(word):
    word = word.lower()
    syllable_count = 0
    vowels = "aeiouy"
    if word[0] in vowels:
        syllable_count += 1
    for index in range(1, len(word)):
        if word[index] in vowels and word[index - 1] not in vowels:
            syllable_count += 1
    if word.endswith("e"):
        syllable_count -= 1
    if word.endswith("le") and len(word) > 2 and word[-3] not in vowels:
        syllable_count += 1
    if syllable_count == 0:
        syllable_count += 1
    return syllable_count

def analyze_readability(text):
    sentences = re.split(r'[.!?]', text)
    words = re.findall(r'\b\w+\b', text)
    syllables = sum(count_syllables(word) for word in words)

    num_sentences = len(sentences)
    num_words = len(words)
    num_syllables = syllables

    if num_sentences == 0:
        num_sentences = 1  # Avoid division by zero

    # Flesch Reading Ease
    ASL = num_words / num_sentences  # Average Sentence Length
    ASW = num_syllables / num_words  # Average Syllables per Word
    flesch_reading_ease = 206.835 - 1.015 * ASL - 84.6 * ASW

    # Flesch-Kincaid Grade Level
    flesch_kincaid_grade = 0.39 * ASL + 11.8 * ASW - 15.59

    return {
        'num_sentences': num_sentences,
        'num_words': num_words,
        'num_syllables': num_syllables,
        'flesch_reading_ease': flesch_reading_ease,
        'flesch_kincaid_grade': flesch_kincaid_grade
    }

# Example usage
text = """Your text goes here. This is a sample text to demonstrate readability analysis.
The readability scores will help you understand how easy or difficult it is to read the given text."""
readability_scores = analyze_readability(text)

print(f"Number of sentences: {readability_scores['num_sentences']}")
print(f"Number of words: {readability_scores['num_words']}")
print(f"Number of syllables: {readability_scores['num_syllables']}")
print(f"Flesch Reading Ease score: {readability_scores['flesch_reading_ease']:.2f}")
print(f"Flesch-Kincaid Grade Level: {readability_scores['flesch_kincaid_grade']:.2f}")


Number of sentences: 4
Number of words: 31
Number of syllables: 52
Flesch Reading Ease score: 57.06
Flesch-Kincaid Grade Level: 7.23


In [23]:
import textstat

# Example text
text = """The absurdity of the analysis is pararelevant in the context of the current 
mystification of sentences."""

text = "She went to the sink and drank a glass of water."

# Compute various readability scores
scores = {
    "Dale-Chall Readability Score": textstat.dale_chall_readability_score(text),
    #"Flesch Reading Ease": textstat.flesch_reading_ease(text),
    #"Flesch-Kincaid Grade Level": textstat.flesch_kincaid_grade(text),
    "Gunning Fog Index": textstat.gunning_fog(text),
    #"SMOG Index": textstat.smog_index(text),
    #"Automated Readability Index": textstat.automated_readability_index(text),
}

# Print the results
for score_name, score_value in scores.items():
    print(f"{score_name}: {score_value:.2f}")


Dale-Chall Readability Score: 0.55
Gunning Fog Index: 4.40


In [9]:
import nltk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to /Users/au701203/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/au701203/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [64]:
import nltk
from nltk import pos_tag
from nltk.tokenize import word_tokenize

def calculate_pos_ratio(words):    
    # Perform POS tagging
    tagged_words = pos_tag(words)
    
    # Count nouns, adjectives, and verbs
    noun_count = sum(1 for word, pos in tagged_words if pos.startswith('NN'))
    adjective_count = sum(1 for word, pos in tagged_words if pos.startswith('JJ'))
    verb_count = sum(1 for word, pos in tagged_words if pos.startswith('VB'))
    
    # Calculate the ratio (nouns + adjectives) / verbs
    if verb_count == 0:
        ratio = float('inf')  # Handle division by zero if there are no verbs
    else:
        ratio = verb_count / (noun_count + adjective_count) 
    
    return {
        'noun_count': noun_count,
        'adjective_count': adjective_count,
        'verb_count': verb_count,
        'ratio': ratio
    }

# Example usage
text = """Your text goes here. This is an example text to demonstrate PoS tagging and ratio calculation."""

text = "We ran and climbed and scuddled and hiked until we reached the pretty mountain top"

words = word_tokenize(text)
result = calculate_pos_ratio(words)

print(f"Noun count: {result['noun_count']}")
print(f"Adjective count: {result['adjective_count']}")
print(f"Verb count: {result['verb_count']}")
print(f"Verbs ratio / (Nouns + Adjectives): {result['ratio']:.2f}")


Noun count: 2
Adjective count: 1
Verb count: 5
Verbs ratio / (Nouns + Adjectives): 1.67


In [60]:
# Now I want a function that returns the (nouns+adjectives)/verbs ratio
# and (as well as) two readabilities: the FOG and the dale-chall

def analyze_text(text):

    # Tokenize the text into words
    words = word_tokenize(text)


    # Calculate the (nouns + adjectives) / verbs ratio
    pos_ratio = calculate_pos_ratio(words)
    
    # Compute the FOG and Dale-Chall readability scores
    fog = textstat.gunning_fog(text)
    dale_chall = textstat.dale_chall_readability_score(text)
    
    return {
        'pos_ratio': pos_ratio,
        'fog': fog,
        'dale_chall': dale_chall
    }

# Example usage
text = """Your text goes here. This is an example text to demonstrate readability analysis."""

result = analyze_text(text)
print(result)

{'pos_ratio': {'noun_count': 5, 'adjective_count': 0, 'verb_count': 3, 'ratio': 0.6}, 'fog': 11.83, 'dale_chall': 10.03}


In [37]:
gemini1 = """Sergei gripped his rifle, the cold metal biting into his calloused hands. He wasn't Estonian, but the promise of a worker's paradise had lured him to fight alongside the Bolsheviks. In the frozen trenches near Narva, he squinted through the swirling snow, spotting the enemy - Estonian farmers and students defending their newfound freedom. A pang of guilt hit him as he raised his weapon. Were these not the same oppressed masses he was supposed to be liberating? Yet, the propaganda echoed in his ears, drowning out his doubts. With a cry, he charged forward, another pawn in the bloody chess game of revolution."""
gemini2 = """Nikolai gripped his Mosin-Nagant, the cold metal biting into his gloveless hands. The Estonian snow crunched under his worn boots as he advanced with his Red Army comrades. A fervent Bolshevik, he believed their cause would bring equality and liberation to the oppressed masses. Yet, as they neared the Estonian lines, doubt crept in. The faces of the "enemy" were not so different from his own - young men, scared and shivering. A stray bullet whizzed past, snapping him back to reality. He raised his rifle, the image of his starving family flashing before his eyes, fueling his resolve. With a heavy heart, he squeezed the trigger, the deafening roar echoing the turmoil within."""
gemini3 = """Sergei gripped his rifle, the cold metal biting into his calloused hands. The Estonian forest was a labyrinth of shadows, the air thick with the scent of pine and the metallic tang of blood. He was a Red Rifleman, a Bolshevik fighting for the revolution, yet his heart ached for his Latvian home. Every crack of a twig, every rustle of leaves sent a jolt of fear through him, a fear he masked with a stoic face. He believed in the cause, in the promise of a new world, but the relentless march of death and the sight of young men, barely out of boyhood, falling to Estonian bullets, gnawed at his resolve. The echo of gunfire shattered the silence, a chilling reminder that the fight for independence raged on, and he was but a pawn in a bloody game."""

In [38]:
gemini4 = """Nikolai, a young laborer from Petrograd, had been swept up in the fervor of the revolution. His heart swelled with the promise of a worker's paradise, and he found himself fighting alongside the Red Army during the Estonian War of Independence. The harsh Estonian winter chilled him to the bone as he patrolled the snowy forests, rifle clutched in his numb hands. He missed his family dearly, but the dream of a brighter future for them fueled his determination. Each gunshot echoed through the trees, a grim reminder of the cost of their ideals. Yet, doubt gnawed at Nikolai's mind as he witnessed the brutality of war and the suffering it inflicted on both sides. The dream of a worker's paradise seemed to fade with each fallen comrade, leaving him questioning if the fight was truly worth the sacrifice. """
gemini5 = """Ivan clutched his rifle, the cold metal a stark contrast to the burning fervor in his heart. He was a true believer, a son of the revolution, fighting for a world free from the chains of capitalist oppression. The Estonian forest was a maze of shadows and whispers, where death could come from any direction. Yet, Ivan pressed on, his resolve fueled by the dream of a socialist utopia. But as the days turned into weeks, the war's brutality gnawed at his ideals. He saw the suffering of civilians, the senseless violence, and the growing disillusionment in his comrades' eyes. The glorious revolution he envisioned seemed a distant dream, replaced by the harsh realities of war. Still, he fought on, clinging to the last shreds of his belief, even as doubt crept into his soul."""
gemini6 = """Sergei gripped his rifle, the cold metal biting into his calloused hands. He was a farmer, not a soldier, but the revolution had swept him up in its fervor. Now, he fought for the Bolsheviks, believing in a world without landowners and tsars. The Estonian forest was a labyrinth of shadows and gunfire, each crack echoing the uncertainty in his heart. He had seen the brutality of both sides, the idealism of the revolution replaced with the grim reality of war. Yet, he pressed on, fueled by a desperate hope for a better future, even as he questioned if he was on the right side of history."""

In [39]:
# for each text, I want the analyze_text test
g1 = analyze_text(gemini1)
g2 = analyze_text(gemini2)
g3 = analyze_text(gemini3)

In [40]:
# first, compare the texts by noun ratio
print("Noun ratio comparison:")
print(f"Gemini 1: {g1['pos_ratio']['ratio']:.2f}")
print(f"Gemini 2: {g2['pos_ratio']['ratio']:.2f}")
print(f"Gemini 3: {g3['pos_ratio']['ratio']:.2f}")


Noun ratio comparison:
Gemini 1: 0.45
Gemini 2: 0.47
Gemini 3: 0.24


In [41]:
# second, compare them by the readabilities
print("\nReadability comparison:")
print(f"Gemini 1 - FOG: {g1['fog']:.2f}, Dale-Chall: {g1['dale_chall']:.2f}")
print(f"Gemini 2 - FOG: {g2['fog']:.2f}, Dale-Chall: {g2['dale_chall']:.2f}")
print(f"Gemini 3 - FOG: {g3['fog']:.2f}, Dale-Chall: {g3['dale_chall']:.2f}")


Readability comparison:
Gemini 1 - FOG: 8.21, Dale-Chall: 10.19
Gemini 2 - FOG: 8.47, Dale-Chall: 10.06
Gemini 3 - FOG: 11.03, Dale-Chall: 8.97


In [42]:
# sum the three measures to find the winner
g1_score = g1['pos_ratio']['ratio'] + g1['fog'] + g1['dale_chall']
g2_score = g2['pos_ratio']['ratio'] + g2['fog'] + g2['dale_chall']
g3_score = g3['pos_ratio']['ratio'] + g3['fog'] + g3['dale_chall']

print("\nOverall scores:")
print(f"Gemini 1: {g1_score:.2f}")
print(f"Gemini 2: {g2_score:.2f}")
print(f"Gemini 3: {g3_score:.2f}")


Overall scores:
Gemini 1: 18.85
Gemini 2: 19.00
Gemini 3: 20.24


In [56]:
# collect all the gemini stories

geminis = [gemini1, gemini2, gemini3, gemini4, gemini5, gemini6]

# find the one with the highest and lowest sum score
scores = [analyze_text(gemini) for gemini in geminis]
sum_scores = [score['pos_ratio']['ratio'] * (score['fog'] + score['dale_chall']) for score in scores]

# find the highest score and print the score and the story, then repeat for the lowest score
max_score = max(sum_scores)
min_score = min(sum_scores)

max_index = sum_scores.index(max_score)
min_index = sum_scores.index(min_score)

print("\nBest story:")
print(f"Score: {max_score:.2f}")
print(geminis[max_index])

print("\nSimplest story:")
print(f"Score: {min_score:.2f}")
print(geminis[min_index])



Best story:
Score: 8.99
Nikolai, a young laborer from Petrograd, had been swept up in the fervor of the revolution. His heart swelled with the promise of a worker's paradise, and he found himself fighting alongside the Red Army during the Estonian War of Independence. The harsh Estonian winter chilled him to the bone as he patrolled the snowy forests, rifle clutched in his numb hands. He missed his family dearly, but the dream of a brighter future for them fueled his determination. Each gunshot echoed through the trees, a grim reminder of the cost of their ideals. Yet, doubt gnawed at Nikolai's mind as he witnessed the brutality of war and the suffering it inflicted on both sides. The dream of a worker's paradise seemed to fade with each fallen comrade, leaving him questioning if the fight was truly worth the sacrifice. 

Simplest story:
Score: 4.83
Sergei gripped his rifle, the cold metal biting into his calloused hands. The Estonian forest was a labyrinth of shadows, the air thick w

In [57]:
reversion = """Nikolai gripped his Mosin-Nagant nto his gloveless hands. The Estonian snow crunched under his boots and he advanced with his Red Army comrades. He believed in the cause. Yet, as they neared the Estonian lines, doubt crept in. The faces of the "enemy" were not so different from his own - young men, scared and shivering. A stray bullet whizzed past, and snapped him back to reality. He raised his rifle and squeezed the trigger, the deafening roar covering the turmoil."""
# score this
reversion_score = analyze_text(reversion)
# compute the sum score
reversion_sum_score = reversion_score['pos_ratio']['ratio'] * (reversion_score['fog'] + reversion_score['dale_chall'])
print("\nReversion score:", reversion_sum_score)


Reversion score: 9.203571428571427


In [58]:
hemingway = """ That night at the hotel, in our room with the long empty hall outside and our shoes outside the door, a thick carpet on the floor of the room, outside the windows the rain falling and in the room light and pleasant and cheerful, then the light out and it exciting with smooth sheets and the bed comfortable, feeling that we had come home, feeling no longer alone, waking in the night to find the other one there, and not gone away; all other things were unreal. We slept when we were tired and if we woke the other one woke so no one was not alone. Often a man wishes to be alone and a girl wishes to be alone too and if they love each other they are jealous of that in each other, but I can truly say we never felt that. """
# score this
hemingway_score = analyze_text(hemingway)
# compute the sum score
hemingway_sum_score = hemingway_score['pos_ratio']['ratio'] * (hemingway_score['fog'] + hemingway_score['dale_chall'])
print("\nHemingway score:", hemingway_sum_score)


Hemingway score: 18.21333333333333


In [59]:
# Ok now, I want to do sentiment analysis at the word level, using VADER's dictionary

from nltk.sentiment.vader import SentimentIntensityAnalyzer

def analyze_sentiment(words):
    # Initialize the VADER sentiment analyzer
    analyzer = SentimentIntensityAnalyzer()
    
    # Analyze the sentiment of each word
    word_sentiments = {word: analyzer.polarity_scores(word)['compound'] for word in words}
    
    return word_sentiments

# Example usage
words = ["happy", "sad", "joyful", "miserable", "victorious", "defeated"]

sentiments = analyze_sentiment(words)
print(sentiments)

{'happy': 0.5719, 'sad': -0.4767, 'joyful': 0.5994, 'miserable': -0.4939, 'victorious': 0.0, 'defeated': -0.4767}


In [68]:
# integrate this into the analyze_text function
import numpy as np
def analyze_text(text):

    words = word_tokenize(text)
    # Calculate the (nouns + adjectives) / verbs ratio
    pos_ratio = calculate_pos_ratio(words)
    
    # Compute the FOG and Dale-Chall readability scores
    fog = textstat.gunning_fog(text)
    dale_chall = textstat.dale_chall_readability_score(text)
    
    word_sentiments = analyze_sentiment(words)
    sentiments =list(word_sentiments.values())

    # std of the sentiments
    sentiment_std = np.std(sentiments)
    mean = np.mean(sentiments)
    
    return {
        'pos_ratio': pos_ratio,
        'fog': fog,
        'dale_chall': dale_chall,
        'sentiment_std': sentiment_std,
        'mean_sentiment': mean
    }

# Example usage
text = """Your text goes here. This is an example text to demonstrate the great, wonderful, perfect readability analysis."""

result = analyze_text(text)
print(result)

{'pos_ratio': {'noun_count': 5, 'adjective_count': 3, 'verb_count': 3, 'ratio': 0.375}, 'fog': 10.46, 'dale_chall': 9.63, 'sentiment_std': 0.21995514729368723, 'mean_sentiment': 0.0982611111111111}


In [73]:
for score in scores:
    score['mean_sentiment']

In [104]:
# let's make a function that takes a score output and combines it into a single number
def score_text(score):
    style_number = score['pos_ratio']['ratio'] * (score['fog'] + score['dale_chall'])
    sentiment_number = score['mean_sentiment'] + score['sentiment_std']
    return style_number + sentiment_number

In [105]:
# goooood chilly good.
# now let's recompute the average score of the gemini stories
scores = [analyze_text(gemini) for gemini in geminis]
sum_scores = [score_text(score) for score in scores]

# find the highest score and print the score and the story, then repeat for the lowest score
max_score = max(sum_scores)
min_score = min(sum_scores)

max_index = sum_scores.index(max_score)
min_index = sum_scores.index(min_score)

print("\nBest story:")
print(f"Score: {max_score:.2f}")
print(geminis[max_index])

print("\nSimplest story:")
print(f"Score: {min_score:.2f}")
print(geminis[min_index])



Best story:
Score: -0.02
Sergei gripped his rifle, the cold metal biting into his calloused hands. The Estonian forest was a labyrinth of shadows, the air thick with the scent of pine and the metallic tang of blood. He was a Red Rifleman, a Bolshevik fighting for the revolution, yet his heart ached for his Latvian home. Every crack of a twig, every rustle of leaves sent a jolt of fear through him, a fear he masked with a stoic face. He believed in the cause, in the promise of a new world, but the relentless march of death and the sight of young men, barely out of boyhood, falling to Estonian bullets, gnawed at his resolve. The echo of gunfire shattered the silence, a chilling reminder that the fight for independence raged on, and he was but a pawn in a bloody game.

Simplest story:
Score: -0.04
Nikolai, a young laborer from Petrograd, had been swept up in the fervor of the revolution. His heart swelled with the promise of a worker's paradise, and he found himself fighting alongside th

In [106]:
## compare the reversion and hemingway stories
reversion_score = analyze_text(reversion)
hemingway_score = analyze_text(hemingway)

reversion_number = score_text(reversion_score)
hemingway_number = score_text(hemingway_score)

print("\nReversion score:", reversion_number)
print("\nHemingway score:", hemingway_number)


Reversion score: -0.02656021659536731

Hemingway score: 0.05483363502490999
