In [1]:
import requests
from bs4 import BeautifulSoup
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.tokenize import word_tokenize
from collections import Counter
import re
import nltk

# Download the necessary NLTK resources
nltk.download('vader_lexicon')
nltk.download('punkt')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\vsgha\AppData\Roaming\nltk_data...
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\vsgha\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


True

In [3]:
def get_article_text(url):
    r = requests.get(url)
    soup = BeautifulSoup(r.text, 'html.parser')

    for script in soup(["script", "style"]):
        script.extract()

    text = soup.get_text()
    text = re.sub(r'\s+', ' ', text)  # Remove extra whitespaces
    return text.strip()

In [4]:
def load_words_from_txt(file_name, encoding='utf-8'):
    with open(file_name, 'r', encoding=encoding, errors='ignore') as file:
        words = set(file.read().splitlines())
    return words

In [5]:
def calculate_complexity(text):
    sentences = sent_tokenize(text)
    words = word_tokenize(text)
    syllables = [sum(1 for char in word if char.lower() in 'aeiouy') for word in words]

    return {
        'average_sentence_length': len(words) / len(sentences),
        'percentage_of_complex_words': (len([word for word in words if len(word) > 7]) / len(words)) * 100,
        'fog_index': 0.4 * (100 * len(words) / len(sentences) + 100 * sum(syllables) / len(words)) / 2,
        'average_number_of_words_per_sentence': len(words) / len(sentences),
        'complex_word_count': len([word for word in words if len(word) > 7]),
        'word_count': len(words),
        'syllable_per_word': sum(syllables) / len(words),
        'average_word_length': len(text) / len(words),
    }

In [6]:
def analyze_sentiment(text, positive_words, negative_words):
    sia = SentimentIntensityAnalyzer()
    sentences = sent_tokenize(text)
    sentiment_scores = []

    for sentence in sentences:
        score = sia.polarity_scores(sentence)
        sentiment_score = score['pos'] - score['neg']
        sentiment_scores.append(sentiment_score)

    # Calculate overall sentiment scores
    total_sentiment_score = sum(sentiment_scores)
    positive_score = sum(max(0, score) for score in sentiment_scores)
    negative_score = -sum(min(0, score) for score in sentiment_scores)

    # Calculate subjectivity score
    subjectivity_score = sia.polarity_scores(text)['neu']

    return {
        'polarity_score': total_sentiment_score,
        'positive_score': positive_score,
        'negative_score': negative_score,
        'subjectivity_score': subjectivity_score,
    }

In [7]:
def extract_personal_pronouns(text):
    pronouns = ['i', 'you', 'he', 'she', 'it', 'we', 'they', 'me', 'him', 'her', 'its', 'us', 'them', 'my', 'your', 'his', 'her', 'its', 'our', 'their']
    words = word_tokenize(text)
    return Counter([word for word in words if word.lower() in pronouns])

In [8]:
input_urls = [
    'https://insights.blackcoffer.com/rising-it-cities-and-its-impact-on-the-economy-environment-infrastructure-and-city-life-by-the-year-2040-2/',
    'https://insights.blackcoffer.com/rising-it-cities-and-their-impact-on-the-economy-environment-infrastructure-and-city-life-in-future/',
    'https://insights.blackcoffer.com/internet-demands-evolution-communication-impact-and-2035s-alternative-pathways/',
    'https://insights.blackcoffer.com/rise-of-cybercrime-and-its-effect-in-upcoming-future/',
    'https://insights.blackcoffer.com/ott-platform-and-its-impact-on-the-entertainment-industry-in-future/',
    'https://insights.blackcoffer.com/the-rise-of-the-ott-platform-and-its-impact-on-the-entertainment-industry-by-2040/',
    'https://insights.blackcoffer.com/rise-of-cyber-crime-and-its-effects/',
    'https://insights.blackcoffer.com/rise-of-internet-demand-and-its-impact-on-communications-and-alternatives-by-the-year-2035-2/',
    'https://insights.blackcoffer.com/rise-of-cybercrime-and-its-effect-by-the-year-2040-2/',
    'https://insights.blackcoffer.com/rise-of-cybercrime-and-its-effect-by-the-year-2040/',
    'https://insights.blackcoffer.com/rise-of-internet-demand-and-its-impact-on-communications-and-alternatives-by-the-year-2035/',
    'https://insights.blackcoffer.com/rise-of-telemedicine-and-its-impact-on-livelihood-by-2040-3-2/',
    'https://insights.blackcoffer.com/rise-of-e-health-and-its-impact-on-humans-by-the-year-2030/',
    'https://insights.blackcoffer.com/rise-of-e-health-and-its-imapct-on-humans-by-the-year-2030-2/',
    'https://insights.blackcoffer.com/rise-of-telemedicine-and-its-impact-on-livelihood-by-2040-2/',
    'https://insights.blackcoffer.com/rise-of-telemedicine-and-its-impact-on-livelihood-by-2040-2-2/',
    'https://insights.blackcoffer.com/rise-of-chatbots-and-its-impact-on-customer-support-by-the-year-2040/',
    'https://insights.blackcoffer.com/rise-of-e-health-and-its-imapct-on-humans-by-the-year-2030/',
    'https://insights.blackcoffer.com/how-does-marketing-influence-businesses-and-consumers/',
    'https://insights.blackcoffer.com/how-advertisement-increase-your-market-value/',
    'https://insights.blackcoffer.com/negative-effects-of-marketing-on-society/',
    'https://insights.blackcoffer.com/how-advertisement-marketing-affects-business/',
    'https://insights.blackcoffer.com/rising-it-cities-will-impact-the-economy-environment-infrastructure-and-city-life-by-the-year-2035/',
    'https://insights.blackcoffer.com/rise-of-ott-platform-and-its-impact-on-entertainment-industry-by-the-year-2030/',
    'https://insights.blackcoffer.com/rise-of-electric-vehicles-and-its-impact-on-livelihood-by-2040/',
    'https://insights.blackcoffer.com/rise-of-electric-vehicle-and-its-impact-on-livelihood-by-the-year-2040/',
    'https://insights.blackcoffer.com/oil-prices-by-the-year-2040-and-how-it-will-impact-the-world-economy/',
    'https://insights.blackcoffer.com/an-outlook-of-healthcare-by-the-year-2040-and-how-it-will-impact-human-lives/',
    'https://insights.blackcoffer.com/ai-in-healthcare-to-improve-patient-outcomes/',
    'https://insights.blackcoffer.com/what-if-the-creation-is-taking-over-the-creator/',
    'https://insights.blackcoffer.com/what-jobs-will-robots-take-from-humans-in-the-future/',
    'https://insights.blackcoffer.com/will-machine-replace-the-human-in-the-future-of-work/',
    'https://insights.blackcoffer.com/will-ai-replace-us-or-work-with-us/',
    'https://insights.blackcoffer.com/man-and-machines-together-machines-are-more-diligent-than-humans-blackcoffe/',
    'https://insights.blackcoffer.com/in-future-or-in-upcoming-years-humans-and-machines-are-going-to-work-together-in-every-field-of-work/',
    'https://insights.blackcoffer.com/how-neural-networks-can-be-applied-in-various-areas-in-the-future/',
    'https://insights.blackcoffer.com/how-machine-learning-will-affect-your-business/',
    'https://insights.blackcoffer.com/deep-learning-impact-on-areas-of-e-learning/',
    'https://insights.blackcoffer.com/how-to-protect-future-data-and-its-privacy-blackcoffer/',
    'https://insights.blackcoffer.com/how-machines-ai-automations-and-robo-human-are-effective-in-finance-and-banking/',
    'https://insights.blackcoffer.com/ai-human-robotics-machine-future-planet-blackcoffer-thinking-jobs-workplace/',
    'https://insights.blackcoffer.com/how-ai-will-change-the-world-blackcoffer/',
    'https://insights.blackcoffer.com/future-of-work-how-ai-has-entered-the-workplace/',
    'https://insights.blackcoffer.com/ai-tool-alexa-google-assistant-finance-banking-tool-future/',
    'https://insights.blackcoffer.com/ai-healthcare-revolution-ml-technology-algorithm-google-analytics-industrialrevolution/',
    'https://insights.blackcoffer.com/all-you-need-to-know-about-online-marketing/',
    'https://insights.blackcoffer.com/evolution-of-advertising-industry/',
    'https://insights.blackcoffer.com/how-data-analytics-can-help-your-business-respond-to-the-impact-of-covid-19/',
    'https://insights.blackcoffer.com/covid-19-environmental-impact-for-the-future/',
    'https://insights.blackcoffer.com/environmental-impact-of-the-covid-19-pandemic-lesson-for-the-future/',
    'https://insights.blackcoffer.com/how-data-analytics-and-ai-are-used-to-halt-the-covid-19-pandemic/',
    'https://insights.blackcoffer.com/difference-between-artificial-intelligence-machine-learning-statistics-and-data-mining/',
    'https://insights.blackcoffer.com/how-python-became-the-first-choice-for-data-science/',
    'https://insights.blackcoffer.com/how-google-fit-measure-heart-and-respiratory-rates-using-a-phone/',
    'https://insights.blackcoffer.com/what-is-the-future-of-mobile-apps/',
    'https://insights.blackcoffer.com/impact-of-ai-in-health-and-medicine/',
    'https://insights.blackcoffer.com/telemedicine-what-patients-like-and-dislike-about-it/',
    'https://insights.blackcoffer.com/how-we-forecast-future-technologies/',
    'https://insights.blackcoffer.com/can-robots-tackle-late-life-loneliness/',
    'https://insights.blackcoffer.com/embedding-care-robots-into-society-socio-technical-considerations/',
    'https://insights.blackcoffer.com/management-challenges-for-future-digitalization-of-healthcare-services/',
    'https://insights.blackcoffer.com/are-we-any-closer-to-preventing-a-nuclear-holocaust/',
    'https://insights.blackcoffer.com/will-technology-eliminate-the-need-for-animal-testing-in-drug-development/',
    'https://insights.blackcoffer.com/will-we-ever-understand-the-nature-of-consciousness/',
    'https://insights.blackcoffer.com/will-we-ever-colonize-outer-space/',
    'https://insights.blackcoffer.com/what-is-the-chance-homo-sapiens-will-survive-for-the-next-500-years/',
    'https://insights.blackcoffer.com/why-does-your-business-need-a-chatbot/',
    'https://insights.blackcoffer.com/how-you-lead-a-project-or-a-team-without-any-technical-expertise/',
    'https://insights.blackcoffer.com/can-you-be-great-leader-without-technical-expertise/',
    'https://insights.blackcoffer.com/how-does-artificial-intelligence-affect-the-environment/',
    'https://insights.blackcoffer.com/how-to-overcome-your-fear-of-making-mistakes-2/',
    'https://insights.blackcoffer.com/is-perfection-the-greatest-enemy-of-productivity/',
    'https://insights.blackcoffer.com/global-financial-crisis-2008-causes-effects-and-its-solution/',
    'https://insights.blackcoffer.com/gender-diversity-and-equality-in-the-tech-industry/',
    'https://insights.blackcoffer.com/how-to-overcome-your-fear-of-making-mistakes/',
    'https://insights.blackcoffer.com/how-small-business-can-survive-the-coronavirus-crisis/',
    'https://insights.blackcoffer.com/impacts-of-covid-19-on-vegetable-vendors-and-food-stalls/',
    'https://insights.blackcoffer.com/impacts-of-covid-19-on-vegetable-vendors/',
    'https://insights.blackcoffer.com/impact-of-covid-19-pandemic-on-tourism-aviation-industries/',
    'https://insights.blackcoffer.com/impact-of-covid-19-pandemic-on-sports-events-around-the-world/',
    'https://insights.blackcoffer.com/changing-landscape-and-emerging-trends-in-the-indian-it-ites-industry/',
    'https://insights.blackcoffer.com/online-gaming-adolescent-online-gaming-effects-demotivated-depression-musculoskeletal-and-psychosomatic-symptoms/',
    'https://insights.blackcoffer.com/human-rights-outlook/',
    'https://insights.blackcoffer.com/how-voice-search-makes-your-business-a-successful-business/',
    'https://insights.blackcoffer.com/how-the-covid-19-crisis-is-redefining-jobs-and-services/',
    'https://insights.blackcoffer.com/how-to-increase-social-media-engagement-for-marketers/',
    'https://insights.blackcoffer.com/impacts-of-covid-19-on-streets-sides-food-stalls/',
    'https://insights.blackcoffer.com/coronavirus-impact-on-energy-markets-2/',
    'https://insights.blackcoffer.com/coronavirus-impact-on-the-hospitality-industry-5/',
    'https://insights.blackcoffer.com/lessons-from-the-past-some-key-learnings-relevant-to-the-coronavirus-crisis-4/',
    'https://insights.blackcoffer.com/estimating-the-impact-of-covid-19-on-the-world-of-work-2/',
    'https://insights.blackcoffer.com/estimating-the-impact-of-covid-19-on-the-world-of-work-3/',
    'https://insights.blackcoffer.com/travel-and-tourism-outlook/',
    'https://insights.blackcoffer.com/gaming-disorder-and-effects-of-gaming-on-health/',
    'https://insights.blackcoffer.com/what-is-the-repercussion-of-the-environment-due-to-the-covid-19-pandemic-situation/',
    'https://insights.blackcoffer.com/what-is-the-repercussion-of-the-environment-due-to-the-covid-19-pandemic-situation-2/',
    'https://insights.blackcoffer.com/impact-of-covid-19-pandemic-on-office-space-and-co-working-industries/',
    'https://insights.blackcoffer.com/contribution-of-handicrafts-visual-arts-literature-in-the-indian-economy/',
    'https://insights.blackcoffer.com/how-covid-19-is-impacting-payment-preferences/',
    'https://insights.blackcoffer.com/how-will-covid-19-affect-the-world-of-work-2/'

]



In [9]:
positive_words = load_words_from_txt('positive-words.txt', encoding='latin-1')
negative_words = load_words_from_txt('negative-words.txt', encoding='latin-1')

In [10]:
for url in input_urls:
    text = get_article_text(url)
    complexity = calculate_complexity(text)
    sentiment = analyze_sentiment(text, positive_words, negative_words)
    personal_pronouns = extract_personal_pronouns(text)

    print('URL:', url)
    print('Complexity:', complexity)
    print('Sentiment:', sentiment)

    # Print total personal pronouns count for the current URL
    print('Total Personal Pronouns:', sum(personal_pronouns.values()))

    print('\n' + '='*50 + '\n')  # Separator for better readability

URL: https://insights.blackcoffer.com/rising-it-cities-and-its-impact-on-the-economy-environment-infrastructure-and-city-life-by-the-year-2040-2/
Complexity: {'average_sentence_length': 26.795698924731184, 'percentage_of_complex_words': 17.49598715890851, 'fog_index': 572.374652651927, 'average_number_of_words_per_sentence': 26.795698924731184, 'complex_word_count': 436, 'word_count': 2492, 'syllable_per_word': 1.8230337078651686, 'average_word_length': 5.583868378812199}
Sentiment: {'polarity_score': 11.273999999999997, 'positive_score': 12.171999999999999, 'negative_score': 0.898, 'subjectivity_score': 0.854}
Total Personal Pronouns: 138


URL: https://insights.blackcoffer.com/rising-it-cities-and-their-impact-on-the-economy-environment-infrastructure-and-city-life-in-future/
Complexity: {'average_sentence_length': 29.40625, 'percentage_of_complex_words': 21.96245129295076, 'fog_index': 627.2677559334043, 'average_number_of_words_per_sentence': 29.40625, 'complex_word_count': 620, 'w

URL: https://insights.blackcoffer.com/rise-of-e-health-and-its-imapct-on-humans-by-the-year-2030-2/
Complexity: {'average_sentence_length': 28.270588235294117, 'percentage_of_complex_words': 22.43029546400333, 'fog_index': 603.9968666617709, 'average_number_of_words_per_sentence': 28.270588235294117, 'complex_word_count': 539, 'word_count': 2403, 'syllable_per_word': 1.9292550977944236, 'average_word_length': 5.969621306699959}
Sentiment: {'polarity_score': 4.812, 'positive_score': 7.293000000000001, 'negative_score': 2.481, 'subjectivity_score': 0.857}
Total Personal Pronouns: 102


URL: https://insights.blackcoffer.com/rise-of-telemedicine-and-its-impact-on-livelihood-by-2040-2/
Complexity: {'average_sentence_length': 32.71052631578947, 'percentage_of_complex_words': 23.169750603378922, 'fog_index': 693.8243638057332, 'average_number_of_words_per_sentence': 32.71052631578947, 'complex_word_count': 576, 'word_count': 2486, 'syllable_per_word': 1.9806918744971842, 'average_word_length'

URL: https://insights.blackcoffer.com/oil-prices-by-the-year-2040-and-how-it-will-impact-the-world-economy/
Complexity: {'average_sentence_length': 36.929577464788736, 'percentage_of_complex_words': 17.658276125095345, 'fog_index': 774.7700389982919, 'average_number_of_words_per_sentence': 36.929577464788736, 'complex_word_count': 463, 'word_count': 2622, 'syllable_per_word': 1.8089244851258581, 'average_word_length': 5.592295957284516}
Sentiment: {'polarity_score': 4.068999999999998, 'positive_score': 5.6850000000000005, 'negative_score': 1.616, 'subjectivity_score': 0.85}
Total Personal Pronouns: 127


URL: https://insights.blackcoffer.com/an-outlook-of-healthcare-by-the-year-2040-and-how-it-will-impact-human-lives/
Complexity: {'average_sentence_length': 41.8421052631579, 'percentage_of_complex_words': 22.51572327044025, 'fog_index': 875.3242855566589, 'average_number_of_words_per_sentence': 41.8421052631579, 'complex_word_count': 537, 'word_count': 2385, 'syllable_per_word': 1.9241

URL: https://insights.blackcoffer.com/how-machines-ai-automations-and-robo-human-are-effective-in-finance-and-banking/
Complexity: {'average_sentence_length': 31.986842105263158, 'percentage_of_complex_words': 24.269847799259566, 'fog_index': 680.1317413236918, 'average_number_of_words_per_sentence': 31.986842105263158, 'complex_word_count': 590, 'word_count': 2431, 'syllable_per_word': 2.0197449609214315, 'average_word_length': 6.108185931715344}
Sentiment: {'polarity_score': 6.039000000000002, 'positive_score': 7.2890000000000015, 'negative_score': 1.25, 'subjectivity_score': 0.845}
Total Personal Pronouns: 107


URL: https://insights.blackcoffer.com/ai-human-robotics-machine-future-planet-blackcoffer-thinking-jobs-workplace/
Complexity: {'average_sentence_length': 25.36, 'percentage_of_complex_words': 19.63722397476341, 'fog_index': 544.2583596214512, 'average_number_of_words_per_sentence': 25.36, 'complex_word_count': 498, 'word_count': 2536, 'syllable_per_word': 1.8529179810725551

URL: https://insights.blackcoffer.com/how-python-became-the-first-choice-for-data-science/
Complexity: {'average_sentence_length': 29.92063492063492, 'percentage_of_complex_words': 21.220159151193634, 'fog_index': 637.6806029219823, 'average_number_of_words_per_sentence': 29.92063492063492, 'complex_word_count': 400, 'word_count': 1885, 'syllable_per_word': 1.9633952254641909, 'average_word_length': 5.959681697612732}
Sentiment: {'polarity_score': 6.342999999999999, 'positive_score': 6.701999999999999, 'negative_score': 0.359, 'subjectivity_score': 0.853}
Total Personal Pronouns: 94


URL: https://insights.blackcoffer.com/how-google-fit-measure-heart-and-respiratory-rates-using-a-phone/
Complexity: {'average_sentence_length': 44.2, 'percentage_of_complex_words': 20.232708468002585, 'fog_index': 923.224305106658, 'average_number_of_words_per_sentence': 44.2, 'complex_word_count': 313, 'word_count': 1547, 'syllable_per_word': 1.9612152553329023, 'average_word_length': 6.009696186166774}


URL: https://insights.blackcoffer.com/what-is-the-chance-homo-sapiens-will-survive-for-the-next-500-years/
Complexity: {'average_sentence_length': 27.92, 'percentage_of_complex_words': 17.29942693409742, 'fog_index': 594.3813753581662, 'average_number_of_words_per_sentence': 27.92, 'complex_word_count': 483, 'word_count': 2792, 'syllable_per_word': 1.7990687679083095, 'average_word_length': 5.593839541547278}
Sentiment: {'polarity_score': 3.789000000000001, 'positive_score': 7.1560000000000015, 'negative_score': 3.3669999999999995, 'subjectivity_score': 0.857}
Total Personal Pronouns: 126


URL: https://insights.blackcoffer.com/why-does-your-business-need-a-chatbot/
Complexity: {'average_sentence_length': 26.443037974683545, 'percentage_of_complex_words': 18.525610339875538, 'fog_index': 565.3471166023354, 'average_number_of_words_per_sentence': 26.443037974683545, 'complex_word_count': 387, 'word_count': 2089, 'syllable_per_word': 1.8243178554332216, 'average_word_length': 5.639540449

URL: https://insights.blackcoffer.com/impact-of-covid-19-pandemic-on-tourism-aviation-industries/
Complexity: {'average_sentence_length': 33.62222222222222, 'percentage_of_complex_words': 19.993390614672833, 'fog_index': 710.1575971212455, 'average_number_of_words_per_sentence': 33.62222222222222, 'complex_word_count': 605, 'word_count': 3026, 'syllable_per_word': 1.8856576338400528, 'average_word_length': 5.776272306675479}
Sentiment: {'polarity_score': -0.07800000000000015, 'positive_score': 3.9890000000000008, 'negative_score': 4.066999999999999, 'subjectivity_score': 0.841}
Total Personal Pronouns: 115


URL: https://insights.blackcoffer.com/impact-of-covid-19-pandemic-on-sports-events-around-the-world/
Complexity: {'average_sentence_length': 23.72897196261682, 'percentage_of_complex_words': 17.15242221346987, 'fog_index': 508.26986855521164, 'average_number_of_words_per_sentence': 23.72897196261682, 'complex_word_count': 871, 'word_count': 5078, 'syllable_per_word': 1.684521465143

URL: https://insights.blackcoffer.com/estimating-the-impact-of-covid-19-on-the-world-of-work-3/
Complexity: {'average_sentence_length': 35.026666666666664, 'percentage_of_complex_words': 20.74609821088694, 'fog_index': 737.8534703717802, 'average_number_of_words_per_sentence': 35.026666666666664, 'complex_word_count': 545, 'word_count': 2627, 'syllable_per_word': 1.866006851922345, 'average_word_length': 5.833650551960411}
Sentiment: {'polarity_score': -2.1380000000000003, 'positive_score': 3.2790000000000004, 'negative_score': 5.417000000000001, 'subjectivity_score': 0.836}
Total Personal Pronouns: 92


URL: https://insights.blackcoffer.com/travel-and-tourism-outlook/
Complexity: {'average_sentence_length': 52.0, 'percentage_of_complex_words': 20.76923076923077, 'fog_index': 1078.8, 'average_number_of_words_per_sentence': 52.0, 'complex_word_count': 270, 'word_count': 1300, 'syllable_per_word': 1.94, 'average_word_length': 5.986153846153846}
Sentiment: {'polarity_score': 2.21999999999