### Sentiment Analysis

In [1]:
import pickle
import random
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

# Load the tokenizer and model from Hugging Face
model_name = "MoritzLaurer/multilingual-MiniLMv2-L6-mnli-xnli"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
classifier = pipeline("zero-shot-classification", model=model, tokenizer=tokenizer)

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Define the function to calculate sentiment score
def calculate_sentiment_score(text):
    print(1)
    candidate_labels = ["positive", "negative"]
    print(2)
    results = classifier(text, candidate_labels, multi_label=False)
    print(3)
    print(results)
    probabilities = {result['label']: result['score'] for result in results}
    print(4)
    sentiment_score = probabilities['positive'] - probabilities['negative']
    print(5)
    print(sentiment_score)
    return sentiment_score

# Load the dictionary mapping countries to file paths
dict_file_path = "../../data/country_file_paths.txt"
with open(dict_file_path, 'rb') as file:
    nation_mapped_files = pickle.load(file)

In [None]:
# Define the function to analyze sentiment for nation articles
def analyze_nation_articles(nation_files):
    sentiment_scores = {}
    for nation, files in nation_files.items():
        scores = []
        for file_path in files:
            try:
                with open(file_path, 'r', encoding='utf-8') as file:
                    text = file.read()
                score = calculate_sentiment_score(text)
                scores.append(score)
            except Exception as e:
                print(f"Error processing file {file_path}: {e}")
        # Add random noise to prevent ties and compute the average sentiment
        noise = random.uniform(-0.01, 0.01)  # Small noise to prevent exact ties
        sentiment_scores[nation] = sum(scores) / len(scores) + noise if scores else noise
    return sentiment_scores

In [None]:
nation_mapped_files

{'Germany': ['../../data/raw/gazzetta_it/Calcio_Europei_23-04-2024_orsato-e-guida-arbitri-italiani-a-euro-2024-in-germania.shtml.txt',
  '../../data/raw/gazzetta_it/Calcio_Europei_21-03-2024_lewa-e-dzeko-in-campo-negli-spareggi-per-l-europeo-di-germania.shtml.txt',
  '../../data/raw/gazzetta_it/Calcio_nazionali_02-04-2024_germania-adidas-vieta-la-vendita-della-numero-44-ricorda-le-ss.shtml.txt',
  '../../data/raw/gazzetta_it/Calcio_Europei_28-03-2024_yamal-mainoo-guler-e-gli-altri-chi-si-candida-a-un-europeo-da-star.shtml.txt',
  '../../data/raw/fourfourtwo_com/how-to-watch.txt',
  '../../data/raw/fourfourtwo_com/features_uefa-euro-2024-germany-dates-fixtures-stadiums-tickets.txt',
  '../../data/raw/fourfourtwo_com/features_euro-2024-referees-all-18-match-officials-set-to-take-charge-of-games-in-germany.txt',
  '../../data/raw/fourfourtwo_com/news_england-manager-gareth-southgate-facing-disrupted-preparations-for-euro-2024-with-key-player-a-guaranteed-miss.txt',
  '../../data/raw/fourf

In [21]:

with open('../../data/raw/lequipe_fr/Football_Ousmane-dembele.txt', 'r', encoding='utf-8') as file:
    text = file.read()

In [24]:
sentiment_scores = {}
for nation, files in nation_mapped_files.items():
    scores = []
    for file_path in files:
        #print(file_path)
        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                text = file.read()
            score = calculate_sentiment_score(text)
            #scores.append(score)
        except Exception as e:
            print(f"Error processing file {file_path}: {e}")

1
2
3
Error processing file ../../data/raw/gazzetta_it/Calcio_Europei_23-04-2024_orsato-e-guida-arbitri-italiani-a-euro-2024-in-germania.shtml.txt: string indices must be integers
1
2
3
Error processing file ../../data/raw/gazzetta_it/Calcio_Europei_21-03-2024_lewa-e-dzeko-in-campo-negli-spareggi-per-l-europeo-di-germania.shtml.txt: string indices must be integers
1
2
3
Error processing file ../../data/raw/gazzetta_it/Calcio_nazionali_02-04-2024_germania-adidas-vieta-la-vendita-della-numero-44-ricorda-le-ss.shtml.txt: string indices must be integers
1
2
3
Error processing file ../../data/raw/gazzetta_it/Calcio_Europei_28-03-2024_yamal-mainoo-guler-e-gli-altri-chi-si-candida-a-un-europeo-da-star.shtml.txt: string indices must be integers
1
2
3
Error processing file ../../data/raw/fourfourtwo_com/how-to-watch.txt: string indices must be integers
1
2
3
Error processing file ../../data/raw/fourfourtwo_com/features_uefa-euro-2024-germany-dates-fixtures-stadiums-tickets.txt: string indices m

In [9]:
# Calculate sentiment scores for each nation
sentiment_scores = analyze_nation_articles(nation_mapped_files)

Error processing file ../../data/raw/gazzetta_it/Calcio_Europei_23-04-2024_orsato-e-guida-arbitri-italiani-a-euro-2024-in-germania.shtml.txt: string indices must be integers
Error processing file ../../data/raw/gazzetta_it/Calcio_Europei_21-03-2024_lewa-e-dzeko-in-campo-negli-spareggi-per-l-europeo-di-germania.shtml.txt: string indices must be integers
Error processing file ../../data/raw/gazzetta_it/Calcio_nazionali_02-04-2024_germania-adidas-vieta-la-vendita-della-numero-44-ricorda-le-ss.shtml.txt: string indices must be integers
Error processing file ../../data/raw/gazzetta_it/Calcio_Europei_28-03-2024_yamal-mainoo-guler-e-gli-altri-chi-si-candida-a-un-europeo-da-star.shtml.txt: string indices must be integers
Error processing file ../../data/raw/fourfourtwo_com/how-to-watch.txt: string indices must be integers
Error processing file ../../data/raw/fourfourtwo_com/features_uefa-euro-2024-germany-dates-fixtures-stadiums-tickets.txt: string indices must be integers
Error processing fil

KeyboardInterrupt: 

In [None]:
# Save the sentiment scores to a new dictionary file
scores_file_path = "../../data/sentiment_scores.pkl"

In [None]:
with open(scores_file_path, 'wb') as file:
    pickle.dump(sentiment_scores, file)

In [None]:
print("Sentiment scores saved to:", scores_file_path)