# Imports

In [2]:
from __future__ import print_function


import matplotlib as mpl
import pandas as pd
import plotly
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import re

from transformers import pipeline

  from .autonotebook import tqdm as notebook_tqdm
2022-12-07 10:13:52.983548: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
df = pd.read_csv('/Users/yasmeenmahomedy2/Downloads/Conflict Datasets/un-general-debates.csv')

In [4]:
df2 = df[df['year']>=1990]

# Sentiment

In [5]:
sentiment = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
    tokenizer="distilbert-base-uncased-finetuned-sst-2-english",
)

2022-12-07 10:14:36.217584: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
All model checkpoint layers were used when initializing TFDistilBertForSequenceClassification.

All the layers of TFDistilBertForSequenceClassification were initialized from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForSequenceClassification for predictions without further training.


In [6]:
def clean_text(text):
    text = text.encode("ascii", errors="ignore").decode(
        "ascii"
    )  # remove non-ascii, Chinese characters
    text = text.lower()
    text = re.sub(r"\n", " ", text)
    text = re.sub(r"\n\n", " ", text)
    text = re.sub(r"\t", " ", text)
    text = text.strip(" ")
    text = re.sub(r"[^\w\s]", "", text)  # remove punctuation and special characters
    text = re.sub(
        " +", " ", text
    ).strip()  # get rid of multiple spaces and replace with a single
    text = text[:500]
    return text


In [7]:
def sentiment_analysis(text):
    input_text = (
        pd.DataFrame(text.split("."))
        .stack()
        .reset_index()
        .rename(columns={0: "Paras"})
        .drop("level_0", axis=1)
        .drop("level_1", axis=1)
        .dropna()
    )

    input_text["Clean_Text"] = input_text["Paras"].map(lambda text: clean_text(text))

    corpus = list(input_text["Clean_Text"].values)

    input_text["Sentiment"] = sentiment(corpus)

    input_text["Sentiment_Label"] = [x.get("label") for x in input_text["Sentiment"]]

    input_text["Sentiment_Score"] = [x.get("score") for x in input_text["Sentiment"]]

    cols = ["Paras", "Sentiment_Label", "Sentiment_Score"]
    df = input_text[cols].copy()

    df = df[df["Paras"].str.strip().astype(bool)]

    df["Sentiment_Score"] = np.where(
        df["Sentiment_Label"] == "NEGATIVE",
        -(df["Sentiment_Score"]),
        df["Sentiment_Score"],
    )

    df["Sentiment_Score"] = df["Sentiment_Score"].round(6)

    overall_sentiment_score = df["Sentiment_Score"].sum().round(3)

    sentiment_count = df["Sentiment_Label"].value_counts().to_string()

#     return overall_sentiment_score, sentiment_count, df
    return df['Sentiment_Score'].mean()


In [None]:
#filter out beginning end of speech? on pakistan df
# temp = pak['text'].reset_index()
# speech_0 = temp['text'][1]
# start = round(len(speech_0)*0.1)
# end = round(len(speech_0)*0.9)

# speech_filter = speech_0[start:end]
# sentiment_analysis(speech_filter)

In [8]:
import seaborn as sns
import matplotlib.pyplot as plt

# Graph Functions

In [None]:
def sentiment_graph(df, country):
    fig = go.Figure(
    data=go.Heatmap(
        z=df['Overall_Sentiment_Score'],
        x=df['year'],
        y=df["new_sentiment_score"],
        colorscale=px.colors.sequential.RdBu,
            )
        )

    fig.update_layout(
        title=go.layout.Title(
            text=f'Sentiment Analysis for {country}'
        ),
        autosize=False,
        width=1150,
        height=400,
    )

    fig.update_layout(yaxis_autorange = "reversed")
    return fig


# Iran

In [10]:
iran = df2[df['country']=='IRN']

  iran = df2[df['country']=='IRN']


In [11]:
iran.head()

Unnamed: 0,session,year,country,text
336,68,2013,IRN,"Mr. President, at the outset, I \nwould like t..."
663,63,2008,IRN,I am grateful to the \nAlmighty for granting m...
839,46,1991,IRN,"﻿At the outset, Sir, I should like to express ..."
1137,57,2002,IRN,﻿I\nwould like at the outset to congratulate M...
1454,51,1996,IRN,﻿It is a\nsource of profound satisfaction to s...


In [12]:
iran['Overall_Sentiment_Score'] = iran['text'].apply(sentiment_analysis)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iran['Overall_Sentiment_Score'] = iran['text'].apply(sentiment_analysis)


In [13]:
iran = pd.DataFrame(iran)

In [14]:
iran['new_sentiment_score'] = np.where(iran['Overall_Sentiment_Score']< 0.3, 'NEGATIVE', 'POSITIVE')
iran.head()

Unnamed: 0,session,year,country,text,Overall_Sentiment_Score,new_sentiment_score
336,68,2013,IRN,"Mr. President, at the outset, I \nwould like t...",0.002566,NEGATIVE
663,63,2008,IRN,I am grateful to the \nAlmighty for granting m...,0.173459,NEGATIVE
839,46,1991,IRN,"﻿At the outset, Sir, I should like to express ...",0.498799,POSITIVE
1137,57,2002,IRN,﻿I\nwould like at the outset to congratulate M...,0.573124,POSITIVE
1454,51,1996,IRN,﻿It is a\nsource of profound satisfaction to s...,-0.035528,NEGATIVE


In [41]:
iran['date_new'] = pd.to_datetime(iran['year'],format='%Y')
iran = iran.sort_values(by='date_new')

In [42]:
iran.to_csv('iran_sentiment_analysis.csv')

In [44]:
sentiment_graph(iran, 'Iran')

In [39]:
fig = go.Figure(
    data=go.Heatmap(
        z=iran['Overall_Sentiment_Score'],
        x=iran['year'],
        y=iran["new_sentiment_score"],
        colorscale=px.colors.sequential.RdBu,
    )
)

fig.update_layout(
    title=go.layout.Title(
        text="Sentiment Analysis"
    ),
    autosize=False,
    width=1150,
    height=400,
)

fig.update_layout(yaxis_autorange = "reversed")

fig.show()

In [38]:
px.line(iran.sort_values(by='date_new'), x="date_new", y="Overall_Sentiment_Score")