In [2]:
## Part 1: Connect to the remote database and pull the Table containing the News.

# Connect to a remote Postgres database.
import os
import sys

import pandas as pd
import psycopg2
import psycopg2.extras as extras
from psycopg2 import sql

# Connection settings passed as keyword arguments to psycopg2.connect().
# Each value is read from the standard libpq environment variable when it is
# set, so credentials can be supplied without editing the notebook.
# NOTE(review): the literal fallbacks keep the notebook runnable exactly as
# before, but they are real credentials committed in plain text -- rotate them
# and drop the password fallback once PGPASSWORD is provisioned.
params_dic = {
    'database': os.environ.get('PGDATABASE', 'postgres'),
    'host': os.environ.get('PGHOST', 'ec2-54-67-19-179.us-west-1.compute.amazonaws.com'),
    'user': os.environ.get('PGUSER', 'ubuntu'),
    'password': os.environ.get('PGPASSWORD', 'ubuntu'),
    'port': os.environ.get('PGPORT', '5432'),
}

def connect(params_dic):
    """Connect to the PostgreSQL database server.

    Parameters
    ----------
    params_dic : dict
        Keyword arguments forwarded unchanged to ``psycopg2.connect``.

    Returns
    -------
    The connection object returned by ``psycopg2.connect``.

    Raises
    ------
    psycopg2.Error
        If the connection attempt fails. The error is reported on stderr and
        then re-raised, so the original exception and traceback are preserved
        (``sys.exit`` would replace them with ``SystemExit``).
    """
    print('Connecting to the PostgreSQL database...')
    try:
        conn = psycopg2.connect(**params_dic)
    except psycopg2.Error as error:
        # Only database/driver failures are handled here; anything else
        # (e.g. a bad keyword in params_dic) propagates untouched.
        print('Connection failed:', error, file=sys.stderr)
        raise
    print("Connection successful")
    return conn

# Open the single connection that the query, update and close cells below share.
conn = connect(params_dic)

Connecting to the PostgreSQL database...
Connection successful


In [78]:
# List the tables available in the 'public' schema of our DB

# The cursor context manager closes the cursor when the block exits, including
# when the query or the fetch raises.
with conn.cursor() as cur:
    cur.execute("""SELECT table_name FROM information_schema.tables
       WHERE table_schema = 'public'""")
    # Each fetched row is a 1-tuple holding the table name.
    for table in cur.fetchall():
        print(table)

('news_el_pais',)


In [4]:
# Read the table 'news_el_pais'

def query_read_df(conn,table):
    """
    Read every row and column of 'table' into a pandas DataFrame.

    Parameters
    ----------
    conn : open psycopg2 connection used to run the query.
    table : str
        Table name, optionally schema-qualified as 'schema.table'.

    Returns
    -------
    pandas.DataFrame with one column per column reported by the cursor
    description, in the order the rows were returned (no ORDER BY is applied).

    Notes
    -----
    The table name is composed as a quoted SQL identifier instead of being
    interpolated into the query text, so it cannot inject SQL. Because it is
    quoted, a mixed-case name is matched exactly rather than folded to
    lower case.
    """
    # Quote each dotted part separately so 'schema.table' keeps working.
    identifier = sql.SQL('.').join(sql.Identifier(part) for part in table.split('.'))
    query = sql.SQL('SELECT * FROM {};').format(identifier)
    # The context manager closes the cursor even if the query fails.
    with conn.cursor() as cur:
        cur.execute(query)
        rows = cur.fetchall()
        columns = [desc[0] for desc in cur.description]
    return pd.DataFrame(rows, columns=columns)

# df1 is the working copy of the table; the sentiment cells below add two score columns to it.
df1 = query_read_df(conn,'news_el_pais')

In [90]:
# Preview the first rows of the table.
# NOTE(review): the saved output of this cell already lists the sentiment_*
# columns, and its execution count (90) is higher than that of the cells which
# compute and store them, so it was run out of order. Expect those columns to
# be absent here when the table has not yet been altered.
df1.head()

Unnamed: 0,id,type,title,date_pub,date_mod,description,article_body,author,category,lang,location,copyright_year,main_entity,publisher,license,free_access,part_of,sentiment_polarity,sentiment_subjectivity
0,1,ReportageNewsArticle,"Caro Quintero, the old drug lord who revolutio...",2022-07-17,2022-07-17,"At 69, he was the longest-serving drug traffic...",“Why are you smiling so much? Has life treated...,Jacobo García,International,en-us,Mexico,2022,WebPage,Ediciones EL PAÍS S.L.,https://elpais.com/estaticos/terms-and-conditi...,True,CreativeWork,0.9861,0.393893
1,2,ReportageNewsArticle,Psychedelic therapy: Fighting depression with ...,2022-07-16,2022-07-16,‘Magic’ fungi are used for therapy around the ...,"When María del Mar Betancur opens her eyes, sh...",Noor Mahtani,International,en-us,Medellín,2022,WebPage,Ediciones EL PAÍS S.L.,https://elpais.com/estaticos/terms-and-conditi...,True,CreativeWork,0.9966,0.482127
2,3,ReportageNewsArticle,The last orchid,2022-07-16,2022-07-16,A group of researchers in Colombia has discove...,"This past Thursday, a group of researchers fro...",Lucía Franco,International,en-us,Bogotá,2022,WebPage,Ediciones EL PAÍS S.L.,https://elpais.com/estaticos/terms-and-conditi...,True,CreativeWork,-0.6232,0.444344
3,4,ReportageNewsArticle,Rafael Caro Quintero: the historic head of the...,2022-07-15,2022-07-15,Sources from the Marines confirm that the vete...,"Rafael Caro Quintero, the legendary narco lead...",Elena Reina,International,en-us,Mexico,2022,WebPage,Ediciones EL PAÍS S.L.,https://elpais.com/estaticos/terms-and-conditi...,True,CreativeWork,-0.9971,0.400543
4,5,ReportageNewsArticle,Lobo: The surname that encapsulates all of Hon...,2022-07-15,2022-07-15,The execution-style killing of a son of ex-pre...,There are surnames that seem to encapsulate th...,Jacobo García,International,en-us,Mexico,2022,WebPage,Ediciones EL PAÍS S.L.,https://elpais.com/estaticos/terms-and-conditi...,True,CreativeWork,-0.9995,0.330859


In [83]:
## Naive sentiment analysis. This is used only as an example of what can be done with this type of data.

from nltk.sentiment import SentimentIntensityAnalyzer
from textblob import TextBlob

# Shared analyzer instance; sentiment_polarity() below calls its polarity_scores() method.
sia = SentimentIntensityAnalyzer()

def sentiment_polarity(row):
    """Return the 'compound' polarity score of ``row.article_body``.

    Parameters
    ----------
    row : object exposing an ``article_body`` attribute (e.g. a DataFrame row
        passed by ``DataFrame.apply(..., axis=1)``).

    Returns
    -------
    The value stored under 'compound' in ``sia.polarity_scores(...)``, or
    None when the row has no usable text (attribute missing, None, NaN or any
    other non-string value).
    """
    text = getattr(row, 'article_body', None)
    # Explicit guard instead of catching AttributeError: missing bodies still
    # yield None, but real errors raised inside the analyzer are not hidden.
    if not isinstance(text, str):
        return None
    return sia.polarity_scores(text).get('compound')
    
def sentiment_subjectivity(row):
    """Return the TextBlob subjectivity score of ``row.article_body``.

    Parameters
    ----------
    row : object exposing an ``article_body`` attribute (e.g. a DataFrame row
        passed by ``DataFrame.apply(..., axis=1)``).

    Returns
    -------
    ``TextBlob(text).sentiment.subjectivity``, or None when the row has no
    usable text (attribute missing, None, NaN or any other non-string value).
    """
    text = getattr(row, 'article_body', None)
    # Explicit guard instead of catching AttributeError: the result for a
    # missing body no longer depends on which exception type the library
    # raises for non-string input, and real errors are not hidden.
    if not isinstance(text, str):
        return None
    return TextBlob(text).sentiment.subjectivity
    
# Sentiment of the news: the 'compound' score returned by sentiment_polarity.
# In the displayed rows, negative values mark negative articles and positive
# values mark positive ones.
# Assign the scored Series directly with bracket syntax: no placeholder column
# of zeros is left behind if scoring fails part-way, and it does not depend on
# attribute-style access to an already existing column.
df1['sentiment_polarity'] = df1.apply(sentiment_polarity, axis=1)

# Subjectivity: [0,1] ->  Higher subjectivity means that the text contains
#                        personal opinion rather than factual information.
df1['sentiment_subjectivity'] = df1.apply(sentiment_subjectivity, axis=1)
df1.head()

Unnamed: 0,id,type,title,date_pub,date_mod,description,article_body,author,category,lang,location,copyright_year,main_entity,publisher,license,free_access,part_of,sentiment_polarity,sentiment_subjectivity
0,1,ReportageNewsArticle,"Caro Quintero, the old drug lord who revolutio...",2022-07-17,2022-07-17,"At 69, he was the longest-serving drug traffic...",“Why are you smiling so much? Has life treated...,Jacobo García,International,en-us,Mexico,2022,WebPage,Ediciones EL PAÍS S.L.,https://elpais.com/estaticos/terms-and-conditi...,True,CreativeWork,0.9861,0.393893
1,2,ReportageNewsArticle,Psychedelic therapy: Fighting depression with ...,2022-07-16,2022-07-16,‘Magic’ fungi are used for therapy around the ...,"When María del Mar Betancur opens her eyes, sh...",Noor Mahtani,International,en-us,Medellín,2022,WebPage,Ediciones EL PAÍS S.L.,https://elpais.com/estaticos/terms-and-conditi...,True,CreativeWork,0.9966,0.482127
2,3,ReportageNewsArticle,The last orchid,2022-07-16,2022-07-16,A group of researchers in Colombia has discove...,"This past Thursday, a group of researchers fro...",Lucía Franco,International,en-us,Bogotá,2022,WebPage,Ediciones EL PAÍS S.L.,https://elpais.com/estaticos/terms-and-conditi...,True,CreativeWork,-0.6232,0.444344
3,4,ReportageNewsArticle,Rafael Caro Quintero: the historic head of the...,2022-07-15,2022-07-15,Sources from the Marines confirm that the vete...,"Rafael Caro Quintero, the legendary narco lead...",Elena Reina,International,en-us,Mexico,2022,WebPage,Ediciones EL PAÍS S.L.,https://elpais.com/estaticos/terms-and-conditi...,True,CreativeWork,-0.9971,0.400543
4,5,ReportageNewsArticle,Lobo: The surname that encapsulates all of Hon...,2022-07-15,2022-07-15,The execution-style killing of a son of ex-pre...,There are surnames that seem to encapsulate th...,Jacobo García,International,en-us,Mexico,2022,WebPage,Ediciones EL PAÍS S.L.,https://elpais.com/estaticos/terms-and-conditi...,True,CreativeWork,-0.9995,0.330859


In [86]:
## Part 2:   Use psycopg2 to export the altered DataFrame into the Postgres table:
#           update the DB table with the sentiment analysis data.

# One (polarity, subjectivity, id) tuple per article, converted to native
# Python values. Missing scores become None so the driver sends them as NULL
# (formatting them into the SQL text would produce invalid SQL).
# Assumes 'id' is the table's integer key, as the displayed rows suggest.
values = [
    (
        None if pd.isna(polarity) else float(polarity),
        None if pd.isna(subjectivity) else float(subjectivity),
        int(row_id),
    )
    for polarity, subjectivity, row_id in df1[
        ['sentiment_polarity', 'sentiment_subjectivity', 'id']
    ].itertuples(index=False, name=None)
]

# `with conn` commits when the block succeeds and rolls back if it raises (it
# does not close the connection); `with conn.cursor()` closes the cursor.
with conn, conn.cursor() as cur:
    # IF NOT EXISTS makes the cell re-runnable once the columns have been added.
    cur.execute("""
                ALTER TABLE news_el_pais
                ADD COLUMN IF NOT EXISTS sentiment_polarity FLOAT,
                ADD COLUMN IF NOT EXISTS sentiment_subjectivity FLOAT;
                """)
    # Parameterized UPDATE: the values are bound by the driver rather than
    # formatted into the statement, and sent in batches.
    extras.execute_batch(
        cur,
        """
        UPDATE news_el_pais
        SET sentiment_polarity = %s, sentiment_subjectivity = %s
        WHERE id = %s;
        """,
        values,
    )

In [5]:
# Check the updated table: re-read it from the database to confirm that the
# sentiment columns were stored.
# The query has no ORDER BY, so rows can come back in a different order than
# in df1 (the saved output starts with ids 16 and 141).
df2 = query_read_df(conn,'news_el_pais')
df2.head()

Unnamed: 0,id,type,title,date_pub,date_mod,description,article_body,author,category,lang,location,copyright_year,main_entity,publisher,license,free_access,part_of,sentiment_polarity,sentiment_subjectivity
0,16,ReportageNewsArticle,US extends protected status to Venezuelan migr...,2022-07-12,2022-07-12,"Under Temporary Protected Status, nationals fr...",One of the first decisions that US President J...,Florantonia Singer,USA,en-us,Caracas,2022,WebPage,Ediciones EL PAÍS S.L.,https://elpais.com/estaticos/terms-and-conditi...,True,CreativeWork,0.9793,0.375556
1,141,ReportageNewsArticle,"How wine is produced at 12,000 feet in the dri...",2022-05-24,2022-05-24,,"In the driest place in the world, at 3,600 met...",Antonia Laborde,Culture,en-us,San Pedro de Atacama (Chile),2022,WebPage,Ediciones EL PAÍS S.L.,https://elpais.com/estaticos/terms-and-conditi...,True,CreativeWork,0.9873,0.412745
2,1,ReportageNewsArticle,"Caro Quintero, the old drug lord who revolutio...",2022-07-17,2022-07-17,"At 69, he was the longest-serving drug traffic...",“Why are you smiling so much? Has life treated...,Jacobo García,International,en-us,Mexico,2022,WebPage,Ediciones EL PAÍS S.L.,https://elpais.com/estaticos/terms-and-conditi...,True,CreativeWork,0.9861,0.393893
3,2,ReportageNewsArticle,Psychedelic therapy: Fighting depression with ...,2022-07-16,2022-07-16,‘Magic’ fungi are used for therapy around the ...,"When María del Mar Betancur opens her eyes, sh...",Noor Mahtani,International,en-us,Medellín,2022,WebPage,Ediciones EL PAÍS S.L.,https://elpais.com/estaticos/terms-and-conditi...,True,CreativeWork,0.9966,0.482127
4,3,ReportageNewsArticle,The last orchid,2022-07-16,2022-07-16,A group of researchers in Colombia has discove...,"This past Thursday, a group of researchers fro...",Lucía Franco,International,en-us,Bogotá,2022,WebPage,Ediciones EL PAÍS S.L.,https://elpais.com/estaticos/terms-and-conditi...,True,CreativeWork,-0.6232,0.444344


In [89]:
# All database work is done; the analysis below only uses df2, which is already in memory.
conn.close()

In [37]:
# Q. Which author has the highest average subjectivity score across their articles?
# Average the subjectivity per author, then rank the authors from most to least subjective.
(
    df2.groupby('author')[['sentiment_subjectivity']]
       .mean()
       .sort_values(by='sentiment_subjectivity', ascending=False)
)

Unnamed: 0_level_0,sentiment_subjectivity
author,Unnamed: 1_level_1
Karina Suárez,0.557868
Diego Torres,0.517741
Noelia Ramírez,0.510431
Rosa Rivas,0.497449
Luis de Vega (enviado especial),0.495039
...,...
José Naranjo,0.270454
Miguel González,0.225278
Rafael Fraguas,0.225263
María Martín,0.222075


In [None]:
# A. Karina Suárez, with the highest mean subjectivity score (0.557868).

In [None]:
# Done