In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
from dateutil import parser

# Función para realizar la búsqueda en Google News con un rango de fechas
def search_google_news(query, start_date, end_date, max_results=100000):
    # Codificar la consulta de búsqueda
    encoded_query = encode_special_characters(query)
    
    # Inicializar lista para almacenar los resultados
    all_news_df = []

    # Inicializar el número total de registros recopilados
    total_results = 0

    # Definir la URL base de búsqueda
    base_url = f"https://news.google.com/search?q={encoded_query}&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange={start_date.strftime('%Y-%m-%d')}_{end_date.strftime('%Y-%m-%d')}"   
    
    # Recorrer las páginas de resultados hasta alcanzar el número máximo de resultados
    while total_results < max_results:
        # Construir la URL de búsqueda para la página actual
        url = base_url + f"&start={total_results}"
        print(url)
        
        # Realizar la solicitud GET a la URL de búsqueda
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')

        # Obtener los enlaces de las noticias
        articles = soup.find_all('article')
        links = [article.find('a')['href'] for article in articles]
        links = [link.replace("./articles/", "https://news.google.com/articles/") for link in links]

        # Obtener el texto de las noticias
        news_text = [article.get_text(separator='\n') for article in articles]
        news_text_split = [text.split('\n') for text in news_text]

        # Crear un DataFrame con los datos de las noticias
        news_df = pd.DataFrame({
            'Title': [text[2] for text in news_text_split],
            'Source': [text[0] for text in news_text_split],
            'Time': [text[3] for text in news_text_split],
            'Link': links
        })

        # Agregar el DataFrame actual a la lista de resultados
        all_news_df.append(news_df)

        # Actualizar el número total de registros recopilados
        total_results += len(news_df)

        # Detener la iteración si se alcanza o supera el número máximo de resultados
        if total_results >= max_results:
            break

    # Concatenar todos los DataFrames de resultados en uno solo
    final_news_df = pd.concat(all_news_df, ignore_index=True)

    return final_news_df[:max_results]  # Retornar solo el número máximo de resultados solicitado

# Función para codificar caracteres especiales en una cadena de texto
def encode_special_characters(text):
    encoded_text = ''
    special_characters = {'&': '%26', '=': '%3D', '+': '%2B', ' ': '%20'}  # Add more special characters as needed
    for char in text.lower():
        encoded_text += special_characters.get(char, char)
    return encoded_text

# Definir la consulta de búsqueda y el rango de fechas
query = 'Canada inflation food'
start_date = datetime(2024, 1, 1)
end_date = datetime(2024, 2, 29)

# Realizar la búsqueda en Google News y obtener los resultados en un DataFrame
news_df = search_google_news(query, start_date, end_date, max_results=100000)

# Escribir los resultados en un archivo CSV
news_df.to_csv('news.csv', index=False)

news_df.info()


https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=0
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=100
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=200
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=300
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=400
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=500
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=600
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=20

https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=6400
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=6500
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=6600
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=6700
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=6800
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=6900
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=7000
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&dat

https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=12700
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=12800
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=12900
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=13000
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=13100
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=13200
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=13300
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3

https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=18999
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=19099
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=19199
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=19299
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=19399
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=19499
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=19599
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3

https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=25298
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=25398
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=25498
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=25598
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=25698
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=25798
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=25898
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3

https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=31597
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=31697
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=31797
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=31897
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=31997
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=32097
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=32197
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3

https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=37897
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=37997
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=38097
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=38197
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=38297
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=38397
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3Aen&daterange=2024-01-01_2024-02-29&start=38497
https://news.google.com/search?q=canada%20inflation%20food&hl=en-CA&gl=CA&ceid=CA%3

KeyboardInterrupt: 

In [None]:
news_df.head()

In [None]:
# import pandas as pd
# from datetime import datetime, timedelta
# from dateutil.relativedelta import relativedelta

# # Suponiendo que tienes un DataFrame llamado news_df
# # Iterar sobre cada fila del DataFrame
# for index, row in news_df.iterrows():
#     time_text = row['Time']
#     # Verificar si el texto contiene 'Yesterday'
#     if 'Yesterday' in time_text:
#         # Obtener la fecha de ayer
#         yesterday = datetime.now() - timedelta(days=1)
#         # Asignar la fecha de ayer al DataFrame
#         news_df.at[index, 'Time'] = yesterday.strftime('%Y-%m-%d')
#     # Verificar si el texto contiene 'ago'
#     elif 'ago' in time_text:
#         # Extraer la cantidad de tiempo (en horas) desde 'ago'
#         hours_ago = int(time_text.split()[0])
#         # Calcular la fecha actual menos la cantidad de tiempo
#         current_time = datetime.now() - relativedelta(hours=hours_ago)
#         # Asignar la nueva fecha al DataFrame
#         news_df.at[index, 'Time'] = current_time.strftime('%Y-%m-%d')
#     # Verificar si el texto está en el formato "Feb 20"
#     elif len(time_text.split()) == 2:
#         # Obtener el mes y el día del texto
#         month_str, day_str = time_text.split()
#         # Obtener el mes en formato numérico
#         month_num = datetime.strptime(month_str, '%b').month
#         # Obtener la fecha actual
#         current_date = datetime.now().replace(day=int(day_str), month=month_num)
#         # Asignar la nueva fecha al DataFrame
#         news_df.at[index, 'Time'] = current_date.strftime('%Y-%m-%d')
#     elif '-' in time_text:
#         continue  # Saltar la conversión si ya está en el formato esperado    
#     else:
#         # Convertir la fecha al formato YYYY-MM-DD
#         news_df.at[index, 'Time'] = pd.to_datetime(time_text, format='%d-%m-%Y').strftime('%Y-%m-%d')

# # Imprimir las primeras filas para verificar los cambios
# #print(news_df.head())



In [None]:
import pandas as pd
from dateutil import parser
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

# Suponiendo que tienes un DataFrame llamado news_df
# Iterar sobre cada fila del DataFrame
for index, row in news_df.iterrows():
    time_text = row['Time']
    # Verificar si el texto contiene 'Yesterday'
    if 'Yesterday' in time_text:
        # Obtener la fecha de ayer
        yesterday = datetime.now() - timedelta(days=1)
        # Asignar la fecha de ayer al DataFrame
        news_df.at[index, 'Time'] = yesterday.strftime('%Y-%m-%d')
    # Verificar si el texto contiene 'ago'
    elif 'ago' in time_text:
        # Extraer la cantidad de tiempo (en horas) desde 'ago'
        hours_ago = int(time_text.split()[0])
        # Calcular la fecha actual menos la cantidad de tiempo
        current_time = datetime.now() - relativedelta(hours=hours_ago)
        # Asignar la nueva fecha al DataFrame
        news_df.at[index, 'Time'] = current_time.strftime('%Y-%m-%d')
    # Verificar si el texto está en el formato "Feb 20"
    elif len(time_text.split()) == 2:
        # Obtener el mes y el día del texto
        month_str, day_str = time_text.split()
        # Obtener la fecha actual
        current_date = parser.parse(time_text)
        # Asignar la nueva fecha al DataFrame
        news_df.at[index, 'Time'] = current_date.strftime('%Y-%m-%d')
    elif '-' in time_text:
        continue  # Saltar la conversión si ya está en el formato esperado    
    else:
        try:
            # Intentar analizar la fecha con dateutil.parser
            parsed_time = parser.parse(time_text)
            # Convertir la fecha al formato YYYY-MM-DD
            news_df.at[index, 'Time'] = parsed_time.strftime('%Y-%m-%d')
        except Exception as e:
            print(f"Error parsing time at index {index}: {e}")


In [None]:
import pandas as pd
from datetime import datetime

# Suponiendo que tienes el DataFrame llamado df

# Definir el rango de fechas start_date y end_date
#start_date = datetime(2023, 1, 1)
#end_date = datetime(2024, 2, 29)

# Convertir la columna 'Time' al tipo datetime
news_df['Time'] = pd.to_datetime(news_df['Time'], format='%Y-%m-%d')

# Filtrar las filas que estén dentro del rango de fechas especificado
filtered_df = news_df[(news_df['Time'] >= start_date) & (news_df['Time'] <= end_date)]

# Imprimir el DataFrame resultante
print(filtered_df.info())
filtered_df.to_csv('news2.csv', index=False)


In [None]:
from textblob import TextBlob
import pandas as pd

# Suponiendo que tienes el DataFrame llamado df con las columnas 'Title' y 'Time'

# Crear una lista para almacenar los resultados del análisis de sentimientos
sentiment_results = []

# Realizar el análisis de sentimientos en forma mensual
for month, group in filtered_df.groupby(pd.Grouper(freq='M', key='Time')):
    # Concatenar todos los títulos de noticias en un solo texto para el mes actual
    month_text = ' '.join(group['Title'])
    
    # Realizar el análisis de sentimientos en el texto del mes actual
    blob = TextBlob(month_text)
    average_sentiment = blob.sentiment.polarity
    
    # Agregar los resultados a la lista de resultados de sentimiento
    sentiment_results.append({'Month': month.strftime('%Y-%m'), 'Average Sentiment': average_sentiment})

# Crear un DataFrame a partir de la lista de resultados de sentimiento
sentiment_df = pd.DataFrame(sentiment_results)

# Imprimir el DataFrame de resultados de sentimiento
print(sentiment_df)

