In [50]:
import requests
import csv
import os
import time
from dotenv import load_dotenv
load_dotenv()

api_key = os.getenv("RADIOCUT_API_KEY")

base_url = 'https://radiocut.fm/api/search/?type=cut&page='

params = {
    'access_key': api_key
}

# Define the total number of pages you want to iterate over
total_pages = 5

with open('cuts.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=['index','programa','radio','banda','ciudad','provincia','pais','inicio','duracion','usuario','reproducciones','etiquetas','titulo','descripcion','link'])
    writer.writeheader()
    index = 1
    
    # Iterate over each page
    for page in range(1, total_pages + 1):
        url = base_url + str(page)
        response = requests.get(url, params=params)

        if response.status_code == 200:
            data = response.json()
            cuts_list = data.get('results', [])

            for cuts in cuts_list:
                value = cuts.get('value', {})
                tags = value.get('tags', [])
                tags_str = ', '.join([tag.get('title', 'NS') for tag in tags if 'title' in tag])
                slug = value.get('slug', 'NS')
                link = 'https://radiocut.fm/audiocut/' + slug
                
                writer.writerow({
                    'index': index,
                    'programa': value.get('show', 'NS'),
                    'radio': value.get('radio_obj', {}).get('name', 'NS'),
                    'banda': value.get('radio_obj', {}).get('band', 'NS'),
                    'ciudad': value.get('radio_obj', {}).get('city', {}).get('name', 'NS'),
                    'provincia': value.get('radio_obj', {}).get('city', {}).get('state', {}).get('name', 'NS'),
                    'pais': value.get('radio_obj', {}).get('city', {}).get('state', {}).get('country', {}).get('name', 'NS'),
                    'inicio': value.get('start', 'NS'),
                    'duracion': value.get('length', 'NS'),
                    'usuario': value.get('owner', {}).get('username', 'NS'),
                    'reproducciones': value.get('play_count', 'NS'),
                    'etiquetas': tags_str,
                    'titulo': value.get('title', 'NS'),
                    'descripcion': value.get('description', 'NS'),
                    'link': link,
                })
                index += 1
        
        else:
            print(f'Error al obtener recortes de la página {page}: {response.status_code}')
        
        # Add a 2-second delay between requests
        time.sleep(1)


Error al obtener recortes de la página 1: 502


In [51]:
with open('cuts.csv', 'r', encoding='utf-8') as infile, open('cleaned.csv', 'w', newline='', encoding='utf-8') as outfile:
    reader = csv.reader(infile)
    writer = csv.writer(outfile)

    for row in reader:
        # Limpiar saltos de línea en cada celda del CSV
        cleaned_row = [cell.replace('\n', ' ').replace('\r', ' ') for cell in row]
        writer.writerow(cleaned_row)

In [52]:
import os
from dotenv import load_dotenv

load_dotenv()

hf_api_key = os.getenv("HF_TOKEN")

from langchain_community.embeddings import HuggingFaceEmbeddings

HUGGINGFACEHUB_API_TOKEN = hf_api_key

In [53]:
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN

In [57]:
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}

embeddings = HuggingFaceEmbeddings()

In [58]:
from langchain.document_loaders import CSVLoader
loader = CSVLoader('cleaned.csv', encoding='utf-8')
data = loader.load()

In [59]:
data[0].page_content

'index: 1\nprograma: y-ahora-quien-podra-ayudarno\nradio: Con Vos\nbanda: FM\nciudad: Ciudad de Buenos Aires\nprovincia: Ciudad Autónoma de Buenos Aires\npais: Argentina\ninicio: 2024-08-14T12:15:44+00:00\nduracion: 645\nusuario: dderosa\nreproducciones: 43\netiquetas: \ntitulo: Fukuyama no se equivocó. La guerra contra la 3era posición\ndescripcion: Lo que se viene si gana Trump\nlink: https://radiocut.fm/audiocut/fukuyama-no-se-equivoco-guerra-contra-3era-posicion'

In [49]:
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_mistralai import MistralAIEmbeddings
import os
from dotenv import load_dotenv

load_dotenv()

api_key = os.getenv("MISTRAL_API_KEY")
if not api_key:
    raise ValueError("Error con las variables de entorno.")

os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN')

mistral = MistralAIEmbeddings(
    model="mistral-embed",
    api_key=api_key
)

# Especificar la codificación correcta
try:
    loader = CSVLoader(file_path="mas.csv", encoding='utf-8')  # Ajusta la codificación si es necesario
    documents = loader.load()

    text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=128)
    texts = text_splitter.split_documents(documents)

    # Extraer el contenido de texto de los documentos
    text_contents = [text.page_content for text in texts]

    # Generar las embeddings
    response = mistral.embed_documents(text_contents)

    print(response)

    # Crear el vectorstore con las embeddings
    vectorstore = FAISS.from_documents(texts, mistral)

except KeyError as e:
    print(f"Error: {e}")
except Exception as e:
    print(f"Unexpected error: {str(e)}")


[[-0.050201416015625, 0.01117706298828125, 0.046295166015625, 0.01000213623046875, 0.0201263427734375, 0.035247802734375, 0.0116424560546875, 0.00368499755859375, 0.0205535888671875, 0.039581298828125, -0.044677734375, 0.06341552734375, -0.020294189453125, -0.0020580291748046875, -0.0298614501953125, 0.033660888671875, -0.00583648681640625, 0.0198516845703125, -0.0032558441162109375, -0.015716552734375, -0.019256591796875, 0.00995635986328125, -0.0284881591796875, -0.002986907958984375, -0.033203125, -0.01439666748046875, 0.004016876220703125, -0.0338134765625, -0.0215911865234375, -0.0166473388671875, 0.0211029052734375, -0.0236358642578125, 0.021392822265625, 0.0202178955078125, -0.0026702880859375, -0.03118896484375, -0.01039886474609375, -0.03155517578125, 0.01399993896484375, 0.000499725341796875, 0.012054443359375, -0.01371002197265625, 0.005496978759765625, -0.0183563232421875, 0.005977630615234375, -0.02685546875, 0.006893157958984375, -0.014617919921875, -0.009002685546875, -0

An error occurred with MistralAI: 'data'


Error: 'data'


In [16]:
from langchain_mistralai.chat_models import ChatMistralAI
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

# Define una interfaz retriever (ahí está el context)
retriever = vectorstore.as_retriever()
# Instancia el Chat
model = ChatMistralAI(api_key=api_key)
# Define prompt template
prompt = ChatPromptTemplate.from_template("""Respondé la siguiente pregunta basándote únicamente en el contexto provisto:

<context>
{context}
</context>

Question: {input}""")

# Crea una cadena de retrieval para responder a la pregunta
document_chain = create_stuff_documents_chain(model, prompt)
retrieval_chain = create_retrieval_chain(retriever, document_chain)
response = retrieval_chain.invoke({"input": "¿Cuáles fueron los temas principales del 13 de agosto?"})
print(response["answer"])

Based on the provided context, there were two programs that were broadcast on August 13, one on AM 750 and the other on AM 530 - Somos Radio.

On AM 750, the program "la-manana-con-victor-hugo" featured an editorial by Víctor Hugo Morales. However, the specific content of the editorial is not provided in the context.

On AM 530 - Somos Radio, the program "jugo-limon" featured an editorial by Sandra Russo titled "Y mientras tanto..." The content of this editorial is also not provided in the context.

Therefore, based on the information given, the main topics of August 13 were the editorials by Víctor Hugo Morales and Sandra Russo, but the specific content of these editorials is not provided.
