In [1]:
import os
from openai import OpenAI
import pandas as pd
import json

#from dotenv import load_dotenv  

# Load environment variables from .env file
#load_dotenv()

# Read the API key from the environment so it is never hard-coded in the
# notebook; `os.environ.get` yields None when the variable is not set.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

# Initialise the OpenAI client with that key. This module-level `client` is the
# object the generation functions below use for every chat completion request.

client = OpenAI(api_key = OPENAI_API_KEY)


def gen_val_qa_pairs(topic, num_pairs, model="gpt-3.5-turbo"):
    """Generate question/answer pairs about ``topic`` with the chat API.

    For every pair, one request asks the model for a complex question about
    ``topic`` and a second request asks it to answer that question in detail.
    Requests go through the module-level ``client``.

    Args:
        topic: Subject inserted into the question prompt.
        num_pairs: Number of pairs to produce (two API requests per pair).
        model: Chat model name sent with each request. Defaults to
            "gpt-3.5-turbo", the value that was previously hard-coded.

    Returns:
        A list of ``{"question": str, "answer": str}`` dicts, one per pair,
        in generation order.
    """

    def ask(prompt):
        # Send one single-turn user message and return the reply text.
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
        )
        # The response schema allows `content` to be None; normalise it to an
        # empty string instead of failing on `.strip()`.
        return (response.choices[0].message.content or "").strip()

    qa_pairs = []
    for _ in range(num_pairs):
        # First request: obtain the question.
        question = ask(f"Generate a complex question about {topic}.")
        # Second request: obtain an answer to that exact question.
        answer = ask(f"Answer the following question in detail: {question}")
        qa_pairs.append({"question": question, "answer": answer})

    return qa_pairs

# Generate the question/answer pairs
topic = "finance"
qa_pairs = gen_val_qa_pairs(topic, 5)  # Generate 5 pairs for the example

# Convert the list of dicts to a pandas DataFrame
qa_df = pd.DataFrame(qa_pairs)


In [2]:
# Display the DataFrame
print(qa_df)

                                            question  \
0  How does the practice of utilizing derivatives...   
1  What are the primary factors that contribute t...   
2  What is the impact of macroeconomic policies, ...   
3  What are the key factors influencing the price...   
4  How does the interaction of monetary policy, i...   

                                              answer  
0  The practice of utilizing derivatives has a si...  
1  The risk-return relationship in financial mark...  
2  Macroeconomic policies, including fiscal and m...  
3  The key factors influencing the price movement...  
4  The interaction between monetary policy, infla...  


In [4]:
def generate_questions(category, prompt, num_questions, model="gpt-3.5-turbo"):
    """Generate ``num_questions`` questions for one category.

    The same ``prompt`` is sent to the chat API once per question through the
    module-level ``client``; ``category`` is only used to label each result.

    Args:
        category: Label stored under the "Category" key of every record.
        prompt: User message sent verbatim on each request.
        num_questions: Number of requests to make / records to return.
        model: Chat model name sent with each request. Defaults to
            "gpt-3.5-turbo", the value that was previously hard-coded.

    Returns:
        A list of ``{"Category": category, "Question": str}`` dicts.
    """
    questions = []
    for _ in range(num_questions):
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
        )
        # The response schema allows `content` to be None; normalise it to an
        # empty string instead of failing on `.strip()`.
        text = (response.choices[0].message.content or "").strip()
        questions.append({"Category": category, "Question": text})
    return questions

# Define the prompt sent to the model for each question category
categories_prompts = {
    "Clarifying": "Generate a clarifying question for a team discussion. Example: 'Can you tell me more about your idea?'",
    "Probing": "Generate a probing question to challenge an idea. Example: 'Do you think this is going to work? Why?'",
    "Exploratory": "Generate an exploratory question to consider different scenarios. Example: 'What are the long-term effects of this approach?'",
    "Generative": "Generate a generative question to identify new solutions. Example: 'What other possible solutions can we consider?'"
}

# Total number of questions wanted across all categories
total_questions = 16  # Example: 16 questions in total
# Even split; floor division drops any remainder, so fewer than
# `total_questions` are generated when it is not a multiple of the category count.
questions_per_category = total_questions // len(categories_prompts)  # Even split

# Build the dataset: one batch of questions per category
dataset = []
for category, prompt in categories_prompts.items():
    dataset.extend(generate_questions(category, prompt, questions_per_category))

# Convert the list of records to a pandas DataFrame
df = pd.DataFrame(dataset)

In [7]:
def generate_qa_pairs(category, description, topic, num_pairs, model="gpt-3.5-turbo"):
    """Generate categorised question/answer pairs about ``topic``.

    For every pair, one request asks the model for a question of the given
    category (with ``description`` appended to the prompt as guidance) and a
    second request asks how it would answer that question. Requests go through
    the module-level ``client``.

    Args:
        category: Question category; lower-cased inside the prompts and stored
            unchanged under the "Category" key.
        description: Sentence describing the category, appended to the
            question prompt.
        topic: Subject inserted into both prompts.
        num_pairs: Number of pairs to produce (two API requests per pair).
        model: Chat model name sent with each request. Defaults to
            "gpt-3.5-turbo", the value that was previously hard-coded.

    Returns:
        A list of ``{"Category": ..., "Question": str, "Answer": str}`` dicts.
    """

    def ask(prompt):
        # Send one single-turn user message and return the reply text.
        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
        )
        # The response schema allows `content` to be None; normalise it to an
        # empty string instead of failing on `.strip()`.
        return (response.choices[0].message.content or "").strip()

    kind = category.lower()
    qa_pairs = []
    for _ in range(num_pairs):
        # First request: a question for this topic and category.
        question = ask(f"Generate a {kind} question about {topic}. {description}")
        # Second request: an answer to that exact question.
        answer = ask(f"How would you answer this {kind} question about {topic}: '{question}'?")
        qa_pairs.append({"Category": category, "Question": question, "Answer": answer})
    return qa_pairs

# Topic for the questions (rebinds the `topic` name set in the first cell)
topic = "Artificial Intelligence"

# Description of each question category; appended to the question prompt
categories_descriptions = {
    "Clarifying": "Clarifying questions aim to better understand the speaker's intent before jumping to conclusions.",
    "Probing": "Probing questions challenge the speaker's views or ideas, often to highlight flaws.",
    "Exploratory": "Exploratory questions help consider different scenarios and possibilities.",
    "Generative": "Generative questions identify new solutions or improve upon existing ones."
}

# Number of QA pairs to generate per category
num_pairs_per_category = 2

# Build the dataset (rebinds `dataset`, discarding any list built by an earlier cell)
dataset = []
for category, description in categories_descriptions.items():
    dataset.extend(generate_qa_pairs(category, description, topic, num_pairs_per_category))

# Convert the list of records to a pandas DataFrame
ds = pd.DataFrame(dataset)

# Display the DataFrame
print(ds)

      Category                                           Question  \
0   Clarifying  Could you please clarify what specific aspect ...   
1   Clarifying  What specific aspects or applications of Artif...   
2      Probing  How would you address concerns about the poten...   
3      Probing  What are some potential ethical concerns or ri...   
4  Exploratory  What are the potential ethical implications an...   
5  Exploratory  What are the potential ethical implications of...   
6   Generative  How can artificial intelligence be used to enh...   
7   Generative  How can we develop smarter artificial intellig...   

                                              Answer  
0  I am referring to a specific aspect or applica...  
1  I am referring to the various subfields and ap...  
2  There are a few ways I would address concerns ...  
3  There are several potential ethical concerns a...  
4  The potential ethical implications and consequ...  
5  The widespread adoption of Artificial Intelli

In [24]:
# Setup commands that were tried for this cell (kept for reference, not executed):
# pip3 install flask-sqlalchemy --user
# pip install apify-client --upgrade --quiet
#
# NOTE(review): the recorded run of this cell fails during the imports below with
# "ImportError: cannot import name 'URL' from 'sqlalchemy'". That suggests the
# installed SQLAlchemy is older than what this langchain version expects;
# upgrading SQLAlchemy is the likely fix — confirm in the target environment.

from langchain.docstore.document import Document
from langchain.document_loaders import ApifyDatasetLoader
from apify_client import ApifyClient
from langchain.indexes import VectorstoreIndexCreator

# Read the Apify token from the environment; `os.getenv` yields None when unset.
APIFY_API_TOKEN = os.getenv('APIFY_API_TOKEN')

# Initialise the Apify client
apify = ApifyClient(APIFY_API_TOKEN)

# Run the crawler actor to obtain the text of the crawled web pages
# (presumably blocks until the run finishes — confirm against the apify-client docs)
actor_call = apify.actor("apify/website-content-crawler").call(
    run_input={"startUrls": [{"url": "https://python.langchain.com/en/latest/"}]},
)

# Wrap the run's default dataset as LangChain documents: each item's "text"
# becomes the page content (empty string when falsy) and its "url" the source.
loader = ApifyDatasetLoader(
    dataset_id=actor_call["defaultDatasetId"],
    dataset_mapping_function=lambda item: Document(
        page_content=item["text"] or "", metadata={"source": item["url"]}
    ),
)

# Build a vector index from the crawled data
index = VectorstoreIndexCreator().from_loaders([loader])

# Query the vector index and print the answer together with its sources
query = "Are any OpenAI chat models integrated in LangChain?"
result = index.query_with_sources(query)
print(result["answer"])
print(result["sources"])
# Alternative without source attribution (disabled):
#result = index.query(query)
#print(result)


ImportError: cannot import name 'URL' from 'sqlalchemy' (c:\Users\Zakar\anaconda3\lib\site-packages\sqlalchemy\__init__.py)