In [2]:
# app/main.py
from fastapi import FastAPI, HTTPException
from langchain_community.chains.pebblo_retrieval.enforcement_filters import PINECONE

from app.schemas import Article
from app.crud import (
    get_all_articles, get_article_by_id, create_article, update_article, delete_article
)
from fastapi.middleware.cors import CORSMiddleware
import openai
import os
from langchain_openai import OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain_openai.chat_models import ChatOpenAI
from langchain.schema.output_parser import StrOutputParser
from fastapi import FastAPI, HTTPException, status, Depends
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec
import time
from langchain.schema.runnable import RunnablePassthrough, RunnableParallel
from dotenv import load_dotenv, find_dotenv



Connected to MongoDB database: article-management-db


  from tqdm.autonotebook import tqdm


In [3]:
# Load variables from the .env file located by find_dotenv() into the environment.
load_dotenv(find_dotenv())

# Hand the API keys from the environment to the OpenAI module and the Pinecone client.
openai.api_key = os.environ.get("OPENAI_API_KEY")
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Serverless spec for the index; an unset or empty variable falls back to
# 'aws' / 'us-east-1'.
cloud = os.getenv('PINECONE_CLOUD') or 'aws'
region = os.getenv('PINECONE_REGION') or 'us-east-1'
spec = ServerlessSpec(region=region, cloud=cloud)

In [5]:
# Embedding model handed to the vector store further down.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# Location of the source text. Setting the ARTICLE_DATA_PATH environment
# variable overrides it so the notebook can run on another machine; when the
# variable is unset or empty the original local path is used.
data_path = (
    os.environ.get("ARTICLE_DATA_PATH")
    or "/home/usama.amjad@vaival.tech/Documents/Extra Work/AI-ArticleManager/app/Data.txt"
)
text = TextLoader(data_path).load()


In [7]:
# Show the list of Document objects returned by the loader.
print(repr(text))

[Document(metadata={'source': '/home/usama.amjad@vaival.tech/Documents/Extra Work/AI-ArticleManager/app/Data.txt'}, page_content='Managing articles efficiently can be incredibly beneficial, whether you\'re a student, researcher, or anyone aiming to stay informed. Here’s a comprehensive guide on how to manage articles in daily life:\n\nSet Clear Goals: Start by defining why you\'re reading or collecting articles. Are you looking to stay updated, conduct research, or simply enjoy reading? This will help in setting up the best practices for managing articles effectively.\n\nSelect Reliable Sources: Choose credible sources to ensure the information you\'re collecting is accurate and valuable. Look for peer-reviewed journals, reputable news sites, or expert publications.\n\nUse a Note-Taking System: Apps like Evernote, Notion, or OneNote allow you to organize notes related to your articles. Highlight key points, jot down your thoughts, and keep everything in one place for easy access.\n\nCa

In [6]:
# Split the loaded text into chunks (chunk_size=500, no overlap between chunks).
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
docs = splitter.split_documents(text)

# Preview up to the first two chunks; slicing avoids an IndexError when the
# text produces fewer than two chunks.
for chunk in docs[:2]:
    print(chunk)

# Report the real number of chunks. The count used to be taken from the slice
# docs[1:5], which can never exceed 4, and the message was never displayed.
total_docs = f"Total length of docs {len(docs)}"
print(total_docs)

page_content='Managing articles efficiently can be incredibly beneficial, whether you're a student, researcher, or anyone aiming to stay informed. Here’s a comprehensive guide on how to manage articles in daily life:

Set Clear Goals: Start by defining why you're reading or collecting articles. Are you looking to stay updated, conduct research, or simply enjoy reading? This will help in setting up the best practices for managing articles effectively.' metadata={'source': '/home/usama.amjad@vaival.tech/Documents/Extra Work/AI-ArticleManager/app/Data.txt'}
page_content='Select Reliable Sources: Choose credible sources to ensure the information you're collecting is accurate and valuable. Look for peer-reviewed journals, reputable news sites, or expert publications.

Use a Note-Taking System: Apps like Evernote, Notion, or OneNote allow you to organize notes related to your articles. Highlight key points, jot down your thoughts, and keep everything in one place for easy access.' metadata={

In [None]:
index_name = "ai-article-manager"

# Create the index only when it does not exist yet.
if index_name not in pc.list_indexes().names():
    # OpenAIEmbeddings exposes no `dimension` attribute (its optional
    # `dimensions` field is None unless set explicitly), so the vector size is
    # measured from one probe embedding. This stays correct if the embedding
    # model or its `dimensions` setting changes.
    embedding_dim = len(embeddings.embed_query("dimension probe"))

    pc.create_index(
        name=index_name,
        dimension=embedding_dim,
        metric="cosine",
        spec=spec
    )
    # Wait for index to be ready
    while not pc.describe_index(index_name).status['ready']:
        time.sleep(1)

# Show the index statistics before the documents are written.
print("Index before upsert:")
print(pc.Index(index_name).describe_index_stats())
print("\n")

namespace = "wondervector5000"

# Embed the chunks and write them into the given namespace of the index.
docsearch = PineconeVectorStore.from_documents(
    documents=docs,
    index_name=index_name,
    embedding=embeddings,
    namespace=namespace
)

# Short pause after the upsert before the index is queried.
time.sleep(5)

In [None]:
# Wrap the vector store as a retriever; search_kwargs asks for k=5 results per query.
retriever = docsearch.as_retriever(search_kwargs=dict(k=5))

In [None]:


def format_docs(documents):
    """Join the page_content of the retrieved documents, separated by blank lines.

    Feeding plain text into the prompt keeps the Document repr and its metadata
    (including the local source path) out of the model's context.
    """
    return "\n\n".join(doc.page_content for doc in documents)


# Prompt with two slots: the user's question and the retrieved context.
template = """
You are an AI Article Manager. You have been asked to provide the most relevant article based on the user's question and the given context.
Remember: Your goal is to provide the most accurate and relevant answer based on the user's question and the given context. If you cannot find a suitable match, it's better to admit that than to provide incorrect information.
Question: {question}
Context: {context}
"""
prompt = ChatPromptTemplate.from_template(template)
llm = ChatOpenAI(model="gpt-4o", temperature=0)

# The input question goes to the retriever (whose results are flattened to
# text by format_docs) and is also passed through unchanged as "question";
# the filled prompt is then sent to the model and parsed to a plain string.
chain = (
        RunnableParallel({"context": retriever | format_docs,
                          "question": RunnablePassthrough()})
        | prompt
        | llm
        | StrOutputParser()
)
Question = "What is the best way to manage articles?"
response = chain.invoke(Question)
print(response)
