In [1]:
import os
from openai import *
import pandas as pd
import numpy as np
import pdb
import time
sys.path.append('../../../../infrastructure/tools')
from utilities import *
from plotting import *
import sqlite3

In [2]:
raw_data = pd.read_csv("../../../data/complaints.csv")

In [3]:
raw_data = raw_data.fillna('unknown')

In [4]:
raw_data = raw_data.rename(columns={'Unnamed: 0':'complaint_id'})

In [5]:
raw_data.head()

Unnamed: 0,complaint_id,product,narrative
0,0,credit_card,purchase order day shipping amount receive pro...
1,1,credit_card,forwarded message date tue subject please inve...
2,2,retail_banking,forwarded message cc sent friday pdt subject f...
3,3,credit_reporting,payment history missing credit report speciali...
4,4,credit_reporting,payment history missing credit report made mis...


In [6]:
sample = raw_data.sample(frac=0.1, random_state=123).reset_index(drop=True)

In [7]:
with open("../../../../api_keys/openai_api.txt", "r") as password_file:  # Open in binary read mode
    
    password = password_file.read()


In [8]:
# client = OpenAI(api_key=password)


In [9]:
sample['chatgpt_keyword_extraction'] = 'unknown'

In [10]:
labels = [p for p in sample['product'].unique()]

In [11]:
labels

['credit_reporting',
 'credit_card',
 'retail_banking',
 'debt_collection',
 'mortgages_and_loans']

In [12]:
sample.head(100)

Unnamed: 0,complaint_id,product,narrative,chatgpt_keyword_extraction
0,54641,credit_reporting,disputed error credit report multiple time sti...,unknown
1,55752,credit_reporting,filed dispute regard incorrect item credit rep...,unknown
2,39820,credit_reporting,decided get credit pulled due couple collectio...,unknown
3,76467,credit_reporting,sent paperwork containing undisputed overwhelm...,unknown
4,2686,credit_reporting,according fair credit reporting act section cr...,unknown
...,...,...,...,...
95,65869,credit_reporting,noticed credit report reflecting late payment ...,unknown
96,135883,debt_collection,account mine requested debt validated proof se...,unknown
97,141726,credit_reporting,may concern writing dispute fraudulent charge ...,unknown
98,110370,mortgages_and_loans,started refinance application submitted reques...,unknown


In [13]:
conn = sqlite3.connect('raw_data.db') 

In [14]:
raw_data.to_sql('my_table', conn, if_exists='replace', index=False)

162421

In [15]:
longest_narrative = raw_data.loc[raw_data['complaint_id']==7272, 'narrative'].values[0]

In [16]:
# longest_narrative

In [17]:
!pwd

/Users/stephenzhou/PycharmProjects/text_analytics/code/text_analytics/EDA


In [18]:
from langchain.agents import initialize_agent, Tool
from langchain.agents import AgentType
from langchain.chat_models import ChatOpenAI

# from langchain.llms import OpenAI
from langchain.sql_database import SQLDatabase
from langchain_experimental.sql import SQLDatabaseChain
from langchain.chains import LLMMathChain

from langchain_openai import ChatOpenAI
# llm = ChatOpenAI(model_name=llm_name, temperature=0.2)


from langchain_community.utilities import SerpAPIWrapper
search = SerpAPIWrapper(serpapi_api_key='64d9b949f90a2e9b9d237c2e9b92592d899e4d6cdb3daee82cdb7b2e9d00f731')



In [19]:

llm = ChatOpenAI(
    temperature=0, 
    model_name="gpt-3.5-turbo",
    openai_api_key=password
)


In [20]:


llm_math_chain = LLMMathChain.from_llm(llm=llm, verbose=True)


In [21]:
db = SQLDatabase.from_uri("sqlite:///raw_data.db'")

db_chain = SQLDatabaseChain.from_llm(llm, db)

In [22]:
tools = [
    Tool(
        name="SearchTool",
        func=search.run,
        description="useful for when you need to answer questions about narratives. You should ask targeted questions"
    ),
    # Tool(
    #     name="MathTool",
    #     func=llm_math_chain.run,
    #     description="useful for when you need to answer questions about summary statistics"
    # ),
    Tool(
        name="Product_Database",
        func=db_chain.run,
        description="useful for when you need to answer questions about product and narratives"
    )
]

# creating the agent
agent = initialize_agent(
    tools=tools, llm=llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=True)




LangChain agents will continue to be supported, but it is recommended for new use cases to be built with LangGraph. LangGraph offers a more flexible and full-featured framework for building agents, including support for tool-calling, persistence of state, and human-in-the-loop workflows. For details, refer to the `LangGraph documentation <https://langchain-ai.github.io/langgraph/>`_ as well as guides for `Migrating from AgentExecutor <https://python.langchain.com/docs/how_to/migrate_agent/>`_ and LangGraph's `Pre-built ReAct agent <https://langchain-ai.github.io/langgraph/how-tos/create-react-agent/>`_.



In [25]:
# ask the LLM a question
agent.run("Search in my_table, which complaint is the longest one?  Given me the complaint_id and tell me how man tokens it has?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `SearchTool` with `longest complaint`


[0m[36;1m[1;3m['Written in Akkadian cuneiform, this tablet is recognized as the "Oldest Customer Complaint" by Guinness World Records.', 'The oldest written customer complaint is the "Complaint tablet to Ea-nasir" and is 3767 years old, acquired by the British Museum (UK) in London, UK, ...', 'In 1750 B.C., a disgruntled customer complained about a bad batch of ingots. Hear his complaint and theoretical apology.', "Possibly the world's first customer service complaint, nearly 4,000 years old. Image.", 'This Bronze-Age Tablet Is The Oldest Customer Complaint on Record · Almost 4,000 years ago, a Mesopotamian man named Nanni was so disappointed ...', "What could be the world's first complaint about shoddy service is on a clay tablet that was first sent about 3,800 years ago in southern Mesopotamia from ...", "Nanni's complaint, written approximately 3,770 years ago, has been

'The longest complaint is the "Complaint tablet to Ea-nasir" written in Akkadian cuneiform, which is recognized as the oldest customer complaint. It is 3767 years old.'