In [1]:
import instructor
import os
import pandas as pd

from langchain_community.llms import Ollama
from crewai import Agent, Task, Crew, Process
from openai import OpenAI
from pydantic import BaseModel, Field
from typing import List, Optional, Dict
from textwrap import dedent
from dotenv import load_dotenv
load_dotenv()

from langchain.agents import Tool
from langchain.tools import tool # for making custom tools
from langchain_community.utilities import GoogleSerperAPIWrapper
from langchain_experimental.utilities import PythonREPL

In [2]:
# LLM handle for the Ollama model named "llama2_finance_cat"; it is invoked
# directly for prompting and passed as `llm=` to the CrewAI agents in this notebook.
llm = Ollama(model="llama2_finance_cat")

1. Read in data
2. Correctly classify each category
3. Identify typical monthly expenditures.
4. Generate spending by months and in categories
5. Determine an emergency fund

In [3]:
# Load the 2023 transaction export and order the rows by merchant description.
# The sort is chained (no in-place mutation) so re-running the cell is idempotent.
transactions_csv = "/Users/timwang/Projects/LLM/finance_ai/finance_ai/data/capitalone_transaction_2023.csv"
df = pd.read_csv(transactions_csv).sort_values(by="Description")

# Preview the first ten rows of the sorted frame.
df.head(10)

Unnamed: 0,Transaction Date,Posted Date,Card No.,Description,Category,Debit,Credit
812,2023-03-15,2023-03-16,6161,(JOO)HANJIN,Other Travel,5.4,
939,2023-02-02,2023-02-03,6161,(M342)M.K.SUKI -TERMINAL,Dining,43.59,
610,2023-05-28,2023-05-29,6161,(M342)M.K.SUKI -TERMINAL,Dining,23.4,
135,2023-11-19,2023-11-22,2175,1OAK TOKYO,Dining,80.44,
654,2023-05-24,2023-05-25,6161,461 SCT-ICONSIAM 7TH,Dining,3.78,
910,2023-02-04,2023-02-06,6161,7-11 BANGLAMPHU 4,Merchandise,0.58,
676,2023-05-21,2023-05-23,6161,7-11 EKKAMAI 22,Merchandise,1.14,
917,2023-02-03,2023-02-06,6161,7-11 EVERITCH,Merchandise,2.88,
929,2023-02-02,2023-02-04,6161,7-11 EVERITCH,Merchandise,2.5,
918,2023-02-02,2023-02-06,6161,7-11 EVERITCH,Merchandise,1.65,


In [22]:
# Unique merchant descriptions in alphabetical order.
vendors = sorted(df.Description.unique())

# dedent() only strips a margin shared by *every* line, so the template text
# starts on its own line (after the backslash) and is dedented before the
# vendor list is interpolated. This keeps source indentation out of the prompt.
franchise_prompt = dedent("""\
    The following is a list of business names, which may or may not be part of a franchise.
    Identify all of the unique franchises in this list. You can tell if they are part of the same franchise
    if they have the same substring in their name.
    The list: {vendor_sample}""").format(vendor_sample=vendors[:30])

# Only the first 30 vendors are sent to the model.
results = llm.invoke(franchise_prompt)
print(results)

There are 14 unique franchises in this list:

1. 7-Eleven - appears 5 times in the list
2. 808 Local Hawaiian Grill - appears once in the list
3. 99 Ranch - appears once in the list
4. 999 Minimart - appears once in the list
5. A Medium Corporation - appears once in the list
6. A Ri Rang Tofu House - appears once in the list
7. (JOO)HANJIN - appears once in the list
8. (M342)M.K.SUKI -TERMINAL - appears once in the list
9. 1OAK TOKYO - appears once in the list
10. 461 SCT-ICONSIAM 7TH - appears once in the list
11. 7-11 BANGLAMPHU 4 - appears once in the list
12. 7-11 EKKAMAI 22 - appears once in the list
13. 7-11 FURAMA - appears once in the list
14. 7-11 MAHATUN PHLOENCHIT - appears once in the list

The franchises that are part of the same franchise chain are:

* 7-Eleven (5 times)
* 99 Ranch (1 time)
* 999 Minimart (1 time)

The other franchises listed are unique and do not appear to be part of the same franchise chain.


In [5]:
# Fail early with a readable message when the Serper key is absent.
# Assigning None into os.environ would otherwise raise an opaque TypeError.
serper_api_key = os.getenv("SERPER_API_KEY")
if not serper_api_key:
    raise EnvironmentError(
        "SERPER_API_KEY is not set; export it or add it to the .env file "
        "before creating the Google Serper search tool."
    )
os.environ["SERPER_API_KEY"] = serper_api_key

# Search wrapper; presumably it reads SERPER_API_KEY from the environment
# (see the LangChain GoogleSerperAPIWrapper docs).
googlesearch = GoogleSerperAPIWrapper()

# Expose the wrapper's `run` method as a named tool that an agent can call.
search_tool = Tool(
    name="Google Search",
    func=googlesearch.run,
    description="Useful for search-based queries"
)

# Research agent: uses the notebook's LLM, may only call the search tool,
# and is not allowed to delegate work to other agents.
researcher = Agent(
    llm=llm,
    tools=[search_tool],
    role="Researcher",
    goal="Search the internet for answers to any queries",
    backstory="An expert research analyst",
    allow_delegation=False,
    verbose=True,
)

# Categorisation task for the first five vendors.
# The description template is dedented before the vendor list is interpolated,
# so the source-code indentation of the second line does not end up in the prompt.
searchtask = Task(
    description=dedent("""\
        Take the following list and categorize each item in one word by researching the item on the internet.
        The list: {vendor_sample}""").format(vendor_sample=vendors[:5]),
    expected_output="A python dictionary that maps each vendor name to its category.",
    agent=researcher,
)

# Single-agent, single-task crew with verbose logging enabled.
crew = Crew(
    tasks=[searchtask],
    agents=[researcher],
    verbose=True,
)

# Run the crew and keep whatever kickoff() returns for later inspection.
result = crew.kickoff()

[DEBUG]: Working Agent: Researcher
[INFO]: Starting Task: Take the following list and categorize each item in one word by researching the item on the internet. 
                  The list: ['(JOO)HANJIN', '(M342)M.K.SUKI -TERMINAL', '1OAK TOKYO', '461 SCT-ICONSIAM 7TH', '7-11 BANGLAMPHU 4']


[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3mThought: Do I need to use a tool? Yes
Action: Google Search
Action Input: "JOO)HANJIN"[0m[36;1m[1;3mView the profiles of people named Joo Hanjin. Join Facebook to connect with Joo Hanjin and others you may know. Facebook gives people the power to share... Joo-Kyo Kim is Managing Director at Hanjin P&C Co Ltd. See Joo-Kyo Kim's compensation, career history, education, & memberships. 12 Followers, 8 Following, 0 Posts - See Instagram photos and videos from 조한진 (@hanjin.jo) In October 2003, Kim Ju-ik, then-president of the Hanjin labor union, committed suicide by hanging himself on Crane 85 after his 129-day sit-in ... engages in the p

"You don't need to use any tools to find information about ICONSIAM. The chatbot is able to provide you with the information you need directly through the conversation."

In [55]:
# Call the search wrapper directly (no agent involved) to inspect the raw text
# it returns for a single vendor name.
googlesearch.run("(JOO)Hanjin")

"View the profiles of people named Joo Hanjin. Join Facebook to connect with Joo Hanjin and others you may know. Facebook gives people the power to share... Joo-Kyo Kim is Managing Director at Hanjin P&C Co Ltd. See Joo-Kyo Kim's compensation, career history, education, & memberships. 12 Followers, 8 Following, 0 Posts - See Instagram photos and videos from 조한진 (@hanjin.jo) engages in the provision of transportation and logistics services. It operates through the following businesses: Land Transportation, Cargo Handling, Shipping, ... Sales Support, Executive Assistant, Administrative & General Office | Learn more about Joo Hyoun Hwang Kim's work experience, education, connections & more ... HANJIN GROUP ; Seok-Dong Kim. Senior Advisor, Jipyong · - Chairman (Current), Jipyong Institute of Humanities and Society ; In-Ki Joo. Emeritus Professor of ... The Hanjin Group is a South Korean chaebol. The group has various industries covered from transportation and airlines to hotels, tourism, 

In [34]:
# Structured-output schema with a single field, `categories`: a str -> str mapping
# described as business name -> category.
# Not passed as a `response_model` in any cell visible here.
# Documented with `#` comments on purpose: a class docstring would be included in
# the pydantic JSON schema and so change what the model is shown.
class BusinessCategory(BaseModel):
    categories: Dict[str, str]=Field(
        description="A python dictionary that maps the name of the business with it's category")

# Structured-output schema with a single field, `franchises`: a str -> str mapping
# described as original business name -> franchise it belongs to.
# Used as the `response_model` of the franchise request further down.
# Documented with `#` comments on purpose: a class docstring would be included in
# the pydantic JSON schema and so change what the model is shown.
class Franchise(BaseModel):
    franchises: Dict[str,str]= Field(
        description="""A python dictionary that contains the original name of the business as the key 
        and the franchise it belongs to as the value.""")

# Patch an OpenAI-style client with instructor so that `create(...)` accepts
# `response_model`. The client targets the local Ollama server, which serves
# its OpenAI-compatible API under the /v1 path.
ollama_openai = OpenAI(
    api_key="ollama",  # required, but unused
    base_url="http://localhost:11434/v1",
)
client = instructor.patch(ollama_openai, mode=instructor.Mode.JSON)

# dedent() only strips a margin shared by *every* line, so the template text
# starts on its own line (after the backslash) and is dedented before the
# vendor list is interpolated. This keeps source indentation out of the prompt.
franchise_request = dedent("""\
    The following is a list of business names, which may or may not be part of a franchise.
    Identify all of the unique franchises in this list. You can tell if they are part of the same franchise
    if they have the same substring in their name.
    The list: {vendor_sample}""").format(vendor_sample=vendors[:30])

# Structured request: the reply is parsed into a `Franchise` instance, with up
# to three retries. temperature=0 asks for the least random sampling.
resp = client.chat.completions.create(
    model="llama2",
    messages=[
        {
            "role": "user",
            "content": franchise_request,
        }
    ],
    response_model=Franchise,
    temperature=0,
    max_retries=3,
)

KeyboardInterrupt: 

In [33]:
# Display the `franchises` field parsed from the structured response.
resp.franchises

['7-11',
 '99 RANCH',
 'A MEDIUM CORPORATION',
 'A RI RANG TOFU HOUSE',
 '7-ELEVEN']

In [None]:
# FIXME: unfinished cell. This assignment has no right-hand side, which is a
# SyntaxError and stops "Restart & Run All". It stays commented out until the
# intended value is known.
# googlesearch =

In [6]:
# Structured-output schema with a single string field, `code`, described as the
# Python source that fulfils the user's request.
# Documented with `#` comments on purpose: a class docstring would be included in
# the pydantic JSON schema and so change what the model is shown.
class PythonCode(BaseModel):
    code: str = Field(description="Provide python code that fulfills the user's request")


# Patch an OpenAI-style client with instructor so that `create(...)` accepts
# `response_model`. The client targets the local Ollama server, which serves
# its OpenAI-compatible API under the /v1 path.
ollama_openai = OpenAI(
    api_key="ollama",  # required, but unused
    base_url="http://localhost:11434/v1",
)
client = instructor.patch(ollama_openai, mode=instructor.Mode.JSON)

# Ask the model for a CSV-loading snippet; the reply is parsed into `PythonCode`.
csv_request = {
    "role": "user",
    "content": "Write a python code to read in a csv as a dataframe",
}
resp = client.chat.completions.create(
    model="llama2",
    messages=[csv_request],
    response_model=PythonCode,
)

# Pretty-print the validated response as indented JSON.
print(resp.model_dump_json(indent=2))

{
  "function_name": "Function Name",
  "code": "import pandas as pd\n\ndf = pd.read_csv('data.csv')"
}


In [15]:
# Take the generated snippet and swap its placeholder file name for the path of
# the real transactions CSV.
code_read_csv = resp.code.replace(
    "data.csv",
    "/Users/timwang/Projects/LLM/finance_ai/finance_ai/data/capitalone_transaction_2023.csv",
)


In [19]:
# Show the snippet after the path substitution so it can be reviewed before it is run.
print(code_read_csv)

import pandas as pd

df = pd.read_csv('/Users/timwang/Projects/LLM/finance_ai/finance_ai/data/capitalone_transaction_2023.csv')


In [20]:
# Run the snippet through LangChain's PythonREPL utility.
# NOTE(review): `code_read_csv` originates from a model response, so this executes
# model-generated code; inspect the printed snippet before running this cell.
PythonREPL().run(code_read_csv)

''

In [18]:
# Wrap a fresh PythonREPL's `run` method as a named tool that an agent can call.
python_repl = PythonREPL()
python_tool = Tool(
    func=python_repl.run,
    name="python_tool",
    description="Python shell that executed Python commands.",
)

# Data-cleaning agent: uses the notebook's LLM and is given the Python REPL tool.
agent = Agent(
    role='Data Cleaner',
    goal='Import and clean data',
    backstory='An experienced data analyst that is highly skilled in Python.',
    llm=llm,
    # Spelled `verbose` (not `vebose`) so the agent's step-by-step log is actually requested.
    verbose=True,
    tools=[python_tool]
)

# Task handed to the data-cleaning agent.
# `expected_output` is supplied, as for the search task earlier in the notebook,
# so the agent is told what a finished answer looks like.
task = Task(
    agent=agent,
    description='Write a code that can read in a csv as a dataframe in Python.',
    expected_output='Python code that reads a CSV file into a pandas DataFrame.'
)

# Execute this one task directly, without building a Crew.
task.execute()

'Agent stopped due to iteration limit or time limit.'