In [17]:
#!pip install langchain_community
import pandas as pd
from langchain_community.llms import Ollama

In [18]:
llm = Ollama(model="llama3.1")
llm.invoke("Hello!")

'Hello! How are you today? Is there something I can help you with or would you like to chat?'

## READ TRANSACTION DATA

In [19]:
df = pd.read_csv(r"C:\Users\sibir\Downloads\transaction-data.csv")
df.head()

Unnamed: 0,Date,Name / Description,Expense/Income,Amount (EUR)
0,30-12-2023,Monthly Appartment Rent,Expense,453.21
1,29-12-2023,Vattenfall Energy,Expense,98.45
2,28-12-2023,Albert Heijn Amsterdam,Expense,32.18
3,27-12-2023,Netflix NL,Expense,11.99
4,26-12-2023,Web Development,Income,1856.32


In [20]:
unique_transactions = df["Name / Description"].unique()
print(len(unique_transactions))
print(unique_transactions)

50
['Monthly Appartment Rent' 'Vattenfall Energy' 'Albert Heijn Amsterdam'
 'Netflix NL' 'Web Development' 'Uber Amsterdam' 'Jumbo Supermarkt'
 'Consulting' 'Starbucks Amsterdam' 'KPN Mobile' 'Lidl Amsterdam'
 'Spotify Ab By Adyen' 'Translation Services' 'Shell Station'
 'Pathé Cinema' 'Deliveroo' 'Content Writing' 'Ziggo Internet'
 'Basic Fit Membership' 'Bol.com' 'Workshop Income' 'De Pijp Cafe'
 'NS Railways' 'Kruidvat' 'Freelancing' 'Disney Plus' 'Tesco Breda'
 'Photography Gig' 'MediaMarkt' 'Uber Eats' 'Youtube Revenue'
 'GVB Amsterdam' 'H&M Amsterdam' 'Graphic Design' 'Foodhallen Amsterdam'
 'Marqt Amsterdam' 'Beta Boulders Ams Amsterdam' 'Apple Services'
 'Online Teaching' 'Paradiso Amsterdam' 'Water Company PWN'
 'Airbnb Hosting' 'Etos Amsterdam' 'Blogging' 'IKEA Amsterdam'
 'Tls Bv Inz Ov-Chipkaart' 'Selling Paintings' 'Melkweg Amsterdam'
 'Vishandel Sier Amsterdam' 'Tk Maxx Amsterdam Da']


## CATEGORIZING DESCRIPTIONS

In [21]:
def categorize_transactions(transaction_names, llm):
    response = llm.invoke("Assign an appropriate category to each expense listed. Examples: Netflix NL - Entertainment, Starbucks Amsterdam - Food and Beverages, Beta Boulders Ams Amsterdam Nld - Sport, etc.. Categories should be concise (less than 4 words). Format your output strictly as: Name - Category. Avoid adding any extra characters or explanations. Don't need to number them either. Just the format mentioned" + transaction_names)
    response = response.split('\n')

    categories = pd.DataFrame({"Transaction and Category":response})
    categories[['Transaction', 'Category']] = categories['Transaction and Category'].str.split(' - ', expand=True)

    return categories


#categorize_transactions(str(unique_transactions[1:30]), llm)

def hop(start, stop, step):
    for i in range(start, stop, step):
        yield i
    yield stop

index_list = list(hop(0, len(unique_transactions), 30))
index_list

[0, 30, 50]

In [22]:
categories_df_all = pd.DataFrame()

for i in range(0, len(index_list)-1):
    transactions_names = str(unique_transactions[index_list[i]:index_list[i+1]])

    categories = categorize_transactions(transactions_names, llm)
    categories_df_all = pd.concat([categories_df_all, categories], ignore_index=True)

In [23]:
unique_classes = categories_df_all["Category"].unique()
categories_df_all = categories_df_all.dropna()
unique_classes

array(['Housing', 'Utilities', 'Food and Beverages', 'Entertainment',
       'Freelancing', 'Transportation', 'Telecommunications', 'Fuel',
       'Sport', 'Shopping', 'Income', 'Business', None, 'Services',
       'Subscription', 'Education', 'Health and Beauty'], dtype=object)

In [27]:
df_needed = pd.read_csv(r"C:\Users\sibir\Downloads\transaction-data.csv")
df_needed = pd.merge(df_needed, categories_df_all, left_on="Name / Description", right_on="Transaction", how="left")
df_needed

Unnamed: 0,Date,Name / Description,Expense/Income,Amount (EUR),Transaction and Category,Transaction,Category
0,30-12-2023,Monthly Appartment Rent,Expense,453.21,Monthly Appartment Rent - Housing,Monthly Appartment Rent,Housing
1,29-12-2023,Vattenfall Energy,Expense,98.45,Vattenfall Energy - Utilities,Vattenfall Energy,Utilities
2,28-12-2023,Albert Heijn Amsterdam,Expense,32.18,Albert Heijn Amsterdam - Food and Beverages,Albert Heijn Amsterdam,Food and Beverages
3,27-12-2023,Netflix NL,Expense,11.99,Netflix NL - Entertainment,Netflix NL,Entertainment
4,26-12-2023,Web Development,Income,1856.32,Web Development - Freelancing,Web Development,Freelancing
...,...,...,...,...,...,...,...
114,05-09-2023,Spotify Ab By Adyen,Expense,12.19,Spotify Ab By Adyen - Entertainment,Spotify Ab By Adyen,Entertainment
115,04-09-2023,Consulting,Income,2345.67,Consulting - Freelancing,Consulting,Freelancing
116,03-09-2023,Deliveroo,Expense,29.90,Deliveroo - Food and Beverages,Deliveroo,Food and Beverages
117,02-09-2023,KPN Mobile,Expense,29.99,KPN Mobile - Telecommunications,KPN Mobile,Telecommunications


In [28]:
df_needed.to_csv("transactions_zen_categorized.csv", index=False)