In [3]:
import pandas as pd
import openpyxl
import tkinter.filedialog
import datetime
import msvcrt
from IPython.display import display
from tqdm import tqdm
from random import randint
import time

### Input new bills records and appended to the history

In [3]:
def strip_in_data(data):  # Remove leading and trailing spaces from column names and data.
    data = data.rename(columns={column_name: column_name.strip() for column_name in data.columns})
    data = data.applymap(lambda x: x.strip().strip('¥') if isinstance(x, str) else x)
    return data


def read_data_wx(path):  # 获取微信数据
    d_wx = pd.read_csv(path, header=16, skipfooter=0, encoding='utf-8')  # 数据获取，微信
    d_wx = d_wx.iloc[:, [0, 4, 7, 1, 2, 3, 5]]  # 按顺序提取所需列
    d_wx = strip_in_data(d_wx)  # 去除列名与数值中的空格。
    d_wx.iloc[:, 0] = d_wx.iloc[:, 0].astype('datetime64')  # 数据类型更改
    d_wx.iloc[:, 6] = d_wx.iloc[:, 6].astype('float64')  # 数据类型更改
    d_wx = d_wx.drop(d_wx[d_wx['收/支'] == '/'].index)  # 删除'收/支'为'/'的行
    d_wx.rename(columns={'当前状态': '支付状态', '交易类型': '类型', '金额(元)': '金额'}, inplace=True)  # 修改列名称
    d_wx.insert(1, '来源', "微信", allow_duplicates=True)  # 添加微信来源标识
    len1 = len(d_wx)
    print("Sucessfully Read " + str(len1) + " 「Wechat」bills\n")
    return d_wx


def read_data_zfb(path):  # 获取支付宝数据
    d_zfb = pd.read_csv(path, header=4, skipfooter=7, encoding='gbk')  # 数据获取，支付宝
    d_zfb = d_zfb.iloc[:, [2, 10, 11, 6, 7, 8, 9]]  # 按顺序提取所需列
    d_zfb = strip_in_data(d_zfb)  # 去除列名与数值中的空格。
    d_zfb.iloc[:, 0] = d_zfb.iloc[:, 0].astype('datetime64')  # 数据类型更改
    d_zfb.iloc[:, 6] = d_zfb.iloc[:, 6].astype('float64')  # 数据类型更改
    d_zfb = d_zfb.drop(d_zfb[d_zfb['收/支'] == ''].index)  # 删除'收/支'为空的行
    d_zfb.rename(columns={'交易创建时间': '交易时间', '交易状态': '支付状态', '商品名称': '商品', '金额（元）': '金额'}, inplace=True)  # 修改列名称
    d_zfb.insert(1, '来源', "支付宝", allow_duplicates=True)  # 添加支付宝来源标识
    len2 = len(d_zfb)
    print("Sucessfully Read " + str(len2) + " 「Alipay」bills\n")
    return d_zfb


def add_cols(data):  # 增加3列数据
    # 逻辑1：取值-1 or 1。-1表示支出，1表示收入。
    data.insert(8, '逻辑1', -1, allow_duplicates=True)  # 插入列，默认值为-1
    for index in range(len(data.iloc[:, 2])):  # 遍历第3列的值，判断为收入，则改'逻辑1'为1
        if data.iloc[index, 2] == '收入':
            data.iloc[index, 8] = 1

        # update 2021/12/29: 修复支付宝理财收支逻辑bug
        elif data.iloc[index, 5] == '蚂蚁财富-蚂蚁（杭州）基金销售有限公司' and '卖出' in data.iloc[index, 6]:
            data.iloc[index, 8] = 1
        elif data.iloc[index, 5] == '蚂蚁财富-蚂蚁（杭州）基金销售有限公司' and '转换至' in data.iloc[index, 6]:
            data.iloc[index, 8] = 0
        elif data.iloc[index, 2] == '其他' and '收益发放' in data.iloc[index, 6]:
            data.iloc[index, 8] = 1
        elif data.iloc[index, 2] == '其他' and '现金分红' in data.iloc[index, 6]:
            data.iloc[index, 8] = 1
        elif data.iloc[index, 2] == '其他' and '买入' in data.iloc[index, 6]:
            data.iloc[index, 8] = -1
        elif data.iloc[index, 2] == '其他':
            data.iloc[index, 8] = 0

    # 逻辑2：取值0 or 1。1表示计入，0表示不计入。
    data.insert(9, '逻辑2', 1, allow_duplicates=True)  # 插入列，默认值为1
    for index in range(len(data.iloc[:, 3])):  # 遍历第4列的值，判断为资金流动，则改'逻辑2'为0
        col3 = data.iloc[index, 3]
        if (col3 == '提现已到账') or (col3 == '已全额退款') or (col3 == '已退款') or (col3 == '退款成功') or (col3 == '还款成功') or (
                col3 == '交易关闭'):
            data.iloc[index, 9] = 0

    # 月份
    data.insert(1, '月份', 0, allow_duplicates=True)  # 插入列，默认值为0
    for index in range(len(data.iloc[:, 0])):
        time = data.iloc[index, 0]
        data.iloc[index, 1] = time.month  # 访问月份属性的值，赋给这月份列

    # 乘后金额
    data.insert(11, '乘后金额', 0, allow_duplicates=True)  # 插入列，默认值为0
    for index in range(len(data.iloc[:, 8])):
        money = data.iloc[index, 8] * data.iloc[index, 9] * data.iloc[index, 10]
        data.iloc[index, 11] = money
    return data

### Here is an example, and package this part into `DataHandler.py`

In [4]:
# New bills
filename = r'New_records_alipay.csv'
path = r'D:\Projects\Accounts\Dataloader'
data_zfb = read_data_zfb(path + '\\' + filename) # New bills
_new_part = add_cols(data_zfb)
_new_part.to_csv(path + '\\' + 'New_records_alipay.csv')

new_part = pd.read_csv(path + '\\' + 'New_records_alipay.csv', index_col=0).fillna('')


Sucessfully Read 148 「Alipay」bills



  d_zfb = pd.read_csv(path, header=4, skipfooter=7, encoding='gbk')  # 数据获取，支付宝


In [6]:
# Historic bills
his_path = r'D:\Projects\Accounts\Dataloader\his\History_records_alipay.csv'
his_part = pd.read_csv(his_path, index_col=0).fillna('') # New bills

In [8]:
data_merge = pd.concat([his_part,new_part]).drop_duplicates()

In [9]:
data_merge.sort_values('交易时间')

Unnamed: 0,交易时间,月份,来源,收/支,支付状态,类型,交易对方,商品,金额,逻辑1,逻辑2,乘后金额
4204,2020-12-01 10:31:33,12,支付宝,不计收支,交易成功,即时到账交易,蚂蚁财富-蚂蚁（杭州）基金销售有限公司,蚂蚁财富-天弘中证银行指数A-买入,23.0,-1,1,-23.0
4203,2020-12-01 13:32:39,12,支付宝,不计收支,交易成功,支付宝担保交易,ellen0wang,基础综合英语 学生用书 研究生英语教材 pdf版电子书,5.5,-1,1,-5.5
4202,2020-12-01 13:37:35,12,支付宝,支出,交易成功,即时到账交易,App Store & Apple Music,App Store & Apple Music: 于 11.30完成的购买,3.0,-1,1,-3.0
4201,2020-12-01 21:53:47,12,支付宝,不计收支,交易成功,即时到账交易,麦当劳(深南西路分店餐厅),麦当劳餐厅(深圳) 有限公司,6.0,-1,1,-6.0
4200,2020-12-01 21:54:36,12,支付宝,不计收支,交易成功,即时到账交易,麦当劳(深南西路分店餐厅),麦当劳餐厅(深圳) 有限公司,0.1,-1,1,-0.1
...,...,...,...,...,...,...,...,...,...,...,...,...
4,2023-12-09 20:16:12,12,支付宝,支出,交易成功,即时到账交易,Valve,S2P1172140557 Steam Purchase,15.8,-1,1,-15.8
3,2023-12-10 13:13:45,12,支付宝,不计收支,交易成功,即时到账交易,上海都畅数字技术有限公司,三林东--西藏南路,4.0,-1,1,-4.0
2,2023-12-10 13:21:41,12,支付宝,不计收支,交易成功,即时到账交易,美团,大众点评订单-23121011100400000021574530283011,5.9,-1,1,-5.9
1,2023-12-10 14:17:04,12,支付宝,不计收支,交易成功,即时到账交易,上海都畅数字技术有限公司,马当路--商城路,3.0,-1,1,-3.0


In [10]:
data_merge.to_csv(r'D:\Projects\Accounts\Dataloader\his\History_records_alipay.csv')

### Analysis: Daily expense is the most available way to quantify behaviours. Here is the following steps:
1. Create a set that useful to collect your expense.
2. Categorize the expense by rows.
3. Do your individual analysis with some visualization.

#### My set (this part would be the key, it reflects your value system.
['必要性食品','非必要性食品','交通','一般耐用品','电子耐用品','订阅类信息服务或娱乐产品','线下娱乐或休闲','其他']

#### GPT-4.0 works.
- OpenAI's GPT4 API works in this situation.
- Time & Financial cost almost unavailable but still ok.

In [4]:
# read api key
with open(r'D:\accounts_key.txt','r') as key_file:
    OPENAI_API_KEY = key_file.readline()

import os
import openai 

openai.api_key = OPENAI_API_KEY

In [5]:
# read bills record
bill_df = pd.read_csv(r'D:\Projects\Accounts\Dataloader\his\History_records_alipay.csv', index_col=0)

In [11]:
# Bill
class Bill(object):
    def __init__(
        self,
        row:pd.core.series.Series,
        ):
        '''
        Categorization by row.
        '''
        self.row = row
        self.txt_eles = row.loc[['交易时间','交易对方','商品','金额']].values
        self.head_txts = '这是我的交易记录:' 
        self.txts = f'在{self.txt_eles[0]}, 我花费了{self.txt_eles[-1] }元人民币向{self.txt_eles[1]} 购买了{self.txt_eles[2]}。'
        self.tail_txts = \
        '''请帮我将这笔订单记录分类（只返回类别名称，如果难以判断则归类为“其他”），在以下类别 [食品,交通,耐用品,订阅类信息服务或娱乐产品,线下娱乐或休闲,理财] 中，上面这个订单属于哪一类?'''
        self.asking_texts = self.head_txts + self.txts + self.tail_txts
    
    # classifier
    def classify_expense(self):
        # try:
        response = openai.ChatCompletion.create(
            model='gpt-4-1106-preview',
            messages=[{"role": "system", "content": "你是一个根据订单记录对订单进行分类的智能助力，你言简意赅。你擅长结合交易对方的名称，交易发生的时间，交易金额判断支出类型。"},
                    {"role": "user", "content": self.asking_texts}],
            max_tokens=16
        )
        return response['choices'][0]['message']['content']


In [12]:
bill_df['时间'] =bill_df['交易时间'].copy() 
Bills_df = bill_df.set_index('时间').sort_index()
Bills_df
# Bills_df = bill_df.sort_values('交易时间')

Unnamed: 0_level_0,交易时间,月份,来源,收/支,支付状态,类型,交易对方,商品,金额,逻辑1,逻辑2,乘后金额
时间,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2020-12-01 10:31:33,2020-12-01 10:31:33,12,支付宝,不计收支,交易成功,即时到账交易,蚂蚁财富-蚂蚁（杭州）基金销售有限公司,蚂蚁财富-天弘中证银行指数A-买入,23.0,-1,1,-23.0
2020-12-01 13:32:39,2020-12-01 13:32:39,12,支付宝,不计收支,交易成功,支付宝担保交易,ellen0wang,基础综合英语 学生用书 研究生英语教材 pdf版电子书,5.5,-1,1,-5.5
2020-12-01 13:37:35,2020-12-01 13:37:35,12,支付宝,支出,交易成功,即时到账交易,App Store & Apple Music,App Store & Apple Music: 于 11.30完成的购买,3.0,-1,1,-3.0
2020-12-01 21:53:47,2020-12-01 21:53:47,12,支付宝,不计收支,交易成功,即时到账交易,麦当劳(深南西路分店餐厅),麦当劳餐厅(深圳) 有限公司,6.0,-1,1,-6.0
2020-12-01 21:54:36,2020-12-01 21:54:36,12,支付宝,不计收支,交易成功,即时到账交易,麦当劳(深南西路分店餐厅),麦当劳餐厅(深圳) 有限公司,0.1,-1,1,-0.1
...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-09 20:16:12,2023-12-09 20:16:12,12,支付宝,支出,交易成功,即时到账交易,Valve,S2P1172140557 Steam Purchase,15.8,-1,1,-15.8
2023-12-10 13:13:45,2023-12-10 13:13:45,12,支付宝,不计收支,交易成功,即时到账交易,上海都畅数字技术有限公司,三林东--西藏南路,4.0,-1,1,-4.0
2023-12-10 13:21:41,2023-12-10 13:21:41,12,支付宝,不计收支,交易成功,即时到账交易,美团,大众点评订单-23121011100400000021574530283011,5.9,-1,1,-5.9
2023-12-10 14:17:04,2023-12-10 14:17:04,12,支付宝,不计收支,交易成功,即时到账交易,上海都畅数字技术有限公司,马当路--商城路,3.0,-1,1,-3.0


In [15]:
# cates = {}

In [20]:
for idx, rcd in tqdm(Bills_df.iterrows()):
    if idx in cates.keys():
        continue
    
    one_bill = Bill(rcd)
    cates[idx] = one_bill.classify_expense()
    print(rcd['商品'],cates[idx])
    time.sleep(randint(1,2))


0it [00:00, ?it/s]

App Store & Apple Music: 于 11.30完成的购买 订阅类信息服务或娱乐产品


3it [00:04,  1.47s/it]

麦当劳餐厅(深圳) 有限公司 食品


4it [00:09,  2.56s/it]

麦当劳餐厅(深圳) 有限公司 食品


5it [00:11,  2.34s/it]

麦当劳餐厅(深圳) 有限公司 食品


6it [00:14,  2.57s/it]

【bilibili】现货 万代 鬼灭之刃 炭治郎&amp;祢豆子 Q版手办摆件 耐用品


7it [00:17,  2.69s/it]

艾漫正版 排球少年周边人物立绘亚克力立牌摆件【现货】 订阅类信息服务或娱乐产品


8it [00:20,  2.92s/it]

宠物小精灵星之卡比动漫手办神奇宝贝袋装手办车载摆件蛋糕场景版 耐用品


9it [00:23,  2.78s/it]

蚂蚁财富-天弘中证银行指数A-买入 理财


10it [00:25,  2.54s/it]

思迅Pay-微信-A超市 食品


11it [00:29,  2.99s/it]

海贼王手办zero索隆艾斯卡塔库栗路飞公仔摆件周边礼物全套限量版 耐用品


12it [00:31,  2.80s/it]

思迅Pay-微信-A超市 食品


13it [00:34,  2.83s/it]

麦当劳餐厅(深圳) 有限公司 食品


14it [00:37,  2.86s/it]

商品 食品


15it [00:39,  2.63s/it]

韶音AfterShokz AS800骨传导运动蓝牙耳机骨传感跑步无线Aeropex 耐用品


16it [00:41,  2.44s/it]

主动还款-花呗2020年12月账单 理财


17it [00:44,  2.74s/it]

蚂蚁财富-天弘中证银行指数A-买入 理财


18it [00:48,  2.95s/it]

退款-宠物小精灵星之卡比动漫手办神奇宝贝袋装手办车载摆件蛋糕场景版 耐用品


19it [00:51,  3.02s/it]

商品 食品


20it [00:55,  3.23s/it]

商品 食品


21it [00:58,  3.32s/it]

益田福伴生活超市收银员:606-7038 食品


22it [01:00,  2.93s/it]

蚂蚁财富-天弘中证银行指数A-买入 理财


23it [01:02,  2.64s/it]

蚂蚁财富-天弘中证银行指数A-买入 理财


24it [01:06,  3.00s/it]

条码支付-A 食品


25it [01:10,  3.39s/it]

深圳市南山区荣润家龙百货商行-支付宝 食品


26it [01:13,  3.04s/it]

麦当劳餐厅(深圳) 有限公司 食品


27it [01:14,  2.67s/it]

深圳全家购物 食品


28it [01:17,  2.61s/it]

转账 其他


29it [01:19,  2.47s/it]

转账收款到余额宝 理财


30it [01:23,  3.02s/it]

蚂蚁财富-天弘中证银行指数A-买入 理财


31it [01:25,  2.73s/it]

益田假日广场B2 线下娱乐或休闲


32it [01:29,  3.08s/it]

益田福伴生活超市收银员:606-7039 食品


33it [01:32,  3.04s/it]

GOODS 食品


34it [01:36,  3.31s/it]

蚂蚁财富-天弘中证银行指数A-买入 理财


35it [01:39,  3.25s/it]

App Store & Apple Music: 于 12.07完成的购买 订阅类信息服务或娱乐产品


36it [01:43,  3.33s/it]

益田福伴生活超市收银员:606-7038 食品


37it [01:45,  3.09s/it]

商品 食品


38it [01:47,  2.76s/it]

蚂蚁财富-天弘中证银行指数A-买入 理财


39it [01:49,  2.53s/it]

条码支付-A 食品


40it [01:51,  2.35s/it]

麦当劳餐厅(深圳) 有限公司 食品


41it [01:53,  2.25s/it]

深圳全家购物 食品


42it [01:55,  2.15s/it]

App Store & Apple Music: 于 12.09完成的购买 订阅类信息服务或娱乐产品


43it [01:59,  2.71s/it]

蚂蚁财富-天弘中证银行指数A-买入 理财


44it [02:01,  2.44s/it]

思迅Pay-微信-A超市 食品


45it [02:04,  2.58s/it]

商品 食品


46it [02:07,  2.71s/it]

【预售】原创款 动物系列开衫 JK制服 学生针织毛衣 加厚 耐用品


47it [02:09,  2.61s/it]

【现货】原创款 日本学生制服小方领角襟刺绣长袖衬衫校服衬衫 等多件 耐用品


48it [02:12,  2.76s/it]

桂格原味即食麦片1000g*3 食品


50it [02:15,  2.17s/it]

金味麦片原味营养麦片懒人代餐速溶燕麦片学生早餐即食冲饮600g 等多件 食品


51it [02:19,  2.47s/it]

蚂蚁财富-天弘中证银行指数A-买入 理财


52it [02:22,  2.71s/it]

深圳全家购物 食品


53it [02:24,  2.54s/it]

美团收银766644605549862912 食品


54it [02:27,  2.59s/it]

深圳全家购物 食品


55it [02:30,  2.83s/it]

蚂蚁财富-天弘中证银行指数A-买入 理财


56it [02:32,  2.61s/it]

麦当劳餐厅(深圳) 有限公司 食品


57it [02:35,  2.71s/it]

转账 理财


58it [02:39,  2.91s/it]

广东省天天果品汇 食品


59it [02:41,  2.70s/it]

条码支付 食品


60it [02:43,  2.50s/it]

App Store & Apple Music: 于 12.12完成的购买 订阅类信息服务或娱乐产品


61it [02:45,  2.46s/it]

蚂蚁财富-天弘中证银行ETF联接A-买入 理财


62it [02:47,  2.31s/it]

蚂蚁财富-天弘中证银行ETF联接A-买入 理财


63it [02:50,  2.48s/it]

点点肠粉：8元 食品


64it [02:53,  2.72s/it]

条码支付-B 食品


65it [02:55,  2.46s/it]

蚂蚁财富-天弘中证银行ETF联接A-买入 理财


66it [02:58,  2.61s/it]

商品 食品


67it [03:01,  2.72s/it]

蚂蚁财富-天弘中证银行ETF联接A-买入 理财


68it [03:05,  2.95s/it]

条码支付-A 食品


69it [03:07,  2.82s/it]

App Store & Apple Music: 于 12.16完成的购买 订阅类信息服务或娱乐产品


70it [03:10,  2.81s/it]

支付宝APP支付 线下娱乐或休闲


71it [03:14,  3.17s/it]

蚂蚁财富-天弘中证银行ETF联接A-买入 理财


72it [03:17,  3.26s/it]

雀巢香滑咖啡210ml 食品


73it [03:20,  2.89s/it]

椒气小姐：22.82元 食品


74it [03:22,  2.72s/it]

益田福伴生活超市收银员:606-7038 食品


75it [03:26,  3.04s/it]

主动还款-花呗2020年12月账单 理财


76it [03:28,  2.96s/it]

蚂蚁财富-天弘中证银行ETF联接A-买入 理财


77it [03:32,  3.01s/it]

充值-普通充值 订阅类信息服务或娱乐产品


In [None]:
cates

{'2020-12-01 10:31:33': '理财', '2020-12-01 13:32:39': '订阅类信息服务或娱乐产品'}

In [None]:
Bills_df['cate'] = pd.Series(cates)

In [None]:
Bills_df

Unnamed: 0_level_0,交易时间,月份,来源,收/支,支付状态,类型,交易对方,商品,金额,逻辑1,逻辑2,乘后金额,cate
时间,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-12-01 10:31:33,2020-12-01 10:31:33,12,支付宝,不计收支,交易成功,即时到账交易,蚂蚁财富-蚂蚁（杭州）基金销售有限公司,蚂蚁财富-天弘中证银行指数A-买入,23.0,-1,1,-23.0,理财
2020-12-01 13:32:39,2020-12-01 13:32:39,12,支付宝,不计收支,交易成功,支付宝担保交易,ellen0wang,基础综合英语 学生用书 研究生英语教材 pdf版电子书,5.5,-1,1,-5.5,订阅类信息服务或娱乐产品
2020-12-01 13:37:35,2020-12-01 13:37:35,12,支付宝,支出,交易成功,即时到账交易,App Store & Apple Music,App Store & Apple Music: 于 11.30完成的购买,3.0,-1,1,-3.0,订阅类信息服务或娱乐产品
2020-12-01 21:53:47,2020-12-01 21:53:47,12,支付宝,不计收支,交易成功,即时到账交易,麦当劳(深南西路分店餐厅),麦当劳餐厅(深圳) 有限公司,6.0,-1,1,-6.0,'非必要性食品'
2020-12-01 21:54:36,2020-12-01 21:54:36,12,支付宝,不计收支,交易成功,即时到账交易,麦当劳(深南西路分店餐厅),麦当劳餐厅(深圳) 有限公司,0.1,-1,1,-0.1,非必要性食品
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-09 20:16:12,2023-12-09 20:16:12,12,支付宝,支出,交易成功,即时到账交易,Valve,S2P1172140557 Steam Purchase,15.8,-1,1,-15.8,
2023-12-10 13:13:45,2023-12-10 13:13:45,12,支付宝,不计收支,交易成功,即时到账交易,上海都畅数字技术有限公司,三林东--西藏南路,4.0,-1,1,-4.0,
2023-12-10 13:21:41,2023-12-10 13:21:41,12,支付宝,不计收支,交易成功,即时到账交易,美团,大众点评订单-23121011100400000021574530283011,5.9,-1,1,-5.9,
2023-12-10 14:17:04,2023-12-10 14:17:04,12,支付宝,不计收支,交易成功,即时到账交易,上海都畅数字技术有限公司,马当路--商城路,3.0,-1,1,-3.0,


In [None]:
Bills_df.to_excel('CateFinishedBill.xlsx')

#### For free version, we try using LLAMA-2-70B.
- Bad classifying perform.
- Same problem with GPT-3.5-turbo, usually return too much more than I ask for.
- Sometimes successfully limit the returns.

In [37]:
import os
import replicate

In [88]:
output = replicate.run(
  "meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3",
  input={
    "debug": False,
    "top_k": 50,
    "top_p": 1,
    "prompt": "“12:41:20在上海都畅数字技术有限公司购买了三林东--西藏南路”。 For this expense record text, what would you categorize it to? 'food','sanck','transportation','durable goods','electronic'or'subscription-based service' or 'offline entertainment'? Don't say any unnecessary, you must give me ONE WORD of those categories.",
    "temperature": 1,
    "system_prompt": "You are a good assistant good at categorizing expense base on chinese texts.",
    "max_new_tokens": 16,
    "min_new_tokens": 2
  })

In [89]:
for item in output:
    print(item, end="")

 " Durable goods"