In [76]:
import pandas as pd
from datetime import date, datetime
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import os
import glob

In [77]:
file_path = "data/YTD files"
csv_files = glob.glob(os.path.join(file_path, '*.csv'))
print(csv_files)

['data/YTD files/2023 Profit and Loss YTD.csv', 'data/YTD files/2023 Profit and Loss.csv']


In [78]:
latest_file = max(csv_files, key=os.path.getmtime)

In [79]:
df = pd.read_csv(latest_file, index_col = False)

In [80]:
df.head()

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,BJ WIN HOSPITALITY INC.,,,,
1,Profit and Loss,,,,
2,"From January 1, 2023 to September 30, 2023",,,,
3,,,,,
4,,Year to Date,%,,


In [81]:
datestr = df.loc[2, 'Unnamed: 0']

# datestr = datestr.lower().replace(',', '')
split_date = datestr.split()

from_date = ' '.join(split_date[1:4])
input_from_date = datetime.strptime(from_date, "%B %d, %Y")
formatted_from_date = input_from_date.strftime("%m/%d/%y")

to_date = ' '.join(split_date[5:8])
input_to_date = datetime.strptime(to_date, "%B %d, %Y")
formatted_to_date = input_to_date.strftime("%m/%d/%y")

date_range = formatted_from_date + '-' + formatted_to_date
print(date_range)
# print(split_date[1:4].append(split_date[5:7]))

01/01/23-09/30/23


In [82]:
cols_to_remove = ['Unnamed: 3', 'Unnamed: 4']
df = df.drop(columns = cols_to_remove)

In [83]:
modcols = {
    'Unnamed: 0' : 'metric_type',
    'Unnamed: 1' : date_range,
    'Unnamed: 2' : 'percent',
}

df = df.rename(columns = modcols)

In [84]:
df = df.dropna(axis=0)
df = df.reset_index(drop=True)

In [90]:
with pd.option_context('display.max_rows', None,
                       'display.max_columns', None,
                       'display.precision', 3,
                       ):
    print(df)

                    metric_type 01/01/23-09/30/23 percent
0           MEETING ROOM RENTAL        $61,044.69    1.91
1                   ROOM RENTAL      3,119,098.18   97.84
2          MISCELLANEOUS INCOME            188.37    0.01
3                        REFUND           (58.90)  (0.00)
4                Total Revenues      3,180,272.34   99.76
5              FRONT DESK CLERK        131,507.80    4.13
6                   HOUSEKEEPER        314,144.22    9.85
7               CAFE ATTENDANTS         68,689.23    2.15
8             PAYROLL TAXES OPS         44,190.65    1.39
9        WORKERS COMP INSURANCE         47,669.30    1.50
10               LINEN PURCHASE         10,980.87    0.34
11               GUEST SUPPLIES         52,542.72    1.65
12             LAUNDRY SUPPLIES         10,772.95    0.34
13     COMMISSION TRAVEL AGENTS        153,793.84    4.82
14        CONTINENTAL BREAKFAST        102,546.06    3.22
15          FRANCHISE ROYALTIES         64,044.77    2.01
16            

In [87]:
df.iloc[8, df.columns.get_loc('metric_type')]

'PAYROLL TAXES'

In [91]:
# All modifications of dataframe
df.iloc[8, df.columns.get_loc('metric_type')] = 'PAYROLL TAXES OPS'
df.iloc[39, df.columns.get_loc('metric_type')] = 'PAYROLL TAXES ADMIN'
paranthesis = df[date_range].str.contains(r'\(|\)')
df.loc[paranthesis, date_range] = df.loc[paranthesis, date_range].str.replace(r'\(', '-').str.replace(r'\)', '', regex=True)
df[date_range] = df[date_range].str.replace('[\$,]', '', regex=True).astype(float)



  df.loc[paranthesis, date_range] = df.loc[paranthesis, date_range].str.replace(r'\(', '-').str.replace(r'\)', '', regex=True)


In [92]:
with pd.option_context('display.max_rows', None,
                       'display.max_columns', None,
                       'display.precision', 3,
                       ):
    print(df)

                    metric_type  01/01/23-09/30/23 percent
0           MEETING ROOM RENTAL          6.104e+04    1.91
1                   ROOM RENTAL          3.119e+06   97.84
2          MISCELLANEOUS INCOME          1.884e+02    0.01
3                        REFUND         -5.890e+01  (0.00)
4                Total Revenues          3.180e+06   99.76
5              FRONT DESK CLERK          1.315e+05    4.13
6                   HOUSEKEEPER          3.141e+05    9.85
7               CAFE ATTENDANTS          6.869e+04    2.15
8             PAYROLL TAXES OPS          4.419e+04    1.39
9        WORKERS COMP INSURANCE          4.767e+04    1.50
10               LINEN PURCHASE          1.098e+04    0.34
11               GUEST SUPPLIES          5.254e+04    1.65
12             LAUNDRY SUPPLIES          1.077e+04    0.34
13     COMMISSION TRAVEL AGENTS          1.538e+05    4.82
14        CONTINENTAL BREAKFAST          1.025e+05    3.22
15          FRANCHISE ROYALTIES          6.404e+04    2.

In [93]:
# columns for cost: total cost of sales, officer/admin salaries, payroll taxes, commercial ins, telephone, internet, property tax, 
# credit card commission, maint salary, payroll tax, pest, eqpt rep, fire and safety, swimming pool, maint&serv, plumbing, elec, trash&waste, 
# maint supp, total utilities, auto/gen insurance, building ins, 
rows_to_sum = ['Total Cost Of Sales', 'OFFICERS SALARIES', 'ADMINISTRATIVE SALARIES', 'PAYROLL TAXES ADMIN', 'COMMERCIAL INSURANCE', 
               'TELEPHONE', 'INTERNET & COMPUTER', 'PROPERTY TAXES', 'AUTO INSURANCE']
df['metric_type'] = df['metric_type'].str.lower().str.strip()
rows_to_sum = [row.lower().strip() for row in rows_to_sum]
# print(rows_to_sum)
temp = df['metric_type'].isin(rows_to_sum)
selectRows = df[temp]
print(selectRows)
totalOpCostCurrYr = selectRows[date_range].sum()

                metric_type  01/01/23-09/30/23 percent
35      total cost of sales         1459179.06   45.77
37        officers salaries          184166.78    5.78
38  administrative salaries          167948.23    5.27
39      payroll taxes admin           27741.71    0.87
40     commercial insurance          125447.19    3.94
47                telephone           11870.02    0.37
48      internet & computer           75913.36    2.38
55           property taxes           19032.39    0.60
62           auto insurance           16069.01    0.50


In [94]:
print(totalOpCostCurrYr)

2087367.75


In [95]:
roomsSold = input("How many rooms sold?")
# retrieve total rooms sold from VM or NA reports
# e.g: 2023 total rooms YTD = 26076
cPORCurrYr = totalOpCostCurrYr/int(roomsSold)
print(f"{date_range}: ${round(cPORCurrYr, 2)}")

How many rooms sold? 26076


01/01/23-09/30/23: $80.05
