In [1]:
import os

import pandas as pd
import plotly.express as px
# import plotly.figure_factory as ff
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Display every row of a DataFrame instead of truncating long outputs.
pd.options.display.max_rows = None

In [2]:
def loadCleanData(path:str):
    """Load the historical bookings CSV and aggregate it per train departure.

    Parameters
    ----------
    path : str
        Location of the CSV file; handed unchanged to ``pd.read_csv``.

    Returns
    -------
    cleanData : pd.DataFrame
        One row per (Market, TrainNumber, DepartureDateTime, CommercialClass)
        combination, with ``Revenue`` and ``Bookings`` summed over the rows
        sharing that key. The keys are regular columns (index is reset).
    data : pd.DataFrame
        The row-level records restricted to the columns listed in
        ``keepColumns``, with both timestamp columns converted by
        ``pd.to_datetime``.

    Raises
    ------
    KeyError
        If one of the required columns is missing from the file.
    """
    keepColumns = ['Market', 'TrainNumber', 'DepartureDateTime', 'ArrivalDateTime',
                   'Origin', 'Destination', 'Revenue', 'Bookings', 'CommercialClass']
    groupKeys = ['Market', 'TrainNumber', 'DepartureDateTime', 'CommercialClass']

    # read data
    rawData = pd.read_csv(path)

    # Keep only the columns of interest. The explicit copy makes `data` an
    # independent frame, so the column assignments below cannot trigger
    # pandas' SettingWithCopyWarning when the file has extra columns.
    data = rawData[keepColumns].copy()

    # change date format
    for column in ('DepartureDateTime', 'ArrivalDateTime'):
        data[column] = pd.to_datetime(data[column])

    # Aggregate revenue and bookings per departure, then turn the group keys
    # back into ordinary columns.
    cleanData = data.groupby(by=groupKeys)[['Revenue', 'Bookings']].sum()
    cleanData = cleanData.reset_index()

    return cleanData, data


def rule_80_20(cleanData: pd.DataFrame, attribute:str, percent = 80.0):
    """Select the top markets whose cumulative share of `attribute` stays within `percent`.

    Markets are ranked by their total `attribute` in descending order and kept
    while the cumulative share of the grand total is less than or equal to
    `percent`. The market that pushes the cumulative share above the threshold
    is not included, so the result is empty when the largest market alone
    exceeds `percent`.

    Parameters
    ----------
    cleanData : pd.DataFrame
        Must contain a 'Market' column and the column named by `attribute`.
    attribute : str
        Name of the numeric column to rank markets by (e.g. 'Bookings').
    percent : float, default 80.0
        Cumulative-share threshold, expressed in percent.

    Returns
    -------
    marketFor_80 : pd.DataFrame
        Columns 'Market', `attribute`, 'Porcent' (share of the total, in
        percent) and 'AcumulSum' (cumulative share, in percent) for the
        selected markets.
    numberMarketFor_80 : int
        Number of selected markets.
    """
    # Total of the attribute per market, largest first.
    marketFor = (
        cleanData.groupby(by=['Market'])[attribute]
        .sum()
        .sort_values(ascending=False)
        .reset_index()
    )

    total = marketFor[attribute].sum()
    marketFor['Porcent'] = (marketFor[attribute]/total)*100

    # Build the cumulative share from the running total of the raw values
    # rather than by summing the already-rounded percentages: this keeps a
    # market whose true cumulative share equals `percent` from being dropped
    # because of accumulated floating-point error.
    marketFor['AcumulSum'] = marketFor[attribute].cumsum() * 100 / total

    marketFor_80 = marketFor[marketFor['AcumulSum'] <= percent]
    numberMarketFor_80 = marketFor_80['Market'].nunique()

    return marketFor_80, numberMarketFor_80

In [3]:
# Location of the historical dataset. The default is the original absolute
# path; set the WORKSHOP_DATA_PATH environment variable to run the notebook
# on a machine where the file lives elsewhere.
DATA_PATH = os.environ.get(
    'WORKSHOP_DATA_PATH',
    '/home/wilmer/Documentos/Codes/WORKSHOP/data/anonymizedHistoricalData.csv',
)

# cleanData: revenue/bookings aggregated per departure; data: row-level records.
cleanData, data = loadCleanData(DATA_PATH)

In [4]:
# Temporal features derived from the departure timestamp.
departureTimes = cleanData['DepartureDateTime']
cleanData['DayWeek'] = departureTimes.dt.day_name()
# Only the ISO week number is stored; the ISO year is discarded.
# NOTE(review): if the data spans more than one year, equal week numbers from
# different years will be merged by any grouping on WeekYear - confirm.
cleanData['WeekYear'] = departureTimes.dt.isocalendar()['week']

# Count of distinct markets in the aggregated data.
numMarket = cleanData['Market'].nunique()

# Top markets by demand and by revenue, using the default threshold of rule_80_20.
marketForDemand, numberMarketForDemand = rule_80_20(cleanData, attribute='Bookings')
marketForRevenue, numberMarketForRevenue = rule_80_20(cleanData, attribute='Revenue')

In [14]:
# Total bookings per week number; the result is indexed by WeekYear.
weeklyGroups = cleanData.groupby(by=['WeekYear'])
df = weeklyGroups[['Bookings']].sum()

# Disabled draft below: side-by-side revenue histograms for the two
# directions of one market. Kept for reference only; none of it runs.
# x1 = cleanData[cleanData['Market']=='S46-S31']['Revenue']
# x2 = cleanData[cleanData['Market']=='S31-S46']['Revenue']

# # fig = go.Figure()
# fig = make_subplots(rows=1, cols=2, subplot_titles=("Histograma 1", "Histograma 2"))

# fig.add_trace(go.Histogram(x=x1, name='Market S46-S31'), row=1, col=1)
# fig.add_trace(go.Histogram(x=x2, name='Market S46-S31'), row=1, col=2)

# # Update the chart layout
# fig.update_layout(title='Histograma de Revenue para Market S46-S31',
#                   xaxis_title='Revenue',
#                   yaxis_title='Count')
# # Adjust the x-axis limits to zoom in
# fig.update_xaxes(range=[0, 5000], row=1, col=1)
# fig.update_xaxes(range=[0, 5000], row=1, col=2)
# # Show the chart
# fig.show()

In [13]:

# Weekly revenue of a single market, drawn as a line chart.
selectedMarket = 'S46-S31'

# Rows of the selected market, ordered by week number.
df = cleanData[cleanData['Market']==selectedMarket].sort_values(by='WeekYear',ascending=True)

# Revenue summed per week number, with WeekYear back as a regular column.
yy = df.groupby(by='WeekYear')['Revenue'].sum()
yy = yy.reset_index()

fig = go.Figure(go.Scatter(
    x = yy['WeekYear'],
    y = yy['Revenue']
))

fig.update_layout(
    title = f'Weekly revenue - market {selectedMarket}',
    xaxis_title = 'Week number',
    yaxis_title = 'Revenue',
    xaxis = dict(
        # Week numbers are integers, so place one tick on every week starting
        # at week 1 (the previous tick0=0.5 / dtick=0.75 put the ticks at
        # fractional positions that do not correspond to any week).
        tickmode = 'linear',
        tick0 = 1,
        dtick = 1
    )
)
fig.show()


In [12]:
# Revenue summed per departure timestamp, with the timestamp as a column.
# NOTE(review): `df` is taken from whatever the previously executed cell left
# behind (in file order, the rows of a single market) - confirm the intended
# input. The name `df` is rebound to the aggregated frame here.
df = df.groupby(by='DepartureDateTime')['Revenue'].sum().reset_index()

# Histogram over the date axis showing the average of the per-departure totals.
fig = px.histogram(
    df,
    x="DepartureDateTime",
    y="Revenue",
    histfunc="avg",
    title="Histogram on Date Axes",
)

# The bin size is set while the histogram is the only trace on the figure,
# i.e. before the scatter overlay is added below.
fig.update_traces(xbins_size="M1")
fig.update_xaxes(
    showgrid=True,
    ticklabelmode="period",
    dtick="M1",
    tickformat="%b\n%Y",
)
fig.update_layout(bargap=0.1)

# Overlay the individual per-departure totals as markers.
departurePoints = go.Scatter(
    mode="markers",
    x=df["DepartureDateTime"],
    y=df["Revenue"],
    name="daily",
)
fig.add_trace(departurePoints)
fig.show()