# Imports

In [5]:
import datetime
import warnings

import fbprophet as fp
import pandas as pd
from sklearn.metrics import mean_squared_error


warnings.filterwarnings("ignore")

In [6]:
# Location of the source CSV (presumably on a Colab-mounted Google Drive,
# judging by the /content/drive prefix -- adjust when running elsewhere).
INPUT_PATH = (
    '/content/drive/MyDrive/open_data_battle/airport/Science.csv'
)

# Load and transform data

In [7]:
# Read the raw table (columns Passengers / Year / Month) from INPUT_PATH.
df = pd.read_csv(filepath_or_buffer=INPUT_PATH)

In [8]:
# Schema overview first, then summary statistics.
# NOTE(review): DataFrame.info() prints its own report and returns None, so
# passing it to print() also emits a literal "None" line (visible in the
# recorded output). Kept as-is to match that output.
print(df.info(), df.describe(), sep="\n")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156 entries, 0 to 155
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Passengers  144 non-null    float64
 1   Year        156 non-null    int64  
 2   Month       156 non-null    int64  
dtypes: float64(1), int64(2)
memory usage: 3.8 KB
None
        Passengers         Year      Month
count   144.000000   156.000000  156.00000
mean   3239.386042  2013.000000    5.50000
std    1169.158959     3.753708    3.46317
min    1235.750000  2007.000000    0.00000
25%    2460.750000  2010.000000    2.75000
50%    3003.735000  2013.000000    5.50000
75%    3867.542500  2016.000000    8.25000
max    6579.240000  2019.000000   11.00000


In [9]:
# Build the two-column frame used for modelling: a first-of-month timestamp
# `ds` and the target `y` (renamed from `Passengers`), sorted chronologically.
#
# `Month` is zero-based in the raw data (0..11 per the describe() output),
# hence the +1. The shift is applied on the fly instead of being written back
# to `df['Month']`, so the loaded frame is never left half-transformed if this
# cell fails or is interrupted (a retry can no longer double-shift the months).
# The dates are assembled with a single vectorized `pd.to_datetime` call, so
# `ds` is a datetime64 column rather than a column of `datetime.date` objects.
df = (
    df
    .assign(ds=lambda raw: pd.to_datetime(pd.DataFrame({
        'year': raw['Year'],
        'month': raw['Month'] + 1,
        'day': 1,
    })))
    .rename(columns={'Passengers': 'y'})
    .filter(items=['ds', 'y'])
    .sort_values('ds', ascending=True)
)

In [10]:
# Sanity check that no month row is missing: 13 years x 12 months of rows are
# expected, so the difference displayed below should be 0.
df.shape[0] - 12 * 13

0

# Train/test split

In [11]:
# Rows without a passenger count are the ones to forecast; labelled rows are
# split by calendar year, with 2018 held out for validation.
has_label = df['y'].notna()
in_holdout_year = df['ds'].map(lambda day: day.year == 2018)

df_target = df.loc[~has_label]
df_train = df.loc[has_label & ~in_holdout_year]
df_test = df.loc[has_label & in_holdout_year]

# Report the size of each partition.
for caption, part in (('df_target:', df_target),
                      ('df_train:', df_train),
                      ('df_test:', df_test)):
    print(caption, part.shape)

df_target: (12, 2)
df_train: (132, 2)
df_test: (12, 2)


# Fit, predict and score

In [12]:
# Fit a default-configured Prophet model on the training years only.
# fit() hands back the fitted Prophet object, so chaining keeps `model` bound
# to it; the bare name on the last line displays it as the cell output.
model = fp.Prophet().fit(df_train)
model

INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


<fbprophet.forecaster.Prophet at 0x7f54f8849f90>

In [13]:
# Forecast the held-out dates and keep only the date and point estimate.
y_pred = model.predict(df_test.loc[:, ['ds']]).loc[:, ['ds', 'yhat']]

In [14]:
# Mean squared error of the forecast against the held-out actuals.
mean_squared_error(y_true=df_test['y'], y_pred=y_pred['yhat'])

401521.61320531485

# Submit

In [15]:
# Refit a fresh default Prophet model on every labelled row (train + test),
# then forecast the rows whose passenger count is missing.
model = fp.Prophet().fit(
    pd.concat(objs=[df_train, df_test], ignore_index=True)
)
prediction = model.predict(df_target.loc[:, ['ds']]).loc[:, ['ds', 'yhat']]

INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


In [16]:
# Emit the forecasts as a dict with a single 'answer' key holding the
# point estimates rendered as strings.
print(dict(answer=prediction['yhat'].to_numpy().astype(str).tolist()))

{'answer': ['5305.672710455586', '5838.8925839725525', '5973.244192375374', '5199.649728180411', '4720.8537926085455', '5759.483121591101', '5535.237852536383', '5502.052724448117', '5456.32847738583', '5999.901828394615', '6166.7416107151075', '6704.3834586927105']}
