<a href="https://colab.research.google.com/github/ussozi/Covid_19_predictions/blob/main/Covid19_predictions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Using Facebook's Prophet package

### Importing packages

In [1]:
import pandas as pd
from fbprophet import Prophet

In [5]:
# Load the training data, drop the columns the forecast does not use, and
# parse 'Date' before sorting. Sorting the unparsed strings would order rows
# lexicographically, which is only chronological for ISO-formatted dates.
raw_data = (
    pd.read_csv('train.csv')
    .drop(columns=['Territory X Date', 'cases'])
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .sort_values(['Territory', 'Date'])
)

# Empty frame that fixes the column layout of the collected forecasts
submission = pd.DataFrame(
    columns=['Territory', 'Date', 'target', 'target_lower', 'target_upper']
)




In [7]:
# Territories to forecast, in the order they appear in raw_data
countrys = raw_data['Territory'].unique()

# Number of daily steps to forecast beyond the last training date
forecast_days = 67

# Per-territory forecasts are collected here and concatenated once after the
# loop. Growing `submission` with DataFrame.append on every iteration copies
# the whole frame each time, and DataFrame.append was removed in pandas 2.0.
country_forecasts = []

# One model per territory, since each has its own trend
for country in countrys:

    # Prophet requires the columns 'ds' (datetime) and 'y' (value to forecast).
    # Selecting with .loc and building the frame via rename/assign avoids
    # assigning into a filtered slice of raw_data.
    data = (
        raw_data.loc[raw_data['Territory'] == country, ['Date', 'target']]
        .rename(columns={'Date': 'ds', 'target': 'y'})
        .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
    )

    # Set the uncertainty interval to 95% (the Prophet default is 80%)
    my_model = Prophet(
        interval_width=0.95,
        growth='linear',
        daily_seasonality=True,
        weekly_seasonality=True,
        yearly_seasonality=False,
        changepoint_prior_scale=0.5,
        n_changepoints=200,
        seasonality_mode='multiplicative')

    # Fit on this territory's history
    my_model.fit(data)

    # History dates plus `forecast_days` further daily steps
    future_dates = my_model.make_future_dataframe(periods=forecast_days, freq='D')

    # Predict over both the history and the future dates
    forecast = my_model.predict(future_dates)

    # Keep the point forecast and its interval bounds, tagged with the territory
    subs = pd.DataFrame({
        'Territory': country,
        'Date': forecast['ds'],
        'target': forecast['yhat'],
        'target_lower': forecast['yhat_lower'],
        'target_upper': forecast['yhat_upper']
    })
    country_forecasts.append(subs)

    print('{} done'.format(country))

# Rebuild `submission` from scratch so re-running this cell does not stack
# duplicate forecasts on top of a previous run. With no territories at all,
# the existing (empty) `submission` is left untouched.
if country_forecasts:
    submission = pd.concat(country_forecasts)

INFO:numexpr.utils:NumExpr defaulting to 2 threads.


Afghanistan done
Albania done
Algeria done
Andorra done
Angola done
Antigua and Barbuda done
Argentina done
Armenia done
Aruba done
Australia done
Austria done
Azerbaijan done
Bahamas (the) done
Bahrain done
Bangladesh done
Barbados done
Belarus done
Belgium done
Belize done
Benin done
Bermuda done
Bhutan done
Bolivia (Plurinational State of) done
Bosnia and Herzegovina done
Botswana done
Brazil done
Brunei Darussalam done
Bulgaria done
Burkina Faso done
Burundi done
Cabo Verde done
Cambodia done
Cameroon done
Canada done
Cayman Islands done
Central African Republic (the) done
Chad done
Chile done
China done
Colombia done
Comoros (the) done
Congo (the) done
Costa Rica done
Croatia done
Cuba done
Curacao done
Cyprus done
Czechia done
Côte d'Ivoire done
Democratic People's Republic of Korea (the) done
Democratic Republic of the Congo (the) done
Denmark done
Djibouti done
Dominica done
Dominican Republic (the) done
Ecuador done
Egypt done
El Salvador done
Equatorial Guinea done
Eritrea do

In [8]:

from sklearn.metrics import mean_absolute_error

# Validation window: forecasts dated after 2020-12-13, up to and including
# 2020-12-20. Explicit Timestamps on a datetime-coerced column avoid relying
# on implicit string-to-date comparison.
window_start = pd.Timestamp('2020-12-13')
window_end = pd.Timestamp('2020-12-20')
forecast_dates = pd.to_datetime(submission['Date'])
mask = (forecast_dates > window_start) & (forecast_dates <= window_end)

# Work on a copy so the columns added below do not modify `submission`
fst = submission.loc[mask].copy()

# Build the row key as '<Territory> X <month>/<day>/<2-digit year>'
fst['Territory X Date'] = [
    c + ' X ' + '{dt.month}/{dt.day}/{dt:%y}'.format(dt=d)
    for c, d in zip(fst['Territory'], fst['Date'])
]

# Prediction = absolute value of the upper interval bound, rounded to an
# integer. Previously the absolute value was computed and then overwritten by
# rounding the raw bound, so negative predictions could slip through.
# NOTE(review): this uses 'target_upper', not the point forecast 'target' --
# presumably deliberate; confirm.
fst['pred_target'] = fst['target_upper'].astype(float).abs().round().astype(int)

final = fst[['Territory X Date', 'pred_target']]

# Ground-truth values for the validation window
test = pd.read_csv(r'test.csv')

#****************************** Scoring*********************************#
# NOTE(review): rows are compared by position, so this assumes test.csv lists
# the same territory/date rows in the same order as `final` -- confirm, or
# align on 'Territory X Date' if test.csv carries that column.
print('MAE: ', mean_absolute_error(test['target'], final['pred_target']))

# Save the predictions
final.to_csv(r'validation.csv', index=False)

MAE:  266.4839371155161
