# Google drive

Authorize access to Google Drive.

In [None]:
# Mount Google Drive into the Colab filesystem (this is the step that asks for authorization).
from google.colab import drive
drive.mount("/content/gdrive")
# Directory that holds the input files. File names are appended to it by plain
# string concatenation in the cells below, so it must end with a slash.
drive_path = "/content/gdrive/My Drive/singapore/"
# Not running on Colab? Comment out the import and the drive.mount(...) call
# above, and set drive_path to the local directory that contains the data.

# Load COVID-19 SG data

Load the Singapore COVID-19 dataset from Google Drive. The data comes from [data.world](https://data.world/hxchua/covid-19-singapore).

Before running the cell below, please delete all figures/charts from the downloaded Excel file.

In [None]:
import pandas as pd
import numpy as np

def convert_type(table, from_type, to_type):
  """Cast every column of ``table`` whose dtype is ``from_type`` to ``to_type``.

  The converted columns are written back onto ``table`` itself, and that same
  object is returned so the call can be used on the right side of an assignment.
  """
  matching_columns = table.select_dtypes(include=[from_type]).columns
  for name in matching_columns:
    table[name] = table[name].astype(to_type)
  return table


In [None]:
# Source workbook: https://data.world/hxchua/covid-19-singapore
covid_sg = pd.read_excel(drive_path + "Covid-19 SG.xlsx", engine='openpyxl')

# Discard the rows that have no date, then parse the remaining dates.
covid_sg = covid_sg.drop(index=covid_sg.index[covid_sg["Date"].isnull()])
covid_sg["Date"] = pd.to_datetime(covid_sg["Date"])

# Remove the columns that are not carried into the final table.
covid_sg = covid_sg.drop(columns=[
  "Unnamed: 36",
  "Phase",
  "Perc population completed at least one dose",
  "Perc population completed vaccination",
  "Perc population taken booster shots",
])

# Uncomment to inspect the resulting column dtypes:
# print(covid_sg.dtypes)

# Load SG vaccine data

Load the Singapore COVID-19 vaccination data from Google Drive. The data comes from the [Our World in Data COVID-19 repository](https://github.com/owid/covid-19-data/blob/master/public/data/vaccinations/country_data/Singapore.csv).

In [None]:
# Read the vaccination table and parse its "date" column.
vaccine = pd.read_csv(filepath_or_buffer=drive_path + "Singapore.csv")
vaccine["date"] = pd.to_datetime(arg=vaccine["date"])

# Replace missing values with 0 first, so the float columns can be stored as integers.
vaccine = convert_type(vaccine.fillna(0), "float64", "int64")

# Data clean

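Fill the missing vaccination figures in the main table using the vaccine data loaded above, then interpolate the gaps that remain. The cleaned data is saved to Google Drive at the end of the notebook.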

In [None]:
# Fill gaps in the vaccination columns of covid_sg with the figures from `vaccine`.
# Each key is a covid_sg column; its value is the `vaccine` column that supplies it.
vaccine_column_map = {
  "Cumulative Vaccine Doses": "total_vaccinations",
  "Cumulative Individuals Vaccinated": "people_vaccinated",
  "Cumulative Individuals Vaccination Completed": "people_fully_vaccinated",
}

for _, row in vaccine.iterrows():
  # Missing values in `vaccine` were replaced by 0 when it was loaded, so a zero
  # total carries no figure worth copying.
  if row["total_vaccinations"] == 0:
    continue

  same_day = covid_sg["Date"] == row["date"]
  # Skip dates that do not occur in covid_sg; taking the first element of an
  # empty match would raise IndexError.
  if not same_day.any():
    continue

  # Only fill days whose dose count is still missing; existing values are kept.
  if not covid_sg.loc[same_day, "Cumulative Vaccine Doses"].isnull().iloc[0]:
    continue

  # Assign through .loc on the frame itself. Chained indexing
  # (covid_sg[col][rows] = value) may write to a temporary copy: it raises
  # SettingWithCopyWarning and does not update the frame under Copy-on-Write.
  for target_column, source_column in vaccine_column_map.items():
    covid_sg.loc[same_day, target_column] = row[source_column]

# Linearly interpolate the gaps that remain. limit_area="inside" restricts the
# fill to NaNs surrounded by valid values, so leading/trailing NaNs stay NaN here.
for target_column in vaccine_column_map:
  covid_sg[target_column] = covid_sg[target_column].interpolate(limit_area="inside")

# Whatever is still missing becomes 0, which lets the float columns be stored as integers.
covid_sg = covid_sg.fillna(0)
covid_sg = convert_type(covid_sg, "float64", "int64")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


# Load Google mobility

Load the Google mobility data from Google Drive. The data comes from [Our World in Data](https://ourworldindata.org/covid-google-mobility-trends). Join it with the main table on the date.

In [None]:
# Read the mobility table and keep only Singapore's rows; the entity columns
# are dropped once the filter has been applied.
mobility = pd.read_csv(drive_path + "changes-visitors-covid.csv")
mobility = mobility.loc[mobility["Entity"] == "Singapore"].drop(columns=["Entity", "Code"])
mobility["Day"] = pd.to_datetime(mobility["Day"])

# Left join keeps every row of covid_sg; the right-hand key column "Day" is
# removed after the join.
covid_sg = pd.merge(covid_sg, mobility, how='left', left_on="Date", right_on="Day").drop(columns="Day")

# Load Stringency Index

Load the Stringency Index data from Google Drive. The data comes from [Our World in Data](https://ourworldindata.org/covid-stringency-index). Join it with the main table on the date.

In [None]:
# Read the stringency index table and keep only Singapore's rows; the entity
# columns are dropped once the filter has been applied.
stringency = pd.read_csv(drive_path + "covid-stringency-index.csv")
stringency = stringency.loc[stringency["Entity"] == "Singapore"].drop(columns=["Entity", "Code"])
stringency["Day"] = pd.to_datetime(stringency["Day"])

# Left join keeps every row of covid_sg; the right-hand key column "Day" is
# removed after the join.
covid_sg = pd.merge(covid_sg, stringency, how='left', left_on="Date", right_on="Day").drop(columns="Day")

# Load face-covering policy

Load the face-covering policy data from Google Drive. The data comes from [Our World in Data](https://ourworldindata.org/covid-face-coverings). Join it with the main table on the date.

In [None]:
# Read the face-covering policy table and keep only Singapore's rows; the
# entity columns are dropped once the filter has been applied.
face_cover = pd.read_csv(drive_path + "face-covering-policies-covid.csv")
face_cover = face_cover.loc[face_cover["Entity"] == "Singapore"].drop(columns=["Entity", "Code"])
face_cover["Day"] = pd.to_datetime(face_cover["Day"])

# Left join keeps every row of covid_sg; the right-hand key column "Day" is
# removed after the join.
covid_sg = pd.merge(covid_sg, face_cover, how='left', left_on="Date", right_on="Day").drop(columns="Day")

# Save data

In [None]:
# Write the merged table into the data directory, leaving out the row index.
covid_sg.to_csv(path_or_buf=drive_path + "sg_final.csv", index=False)