[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/zhimingkuang/Harvard-AM-115/blob/main/17_COVID19CaseStudy/covid_code4student.ipynb)

In [5]:
import pandas as pd
import numpy as np
from scipy.integrate import solve_ivp
import matplotlib.pylab as plt

### Load data

>If you are working in Google Colab, run the following cell to clone the GitHub repo and access the data. Otherwise, you can skip to the next cell.

In [2]:
# If using Google Colab, run these lines to import data from github
# Clone the course repository into the current working directory.
# NOTE(review): this cell is probably not re-runnable in the same session --
# a second `git clone` finds the target directory already present, and the
# relative `%cd` below no longer resolves once we are inside it. Run it once.
!git clone https://github.com/zhimingkuang/Harvard-AM-115.git
# Move into the case-study folder so that the relative data path ('./') used
# by the following cells points at this directory.
%cd Harvard-AM-115/17_COVID19CaseStudy

In [6]:
# This code gives an example of how to read in the csv data and a template
# for the workshop

# Directory holding the workshop CSV files; change this to the path for your data
datapath = './'

filename = 'US_states_population.csv'

# Load the state population table once. The file-not-found error is re-raised
# with the full path that was tried, chained to the original exception.
try:
    population_table = pd.read_csv(f"{datapath}{filename}")
except FileNotFoundError as exc:
    raise FileNotFoundError(f"Could not find file: {datapath}{filename}") from exc

# LocationID of the state to analyze
state_id = 544

# Rows of the population table that belong to the chosen state
state_row = population_table[population_table['LocationID'] == state_id]

# Report the state that was selected (or that nothing matched)
if not state_row.empty:
    state_name = state_row['StateName'].values[0]
    population = state_row['Population'].values[0]
    print(f"State: {state_name}, Population: {population}")
else:
    print(f"No state found with LocationID = {state_id}")

# Legacy template names, kept in case other cells still refer to them. They
# reuse the table loaded above instead of reading the same CSV a second time.
PopulationTable = population_table
stateid = state_id

# Boolean mask marking the row(s) of the chosen state
rowindex = PopulationTable.LocationID == stateid

# Total state population. NOTE: a boolean-mask selection returns a pandas
# Series, not a scalar; use `population` above when a plain number is needed.
N = PopulationTable.Population[rowindex]


In [7]:
def read_covid_data(filename, state_id):
    """
    Read the date-indexed 'mean' time series of one state from a CSV file.

    Parameters:
    - filename (str): Path to the CSV file. The file must contain the columns
      'location_id', 'date' and 'mean'.
    - state_id (int): Value of 'location_id' identifying the state to keep.

    Returns:
    - pd.DataFrame: A new DataFrame with the parsed 'date' column as its index
      and a single 'mean' column. Rows keep the order they have in the file.

    Raises:
    - FileNotFoundError: If `filename` does not exist.
    - ValueError: If a required column is missing, or if no row matches
      `state_id`.
    """
    # Load the data; re-raise with the offending path, chained to the original
    try:
        df = pd.read_csv(filename)
    except FileNotFoundError as exc:
        raise FileNotFoundError(f"File not found: {filename}") from exc

    # Validate required columns
    required_columns = {'location_id', 'date', 'mean'}
    if not required_columns.issubset(df.columns):
        raise ValueError(f"Missing required columns: {required_columns - set(df.columns)}")

    # Select rows and columns in a single .loc call and take an explicit copy,
    # so the column assignment below works on an independent frame rather than
    # on the result of chained indexing (the SettingWithCopyWarning pattern).
    state_df = df.loc[df['location_id'] == state_id, ['date', 'mean']].copy()

    if state_df.empty:
        raise ValueError(f"No data found for state ID {state_id} in file {filename}")

    # Parse the dates and use them as the index (returns a new frame, no inplace)
    state_df['date'] = pd.to_datetime(state_df['date'])
    return state_df.set_index('date')


In [8]:
# Load the three per-state time series: daily infections, mobility, mask use
infection = read_covid_data(f"{datapath}daily_infections.csv", state_id)
mobility = read_covid_data(f"{datapath}mobility.csv", state_id)
mask_use = read_covid_data(f"{datapath}mask_use.csv", state_id)

# Outer-join the three frames on their date index. Each frame carries a single
# column called 'mean', so the suffixes produce the merged column names
# 'mean_infection', 'mean_mobility' and 'mean_mask_use'.
data = (
    infection
    .join(mobility, how='outer', lsuffix='_infection')
    .join(mask_use, how='outer', lsuffix='_mobility', rsuffix='_mask_use')
)

# Restrict the merged table to the analysis window: Feb 1, 2020 - Sep 21, 2020
analysis_start = pd.to_datetime('2020-02-01')
analysis_end = pd.to_datetime('2020-09-21')
data = data.loc[analysis_start:analysis_end]

# Optional sanity check: report whether any cell in the window is missing
if data.isna().to_numpy().any():
    print("Warning: Missing values detected after merging datasets.")

# ----- Get your ODE solution here -----

# Fitting begins on March 11, 2020 (to avoid early-pandemic estimation issues)
fit_start_date = pd.to_datetime('2020-03-11')
fit_data = data.loc[fit_start_date:]

# Mobility and mask-use time series over the fitting period
mobility_series = fit_data['mean_mobility']
mask_series = fit_data['mean_mask_use']


In [9]:
# Inspect the type of the object returned by read_covid_data for the infection series
type(infection)

pandas.core.frame.DataFrame

In [22]:
# Convert the infection DataFrame to a plain NumPy array of its values
x=infection.to_numpy()

In [23]:
# Confirm the type of the converted array
type(x)

numpy.ndarray