In [1]:
# Mount Google Drive at /content/drive so the CSV files stored under
# /content/drive/MyDrive can be opened by path in the cells below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import pandas as pd
# Read the training CSV from the mounted Drive and print its column index
# to inspect which columns are available.
df = pd.read_csv('/content/drive/MyDrive/covid.train.csv')
print(df.columns)

Index(['id', 'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'FL', 'GA', 'ID', 'IL',
       'IN', 'IA', 'KS', 'KY', 'LA', 'MD', 'MA', 'MI', 'MN', 'MS', 'MO', 'NE',
       'NV', 'NJ', 'NM', 'NY', 'NC', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'TX',
       'UT', 'VA', 'WA', 'WV', 'WI', 'cli', 'ili', 'hh_cmnty_cli',
       'nohh_cmnty_cli', 'wearing_mask', 'travel_outside_state',
       'work_outside_home', 'shop', 'restaurant', 'spent_time', 'large_event',
       'public_transit', 'anxious', 'depressed', 'felt_isolated',
       'worried_become_ill', 'worried_finances', 'tested_positive', 'cli.1',
       'ili.1', 'hh_cmnty_cli.1', 'nohh_cmnty_cli.1', 'wearing_mask.1',
       'travel_outside_state.1', 'work_outside_home.1', 'shop.1',
       'restaurant.1', 'spent_time.1', 'large_event.1', 'public_transit.1',
       'anxious.1', 'depressed.1', 'felt_isolated.1', 'worried_become_ill.1',
       'worried_finances.1', 'tested_positive.1', 'cli.2', 'ili.2',
       'hh_cmnty_cli.2', 'nohh_cmnty_cli.2', 'wea

In [4]:
# Read the test CSV and print its column index.
# NOTE: this rebinds `df`, so the training frame loaded in the previous cell
# is no longer reachable under that name after this cell runs.
df = pd.read_csv('/content/drive/MyDrive/covid.test.csv')
print(df.columns)

Index(['id', 'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'FL', 'GA', 'ID', 'IL',
       'IN', 'IA', 'KS', 'KY', 'LA', 'MD', 'MA', 'MI', 'MN', 'MS', 'MO', 'NE',
       'NV', 'NJ', 'NM', 'NY', 'NC', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'TX',
       'UT', 'VA', 'WA', 'WV', 'WI', 'cli', 'ili', 'hh_cmnty_cli',
       'nohh_cmnty_cli', 'wearing_mask', 'travel_outside_state',
       'work_outside_home', 'shop', 'restaurant', 'spent_time', 'large_event',
       'public_transit', 'anxious', 'depressed', 'felt_isolated',
       'worried_become_ill', 'worried_finances', 'tested_positive', 'cli.1',
       'ili.1', 'hh_cmnty_cli.1', 'nohh_cmnty_cli.1', 'wearing_mask.1',
       'travel_outside_state.1', 'work_outside_home.1', 'shop.1',
       'restaurant.1', 'spent_time.1', 'large_event.1', 'public_transit.1',
       'anxious.1', 'depressed.1', 'felt_isolated.1', 'worried_become_ill.1',
       'worried_finances.1', 'tested_positive.1', 'cli.2', 'ili.2',
       'hh_cmnty_cli.2', 'nohh_cmnty_cli.2', 'wea

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import numpy as np

# Load and preprocess data
def load_and_preprocess_data(filepath, target_column=None, scaler=None):
    """Read a CSV, drop its ``id`` column and scale the feature columns.

    Args:
        filepath: Anything ``pd.read_csv`` accepts (path or file-like object).
        target_column: Name of the label column. When given, that column is
            split off and returned as ``y``; when ``None`` every remaining
            column is treated as a feature and ``y`` is ``None``.
        scaler: An already-fitted scaler exposing ``transform``. When ``None``
            a new ``StandardScaler`` is created and fitted on the features
            read from ``filepath``.

    Returns:
        Tuple ``(X, y, scaler)``: the scaled features, the target column
        (or ``None``), and the scaler that produced ``X``.
    """
    frame = pd.read_csv(filepath).drop(columns=['id'])

    # Default to the unlabeled case; override when a target is requested.
    features, y = frame, None
    if target_column is not None:
        y = frame[target_column]
        features = frame.drop(columns=[target_column])

    # Reuse the caller's scaler as-is so its statistics are not refitted.
    if scaler is not None:
        return scaler.transform(features), y, scaler

    scaler = StandardScaler()
    return scaler.fit_transform(features), y, scaler



In [6]:
# Define the neural network
class CovidPredictionModel(nn.Module):
    """Fully connected regressor: input_dim -> 128 -> 64 -> 32 -> 1.

    Each of the three hidden layers is followed by ReLU and dropout (p=0.2);
    the last layer is a plain linear map that emits one value per sample.
    """

    def __init__(self, input_dim):
        super().__init__()
        # Register fc1..fc4 in order; consecutive widths define each layer.
        widths = (input_dim, 128, 64, 32, 1)
        for position, (n_in, n_out) in enumerate(zip(widths, widths[1:]), start=1):
            self.add_module(f'fc{position}', nn.Linear(n_in, n_out))
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """Return the network output for input ``x``."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.dropout(self.relu(layer(hidden)))
        # No activation or dropout on the output layer.
        return self.fc4(hidden)



In [7]:
# Train the model
def train_model(model, X_train, y_train, criterion, optimizer, num_epochs=1000):
    """Fit ``model`` with full-batch gradient updates.

    Every epoch runs one forward/backward pass and one optimizer step over
    the entire training set (there is no mini-batching). The current loss is
    printed every 100 epochs.

    Args:
        model: ``nn.Module`` to optimise; it is switched to training mode.
        X_train: Array-like of features, one row per sample. Converted to a
            float32 tensor.
        y_train: Array-like of targets, one per sample (pandas Series, NumPy
            array or list). Converted to a float32 column tensor.
        criterion: Callable ``criterion(outputs, targets)`` returning a
            scalar loss tensor.
        optimizer: Optimizer already constructed over ``model``'s parameters.
        num_epochs: Number of full-batch updates to perform.

    Returns:
        None. ``model`` is updated in place.
    """
    model.train()
    # np.asarray accepts Series, ndarrays and lists alike, so the targets no
    # longer need a pandas-only `.values` attribute. torch.tensor always
    # copies, leaving the caller's data untouched.
    X_train = torch.tensor(np.asarray(X_train), dtype=torch.float32)
    y_train = torch.tensor(np.asarray(y_train), dtype=torch.float32).reshape(-1, 1)

    for epoch in range(num_epochs):
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()

        if (epoch+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate the model
def evaluate_model(model, X_val, y_val, criterion):
    """Compute the loss of ``model`` on a held-out set without gradients.

    Args:
        model: ``nn.Module`` to evaluate. It is switched to eval mode (which
            disables dropout) and is left in eval mode on return.
        X_val: Array-like of features, one row per sample. Converted to a
            float32 tensor.
        y_val: Array-like of targets, one per sample (pandas Series, NumPy
            array or list). Converted to a float32 column tensor.
        criterion: Callable ``criterion(outputs, targets)`` returning a
            scalar loss tensor.

    Returns:
        The loss over the whole set as a Python float.
    """
    model.eval()
    # np.asarray accepts Series, ndarrays and lists alike, so the targets no
    # longer need a pandas-only `.values` attribute.
    X_val = torch.tensor(np.asarray(X_val), dtype=torch.float32)
    y_val = torch.tensor(np.asarray(y_val), dtype=torch.float32).reshape(-1, 1)

    with torch.no_grad():
        outputs = model(X_val)
        loss = criterion(outputs, y_val)

    return loss.item()



In [8]:
# Main execution
if __name__ == "__main__":
    # End-to-end run: load the training CSV, hold out 20% for validation,
    # train the network, report validation loss, then predict on the test CSV
    # and write the predictions to Drive.

    # Define the target column
    target_column = 'tested_positive.2'
    train_path = '/content/drive/MyDrive/covid.train.csv'
    test_path = '/content/drive/MyDrive/covid.test.csv'
    predictions_path = '/content/drive/MyDrive/predictions.csv'
    seed = 42

    # Seed torch so weight initialisation and dropout masks repeat across runs.
    torch.manual_seed(seed)

    # Load the raw training data; scaling is deferred until after the split.
    train_df = pd.read_csv(train_path).drop(columns=['id'])
    X = train_df.drop(columns=[target_column])
    y = train_df[target_column]

    # Split data into training and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=seed)

    # Fit the scaler on the training split only, so the validation rows do not
    # influence the normalisation statistics (avoids leakage into val_loss).
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)

    # Initialize model, criterion, and optimizer
    input_dim = X_train.shape[1]
    model = CovidPredictionModel(input_dim)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Train the model
    train_model(model, X_train, y_train, criterion, optimizer, num_epochs=1000)

    # Evaluate the model
    val_loss = evaluate_model(model, X_val, y_val, criterion)
    print(f'Validation Loss: {val_loss:.4f}')

    # Load and preprocess test data (no target column here), reusing the
    # scaler fitted on the training split.
    X_test, _, _ = load_and_preprocess_data(test_path, scaler=scaler)

    # Make predictions (eval mode disables dropout)
    model.eval()
    X_test = torch.FloatTensor(X_test)
    with torch.no_grad():
        predictions = model(X_test)

    # Save predictions to a file: one value per line, in test-row order.
    np.savetxt(predictions_path, predictions.numpy(), delimiter=',')
    print("Predictions saved to 'predictions.csv'")


Epoch [100/1000], Loss: 15.1926
Epoch [200/1000], Loss: 11.0573
Epoch [300/1000], Loss: 9.6359
Epoch [400/1000], Loss: 9.5095
Epoch [500/1000], Loss: 9.1399
Epoch [600/1000], Loss: 9.0148
Epoch [700/1000], Loss: 8.2545
Epoch [800/1000], Loss: 8.5634
Epoch [900/1000], Loss: 8.3076
Epoch [1000/1000], Loss: 8.5280
Validation Loss: 1.1615
Predictions saved to 'predictions.csv'
