In [1]:
import numpy as np
import pandas as pd


### Dataset Introduction

This data set features license applications received during the last and current calendar years, including applications where a license was issued, denied, withdrawn, or remains pending. https://data.cityofnewyork.us/Business/License-Applications/ptev-4hud

For this task, we clean the data so that we can explore the relationship between COVID-19 and the operation of businesses. We keep every application whose start date is on or after January 1, 2020, which covers the period since the COVID-19 outbreak began in February 2020 (note that the code below filters on the start date only, not on the application status). We then count the number of applications per month and compare it with the severity of the pandemic.

Because processing an application can take a long time, we count applications by their start date.

In [None]:
# Relative path to the CSV export of the License Applications dataset.
filepath = 'License_Applications.csv'
# low_memory=False: parse the file in a single pass so column dtypes are
# inferred from the whole column rather than chunk by chunk.
csv_data = pd.read_csv(
    filepath,
    low_memory=False,
)


In [None]:
# Earliest start date to keep, written as YYYY/MM/DD so that plain string
# comparison orders values chronologically.
date = '2020/01/01'

# Columns of the raw export that the rest of the notebook works with.
selected_cols = [
    'Business Name',
    'License Type',
    'Status',
    'Start Date',
    'City',
]
data = csv_data[selected_cols]
def dateAfter(x, cutoff=None):
    """Return True when an 'MM/DD/YYYY' date string is on or after the cutoff.

    Parameters
    ----------
    x : object
        Raw cell value. Anything that is not a 10-character string
        (missing values, malformed entries) is treated as not matching.
    cutoff : str, optional
        Lower bound in 'YYYY/MM/DD' form. When omitted, the notebook-level
        ``date`` variable is used.

    Returns
    -------
    bool
        True if the date is on or after the cutoff, otherwise False.
    """
    if cutoff is None:
        cutoff = date
    # pandas reads empty CSV cells as float NaN; calling len() on those would
    # raise TypeError, so reject every non-string value up front.
    if not isinstance(x, str) or len(x) != 10:
        return False
    # Reorder MM/DD/YYYY into YYYY/MM/DD so lexical order matches date order.
    return (x[6:10] + '/' + x[0:5]) >= cutoff
# Keep only the rows whose start date passes dateAfter. The explicit copy makes
# `data` an independent frame, so the column assignments below write to it
# directly instead of to a slice (avoids pandas' SettingWithCopyWarning).
data = data[data['Start Date'].map(dateAfter)].copy()
# Start dates are 'MM/DD/YYYY' strings; slice out each component as text.
data['Year'] = data['Start Date'].map(lambda x: x[6:10])
data['Month'] = data['Start Date'].map(lambda x: x[0:2])
data['Day'] = data['Start Date'].map(lambda x: x[3:5])

In [None]:
# Render the filtered applications, including the Year/Month/Day columns derived above.
display(data)

We then count the number of applications for each month.

In [None]:
# 'MM/YYYY' labels for every month of 2020 followed by January-March 2021.
month_labels = (
    [f'{month:02d}/2020' for month in range(1, 13)]
    + [f'{month:02d}/2021' for month in range(1, 4)]
)

# One row per month; the counts start at zero and are filled in later.
df = pd.DataFrame({
    'month': month_labels,
    'count': [0] * len(month_labels),
})

# Lookup table from month label to application count, initially all zero.
dictionary = {label: 0 for label in month_labels}

In [None]:
# Tally applications per 'MM/YYYY' key in one vectorised pass instead of doing
# two row lookups per application.
monthly_totals = (data['Month'] + '/' + data['Year']).value_counts()
# Overwrite rather than increment so that re-running this cell does not
# double-count. Months outside the tracked window are ignored instead of
# raising KeyError.
for month_key in dictionary:
    dictionary[month_key] = int(monthly_totals.get(month_key, 0))

In [None]:
# Copy the tallied counts into the table. A single column-wide map is enough:
# the assignment already covers every row, so no per-row loop is needed.
df['count'] = df['month'].map(dictionary)

In [None]:
# Render the month table with its filled-in application counts.
display(df)

For this task, we collect the new cases and new deaths for each month in New York State.

The data is collected from https://github.com/nytimes/covid-19-data/blob/master/us-states.csv

In [None]:
# Relative path to the state-level COVID-19 CSV.
# Note: this rebinds `filepath` and `csv_data`, replacing the license data loaded earlier.
filepath = 'us-states.csv'
csv_data = pd.read_csv(
    filepath,
)

In [None]:
# Dates whose rows are kept: the first day of each month from March 2020
# through March 2021, plus 2021-03-29.
snapshot_dates = [
    '2020-03-01', '2020-04-01', '2020-05-01', '2020-06-01',
    '2020-07-01', '2020-08-01', '2020-09-01', '2020-10-01',
    '2020-11-01', '2020-12-01', '2021-01-01', '2021-02-01',
    '2021-03-01', '2021-03-29',
]
# Keep only the New York rows that fall on one of the snapshot dates.
is_new_york = csv_data['state'] == 'New York'
is_snapshot = csv_data['date'].isin(snapshot_dates)
data = csv_data[is_new_york & is_snapshot]

In [None]:
# Convert the `cases` and `deaths` columns into row-to-row increments.
# Each column needs its own running "previous" value: sharing one variable
# would subtract the previous deaths total from the current cases total.
prev = 0   # `cases` value of the previous row
prev2 = 0  # `deaths` value of the previous row
arr = []   # increase in cases from the previous row, one entry per row
arr2 = []  # increase in deaths from the previous row, one entry per row
for cases, deaths in zip(data['cases'], data['deaths']):
    arr.append(cases - prev)
    arr2.append(deaths - prev2)
    prev = cases
    prev2 = deaths

In [None]:
# `data` is a filtered slice of the loaded CSV; assign() returns a new frame
# with the extra columns, so nothing is written into the slice (avoids
# pandas' SettingWithCopyWarning).
data = data.assign(new_cases=arr, new_deaths=arr2)
display(data)