In [1]:
import numpy as np
import pandas as pd


### Dataset Introduction

This data set features license applications received during the last and current calendar years, including applications where a license was issued, denied, withdrawn, or remains pending. https://data.cityofnewyork.us/Business/License-Applications/ptev-4hud

As for this task, we would like to clean the data so that we can further explore the relationship between the covid-19 and the operation of businesses, we find all the applications that are approved since the COVID-19 pandemic outbreak, which is Feb 2020. Then we count the number of monthly applications and compare it with the severity of the pandemic.

Because processing of the applications might take a long time, we count the applications based on the start date of the applications.

In [2]:
filepath = 'License_Applications.csv'
csv_data = pd.read_csv(filepath, low_memory=False)


In [3]:
selected_cols = ['Business Name', 'License Type', 'Status', 'Start Date', 'City']
data = csv_data[selected_cols]
date = '2020/01/01'
def dateAfter(x):
    if (len(x) == 10):
        return (x[6:10]+'/'+x[0:5]) >= date
    else:
        return False
data = data[data['Start Date'].map(dateAfter)]
data['Year'] = data['Start Date'].map(lambda x: x[6:10])
data['Month'] = data['Start Date'].map(lambda x: x[0:2])
data['Day'] = data['Start Date'].map(lambda x: x[3:5])

In [4]:
display(data)

Unnamed: 0,Business Name,License Type,Status,Start Date,City,Year,Month,Day
52,SMARTSITTING LLC,Business,Issued,04/13/2020,NEW YORK,2020,04,13
55,ABDULLAH ODUNCU,Individual,Pending,04/10/2020,CLIFTON,2020,04,10
279,SHELBA GORHAM BELL,Individual,Denied,03/10/2020,ROCKWALL,2020,03,10
298,Mariya Dimov,Individual,Issued,04/07/2020,BROOKLYN,2020,04,07
313,KAREN A LOERCH,Individual,Denied,03/11/2020,BETHLEHEM,2020,03,11
...,...,...,...,...,...,...,...,...
396176,ihome buyer group corp,Business,Pending,03/19/2021,BROOKLYN,2021,03,19
396177,"Seaside DME of NY, Inc",Business,Pending,03/25/2021,BROOKLYN,2021,03,25
396178,CHI SHING KOON,Individual,Issued,03/22/2021,BROOKLYN,2021,03,22
396179,IOANNIS SAPOUNTZIS,Individual,Pending,03/24/2021,FLORAL PARK,2021,03,24


We then count the numbers of applications for each month.

In [5]:
df = pd.DataFrame({'month': ['01/2020','02/2020','03/2020',
                             '04/2020','05/2020','06/2020',
                             '07/2020','08/2020','09/2020',
                             '10/2020','11/2020','12/2020',
                             '01/2021','02/2021','03/2021'],
                   'count': [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
                  })
dictionary = {}
for i in df.index:
    dictionary[df.loc[i]['month']] = 0

In [6]:
for i in data.index:
    dictionary[data.loc[i]['Month'] + '/' + data.loc[i]['Year']] += 1

In [7]:
for i in df.index:
    df['count'] = df['month'].map(lambda x: dictionary[x])

In [8]:
display(df)

Unnamed: 0,month,count
0,01/2020,3290
1,02/2020,2661
2,03/2020,1694
3,04/2020,398
4,05/2020,527
5,06/2020,1606
6,07/2020,692
7,08/2020,965
8,09/2020,1101
9,10/2020,2169


For this task, we collect the new cases and new deaths for each month in NYC.

The data is collected from https://github.com/nytimes/covid-19-data/blob/master/us-states.csv

In [9]:
filepath = 'us-states.csv'
csv_data = pd.read_csv(filepath)

In [10]:
data = csv_data[csv_data['state'] == 'New York']
data = data[data['date'].map(lambda x: x in
            ['2020-03-01','2020-04-01','2020-05-01','2020-06-01',
             '2020-07-01','2020-08-01','2020-09-01','2020-10-01',
             '2020-11-01','2020-12-01','2021-01-01','2021-02-01',
             '2021-03-01','2021-03-29'])]

In [11]:
prev = 0
prev2 = 0
arr = []
arr2 = []
for i in data.index:
    arr.append(data.loc[i]['cases'] - prev)
    arr2.append(data.loc[i]['deaths'] - prev)
    prev = data.loc[i]['cases']
    prev = data.loc[i]['deaths']

In [12]:
data.loc[:,'new_cases'] = arr
data.loc[:,'new_deaths'] = arr2
display(data)

Unnamed: 0,date,state,fips,cases,deaths,new_cases,new_deaths
246,2020-03-01,New York,36,1,0,1,0
1642,2020-04-01,New York,36,84364,2415,84364,2415
3292,2020-05-01,New York,36,313575,23841,311160,21426
4997,2020-06-01,New York,36,376520,29766,352679,5925
6647,2020-07-01,New York,36,398770,31791,369004,2025
8352,2020-08-01,New York,36,420477,32390,388686,599
10057,2020-09-01,New York,36,440237,32551,407847,161
11707,2020-10-01,New York,36,464752,32768,432201,217
13412,2020-11-01,New York,36,514482,33174,481714,406
15062,2020-12-01,New York,36,660041,34222,626867,1048
