In [33]:
# Importation des packages / bibliothèques nécessaires
import os  # Fournit des fonctions pour interagir avec le système d'exploitation
import numpy as np 
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, explained_variance_score, mean_absolute_error, mean_squared_error
from math import sqrt
from datetime import datetime

%matplotlib inline  # Pour afficher les graphiques directement dans le notebook

# Pour installer scikit-learn, tapez "pip install numpy scipy scikit-learn" dans le terminal Anaconda

# Pour changer les nombres scientifiques en flottants
np.set_printoptions(formatter={'float_kind':'{:f}'.format})

# Augmente la taille des graphiques seaborn
sns.set(rc={'figure.figsize':(12,10)})

# Pour vérifier les packages installés
# import sys
# !conda list


UsageError: unrecognized arguments: # Pour afficher les graphiques directement dans le notebook


In [34]:
# Chargement des données brutes cumulatives

raw_data_confirmed = pd.read_csv('time_series_19-covid-Confirmed.csv')
raw_data_deaths = pd.read_csv('time_series_19-covid-Deaths.csv')
raw_data_Recovered = pd.read_csv('time_series_19-covid-Recovered.csv')

print("The Shape of Cornirmed is: ", raw_data_confirmed.shape)
print("The Shape of Cornirmed is: ", raw_data_deaths.shape)
print("The Shape of Cornirmed is: ", raw_data_Recovered.shape)

raw_data_confirmed.head()

The Shape of Cornirmed is:  (225, 50)
The Shape of Cornirmed is:  (225, 50)
The Shape of Cornirmed is:  (225, 50)


Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20
0,Anhui,Mainland China,31.8257,117.2264,1,9,15,39,60,70,...,989,990,990,990,990,990,990,990,990,990
1,Beijing,Mainland China,40.1824,116.4142,14,22,36,41,68,80,...,410,410,411,413,414,414,418,418,422,426
2,Chongqing,Mainland China,30.0572,107.874,6,9,27,57,75,110,...,576,576,576,576,576,576,576,576,576,576
3,Fujian,Mainland China,26.0789,117.9874,1,5,10,18,35,59,...,296,296,296,296,296,296,296,296,296,296
4,Gansu,Mainland China,36.0611,103.8343,0,2,2,4,7,14,...,91,91,91,91,91,91,91,102,119,120


In [35]:
raw_data_Recovered[raw_data_Recovered['Country/Region'] == 'US']


Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20
35,"King County, WA",US,47.6062,-122.3321,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,1,1
36,"Cook County, IL",US,41.7377,-87.6976,0,0,0,0,0,0,...,2,2,2,2,2,2,2,2,2,2
46,"Los Angeles, CA",US,34.0522,-118.2437,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
64,"San Benito, CA",US,36.5761,-120.9876,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
66,"Madison, WI",US,43.0731,-89.4012,0,0,0,0,0,0,...,0,0,0,0,0,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
220,"Pierce County, WA",US,47.0676,-122.1295,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
221,"Plymouth County, MA",US,42.1615,-70.7928,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
222,"Santa Cruz County, CA",US,36.9741,-122.0308,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
223,"Tulsa County, OK",US,36.1593,-95.9410,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [36]:
# Dénormalisation des données

raw_data_confirmed2 = pd.melt(raw_data_confirmed, id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], var_name=['Date'])
raw_data_deaths2 = pd.melt(raw_data_deaths, id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], var_name=['Date'])
raw_data_Recovered2 = pd.melt(raw_data_Recovered, id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'], var_name=['Date'])


print("The Shape of Cornirmed is: ", raw_data_confirmed2.shape)
print("The Shape of Cornirmed is: ", raw_data_deaths2.shape)
print("The Shape of Cornirmed is: ", raw_data_Recovered2.shape)


raw_data_confirmed2.head()

The Shape of Cornirmed is:  (10350, 6)
The Shape of Cornirmed is:  (10350, 6)
The Shape of Cornirmed is:  (10350, 6)


Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,value
0,Anhui,Mainland China,31.8257,117.2264,1/22/20,1
1,Beijing,Mainland China,40.1824,116.4142,1/22/20,14
2,Chongqing,Mainland China,30.0572,107.874,1/22/20,6
3,Fujian,Mainland China,26.0789,117.9874,1/22/20,1
4,Gansu,Mainland China,36.0611,103.8343,1/22/20,0


In [37]:
#Conversion de la nouvelle colonne en dates

raw_data_confirmed2['Date'] = pd.to_datetime(raw_data_confirmed2['Date'])
raw_data_deaths2['Date'] = pd.to_datetime(raw_data_deaths2['Date'])
raw_data_Recovered2['Date'] = pd.to_datetime(raw_data_Recovered2['Date'])

  raw_data_confirmed2['Date'] = pd.to_datetime(raw_data_confirmed2['Date'])
  raw_data_deaths2['Date'] = pd.to_datetime(raw_data_deaths2['Date'])
  raw_data_Recovered2['Date'] = pd.to_datetime(raw_data_Recovered2['Date'])


In [38]:
# Renomer les valeurs
raw_data_confirmed2.columns = raw_data_confirmed2.columns.str.replace('value', 'Confirmed')
raw_data_deaths2.columns = raw_data_deaths2.columns.str.replace('value', 'Deaths')
raw_data_Recovered2.columns = raw_data_Recovered2.columns.str.replace('value', 'Recovered')

In [39]:
# Investiguer les valleurs  NULLES 
raw_data_Recovered2.isnull().sum()

Province/State    4324
Country/Region       0
Lat                  0
Long                 0
Date                 0
Recovered            0
dtype: int64

In [40]:
# Traitement des valeurs NULL

raw_data_confirmed2['Province/State'].fillna(raw_data_confirmed2['Country/Region'], inplace=True)
raw_data_deaths2['Province/State'].fillna(raw_data_deaths2['Country/Region'], inplace=True)
raw_data_Recovered2['Province/State'].fillna(raw_data_Recovered2['Country/Region'], inplace=True)

raw_data_confirmed2.isnull().sum()

Province/State    0
Country/Region    0
Lat               0
Long              0
Date              0
Confirmed         0
dtype: int64

In [41]:
# Impression des formes avant la jointure
print("The Shape of Cornirmed is: ", raw_data_confirmed2.shape)
print("The Shape of Cornirmed is: ", raw_data_deaths2.shape)
print("The Shape of Cornirmed is: ", raw_data_Recovered2.shape)

The Shape of Cornirmed is:  (10350, 6)
The Shape of Cornirmed is:  (10350, 6)
The Shape of Cornirmed is:  (10350, 6)


In [42]:
raw_data_confirmed2.isnull().sum()
raw_data_deaths2.isnull().sum()
raw_data_Recovered2.isnull().sum()

Province/State    0
Country/Region    0
Lat               0
Long              0
Date              0
Recovered         0
dtype: int64

In [43]:
# Jointures complètes

# Confirmed with Deaths
full_join = raw_data_confirmed2.merge(raw_data_deaths2[['Province/State','Country/Region','Date','Deaths']], 
                                      how = 'left', 
                                      left_on = ['Province/State','Country/Region','Date'], 
                                      right_on = ['Province/State', 'Country/Region','Date'])

print("Shape of first join: ", full_join.shape)

# full join with Recovered
full_join = full_join.merge(raw_data_Recovered2[['Province/State','Country/Region','Date','Recovered']], 
                                      how = 'left', 
                                      left_on = ['Province/State','Country/Region','Date'], 
                                      right_on = ['Province/State', 'Country/Region','Date'])

print("Shape of second join: ", full_join.shape)

full_join.head()

Shape of first join:  (10350, 7)
Shape of second join:  (10350, 8)


Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered
0,Anhui,Mainland China,31.8257,117.2264,2020-01-22,1,0,0
1,Beijing,Mainland China,40.1824,116.4142,2020-01-22,14,0,0
2,Chongqing,Mainland China,30.0572,107.874,2020-01-22,6,0,0
3,Fujian,Mainland China,26.0789,117.9874,2020-01-22,1,0,0
4,Gansu,Mainland China,36.0611,103.8343,2020-01-22,0,0,0


In [44]:
# checking for null values (especially long and lat)
full_join.isnull().sum()

Province/State    0
Country/Region    0
Lat               0
Long              0
Date              0
Confirmed         0
Deaths            0
Recovered         0
dtype: int64

In [45]:
# Adding Month and Year as a new Column
full_join['Month-Year'] = full_join['Date'].dt.strftime('%b-%Y')
full_join.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Month-Year
0,Anhui,Mainland China,31.8257,117.2264,2020-01-22,1,0,0,Jan-2020
1,Beijing,Mainland China,40.1824,116.4142,2020-01-22,14,0,0,Jan-2020
2,Chongqing,Mainland China,30.0572,107.874,2020-01-22,6,0,0,Jan-2020
3,Fujian,Mainland China,26.0789,117.9874,2020-01-22,1,0,0,Jan-2020
4,Gansu,Mainland China,36.0611,103.8343,2020-01-22,0,0,0,Jan-2020


In [47]:
######################## Braking the numbers by Day #########################################
#############################################################################################

# filtering data to Anhui to give you an example

#creating a new df    
test = full_join[full_join['Province/State'] == 'Anhui']

#creating a new df    
full_join2 = test.copy()

#creating a new date columns - 1
full_join2['Date - 1'] = full_join2['Date'] + pd.Timedelta(days=1)
full_join2.rename(columns={'Confirmed': 'Confirmed - 1', 'Deaths': 'Deaths - 1', 'Recovered': 'Recovered - 1',
                          'Date': 'Date Minus 1'}, inplace=True)

#Joing on the 2 DFs
full_join3 = test.merge(full_join2[['Province/State', 'Country/Region','Confirmed - 1', 'Deaths - 1', 
                            'Recovered - 1', 'Date - 1', 'Date Minus 1']], how = 'outer',
                             left_on = ['Province/State','Country/Region','Date'], 
                             right_on = ['Province/State', 'Country/Region','Date - 1'])

# Additional Calculations
full_join3['Confirmed Daily'] = full_join3['Confirmed'] - full_join3['Confirmed - 1']


test.head()
full_join2.head()
full_join3.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Month-Year,Confirmed - 1,Deaths - 1,Recovered - 1,Date - 1,Date Minus 1,Confirmed Daily
0,Anhui,Mainland China,31.8257,117.2264,2020-01-22,1.0,0.0,0.0,Jan-2020,,,,NaT,NaT,
1,Anhui,Mainland China,31.8257,117.2264,2020-01-23,9.0,0.0,0.0,Jan-2020,1.0,0.0,0.0,2020-01-23,2020-01-22,8.0
2,Anhui,Mainland China,31.8257,117.2264,2020-01-24,15.0,0.0,0.0,Jan-2020,9.0,0.0,0.0,2020-01-24,2020-01-23,6.0
3,Anhui,Mainland China,31.8257,117.2264,2020-01-25,39.0,0.0,0.0,Jan-2020,15.0,0.0,0.0,2020-01-25,2020-01-24,24.0
4,Anhui,Mainland China,31.8257,117.2264,2020-01-26,60.0,0.0,0.0,Jan-2020,39.0,0.0,0.0,2020-01-26,2020-01-25,21.0


In [48]:
test.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Month-Year
0,Anhui,Mainland China,31.8257,117.2264,2020-01-22,1,0,0,Jan-2020
225,Anhui,Mainland China,31.8257,117.2264,2020-01-23,9,0,0,Jan-2020
450,Anhui,Mainland China,31.8257,117.2264,2020-01-24,15,0,0,Jan-2020
675,Anhui,Mainland China,31.8257,117.2264,2020-01-25,39,0,0,Jan-2020
900,Anhui,Mainland China,31.8257,117.2264,2020-01-26,60,0,0,Jan-2020


In [49]:
full_join2.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date Minus 1,Confirmed - 1,Deaths - 1,Recovered - 1,Month-Year,Date - 1
0,Anhui,Mainland China,31.8257,117.2264,2020-01-22,1,0,0,Jan-2020,2020-01-23
225,Anhui,Mainland China,31.8257,117.2264,2020-01-23,9,0,0,Jan-2020,2020-01-24
450,Anhui,Mainland China,31.8257,117.2264,2020-01-24,15,0,0,Jan-2020,2020-01-25
675,Anhui,Mainland China,31.8257,117.2264,2020-01-25,39,0,0,Jan-2020,2020-01-26
900,Anhui,Mainland China,31.8257,117.2264,2020-01-26,60,0,0,Jan-2020,2020-01-27


In [50]:
######################## Braking the numbers by Day #########################################
#############################################################################################

## Applying it on all dataset

#creating a new df    
full_join2 = full_join.copy()

#creating a new date columns - 1
full_join2['Date - 1'] = full_join2['Date'] + pd.Timedelta(days=1)
full_join2.rename(columns={'Confirmed': 'Confirmed - 1', 'Deaths': 'Deaths - 1', 'Recovered': 'Recovered - 1',
                          'Date': 'Date Minus 1'}, inplace=True)

#Joing on the 2 DFs
full_join3 = full_join.merge(full_join2[['Province/State', 'Country/Region','Confirmed - 1', 'Deaths - 1', 
                            'Recovered - 1', 'Date - 1', 'Date Minus 1']], how = 'left',
                             left_on = ['Province/State','Country/Region','Date'], 
                             right_on = ['Province/State', 'Country/Region','Date - 1'])

#minus_onedf.rename(columns={'Confirmed': 'Confirmed - 1', 'Deaths': 'Deaths - 1', 'Recovered': 'Recovered - 1'}, inplace=True)

full_join3.head()

# Additional Calculations
full_join3['Confirmed Daily'] = full_join3['Confirmed'] - full_join3['Confirmed - 1']
full_join3['Deaths Daily'] = full_join3['Deaths'] - full_join3['Deaths - 1']
full_join3['Recovered Daily'] = full_join3['Recovered'] - full_join3['Recovered - 1']

print(full_join3.shape)

(10350, 17)


In [51]:
full_join3.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Month-Year,Confirmed - 1,Deaths - 1,Recovered - 1,Date - 1,Date Minus 1,Confirmed Daily,Deaths Daily,Recovered Daily
0,Anhui,Mainland China,31.8257,117.2264,2020-01-22,1,0,0,Jan-2020,,,,NaT,NaT,,,
1,Beijing,Mainland China,40.1824,116.4142,2020-01-22,14,0,0,Jan-2020,,,,NaT,NaT,,,
2,Chongqing,Mainland China,30.0572,107.874,2020-01-22,6,0,0,Jan-2020,,,,NaT,NaT,,,
3,Fujian,Mainland China,26.0789,117.9874,2020-01-22,1,0,0,Jan-2020,,,,NaT,NaT,,,
4,Gansu,Mainland China,36.0611,103.8343,2020-01-22,0,0,0,Jan-2020,,,,NaT,NaT,,,


In [52]:
# Additing manually the numbers for first day

full_join3['Confirmed Daily'].loc[full_join3['Date'] == '2020-01-22'] = full_join3['Confirmed']
full_join3['Deaths Daily'].loc[full_join3['Date'] == '2020-01-22'] = full_join3['Deaths']
full_join3['Recovered Daily'].loc[full_join3['Date'] == '2020-01-22'] = full_join3['Recovered']

# deleting columns
del full_join3['Confirmed - 1']
del full_join3['Deaths - 1']
del full_join3['Recovered - 1']
del full_join3['Date - 1']
del full_join3['Date Minus 1']

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  full_join3['Confirmed Daily'].loc[full_join3['Date'] == '2020-01-22'] = full_join3['Confirmed']
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  full_join3['Deaths Daily'].loc[full_join3['Date'] == '2020-01-22'] = full_join3['Deaths']
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  full_join3['Recovered Daily'].loc[full_join3['Date'] == '2020-01-22'] = full_join3['Recovered']


In [53]:
# Creating additional slicer for easy of use

full_join3['Hubei Vs Rest of the World'] = 'Rest of the World'
full_join3['Hubei Vs Rest of the World'].loc[full_join3['Province/State'] == 'Hubei'] = 'Hubei - Virus birth'

#full_join3[full_join3['Province/State'] == 'Hubei']
full_join3.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  full_join3['Hubei Vs Rest of the World'].loc[full_join3['Province/State'] == 'Hubei'] = 'Hubei - Virus birth'


Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Month-Year,Confirmed Daily,Deaths Daily,Recovered Daily,Hubei Vs Rest of the World
0,Anhui,Mainland China,31.8257,117.2264,2020-01-22,1,0,0,Jan-2020,1.0,0.0,0.0,Rest of the World
1,Beijing,Mainland China,40.1824,116.4142,2020-01-22,14,0,0,Jan-2020,14.0,0.0,0.0,Rest of the World
2,Chongqing,Mainland China,30.0572,107.874,2020-01-22,6,0,0,Jan-2020,6.0,0.0,0.0,Rest of the World
3,Fujian,Mainland China,26.0789,117.9874,2020-01-22,1,0,0,Jan-2020,1.0,0.0,0.0,Rest of the World
4,Gansu,Mainland China,36.0611,103.8343,2020-01-22,0,0,0,Jan-2020,0.0,0.0,0.0,Rest of the World


In [54]:
# Exporting the data

# Setting my path
path = "C:\\Users\\dell\\Desktop\\data vis"

# Changing my CWD
os.chdir(path)

full_join3.to_csv('CoronaVirus PowerBI Raw', sep='\t')

In [55]:
full_join3.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Month-Year,Confirmed Daily,Deaths Daily,Recovered Daily,Hubei Vs Rest of the World
0,Anhui,Mainland China,31.8257,117.2264,2020-01-22,1,0,0,Jan-2020,1.0,0.0,0.0,Rest of the World
1,Beijing,Mainland China,40.1824,116.4142,2020-01-22,14,0,0,Jan-2020,14.0,0.0,0.0,Rest of the World
2,Chongqing,Mainland China,30.0572,107.874,2020-01-22,6,0,0,Jan-2020,6.0,0.0,0.0,Rest of the World
3,Fujian,Mainland China,26.0789,117.9874,2020-01-22,1,0,0,Jan-2020,1.0,0.0,0.0,Rest of the World
4,Gansu,Mainland China,36.0611,103.8343,2020-01-22,0,0,0,Jan-2020,0.0,0.0,0.0,Rest of the World


In [56]:
# puting unique values in list
dates = full_join3['Date'].unique()

#creating a df with unique
dates = pd.DataFrame(dates, columns=['Date'])

# Ordering df
dates = dates.sort_values(by=['Date'])

# Creating an ordered list now
dates = full_join3['Date'].unique()

print(full_join3.shape)

try:
    del concat_data
except:
    print()
    
try:
    del final_concat_data
except:
    print()
    

for i in dates:
    new_data = full_join3[full_join3['Date'] == i]
    new_data['Cumulative Date'] = i
    print(i)
    
    try:     
        concat_data = pd.concat([concat_data, new_data], ignore_index = True)
        concat_data['Cumulative Date 2'] = i
        print(concat_data['Date'].unique())
        
        try:
            final_concat_data = pd.concat([final_concat_data, concat_data], ignore_index = True)
        except:
            final_concat_data = concat_data

    except:
        concat_data = new_data
        
print(final_concat_data.shape)

(10350, 13)
2020-01-22 00:00:00
2020-01-23 00:00:00
<DatetimeArray>
['2020-01-22 00:00:00', '2020-01-23 00:00:00']
Length: 2, dtype: datetime64[ns]
2020-01-24 00:00:00
<DatetimeArray>
['2020-01-22 00:00:00', '2020-01-23 00:00:00', '2020-01-24 00:00:00']
Length: 3, dtype: datetime64[ns]
2020-01-25 00:00:00
<DatetimeArray>
['2020-01-22 00:00:00', '2020-01-23 00:00:00', '2020-01-24 00:00:00',
 '2020-01-25 00:00:00']
Length: 4, dtype: datetime64[ns]
2020-01-26 00:00:00
<DatetimeArray>
['2020-01-22 00:00:00', '2020-01-23 00:00:00', '2020-01-24 00:00:00',
 '2020-01-25 00:00:00', '2020-01-26 00:00:00']
Length: 5, dtype: datetime64[ns]
2020-01-27 00:00:00
<DatetimeArray>
['2020-01-22 00:00:00', '2020-01-23 00:00:00', '2020-01-24 00:00:00',
 '2020-01-25 00:00:00', '2020-01-26 00:00:00', '2020-01-27 00:00:00']
Length: 6, dtype: datetime64[ns]
2020-01-28 00:00:00
<DatetimeArray>
['2020-01-22 00:00:00', '2020-01-23 00:00:00', '2020-01-24 00:00:00',
 '2020-01-25 00:00:00', '2020-01-26 00:00:00', '2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_data['Cumulative Date'] = i
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_data['Cumulative Date'] = i
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_data['Cumulative Date'] = i
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

<DatetimeArray>
['2020-01-22 00:00:00', '2020-01-23 00:00:00', '2020-01-24 00:00:00',
 '2020-01-25 00:00:00', '2020-01-26 00:00:00', '2020-01-27 00:00:00',
 '2020-01-28 00:00:00', '2020-01-29 00:00:00', '2020-01-30 00:00:00',
 '2020-01-31 00:00:00', '2020-02-01 00:00:00', '2020-02-02 00:00:00',
 '2020-02-03 00:00:00', '2020-02-04 00:00:00', '2020-02-05 00:00:00',
 '2020-02-06 00:00:00', '2020-02-07 00:00:00', '2020-02-08 00:00:00',
 '2020-02-09 00:00:00', '2020-02-10 00:00:00', '2020-02-11 00:00:00',
 '2020-02-12 00:00:00', '2020-02-13 00:00:00', '2020-02-14 00:00:00',
 '2020-02-15 00:00:00', '2020-02-16 00:00:00', '2020-02-17 00:00:00',
 '2020-02-18 00:00:00', '2020-02-19 00:00:00', '2020-02-20 00:00:00']
Length: 30, dtype: datetime64[ns]
2020-02-21 00:00:00
<DatetimeArray>
['2020-01-22 00:00:00', '2020-01-23 00:00:00', '2020-01-24 00:00:00',
 '2020-01-25 00:00:00', '2020-01-26 00:00:00', '2020-01-27 00:00:00',
 '2020-01-28 00:00:00', '2020-01-29 00:00:00', '2020-01-30 00:00:00',
 '20

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_data['Cumulative Date'] = i
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_data['Cumulative Date'] = i
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_data['Cumulative Date'] = i
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

2020-03-02 00:00:00
<DatetimeArray>
['2020-01-22 00:00:00', '2020-01-23 00:00:00', '2020-01-24 00:00:00',
 '2020-01-25 00:00:00', '2020-01-26 00:00:00', '2020-01-27 00:00:00',
 '2020-01-28 00:00:00', '2020-01-29 00:00:00', '2020-01-30 00:00:00',
 '2020-01-31 00:00:00', '2020-02-01 00:00:00', '2020-02-02 00:00:00',
 '2020-02-03 00:00:00', '2020-02-04 00:00:00', '2020-02-05 00:00:00',
 '2020-02-06 00:00:00', '2020-02-07 00:00:00', '2020-02-08 00:00:00',
 '2020-02-09 00:00:00', '2020-02-10 00:00:00', '2020-02-11 00:00:00',
 '2020-02-12 00:00:00', '2020-02-13 00:00:00', '2020-02-14 00:00:00',
 '2020-02-15 00:00:00', '2020-02-16 00:00:00', '2020-02-17 00:00:00',
 '2020-02-18 00:00:00', '2020-02-19 00:00:00', '2020-02-20 00:00:00',
 '2020-02-21 00:00:00', '2020-02-22 00:00:00', '2020-02-23 00:00:00',
 '2020-02-24 00:00:00', '2020-02-25 00:00:00', '2020-02-26 00:00:00',
 '2020-02-27 00:00:00', '2020-02-28 00:00:00', '2020-02-29 00:00:00',
 '2020-03-01 00:00:00', '2020-03-02 00:00:00']
Length:

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_data['Cumulative Date'] = i
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_data['Cumulative Date'] = i
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_data['Cumulative Date'] = i
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = 

In [57]:
# Exporting the data

final_concat_data.to_csv('CoronaVirus PowerBI Raw - Cumulative Test', sep='\t')

In [58]:
final_concat_data.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Month-Year,Confirmed Daily,Deaths Daily,Recovered Daily,Hubei Vs Rest of the World,Cumulative Date,Cumulative Date 2
0,Anhui,Mainland China,31.8257,117.2264,2020-01-22,1,0,0,Jan-2020,1.0,0.0,0.0,Rest of the World,2020-01-22,2020-01-23
1,Beijing,Mainland China,40.1824,116.4142,2020-01-22,14,0,0,Jan-2020,14.0,0.0,0.0,Rest of the World,2020-01-22,2020-01-23
2,Chongqing,Mainland China,30.0572,107.874,2020-01-22,6,0,0,Jan-2020,6.0,0.0,0.0,Rest of the World,2020-01-22,2020-01-23
3,Fujian,Mainland China,26.0789,117.9874,2020-01-22,1,0,0,Jan-2020,1.0,0.0,0.0,Rest of the World,2020-01-22,2020-01-23
4,Gansu,Mainland China,36.0611,103.8343,2020-01-22,0,0,0,Jan-2020,0.0,0.0,0.0,Rest of the World,2020-01-22,2020-01-23
