# Import

In [4]:
import numpy as np
import pandas as pd
from numpy.random import randn

# Series

In [5]:
arr=np.array([1,2,3])
row_names=['a','b','c']

series=pd.Series(arr,index=row_names)
series

a    1
b    2
c    3
dtype: int32

## Series Index

In [6]:
print(series[0])
print(series['a'])

1
1


  print(series[0])


# DataFrames

## Create a DataFrame from a dictionary

In [7]:
data = {'A': [1, 2, 3], 'B': [4, 5, 6] , 'C':[7,8,9]}
df = pd.DataFrame(data)
df

Unnamed: 0,A,B,C
0,1,4,7
1,2,5,8
2,3,6,9


## Create DataFrames

In [8]:
arr=np.array([[1,2,3],[4,5,6],[7,8,9]])
row_names=['a','b','c']
col_names=['one','two','three']
my_dataframe=pd.DataFrame(arr,index=row_names,columns=col_names)
my_dataframe

Unnamed: 0,one,two,three
a,1,2,3
b,4,5,6
c,7,8,9


In [9]:
df = pd.DataFrame(randn(5,4),index='A B C D E'.split(),columns='W X Y Z'.split())
df

Unnamed: 0,W,X,Y,Z
A,0.819169,-1.191389,-1.18164,-0.268665
B,-0.645518,-0.586682,-2.436931,0.695171
C,-0.768674,-0.496281,-0.203846,-0.112344
D,0.142609,-0.762177,-0.593719,-0.947341
E,-0.177383,0.402003,0.762066,-0.793373


## Selection and Indexing

In [10]:
df['W']

A    0.819169
B   -0.645518
C   -0.768674
D    0.142609
E   -0.177383
Name: W, dtype: float64

In [11]:
# Pass a list of column names
df[['W','Z']]

Unnamed: 0,W,Z
A,0.819169,-0.268665
B,-0.645518,0.695171
C,-0.768674,-0.112344
D,0.142609,-0.947341
E,-0.177383,-0.793373


In [12]:
# SQL Syntax (NOT RECOMMENDED!)
df.W

A    0.819169
B   -0.645518
C   -0.768674
D    0.142609
E   -0.177383
Name: W, dtype: float64

### **Selecting Rows**

In [13]:
df.loc['A']

W    0.819169
X   -1.191389
Y   -1.181640
Z   -0.268665
Name: A, dtype: float64

### select based off of position instead of label 

In [14]:
df.iloc[2]

W   -0.768674
X   -0.496281
Y   -0.203846
Z   -0.112344
Name: C, dtype: float64

### **Selecting subset of rows and columns**

In [15]:
df.loc['B','Y']

-2.4369305856478567

In [16]:
df.loc[['A', 'B'], ['W', 'X']]

Unnamed: 0,W,X
A,0.819169,-1.191389
B,-0.645518,-0.586682


In [17]:
df.iloc[1:3,:]


Unnamed: 0,W,X,Y,Z
B,-0.645518,-0.586682,-2.436931,0.695171
C,-0.768674,-0.496281,-0.203846,-0.112344


In [18]:
df.iloc[0:2, 0:2]

Unnamed: 0,W,X
A,0.819169,-1.191389
B,-0.645518,-0.586682


### DataFrame Columns are just Series

In [19]:
type(df['W'])

pandas.core.series.Series

## **Creating a new column:**

In [20]:
df['new'] = df['W'] + df['Y']
df

Unnamed: 0,W,X,Y,Z,new
A,0.819169,-1.191389,-1.18164,-0.268665,-0.362471
B,-0.645518,-0.586682,-2.436931,0.695171,-3.082449
C,-0.768674,-0.496281,-0.203846,-0.112344,-0.972519
D,0.142609,-0.762177,-0.593719,-0.947341,-0.45111
E,-0.177383,0.402003,0.762066,-0.793373,0.584683


## **Removing**

### Removing Columns

In [21]:
#index (0) or columns (1)
df.drop('new',axis=1)
# Not inplace unless specified!
df

Unnamed: 0,W,X,Y,Z,new
A,0.819169,-1.191389,-1.18164,-0.268665,-0.362471
B,-0.645518,-0.586682,-2.436931,0.695171,-3.082449
C,-0.768674,-0.496281,-0.203846,-0.112344,-0.972519
D,0.142609,-0.762177,-0.593719,-0.947341,-0.45111
E,-0.177383,0.402003,0.762066,-0.793373,0.584683


In [22]:
df.drop('new',axis=1,inplace=True)
# With inplace it will removed!
df

Unnamed: 0,W,X,Y,Z
A,0.819169,-1.191389,-1.18164,-0.268665
B,-0.645518,-0.586682,-2.436931,0.695171
C,-0.768674,-0.496281,-0.203846,-0.112344
D,0.142609,-0.762177,-0.593719,-0.947341
E,-0.177383,0.402003,0.762066,-0.793373


### Removing rows

In [23]:
df.drop('E',axis=0,inplace=True)
df

Unnamed: 0,W,X,Y,Z
A,0.819169,-1.191389,-1.18164,-0.268665
B,-0.645518,-0.586682,-2.436931,0.695171
C,-0.768674,-0.496281,-0.203846,-0.112344
D,0.142609,-0.762177,-0.593719,-0.947341


### **Permanently Removing a Column**

In [24]:
del df['Z']
df

Unnamed: 0,W,X,Y
A,0.819169,-1.191389,-1.18164
B,-0.645518,-0.586682,-2.436931
C,-0.768674,-0.496281,-0.203846
D,0.142609,-0.762177,-0.593719


## Conditional Selection

An important feature of pandas is conditional selection using bracket notation, very similar to numpy:

In [25]:
df

Unnamed: 0,W,X,Y
A,0.819169,-1.191389,-1.18164
B,-0.645518,-0.586682,-2.436931
C,-0.768674,-0.496281,-0.203846
D,0.142609,-0.762177,-0.593719


In [26]:
df>0

Unnamed: 0,W,X,Y
A,True,False,False
B,False,False,False
C,False,False,False
D,True,False,False


In [27]:
df[df>0]

Unnamed: 0,W,X,Y
A,0.819169,,
B,,,
C,,,
D,0.142609,,


In [28]:
df['W']>0

A     True
B    False
C    False
D     True
Name: W, dtype: bool

In [29]:
df[['W','Y']] >0.9

Unnamed: 0,W,Y
A,False,False
B,False,False
C,False,False
D,False,False


# Read_data

## Read from csv

In [30]:
df = pd.read_csv('50_StartUp_for_pandas.csv')
df

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
0,165349.2,136897.8,471784.1,New York,192261.83
1,162597.7,151377.59,443898.53,California,191792.06
2,153441.51,101145.55,407934.54,Florida,191050.39
3,144372.41,118671.85,383199.62,New York,182901.99
4,142107.34,91391.77,366168.42,Florida,166187.94
5,131876.9,99814.71,362861.36,New York,156991.12
6,134615.46,147198.87,127716.82,California,156122.51
7,130298.13,145530.06,323876.68,Florida,155752.6
8,120542.52,148718.95,311613.29,New York,152211.77
9,123334.88,108679.17,304981.62,California,149759.96


### without header

In [31]:
df = pd.read_csv('50_StartUp_for_pandas.csv',header=None)
df

Unnamed: 0,0,1,2,3,4
0,R&D Spend,Administration,Marketing Spend,State,Profit
1,165349.2,136897.8,471784.1,New York,192261.83
2,162597.7,151377.59,443898.53,California,191792.06
3,153441.51,101145.55,407934.54,Florida,191050.39
4,144372.41,118671.85,383199.62,New York,182901.99
5,142107.34,91391.77,366168.42,Florida,166187.94
6,131876.9,99814.71,362861.36,New York,156991.12
7,134615.46,147198.87,127716.82,California,156122.51
8,130298.13,145530.06,323876.68,Florida,155752.6
9,120542.52,148718.95,311613.29,New York,152211.77


### the header is column two

In [32]:
df = pd.read_csv('50_StartUp_for_pandas.csv',header=2)
df

Unnamed: 0,162597.7,151377.59,443898.53,California,191792.06
0,153441.51,101145.55,407934.54,Florida,191050.39
1,144372.41,118671.85,383199.62,New York,182901.99
2,142107.34,91391.77,366168.42,Florida,166187.94
3,131876.9,99814.71,362861.36,New York,156991.12
4,134615.46,147198.87,127716.82,California,156122.51
5,130298.13,145530.06,323876.68,Florida,155752.6
6,120542.52,148718.95,311613.29,New York,152211.77
7,123334.88,108679.17,304981.62,California,149759.96
8,101913.08,110594.11,229160.95,Florida,146121.95
9,100671.96,91790.61,249744.55,California,144259.4


## Read from excel

In [33]:
pd.read_excel('Book_for_pandas.xlsx',sheet_name='Sheet1')

Unnamed: 0,0,a,b,c,d
0,0,0,1,2,3
1,1,4,5,6,7
2,2,8,9,10,11
3,3,12,13,14,15


### change sheet

In [34]:
pd.read_excel('Book_for_pandas.xlsx',sheet_name='Sheet2')

Unnamed: 0,index,E,F
0,0,5,4
1,1,5,4
2,2,6,7


# Save tha DataFrame

## Save to csv

In [35]:
df.to_csv('saved_data.csv')

## Save to excel

In [36]:
df.to_excel('saved_data.xlsx',sheet_name='Sheet3')

# DataFrame Manipulation (EDA)

In [37]:
df = pd.read_csv('50_StartUp_for_pandas.csv')
df

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
0,165349.2,136897.8,471784.1,New York,192261.83
1,162597.7,151377.59,443898.53,California,191792.06
2,153441.51,101145.55,407934.54,Florida,191050.39
3,144372.41,118671.85,383199.62,New York,182901.99
4,142107.34,91391.77,366168.42,Florida,166187.94
5,131876.9,99814.71,362861.36,New York,156991.12
6,134615.46,147198.87,127716.82,California,156122.51
7,130298.13,145530.06,323876.68,Florida,155752.6
8,120542.52,148718.95,311613.29,New York,152211.77
9,123334.88,108679.17,304981.62,California,149759.96


## head()
### get first 5 rows

In [38]:
df.head()

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
0,165349.2,136897.8,471784.1,New York,192261.83
1,162597.7,151377.59,443898.53,California,191792.06
2,153441.51,101145.55,407934.54,Florida,191050.39
3,144372.41,118671.85,383199.62,New York,182901.99
4,142107.34,91391.77,366168.42,Florida,166187.94


## head(number)
### get first number rows

In [39]:
df.head(8)

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
0,165349.2,136897.8,471784.1,New York,192261.83
1,162597.7,151377.59,443898.53,California,191792.06
2,153441.51,101145.55,407934.54,Florida,191050.39
3,144372.41,118671.85,383199.62,New York,182901.99
4,142107.34,91391.77,366168.42,Florida,166187.94
5,131876.9,99814.71,362861.36,New York,156991.12
6,134615.46,147198.87,127716.82,California,156122.51
7,130298.13,145530.06,323876.68,Florida,155752.6


## tail()
### get last 5 rows

In [40]:
df.tail()

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
45,1000.23,124153.04,1903.93,New York,64926.08
46,1315.46,115816.21,297114.46,Florida,49490.75
47,0.0,135426.92,0.0,California,42559.73
48,542.05,51743.15,0.0,New York,35673.41
49,0.0,116983.8,45173.06,California,14681.4


## tail(number)
### get last number rows

In [41]:
df.tail(8)

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
42,23640.93,96189.63,148001.11,California,71498.49
43,15505.73,127382.3,35534.17,New York,69758.98
44,22177.74,154806.14,28334.72,California,65200.33
45,1000.23,124153.04,1903.93,New York,64926.08
46,1315.46,115816.21,297114.46,Florida,49490.75
47,0.0,135426.92,0.0,California,42559.73
48,542.05,51743.15,0.0,New York,35673.41
49,0.0,116983.8,45173.06,California,14681.4


## Get data type for each column of the DataFrame

In [42]:
df.dtypes

R&D Spend          float64
Administration     float64
Marketing Spend    float64
State               object
Profit             float64
dtype: object

## Get the dimensions of the DataFrame

In [43]:
df.shape

(50, 5)

## Get the column labels of the DataFrame

In [44]:
df.columns

Index(['R&D Spend', 'Administration', 'Marketing Spend', 'State', 'Profit'], dtype='object')

## Get the index of the DataFrame

In [45]:
df.index

RangeIndex(start=0, stop=50, step=1)

## Get a concise summary of the DataFrame

In [46]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   R&D Spend        50 non-null     float64
 1   Administration   50 non-null     float64
 2   Marketing Spend  50 non-null     float64
 3   State            50 non-null     object 
 4   Profit           50 non-null     float64
dtypes: float64(4), object(1)
memory usage: 2.1+ KB


## descriptive statistics

In [47]:
df.describe()

Unnamed: 0,R&D Spend,Administration,Marketing Spend,Profit
count,50.0,50.0,50.0,50.0
mean,73721.6156,121344.6396,211025.0978,112012.6392
std,45902.256482,28017.802755,122290.310726,40306.180338
min,0.0,51283.14,0.0,14681.4
25%,39936.37,103730.875,129300.1325,90138.9025
50%,73051.08,122699.795,212716.24,107978.19
75%,101602.8,144842.18,299469.085,139765.9775
max,165349.2,182645.56,471784.1,192261.83


In [48]:
df.describe(include=['object'])

Unnamed: 0,State
count,50
unique,3
top,New York
freq,17


# Data Aggregation

## Groupby

### The groupby method allows you to group rows of data together and call aggregate functions

In [49]:
data = {'Company':['google','google','MSFT','MSFT','FB','FB'],
        'Sales':[200,120,340,124,243,350]}
df = pd.DataFrame(data)
df

Unnamed: 0,Company,Sales
0,google,200
1,google,120
2,MSFT,340
3,MSFT,124
4,FB,243
5,FB,350


### Apply groupby

In [50]:
groupby = df.groupby('Company')

### get summation

In [51]:
groupby.sum()

Unnamed: 0_level_0,Sales
Company,Unnamed: 1_level_1
FB,593
MSFT,464
google,320


### get mean

In [52]:
groupby.mean()

Unnamed: 0_level_0,Sales
Company,Unnamed: 1_level_1
FB,296.5
MSFT,232.0
google,160.0


### get std

In [53]:
groupby.std()

Unnamed: 0_level_0,Sales
Company,Unnamed: 1_level_1
FB,75.660426
MSFT,152.735065
google,56.568542


### get min

In [54]:
groupby.min()

Unnamed: 0_level_0,Sales
Company,Unnamed: 1_level_1
FB,243
MSFT,124
google,120


### get max

In [55]:
groupby.max()

Unnamed: 0_level_0,Sales
Company,Unnamed: 1_level_1
FB,350
MSFT,340
google,200


### get count

In [56]:
groupby.count()

Unnamed: 0_level_0,Sales
Company,Unnamed: 1_level_1
FB,2
MSFT,2
google,2


### get describe

In [57]:
groupby.describe()

Unnamed: 0_level_0,Sales,Sales,Sales,Sales,Sales,Sales,Sales,Sales
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Company,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
FB,2.0,296.5,75.660426,243.0,269.75,296.5,323.25,350.0
MSFT,2.0,232.0,152.735065,124.0,178.0,232.0,286.0,340.0
google,2.0,160.0,56.568542,120.0,140.0,160.0,180.0,200.0


### get transpose of describe

In [58]:
groupby.describe().transpose()

Unnamed: 0,Company,FB,MSFT,google
Sales,count,2.0,2.0,2.0
Sales,mean,296.5,232.0,160.0
Sales,std,75.660426,152.735065,56.568542
Sales,min,243.0,124.0,120.0
Sales,25%,269.75,178.0,140.0
Sales,50%,296.5,232.0,160.0
Sales,75%,323.25,286.0,180.0
Sales,max,350.0,340.0,200.0


## Aggregate
### using one or more operations over the specified axis.

In [59]:
aggregated = df.agg({'Company': 'count', 'Sales': 'mean'})
aggregated

Company      6.0
Sales      229.5
dtype: float64

## cumsum
### Return cumulative sum over a DataFrame or Series axis

In [60]:
cumsum = df.cumsum()
cumsum

Unnamed: 0,Company,Sales
0,google,200
1,googlegoogle,320
2,googlegoogleMSFT,660
3,googlegoogleMSFTMSFT,784
4,googlegoogleMSFTMSFTFB,1027
5,googlegoogleMSFTMSFTFBFB,1377


## cumprod
### Return cumulative product over a DataFrame or Series axis

In [61]:
data = {'A': [1, 2, 3], 'B': [4, 5, 6]}
data = pd.DataFrame(data)
cumprod = data.cumprod()
cumprod

Unnamed: 0,A,B
0,1,4
1,2,20
2,6,120


# Missing Data

In [62]:
df = pd.DataFrame({'A':[1,2,np.nan],
                    'B':[5,np.nan,np.nan],
                    'C':[1,2,3]})
df

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2
2,,,3


## isnull
### Detect missing values

In [63]:
df.isnull()

Unnamed: 0,A,B,C
0,False,False,False
1,False,True,False
2,True,True,False


In [64]:
df.isnull().sum()

A    1
B    2
C    0
dtype: int64

## dropna
### Drop rows with missing values

In [65]:
df.dropna()
# Not inplace unless specified!

Unnamed: 0,A,B,C
0,1.0,5.0,1


In [66]:
df

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2
2,,,3


In [67]:
df.dropna(inplace=True)
# With inplace it will removed!
df

Unnamed: 0,A,B,C
0,1.0,5.0,1


In [68]:
df = pd.DataFrame({'A':[1,2,np.nan],
                    'B':[5,np.nan,np.nan],
                    'C':[1,2,3]})
df

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2
2,,,3


### remove column which it's values is null

In [69]:
df.dropna(axis=1)

Unnamed: 0,C
0,1
1,2
2,3


### dropna(thresh=number)
### remove row which it's values is at least number null

In [70]:
df.dropna(thresh=2)

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2


## fillna
### Fill missing values with a specified value.

In [71]:
df.fillna(value='FILL VALUE')

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,FILL VALUE,2
2,FILL VALUE,FILL VALUE,3


In [72]:
df.fillna(value='2')

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,2.0,2
2,2.0,2.0,3


In [73]:
df['A'].fillna(value=df['A'].mean())

0    1.0
1    2.0
2    1.5
Name: A, dtype: float64

## replace
### Replace values with another value

In [74]:
replaced_df = df.replace(1, 100)
replaced_df

Unnamed: 0,A,B,C
0,100.0,5.0,100
1,2.0,,2
2,,,3


In [75]:
replaced_df = df.replace(np.nan, 6)
replaced_df

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,6.0,2
2,6.0,6.0,3


# Merging and Joining

## merge
### Merge DataFrames have a common columns

In [76]:
df1 = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
df2 = pd.DataFrame({'A': [1, 2], 'C': [5, 6]})
merged_df = pd.merge(df1, df2, on='A')
merged_df

Unnamed: 0,A,B,C
0,1,3,5
1,2,4,6


In [77]:
df1 = pd.DataFrame({'A': [1, 2], 'B': [3, 4],'D': [50, 41]})
df2 = pd.DataFrame({'A': [1, 2], 'C': [5, 6],'D': [50, 41]})
merged_df = pd.merge(df1, df2,on=['A','D'])
merged_df

Unnamed: 0,A,B,D,C
0,1,3,50,5
1,2,4,41,6


## concat
### Concatenate DataFrames

In [78]:
df1 = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
df2 = pd.DataFrame({'A': [5, 6], 'B': [7, 8]})
concat_df = pd.concat([df1, df2])
concat_df

Unnamed: 0,A,B
0,1,3
1,2,4
0,5,7
1,6,8


In [79]:
df1 = pd.DataFrame({'A': [1, 2], 'B': [3, 4],'C': [5, 6]})
df2 = pd.DataFrame({'A': [5, 6], 'B': [7, 8],'D': [50, 41]})
concat_df = pd.concat([df1, df2])
concat_df

Unnamed: 0,A,B,C,D
0,1,3,5.0,
1,2,4,6.0,
0,5,7,,50.0
1,6,8,,41.0


## join
### Join columns of another DataFrame

In [80]:
df1 = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
df2 = pd.DataFrame({'C': [5, 6]}, index=[0, 1])
joined_df = df1.join(df2)
joined_df

Unnamed: 0,A,B,C
0,1,3,5
1,2,4,6


# Operations

### There are lots of operations with pandas that will be really useful to you, but don't fall into any distinct category.

In [81]:
# importing nesscesary libraries and check the head of the data
df = pd.DataFrame({'col1':[1,2,3,4],'col2':[444,555,666,444],'col3':['abc','def','ghi','xyz']})
df.head()

Unnamed: 0,col1,col2,col3
0,1,444,abc
1,2,555,def
2,3,666,ghi
3,4,444,xyz


## Info on Unique Values

In [82]:
df['col2'].unique() #unique values like set()

array([444, 555, 666], dtype=int64)

In [83]:
df['col2'].nunique() #no. of unique values

3

In [84]:
df['col2'].value_counts() #count values of  each col

col2
444    2
555    1
666    1
Name: count, dtype: int64

In [87]:
# engine-location as variable
engine_loc_counts = df['col2'].value_counts().to_frame()
engine_loc_counts.rename(columns={'col2': 'value_counts'}, inplace=True)
engine_loc_counts.index.name = 'col2'
engine_loc_counts.head(10)

Unnamed: 0_level_0,count
al_arkam,Unnamed: 1_level_1
444,2
555,1
666,1


## Applying Functions

In [81]:
def times2(x):
    return x*2

In [82]:
df['col1'].apply(times2)

0    2
1    4
2    6
3    8
Name: col1, dtype: int64

In [83]:
#get length of character 
df['col3'].apply(len)

0    3
1    3
2    3
3    3
Name: col3, dtype: int64

In [84]:
df['col1'].sum()

10

## **Sorting and Ordering a DataFrame:**

In [85]:
df

Unnamed: 0,col1,col2,col3
0,1,444,abc
1,2,555,def
2,3,666,ghi
3,4,444,xyz


In [86]:
df.sort_values(by='col2') #inplace=False by default

Unnamed: 0,col1,col2,col3
0,1,444,abc
3,4,444,xyz
1,2,555,def
2,3,666,ghi


# Date and Time Functions

## to_datetime
### Convert argument to datetime

In [87]:
dates = pd.to_datetime(['2020-01-01', '2020-01-02'])
print(dates)

DatetimeIndex(['2020-01-01', '2020-01-02'], dtype='datetime64[ns]', freq=None)


## df['column'].dt
### Accessor object for datetime-like properties of the Series values

In [88]:
data = {'date': ['2020-01-01', '2020-01-02']}
df = pd.DataFrame(data)
df['date'] = pd.to_datetime(df['date'])
print(df['date'].dt.year)

0    2020
1    2020
Name: date, dtype: int32


## resample
### Resample time-series data

In [89]:
date_rng = pd.date_range(start='1/1/2020', end='1/08/2020', freq='D')
df = pd.DataFrame(date_rng, columns=['date'])
df['data'] = np.random.randint(0, 100, size=(len(date_rng)))
df.set_index('date', inplace=True)
resampled_df = df.resample('2D').mean()
print(resampled_df)

            data
date            
2020-01-01  73.0
2020-01-03  77.0
2020-01-05  56.0
2020-01-07  84.0
