In [1]:
import pandas as pd
import numpy as np

# Toy dataset used throughout the notebook: one row per animal, with the
# rows labelled 'A' through 'E'.
data = dict(
    Animal=['Cat', 'Dog', 'Python', 'Monkey', 'Tiger'],
    Age=[2, 5, 3, 2, 4],
    Visits=[1, 3, 2, 2, 1],
    Priority=['yes', 'yes', 'no', 'no', 'yes'],
)
labels = list('ABCDE')
df = pd.DataFrame(data=data, index=labels)
print(df)

   Animal  Age  Visits Priority
A     Cat    2       1      yes
B     Dog    5       3      yes
C  Python    3       2       no
D  Monkey    2       2       no
E   Tiger    4       1      yes


In [2]:
# Rename 'Python' to 'Boa' in the Animal column.
# The result is assigned back to the column rather than calling
# replace(..., inplace=True) on df['Animal']: that form is chained assignment
# on an intermediate Series and is not guaranteed to update df (it does not
# under pandas Copy-on-Write).
df['Animal'] = df['Animal'].replace('Python', 'Boa')
df

Unnamed: 0,Animal,Age,Visits,Priority
A,Cat,2,1,yes
B,Dog,5,3,yes
C,Boa,3,2,no
D,Monkey,2,2,no
E,Tiger,4,1,yes


In [3]:
# Keep only the first row seen for each Animal value.
# Reassigning the result (instead of inplace=True) keeps the cell idempotent
# and avoids mutating a frame that earlier cells have already displayed.
df = df.drop_duplicates(subset='Animal', keep='first')
df

Unnamed: 0,Animal,Age,Visits,Priority
A,Cat,2,1,yes
B,Dog,5,3,yes
C,Boa,3,2,no
D,Monkey,2,2,no
E,Tiger,4,1,yes


In [7]:
# Standard deviation of Age within each Animal group.
# Each animal appears in a single row, and the sample standard deviation
# (pandas' default, ddof=1) of one value is undefined, so every group is NaN.
ages_by_animal = df.groupby('Animal')['Age']
std_dev_age_per_animal = ages_by_animal.std()
print(std_dev_age_per_animal)

Animal
Boa      NaN
Cat      NaN
Dog      NaN
Monkey   NaN
Tiger    NaN
Name: Age, dtype: float64


In [8]:
# Boolean flag column: True for animals whose Age is strictly below 3.
df['Young'] = df['Age'].lt(3)
df

Unnamed: 0,Animal,Age,Visits,Priority,Young
A,Cat,2,1,yes,True
B,Dog,5,3,yes,False
C,Boa,3,2,no,False
D,Monkey,2,2,no,True
E,Tiger,4,1,yes,False


In [None]:
# Second table: a Cuteness score for three of the animals, keyed by name so
# it can be joined to the main frame on 'Animal'.
data1 = dict(
    Animal=['Cat', 'Dog', 'Boa'],
    Cuteness=[9, 7, 5],
)
df1 = pd.DataFrame(data=data1)
df1

Unnamed: 0,Animal,Cuteness
0,Cat,9
1,Dog,7
2,Boa,5


In [16]:
# Join the Cuteness scores onto the animals by name. Both results get a fresh
# 0..n-1 index; the original row labels are not carried through the merge.
# Inner join: only animals present in both tables.
merge_df = df.merge(df1, how='inner', on='Animal')
# Left join: every animal from df; Cuteness is NaN where df1 has no match.
merged_df1 = df.merge(df1, how='left', on='Animal')
print(merge_df)
merged_df1

  Animal  Age  Visits Priority  Young  Cuteness
0    Cat    2       1      yes   True         9
1    Dog    5       3      yes  False         7
2    Boa    3       2       no  False         5


Unnamed: 0,Animal,Age,Visits,Priority,Young,Cuteness
0,Cat,2,1,yes,True,9.0
1,Dog,5,3,yes,False,7.0
2,Boa,3,2,no,False,5.0
3,Monkey,2,2,no,True,
4,Tiger,4,1,yes,False,


In [17]:
# Reshape to wide form: one row per Animal, one column per distinct Visits
# value, each cell holding the Age (NaN where that Animal/Visits pair does
# not occur).
pivoted_df = df.pivot(
    columns='Visits',
    index='Animal',
    values='Age',
)
pivoted_df

Visits,1,2,3
Animal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Boa,,3.0,
Cat,2.0,,
Dog,,,5.0
Monkey,,2.0,
Tiger,4.0,,


In [18]:
# Fill any missing Age values with the column median.
# The result is assigned back to the column rather than calling
# fillna(..., inplace=True) on df['Age']: that form is chained assignment on
# an intermediate Series and is not guaranteed to update df (it does not
# under pandas Copy-on-Write).
df['Age'] = df['Age'].fillna(df['Age'].median())
df

Unnamed: 0,Animal,Age,Visits,Priority,Young
A,Cat,2,1,yes,True
B,Dog,5,3,yes,False
C,Boa,3,2,no,False
D,Monkey,2,2,no,True
E,Tiger,4,1,yes,False


In [19]:
# Index: one entry per business day (freq='B') from 2020-01-01 to 2020-12-31.
business_days = pd.date_range(start='2020-01-01', end='2020-12-31', freq='B')
ts_df = pd.DataFrame(index=business_days)
# Draw the values from a seeded generator so that a fresh Restart & Run All
# reproduces the same series. Values are integers in [1, 100), the same range
# the unseeded np.random.randint(1, 100, ...) call produced.
SEED = 42
rng = np.random.default_rng(SEED)
ts_df['Values'] = rng.integers(1, 100, size=len(business_days))
print(ts_df)

            Values
2020-01-01      43
2020-01-02      38
2020-01-03      45
2020-01-06      91
2020-01-07      64
...            ...
2020-12-25      85
2020-12-28      53
2020-12-29      30
2020-12-30      18
2020-12-31      92

[262 rows x 1 columns]


In [20]:
# Trailing mean over a window of 7 rows. ts_df has one row per business day,
# so the window spans 7 business days rather than 7 calendar days. Rows
# without a full window (the first 6) are NaN.
values_window = ts_df['Values'].rolling(7)
ts_df['7-day Rolling Average'] = values_window.mean()
print(ts_df)

            Values  7-day Rolling Average
2020-01-01      43                    NaN
2020-01-02      38                    NaN
2020-01-03      45                    NaN
2020-01-06      91                    NaN
2020-01-07      64                    NaN
...            ...                    ...
2020-12-25      85              64.714286
2020-12-28      53              60.428571
2020-12-29      30              52.000000
2020-12-30      18              53.571429
2020-12-31      92              57.142857

[262 rows x 2 columns]


In [21]:
# Build a two-level (Animal, Visits) row index. The two columns move into the
# index and leave the column set; df itself is not modified.
index_columns = ['Animal', 'Visits']
multi_index_df = df.set_index(keys=index_columns)
print(multi_index_df)

               Age Priority  Young
Animal Visits                     
Cat    1         2      yes   True
Dog    3         5      yes  False
Boa    2         3       no  False
Monkey 2         2       no   True
Tiger  1         4      yes  False


In [22]:
def range_func(x):
    """Return the spread of ``x``: its maximum minus its minimum.

    ``x`` can be any object exposing ``max()`` and ``min()`` methods, such as
    a pandas Series.
    """
    highest = x.max()
    lowest = x.min()
    return highest - lowest

# Apply range_func to the ages of each animal. Every animal has a single row
# here, so max and min coincide and each range comes out as 0.
ages_grouped = df.groupby('Animal')['Age']
age_range_per_animal = ages_grouped.agg(range_func)

age_range_per_animal

Animal
Boa       0
Cat       0
Dog       0
Monkey    0
Tiger     0
Name: Age, dtype: int64