In [1]:
import pandas as pd

In [2]:
pd.__version__

'1.5.3'

In [None]:
# If pandas is missing, install it into this kernel's environment with: %pip install pandas==1.5.3

## Pandas Series

In [3]:
# A Series built from bare values gets the default 0..n-1 integer index.
data = pd.Series(range(1, 7))
data

0    1
1    2
2    3
3    4
4    5
5    6
dtype: int64

In [4]:
type(data)

pandas.core.series.Series

In [5]:
data.values

array([1, 2, 3, 4, 5, 6])

In [6]:
data.index

RangeIndex(start=0, stop=6, step=1)

In [7]:
data[1]

2

In [8]:
# Labelled Series: explicit string index, float dtype and a name.
data = pd.Series(
    data=[10, 20, 30, 40, 50],
    index=list("abcde"),
    dtype="float",
    name="Example Series",
)
data

a    10.0
b    20.0
c    30.0
d    40.0
e    50.0
Name: Example Series, dtype: float64

In [9]:
data.values

array([10., 20., 30., 40., 50.])

In [10]:
data.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [None]:
data.name

'Example Series'

In [None]:
data.dtype

dtype('float64')

In [11]:
# Population per state as a plain dict; the keys become the index labels
# when the dict is turned into a Series.
# NOTE(review): "Colifornia" looks like a misspelling of "California". It is
# kept because the area dict further down uses the same key and the two are
# combined by label -- fix both together.
pop_dict = {
    "Colifornia": 123123,
    "Texas": 1212123,
    "New York": 4353,
    "Florida": 123123,
}

In [None]:
pop_dict

{'Colifornia': 123123, 'Florida': 123123, 'New York': 4353, 'Texas': 1212123}

In [17]:
# Dict keys become the index labels, dict values become the data.
pop_series = pd.Series(data=pop_dict)
pop_series

Colifornia     123123
Texas         1212123
New York         4353
Florida        123123
dtype: int64

## DataFrame

In [26]:
# Area per state, keyed by the same state labels as pop_dict.
# NOTE(review): despite its name this is a plain dict, not a Series; the
# Series is built from it in a later cell.
area_series = {
    "Colifornia": 36520,
    "Texas": 365620,
    "New York": 875421,
    "Florida": 54752,
}

In [27]:
# Turn the area dict into a Series indexed by state label.
ser_area = pd.Series(data=area_series)

In [28]:
ser_area

Colifornia     36520
Texas         365620
New York      875421
Florida        54752
dtype: int64

In [29]:
# Each Series becomes one column; the rows are the state labels of the Series.
df = pd.DataFrame(
    data={
        "population": pop_series,
        "area": ser_area,
    }
)
df

Unnamed: 0,population,area
Colifornia,123123,36520
Texas,1212123,365620
New York,4353,875421
Florida,123123,54752


In [30]:
df.columns

Index(['population', 'area'], dtype='object')

In [31]:
df.index

Index(['Colifornia', 'Texas', 'New York', 'Florida'], dtype='object')

In [32]:
df["area"]

Colifornia     36520
Texas         365620
New York      875421
Florida        54752
Name: area, dtype: int64

In [33]:
type(df["area"])

pandas.core.series.Series

In [34]:
import numpy as np

# Fixed seed so the 4x5 matrix of random integers in [0, 100) is reproducible.
np.random.seed(0)
arr = np.random.randint(low=0, high=100, size=(4, 5))
arr

array([[44, 47, 64, 67, 67],
       [ 9, 83, 21, 36, 87],
       [70, 88, 88, 12, 58],
       [65, 39, 87, 46, 88]])

In [35]:
# Wrap the array in a DataFrame with row labels a-d and named columns.
df2 = pd.DataFrame(
    data=arr,
    index=list("abcd"),
    columns=["Col1", "Col2", "Col3", "Col4", "Col5"],
)
df2

Unnamed: 0,Col1,Col2,Col3,Col4,Col5
a,44,47,64,67,67
b,9,83,21,36,87
c,70,88,88,12,58
d,65,39,87,46,88


In [None]:
df2.values

array([[44, 47, 64, 67, 67],
       [ 9, 83, 21, 36, 87],
       [70, 88, 88, 12, 58],
       [65, 39, 87, 46, 88]])

In [None]:
df2.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [None]:
df2.columns

Index(['Col1', 'Col2', 'Col3', 'Col4', 'Col5'], dtype='object')

## Slicing and Indexing of Pandas Objects

In [41]:
# Non-contiguous integer labels, so a label and a position are different things.
data = pd.Series(data=["a", "b", "c"], index=[1, 3, 5])
data

1    a
3    b
5    c
dtype: object

In [None]:
data[1]

'a'

In [37]:
# Plain [] slicing is positional: 1:3 selects positions 1 and 2 (labels 3 and 5),
# even though a scalar key such as data[1] is looked up by label.
data[1:3]

3    b
5    c
dtype: object

In [38]:
# .loc indexes by label; label slices include both endpoints.
data.loc[1]

'a'

In [39]:
data.loc[1:3]

1    a
3    b
dtype: object

In [42]:
# .iloc indexes by integer position; position slices exclude the stop.
data.iloc[1]

'b'

In [45]:
data.iloc[1:3]

3    b
5    c
dtype: object

In [46]:
df2

Unnamed: 0,Col1,Col2,Col3,Col4,Col5
a,44,47,64,67,67
b,9,83,21,36,87
c,70,88,88,12,58
d,65,39,87,46,88


In [47]:
df2["Col1"]

a    44
b     9
c    70
d    65
Name: Col1, dtype: int64

In [48]:
# Derived column: element-wise product of Col2 and Col3.
df2["Col6"] = df2["Col2"].mul(df2["Col3"])
df2

Unnamed: 0,Col1,Col2,Col3,Col4,Col5,Col6
a,44,47,64,67,67,3008
b,9,83,21,36,87,1743
c,70,88,88,12,58,7744
d,65,39,87,46,88,3393


In [50]:
df2.iloc[:3,:2]

Unnamed: 0,Col1,Col2
a,44,47
b,9,83
c,70,88


In [51]:
df2.loc["a":"c","Col1":"Col4"]

Unnamed: 0,Col1,Col2,Col3,Col4
a,44,47,64,67
b,9,83,21,36
c,70,88,88,12


In [52]:
df2[["Col1","Col3","Col4"]]

Unnamed: 0,Col1,Col3,Col4
a,44,64,67
b,9,21,36
c,70,88,12
d,65,87,46


In [53]:
df2

Unnamed: 0,Col1,Col2,Col3,Col4,Col5,Col6
a,44,47,64,67,67,3008
b,9,83,21,36,87,1743
c,70,88,88,12,58,7744
d,65,39,87,46,88,3393


In [55]:
df2[df2["Col2"]>50]

Unnamed: 0,Col1,Col2,Col3,Col4,Col5,Col6
b,9,83,21,36,87,1743
c,70,88,88,12,58,7744


## Loading the dataset

In [57]:
from sklearn.datasets import load_diabetes

In [59]:
data = load_diabetes()

In [67]:
data.keys()

dict_keys(['data', 'target', 'frame', 'DESCR', 'feature_names', 'data_filename', 'target_filename', 'data_module'])

In [68]:
# Pull the feature matrix and the target vector out of the loaded dataset object.
X, y = data.data, data.target

In [69]:
# Tabular view of the features, one column per entry in feature_names.
df = pd.DataFrame(data=X, columns=data.feature_names)

In [70]:
df

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
0,0.038076,0.050680,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204
2,0.085299,0.050680,0.044451,-0.005670,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.025930
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641
...,...,...,...,...,...,...,...,...,...,...
437,0.041708,0.050680,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207
438,-0.005515,0.050680,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018114,0.044485
439,0.041708,0.050680,-0.015906,0.017293,-0.037344,-0.013840,-0.024993,-0.011080,-0.046883,0.015491
440,-0.045472,-0.044642,0.039062,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044529,-0.025930


In [71]:
df.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641


In [8]:
df.tail()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
437,0.041708,0.05068,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207
438,-0.005515,0.05068,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018118,0.044485
439,0.041708,0.05068,-0.015906,0.017282,-0.037344,-0.01384,-0.024993,-0.01108,-0.046879,0.015491
440,-0.045472,-0.044642,0.039062,0.001215,0.016318,0.015283,-0.028674,0.02656,0.044528,-0.02593
441,-0.045472,-0.044642,-0.07303,-0.081414,0.08374,0.027809,0.173816,-0.039493,-0.00422,0.003064


In [72]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 442 entries, 0 to 441
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   age     442 non-null    float64
 1   sex     442 non-null    float64
 2   bmi     442 non-null    float64
 3   bp      442 non-null    float64
 4   s1      442 non-null    float64
 5   s2      442 non-null    float64
 6   s3      442 non-null    float64
 7   s4      442 non-null    float64
 8   s5      442 non-null    float64
 9   s6      442 non-null    float64
dtypes: float64(10)
memory usage: 34.7 KB


In [73]:
df.describe()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
count,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0,442.0
mean,-2.511817e-19,1.23079e-17,-2.245564e-16,-4.79757e-17,-1.3814990000000001e-17,3.9184340000000004e-17,-5.777179e-18,-9.04254e-18,9.293722000000001e-17,1.130318e-17
std,0.04761905,0.04761905,0.04761905,0.04761905,0.04761905,0.04761905,0.04761905,0.04761905,0.04761905,0.04761905
min,-0.1072256,-0.04464164,-0.0902753,-0.1123988,-0.1267807,-0.1156131,-0.1023071,-0.0763945,-0.1260971,-0.1377672
25%,-0.03729927,-0.04464164,-0.03422907,-0.03665608,-0.03424784,-0.0303584,-0.03511716,-0.03949338,-0.03324559,-0.03317903
50%,0.00538306,-0.04464164,-0.007283766,-0.005670422,-0.004320866,-0.003819065,-0.006584468,-0.002592262,-0.001947171,-0.001077698
75%,0.03807591,0.05068012,0.03124802,0.03564379,0.02835801,0.02984439,0.0293115,0.03430886,0.03243232,0.02791705
max,0.1107267,0.05068012,0.1705552,0.1320436,0.1539137,0.198788,0.1811791,0.1852344,0.1335973,0.1356118


In [74]:
# Append the target vector as the last column.
df = df.assign(target=y)

In [75]:
df.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


In [129]:
# Write the frame to disk; index=False leaves the row index out of the file.
df.to_csv("diabetes.csv",index=False)

In [130]:
# Read the saved CSV back into a fresh DataFrame.
df_diabetes = pd.read_csv("diabetes.csv")

In [78]:
df_diabetes.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


## Operations on DataFrame

In [79]:
df_diabetes

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.050680,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.050680,0.044451,-0.005670,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.025930,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0
...,...,...,...,...,...,...,...,...,...,...,...
437,0.041708,0.050680,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207,178.0
438,-0.005515,0.050680,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018114,0.044485,104.0
439,0.041708,0.050680,-0.015906,0.017293,-0.037344,-0.013840,-0.024993,-0.011080,-0.046883,0.015491,132.0
440,-0.045472,-0.044642,0.039062,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044529,-0.025930,220.0


In [83]:
# Checks for duplicated rows 
df_diabetes.duplicated()

0      False
1      False
2      False
3      False
4      False
       ...  
437    False
438    False
439    False
440    False
441    False
Length: 442, dtype: bool

In [84]:
df_diabetes[df_diabetes.duplicated()]

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target


In [85]:
df_diabetes.duplicated().sum()

0

In [86]:
df_diabetes.isnull()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...
437,False,False,False,False,False,False,False,False,False,False,False
438,False,False,False,False,False,False,False,False,False,False,False
439,False,False,False,False,False,False,False,False,False,False,False
440,False,False,False,False,False,False,False,False,False,False,False


In [22]:
df_diabetes.isnull().any()

age       False
sex       False
bmi       False
bp        False
s1        False
s2        False
s3        False
s4        False
s5        False
s6        False
target    False
dtype: bool

In [23]:
df_diabetes.isnull().any().any()

False

In [24]:
df_diabetes.isnull().sum()

age       0
sex       0
bmi       0
bp        0
s1        0
s2        0
s3        0
s4        0
s5        0
s6        0
target    0
dtype: int64

In [25]:
df_diabetes.isnull().sum().sum()

0

In [27]:
df_diabetes.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019908,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.06833,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.005671,-0.045599,-0.034194,-0.032356,-0.002592,0.002864,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022692,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031991,-0.046641,135.0


In [28]:
df_diabetes["sex"].unique()

array([ 0.05068012, -0.04464164])

In [103]:
df_diabetes.groupby("sex").mean()

Unnamed: 0_level_0,age,bmi,bp,s1,s2,s3,s4,s5,s6,target
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
-0.044642,-0.007756,-0.003936,-0.010759,-0.001575,-0.006368,0.016923,-0.014826,-0.006693,-0.009291,149.021277
0.05068,0.008805,0.004468,0.012214,0.001788,0.007229,-0.019212,0.016832,0.007598,0.010548,155.666667


In [104]:
import numpy as np

In [108]:
# Several aggregations per column in one pass. String names select pandas' own
# grouped implementations and yield the same "mean"/"max"/"min" column labels;
# passing np.mean or the builtins max/min instead is deprecated and emits a
# FutureWarning on pandas 2.1+.
df_diabetes.groupby("sex").agg(["mean", "max", "min"])

Unnamed: 0_level_0,age,age,age,bmi,bmi,bmi,bp,bp,bp,s1,...,s4,s5,s5,s5,s6,s6,s6,target,target,target
Unnamed: 0_level_1,mean,max,min,mean,max,min,mean,max,min,mean,...,min,mean,max,min,mean,max,min,mean,max,min
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
-0.044642,-0.007756,0.096197,-0.107226,-0.003936,0.160855,-0.084886,-0.010759,0.132044,-0.112399,-0.001575,...,-0.076395,-0.006693,0.133597,-0.104366,-0.009291,0.135612,-0.137767,149.021277,346.0,25.0
0.05068,0.008805,0.110727,-0.103593,0.004468,0.170555,-0.090275,0.012214,0.107944,-0.084856,0.001788,...,-0.076395,0.007598,0.133597,-0.126097,0.010548,0.135612,-0.10463,155.666667,341.0,39.0


In [109]:
df_diabetes.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


In [110]:
df_diabetes.reset_index()

Unnamed: 0,index,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0,0.038076,0.050680,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,2,0.085299,0.050680,0.044451,-0.005670,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.025930,141.0
3,3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0
...,...,...,...,...,...,...,...,...,...,...,...,...
437,437,0.041708,0.050680,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207,178.0
438,438,-0.005515,0.050680,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018114,0.044485,104.0
439,439,0.041708,0.050680,-0.015906,0.017293,-0.037344,-0.013840,-0.024993,-0.011080,-0.046883,0.015491,132.0
440,440,-0.045472,-0.044642,0.039062,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044529,-0.025930,220.0


In [111]:
# Independent copy, so the column drops below leave df_diabetes untouched.
df2_copy = df_diabetes.copy(deep=True)

In [112]:
df2_copy

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.050680,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.050680,0.044451,-0.005670,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.025930,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0
...,...,...,...,...,...,...,...,...,...,...,...
437,0.041708,0.050680,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207,178.0
438,-0.005515,0.050680,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018114,0.044485,104.0
439,0.041708,0.050680,-0.015906,0.017293,-0.037344,-0.013840,-0.024993,-0.011080,-0.046883,0.015491,132.0
440,-0.045472,-0.044642,0.039062,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044529,-0.025930,220.0


In [115]:
df2_copy.drop(["age","sex"], axis=1)

Unnamed: 0,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.044451,-0.005670,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.025930,141.0
3,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0
...,...,...,...,...,...,...,...,...,...
437,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207,178.0
438,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018114,0.044485,104.0
439,-0.015906,0.017293,-0.037344,-0.013840,-0.024993,-0.011080,-0.046883,0.015491,132.0
440,0.039062,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044529,-0.025930,220.0


In [116]:
df2_copy.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


In [117]:
# Reassign instead of inplace=True: the result is the same, but the rebinding is
# explicit and the columns= form matches the drop in the next cell.
df2_copy = df2_copy.drop(columns=["age", "sex"])
df2_copy.head()

Unnamed: 0,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,151.0
1,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0


In [118]:
df2_copy.drop(columns=["bmi","target"])

Unnamed: 0,bp,s1,s2,s3,s4,s5,s6
0,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646
1,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204
2,-0.005670,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.025930
3,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362
4,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641
...,...,...,...,...,...,...,...
437,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207
438,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018114,0.044485
439,0.017293,-0.037344,-0.013840,-0.024993,-0.011080,-0.046883,0.015491
440,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044529,-0.025930


## Other Methods of Pandas DataFrame

In [120]:
df_diabetes.sort_values("bp", ascending= True)

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
224,-0.027310,-0.044642,-0.066563,-0.112399,-0.049727,-0.041397,0.000779,-0.039493,-0.035816,-0.009362,77.0
41,-0.099961,-0.044642,-0.067641,-0.108956,-0.074494,-0.072712,0.015505,-0.039493,-0.049872,-0.009362,55.0
93,-0.049105,-0.044642,-0.064408,-0.102070,-0.002945,-0.015406,0.063367,-0.047243,-0.033246,-0.054925,96.0
84,0.001751,-0.044642,-0.039618,-0.100934,-0.029088,-0.030124,0.044958,-0.050195,-0.068332,-0.129483,65.0
171,-0.020045,-0.044642,-0.046085,-0.098627,-0.075870,-0.059873,-0.017629,-0.039493,-0.051404,-0.046641,74.0
...,...,...,...,...,...,...,...,...,...,...,...
375,0.045341,0.050680,-0.002973,0.107944,0.035582,0.022485,0.026550,-0.002592,0.028020,0.019633,217.0
350,-0.027310,0.050680,0.060618,0.107944,0.012191,-0.017598,-0.002903,-0.002592,0.070207,0.135612,243.0
408,0.063504,-0.044642,-0.050396,0.107944,0.031454,0.019354,-0.017629,0.023608,0.058038,0.040343,189.0
71,-0.001882,-0.044642,0.033673,0.125158,0.024574,0.026243,-0.010266,-0.002592,0.026717,0.061054,270.0


In [46]:
df_diabetes.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019908,-0.017646,151.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.06833,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.005671,-0.045599,-0.034194,-0.032356,-0.002592,0.002864,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022692,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031991,-0.046641,135.0


In [45]:
df_diabetes.sort_values(["bp","bmi"],ascending=False)

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
340,-0.016412,-0.044642,-0.013751,0.132044,-0.009825,-0.003819,0.019187,-0.039493,-0.035817,-0.030072,216.0
71,-0.001882,-0.044642,0.033673,0.125158,0.024574,0.026243,-0.010266,-0.002592,0.026714,0.061054,270.0
350,-0.027310,0.050680,0.060618,0.107944,0.012191,-0.017598,-0.002903,-0.002592,0.070211,0.135612,243.0
375,0.045341,0.050680,-0.002973,0.107944,0.035582,0.022485,0.026550,-0.002592,0.028017,0.019633,217.0
408,0.063504,-0.044642,-0.050396,0.107944,0.031454,0.019354,-0.017629,0.023608,0.058039,0.040343,189.0
...,...,...,...,...,...,...,...,...,...,...,...
171,-0.020045,-0.044642,-0.046085,-0.098628,-0.075870,-0.059873,-0.017629,-0.039493,-0.051401,-0.046641,74.0
84,0.001751,-0.044642,-0.039618,-0.100923,-0.029088,-0.030124,0.044958,-0.050195,-0.068330,-0.129483,65.0
93,-0.049105,-0.044642,-0.064408,-0.102071,-0.002945,-0.015406,0.063367,-0.047243,-0.033249,-0.054925,96.0
41,-0.099961,-0.044642,-0.067641,-0.108957,-0.074494,-0.072712,0.015505,-0.039493,-0.049868,-0.009362,55.0


In [126]:
# replace() returns a new frame and leaves the original untouched. Bind the
# result back so that the later df_diabetes.head() really shows 999.0 where the
# value was 151.0, as its recorded output does.
df_diabetes = df_diabetes.replace(151.0, 999)
df_diabetes

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.050680,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,999.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.050680,0.044451,-0.005670,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.025930,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0
...,...,...,...,...,...,...,...,...,...,...,...
437,0.041708,0.050680,0.019662,0.059744,-0.005697,-0.002566,-0.028674,-0.002592,0.031193,0.007207,178.0
438,-0.005515,0.050680,-0.015906,-0.067642,0.049341,0.079165,-0.028674,0.034309,-0.018114,0.044485,104.0
439,0.041708,0.050680,-0.015906,0.017293,-0.037344,-0.013840,-0.024993,-0.011080,-0.046883,0.015491,132.0
440,-0.045472,-0.044642,0.039062,0.001215,0.016318,0.015283,-0.028674,0.026560,0.044529,-0.025930,220.0


In [53]:
df_diabetes.columns

Index(['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6',
       'target'],
      dtype='object')

In [127]:
df_diabetes.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6,target
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019907,-0.017646,999.0
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.068332,-0.092204,75.0
2,0.085299,0.05068,0.044451,-0.00567,-0.045599,-0.034194,-0.032356,-0.002592,0.002861,-0.02593,141.0
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022688,-0.009362,206.0
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031988,-0.046641,135.0
