## Library

In [36]:
! pip install pandas



In [37]:
import pandas as pd

## Creating Dataframes

In [38]:
# Build a DataFrame from a dict: each key is a column name and each value
# is the list of entries for that column.
fruits1_dict = {
    'Fruit': ['Apple', 'Banana', 'Orange'],
    'Color': ['Red', 'Yellow', 'Orange'],
}
fruits1 = pd.DataFrame(data=fruits1_dict)
fruits1

Unnamed: 0,Fruit,Color
0,Apple,Red
1,Banana,Yellow
2,Orange,Orange


In [39]:
# Build a DataFrame from a list of rows; the column names are supplied separately.
fruits2_list = [
    ['Kiwi', 'Green'],
    ['Lemon', 'Yellow'],
    ['Plums', 'Dark Red'],
]
fruits2 = pd.DataFrame(data=fruits2_list, columns=['Fruit', 'Color'])
fruits2

Unnamed: 0,Fruit,Color
0,Kiwi,Green
1,Lemon,Yellow
2,Plums,Dark Red


In [40]:
# Stack the two frames vertically (row-wise). Each frame keeps its own row
# labels, so the combined index repeats 0-2; passing ignore_index=True to
# concat would renumber the rows 0-5 instead.
fruits = pd.concat([fruits1, fruits2], axis=0)
fruits

Unnamed: 0,Fruit,Color
0,Apple,Red
1,Banana,Yellow
2,Orange,Orange
0,Kiwi,Green
1,Lemon,Yellow
2,Plums,Dark Red


## Data loading and Inspection

In [41]:
# Read a comma-separated (CSV) file into a DataFrame.
data = pd.read_csv("iris_csv.csv")

# To read a tab-separated (TSV) file, use read_table (tab is its default
# separator); note that pandas has no `read_tsv` function.
# pd.read_table("dataset_path")

# Equivalent: read a TSV file with read_csv by passing the separator explicitly.
# pd.read_csv("dataset_path", sep = '\t')

Basic Data Observations

In [42]:
# Preview the first five rows (five is the default for head).
data.head(n=5)

Unnamed: 0,sepallength,sepalwidth,petallength,petalwidth,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [43]:
# Preview only the first three rows.
data.head(n=3)

Unnamed: 0,sepallength,sepalwidth,petallength,petalwidth,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa


In [44]:
# Preview the last five rows (five is the default for tail).
data.tail(n=5)

Unnamed: 0,sepallength,sepalwidth,petallength,petalwidth,class
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica
149,5.9,3.0,5.1,1.8,Iris-virginica


**Data Inspection**

In [45]:
# Dimensions of the frame as a (rows, columns) tuple.
data.shape

(150, 5)

In [46]:
# Print a structural summary: index range, per-column non-null counts and
# dtypes, and approximate memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   sepallength  150 non-null    float64
 1   sepalwidth   150 non-null    float64
 2   petallength  150 non-null    float64
 3   petalwidth   150 non-null    float64
 4   class        150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [47]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; the string-valued 'class' column is not included in the result.
data.describe()

Unnamed: 0,sepallength,sepalwidth,petallength,petalwidth
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


**Accessing Column Data**

In [49]:
# Select a single column as a Series. Bracket access works for every column
# name, whereas attribute access (data.sepallength) cannot reach columns whose
# names are Python keywords or invalid identifiers, such as 'class' here.
data['sepallength']

0      5.1
1      4.9
2      4.7
3      4.6
4      5.0
      ... 
145    6.7
146    6.3
147    6.5
148    6.2
149    5.9
Name: sepallength, Length: 150, dtype: float64

In [50]:
# Number of distinct values in each column.
data.nunique()

sepallength    35
sepalwidth     23
petallength    43
petalwidth     22
class           3
dtype: int64

In [52]:
# The five most frequent sepal widths: value_counts orders the counts from
# most to least common, and head keeps the first five.
data['sepalwidth'].value_counts().head(n=5)

3.0    26
2.8    14
3.2    13
3.1    12
3.4    12
Name: sepalwidth, dtype: int64

## Data Selection and Indexing

Selecting Columns and Rows

In [53]:
# Passing a list of column names returns a DataFrame containing just those
# columns, in the order listed.
data[['sepalwidth', 'sepallength']]

Unnamed: 0,sepalwidth,sepallength
0,3.5,5.1
1,3.0,4.9
2,3.2,4.7
3,3.1,4.6
4,3.6,5.0
...,...,...
145,3.0,6.7
146,2.5,6.3
147,3.0,6.5
148,3.4,6.2


Using **'.loc[]'**

In [54]:
# .loc selects by label: rows labelled 15 through 25 and the columns from
# 'sepallength' through 'petallength'. Unlike positional slices, label slices
# include the end label (note that 'petallength' is part of the result).
data.loc[15:25, 'sepallength':'petallength']

Unnamed: 0,sepallength,sepalwidth,petallength
15,5.7,4.4,1.5
16,5.4,3.9,1.3
17,5.1,3.5,1.4
18,5.7,3.8,1.7
19,5.1,3.8,1.5
20,5.4,3.4,1.7
21,5.1,3.7,1.5
22,4.6,3.6,1.0
23,5.1,3.3,1.7
24,4.8,3.4,1.9


In [55]:
# The single row with index label 75, across all columns; returned as a Series.
data.loc[75,:]

sepallength                6.6
sepalwidth                 3.0
petallength                4.4
petalwidth                 1.4
class          Iris-versicolor
Name: 75, dtype: object

In [56]:
# All rows of the 'petalwidth' column; returned as a Series.
data.loc[:,'petalwidth']

0      0.2
1      0.2
2      0.2
3      0.2
4      0.2
      ... 
145    2.3
146    1.9
147    2.0
148    2.3
149    1.8
Name: petalwidth, Length: 150, dtype: float64

Using **'.iloc[]'**

In [57]:
data.iloc[9,2] # value at row position 9, column position 2 (zero-based, i.e. the 10th row and 3rd column)

1.5

In [58]:
data.iloc[5:10, 1:3] # row positions 5 to 9 and column positions 1 to 2 (zero-based; the slice end is exclusive)

Unnamed: 0,sepalwidth,petallength
5,3.9,1.7
6,3.4,1.4
7,3.4,1.5
8,2.9,1.4
9,3.1,1.5


In [59]:
data.iloc[[7, 9, 11], 0:2] # only the rows at positions 7, 9 and 11 (zero-based), first two columns

Unnamed: 0,sepallength,sepalwidth
7,5.0,3.4
9,4.9,3.1
11,4.8,3.4


In [60]:
# Pairwise correlation of the numeric columns. numeric_only=True explicitly
# excludes the string-valued 'class' column; relying on pandas to drop it
# silently is deprecated and raises an error in newer pandas versions.
data.corr(numeric_only=True)

  data.corr()


Unnamed: 0,sepallength,sepalwidth,petallength,petalwidth
sepallength,1.0,-0.109369,0.871754,0.817954
sepalwidth,-0.109369,1.0,-0.420516,-0.356544
petallength,0.871754,-0.420516,1.0,0.962757
petalwidth,0.817954,-0.356544,0.962757,1.0


In [65]:
# Mean / min / max of every other numeric column for each distinct sepal width.
# Only numeric columns are kept before grouping because 'mean' is undefined for
# the string-valued 'class' column; grouping by column name makes 'sepalwidth'
# the index of the result.
data.select_dtypes(include='number').groupby('sepalwidth').agg(['mean', 'min', 'max'])

  data.groupby(data['sepalwidth']).agg(['mean', 'min','max'])


Unnamed: 0_level_0,sepallength,sepallength,sepallength,petallength,petallength,petallength,petalwidth,petalwidth,petalwidth
Unnamed: 0_level_1,mean,min,max,mean,min,max,mean,min,max
sepalwidth,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
2.0,5.0,5.0,5.0,3.5,3.5,3.5,1.0,1.0,1.0
2.2,6.066667,6.0,6.2,4.5,4.0,5.0,1.333333,1.0,1.5
2.3,5.325,4.5,6.3,3.25,1.3,4.4,0.975,0.3,1.3
2.4,5.3,4.9,5.5,3.6,3.3,3.8,1.033333,1.0,1.1
2.5,5.7625,4.9,6.7,4.5125,3.0,5.8,1.55,1.1,2.0
2.6,6.16,5.5,7.7,4.88,3.5,6.9,1.42,1.0,2.3
2.7,5.855556,5.2,6.4,4.622222,3.9,5.3,1.555556,1.0,1.9
2.8,6.335714,5.6,7.7,5.042857,4.0,6.7,1.707143,1.2,2.4
2.9,6.06,4.4,7.3,4.35,1.4,6.3,1.32,0.2,1.8
3.0,6.015385,4.3,7.7,4.234615,1.1,6.6,1.403846,0.1,2.3
