In [1]:
import pandas as pd

# Pandas Series

In [None]:
# Build the CGPA Series and give it its display name in one constructor call.
series = pd.Series(
    data=[3.3, 3.9, 3.5, 2.8, 2.5, 3.0],
    name="Student CGPA's",
)
series

0    3.3
1    3.9
2    3.5
3    2.8
4    2.5
5    3.0
Name: Student CGPA's, dtype: float64

In [None]:
# The underlying data without the index labels (a NumPy array).
series.values

array([3.3, 3.9, 3.5, 2.8, 2.5, 3. ])

In [None]:
# The index labels; none were supplied at creation, so this is the default RangeIndex.
series.index

RangeIndex(start=0, stop=6, step=1)

In [None]:
# Check which container type .values hands back.
type(series.values)

numpy.ndarray

In [None]:
# Lookup by label: with the default integer index, label 1 is the second value.
series[1]

3.9

In [None]:
# Unlike a list, a Series can carry explicit index labels: replace the default
# 0..5 integers with one student name per value, in order.
series.index = ["Tom", "Bruce", "Tony", "Steve", "Harry", "John"]
series

Tom      3.3
Bruce    3.9
Tony     3.5
Steve    2.8
Harry    2.5
John     3.0
Name: Student CGPA's, dtype: float64

In [None]:
# With named labels in the index, values can be looked up by label, much like keys in a dict.
series["Tom"]

3.3

In [None]:
# Attribute-style access: the index label is used as an attribute name (same value as series["Harry"]).
# This only works for labels that are valid Python identifiers and do not clash with an existing Series attribute.
series.Harry

2.5

In [None]:
# .iloc selects by integer position, independent of the index labels.
series.iloc[0]

3.3

In [None]:
# Passing a list of positions to .iloc returns a Series containing just those entries.
series.iloc[[0, 1]]

Tom      3.3
Bruce    3.9
Name: Student CGPA's, dtype: float64

In [None]:
# Element-wise comparison: produces a boolean Series (a mask) with one True/False per student.
series >= 3.0

Tom       True
Bruce     True
Tony      True
Steve    False
Harry    False
John      True
Name: Student CGPA's, dtype: bool

# Operations

In [None]:
# Arithmetic with a scalar is applied to every element.
series + 0.1

Tom      3.4
Bruce    4.0
Tony     3.6
Steve    2.9
Harry    2.6
John     3.1
Name: Student CGPA's, dtype: float64

In [None]:
# Express each CGPA as a percentage of 4 (value * 100 / 4).
(series * 100) / 4

Tom      82.5
Bruce    97.5
Tony     87.5
Steve    70.0
Harry    62.5
John     75.0
Name: Student CGPA's, dtype: float64

In [None]:
# The arithmetic cells above returned new Series objects; `series` itself still holds the original values.
series

Tom      3.3
Bruce    3.9
Tony     3.5
Steve    2.8
Harry    2.5
John     3.0
Name: Student CGPA's, dtype: float64

# Methods

In [None]:
# Arithmetic mean of the values.
series.mean()

3.1666666666666665

In [None]:
# Sample standard deviation (pandas divides by n - 1 by default).
series.std()

0.5046450898073483

In [None]:
# Number of non-missing values.
series.count()

6

# Pandas DataFrames

In [35]:
# Build the student table from a dict: each key becomes a column label and
# each list supplies that column's values, one per student.
df = pd.DataFrame(
    data={
        "Name": ["Tom", "Bruce", "Tony", "Steve", "Harry", "John"],
        "CGPA": [3.3, 3.9, 3.5, 2.8, 2.5, 3.0],
        "Percentage": [82.5, 97.5, 87.5, 70.0, 62.5, 75.0],
        # Attendance is stored as text (note the "%" suffix), not as numbers.
        "Attendance": ["95%", "100%", "90%", "87%", "80%", "93%"],
    }
)
df

Unnamed: 0,Name,CGPA,Percentage,Attendance
0,Tom,3.3,82.5,95%
1,Bruce,3.9,97.5,100%
2,Tony,3.5,87.5,90%
3,Steve,2.8,70.0,87%
4,Harry,2.5,62.5,80%
5,John,3.0,75.0,93%


In [36]:
# First rows of the frame; head() shows 5 rows when called without an argument.
df.head()

Unnamed: 0,Name,CGPA,Percentage,Attendance
0,Tom,3.3,82.5,95%
1,Bruce,3.9,97.5,100%
2,Tony,3.5,87.5,90%
3,Steve,2.8,70.0,87%
4,Harry,2.5,62.5,80%


In [37]:
# Last rows of the frame; tail() shows 5 rows when called without an argument.
df.tail()

Unnamed: 0,Name,CGPA,Percentage,Attendance
1,Bruce,3.9,97.5,100%
2,Tony,3.5,87.5,90%
3,Steve,2.8,70.0,87%
4,Harry,2.5,62.5,80%
5,John,3.0,75.0,93%


In [38]:
# Column labels of the frame.
df.columns

Index(['Name', 'CGPA', 'Percentage', 'Attendance'], dtype='object')

In [39]:
# Row labels (the default RangeIndex here); they can be reassigned in the same way as a Series index.
df.index

RangeIndex(start=0, stop=6, step=1)

In [42]:
# Print a per-column summary: non-null counts, dtypes, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Name        6 non-null      object 
 1   CGPA        6 non-null      float64
 2   Percentage  6 non-null      float64
 3   Attendance  6 non-null      object 
dtypes: float64(2), object(2)
memory usage: 320.0+ bytes


In [43]:
# Total number of cells (rows x columns).
df.size

24

In [44]:
# Dimensions as a (rows, columns) tuple.
df.shape

(6, 4)

In [45]:
# Summary statistics; only the numeric columns (CGPA, Percentage) appear in the result.
df.describe()

Unnamed: 0,CGPA,Percentage
count,6.0,6.0
mean,3.166667,79.166667
std,0.504645,12.616127
min,2.5,62.5
25%,2.85,71.25
50%,3.15,78.75
75%,3.45,86.25
max,3.9,97.5


In [46]:
# Data type of each column.
df.dtypes

Name           object
CGPA          float64
Percentage    float64
Attendance     object
dtype: object

In [47]:
# Row at integer position 1, returned as a Series whose index is the column names.
df.iloc[1]

Name          Bruce
CGPA            3.9
Percentage     97.5
Attendance     100%
Name: 1, dtype: object

In [48]:
# Slicing with [] selects rows; the end of the slice is excluded, so this returns rows 1 and 2.
df[1:3]

Unnamed: 0,Name,CGPA,Percentage,Attendance
1,Bruce,3.9,97.5,100%
2,Tony,3.5,87.5,90%


In [49]:
# Same two rows as df[1:3], written with the explicit positional indexer.
df.iloc[1:3]

Unnamed: 0,Name,CGPA,Percentage,Attendance
1,Bruce,3.9,97.5,100%
2,Tony,3.5,87.5,90%


In [50]:
# Boolean mask inside .loc keeps only rows whose CGPA is strictly greater than 3.0
# (a CGPA of exactly 3.0 does not qualify).
df.loc[df["CGPA"] > 3.0]

Unnamed: 0,Name,CGPA,Percentage,Attendance
0,Tom,3.3,82.5,95%
1,Bruce,3.9,97.5,100%
2,Tony,3.5,87.5,90%


In [51]:
# Returns a new frame without the row labelled 0; df itself is not modified.
df.drop(0)

Unnamed: 0,Name,CGPA,Percentage,Attendance
1,Bruce,3.9,97.5,100%
2,Tony,3.5,87.5,90%
3,Steve,2.8,70.0,87%
4,Harry,2.5,62.5,80%
5,John,3.0,75.0,93%


In [52]:
# Returns a new frame without the listed columns; df itself is not modified.
df.drop(columns=["CGPA", "Attendance"])

Unnamed: 0,Name,Percentage
0,Tom,82.5
1,Bruce,97.5
2,Tony,87.5
3,Steve,70.0
4,Harry,62.5
5,John,75.0


In [53]:
# Only four countries are supplied for a six-row frame.
country = pd.Series(
    ["England", "England", "England", "Canada"]
)
# Column assignment aligns on index labels: `country` has labels 0-3 only,
# so rows 4 and 5 are left with missing values in the new column.
df["Country"] = country
df

Unnamed: 0,Name,CGPA,Percentage,Attendance,Country
0,Tom,3.3,82.5,95%,England
1,Bruce,3.9,97.5,100%,England
2,Tony,3.5,87.5,90%,England
3,Steve,2.8,70.0,87%,Canada
4,Harry,2.5,62.5,80%,
5,John,3.0,75.0,93%,


In [54]:
# Pull out the CGPA column as a Series for the statistics in the following cells.
CGPA = df["CGPA"]

In [55]:
# Smallest and largest CGPA, displayed together as a tuple.
CGPA.min(), CGPA.max()

(2.5, 3.9)

In [56]:
# Sum of all CGPA values.
CGPA.sum()

19.0

In [57]:
# Arithmetic mean of the CGPA column.
CGPA.mean()

3.1666666666666665

In [58]:
# Sample standard deviation of the CGPA column (pandas divides by n - 1 by default).
CGPA.std()

0.5046450898073483

In [59]:
# Median (middle value) of the CGPA column.
CGPA.median()

3.15

In [60]:
# All of the summary statistics for the CGPA column in one call.
CGPA.describe()

count    6.000000
mean     3.166667
std      0.504645
min      2.500000
25%      2.850000
50%      3.150000
75%      3.450000
max      3.900000
Name: CGPA, dtype: float64