In [101]:
import pandas as pd
import numpy as np

## Series


In [102]:
# Passing an explicit dtype casts the integer literals: the result holds complex numbers
pd.Series([1, 2, 3, 4, 5], dtype=complex)

0    1.0+0.0j
1    2.0+0.0j
2    3.0+0.0j
3    4.0+0.0j
4    5.0+0.0j
dtype: complex128

In [103]:
# Build a Series from a dict. Only the keys listed in `index` are kept,
# so the 'z' entry does not appear in the result.
coordinates = dict(x=2.90, y=3.09, z=0.81)
# NOTE: the name literal starts with a space, which is carried into the repr.
pd.Series(coordinates, index=['x', 'y'], name=" Coordinates")

x    2.90
y    3.09
Name:  Coordinates, dtype: float64

In [104]:
# Labels and values are paired by position: index[i] labels data[i].
index = [
    "apple",
    "orange",
    "banana",
    "strawberry",
    "blueberry",
]
data = list(range(10, 60, 10))
pd.Series(data, index, name="Fruits")

apple         10
orange        20
banana        30
strawberry    40
blueberry     50
Name: Fruits, dtype: int64

In [105]:
# No index is supplied here, so the values are labelled 0, 1, 2, ...
array = list(range(1, 7))
pd.Series(array, name="AP")

0    1
1    2
2    3
3    4
4    5
5    6
Name: AP, dtype: int64

## DataFrame


In [106]:
# Column name -> column values. Every column has five entries, so they line
# up row by row: a plain list, five evenly spaced floats from 1 to 100, and
# the odd integers below 10.
a = dict(
    col1=[1, 2, 3, 4, 5],
    col2=np.linspace(start=1, stop=100, num=5),
    col3=np.arange(1, 10, 2),
)
pd.DataFrame(a)

Unnamed: 0,col1,col2,col3
0,1,1.0,1
1,2,25.75,3
2,3,50.5,5
3,4,75.25,7
4,5,100.0,9


In [107]:
# Yearly sales figures per fruit. The raw dict gets its own name so that
# `fruits` only ever refers to the DataFrame; re-binding one name from a dict
# to a DataFrame makes the cell harder to read and reason about.
fruit_sales = {"Apples": [21, 22, 26, 34], "Bananas": [30, 31, 28, 25]}
# One row label per entry in each sales list.
index = ["2017 Sales", "2018 Sales", "2019 Sales", "2020 Sales"]

# Columns come from the dict keys, row labels from `index`.
fruits = pd.DataFrame(data=fruit_sales, index=index)

fruits

Unnamed: 0,Apples,Bananas
2017 Sales,21,30
2018 Sales,22,31
2019 Sales,26,28
2020 Sales,34,25


In [108]:
# Selecting a single column by name returns a pandas Series (named after the column), not a DataFrame
fruits["Apples"]  # or fruits.Apples

2017 Sales    21
2018 Sales    22
2019 Sales    26
2020 Sales    34
Name: Apples, dtype: int64

In [109]:
# Save the DataFrame as fruits.csv; the path is relative, so the file location
# depends on the current working directory
fruits.to_csv("fruits.csv")

In [110]:
# Location of the student dataset.
# NOTE(review): this is a machine-specific absolute path, so the notebook only
# runs where this exact file exists. Consider a path relative to the notebook.
student_csv = "C:/Users/vk706/Downloads/student.csv"
df = pd.read_csv(student_csv)

df.shape  # (number of rows, number of columns)

(35, 5)

In [111]:
df.info()  # Summary of the frame: column names, non-null counts, dtypes and memory usage

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35 entries, 0 to 34
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   id      35 non-null     int64 
 1   name    35 non-null     object
 2   class   35 non-null     object
 3   mark    35 non-null     int64 
 4   gender  35 non-null     object
dtypes: int64(2), object(3)
memory usage: 1.5+ KB


In [112]:
df.head()  # First 5 rows (the count used when no argument is given)

Unnamed: 0,id,name,class,mark,gender
0,1,John Deo,Four,75,female
1,2,Max Ruin,Three,85,male
2,3,Arnold,Three,55,male
3,4,Krish Star,Four,60,female
4,5,John Mike,Four,60,female


In [113]:
df.tail()  # Last 5 rows (the count used when no argument is given)

Unnamed: 0,id,name,class,mark,gender
30,31,Marry Toeey,Four,88,male
31,32,Binn Rott,Seven,90,female
32,33,Kenn Rein,Six,96,female
33,34,Gain Toe,Seven,69,male
34,35,Rows Noump,Six,88,female


## Indexing & Slicing

In [114]:
# iloc selects by integer position (0-based), not by index label
df.iloc[0]  # First row of the data, returned as a Series

id               1
name      John Deo
class         Four
mark            75
gender      female
Name: 0, dtype: object

In [115]:
df.iloc[0:6:2]  # Every second row among the first 6, i.e. positions 0, 2 and 4

Unnamed: 0,id,name,class,mark,gender
0,1,John Deo,Four,75,female
2,3,Arnold,Three,55,male
4,5,John Mike,Four,60,female


In [116]:
df.iloc[[0, 2, 4], 1]  # First, third and fifth rows, second column only
# Because a single column position is given, the result is a Series

0     John Deo
2       Arnold
4    John Mike
Name: name, dtype: object

In [117]:
df.iloc[-5:]  # Last 5 rows of the data (negative positions count from the end)

Unnamed: 0,id,name,class,mark,gender
30,31,Marry Toeey,Four,88,male
31,32,Binn Rott,Seven,90,female
32,33,Kenn Rein,Six,96,female
33,34,Gain Toe,Seven,69,male
34,35,Rows Noump,Six,88,female


In [123]:
# loc is used to access the data by label
df.loc[0:5,['name','gender']]  # Rows labelled 0 through 5 (end label included), only the name and gender columns

Unnamed: 0,name,gender
0,John Deo,female
1,Max Ruin,male
2,Arnold,male
3,Krish Star,female
4,John Mike,female
5,Alex John,male


In [132]:
# Element-wise comparison: produces a boolean Series, True where mark is at least 90
df['mark'] >= 90

0     False
1     False
2     False
3     False
4     False
5     False
6     False
7     False
8     False
9     False
10    False
11     True
12    False
13    False
14    False
15    False
16    False
17    False
18    False
19    False
20    False
21    False
22    False
23    False
24    False
25    False
26    False
27    False
28    False
29    False
30    False
31     True
32     True
33    False
34    False
Name: mark, dtype: bool

In [133]:
# Indexing with a boolean mask keeps only the rows where the mask is True
df[df['mark'] >= 90]

Unnamed: 0,id,name,class,mark,gender
11,12,Recky,Six,94,female
31,32,Binn Rott,Seven,90,female
32,33,Kenn Rein,Six,96,female


In [137]:
# Two conditions combined with & (each wrapped in parentheses): mark of at least 80 AND gender male
df.loc[(df['mark'] >= 80) & (df.gender == 'male')]

Unnamed: 0,id,name,class,mark,gender
1,2,Max Ruin,Three,85,male
7,8,Asruid,Five,85,male
14,15,Tade Row,Four,88,male
15,16,Gimmy,Four,88,male
24,25,Giff Tow,Seven,88,male
30,31,Marry Toeey,Four,88,male
