In [1]:
# Uncomment the next line if pandas is not installed in this kernel's environment.
# %pip install pandas

In [2]:
import pandas as pd

## Creating a DataFrame

In [3]:
# 1. From a dictionary: each key becomes a column label and each list
#    supplies the values of that column.
data = dict(
    name=['Alice', 'Bob', 'Charlie'],
    age=[25, 30, 35],
)
df = pd.DataFrame(data)

In [4]:
# Confirm that the constructor produced a pandas DataFrame.
print(type(df))

<class 'pandas.core.frame.DataFrame'>


In [5]:
# Column labels; here they come from the dictionary keys.
df.columns

Index(['name', 'age'], dtype='object')

In [9]:
# 2. From list of lists
# Each inner list is one row. No column names are passed here, so the
# frame gets default integer column labels.
data = [['Alice', 25], ['Bob', 30], ['Charlie', 35]]
df = pd.DataFrame(data)

In [10]:
# The result is still a DataFrame, regardless of the input container.
print(type(df))

<class 'pandas.core.frame.DataFrame'>


In [11]:
# Without explicit names the columns are a default integer RangeIndex.
df.columns

RangeIndex(start=0, stop=2, step=1)

In [12]:
# Name the columns after construction by replacing the column index.
df.columns = ['name', 'age']

In [13]:
# Verify that the new column labels are in place.
df.columns

Index(['name', 'age'], dtype='object')

In [14]:
# Alternative: supply the column names at construction time.
df = pd.DataFrame(data, columns=['name', 'age'])

In [15]:
# Same labels as the rename approach, without a separate assignment step.
df.columns

Index(['name', 'age'], dtype='object')

## Accessing Data

In [16]:
# Selecting a single column by its label.
df['name']

0      Alice
1        Bob
2    Charlie
Name: name, dtype: object

In [17]:
# A single-label selection is a Series, not a DataFrame.
type(df['name'])

pandas.core.series.Series

In [18]:
# Indexing with a list of labels selects several columns at once and
# yields a DataFrame.
list_col = ['name', 'age']

df[list_col]
# Equivalent inline form: df[['name', 'age']]

Unnamed: 0,name,age
0,Alice,25
1,Bob,30
2,Charlie,35


### Condition-based filtering

In [21]:
# An element-wise comparison returns a boolean Series (a mask), one value per row.
df['age'] >= 30

0    False
1     True
2     True
Name: age, dtype: bool

In [22]:
# Indexing with the boolean mask keeps only the rows where it is True.
df[df['age'] >= 30]

Unnamed: 0,name,age
1,Bob,30
2,Charlie,35


In [25]:
# Build each condition as its own boolean mask, then combine them with the
# element-wise AND operator (&) and use the result to filter rows.
starts_with_a = df['name'].str.startswith('A')
at_least_25 = df['age'] >= 25
df[starts_with_a & at_least_25]

Unnamed: 0,name,age
0,Alice,25


In [26]:
# Boolean mask marking missing values in 'age'.
df['age'].isna()

0    False
1    False
2    False
Name: age, dtype: bool

In [27]:
# Rows whose 'age' is missing; the result is empty when nothing is missing.
df[df['age'].isna()]

Unnamed: 0,name,age


### Selecting with `.loc`, `.iloc`, `.at` and `.iat`

In [28]:
# .loc is label-based: row label 0, column label 'name'.
df.loc[0, 'name']

'Alice'

In [32]:
# Label slices include both endpoints: 0:1 selects the rows labelled 0 and 1.
df.loc[0:1, ['name', 'age']]

Unnamed: 0,name,age
0,Alice,25
1,Bob,30


In [33]:
# .iloc is position-based: first row, first column.
df.iloc[0, 0]

'Alice'

In [36]:
# Positional slices exclude the stop value: 0:1 selects only the first row.
df.iloc[0:1, [0, 1]]

Unnamed: 0,name,age
0,Alice,25


In [35]:
# 0:2 selects positions 0 and 1, matching the label slice .loc[0:1] on this frame.
df.iloc[0:2, [0, 1]]

Unnamed: 0,name,age
0,Alice,25
1,Bob,30


In [37]:
# .at reads a single value by row label and column label.
df.at[0, 'name']

'Alice'

In [38]:
# .iat reads a single value by integer row and column position.
df.iat[0, 0]

'Alice'

## Reading and writing data

In [39]:
# Load the CSV into a DataFrame. The path is relative to the notebook's
# working directory, so it does not depend on a machine-specific absolute path.
df = pd.read_csv('./../data.csv')

In [40]:
# Display the frame as loaded from the CSV file.
df

Unnamed: 0,name,age
0,Alice,25
1,Bob,30


In [42]:
# Add a new row by assigning to a row label (2) that does not exist yet.
# NOTE(review): the row is created by the first assignment, before 'age' has a
# value; this is presumably why 'age' is displayed as float (25.0, 30.0, 34.0)
# afterwards — confirm. Assigning the whole row in one statement may avoid it.
df.loc[2, 'name'] = 'Charlie'
df.loc[2, 'age'] = 34

In [43]:
# Display the frame again to see the newly added row.
df

Unnamed: 0,name,age
0,Alice,25.0
1,Bob,30.0
2,Charlie,34.0


In [45]:
# Write the frame to CSV; index=False leaves the row index out of the file.
# NOTE(review): this overwrites the same file that was read earlier
# ('./../data.csv'), so re-running the notebook loads the already-modified data.
df.to_csv('./../data.csv', index=False)

## Data Exploration

In [46]:
# First rows of the frame (up to five by default; this frame only has three).
df.head()

Unnamed: 0,name,age
0,Alice,25.0
1,Bob,30.0
2,Charlie,34.0


In [47]:
# First two rows only.
df.head(2)

Unnamed: 0,name,age
0,Alice,25.0
1,Bob,30.0


In [48]:
# Last two rows only.
df.tail(2)

Unnamed: 0,name,age
1,Bob,30.0
2,Charlie,34.0


In [49]:
# Structural summary: index range, per-column non-null counts and dtypes, memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   name    3 non-null      object 
 1   age     3 non-null      float64
dtypes: float64(1), object(1)
memory usage: 176.0+ bytes


In [50]:
# Summary statistics for the numeric columns (only 'age' in this frame).
df.describe()

Unnamed: 0,age
count,3.0
mean,29.666667
std,4.50925
min,25.0
25%,27.5
50%,30.0
75%,32.0
max,34.0


In [51]:
# How many times each distinct (name, age) row combination occurs.
df.value_counts()

name     age 
Alice    25.0    1
Bob      30.0    1
Charlie  34.0    1
Name: count, dtype: int64

In [52]:
# Number of distinct values in the 'age' column.
df['age'].nunique()

3

In [53]:
# Dimensions of the frame as (rows, columns).
df.shape

(3, 2)

In [54]:
# Element-wise mask: True wherever a value is missing.
df.isnull()

Unnamed: 0,name,age
0,False,False
1,False,False
2,False,False


In [55]:
# Summing the mask column-wise gives the number of missing values per column.
df.isnull().sum()

name    0
age     0
dtype: int64