# PANDAS

In [1]:
import pandas as pd

#### Load data from CSV

In [2]:
# Read the customer CSV (path is relative to the notebook's working directory).
csv_path = 'Mall_Customers.csv'
df = pd.read_csv(csv_path)

In [3]:
# Preview the first six rows of the loaded data.
df.head(n=6)

Unnamed: 0,CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100)
0,1,Male,19,15,39
1,2,Male,21,15,81
2,3,Female,20,16,6
3,4,Female,23,16,77
4,5,Female,31,17,40
5,6,Female,22,17,76


#### Data Frame Operations

**A DataFrame is a two-dimensional data structure in which data is arranged in tabular form, i.e., in rows and columns. Pandas DataFrames make manipulating your data easy: you can select and replace columns and rows, and even reshape your data.**

In [4]:
# Get dataframe info

In [5]:
# Print a concise summary of the frame: index range, column names,
# non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 5 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   CustomerID              200 non-null    int64 
 1   Genre                   200 non-null    object
 2   Age                     200 non-null    int64 
 3   Annual Income (k$)      200 non-null    int64 
 4   Spending Score (1-100)  200 non-null    int64 
dtypes: int64(4), object(1)
memory usage: 7.9+ KB


In [6]:
# Rename column
# The source data labels the Male/Female column 'Genre'; relabel it 'Gender'.
# rename() returns a new frame, so the result is bound back to df.
column_renames = {'Genre': 'Gender'}
df = df.rename(columns=column_renames)

In [7]:
# Check the result: 'Genre' should now be shown as 'Gender'.
df.head(n=5)

Unnamed: 0,CustomerID,Gender,Age,Annual Income (k$),Spending Score (1-100)
0,1,Male,19,15,39
1,2,Male,21,15,81
2,3,Female,20,16,6
3,4,Female,23,16,77
4,5,Female,31,17,40


In [8]:
# Add column
# Assigning a scalar to a new column label creates the column and
# broadcasts the value to every row.
empty_address = ''
df['Address'] = empty_address

In [9]:
# The new, empty 'Address' column should appear as the last column.
df.head(n=5)

Unnamed: 0,CustomerID,Gender,Age,Annual Income (k$),Spending Score (1-100),Address
0,1,Male,19,15,39,
1,2,Male,21,15,81,
2,3,Female,20,16,6,
3,4,Female,23,16,77,
4,5,Female,31,17,40,


In [10]:
#Remove column

In [11]:
# Drop the 'Address' column. errors='ignore' keeps this cell safe to re-run:
# a bare `del df['Address']` raises KeyError once the column is already gone.
df = df.drop(columns=['Address'], errors='ignore')

In [12]:
# 'Address' should no longer be among the columns.
df.head(n=5)

Unnamed: 0,CustomerID,Gender,Age,Annual Income (k$),Spending Score (1-100)
0,1,Male,19,15,39
1,2,Male,21,15,81
2,3,Female,20,16,6
3,4,Female,23,16,77
4,5,Female,31,17,40


In [13]:
# Select one or multiple columns

In [14]:
# Passing a list of labels selects several columns and returns a DataFrame.
selected_columns = ['Gender', 'Age']
df2 = df[selected_columns]

In [15]:
# Preview the two-column subset.
df2.head(n=5)

Unnamed: 0,Gender,Age
0,Male,19
1,Male,21
2,Female,20
3,Female,23
4,Female,31


In [16]:
# select rows with a condition
# Build the boolean row mask once, then select rows and the column in a
# single .loc call instead of chaining a second [] lookup onto the result
# (chained indexing creates an intermediate frame and is discouraged).
is_male_over_20 = (df['Age'] > 20) & (df['Gender'] == 'Male')
df.loc[is_male_over_20, ['Annual Income (k$)']]

Unnamed: 0,Annual Income (k$)
1,15
8,19
10,19
14,20
15,20
...,...
187,101
192,113
197,126
198,137


In [18]:
#slice dataframe
# .loc slices by index label and includes both endpoints: rows 1, 2 and 3.
df.loc[1:3, :]

Unnamed: 0,CustomerID,Gender,Age,Annual Income (k$),Spending Score (1-100)
1,2,Male,21,15,81
2,3,Female,20,16,6
3,4,Female,23,16,77


In [19]:
# .iloc slices by integer position and excludes the end:
# rows at positions 1-2, columns at positions 2-3.
row_positions = slice(1, 3)
column_positions = slice(2, 4)
df.iloc[row_positions, column_positions]

Unnamed: 0,Age,Annual Income (k$)
1,21,15
2,20,16


### Mapping/Dict/Dictionary (Key , Value)
A Python dictionary is a collection of key-value pairs (insertion-ordered since Python 3.7). It is defined using braces, with the elements separated by commas. Values can be of any type; keys must be hashable (e.g., strings, numbers, or tuples). Each key-value pair is written with a colon (key:value).

In [None]:
#create dictionary

In [1]:
# Column-oriented data: each keyword is a column name mapped to its values.
d = dict(
    col1=[1, 2, 4],
    col2=[6, 7, 8],
)

In [None]:
#load dictionary into dataframe

In [4]:
# orient='columns' (the default) treats each dictionary key as a column name.
# Note: this rebinds df, replacing the frame previously held under that name.
df = pd.DataFrame.from_dict(d, orient='columns')

In [5]:
# Dictionary keys become the column names; each list supplies that column's values.
df.head(n=5)

Unnamed: 0,col1,col2
0,1,6
1,2,7
2,4,8


### List

In [None]:
#create list

In [6]:
# Two flat lists, then a nested list that holds both of them in order.
l1, l2 = ['one', 'two', 'three'], [1, 2, 3]
l = [l1, l2]

In [None]:
#create dataframe from list

In [7]:
# Build a frame from the nested list; each inner list becomes one row.
# Note: this rebinds df, replacing the frame previously held under that name.
df = pd.DataFrame(data=l)

In [8]:
# Each inner list is one row; the columns get default integer labels 0, 1, 2.
df.head(n=5)

Unnamed: 0,0,1,2
0,one,two,three
1,1,2,3


In [None]:
#other option to load data into dataframe
pd.rea