In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_wine

In [2]:
# load_wine() returns a Bunch; the check below shows it also passes as a dict.
isinstance(load_wine(), dict)

True

In [3]:
# Concrete container type, then the fields it exposes.
# NOTE(review): both expressions are echoed below, which a default kernel does not
# do (only the last expression is shown) — presumably ast_node_interactivity="all"
# is configured outside this notebook; confirm.
type(load_wine())
load_wine().keys()

sklearn.utils._bunch.Bunch

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names'])

In [4]:
# as_frame=True makes .data a DataFrame holding only the feature columns.
wine = load_wine(as_frame=True).data
wine.head()
wine.shape  # (rows, columns)

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0


(178, 13)

### Selecting and accessing data
The three main accessors are:

- [] (The Indexing Operator): Primarily for selecting columns.

- .loc (Label-based): Selects by the name of the index/column.

- .iloc (Integer-based): Selects by the position (0-based index).

In [51]:
# Single column label -> Series
wine['alcohol']
type(wine['alcohol'])

# List of labels (even a one-element list) -> DataFrame
wine[['alcohol']]
type(wine[['alcohol']])

0      14.23
1      13.20
2      13.16
3      14.37
4      13.24
       ...  
173    13.71
174    13.40
175    13.27
176    13.17
177    14.13
Name: alcohol, Length: 178, dtype: float64

pandas.core.series.Series

Unnamed: 0,alcohol
0,14.23
1,13.20
2,13.16
3,14.37
4,13.24
...,...
173,13.71
174,13.40
175,13.27
176,13.17


pandas.core.frame.DataFrame

In [6]:
# Several columns at once: pass a list of labels.
wine[['ash', 'hue']]

Unnamed: 0,ash,hue
0,2.43,1.04
1,2.14,1.05
2,2.67,1.03
3,2.50,0.86
4,2.87,1.04
...,...,...
173,2.45,0.64
174,2.48,0.70
175,2.26,0.59
176,2.37,0.60


In [7]:
# .loc with a scalar row label returns that row as a Series ...
wine.loc[0]
# ... while a one-element list keeps it as a one-row DataFrame.
wine.loc[[0]]

alcohol                           14.23
malic_acid                         1.71
ash                                2.43
alcalinity_of_ash                 15.60
magnesium                        127.00
total_phenols                      2.80
flavanoids                         3.06
nonflavanoid_phenols               0.28
proanthocyanins                    2.29
color_intensity                    5.64
hue                                1.04
od280/od315_of_diluted_wines       3.92
proline                         1065.00
Name: 0, dtype: float64

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0


In [8]:
# A list of row labels selects exactly those rows.
wine.loc[[0, 1, 2]]

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0


In [9]:
# Positional selection: every row, columns at positions 0 and 1.
wine.iloc[:, [0, 1]]

Unnamed: 0,alcohol,malic_acid
0,14.23,1.71
1,13.20,1.78
2,13.16,2.36
3,14.37,1.95
4,13.24,2.59
...,...,...
173,13.71,5.65
174,13.40,3.91
175,13.27,4.28
176,13.17,2.59


In [10]:
# Label slice with a step of 2; .loc slices include the end label (row 10 is returned).
wine.loc[2:10:2]

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0
6,14.39,1.87,2.45,14.6,96.0,2.5,2.52,0.3,1.98,5.25,1.02,3.58,1290.0
8,14.83,1.64,2.17,14.0,97.0,2.8,2.98,0.29,1.98,5.2,1.08,2.85,1045.0
10,14.1,2.16,2.3,18.0,105.0,2.95,3.32,0.22,2.38,5.75,1.25,3.17,1510.0


In [11]:
# Scalar row + scalar column -> a single value
wine.loc[0, 'ash']
# Scalar row + list of columns -> Series indexed by column name
wine.loc[0, ['ash']]

wine.loc[0, ['alcohol', 'ash', 'hue']]

np.float64(2.43)

ash    2.43
Name: 0, dtype: float64

alcohol    14.23
ash         2.43
hue         1.04
Name: 0, dtype: float64

In [12]:
# Row label slice 0:4 is end-inclusive, so five rows come back.
# One column label -> Series
wine.loc[0:4, 'ash']
# List of column labels -> DataFrame
wine.loc[0:4, ['ash', 'hue']]

0    2.43
1    2.14
2    2.67
3    2.50
4    2.87
Name: ash, dtype: float64

Unnamed: 0,ash,hue
0,2.43,1.04
1,2.14,1.05
2,2.67,1.03
3,2.5,0.86
4,2.87,1.04


In [13]:
# Columns can be sliced by label too; both 'ash' and 'hue' are included.
wine.loc[0:4, 'ash':'hue']
# A list of row labels combined with a column label slice.
wine.loc[[3, 4, 5], 'ash':'hue']

Unnamed: 0,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue
0,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04
1,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05
2,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03
3,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86
4,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04


Unnamed: 0,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue
3,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86
4,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04
5,2.45,15.2,112.0,3.27,3.39,0.34,1.97,6.75,1.05


In [14]:
# ':' keeps every row; the list picks the columns.
wine.loc[:, ['alcohol', 'ash']]

Unnamed: 0,alcohol,ash
0,14.23,2.43
1,13.20,2.14
2,13.16,2.67
3,14.37,2.50
4,13.24,2.87
...,...,...
173,13.71,2.45
174,13.40,2.48
175,13.27,2.26
176,13.17,2.37


In [52]:
# .iloc takes integer positions instead of labels.
# Scalar position -> Series
wine.iloc[0]
# One-element list -> one-row DataFrame
wine.iloc[[0]]
# List of positions -> those rows
wine.iloc[[90, 91, 92]]

alcohol                           14.23
malic_acid                         1.71
ash                                2.43
alcalinity_of_ash                 15.60
magnesium                        127.00
total_phenols                      2.80
flavanoids                         3.06
nonflavanoid_phenols               0.28
proanthocyanins                    2.29
color_intensity                    5.64
hue                                1.04
od280/od315_of_diluted_wines       3.92
proline                         1065.00
Name: 0, dtype: float64

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0


Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
90,12.08,1.83,2.32,18.5,81.0,1.6,1.5,0.52,1.64,2.4,1.08,2.27,480.0
91,12.0,1.51,2.42,22.0,86.0,1.45,1.25,0.5,1.63,3.6,1.05,2.65,450.0
92,12.69,1.53,2.26,20.7,80.0,1.38,1.46,0.58,1.62,3.05,0.96,2.06,495.0


In [16]:
# Positional slices exclude the stop position: rows 0 through 4.
wine.iloc[0:5]

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0


In [17]:
# Negative positions count from the end; -1 is the last row (label 177).
type(wine.iloc[-1])
wine.iloc[-1]

pandas.core.series.Series

alcohol                          14.13
malic_acid                        4.10
ash                               2.74
alcalinity_of_ash                24.50
magnesium                        96.00
total_phenols                     2.05
flavanoids                        0.76
nonflavanoid_phenols              0.56
proanthocyanins                   1.35
color_intensity                   9.20
hue                               0.61
od280/od315_of_diluted_wines      1.60
proline                         560.00
Name: 177, dtype: float64

In [18]:
# Positional slices on both axes: first five rows, first four columns.
wine.iloc[0:5, 0:4]
# Lists of positions: rows 0 and 2, columns 2 and 5 (ash, total_phenols).
wine.iloc[[0, 2], [2, 5]]

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash
0,14.23,1.71,2.43,15.6
1,13.2,1.78,2.14,11.2
2,13.16,2.36,2.67,18.6
3,14.37,1.95,2.5,16.8
4,13.24,2.59,2.87,21.0


Unnamed: 0,ash,total_phenols
0,2.43,2.8
2,2.67,2.8


### Fast Scalar Access (Single Value)

In [19]:
# .at looks up one cell by row/column label; .iat does the same by integer position.
wine.at[0, 'alcohol']
wine.iat[0, 0]

np.float64(14.23)

np.float64(14.23)

### Boolean indexing - to filter rows
- You create a "mask" — a Series of True/False values — and pass it to the DataFrame.

- The DataFrame keeps only the rows where the mask is True.

In [20]:
# Comparing a column to a scalar yields a boolean Series (the mask).
mask = wine['ash'] > 2.8
mask.sum()  # each True counts as 1, so this is the number of matching rows
mask.head()

np.int64(6)

0    False
1    False
2    False
3    False
4     True
Name: ash, dtype: bool

In [21]:
# Keep only the rows where the mask is True.
wine[mask]

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0
25,13.05,2.05,3.22,25.0,124.0,2.63,2.68,0.47,1.92,3.58,1.13,3.2,830.0
36,13.28,1.64,2.84,15.5,110.0,2.6,2.68,0.34,1.36,4.6,1.09,2.78,880.0
112,11.76,2.68,2.92,20.0,103.0,1.75,2.03,0.6,1.05,3.8,1.23,2.5,607.0
121,11.56,2.05,3.23,28.5,119.0,3.18,5.08,0.47,1.87,6.0,0.93,3.69,465.0
169,13.4,4.6,2.86,25.0,112.0,1.98,0.96,0.27,1.11,8.5,0.67,1.92,630.0


### Multiple Conditions

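You must use bitwise operators and wrap every condition in parentheses: `&` and `|` bind more tightly than comparisons such as `>`, and Python's `and`/`or`/`not` do not work element-wise on a Series.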

- AND `(&)`
- OR `(|)`
- NOT `(~)`

In [22]:
# Two conditions combined with & (element-wise AND); each comparison is parenthesised.
mask = (wine['total_phenols'] > 1) & (wine['nonflavanoid_phenols'] > 0.6)
mask.sum()
wine[mask]

np.int64(8)

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
60,12.33,1.1,2.28,16.0,101.0,2.05,1.09,0.63,0.41,3.27,1.25,1.67,680.0
83,13.05,3.86,2.32,22.5,85.0,1.65,1.59,0.61,1.62,4.8,0.84,2.01,515.0
105,12.42,2.55,2.27,22.0,90.0,1.68,1.84,0.66,1.42,2.7,0.86,3.3,315.0
135,12.6,2.46,2.2,18.5,94.0,1.62,0.66,0.63,0.94,7.1,0.73,1.58,695.0
137,12.53,5.51,2.64,25.0,96.0,1.79,0.6,0.63,1.1,5.0,0.82,1.69,515.0
153,13.23,3.3,2.28,18.5,98.0,1.8,0.83,0.61,1.87,10.52,0.56,1.51,675.0
155,13.17,5.19,2.32,22.0,93.0,1.74,0.63,0.61,1.55,7.9,0.6,1.48,725.0
157,12.45,3.03,2.64,27.0,97.0,1.9,0.58,0.63,1.14,7.5,0.67,1.73,880.0


**Handy Filtering Methods**

Pandas provides built-in methods that return boolean masks, making your code cleaner.

In [23]:
mask = wine['magnesium'].isin([120, 100, 70])  # True where magnesium equals any of the listed values
wine[mask]
mask.sum()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0
16,14.3,1.92,2.72,20.0,120.0,2.8,3.14,0.33,1.97,6.2,1.07,2.65,1280.0
35,13.48,1.81,2.41,20.5,100.0,2.7,2.98,0.26,1.86,5.1,1.04,3.47,920.0
61,12.64,1.36,2.02,16.8,100.0,2.02,1.41,0.53,0.62,5.75,0.98,1.59,450.0
89,12.08,1.33,2.3,23.6,70.0,2.2,1.59,0.42,1.38,1.74,1.07,3.21,625.0
175,13.27,4.28,2.26,20.0,120.0,1.59,0.69,0.43,1.35,10.2,0.59,1.56,835.0
176,13.17,2.59,2.37,20.0,120.0,1.65,0.68,0.53,1.46,9.3,0.6,1.62,840.0


np.int64(7)

In [24]:
# Range filter on one column.
mask = wine['flavanoids'].between(0.6, 0.7)  # both bounds are included by default (rows with 0.6 and 0.7 appear)
mask.sum()
wine[mask]

np.int64(13)

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
135,12.6,2.46,2.2,18.5,94.0,1.62,0.66,0.63,0.94,7.1,0.73,1.58,695.0
137,12.53,5.51,2.64,25.0,96.0,1.79,0.6,0.63,1.1,5.0,0.82,1.69,515.0
139,12.84,2.96,2.61,24.0,101.0,2.32,0.6,0.53,0.81,4.92,0.89,2.15,590.0
147,12.87,4.61,2.48,21.5,86.0,1.7,0.65,0.47,0.86,7.65,0.54,1.86,625.0
155,13.17,5.19,2.32,22.0,93.0,1.74,0.63,0.61,1.55,7.9,0.6,1.48,725.0
162,12.85,3.27,2.58,22.0,106.0,1.65,0.6,0.6,0.96,5.58,0.87,2.11,570.0
163,12.96,3.45,2.35,18.5,106.0,1.39,0.7,0.4,0.94,5.28,0.68,1.75,675.0
164,13.78,2.76,2.3,22.0,90.0,1.35,0.68,0.41,1.03,9.58,0.7,1.68,615.0
167,12.82,3.37,2.3,19.5,88.0,1.48,0.66,0.4,0.97,10.26,0.72,1.75,685.0
172,14.16,2.51,2.48,20.0,91.0,1.68,0.7,0.44,1.24,9.7,0.62,1.71,660.0


### String Methods (.str)
- If you have text data, you can use vectorised string methods.

In [25]:
# Small demo frame with a text column for the .str examples;
# 'marks' contains None so there are missing values to filter on later.
students = pd.DataFrame({
    "name": ['rahul', 'harikesh', 'vishnu', 'vidu'],
    "age": [22, 23, 10, 21],
    "salary": [1000, 950, 1100, 5000],
    "marks": [100, None, None, 90]
})

In [26]:
# The frame itself is a DataFrame; a single column of it is a Series.
students
type(students)
type(students['name'])

Unnamed: 0,name,age,salary,marks
0,rahul,22,1000,100.0
1,harikesh,23,950,
2,vishnu,10,1100,
3,vidu,21,5000,90.0


pandas.core.frame.DataFrame

pandas.core.series.Series

In [27]:
# Case conversions through the .str accessor; each call returns a new Series.
students['name'].str.upper()
students['name'].str.capitalize()
students['name'].str.lower()

0       RAHUL
1    HARIKESH
2      VISHNU
3        VIDU
Name: name, dtype: object

0       Rahul
1    Harikesh
2      Vishnu
3        Vidu
Name: name, dtype: object

0       rahul
1    harikesh
2      vishnu
3        vidu
Name: name, dtype: object

In [28]:
students['name'].str.startswith('vi')  # prefix test -> boolean Series
students['name'].str.slice(1)  # everything from position 1 on (drops the first character)
students['name'].str.contains('u')  # substring test -> boolean Series
students['name'].str.len()  # number of characters per name

0    False
1    False
2     True
3     True
Name: name, dtype: bool

0       ahul
1    arikesh
2      ishnu
3        idu
Name: name, dtype: object

0     True
1    False
2     True
3     True
Name: name, dtype: bool

0    5
1    8
2    6
3    4
Name: name, dtype: int64

Null Checks (`.isna()` / `.notna()`)

In [29]:
# Rows whose 'marks' value is missing
mask = students['marks'].isna()
students[mask]

# Rows whose 'marks' value is present
mask = students['marks'].notna()
students[mask]

Unnamed: 0,name,age,salary,marks
1,harikesh,23,950,
2,vishnu,10,1100,


Unnamed: 0,name,age,salary,marks
0,rahul,22,1000,100.0
3,vidu,21,5000,90.0


### Modifying
- What is a View vs. a Copy?

- DataFrame values are stored in memory. To be efficient, Pandas tries not to duplicate data unless it has to.

    - **View**: This is a "window" into the original data. It shares the same memory buffer.

        - If you change the View, you change the Original.
    - **Copy**: This is a completely new object with its own memory.

        - If you change the Copy, the Original is untouched.

- **The Problem**: When you filter or slice data, Pandas doesn't always tell you explicitly if it returned a View or a Copy.

    - It decides based on memory layout and performance. This ambiguity is what triggers the `SettingWithCopyWarning`.

In [30]:
# Seeded generator: without a fixed seed the shuffled rows (and every output
# below that depends on them) change on each Restart & Run All.
rng = np.random.default_rng(42)

# 20 jobs: 10 pending / 10 processing, with 5 low / 5 medium / 10 high
# priorities, each column shuffled independently.
jobs = pd.DataFrame({
    'status': rng.permutation(['pending'] * 10 + ['processing'] * 10),
    'priority': rng.permutation(['low'] * 5 + ['medium'] * 5 + ['high'] * 10),
})
jobs.head()

Unnamed: 0,status,priority
0,processing,medium
1,pending,high
2,pending,high
3,pending,high
4,pending,medium


In [31]:
# Boolean filtering builds a new, temporary DataFrame (a copy of the matching
# rows of the original dataframe).
jobs[jobs['priority'] == 'low']

Unnamed: 0,status,priority
5,processing,low
7,pending,low
10,pending,low
13,processing,low
15,processing,low


In [32]:
# Deliberate anti-pattern (chained indexing): the filter runs first and yields a
# temporary copy, then the ['status'] assignment writes to that copy.
jobs[jobs['priority'] == 'low']['status'] = 'pending'

# The temporary copy is thrown away, so the original dataframe is not updated.
# pandas detects this and emits the warning shown below.

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  jobs[jobs['priority'] == 'low']['status'] = 'pending'


In [33]:
jobs[jobs['priority'] == 'low']  # statuses are unchanged: the chained assignment never reached jobs

Unnamed: 0,status,priority
5,processing,low
7,pending,low
10,pending,low
13,processing,low
15,processing,low


### How to Update Data Correctly

**Scenario A: You want to update the ORIGINAL DataFrame**

Use `.loc`. This is a single operation, not two steps, so it guarantees you are operating on the original data.

In [34]:
# One .loc call carries both the row mask and the column label, so the
# assignment is applied to jobs itself.
jobs.loc[jobs['status'] == 'pending', 'priority'] = 'high'

In [35]:
jobs[jobs['status'] == 'pending']  # jobs is modified: every pending row now has priority 'high'

Unnamed: 0,status,priority
1,pending,high
2,pending,high
3,pending,high
4,pending,high
7,pending,high
8,pending,high
10,pending,high
12,pending,high
16,pending,high
17,pending,high


### Scenario B: You want a SEPARATE COPY

- You intentionally create a subset and want to work on it separately without affecting the main dataframe.

Explicitly tell Pandas to make a copy using `.copy()`.

In [36]:
# .copy() makes processing_jobs an independent DataFrame of the processing rows.
processing_jobs = jobs[jobs['status'] == 'processing'].copy()
processing_jobs

Unnamed: 0,status,priority
0,processing,medium
5,processing,low
6,processing,high
9,processing,high
11,processing,medium
13,processing,low
14,processing,high
15,processing,low
18,processing,high
19,processing,high


In [37]:
# Whole-column assignment on the copy: every row's priority becomes 'low'.
processing_jobs['priority'] = 'low'
processing_jobs

Unnamed: 0,status,priority
0,processing,low
5,processing,low
6,processing,low
9,processing,low
11,processing,low
13,processing,low
14,processing,low
15,processing,low
18,processing,low
19,processing,low


In [38]:
# jobs still shows the original priorities for its processing rows.
jobs[jobs['status'] == 'processing']

Unnamed: 0,status,priority
0,processing,medium
5,processing,low
6,processing,high
9,processing,high
11,processing,medium
13,processing,low
14,processing,high
15,processing,low
18,processing,high
19,processing,high


### Renaming Columns & Rows
- The primary method is .rename(), which accepts a dictionary mapping { 'Old Name' : 'New Name' }.

⚠️ By default, .rename()/.drop() do not change the original DataFrame. They return a new copy with the changes.

- To save changes:

  - Reassign the variable (df = df.drop(...))
  - To change in-place: Use the argument inplace=True (not preferred).

In [39]:
# rename() returns a new DataFrame with 'age' relabelled as 'years' ...
students.rename(columns={'age': 'years'})
students  # ... the result was not assigned, so students is not modified

Unnamed: 0,name,years,salary,marks
0,rahul,22,1000,100.0
1,harikesh,23,950,
2,vishnu,10,1100,
3,vidu,21,5000,90.0


Unnamed: 0,name,age,salary,marks
0,rahul,22,1000,100.0
1,harikesh,23,950,
2,vishnu,10,1100,
3,vidu,21,5000,90.0


In [40]:
# Alternative: replace every column name at once by assigning a list to .columns.
# Done on a copy so students keeps its original names.
_students = students.copy()
_students.columns = ['first_name', 'years', 'income', 'score']
_students

Unnamed: 0,first_name,years,income,score
0,rahul,22,1000,100.0
1,harikesh,23,950,
2,vishnu,10,1100,
3,vidu,21,5000,90.0


### Dropping Columns
⭐ Axis 0: Points DOWN (↓). It follows the Rows.

⭐ Axis 1: Points ACROSS (→). It follows the Columns.

- When you do math, the axis parameter tells Pandas which dimension to squash (collapse).

    - `df.sum(axis=0)`: The arrow goes DOWN the rows. It squashes all the rows together.

        - **Result**: You get totals for each Column.
    - `df.sum(axis=1)`: The arrow goes ACROSS the columns. It squashes all the columns together.

        - **Result**: You get a total for each Row.
- When you use drop, the axis parameter tells Pandas where to look for the label.

    - `df.drop(..., axis=0)`: Look at the Row Labels (Index).

        - **Result**: You delete a Row.
    - `df.drop(..., axis=1)`: Look at the Column Names (Headers).

        - **Result**: You delete a Column.

In [41]:
# axis=1: 'age' is looked up among the column names; a new frame is returned.
students.drop('age', axis=1)

Unnamed: 0,name,salary,marks
0,rahul,1000,100.0
1,harikesh,950,
2,vishnu,1100,
3,vidu,5000,90.0


In [42]:
# A list of labels drops several columns in one call.
students.drop(['age', 'salary'], axis=1)

Unnamed: 0,name,marks
0,rahul,100.0
1,harikesh,
2,vishnu,
3,vidu,90.0


In [43]:
# .pop() returns the 'first_name' column as a Series (shown below) and removes
# it from _students (pandas DataFrame.pop semantics).
popped_col = _students.pop('first_name')
popped_col

0       rahul
1    harikesh
2      vishnu
3        vidu
Name: first_name, dtype: object

### Dropping Rows

In [44]:
# No axis given, so 0 is treated as a row label.
students.drop(0)
# jobs.index[:5] supplies the first five row labels to drop.
jobs.drop(jobs.index[:5])

Unnamed: 0,name,age,salary,marks
1,harikesh,23,950,
2,vishnu,10,1100,
3,vidu,21,5000,90.0


Unnamed: 0,status,priority
5,processing,low
6,processing,high
7,pending,high
8,pending,high
9,processing,high
10,pending,high
11,processing,medium
12,pending,high
13,processing,low
14,processing,high


In [45]:
# jobs itself still contains rows 1-4: the drop result was never assigned back.
jobs[jobs['status'] == 'pending']

Unnamed: 0,status,priority
1,pending,high
2,pending,high
3,pending,high
4,pending,high
7,pending,high
8,pending,high
10,pending,high
12,pending,high
16,pending,high
17,pending,high


In [54]:
# Series.map with a dict replaces each value by its dict entry; a name with no
# entry ('harikesh') comes back as NaN.
name_map = {
    "rahul": "bro",
    # "harikesh": Not in dictionary,
    "vidu": "me",
    "vishnu": "monkey",
}

students['name'].map(name_map)

0       bro
1       NaN
2    monkey
3        me
Name: name, dtype: object

In [55]:
# map() also accepts a function, applied to each element ...
students['name'].map(len)
# ... here giving the same result as the vectorised .str.len().
students['name'].str.len()

0    5
1    8
2    6
3    4
Name: name, dtype: int64

0    5
1    8
2    6
3    4
Name: name, dtype: int64

In [56]:
def classify_salary(salary, threshold=5000):
    """Label a salary as 'Rich' or 'Poor'.

    Args:
        salary: A single numeric salary value (one element of the column when
            used with ``Series.apply``).
        threshold: Minimum salary that counts as 'Rich'. Defaults to 5000, the
            cut-off that was previously hard-coded.

    Returns:
        'Rich' if ``salary >= threshold``, otherwise 'Poor'.
    """
    return 'Rich' if salary >= threshold else 'Poor'


# Series.apply calls classify_salary once per salary value.
students['salary'].apply(classify_salary)

0    Poor
1    Poor
2    Poor
3    Rich
Name: salary, dtype: object

In [57]:
def calculate_power(row):
    """Return the length of the row's name multiplied by its salary.

    ``row`` is any mapping-like object exposing 'name' and 'salary' keys, such
    as the Series that ``DataFrame.apply(..., axis=1)`` passes for each row.
    """
    name_length = len(row['name'])
    salary = row['salary']
    return name_length * salary


# axis=1: the function receives one row at a time and returns one value per row.
students.apply(calculate_power, axis=1)

0     5000
1     7600
2     6600
3    20000
dtype: int64

In [58]:
def calculate_mean(col):
    """Return the arithmetic mean of the values in ``col``.

    Args:
        col: A sized iterable of numbers, e.g. the column Series passed by
            ``DataFrame.apply(..., axis=0)``.

    Returns:
        ``sum(col) / len(col)``, or NaN when ``col`` is empty (previously this
        raised ZeroDivisionError).
    """
    count = len(col)
    if count == 0:
        # Nothing to average.
        return float('nan')
    return sum(col) / count


# axis=0: the function receives one column at a time; one mean per column comes back.
wine.apply(calculate_mean, axis=0)

alcohol                          13.000618
malic_acid                        2.336348
ash                               2.366517
alcalinity_of_ash                19.494944
magnesium                        99.741573
total_phenols                     2.295112
flavanoids                        2.029270
nonflavanoid_phenols              0.361854
proanthocyanins                   1.590899
color_intensity                   5.058090
hue                               0.957449
od280/od315_of_diluted_wines      2.611685
proline                         746.893258
dtype: float64