In [3]:
import pandas as pd
import numpy as np

In [4]:
weather_data = {
    'Day' : ['01/04/2025','02/04/2025','03/04/2025','04/04/2025','05/04/2025'],
    'Temperature' : [32,33,33,35,38],
    'Windspeed' : [6,7,2,7,4],
    'Event' : ['Rain','Rain','Sunny','Sunny','Sunny']
}
df = pd.DataFrame(weather_data)
df

Unnamed: 0,Day,Temperature,Windspeed,Event
0,01/04/2025,32,6,Rain
1,02/04/2025,33,7,Rain
2,03/04/2025,33,2,Sunny
3,04/04/2025,35,7,Sunny
4,05/04/2025,38,4,Sunny


In [5]:
df.shape

(5, 4)

In [6]:
df.head(2)

Unnamed: 0,Day,Temperature,Windspeed,Event
0,01/04/2025,32,6,Rain
1,02/04/2025,33,7,Rain


In [7]:
df.tail(3)

Unnamed: 0,Day,Temperature,Windspeed,Event
2,03/04/2025,33,2,Sunny
3,04/04/2025,35,7,Sunny
4,05/04/2025,38,4,Sunny


In [8]:
df[2:5]

Unnamed: 0,Day,Temperature,Windspeed,Event
2,03/04/2025,33,2,Sunny
3,04/04/2025,35,7,Sunny
4,05/04/2025,38,4,Sunny


In [9]:
df.columns


Index(['Day', 'Temperature', 'Windspeed', 'Event'], dtype='object')

In [10]:
print(df.Day)
df['Event']  # both are same

0    01/04/2025
1    02/04/2025
2    03/04/2025
3    04/04/2025
4    05/04/2025
Name: Day, dtype: object


0     Rain
1     Rain
2    Sunny
3    Sunny
4    Sunny
Name: Event, dtype: object

In [11]:
type(df['Event'])

pandas.core.series.Series

In [12]:
df[['Temperature','Day']]

Unnamed: 0,Temperature,Day
0,32,01/04/2025
1,33,02/04/2025
2,33,03/04/2025
3,35,04/04/2025
4,38,05/04/2025


In [13]:
df['Temperature'].max()

np.int64(38)

In [14]:
df['Day'][df.Temperature>33]

3    04/04/2025
4    05/04/2025
Name: Day, dtype: object

In [15]:
df.set_index('Day',inplace=True)
df

Unnamed: 0_level_0,Temperature,Windspeed,Event
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
01/04/2025,32,6,Rain
02/04/2025,33,7,Rain
03/04/2025,33,2,Sunny
04/04/2025,35,7,Sunny
05/04/2025,38,4,Sunny


In [16]:
df.loc['01/04/2025']

Temperature      32
Windspeed         6
Event          Rain
Name: 01/04/2025, dtype: object

In [17]:
df.reset_index(inplace=True)
df

Unnamed: 0,Day,Temperature,Windspeed,Event
0,01/04/2025,32,6,Rain
1,02/04/2025,33,7,Rain
2,03/04/2025,33,2,Sunny
3,04/04/2025,35,7,Sunny
4,05/04/2025,38,4,Sunny


In [18]:
df1 = pd.read_csv('car-sales-missing-data.csv')
df1

Unnamed: 0,Make,Colour,Odometer,Doors,Price
0,Toyota,White,150043.0,4.0,"$4,000"
1,Honda,Red,87899.0,4.0,"$5,000"
2,Toyota,Blue,,3.0,"$7,000"
3,BMW,Black,11179.0,5.0,"$22,000"
4,Nissan,White,213095.0,4.0,"$3,500"
5,Toyota,Green,,4.0,"$4,500"
6,Honda,,,4.0,"$7,500"
7,Honda,Blue,,4.0,
8,Toyota,White,60000.0,,
9,,White,31600.0,4.0,"$9,700"


**Handle Missing Values with Fillna**

In [19]:
new_df = df1.fillna({
    'Colour':'Other',
    'Odometer':0,
    'Doors':'Unknown',
    'Price':0,
    'Make':'Unknown'})
new_df

Unnamed: 0,Make,Colour,Odometer,Doors,Price
0,Toyota,White,150043.0,4.0,"$4,000"
1,Honda,Red,87899.0,4.0,"$5,000"
2,Toyota,Blue,0.0,3.0,"$7,000"
3,BMW,Black,11179.0,5.0,"$22,000"
4,Nissan,White,213095.0,4.0,"$3,500"
5,Toyota,Green,0.0,4.0,"$4,500"
6,Honda,Other,0.0,4.0,"$7,500"
7,Honda,Blue,0.0,4.0,0
8,Toyota,White,60000.0,Unknown,0
9,Unknown,White,31600.0,4.0,"$9,700"


In [20]:
new_df = df1.fillna(method='ffill')
new_df

  new_df = df1.fillna(method='ffill')


Unnamed: 0,Make,Colour,Odometer,Doors,Price
0,Toyota,White,150043.0,4.0,"$4,000"
1,Honda,Red,87899.0,4.0,"$5,000"
2,Toyota,Blue,87899.0,3.0,"$7,000"
3,BMW,Black,11179.0,5.0,"$22,000"
4,Nissan,White,213095.0,4.0,"$3,500"
5,Toyota,Green,213095.0,4.0,"$4,500"
6,Honda,Green,213095.0,4.0,"$7,500"
7,Honda,Blue,213095.0,4.0,"$7,500"
8,Toyota,White,60000.0,4.0,"$7,500"
9,Toyota,White,31600.0,4.0,"$9,700"


**Use of Interpolar**

In [21]:
new_df= new_df.interpolate()
new_df

  new_df= new_df.interpolate()


Unnamed: 0,Make,Colour,Odometer,Doors,Price
0,Toyota,White,150043.0,4.0,"$4,000"
1,Honda,Red,87899.0,4.0,"$5,000"
2,Toyota,Blue,87899.0,3.0,"$7,000"
3,BMW,Black,11179.0,5.0,"$22,000"
4,Nissan,White,213095.0,4.0,"$3,500"
5,Toyota,Green,213095.0,4.0,"$4,500"
6,Honda,Green,213095.0,4.0,"$7,500"
7,Honda,Blue,213095.0,4.0,"$7,500"
8,Toyota,White,60000.0,4.0,"$7,500"
9,Toyota,White,31600.0,4.0,"$9,700"


**Use of drop**

In [22]:
new_df = df1.dropna()
new_df

Unnamed: 0,Make,Colour,Odometer,Doors,Price
0,Toyota,White,150043.0,4.0,"$4,000"
1,Honda,Red,87899.0,4.0,"$5,000"
3,BMW,Black,11179.0,5.0,"$22,000"
4,Nissan,White,213095.0,4.0,"$3,500"


In [23]:
new_df = df1.replace(['Red','Black'],'Brown')
new_df

Unnamed: 0,Make,Colour,Odometer,Doors,Price
0,Toyota,White,150043.0,4.0,"$4,000"
1,Honda,Brown,87899.0,4.0,"$5,000"
2,Toyota,Blue,,3.0,"$7,000"
3,BMW,Brown,11179.0,5.0,"$22,000"
4,Nissan,White,213095.0,4.0,"$3,500"
5,Toyota,Green,,4.0,"$4,500"
6,Honda,,,4.0,"$7,500"
7,Honda,Blue,,4.0,
8,Toyota,White,60000.0,,
9,,White,31600.0,4.0,"$9,700"


In [26]:
weather_data = {
    'Day' : ['01/04/2025','02/04/2025','03/04/2025','04/04/2025','05/04/2025'],
    'Temperature' : ['32 C',33,33,35,'38 F'],
    'Windspeed' : ['6 mph','7 mph',2,7,4],
    'Event' : ['Rain','Rain','Sunny','Sunny','Sunny']
}
df = pd.DataFrame(weather_data)
df.to_csv('weather_data',index=False)
print("weather_data.csv has been created successfully!")

weather_data.csv has been created successfully!


In [27]:
new_df = df.replace({
    'Temperature':'[A-Za-z]',
    'Windspeed':'[A-Za-z]'
},'',regex=True)
new_df

Unnamed: 0,Day,Temperature,Windspeed,Event
0,01/04/2025,32,6,Rain
1,02/04/2025,33,7,Rain
2,03/04/2025,33,2,Sunny
3,04/04/2025,35,7,Sunny
4,05/04/2025,38,4,Sunny


In [29]:
df2 = pd.read_csv('weather_by_cities.csv')
df2

Unnamed: 0,day,city,temperature,windspeed,event
0,1/1/2017,new york,32,6,Rain
1,1/2/2017,new york,36,7,Sunny
2,1/3/2017,new york,28,12,Snow
3,1/4/2017,new york,33,7,Sunny
4,1/1/2017,mumbai,90,5,Sunny
5,1/2/2017,mumbai,85,12,Fog
6,1/3/2017,mumbai,87,15,Fog
7,1/4/2017,mumbai,92,5,Rain
8,1/1/2017,paris,45,20,Sunny
9,1/2/2017,paris,50,13,Cloudy


**Use of groupby**

In [31]:
g = df2.groupby('city')
g

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x6952020>

In [32]:
for city,city_df in g:
    print(city)
    print(city_df)

mumbai
        day    city  temperature  windspeed  event
4  1/1/2017  mumbai           90          5  Sunny
5  1/2/2017  mumbai           85         12    Fog
6  1/3/2017  mumbai           87         15    Fog
7  1/4/2017  mumbai           92          5   Rain
new york
        day      city  temperature  windspeed  event
0  1/1/2017  new york           32          6   Rain
1  1/2/2017  new york           36          7  Sunny
2  1/3/2017  new york           28         12   Snow
3  1/4/2017  new york           33          7  Sunny
paris
         day   city  temperature  windspeed   event
8   1/1/2017  paris           45         20   Sunny
9   1/2/2017  paris           50         13  Cloudy
10  1/3/2017  paris           54          8  Cloudy
11  1/4/2017  paris           42         10  Cloudy


In [33]:
g.get_group('mumbai')

Unnamed: 0,day,city,temperature,windspeed,event
4,1/1/2017,mumbai,90,5,Sunny
5,1/2/2017,mumbai,85,12,Fog
6,1/3/2017,mumbai,87,15,Fog
7,1/4/2017,mumbai,92,5,Rain


In [34]:
g.max()

Unnamed: 0_level_0,day,temperature,windspeed,event
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
mumbai,1/4/2017,92,15,Sunny
new york,1/4/2017,36,12,Sunny
paris,1/4/2017,54,20,Sunny


In [37]:
g.mean(['temperature','windspeed'])

Unnamed: 0_level_0,temperature,windspeed
city,Unnamed: 1_level_1,Unnamed: 2_level_1
mumbai,88.5,9.25
new york,32.25,8.0
paris,47.75,12.75


In [41]:
india_weather = pd.DataFrame({
    "city" : ["mumbai","delhi","banglore"],
    "temperature" : [32,45,30],
    "humidity": [80,60,78]
})
india_weather

Unnamed: 0,city,temperature,humidity
0,mumbai,32,80
1,delhi,45,60
2,banglore,30,78


In [42]:
usa_weather = pd.DataFrame({
    "city" : ["new york","chicago","oriando"],
    "temperature" : [21,14,35],
    "humidity": [68,65,76]
})
usa_weather

Unnamed: 0,city,temperature,humidity
0,new york,21,68
1,chicago,14,65
2,oriando,35,76


In [45]:
df = pd.concat([india_weather,usa_weather], keys=["india","usa"])
df

Unnamed: 0,Unnamed: 1,city,temperature,humidity
india,0,mumbai,32,80
india,1,delhi,45,60
india,2,banglore,30,78
usa,0,new york,21,68
usa,1,chicago,14,65
usa,2,oriando,35,76


In [46]:
df.loc['india']

Unnamed: 0,city,temperature,humidity
0,mumbai,32,80
1,delhi,45,60
2,banglore,30,78


In [56]:
india_temp = pd.DataFrame({
    "city" : ["mumbai","delhi","banglore","chennai"],
    "temperature" : [32,45,30,28],
})
india_windspeed = pd.DataFrame({
    "city" : ["mumbai","delhi","banglore"],
    "windspeed":[7,12,9]
})
df = pd.concat([india_temp,india_windspeed],axis=1)
df

Unnamed: 0,city,temperature,city.1,windspeed
0,mumbai,32,mumbai,7.0
1,delhi,45,delhi,12.0
2,banglore,30,banglore,9.0
3,chennai,28,,


In [57]:
df = pd.merge(india_temp,india_windspeed,on="city")
df

Unnamed: 0,city,temperature,windspeed
0,mumbai,32,7
1,delhi,45,12
2,banglore,30,9


In [58]:
df = pd.merge(india_temp,india_windspeed,on="city",how="outer",indicator=True)
df

Unnamed: 0,city,temperature,windspeed,_merge
0,banglore,30,9.0,both
1,chennai,28,,left_only
2,delhi,45,12.0,both
3,mumbai,32,7.0,both


In [59]:
df = pd.DataFrame({'marks': [40, 75, 90, 55]})

df['grade'] = df['marks'].map(lambda x: 'Pass' if x >= 50 else 'Fail')
print(df)

   marks grade
0     40  Fail
1     75  Pass
2     90  Pass
3     55  Pass


In [65]:
df['bonus_marks'] = df['marks'].apply(lambda x: x * 1.10 if x >=90 else x)
print(df)

   marks grade  bonus_marks
0     40  Fail         40.0
1     75  Pass         75.0
2     90  Pass         99.0
3     55  Pass         55.0
