In [1]:
import pandas as pd

In [2]:
# Read the forest-fires CSV from the working directory into a DataFrame.
df = pd.read_csv(filepath_or_buffer="forestfires.csv")
# Bare last expression: let the notebook render the frame.
df

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.00
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.00
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.00
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.00
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
512,4,3,aug,sun,81.6,56.7,665.6,1.9,27.8,32,2.7,0.0,6.44
513,2,4,aug,sun,81.6,56.7,665.6,1.9,21.9,71,5.8,0.0,54.29
514,7,4,aug,sun,81.6,56.7,665.6,1.9,21.2,70,6.7,0.0,11.16
515,1,4,aug,sat,94.4,146.0,614.7,11.3,25.6,42,4.0,0.0,0.00


In [3]:
# Create custom category
def classify_area(area, partial_max=10):
    """Label a burned-area value with a coarse impact category.

    Parameters
    ----------
    area : scalar
        Burned-area value for a single record (one element of the ``area``
        column).
    partial_max : scalar, default 10
        Largest area still labelled "PartiallyAffected"; any value above it
        is labelled "MostlyAffected".

    Returns
    -------
    str or missing value
        "NotAffected" when ``area`` equals 0, "PartiallyAffected" when it is
        non-zero and at most ``partial_max``, "MostlyAffected" otherwise.
        A missing input (None / NaN / pd.NA) is returned unchanged.
    """
    # NaN fails both comparisons below, so without this guard a missing area
    # would be reported as "MostlyAffected"; None and pd.NA would raise.
    # Passing the value through keeps it missing in the derived column.
    if pd.isna(area):
        return area
    if area == 0:
        return "NotAffected"
    if area <= partial_max:
        return "PartiallyAffected"
    return "MostlyAffected"

# Derive the impact label of every row from its burned area.
df['RegionImpact'] = df['area'].map(classify_area)

# Split the frame into one subset per impact label using boolean masks.
not_affected = df.loc[df['RegionImpact'] == 'NotAffected']
partially_affected = df.loc[df['RegionImpact'] == 'PartiallyAffected']
mostly_affected = df.loc[df['RegionImpact'] == 'MostlyAffected']

In [4]:
# Show the frame again; it now carries the RegionImpact column added to df.
df

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,RegionImpact
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.00,NotAffected
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.00,NotAffected
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.00,NotAffected
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.00,NotAffected
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.00,NotAffected
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
512,4,3,aug,sun,81.6,56.7,665.6,1.9,27.8,32,2.7,0.0,6.44,PartiallyAffected
513,2,4,aug,sun,81.6,56.7,665.6,1.9,21.9,71,5.8,0.0,54.29,MostlyAffected
514,7,4,aug,sun,81.6,56.7,665.6,1.9,21.2,70,6.7,0.0,11.16,MostlyAffected
515,1,4,aug,sat,94.4,146.0,614.7,11.3,25.6,42,4.0,0.0,0.00,NotAffected


In [5]:
# Stack the two "affected" subsets row-wise, partially affected rows first.
# ignore_index=False keeps each row's original index label.
merged_df = pd.concat(
    (partially_affected, mostly_affected),
    axis=0,
    ignore_index=False,
)
merged_df

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,RegionImpact
138,9,9,jul,tue,85.8,48.3,313.4,3.9,18.0,42,2.7,0.0,0.36,PartiallyAffected
139,1,4,sep,tue,91.0,129.5,692.6,7.0,21.7,38,2.2,0.0,0.43,PartiallyAffected
140,2,5,sep,mon,90.9,126.5,686.5,7.0,21.9,39,1.8,0.0,0.47,PartiallyAffected
141,1,2,aug,wed,95.5,99.9,513.3,13.2,23.3,31,4.5,0.0,0.55,PartiallyAffected
142,8,6,aug,fri,90.1,108.0,529.8,12.5,21.2,51,8.9,0.0,0.61,PartiallyAffected
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
498,6,5,aug,tue,96.1,181.1,671.2,14.3,33.3,26,2.7,0.0,40.54,MostlyAffected
499,7,5,aug,tue,96.1,181.1,671.2,14.3,27.3,63,4.9,6.4,10.82,MostlyAffected
504,4,3,aug,wed,94.5,139.4,689.1,20.0,28.9,29,4.9,0.0,49.59,MostlyAffected
513,2,4,aug,sun,81.6,56.7,665.6,1.9,21.9,71,5.8,0.0,54.29,MostlyAffected


In [6]:
# Order rows by temp, then wind, then area; every key is sorted descending,
# so a single scalar flag covers all three.
sorted_df = df.sort_values(
    by=['temp', 'wind', 'area'],
    ascending=False,
)
sorted_df

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,RegionImpact
498,6,5,aug,tue,96.1,181.1,671.2,14.3,33.3,26,2.7,0.0,40.54,MostlyAffected
484,2,5,aug,sun,94.9,130.3,587.1,14.1,33.1,25,4.0,0.0,26.43,MostlyAffected
496,4,5,aug,mon,96.2,175.5,661.8,16.8,32.6,26,3.1,0.0,2.77,PartiallyAffected
491,4,4,aug,thu,95.8,152.0,624.1,13.8,32.4,21,4.5,0.0,0.00,NotAffected
492,1,3,aug,fri,95.9,158.0,633.6,11.3,32.4,27,2.2,0.0,0.00,NotAffected
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
279,4,4,dec,mon,85.4,25.4,349.7,2.6,4.6,21,8.5,0.0,9.77,PartiallyAffected
463,6,5,feb,tue,75.1,4.4,16.2,1.9,4.6,82,6.3,0.0,5.39,PartiallyAffected
465,2,2,feb,sat,79.5,3.6,15.3,1.8,4.6,59,0.9,0.0,6.84,PartiallyAffected
282,6,3,feb,sun,84.9,27.5,353.5,3.4,4.2,51,4.0,0.0,0.00,NotAffected


In [7]:
# Swap rows and columns of the whole frame: every original row becomes a
# column and every original column becomes a row.
transposed_df = df.T
transposed_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,507,508,509,510,511,512,513,514,515,516
X,7,7,7,8,8,8,8,8,8,7,...,2,1,5,6,8,4,2,7,1,6
Y,5,4,4,6,6,6,6,6,6,5,...,4,2,4,5,6,3,4,4,4,3
month,mar,oct,oct,mar,mar,aug,aug,aug,sep,sep,...,aug,aug,aug,aug,aug,aug,aug,aug,aug,nov
day,fri,tue,sat,fri,sun,sun,mon,mon,tue,sat,...,fri,fri,fri,fri,sun,sun,sun,sun,sat,tue
FFMC,86.2,90.6,90.6,91.7,89.3,92.3,92.3,91.5,91.0,92.5,...,91.0,91.0,91.0,91.0,81.6,81.6,81.6,81.6,94.4,79.5
DMC,26.2,35.4,43.7,33.3,51.3,85.3,88.9,145.4,129.5,88.0,...,166.9,166.9,166.9,166.9,56.7,56.7,56.7,56.7,146.0,3.0
DC,94.3,669.1,686.9,77.5,102.2,488.0,495.6,608.2,692.6,698.6,...,752.6,752.6,752.6,752.6,665.6,665.6,665.6,665.6,614.7,106.7
ISI,5.1,6.7,6.7,9.0,9.6,14.7,8.5,10.7,7.0,7.1,...,7.1,7.1,7.1,7.1,1.9,1.9,1.9,1.9,11.3,1.1
temp,8.2,18.0,14.6,8.3,11.4,22.2,24.1,8.0,13.1,22.8,...,25.9,25.9,21.1,18.2,27.8,27.8,21.9,21.2,25.6,11.8
RH,51,33,33,97,99,29,27,86,63,40,...,41,41,71,62,35,32,71,70,42,31


In [8]:
# Reshape wide -> long: keep month/day/RegionImpact as identifiers and fold
# temp, wind and area into (Measurement, Value) pairs, one row per pair.
melted_df = df.melt(
    id_vars=['month', 'day', 'RegionImpact'],
    value_vars=['temp', 'wind', 'area'],
    var_name='Measurement',
    value_name='Value',
)
melted_df

Unnamed: 0,month,day,RegionImpact,Measurement,Value
0,mar,fri,NotAffected,temp,8.20
1,oct,tue,NotAffected,temp,18.00
2,oct,sat,NotAffected,temp,14.60
3,mar,fri,NotAffected,temp,8.30
4,mar,sun,NotAffected,temp,11.40
...,...,...,...,...,...
1546,aug,sun,PartiallyAffected,area,6.44
1547,aug,sun,MostlyAffected,area,54.29
1548,aug,sun,MostlyAffected,area,11.16
1549,aug,sat,NotAffected,area,0.00


In [9]:
# Reshape long -> wide: one row per (month, day, RegionImpact) group and one
# column per Measurement, each cell holding the mean Value of that group.
# reset_index() turns the grouping keys back into ordinary columns.
casted_df = pd.pivot_table(
    melted_df,
    values='Value',
    index=['month', 'day', 'RegionImpact'],
    columns='Measurement',
    aggfunc='mean',
).reset_index()
casted_df

Measurement,month,day,RegionImpact,area,temp,wind
0,apr,fri,NotAffected,0.000000,16.70,3.100000
1,apr,mon,PartiallyAffected,3.350000,10.90,3.100000
2,apr,sat,NotAffected,0.000000,9.30,4.500000
3,apr,sun,MostlyAffected,61.130000,13.70,9.400000
4,apr,sun,NotAffected,0.000000,15.50,3.800000
...,...,...,...,...,...,...
124,sep,tue,NotAffected,0.000000,16.95,4.033333
125,sep,tue,PartiallyAffected,3.126667,19.40,3.200000
126,sep,wed,MostlyAffected,52.640000,20.40,3.433333
127,sep,wed,NotAffected,0.000000,19.75,4.166667
