# Hacking with pandas: dummy vs proficient solutions

## df[mask] vs df.query

**Exercise**: Filter the year 2021.

In [1]:
import pandas as pd

df_energy = pd.read_csv('../../../data/raw/energy_generation_spain_2019_2021.csv')
df_energy

Unnamed: 0,technology,year,month,day,hour,generation_mwh
0,Carbon,2019,1,1,0,1867.0
1,Carbon,2019,1,1,1,1618.0
...,...,...,...,...,...,...
420862,Other Renewables,2021,12,31,22,607.5
420863,Other Renewables,2021,12,31,23,591.6


### Dummy

In [2]:
mask_year = df_energy.year == 2021
df_energy[mask_year]

Unnamed: 0,technology,year,month,day,hour,generation_mwh
17544,Carbon,2021,1,1,0,250.0
17545,Carbon,2021,1,1,1,250.0
...,...,...,...,...,...,...
420862,Other Renewables,2021,12,31,22,607.5
420863,Other Renewables,2021,12,31,23,591.6


### Proficient

In [3]:
df_energy.query('year == 2021')

Unnamed: 0,technology,year,month,day,hour,generation_mwh
17544,Carbon,2021,1,1,0,250.0
17545,Carbon,2021,1,1,1,250.0
...,...,...,...,...,...,...
420862,Other Renewables,2021,12,31,22,607.5
420863,Other Renewables,2021,12,31,23,591.6


## Query with multiple conditions

**Exercise**: Filter the year `2021` and `Eolic` technology.

### Dummy

In [4]:
mask_2021 = df_energy.year == 2021
mask_eolic = df_energy.technology == 'Eolic'

df_energy[mask_2021 & mask_eolic]

Unnamed: 0,technology,year,month,day,hour,generation_mwh
122760,Eolic,2021,1,1,0,8557.5
122761,Eolic,2021,1,1,1,8661.6
...,...,...,...,...,...,...
131518,Eolic,2021,12,31,22,6081.8
131519,Eolic,2021,12,31,23,6255.3


### Proficient

In [5]:
df_energy.query('year == 2021 & technology == "Eolic"')

Unnamed: 0,technology,year,month,day,hour,generation_mwh
122760,Eolic,2021,1,1,0,8557.5
122761,Eolic,2021,1,1,1,8661.6
...,...,...,...,...,...,...
131518,Eolic,2021,12,31,22,6081.8
131519,Eolic,2021,12,31,23,6255.3


## Filter rows with regular expressions

**Exercise**: Filter all `fiat` from the following `DataFrame`.

In [6]:
import seaborn as sns

df_mpg = sns.load_dataset('mpg')
df_mpg.name = df_mpg.name.str.title()
df_mpg

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130.0,3504,12.0,70,usa,Chevrolet Chevelle Malibu
1,15.0,8,350.0,165.0,3693,11.5,70,usa,Buick Skylark 320
...,...,...,...,...,...,...,...,...,...
396,28.0,4,120.0,79.0,2625,18.6,82,usa,Ford Ranger
397,31.0,4,119.0,82.0,2720,19.4,82,usa,Chevy S-10


### Dummy

In [7]:
list_fiat = ['Fiat 124b', 'Fiat 124 Sport Coupe', 'Fiat 128', 'Fiat 128', 'Fiat 124 Tc', 'Fiat X1.9', 'Fiat 131', 'Fiat Strada Custom']

mask_fiat = df_mpg.name.isin(list_fiat)
df_mpg[mask_fiat]

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
114,26.0,4,98.0,90.0,2265,15.5,73,europe,Fiat 124 Sport Coupe
117,29.0,4,68.0,49.0,1867,19.5,73,europe,Fiat 128
147,24.0,4,90.0,75.0,2108,15.5,74,europe,Fiat 128
148,26.0,4,116.0,75.0,2246,14.0,74,europe,Fiat 124 Tc
151,31.0,4,79.0,67.0,2000,16.0,74,europe,Fiat X1.9
182,28.0,4,107.0,86.0,2464,15.5,76,europe,Fiat 131
304,37.3,4,91.0,69.0,2130,14.7,79,europe,Fiat Strada Custom


### Proficient

In [8]:
mask_fiat = df_mpg.name.str.contains('Fiat')
df_mpg[mask_fiat]

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
52,30.0,4,88.0,76.0,2065,14.5,71,europe,Fiat 124B
114,26.0,4,98.0,90.0,2265,15.5,73,europe,Fiat 124 Sport Coupe
117,29.0,4,68.0,49.0,1867,19.5,73,europe,Fiat 128
147,24.0,4,90.0,75.0,2108,15.5,74,europe,Fiat 128
148,26.0,4,116.0,75.0,2246,14.0,74,europe,Fiat 124 Tc
151,31.0,4,79.0,67.0,2000,16.0,74,europe,Fiat X1.9
182,28.0,4,107.0,86.0,2464,15.5,76,europe,Fiat 131
304,37.3,4,91.0,69.0,2130,14.7,79,europe,Fiat Strada Custom


## Regex flag to IGNORECASE

**Exercise**: Select all `sport` cars from the `name` column.

### Dummy

In [9]:
mask_sport = df_mpg.name.str.contains('Sport|sport|SPORT')
df_mpg[mask_sport]

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
45,18.0,6,258.0,110.0,2962,13.5,71,usa,Amc Hornet Sportabout (Sw)
114,26.0,4,98.0,90.0,2265,15.5,73,europe,Fiat 124 Sport Coupe
263,17.7,6,231.0,165.0,3445,13.4,78,usa,Buick Regal Sport Coupe (Turbo)


### Proficient

In [10]:
import re

mask_sport = df_mpg.name.str.contains('sport', flags=re.IGNORECASE)
df_mpg[mask_sport]

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
45,18.0,6,258.0,110.0,2962,13.5,71,usa,Amc Hornet Sportabout (Sw)
114,26.0,4,98.0,90.0,2265,15.5,73,europe,Fiat 124 Sport Coupe
263,17.7,6,231.0,165.0,3445,13.4,78,usa,Buick Regal Sport Coupe (Turbo)
