# Filtros y selecciones en Pandas

## Importamos Dataset y hacemos un EDA

In [1]:
# Importar la librería Pandas
import pandas as pd

In [2]:
# Montar la unidad
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Importamos el Dataset Netflix
df = pd.read_csv("/content/drive/MyDrive/datasets/netflix_titles.csv")
df.head()

In [7]:
# Ver información general del Dataframe
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8807 entries, 0 to 8806
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       8807 non-null   object
 1   type          8807 non-null   object
 2   title         8807 non-null   object
 3   director      6173 non-null   object
 4   cast          7982 non-null   object
 5   country       7976 non-null   object
 6   date_added    8797 non-null   object
 7   release_year  8807 non-null   int64 
 8   rating        8803 non-null   object
 9   duration      8804 non-null   object
 10  listed_in     8807 non-null   object
 11  description   8807 non-null   object
dtypes: int64(1), object(11)
memory usage: 825.8+ KB


In [None]:
# Ver los primeros registros
df.head(3)

In [None]:
# Rápida exploración de duplicados
df.duplicated().sum()

In [None]:
# Rápida exploración de nulos
df.isnull().sum()

## Selección de columnas y filas

In [None]:
df.columns

In [9]:
# Visualizar columnas
df["type"]

Unnamed: 0,type
0,Movie
1,TV Show
2,TV Show
3,TV Show
4,TV Show
...,...
8802,Movie
8803,TV Show
8804,Movie
8805,Movie


In [10]:
# Observar o filtrar una columna
cols = df.columns[1:5]
# df[["type", "title", "country"]]
df[cols]

Unnamed: 0,type,title,director,cast
0,Movie,Dick Johnson Is Dead,Kirsten Johnson,
1,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban..."
2,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi..."
3,TV Show,Jailbirds New Orleans,,
4,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K..."
...,...,...,...,...
8802,Movie,Zodiac,David Fincher,"Mark Ruffalo, Jake Gyllenhaal, Robert Downey J..."
8803,TV Show,Zombie Dumb,,
8804,Movie,Zombieland,Ruben Fleischer,"Jesse Eisenberg, Woody Harrelson, Emma Stone, ..."
8805,Movie,Zoom,Peter Hewitt,"Tim Allen, Courteney Cox, Chevy Chase, Kate Ma..."


In [11]:
# Observar o filtrar más de una columna
df[["type", "title", "country"]]

Unnamed: 0,type,title,country
0,Movie,Dick Johnson Is Dead,United States
1,TV Show,Blood & Water,South Africa
2,TV Show,Ganglands,
3,TV Show,Jailbirds New Orleans,
4,TV Show,Kota Factory,India
...,...,...,...
8802,Movie,Zodiac,United States
8803,TV Show,Zombie Dumb,
8804,Movie,Zombieland,United States
8805,Movie,Zoom,United States


In [None]:
# Agregamos ordenamiento
df[["type", "title", "country"]].sort_values(by="country")

In [None]:
# Seleccionar filas específicas
df.loc[1:3]

In [13]:
# Seleccionar un subset (filas y columnas especíicas)
df.loc[1:3, ["type", "country"]]

Unnamed: 0,type,country
1,TV Show,South Africa
2,TV Show,
3,TV Show,


## Filtros

### Filtros simples

In [14]:
df["type"].unique()

array(['Movie', 'TV Show'], dtype=object)

In [17]:
# Filtrar los registros que sean Movie
# df["type"].unique()
df[df["type"]=="TVShow"]

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description


In [20]:
# Los registros cuyo campo country es Argentina
df[df["country"]=="Argentina"]

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
275,s276,TV Show,The Kingdom,,"Chino Darín, Nancy Dupláa, Joaquín Furriel, Pe...",Argentina,"August 13, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, Spanis...","After his running mate's murder, a controversi..."
303,s304,Movie,Esperando la carroza,Alejandro Doria,"Luis Brandoni, China Zorrilla, Antonio Gasalla...",Argentina,"August 5, 2021",1985,TV-MA,95 min,"Comedies, Cult Movies, International Movies",Cora has three sons and a daughter and she´s a...
434,s435,TV Show,Okupas,,"Rodrigo de la Serna, Diego Alonso, Ariel Stalt...",Argentina,"July 20, 2021",2020,TV-MA,1 Season,"Classic & Cult TV, Crime TV Shows, Internation...","In 2000, a group of young men takes over a hou..."
673,s674,TV Show,No Time for Shame,,Santiago Artemis,Argentina,"June 19, 2021",2019,TV-MA,1 Season,"International TV Shows, Reality TV, Spanish-La...","Follow Santiago Artemis, a Buenos Aires fashio..."
709,s710,TV Show,Millennials,,"Nicolás Riera, Laura Laprida, Juan Manuel Guil...",Argentina,"June 16, 2021",2020,TV-MA,3 Seasons,"International TV Shows, Romantic TV Shows, Spa...",A group of millennials copes with the pressure...
769,s770,Movie,From Stress to Happiness,"Alejandro De Grazia, Juan Stadler",,Argentina,"June 3, 2021",2020,TV-G,58 min,"Documentaries, International Movies",A stressed-out documentary filmmaker goes on a...
838,s839,Movie,Soy Rada: Serendipity,Pablo Faro,Agustín Aristarán,Argentina,"May 27, 2021",2021,TV-MA,70 min,Stand-Up Comedy,The delightful Argentine comic Agustín Aristar...
1417,s1418,Movie,Al acecho,Francisco D'Eufemia,"Rodrigo de la Serna, Belen Blanco, Walter Jako...",Argentina,"January 12, 2021",2019,TV-MA,81 min,"International Movies, Thrillers","Looking for a fresh start, a park ranger gets ..."
1657,s1658,Movie,Notes for My Son,Carlos Sorín,"Valeria Bertuccelli, Esteban Lamothe, Julián S...",Argentina,"November 24, 2020",2020,TV-MA,84 min,"Dramas, International Movies","Battling terminal cancer, a woman writes a one..."
1713,s1714,Movie,Memories of a Teenager,Lucas Santa Ana,"Renato Quattordio, Malena Narvay, Thomás Leper...",Argentina,"November 12, 2020",2019,TV-MA,97 min,"Dramas, International Movies, LGBTQ Movies","To cope with a friend's death, a teen blogs ab..."


### Filtros con datetime

In [21]:
df.columns

Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description'],
      dtype='object')

In [None]:
# Listar los registros (movies/TV Shows) cuyo "release_year" es anterior al 2020
df[df["release_year"]<2020].sort_values(by="release_year", ascending=False)

In [45]:
# Listar los registros (movies/TV Shows) cuyo date_added es anterior al 2020
df_pp1=df[df["date_added"]<"2000"]

In [34]:
# Comparar el dtype de las columnas "release_year" y "date_added"
df[["release_year", "date_added"]].dtypes

Unnamed: 0,0
release_year,int64
date_added,object


In [47]:
# Listar registros random con "release_year" y "date_added"
df_pp1.sample(5)[["release_year", "date_added"]]

Unnamed: 0,release_year,date_added
6763,2015,"December 14, 2018"
6766,2014,"July 20, 2018"
7705,2016,"August 4, 2017"
7709,2015,"January 13, 2016"
6357,2016,"November 1, 2019"


In [52]:
# Convertimos el dtype de "date_added" a datetime
df["date_added_dt"] = pd.to_datetime(df["date_added"].str.strip())

In [54]:
# Validar
df[["release_year", "date_added", "date_added_dt"]].dtypes

Unnamed: 0,0
release_year,int64
date_added,object
date_added_dt,datetime64[ns]


In [65]:
df["date_added_dt"].dt.month[2:5]

Unnamed: 0,date_added_dt
2,9.0
3,9.0
4,9.0


In [None]:
# Volver a correr el filtro
df[df["date_added_dt"].dt.year<2020].sort_values(by="date_added_dt", ascending=False)

### Combinación de condiciones (&, |)

In [None]:
# Filtrar las movies/TV Shows Argentinas cuyo time_added sea posterior al 2010
df[(df["country"]=="Argentina") & (df["date_added_dt"].dt.year>2010)]

In [None]:
# Filtrar las movies/TV Shows con release_year anterior al 2020 y date_added posterior al 2020
df[(df["release_year"]<2010) & (df["date_added_dt"].dt.year>2010)]

### Filtros con contains

In [None]:
# Listar las registros cuyo title incluya las palabra "cielo"
df[df["title"].str.contains("cielo", case=False)]

In [None]:
# Filtrar las películas cuyo director contiene Johnson

### Filtros con isin()

In [90]:
# Listar los registros (movies/TV shows) de ["Argentina", "Mexico", "Chile"]
df[df["country"].str.lower().str.strip().isin(["argentina", "mexico", "chile"])]

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,date_added_dt
17,s18,TV Show,Falsa identidad,,"Luis Ernesto Franco, Camila Sodi, Sergio Goyri...",Mexico,"September 22, 2021",2020,TV-MA,2 Seasons,"Crime TV Shows, Spanish-Language TV Shows, TV ...",Strangers Diego and Isabel flee their home in ...,2021-09-22
275,s276,TV Show,The Kingdom,,"Chino Darín, Nancy Dupláa, Joaquín Furriel, Pe...",Argentina,"August 13, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, Spanis...","After his running mate's murder, a controversi...",2021-08-13
282,s283,Movie,La diosa del asfalto,Julián Hernández,"Ximena Romo, Mabel Cadena, Alejandra Herrera, ...",Mexico,"August 11, 2021",2020,TV-MA,127 min,"Dramas, Independent Movies, International Movies",A woman from a tough neighborhood outside Mexi...,2021-08-11
303,s304,Movie,Esperando la carroza,Alejandro Doria,"Luis Brandoni, China Zorrilla, Antonio Gasalla...",Argentina,"August 5, 2021",1985,TV-MA,95 min,"Comedies, Cult Movies, International Movies",Cora has three sons and a daughter and she´s a...,2021-08-05
311,s312,TV Show,Control Z,,"Ana Valeria Becerril, Michael Ronda, Yankel St...",Mexico,"August 4, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Spanish-Language TV Sh...",When a hacker begins releasing students' secre...,2021-08-04
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8185,s8186,Movie,The Aerial,Esteban Sapir,"Rafael Ferro, Sol Moreno, Jonathan Sandor, Ale...",Argentina,"April 1, 2018",2007,TV-MA,99 min,"Dramas, International Movies, Sci-Fi & Fantasy","In the City Without a Voice, only faceless sin...",2018-04-01
8358,s8359,Movie,The Incident,Isaac Ezban,"Raúl Méndez, Nailea Norvind, Hernán Mendoza, H...",Mexico,"June 19, 2017",2014,TV-MA,100 min,"International Movies, Sci-Fi & Fantasy, Thrillers","Two criminals race down a stairwell, a cop clo...",2017-06-19
8508,s8509,Movie,The Similars,Isaac Ezban,"Gustavo Sánchez Parra, Cassandra Ciangherotti,...",Mexico,"January 15, 2017",2015,NR,90 min,"Horror Movies, Independent Movies, Internation...",Stranded in a transit station one stormy night...,2017-01-15
8699,s8700,Movie,Warehoused,Jack Zagha Kababie,"José Carlos Ruiz, Hoze Meléndez",Mexico,"November 1, 2017",2015,TV-14,92 min,"Comedies, Dramas, International Movies",A soon-to-be-retiring Mr. Lino teaches 20-some...,2017-11-01


In [93]:
if "Argentina" in ("Argentina", "mexico", "chile"):
  print("Si")

Si


In [None]:
# Usar la IA para mejorar el filtro y que no sea case sensitive
