# Dataframes en Pandas

![Image](recursos/series_dataframes.png)

In [1]:
import pandas as pd
import numpy as np
from numpy.random import randn

## Crear un Dataframe

In [2]:
df = pd.DataFrame(randn(5, 4), index='A B C D E'.split(), columns=['W', 'X', 'Y', 'Z'])

In [3]:
df

Unnamed: 0,W,X,Y,Z
A,-0.035562,-1.228944,-0.880965,-1.289002
B,0.085804,1.159249,0.65416,-0.018256
C,-0.360907,-0.543939,-0.046147,2.468277
D,0.212339,-0.615159,0.662941,-1.476297
E,0.14784,1.218207,0.548877,0.504417


In [4]:
type(df)

pandas.core.frame.DataFrame

## Consultamos una columna (Series)

In [5]:
colY = df['Y']

In [6]:
colY

A   -0.880965
B    0.654160
C   -0.046147
D    0.662941
E    0.548877
Name: Y, dtype: float64

In [7]:
type(colY)

pandas.core.series.Series

In [8]:
colY.sum()

0.9388655762948859

## Consultamos más de una columna (Series)

In [12]:
df[['W', 'Z']]

Unnamed: 0,W,Z
A,-0.035562,-1.289002
B,0.085804,-0.018256
C,-0.360907,2.468277
D,0.212339,-1.476297
E,0.14784,0.504417


## Función info()

In [15]:
df

Unnamed: 0,W,X,Y,Z
A,-0.035562,-1.228944,-0.880965,-1.289002
B,0.085804,1.159249,0.65416,-0.018256
C,-0.360907,-0.543939,-0.046147,2.468277
D,0.212339,-0.615159,0.662941,-1.476297
E,0.14784,1.218207,0.548877,0.504417


In [14]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, A to E
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   W       5 non-null      float64
 1   X       5 non-null      float64
 2   Y       5 non-null      float64
 3   Z       5 non-null      float64
dtypes: float64(4)
memory usage: 360.0+ bytes


## Algunas funciones

In [16]:
df['X'].mean()

-0.002116962011921286

In [17]:
df['W'].max()

0.2123394876936447

In [18]:
df['W'].count()

5

## Funcion describe()

In [19]:
df.describe()

Unnamed: 0,W,X,Y,Z
count,5.0,5.0,5.0,5.0
mean,0.009903,-0.002117,0.187773,0.037828
std,0.22657,1.119426,0.665323,1.595383
min,-0.360907,-1.228944,-0.880965,-1.476297
25%,-0.035562,-0.615159,-0.046147,-1.289002
50%,0.085804,-0.543939,0.548877,-0.018256
75%,0.14784,1.159249,0.65416,0.504417
max,0.212339,1.218207,0.662941,2.468277


## Operaciones con Dataframes

## Borrar fila/columna

In [20]:
df

Unnamed: 0,W,X,Y,Z
A,-0.035562,-1.228944,-0.880965,-1.289002
B,0.085804,1.159249,0.65416,-0.018256
C,-0.360907,-0.543939,-0.046147,2.468277
D,0.212339,-0.615159,0.662941,-1.476297
E,0.14784,1.218207,0.548877,0.504417


In [24]:
df.drop('X', axis=1, inplace=True)

In [25]:
df

Unnamed: 0,W,Y,Z
A,-0.035562,-0.880965,-1.289002
B,0.085804,0.65416,-0.018256
C,-0.360907,-0.046147,2.468277
D,0.212339,0.662941,-1.476297
E,0.14784,0.548877,0.504417


In [26]:
df.drop('D', inplace=True)

In [27]:
df

Unnamed: 0,W,Y,Z
A,-0.035562,-0.880965,-1.289002
B,0.085804,0.65416,-0.018256
C,-0.360907,-0.046147,2.468277
E,0.14784,0.548877,0.504417


In [28]:
## Seleccionar columnas

In [29]:
df['Z']

A   -1.289002
B   -0.018256
C    2.468277
E    0.504417
Name: Z, dtype: float64

In [30]:
## Seleccionar filas

In [33]:
df

Unnamed: 0,W,Y,Z
A,-0.035562,-0.880965,-1.289002
B,0.085804,0.65416,-0.018256
C,-0.360907,-0.046147,2.468277
E,0.14784,0.548877,0.504417


In [36]:
df.loc['C']

W   -0.360907
Y   -0.046147
Z    2.468277
Name: C, dtype: float64

In [37]:
df.loc[['C', 'A']]

Unnamed: 0,W,Y,Z
C,-0.360907,-0.046147,2.468277
A,-0.035562,-0.880965,-1.289002


In [38]:
# seleccionar por un indice

In [39]:
df.iloc[0]

W   -0.035562
Y   -0.880965
Z   -1.289002
Name: A, dtype: float64

In [40]:
df.iloc[1:4]

Unnamed: 0,W,Y,Z
B,0.085804,0.65416,-0.018256
C,-0.360907,-0.046147,2.468277
E,0.14784,0.548877,0.504417


## Seleccion Condicional

In [41]:
df

Unnamed: 0,W,Y,Z
A,-0.035562,-0.880965,-1.289002
B,0.085804,0.65416,-0.018256
C,-0.360907,-0.046147,2.468277
E,0.14784,0.548877,0.504417


In [42]:
df > 0.5

Unnamed: 0,W,Y,Z
A,False,False,False
B,False,True,False
C,False,False,True
E,False,True,True


In [43]:
df[df['Y'] > 0]

Unnamed: 0,W,Y,Z
B,0.085804,0.65416,-0.018256
E,0.14784,0.548877,0.504417


In [44]:
df[(df['Y'] > 0) & (df['W'] > 0.1)]

Unnamed: 0,W,Y,Z
E,0.14784,0.548877,0.504417


In [45]:
df

Unnamed: 0,W,Y,Z
A,-0.035562,-0.880965,-1.289002
B,0.085804,0.65416,-0.018256
C,-0.360907,-0.046147,2.468277
E,0.14784,0.548877,0.504417


In [46]:
df[df['Z'] > 0]['W'].sum()

-0.21306702731361196

In [47]:
-0.360907 + 0.147840

-0.21306699999999998