### El objetivo de esta notebook es obtener los valores de precipitaciones y temperaturas para todos los días entre los años 2016 y 2019

In [16]:
import numpy as np
import pandas as pd
import re

In [17]:
# Importo dataset del Observatorio de BS AS
datos_OCBA = pd.read_excel('../Data/datos_OCBA.xlsx')

In [18]:
datos_OCBA.sample(5)

Unnamed: 0,Año,Mes,Dia,Precipitaciones (mm),Temperatura máxima (°C),Temperatura mínima (°C)
21523,1966,12,5,0.0,26.1,9.2
16285,1952,8,2,0.0,11.6,2.8
4521,1920,5,18,0.0,17.4,11.2
1327,1911,8,20,0.1,14.7,10.3
30029,1990,3,20,0.0,26.9,16.0


In [19]:
# Resumen
datos_OCBA.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40999 entries, 0 to 40998
Data columns (total 6 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Año                      40999 non-null  int64  
 1   Mes                      40999 non-null  int64  
 2   Dia                      40999 non-null  int64  
 3   Precipitaciones (mm)     40999 non-null  float64
 4   Temperatura máxima (°C)  40752 non-null  float64
 5   Temperatura mínima (°C)  40750 non-null  float64
dtypes: float64(3), int64(3)
memory usage: 1.9 MB


In [20]:
# Cambiamos los formatos de los valores de "mes" y "dia"
datos_OCBA.Mes = datos_OCBA.Mes.astype(str)
datos_OCBA.Dia = datos_OCBA.Dia.astype(str)

In [21]:
datos_OCBA.loc[0,'Mes']

'1'

In [22]:
len(datos_OCBA.loc[0,'Mes']) == 0

False

In [23]:
# Filtramos por los años que nos interesa
año_mask = (datos_OCBA['Año'] > 2015) & (datos_OCBA['Año'] < 2020)
OCBA_2016_2019 = datos_OCBA.loc[año_mask]
OCBA_2016_2019.sample(5)

Unnamed: 0,Año,Mes,Dia,Precipitaciones (mm),Temperatura máxima (°C),Temperatura mínima (°C)
40902,2019,12,26,48.0,27.5,15.3
40799,2019,9,14,0.0,20.4,11.6
40689,2019,5,27,0.0,18.8,13.5
40431,2018,9,11,0.0,20.6,13.0
40253,2018,3,17,29.0,27.0,20.1


In [24]:
# Cambio los valores de la columna "mes"
for i in OCBA_2016_2019.index:
    if len(OCBA_2016_2019.loc[i,'Mes']) == 1:
        OCBA_2016_2019.loc[i,'MesClean'] = '0' + OCBA_2016_2019.loc[i,'Mes']
    else:
        OCBA_2016_2019.loc[i,'MesClean'] = OCBA_2016_2019.loc[i,'Mes']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [25]:
# Cambio los valores de la columna "Dia"
for i in OCBA_2016_2019.index:
    if len(OCBA_2016_2019.loc[i,'Dia']) == 1:
        OCBA_2016_2019.loc[i,'DiaClean'] = '0' + OCBA_2016_2019.loc[i,'Dia']
    else:
        OCBA_2016_2019.loc[i,'DiaClean'] = OCBA_2016_2019.loc[i,'Dia']

In [26]:
# Vemos los resultados de nuestras transformaciones
OCBA_2016_2019.sample(5)

Unnamed: 0,Año,Mes,Dia,Precipitaciones (mm),Temperatura máxima (°C),Temperatura mínima (°C),MesClean,DiaClean
40146,2017,11,30,0.0,27.4,18.4,11,30
39530,2016,3,24,0.0,27.1,15.4,3,24
40181,2018,1,4,0.0,36.7,22.2,1,4
39572,2016,5,5,3.0,18.5,10.6,5,5
40804,2019,9,19,0.0,21.0,12.9,9,19


In [27]:
OCBA_2016_2019['fecha'] = OCBA_2016_2019['Año'].astype(str) + '-' + OCBA_2016_2019['MesClean'] + '-' + OCBA_2016_2019['DiaClean']
OCBA_2016_2019.sample(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,Año,Mes,Dia,Precipitaciones (mm),Temperatura máxima (°C),Temperatura mínima (°C),MesClean,DiaClean,fecha
40370,2018,7,12,0.0,16.5,9.8,7,12,2018-07-12
40392,2018,8,3,0.5,12.4,8.9,8,3,2018-08-03
40771,2019,8,17,20.0,17.2,11.6,8,17,2019-08-17
39501,2016,2,24,0.0,34.7,22.4,2,24,2016-02-24
40719,2019,6,26,0.0,14.4,4.4,6,26,2019-06-26


In [28]:
# Eliminamos las columnas que ya no necesitamos
OCBA_2016_2019_clean = OCBA_2016_2019.drop(columns=['Año','Mes','Dia','MesClean','DiaClean'])
OCBA_2016_2019_clean.sample(5)

Unnamed: 0,Precipitaciones (mm),Temperatura máxima (°C),Temperatura mínima (°C),fecha
40380,0.0,12.0,7.5,2018-07-22
40831,2.0,16.6,11.5,2019-10-16
39500,0.0,32.5,23.0,2016-02-23
40444,0.0,22.9,11.8,2018-09-24
40693,0.0,17.5,10.4,2019-05-31


In [31]:
# Valores nulos o algo raro?
OCBA_2016_2019_clean.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1461 entries, 39447 to 40907
Data columns (total 4 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Precipitaciones (mm)     1461 non-null   float64
 1   Temperatura máxima (°C)  1461 non-null   float64
 2   Temperatura mínima (°C)  1461 non-null   float64
 3   fecha                    1461 non-null   object 
dtypes: float64(3), object(1)
memory usage: 97.1+ KB


In [34]:
# Y duplicados?
OCBA_2016_2019_clean.duplicated().value_counts()

False    1461
dtype: int64

In [35]:
condiciones_climaticas = OCBA_2016_2019_clean

# Dataframe listo para usar: "condiciones_climaticas"

In [37]:
condiciones_climaticas.shape()

TypeError: 'tuple' object is not callable