# Github del curso

https://github.com/zamorays/cursoDatosGeograficos

# Data

https://www.dropbox.com/s/o7arhhlesislqch/data.zip?dl=0

### ¿Qué es Data Science? 

La Ciencia de Datos trata del estudio de la extracción generalizada de conocimiento a partir de información.



“Un Científico de Datos es la persona que sabe más de estadística que cualquier programador y que, a la vez, sabe más de programación que cualquier estadístico.”

[Josh Wills](https://www.linkedin.com/in/josh-wills-13882b/)



La Ciencia de Datos combina ingeniería de datos, método científico, matemáticas, estadística, computación avanzada, visualización y el conocimiento de expertos en los distintos ámbitos de especialidad.


[Un gran artículo sobre cómo ser un científico de datos.](http://www.solocodigoweb.com/blog/2017/01/30/quiero-ser-un-cientifico-de-datos-por-donde-empiezo/)

### La caja de herramientas
* Bases de datos relacionales: SQL, (postgis como extensión de postgresql)
* Bases de datos no relacionales (NoSQL): Cassandra, MongoDB, etc.
* Lenguajes de programación: R, Python (Pandas+Geopandas, Fiona, Shapely, geomatplotlib, rasterio)
* Machine Learning (Scikit-Learn, pybrain, !!!TENSORFLOW!!!)
* Programación de altas prestaciones, programación distribuida, Hadoop (Hadoop no! Cloudera), Spark (Qué no! PySpark) 

![title](img/data-science.jpg)

![title](img/ciclo-vida.png)

![historia](img/historia_ciencia_datos.jpeg)

### Sitios de interés

* Common Crawl, que pone a disposición un gran número de datos de miles de millones de webs:

http://www.sorayapaniagua.com/2013/01/28/common



* Kaggle, un sitio interesante en el que existen competiciones públicas de extracción de conocimiento y predicción a partir de datos:

http://www.kaggle.com





# Numpy

## Documentación
https://docs.scipy.org/doc/numpy-1.13.0/index.html

In [None]:
import numpy as np

instalacion:
ubuntu: sudo pip install numpy

windows: conda install numpy

In [None]:
# An array built from a flat tuple is one-dimensional: its shape is (3,),
# not (3, 1), so the label describes it as a 1-D vector.
print('arreglo de dimension 3 (vector 1-D)')
a = np.array((1,2,3))
print(a)
print(a.shape)  # -> (3,)
print(a[0])     # first element


In [None]:
print('arreglo dimension 2x3')
b = np.array([[1,2,3],[4,5,6]])
print(b)
print(b.shape)
print(b[0])
print(b[0][0])

In [None]:
print('arreglo dimension 3x2')
c = np.array([[1,2],[3,4],[5,6]])
print(c)
print(c.shape)

In [None]:
# matriz de ceros
a = np.zeros((10,10))   
a        

In [None]:
# matriz de unos
b = np.ones((10,20))    
b

In [None]:
# matriz de sietes
c = np.full((2,2), 7)  
print(c)                       

In [None]:
#matriz diagonal
d = np.eye(5)         
print(d)    

In [None]:
# matriz con numeros aleatorios
e = np.random.random((5,5))  
print(e)  

# Distribuciones

instalacion:
ubuntu: sudo pip install matplotlib
        
sudo pip install seaborn

windows: conda install matplotlib
         
conda install seaborn

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline




In [None]:
x = np.random.random(size = 10000000)
sns.distplot(x)


In [None]:
x = np.random.normal(loc=3,scale=2,size = 10000)
sns.distplot(x)



In [None]:
x = np.random.poisson(lam=1,size = 10000000)
sns.distplot(x)


In [None]:
# numeros aleatorios
print(np.random.randint(low=1,high=100,size=5))

In [None]:
print(np.random.choice([2.0,1,3.0,5.0],3))

# Operaciones matriciales

In [None]:
x = np.array([[1,2],[3,4]], dtype=np.float64)
y = np.array([[5,6],[7,8]], dtype=np.float64)
print(x)
print(y)

In [None]:
# suma y resta
print(x+y)
print(x-y)

In [None]:
# multiplication and division (both operate element by element)
print(x*y)   # element-wise product, NOT the matrix product (that is np.dot)
print()
print(x/y)

In [None]:
print(np.sin(x))
print()
print(np.cos(x))
print()
print(np.sqrt(x))
print()
print(np.dot(x,y))  #producto matricial
print()
print(x.dot(y))

In [None]:
print(x)
print(x.T)

# Broadcasting

In [None]:

x = np.array([[1,2,3], [4,5,6], [7,8,9], [10, 11, 12]])
v = np.array([1, 0, 1])
y = x + v  
print(y) 

In [None]:
print(x)
print()
print(x+1)

# reshape

In [None]:
u = np.random.randint(1,10,size=10)
print(u)

In [None]:
a = np.reshape(u, (2,5))
a




In [None]:
a = np.reshape(u,[5,2])
a

In [None]:
print(0 * np.nan)
print(np.nan == np.nan)
print(np.inf > np.nan)
print(np.nan - np.nan)
print(0.3 == (3 * 0.1))
print(0.3)
print(3. * 0.1)
print(False is False)
print(np.arange(1,100000,2))

In [None]:
m = np.array([[1,2],[3,4]])
u = np.linalg.inv(m)
np.dot(m,u)
m.dot(u)

**Quiz 1:** Cree una matriz de 100 por 100 con entradas del 1 al 10,000

In [None]:
# np.arange builds the 1..10000 sequence directly as an array (no intermediate
# Python list); reshape then lays it out row by row as a 100x100 matrix.
u = np.arange(1, 10001).reshape(100, 100)
u

**Quiz 2:** Cree una función que reciba una posición (x,y) y devuelva las coordenadas (x',y') rotadas un ángulo $\alpha$

In [None]:
def rotar(alfa,P):
    """Rotate the 2-D point ``P`` by the angle ``alfa`` about the origin.

    ``alfa`` is given in radians and ``P`` is any pair ``(x, y)`` accepted by
    ``np.dot``. The result is the product of the rotation matrix
    ``[[cos, -sin], [sin, cos]]`` with ``P``, returned as a numpy array.
    """
    coseno = np.cos(alfa)
    seno = np.sin(alfa)
    matriz_rotacion = np.array([[coseno, -seno],
                                [seno, coseno]])
    return np.dot(matriz_rotacion, P)
rotar(np.deg2rad(45),(1,0))

# Pandas

## Documentación
http://pandas.pydata.org/pandas-docs/stable/


ubuntu: sudo pip install pandas


windows: conda install pandas

In [None]:
import pandas as pd

## Series

In [None]:
s = pd.Series([1,2,'a',4,5])
s

In [None]:
s = pd.Series([1,2,3,4,5],index=['a','b','c','d','e'])
s


### Las series pueden trabajarse como arreglos (listas) o como diccionarios

In [None]:
print(s[1:])
print(s['b'])

In [None]:
s=pd.Series(np.random.random(5),index=s.index)
s

In [None]:
s=pd.Series(np.random.random(5),index=s.values)
print(s)

In [None]:
np.exp(s)

In [None]:
s=pd.Series(np.arange(0,10))

print(s)
print(s[:5])
print(s[5:])
print(s[:5]+s[5:])

In [None]:
print(s[:5]+s[5:])

In [None]:
u = s[5:]
u

In [None]:
u.reset_index(drop=True)

In [None]:
u=s[:5]
v=s[5:]
u=u.reset_index(drop=True)
v=v.reset_index(drop=True)
print(u+v)

## DataFrame


In [None]:
diccionario={'uno':np.random.randint(0,5,10),
            'dos':np.random.random(10)}
diccionario

In [None]:
df = pd.DataFrame(diccionario,index=list('abcdefghij'))
df

In [None]:
# Rename through an explicit mapping instead of by position: 'uno' holds the
# random integers and 'dos' the random floats, so a positional list
# ['floats', 'ints'] swaps the labels whenever the columns come out in
# dictionary-insertion order ('uno', 'dos').
df = df.rename(columns={'uno': 'ints', 'dos': 'floats'})
# One label per row (10 characters for the 10 rows); labels may repeat.
df.index=list('confirmado')
df

In [None]:
df.index

In [None]:
df.columns

In [None]:
df=pd.DataFrame([[1,2,'hola'],[3,4,'mundo']]\
                ,columns=['col1','col2','col3'],index=['row1','row2'])

In [None]:
df

In [None]:
df.info()

In [None]:
import pandas as pd

In [None]:
data = [{'a': 1, 'b': 2}, {'a': 5, 'b': 10, 'c': 20}]
df=pd.DataFrame(data)
df

## agregando y eliminando columnas

In [None]:
df['d'] = df.a/df.b # df.a/df.b
df

In [None]:
del df['d'] 

In [None]:
df

In [None]:
# `del df.a` does not work here: attribute deletion is not routed to the
# column and raises AttributeError, so the column is removed with item syntax.
del df['a']
df

## Lectura y escritura

## Formatos principales
* csv
* json
* html
* hdf5
* parquet
* stata
* sql
* google big query

In [None]:
df=pd.DataFrame(data)
df

In [None]:
df.to_json('json_split.json',orient='split')
df.to_json('json_index.json',orient='index')
df.to_json('json_records.json',orient='records')

In [None]:
pd.read_json('json_split.json',orient='split')

In [None]:
df.to_csv('dataframe_csv.csv')
df.to_html('html.html')
df.to_hdf('hdf.hdf',key='x')

In [None]:
cd ciencia_datos_curso/

In [None]:
df = pd.read_csv('data/CIE.csv')
df.loc[:,['NUM','NC','EXPEDIENTE']].info()

In [None]:
df['NUM']

## Head and Tail

In [None]:
df.head(10)

**Quiz 3:** Cree una función que devuelva los promedios de las columnas de un arreglo de numpy

# Análisis descriptivo



* sum
* mean
* mad
* median
* min
* max
* mode
* abs
* prod
* std
* var
* sem
* skew
* kurt
* quantile
* cumsum
* cumprod
* cummax
* cummin

In [None]:
df.prod(axis=0)

In [None]:
df.NC.quantile([0.25,0.5,0.75])

In [None]:
df.describe(percentiles=[0.1,0.2,0.3])

In [None]:
df.NC.sort_index(ascending=False).cumsum().head()

In [None]:
df.columns =[x.lower() for x in df.columns]  # el nombre de las columnas funciona como una lista
df.head()

# Missing data

In [None]:
# Eliminar NAs
print(len(df))
print(len(df.dropna(how='all')))

In [None]:
df

In [None]:
df.dropna(subset=['de2'],inplace=True)

In [None]:
len(df)

In [None]:
df = pd.read_csv('data/CIE.csv')
df.fillna(0)

# Selección de datos

In [None]:
df.head(5)

In [None]:
# seleccionando por posicion
df.index=df.EXPEDIENTE
df[2:4]

In [None]:
# seleccionando columnas
df[['NUM','NC']].tail()

In [None]:
df.head()

In [None]:
## seleccionando por indice y columnas por posicion
df.iloc[2:3,2:3]

In [None]:
df.loc[(df.SEXO=='M')|(df.NC<5),['Id1','hora1','NC','SEXO']].head(10)

In [None]:
df.info()

In [None]:
df.loc[(df.EDAD.astype(int)>30)&(df.SEXO=='H'),:]

In [None]:
df.loc[df.EDAD=='FF','EDAD']='0'

In [None]:
df.loc[df.EDAD=='0','EDAD']

In [None]:
df.loc[(df.EDAD.astype(int)>30)&(df.SEXO=='F'),:]

# operaciones booleanas
* negación  ~
* and    &
* or     |

In [None]:
## Ordenar
df.sort_values(['LEU']).head().loc[:,['Id1','LEU']]

In [None]:
df[['LEU','NEU','LIN','ME']].head()

In [None]:
df.loc[df.SEXO.isin(['M','F']),:].sample(5)\
            .sort_values('EXPEDIENTE',ascending=True)

In [None]:
df.loc[df.NC<40,'NC']=40
df

# Agrupando

In [None]:
import pandas as pd
df=pd.read_csv('data/probe.probe').sample(100000)
df.head()

In [None]:
df=df.rename(columns={'Unnamed: 0':'id'})
df.head()

In [None]:
# numeric_only=True keeps non-numeric columns (e.g. a timestamp still stored
# as text) out of the per-provider means; pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
df.groupby('provider').mean(numeric_only=True)

In [None]:
print(len(df))
df=df.drop_duplicates()
len(df)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
df.plot(x='x',y='y',kind='scatter')

In [None]:
df.groupby('provider').describe()

In [None]:
# numeric_only=True keeps non-numeric columns out of the per-provider means
# (pandas >= 2.0 raises TypeError on them instead of dropping them silently).
df.groupby('provider').mean(numeric_only=True).plot(x='x',y='y',kind='scatter')

In [None]:
df.info()

In [None]:
df['timestamp'] = pd.to_datetime(df['timestamp'],format= '%Y-%m-%dT%H:%M:%SZ')
df['timestamp']=df.timestamp.dt.tz_localize('UTC').dt.tz_convert('America/Mexico_City') 

In [None]:
df.info()

In [None]:
df['hora']=df.timestamp.dt.hour
df.head()

In [None]:

df.loc[df.speed>0,:].groupby(['provider','hora'])\
            ['speed'].mean().reset_index().head(24)


In [None]:
df.loc[df.speed>0,:].groupby(['provider','hora'])\
                    ['speed'].mean().reset_index().head(24)\
                    .plot(x='hora',y='speed')

In [None]:
# Several columns are selected from a GroupBy with a list (double brackets);
# the bare tuple form ['speed','x','y'] is rejected by pandas >= 2.0.
df.loc[df.speed>0].groupby('provider')[['speed','x','y']]\
                .agg(['mean','count','median','std'])

In [None]:
def sum_coordenadas(row):
    """Return the sum of the ``x`` and ``y`` attributes of ``row``."""
    total = row.x + row.y
    return total
def limite_outlier_superior(column):
    """Upper outlier threshold of ``column``: mean plus two standard deviations.

    ``column`` must expose ``mean()`` and ``std()``.
    """
    centro = column.mean()
    dispersion = column.std()
    return centro + 2*dispersion
def limite_outlier_inferior(column):
    """Lower outlier threshold of ``column``: mean minus two standard deviations.

    ``column`` must expose ``mean()`` and ``std()``. Values below this
    threshold are treated as atypical (outliers).
    """
    centro = column.mean()
    dispersion = column.std()
    return centro - 2*dispersion

In [None]:
df['suma_coordenadas'] = df.apply(sum_coordenadas,axis=1)
df.head()

In [None]:
df[['id','x','y','speed','provider','hora']]\
                    .apply(limite_outlier_superior)

In [None]:
df[['id','x','y','speed','provider','hora']].groupby('provider')\
              .apply(limite_outlier_superior)

In [None]:
limite_outliers = df[['id','x','y','speed','provider','hora']].groupby('provider')\
                    .agg([limite_outlier_inferior,limite_outlier_superior])
limite_outliers

## Diferencias entre map, apply y applymap

In [None]:
def sumar_1(element):
    """Return ``element`` increased by one."""
    incrementado = element + 1
    return incrementado
def promedio_armonico(serie):
    """Return the harmonic mean of ``serie``.

    Computed as the number of elements divided by the sum of their
    reciprocals. ``serie`` must support ``1 / serie`` element-wise
    (e.g. a numpy array or a pandas Series).
    """
    cantidad = len(serie)
    reciprocos = 1/serie
    return cantidad/sum(reciprocos)

In [None]:
df.head()

In [None]:
df.x.map(sumar_1).head()   # elemento a elemento sobre una serie


In [None]:
df.loc[df.speed>0,['id','x','y','speed','provider']].apply(promedio_armonico)  # serie a serie sobre dataframe


In [None]:
df.loc[df.speed>0,['id','x','y','speed','provider']].apply(promedio_armonico,axis=1).head()


In [None]:
df.loc[:,['id','x','y','speed','provider']].applymap(sumar_1).head()  ## elemento a elemento sobre un dataframe

In [None]:
u = df.head(1000)
u.head()

## Rolling

In [None]:
df.loc[:,['id','x','y','speed','provider']].rolling(5,center=True).apply(np.mean).head(10)

In [None]:
limite_outliers = limite_outliers['speed'].reset_index()
limite_outliers

## Joins

In [None]:
u = pd.merge(df,limite_outliers,how='inner',on='provider')
u.head()

In [None]:
u.loc[(u.speed<u.limite_outlier_inferior) | (u.speed>u.limite_outlier_superior),:].head()

In [None]:
df1 = pd.DataFrame({'col1': [0, 1], 'col_left':['a', 'b']})
df2 = pd.DataFrame({'col1': [1, 2, 2],'col_right':[2, 2, 2]})
pd.merge(df1, df2, on='col1', how='right', indicator=True)  #.dtypes

## Concatenate

In [None]:
len(df)

In [None]:
u = df.sample(50000)
# Complement of the sample: keep the rows of df whose index label is NOT in u.
# The mask has to be built from df.index (one entry per row of df) and tested
# against u.index; `u.index.isin(u)` would compare u's labels with u's column
# names and produce a mask of the wrong length for df.
v = df.loc[~df.index.isin(u.index),:]


In [None]:
print(len(u),len(v))

In [None]:
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the two frames
# the same way and keeps the original index labels (ignore_index=False).
print(len(pd.concat([u, v])))

In [None]:
len(pd.concat([u,v]))

In [None]:
u = u[['x','y']]
v = v[['x','y']]
pd.concat([u,v],axis=1).head(10)
   # por qué aparecen los na's?

In [None]:
u.reset_index(drop=True,inplace=True)
v.reset_index(drop=True,inplace=True)
pd.concat([u,v],axis=1).head(10)

## Testeo de hipótesis

**Quiz:** Crea una función que tome dos series y testee si las medias de ambas series son iguales con un nivel de significancia del 5% (prueba de medias)

In [None]:


df.groupby('provider').agg(['mean','size'])

In [None]:
def prueba_medias(serie1,serie2,z_critica=1.96):
    """Two-sided test for equality of the means of two samples.

    The statistic is the difference of the sample means divided by its
    standard error, where the standard error is derived from the pooled
    variance of both samples::

        s_p^2 = ((n1-1)*s1^2 + (n2-1)*s2^2) / (n1 + n2 - 2)
        z     = (mean1 - mean2) / sqrt(s_p^2 * (1/n1 + 1/n2))

    Parameters
    ----------
    serie1, serie2 :
        Samples exposing ``mean()``, ``std()`` and ``len()`` (e.g. pandas
        Series).
    z_critica : float, default 1.96
        Critical value the absolute statistic is compared with. The default
        is the two-sided 5% critical value of the standard normal.

    Returns
    -------
    bool
        ``True`` when ``|z| < z_critica`` (equal means are not rejected),
        ``False`` otherwise, including when the statistic is NaN.
    """
    n1 = len(serie1)
    n2 = len(serie2)
    # Pooled variance: each sample variance weighted by its degrees of freedom.
    varianza_agrupada = ((n1-1)*serie1.std()**2+(n2-1)*serie2.std()**2)/(n1+n2-2)
    # Standard error of the difference of means (a standard deviation, not a variance).
    error_estandar = np.sqrt(varianza_agrupada*(1.0/float(n1)+1.0/float(n2)))
    z_calculada = (serie1.mean()-serie2.mean())/error_estandar
    # bool() keeps the return type a plain Python bool rather than numpy.bool_.
    return bool(abs(z_calculada) < z_critica)
serie1 = df.loc[df.provider==6,'hora']
serie2 = df.loc[df.provider==7,'hora']
prueba_medias(serie1,serie2)

## Tablas pivote

In [None]:
df['fecha']=df.timestamp.dt.date
df.head()

In [None]:
u = df.loc[df.speed>0,:].groupby(['provider','fecha'])[['x','y','speed']].mean().reset_index()
u

In [None]:
u.pivot(index='fecha', columns='provider', values='speed')

# Text data

In [None]:
s = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'])
s.str.upper()

In [None]:
idx = pd.Index([' jack', 'jill ', ' jesse ', 'frank'])  #strip(), lstrip(), rstrip()
idx.str.strip()

In [None]:
s2 = pd.Series(['a_b_c', 'c_d_e', np.nan, 'f_g_h'])
s2.str.split('_').str.get(0)

In [None]:
s2.str.split('_').str.join('-').replace(np.nan,'x')

In [None]:
s2.str.pad(2,side='both')

In [None]:
cat()	Concatenate strings
split()	Split strings on delimiter
rsplit()	Split strings on delimiter working from the end of the string
get()	Index into each element (retrieve i-th element)
join()	Join strings in each element of the Series with passed separator
get_dummies()	Split strings on the delimiter returning DataFrame of dummy variables
contains()	Return boolean array if each string contains pattern/regex
replace()	Replace occurrences of pattern/regex with some other string or the return value of a callable given the occurrence
repeat()	Duplicate values (s.str.repeat(3) equivalent to x * 3)
pad()	Add whitespace to left, right, or both sides of strings
center()	Equivalent to str.center
ljust()	Equivalent to str.ljust
rjust()	Equivalent to str.rjust
zfill()	Equivalent to str.zfill
wrap()	Split long strings into lines with length less than a given width
slice()	Slice each string in the Series
slice_replace()	Replace slice in each string with passed value
count()	Count occurrences of pattern
startswith()	Equivalent to str.startswith(pat) for each element
endswith()	Equivalent to str.endswith(pat) for each element
findall()	Compute list of all occurrences of pattern/regex for each string
match()	Call re.match on each element, returning matched groups as list
extract()	Call re.search on each element, returning DataFrame with one row for each element and one column for each regex capture group
extractall()	Call re.findall on each element, returning DataFrame with one row for each match and one column for each regex capture group
len()	Compute string lengths
strip()	Equivalent to str.strip
rstrip()	Equivalent to str.rstrip
lstrip()	Equivalent to str.lstrip
partition()	Equivalent to str.partition
rpartition()	Equivalent to str.rpartition
lower()	Equivalent to str.lower
upper()	Equivalent to str.upper
find()	Equivalent to str.find
rfind()	Equivalent to str.rfind
index()	Equivalent to str.index
rindex()	Equivalent to str.rindex
capitalize()	Equivalent to str.capitalize
swapcase()	Equivalent to str.swapcase
normalize()	Return Unicode normal form. Equivalent to unicodedata.normalize
translate()	Equivalent to str.translate
isalnum()	Equivalent to str.isalnum
isalpha()	Equivalent to str.isalpha
isdigit()	Equivalent to str.isdigit
isspace()	Equivalent to str.isspace
islower()	Equivalent to str.islower
isupper()	Equivalent to str.isupper
istitle()	Equivalent to str.istitle
isnumeric()	Equivalent to str.isnumeric
isdecimal()	Equivalent to str.isdecimal

## Series de tiempo

In [None]:
u = pd.DataFrame(np.random.randn(1000, 4), index=pd.date_range('1/1/2000', periods=1000), columns=list('ABCD'))
u = u.cumsum()
u.plot()

In [None]:

u[['mA','mB','mC','mD']] = u.rolling(100).mean()
u.plot()

In [None]:
pd.to_datetime(pd.Series(['Jul 31, 2009','2010-01-10', None ]))

In [None]:
pd.to_datetime(['04-01-2012 10:00'], dayfirst=True)

In [None]:
pd.to_datetime('2010/11/12')

In [None]:
pd.to_datetime('12-11-2010 00:00', format='%d-%m-%Y %H:%M')

In [None]:
from datetime import datetime
start = datetime(2011, 1, 1)
end = datetime(2012, 1, 1)
index = pd.date_range(start, end)
index

In [None]:
pd.date_range(start, periods=1000, freq='M')

In [None]:
Property	Description
year	The year of the datetime
month	The month of the datetime
day	The days of the datetime
hour	The hour of the datetime
minute	The minutes of the datetime
second	The seconds of the datetime
microsecond	The microseconds of the datetime
nanosecond	The nanoseconds of the datetime
date	Returns datetime.date (does not contain timezone information)
time	Returns datetime.time (does not contain timezone information)
dayofyear	The ordinal day of year
weekofyear	The week ordinal of the year
week	The week ordinal of the year
dayofweek	The number of the day of the week with Monday=0, Sunday=6
weekday	The number of the day of the week with Monday=0, Sunday=6
weekday_name	The name of the day in a week (ex: Friday)
quarter	Quarter of the date: Jan-Mar = 1, Apr-Jun = 2, etc.
days_in_month	The number of days in the month of the datetime
is_month_start	Logical indicating if first day of month (defined by frequency)
is_month_end	Logical indicating if last day of month (defined by frequency)
is_quarter_start	Logical indicating if first day of quarter (defined by frequency)
is_quarter_end	Logical indicating if last day of quarter (defined by frequency)
is_year_start	Logical indicating if first day of year (defined by frequency)
is_year_end	Logical indicating if last day of year (defined by frequency)
is_leap_year	Logical indicating if the date belongs to a leap year

In [None]:
#df.timestamp.dt.is_month_end

In [None]:
# let us analyse the following lines in detail
#
# utc=True makes to_datetime return UTC-aware timestamps in a single step, so
# this cell can be re-run on a column that is already timezone-aware
# (tz_localize raises TypeError on tz-aware data); tz_convert then moves the
# values to the local time zone.

df['timestamp'] = pd.to_datetime(df['timestamp'],format= '%Y-%m-%dT%H:%M:%SZ',utc=True)
df['timestamp']=df.timestamp.dt.tz_convert('America/Mexico_City') 

# Tarea 

* Crea un script que calcule los primeros N números de fibonacci
* Crea una función que calcule los coeficientes de binomiales C(n,k)
* Agrega un atributo a la clase coordCityP (si desea cámbiele el nombre) que consista en una lista que contiene las coordenadas del polígono que define a la ciudad
* Agregue un método que devuelva las coordenadas cartesianas en la clase coordCityP
* Agregue un método que devuelva las coordenadas de la proyección estereográfica tanto en coordenadas cartesianas como en cilíndricas https://en.wikipedia.org/wiki/Stereographic_projection

* Del archivo Coppel.csv calcule:

 * Número de tiendas por estado
 * Número de tiendas por municipio
 * Número de altas por mes y año 

# Geopandas

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
% matplotlib inline


In [None]:
gdf = gpd.read_file('data/Municipios/')
gdf.head()


In [None]:
type(gdf)

In [None]:
type(gdf[['CVE_ENT','CVE_MUN','OID']])

In [None]:
gdf.info()

In [None]:
type(gdf.geometry)

In [None]:
gdf.plot()

In [None]:

gdf = gdf.loc[gdf.CVE_ENT=='09',:]
gdf.plot()
plt.style.use('bmh')

## Lectura

Casi cualquier formato de información geoespacial vectorial

In [None]:
import fiona; fiona.supported_drivers

In [None]:
gpd.read_file('data/delegaciones.json')

## Administrando proyecciones, CRS (Coordinate Reference Systems)

In [None]:
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
world = world[(world.name != "Antarctica") & (world.name != "Fr. S. Antarctic Lands")]
world.head()

In [None]:
ax = world.plot()
ax.set_title("WGS84 (lat/lon)")

In [None]:
world.crs

In [None]:
# Re asignación
#world.crs = {'init' :'epsg:4326'}

In [None]:
## Re-projection to EPSG:3395
# The code is passed through the `epsg` keyword; the {'init': 'epsg:...'}
# dictionary is the legacy PROJ4 spelling, which newer pyproj releases flag
# as deprecated.
world = world.to_crs(epsg=3395)
world.crs


In [None]:
ax = world.plot()
ax.set_title("Mercator")

In [None]:
world.head()

## Ok, ¿qué es una GeoSeries?

In [None]:
from shapely.geometry import Point, LineString, MultiPoint, MultiLineString, Polygon, MultiPolygon

In [None]:
p1 = Point([0,0])
p2 = Point([0,1])
p3 = Point([1,1])
gpd.GeoSeries([p1,p2,p3]).plot()

In [None]:
l1 = LineString([[1,2],[2,0]])
l2 = LineString([[3,1],[4,3]])
gpd.GeoSeries([l1,l2]).plot()

In [None]:
P1 = Polygon([[3,4],[4,3],[4,4]])
P2 = Polygon([[5,0],[6,-1],[7,-1],[8,0],[7,1],[6,1]])
gpd.GeoSeries([P1,P2]).plot()

In [None]:
mp = MultiPoint([p1,p2,p3])
ml = MultiLineString([l1,l2])
mP = MultiPolygon([P1,P2])
gpd.GeoSeries([mp,ml,mP]).plot()

In [None]:
gpd.GeoSeries([mp,ml,mP])

In [None]:
## qué es un GeoDataFrame?

u = pd.DataFrame(np.random.random(size=[3,3]),columns=['A','B','C'])
u

In [None]:
u = gpd.GeoDataFrame(u, geometry= gpd.GeoSeries([mp,ml,mP]))
u

In [None]:
df = pd.read_csv('data/probe.probe').sample(10000)
df.head()

In [None]:
def hacer_punto(row):
    """Build a ``Point`` from the ``x`` and ``y`` attributes of ``row``."""
    return Point(row.x,row.y)
df['geometry'] = df.apply(hacer_punto,axis=1)
df = gpd.GeoDataFrame(df,geometry='geometry')

In [None]:
type(df)

In [None]:
df.plot()

In [None]:
df.crs = {'init' :'epsg:4326'}

In [None]:
gdf.convex_hull.plot()