# **Introduction to Python - Part 3**

**Remember to make a copy of this Notebook in your Google Drive**

First of all, make sure that you're connected to a runtime. In the top right corner you'll see a 'Connect' button; once you click on it, you'll be assigned disk and RAM resources running on Google Cloud servers.

* Numpy basics
* Pandas basics
* Matplotlib basics
* Seaborn basics

# Matplotlib


In [None]:
import matplotlib
import matplotlib.pyplot as plt

# Seaborn


"Seaborn is a library for making statistical graphics in Python. It builds on top of matplotlib and integrates closely with pandas data structures."
http://seaborn.pydata.org/introduction.html

In [None]:
import seaborn as sns

# Numpy

In [None]:
import numpy as np

NumPy, as described on its [website](https://numpy.org), is a package for scientific computing with Python.

Below are a few examples of NumPy functions, so you can get a feel for what the library offers. For detailed information about each function, check out the [documentation](https://numpy.org/doc/stable/).



1. **Convert list into array.**

In [None]:
# Start from a plain Python list and turn it into a one-dimensional ndarray.
var_a = list(range(5))
np.array(var_a)

2. **Vector or matrix array of zeros or ones.**

In [None]:
# 3x3 matrix filled with zeros (the shape is given as a tuple).
np.zeros(shape=(3, 3))

In [None]:
# Vector of three ones (an integer shape gives a one-dimensional array).
np.ones(shape=3)

3. **Linearly spaced array**

In [None]:
# Ten evenly spaced values from 1 to 10, both endpoints included.
np.linspace(start=1, stop=10, num=10)

4. **Same as the previous one, but instead of specifying the number of elements you specify the step (the stop value is excluded).**

In [None]:
# Integers from 0 up to (but not including) 10, in steps of 1.
np.arange(start=0, stop=10, step=1)

In [None]:
# Build a 10x10 multiplication table as nested lists (entry [row][col] is
# row * col), then convert it into a two-dimensional ndarray.
list_matrix = [
    [col * row for col in range(10)]
    for row in range(10)
]
matrix = np.array(list_matrix)

In [None]:
# Display the 10x10 multiplication-table array.
matrix

In [None]:
# Render the array as an image: each cell's value is mapped to a colour.
plt.imshow(matrix)

In [None]:
# Flatten the 2-D array into a 1-D array, row after row.
np.ravel(matrix)

In [None]:
# Aggregate over every element: (mean, maximum, minimum).
np.mean(matrix), np.max(matrix), np.min(matrix)

In [None]:
# axis=1 reduces along the columns, giving one maximum per row.
np.max(matrix, axis=1)

In [None]:
# Sum of the elements on the main diagonal.
np.trace(matrix)

In [None]:
# Re-arrange the same 100 elements into 5 rows of 20 columns.
a = np.reshape(matrix, (5, 20))

In [None]:
# Display the reshaped (5 rows x 20 columns) array.
a

In [None]:
# Dimensions of the reshaped array as a (rows, columns) tuple.
np.shape(a)

In [None]:
# Sample 100 angles in [-pi, pi] and draw sine waves at three frequencies.
# Each curve carries its own label, so the legend is built from them.
x = np.linspace(start=-np.pi, stop=np.pi, num=100)
plt.plot(x, np.sin(x), label='x')
plt.plot(x, np.sin(2 * x), label='2x')
plt.plot(x, np.sin(4 * x), label='4x')
plt.xlabel('Angle [rad]')
plt.ylabel('sin(x)')
plt.legend()
plt.show()

In [None]:
# Inspect the 100 sample points between -pi and pi used for the sine plots.
x

# Pandas

pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool,
built on top of the Python programming language.

In [None]:
import pandas as pd

## Series

In [None]:
# A Series is a one-dimensional labelled array: four values, indexed 'a'..'d'.
s = pd.Series(data=[1, 2, 3, 4], index=list("abcd"))

In [None]:
# Display the Series: the values 1-4 alongside their labels 'a'-'d'.
s

## Dataframes

In [None]:
# Small example table: one row per country, described by three columns.
df = pd.DataFrame(
    data=[
        ("France", "Paris", 2161000),
        ("Portugal", "Lisbon", 504718),
        ("Spain", "Madrid", 3223000),
        ("Andorra", "Andorra la Vella", 22256),
    ],
    columns=["Country", "Capital", "Population"],
)

In [None]:
# Display the whole DataFrame (it only has four rows).
df

In [None]:
# Position-based selection: first row, first column. Passing lists (rather
# than bare integers) keeps the result a 1x1 DataFrame instead of a scalar.
df.iloc[[0],[0]]

In [None]:
# Select a single column by label; the result is a Series.
df.loc[:, 'Capital']

In [None]:
# Boolean filtering: keep only the rows whose population exceeds one million.
df.loc[df['Population'].gt(1e6)]

In [None]:
# Rows ordered by population, smallest first (returns a new DataFrame).
df.sort_values("Population")

In [None]:
# Rows ordered by population, largest first (returns a new DataFrame).
df.sort_values("Population", ascending=False)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

In [None]:
# Print a concise overview: column names, non-null counts, dtypes and memory usage.
df.info()

In [None]:
# Index object holding the column labels.
df.columns

In [None]:
# Add a row for Italy. DataFrame.append was removed in pandas 2.0, so the new
# row is wrapped in a one-row DataFrame and concatenated instead. As before,
# this returns a new DataFrame and leaves `df` itself unchanged;
# ignore_index=True renumbers the rows 0..n-1.
pd.concat(
    [df, pd.DataFrame([{"Country": "Italy", "Capital": "Rome", "Population": 2873000}])],
    ignore_index=True,
)

## Load external dataset

In [None]:
# Load the iris dataset straight from the seaborn-data GitHub repository
# (pd.read_csv accepts URLs; this needs network access).
iris_df = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv')

In [None]:
# You can also read a CSV file from a local path. The path below is only a
# placeholder: replace it with a real file. The result goes into its own
# variable so the iris data loaded above is not overwritten, and a missing
# file is reported instead of stopping a "Run all".
local_csv_path = '/home/user/file.csv'
try:
    local_df = pd.read_csv(local_csv_path)
except FileNotFoundError:
    print(f"Skipping local CSV example: '{local_csv_path}' does not exist.")

In [None]:
# (number of rows, number of columns) of the loaded table.
iris_df.shape

In [None]:
# Peek at the first five rows to check the columns and value formats.
iris_df.head()

In [None]:
# Summary statistics of the numeric measurement columns.
iris_df.describe()

In [None]:
# Bar chart with the number of samples per species.
sns.countplot(data=iris_df, x='species')
plt.title('Species Count')

In [None]:
# Distribution of petal length over all samples, split into 20 bins.
sns.histplot(x=iris_df["petal_length"], bins=20)

In [None]:
# Same histogram, coloured per species, with a kernel density estimate
# (kde=True) drawn on top of each group.
sns.histplot(
    x=iris_df["petal_length"],
    hue=iris_df["species"],
    bins=20,
    kde=True,
)

In [None]:
# Sepal length against sepal width, one point per flower.
plt.figure(figsize=(12, 8))
plt.title('Comparison between sepal width and length')
sns.scatterplot(data=iris_df, x='sepal_length', y='sepal_width')

In [None]:
# Sepal length against sepal width, coloured by species.
# x and y are passed by keyword: seaborn 0.12+ accepts only `data`
# positionally, so the positional form raises a TypeError there.
plt.figure(figsize=(12,8))
plt.title('Comparison between sepal width and length on the basis of species')
sns.scatterplot(x=iris_df['sepal_length'], y=iris_df['sepal_width'], hue=iris_df['species'], s=50)

In [None]:
# Petal length against petal width, coloured by species.
# x and y are passed by keyword: seaborn 0.12+ accepts only `data`
# positionally, so the positional form raises a TypeError there.
# The trailing ';' hides the returned Axes repr from the cell output.
plt.figure(figsize=(12,8))
plt.title('Comparison between petal width and length on the basis of species')
sns.scatterplot(x=iris_df['petal_length'], y=iris_df['petal_width'], hue=iris_df['species'], s=50);

In [None]:
# Grid of pairwise plots between the numeric columns, coloured by species;
# the trailing ';' hides the returned object's repr from the cell output.
sns.pairplot(iris_df,hue="species",height=3);

In [None]:
# One box plot per measurement, grouped by species, on a 2x2 grid.
# The original drew sepal_length twice and never showed sepal_width; the
# list below covers each of the four measurements exactly once.
fig, axes = plt.subplots(2, 2, figsize=(16,9))
measurements = ["petal_width", "petal_length", "sepal_length", "sepal_width"]
# axes.flat walks the grid row by row: [0,0], [0,1], [1,0], [1,1].
for panel_ax, measurement in zip(axes.flat, measurements):
    sns.boxplot(y=measurement, x="species", data=iris_df, orient='v', ax=panel_ax)
plt.show()

In [None]:
# Pairwise correlation between the numeric measurements. The text column
# 'species' is excluded explicitly: since pandas 2.0, corr() no longer drops
# non-numeric columns silently and raises an error on them instead.
iris_df.select_dtypes(include="number").corr()

## Can we infer Barcelona's street topological pattern from tree data?

In [None]:
# Download the tree dataset as CSV from the Barcelona city council open-data
# portal (needs network access).
bcn_trees = pd.read_csv("https://opendata-ajuntament.barcelona.cat/data/dataset/27b3f8a7-e536-4eea-b025-ce094817b2bd/resource/23124fd5-521f-40f8-85b8-efb1e71c2ec8/download")

In [None]:
# (number of rows, number of columns) of the downloaded table.
bcn_trees.shape

In [None]:
# Peek at the first five rows to see which columns are available.
bcn_trees.head()

In [None]:
# Summary statistics of the numeric columns.
bcn_trees.describe()

In [None]:
# Keep only the columns used below: coordinates, scientific name,
# irrigation type and neighbourhood name.
bcn_trees = bcn_trees.loc[
    :, ["latitud", "longitud", "nom_cientific", "tipus_reg", "nom_barri"]
]

In [None]:
# First 20 rows of the reduced table.
bcn_trees.head(20)

In [None]:
# Plot every tree at its (longitude, latitude) position. The tiny marker
# size keeps individual points from merging into a solid blob.
plt.figure(figsize=(12, 10))
s = sns.scatterplot(data=bcn_trees, x='longitud', y='latitud', s=0.5, legend=False)

In [None]:
# Same map, coloured by scientific name. The legend is switched off
# (legend=False) rather than listing every species.
plt.figure(figsize=(12, 10))
s = sns.scatterplot(
    data=bcn_trees,
    x='longitud',
    y='latitud',
    hue='nom_cientific',
    s=0.5,
    legend=False,
)

In [None]:
# Names of the 20 most frequent species. value_counts() sorts by descending
# frequency, so the first 20 index labels are the most common ones.
# The Series method is used because the top-level pd.value_counts function
# is deprecated since pandas 2.1.
bcn_trees['nom_cientific'].value_counts().iloc[:20].index

In [None]:
# Bar chart of the 20 most frequent species, most common first.
# `order` restricts and sorts the bars; it is computed with the Series
# method because the top-level pd.value_counts function is deprecated
# since pandas 2.1.
plt.figure(figsize=(12,6))
sns.countplot(x=bcn_trees['nom_cientific'], order=bcn_trees['nom_cientific'].value_counts().iloc[:20].index)
# Rotate the long scientific names so they do not overlap.
plt.xticks(rotation=90)

In [None]:
# Zoom into a small longitude/latitude window, with larger markers so the
# individual trees (coloured by species) can be told apart.
plt.figure(figsize=(12, 10))
s = sns.scatterplot(
    data=bcn_trees,
    x='longitud',
    y='latitud',
    hue='nom_cientific',
    s=4,
    legend=False,
)

# Restrict the visible window.
s.set(xlim=(2.15, 2.17))
s.set(ylim=(41.38, 41.41))


In [None]:
# Map of the trees coloured by the 'tipus_reg' column (irrigation type).
plt.figure(figsize=(12, 10))
s = sns.scatterplot(data=bcn_trees, x='longitud', y='latitud', hue='tipus_reg', s=1)

# Restrict the visible window.
s.set(xlim=(2.11, 2.21))
s.set(ylim=(41.36, 41.44))

# Give the legend a readable English title instead of the column name.
s.legend(title="Irrigation type")

In [None]:
# Boolean Series: True for the rows whose irrigation type is not
# the "SENSE INFORMAR" placeholder.
bcn_trees['tipus_reg'].ne("SENSE INFORMAR")

In [None]:
# Keep only the trees whose irrigation type is not "SENSE INFORMAR".
winfo_irrigation = bcn_trees.loc[bcn_trees['tipus_reg'].ne("SENSE INFORMAR")]

In [None]:
# Irrigation map again, this time using only the filtered rows.
plt.figure(figsize=(12, 10))
s = sns.scatterplot(data=winfo_irrigation, x='longitud', y='latitud', hue='tipus_reg', s=1)

# Same window as the unfiltered map, so both plots are comparable.
s.set(xlim=(2.11, 2.21))
s.set(ylim=(41.36, 41.44))

s.legend(title="Irrigation type")

In [None]:
# Map of the trees coloured by neighbourhood ('nom_barri').
fig, ax = plt.subplots(figsize=(12, 10))
s = sns.scatterplot(
    data=bcn_trees,
    x='longitud',
    y='latitud',
    hue='nom_barri',
    s=0.5,
    legend=True,
    ax=ax,
)

s.set(xlim=(2.10, 2.22))
s.set(ylim=(41.35, 41.46))

# Rebuild the legend from the handles seaborn created, anchored via
# bbox_to_anchor and split over 9 columns so the entries are laid out in a grid.
handles, labels = ax.get_legend_handles_labels()
ax.legend(
    handles=handles,
    labels=labels,
    bbox_to_anchor=(1.25, 1),
    ncol=9,
    title="Trees of barcelona in each neighborhood",
)


In [None]:
# Number of trees per neighbourhood, bars sorted from most to fewest trees.
plt.figure(figsize=(30, 8))

sns.countplot(
    data=bcn_trees,
    x='nom_barri',
    order=bcn_trees['nom_barri'].value_counts().index,
)

# Rotate the neighbourhood names so they do not overlap.
plt.xticks(rotation=90)

In [None]:
# Keep the trees lying above the straight line
#     latitud = longitud / 3.897 + 40.8425
# NOTE(review): the slope and intercept look hand-fitted to the diagonal
# street named in the final bar chart's title - confirm where they come from.
upper_diagonal = bcn_trees.loc[
    bcn_trees["latitud"].gt(bcn_trees["longitud"] / 3.897 + 40.8425)
]
plt.figure(figsize=(12, 10))
s = sns.scatterplot(data=upper_diagonal, x='longitud', y='latitud', s=0.5, legend=False)

s.set(xlim=(2.10, 2.22))
s.set(ylim=(41.35, 41.46))

In [None]:
# Keep the trees lying below the straight line
#     latitud = longitud / 3.897 + 40.8425
# NOTE(review): the slope and intercept look hand-fitted to the diagonal
# street named in the final bar chart's title - confirm where they come from.
lower_diagonal = bcn_trees.loc[
    bcn_trees["latitud"].lt(bcn_trees["longitud"] / 3.897 + 40.8425)
]
plt.figure(figsize=(12, 10))
s = sns.scatterplot(data=lower_diagonal, x='longitud', y='latitud', s=0.5, legend=False)

s.set(xlim=(2.10, 2.22))
s.set(ylim=(41.35, 41.46))

In [None]:
# Compare how many trees fall on each side of the dividing line.
# len() gives the row count directly. The previous `.count()[0]` indexed a
# label-indexed Series by integer position, which is deprecated in pandas,
# and counted the non-null values of whichever column came first.
plt.figure(figsize=(8,6))
sns.barplot(x=["upper", "lower"], y=[len(upper_diagonal), len(lower_diagonal)])
plt.ylabel("Counts")
plt.title("Number of trees in both sides of the diagonal street")