# Plotting 2

In [None]:
# Ignore this cell: it only injects CSS so that emphasised (<em>) text renders in red
# and the notebook container uses the full page width. You don't need to understand it.
# `display` and `HTML` come from the public IPython.display module; importing
# `display` from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML('<style>em { color: red; }</style> <style>.container { width:100% !important; }</style>'))

In [None]:
import pandas as pd
from pandas import DataFrame, Series

import sqlite3
import os

import matplotlib
from matplotlib import pyplot as plt

%matplotlib inline

matplotlib.rcParams["font.size"] = 15

### How do you plot a *bar chart* from a *pandas series*?

In [None]:
#Series.plot.bar()

### What are the *axes* for a *bar chart*?

In [None]:
# index  => x-axis
# values => y-axis

## Continuing scatter plot example from Plotting 1 lecture

In [None]:
# One record per measured tree; each tuple below is (age, height, diameter).
trees = [
    dict(age=age, height=height, diameter=diameter)
    for age, height, diameter in [
        (1, 1.5, 0.8),
        (1, 1.9, 1.2),
        (1, 1.8, 1.4),
        (2, 1.8, 0.9),
        (2, 2.5, 1.5),
        (2, 3, 1.8),
        (2, 2.9, 1.7),
        (3, 3.2, 2.1),
        (3, 3, 2),
        (3, 2.4, 2.2),
        (2, 3.1, 2.9),
        (4, 2.5, 3.1),
        (4, 3.9, 3.1),
        (4, 4.9, 2.8),
        (4, 5.2, 3.5),
        (4, 4.8, 4),
    ]
]
# A list of dicts becomes a DataFrame with one column per key and one row per dict.
df = DataFrame(trees)
df.head()

In [None]:
# Basic scatter plot: the "age" column on the x-axis, "height" on the y-axis.
df.plot(kind="scatter", x="age", y="height")

### What are some aspects of scatter plots that you can control with different variables? 
1. *X-axis*: age
2. *Y-axis*: height
3. *Color of plot points*
4. *Size of plot points*: diameter
5. *Shape of plot points*

### How to vary *color* of plot points?

In [None]:
# `color` applies one colour to every point; "r" is matplotlib's shorthand for red.
df.plot.scatter("age", "height", color="r")

### How to vary *size* of plot points?

In [None]:
# A single number for `s` gives every point the same marker size.
df.plot.scatter("age", "height", s=100)

### How to vary *shape* of plot points?

In [None]:
# `marker` selects the point shape; "^" draws upward-pointing triangles.
df.plot(kind="scatter", x="age", y="height", marker="^")

### How do we add *tree diameter* data to this *scatter plot*?

In [None]:
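# df.plot.scatter(x="age", y="height", s="diameter")
# Older pandas versions reject this: `s` had to be a number or a sequence of sizes
# (e.g. a Series), not a column name. Newer pandas versions also accept a column name.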

In [None]:
# Passing the diameter column as `s` sizes each point by that row's diameter.
df.plot.scatter(x="age", y="height", s=df.diameter)

In [None]:
# Scale every diameter by 25 (element-wise) to get more visible marker sizes.
df["diameter"].mul(25)

In [None]:
# Same plot as before, but with the marker sizes scaled up by a factor of 25.
df.plot.scatter(x="age", y="height", s=df["diameter"].mul(25))

In [None]:
# Anti-pattern (for demonstration only): the same variable, diameter, controls two
# visual aspects of the plot at once - the marker size and the marker colour.
diameter = df["diameter"]
df.plot.scatter(x="age", y="height", s=diameter * 25, c=diameter)

In [None]:
# Anti-pattern (for demonstration only): diameter again controls both size and colour.
# `vmin` is forwarded to the underlying scatter call; here it is set two units below
# the smallest diameter.
df.plot(
    kind="scatter",
    x="age",
    y="height",
    s=df["diameter"] * 25,
    c=df["diameter"],
    vmin=df["diameter"].min() - 2,
)

## IRIS dataset: http://archive.ics.uci.edu/ml/datasets/iris

In [None]:
# First attempt: no column names are supplied, so pandas takes the file's first line
# as the header row.
iris_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
df = pd.read_csv(iris_url)
df.head()

In [None]:
# Second attempt: supply the column names explicitly so that the first line of the
# file is read as data instead of being used as the header.
iris_columns = ["sep-len", "sep-wid", "pet-len", "pet-width", "class"]
df = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data",
    names=iris_columns,
)
df.head()

In [None]:
# Show the last five rows of the frame.
df.tail(n=5)

### How do we create a *scatter plot* for various *class types*?

In [None]:
# Collect the distinct values of the "class" column into a set.
classes = {label for label in df["class"]}
classes

In [None]:
# Keep only the Iris-versicolor rows, then plot "pet-width" against "pet-len".
is_versicolor = df["class"] == "Iris-versicolor"
df[is_versicolor].plot.scatter(x="pet-width", y="pet-len")

In [None]:
# Draw one separate scatter plot per variety. Each call gets no `ax`, so each one
# creates its own figure. Sorting fixes the order of the figures (a set has no
# guaranteed iteration order), and the title says which variety each figure shows.
for variety in sorted(classes):
    df[df["class"] == variety].plot.scatter(x = "pet-width", y = "pet-len", title=variety)

### When we call a plotting function, like scatter
1. RULE 1: if an AxesSubplot is passed as `ax`, then plot in that subplot
2. RULE 2: if `ax` is None, create a new AxesSubplot
3. RULE 3: return the AxesSubplot that was used

In [None]:
# Draw every variety into ONE shared subplot, each with its own colour and marker.
# `plot_area` starts as None, so the first call creates the subplot; the returned
# axes object is then passed back in as `ax` so later calls draw on the same plot.
plot_area = None
colors = ["blue", "green", "red"]
markers = ["o", "^", "v"]
# Iterate over sorted(classes) rather than the raw set: set iteration order for
# strings can change between kernel sessions, which would silently change which
# variety gets which colour/marker. zip pairs each variety with one style without
# consuming the lists; add more colours/markers if more classes are introduced.
for variety, color, marker in zip(sorted(classes), colors, markers):
    sub_df = df[df["class"] == variety]
    plot_area = sub_df.plot.scatter(x="pet-width", y="pet-len",
                                    ax=plot_area, color=color,
                                    label=variety, marker=marker)

## Line plots

In [None]:
# No index is given, so the Series gets the default integer index 0..4,
# which is what the line plot uses for the x-axis.
s = Series(data=[0, 100, 300, 200, 400])
s.plot(kind="line")

In [None]:
# Same values as before, now with explicit, unevenly spaced index labels
# (written as index label -> value).
s = Series({0: 0, 1: 100, 20: 300, 21: 200, 22: 400})
s

In [None]:
# The index labels are used as x positions, so the points are unevenly spaced.
s.plot(kind="line")

In [None]:
# The index is deliberately out of order: the line connects the points in row order,
# not in order of their x values.
s = Series(data=[0, 100, 300, 200, 400], index=[0, 20, 21, 22, 1])
s.plot(kind="line")

In [None]:
# Sort by index label first so the line is drawn in increasing x order.
s.sort_index().plot(kind="line")

### Temperature dataset line plot

In [None]:
# Twelve "high" and twelve "low" temperature readings, one row per month
# (rows 0..11 are labelled Jan..Dec in the plots that follow).
df = DataFrame(data=dict(
    high=[26, 31, 43, 57, 68, 78, 82, 79, 72, 59, 44, 30],
    low=[11, 15, 25, 36, 46, 56, 61, 59, 50, 39, 28, 16],
))
df

In [None]:
# One line per column; the returned axes object is kept so the labels can be set on it.
ax = df.plot(kind="line")
ax.set_xlabel(xlabel="Month")
ax.set_ylabel(ylabel="Temp (Fahrenheit)")

In [None]:
# Line plot of the temperatures with one labelled tick per month.
ax = df.plot.line()
ax.set_xlabel("Month")
ax.set_ylabel("Temp (Fahrenheit)")
# Fix the tick positions first (one per row, 0..11) and only then attach the labels:
# labels set before the positions are fixed can end up on the wrong ticks.
ax.set_xticks(range(12))
ax.set_xticklabels(["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"])
None  # end on None so the cell shows only the figure, not a text repr

In [None]:
ax = df.plot.line(figsize = (8, 4)) # Option 1: control figsize to space out
ax.set_xlabel("Month")
ax.set_ylabel("Temp (Fahrenheit)")
# Fix the tick positions first (one per row, 0..11) and only then attach the labels:
# labels set before the positions are fixed can end up on the wrong ticks.
ax.set_xticks(range(12))
# NOTE(review): rotation=90 looks carried over from Option 2; with the wider figure it
# may not be needed here - confirm whether this cell should show unrotated labels.
ax.set_xticklabels(["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"], rotation=90)
None  # end on None so the cell shows only the figure, not a text repr

In [None]:
ax = df.plot.line() # Option 2: rotate x-tick labels
ax.set_xlabel("Month")
ax.set_ylabel("Temp (Fahrenheit)")
# Fix the tick positions first (one per row, 0..11) and only then attach the labels:
# labels set before the positions are fixed can end up on the wrong ticks.
ax.set_xticks(range(12))
# rotation=90 turns the labels vertical so neighbouring month names do not overlap.
ax.set_xticklabels(["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"], rotation=90)
None  # end on None so the cell shows only the figure, not a text repr

### Convert temperature to *Celsius*

In [None]:
# Fahrenheit -> Celsius, applied element-wise to the whole frame:
# subtract 32, divide by the 180-degree span (212 - 32), then scale by 100.
celcius = df.sub(32).div(212 - 32).mul(100)
celcius.head()

In [None]:
# Add a constant "freezing" column holding 0 in every row, so that plotting the frame
# draws it as an extra flat line.
celcius = celcius.assign(freezing=0)
celcius

In [None]:
# Line plot of every column of the converted frame, with one labelled tick per month.
ax = celcius.plot.line()
ax.set_xlabel("Month")
ax.set_ylabel("Temp (Celsius)")  # spelling corrected from "Celcius"
# Fix the tick positions first (one per row, 0..11) and only then attach the labels:
# labels set before the positions are fixed can end up on the wrong ticks.
ax.set_xticks(range(12))
ax.set_xticklabels(["Jan", "Feb", "Mar", "Apr", "May", "Jun",
                    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"], rotation=90)
None  # end on None so the cell shows only the figure, not a text repr

# Stock Market Example

In [None]:
# Load sp500.csv; the relative path means it is looked up in the current working directory.
df = pd.read_csv(filepath_or_buffer="sp500.csv")
df.head()

### How much *current wealth (2018)* would we have if we had invested *$1000 in 1970*?

In [None]:
# "tot" is the running product of the "return" column: each row holds the product of
# all return values up to and including that row.
# presumably "return" is a per-year growth multiplier - verify against sp500.csv
df = df.assign(tot=df["return"].cumprod())
df.tail()

In [None]:
# Amount initially invested.
starting = 1000
# Cumulative growth factor: the running product of the "return" column.
growth = df["return"].cumprod()
df["tot"] = growth
# Wealth at each row = initial amount scaled by the cumulative growth factor.
df["wealth"] = starting * growth
df.set_index("year").tail()

In [None]:
# Index the wealth column by year so that the year becomes the x-axis of the line plot.
wealth_by_year = df.set_index("year")["wealth"]
ax = wealth_by_year.plot(kind="line")
ax.set_ylabel("Wealth ($)")