# Introduction to Matplotlib

<font size=3>This notebook serves as a beginner's introduction to the matplotlib package. It doesn't assume any prior knowledge of matplotlib, but it requires a basic understanding of Python and pandas. </font>

In [None]:
# import required packages
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

## Basic Plots

### Line Plot

In [None]:
# Build the sample data shared by the line-plot cells.

# integer x values 0, 1, ..., 20
x_points = range(21)
# straight line: y = 2 * x + 1
linear_points = list(map(lambda value: 2 * value + 1, x_points))
# parabola: y = x^2
non_linear_points = list(map(lambda value: value ** 2, x_points))

In [None]:
# 7x7 figure with a single set of axes that holds both curves
fig, ax = plt.subplots(figsize=(7, 7))
# straight line: y = 2x + 1
ax.plot(x_points, linear_points)
# parabola: y = x^2
ax.plot(x_points, non_linear_points)

<font size=3>This is a nice informative plot, right?</font>

<font size=3>Is it missing anything?</font>

In [None]:
# we can do better: same two curves, now with a style sheet, title, axis labels and legend

# The seaborn style sheets were renamed to "seaborn-v0_8-*" in matplotlib 3.6 and the
# old names were later removed, so pick whichever name this installation provides.
# If neither exists the current style is simply left unchanged.
for style_name in ("seaborn-v0_8-darkgrid", "seaborn-darkgrid"):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
# create a 7x7 figure
plt.figure(figsize=(7, 7))
# plot linear points (x values passed explicitly instead of relying on the list index)
plt.plot(x_points, linear_points, label='y = 2x + 1', color='orangered')
# plot non linear points
plt.plot(x_points, non_linear_points, label='y = x^2')
# add title
plt.title('Functions Plots', fontdict={'fontsize': 18})
# add x and y labels
plt.xlabel('X', fontdict={'fontsize': 18})
plt.ylabel('Y', fontdict={'fontsize': 18})
# add legend (uses the `label` given to each plt.plot call above)
plt.legend(prop={"size": 14})

## Pandas and Matplotlib

In [None]:
# Small hand-written table used by the plotting examples in the sections below.
# One row per city; 'City' values are later used as tick labels, so spelling matters.
df = pd.DataFrame(columns=['X', 'Y', 'City', 'Country', 'Population', 'GDP'],
                  data=[[1, 2, 'Cairo', 'Egypt', 2000000, 35000],
                        [4, 5, 'Giza', 'Egypt', 100000, 4000],
                        [5, 8, 'Alexandria', 'Egypt', 1800000, 50000],
                        [4, 6, 'Mansoura', 'Egypt', 50000, 8000],
                        [2, 9, 'Casablanca', 'Morocco', 60000, 60000],
                        [10, 11, 'Roma', 'Italy', 600000, 80000],
                        [7, 8, 'Suez', 'Egypt', 40000, 90000],
                        [7, 9, 'Milan', 'Italy', 400000, 100000]])
# the table has only 8 rows, so head(10) displays all of them
df.head(10)

### Multiple Line plot

In [None]:
# multiple lines
# Sketch left commented out (uncomment every line below to run it).
# It draws df['Population'] on the left y-axis and df['GDP'], in red, on a second
# y-axis created with twinx(), then labels each y-axis with its column name.
# plt.figure(figsize=(7, 7))
# ax1 = plt.gca()
# ax1.plot(df['Population'])
# ax2 = ax1.twinx()
# ax2.plot(df['GDP'], color='r')
# ax1.set_ylabel('Population')
# ax2.set_ylabel('GDP')

### Scatter Plot

In [None]:
# connecting these points with a line doesn't make sense (compare with the scatter plot in the next cell)
x_coords, y_coords = df['X'], df['Y']
plt.figure(figsize=(7, 7))
plt.plot(x_coords, y_coords)

In [None]:
# same X/Y columns, drawn as individual markers instead of a connected line
plt.figure(figsize=(7, 7))
plt.scatter(x=df['X'], y=df['Y'])

In [None]:
# Population on the x-axis against GDP on the y-axis, one marker per row
population, gdp = df['Population'], df['GDP']
plt.figure(figsize=(7, 7))
plt.scatter(x=population, y=gdp)

In [None]:
# TODO: maybe encode a third column as the marker size (the `s` argument of plt.scatter)

### Bar plot

In [None]:
# one vertical bar per row, positioned at 0..n-1 and labelled with the city name
bar_positions = range(len(df))
plt.figure(figsize=(10, 7))
plt.bar(bar_positions, df['Population'])
# assigning the result keeps the tick objects out of the cell output
ticks = plt.xticks(bar_positions, df['City'], fontsize=15, rotation=90)

In [None]:
# horizontal version of the bar chart: one bar per row, city names on the y-axis
fig, ax = plt.subplots(figsize=(10, 7))
row_positions = range(len(df))
ax.barh(row_positions, df['Population'])
# assigning the result keeps the tick objects out of the cell output
ticks = plt.yticks(row_positions, df['City'], fontsize=15)

### Histogram

In [None]:
# distribution of the Population column (matplotlib's default binning)
fig, ax = plt.subplots(figsize=(10, 7))
ax.hist(df['Population'])

In [None]:
# distribution of the GDP column (matplotlib's default binning)
gdp_values = df['GDP']
plt.figure(figsize=(10, 7))
plt.hist(gdp_values)

## TED Talks

In [None]:
# Google Colab variant, left commented out: mounts Google Drive at /content/gdrive
# and loads ted_main.csv from "My Drive" into df.
# NOTE(review): the read path is relative ('gdrive/...') while the mount point is
# absolute, so this presumably relies on the working directory being /content — confirm.
# from google.colab import drive 
# drive.mount('/content/gdrive')
# df = pd.read_csv('gdrive/My Drive/ted_main.csv')

In [None]:
# Load the TED talks table from the notebook's working directory.
# Note: this rebinds `df`, replacing the small cities table used above.
ted_csv_path = './ted_main.csv'
df = pd.read_csv(ted_csv_path)
# preview the first rows
df.head()