# CODING TASK #1. PLOT BASIC SINGLE LINE PLOTS USING MATPLOTLIB

In [None]:
# The plot method on Pandas Series and DataFrames is just a simple wrapper around plt.plot():
import matplotlib.pyplot as plt
import pandas as pd
import datetime
%matplotlib inline

In [None]:
# Load the daily crypto price table (BTC, ETH, and LTC); the path is relative to the working directory
investments_df = pd.read_csv(filepath_or_buffer = 'crypto_daily_prices.csv')
# A bare name as the last expression makes Jupyter render the frame as a table
investments_df

In [None]:
# Draw the Bitcoin price column against the Date column through the DataFrame plot wrapper
investments_df.plot(
    x = 'Date',
    y = 'BTC-USD Price',
    label = 'Bitcoin Price',
    linewidth = 3,
    figsize = (14, 6),
)
# The pyplot calls below decorate the axes produced by the plot call above
plt.title('My First Data Visualization Exercise!')
plt.ylabel('Price [$]')
plt.legend(loc = "upper right")
plt.grid()


**PRACTICE OPPORTUNITY #1 [OPTIONAL]:**
- **Plot a similar graph for Ethereum instead**
- **Change the line color to red**


# CODING TASK #2. PLOT MULTIPLE LINE PLOTS USING MATPLOTLIB

In [None]:
# Read the same daily crypto price file (BTC, ETH, and LTC) again so this task starts from a fresh frame
investments_df = pd.read_csv(filepath_or_buffer = 'crypto_daily_prices.csv')
# Show the loaded table
investments_df

In [None]:
# Overlay the Bitcoin and Ethereum price columns on one set of axes
investments_df.plot(
    x = 'Date',
    y = ['BTC-USD Price', 'ETH-USD Price'],
    linewidth = 2,
    figsize = (14, 6),
)
plt.title('Crypto Prices')
plt.ylabel('Price')
plt.grid()

**PRACTICE OPPORTUNITY #2 [OPTIONAL]:**
- **Add Litecoin (LTC) to the list and plot a similar graph showing all three cryptocurrencies**

# CODING TASK #3. PLOT SUBPLOTS USING MATPLOTLIB

In [None]:
# With subplots = True every column other than Date is drawn on its own axes
investments_df.plot(
    x = 'Date',
    subplots = True,
    grid = True,
    title = 'Crypto Prices',
    figsize = (15, 25),
);

**PRACTICE OPPORTUNITY #3 [OPTIONAL]:**
- **Try to set subplots = False and examine the output.** 

# CODING TASK #4. PLOT SCATTERPLOTS USING MATPLOTLIB

In [None]:
# Load the crypto daily-return table with pandas
daily_return_df = pd.read_csv(filepath_or_buffer = 'crypto_daily_returns.csv')

# Show the loaded table
daily_return_df

In [None]:
# Scatter the BTC daily returns (horizontal axis) against the ETH daily returns (vertical axis)
daily_return_df.plot.scatter(
    x = 'BTC',
    y = 'ETH',
    grid = True,
    figsize = (12, 7),
);

**PRACTICE OPPORTUNITY #4 [OPTIONAL]:**
- **Plot the daily returns of BTC vs. LTC**

# CODING TASK #5. PLOT PIE CHARTS USING MATPLOTLIB

In [None]:
# Portfolio weights per crypto asset, in percent
# The five weights add up to 100
my_dict = {}
my_dict['allocation %'] = [20, 55, 5, 17, 3]

In [None]:
# Turn the allocation dictionary into a one-column frame indexed by ticker symbol
crypto_df = pd.DataFrame(
    data = my_dict,
    index = ['BTC', 'ETH', 'LTC', 'XRP', 'ADA'],
)
crypto_df

In [None]:
# Pie chart of the allocation column; wedges are labelled by the frame's index
crypto_df.plot.pie(
    y = 'allocation %',
    figsize = (8, 8),
)
plt.title('CRYPTO PORTFOLIO PIE CHART')


**PRACTICE OPPORTUNITY #5 [OPTIONAL]:**
- **Assume that you became bullish on XRP and decided to allocate 60% of your assets in it. You also decided to equally divide the rest of your assets in other coins (BTC, LTC, ADA, and ETH). Change the allocations and plot the pie chart.**
- **Use 'explode' to increase the separation between XRP and the rest of the portfolio (External Research is Required)**

# CODING TASK #6. PLOT HISTOGRAMS USING MATPLOTLIB

In [None]:
# Histogram primer:
#   - the data is shown as bars of differing heights
#   - every bar collects the values falling inside one range
#   - a taller bar means more values landed in that range

# Re-read the crypto daily returns that the histogram below is built from
daily_return_df = pd.read_csv(filepath_or_buffer = 'crypto_daily_returns.csv')
daily_return_df


In [None]:

# Summary statistics of the BTC daily returns, reported in the figure title
mu = daily_return_df['BTC'].mean() # mean of distribution
sigma = daily_return_df['BTC'].std() # standard deviation of distribution

# Single source of truth for the bin count (previously num_bins was defined but a literal 50 was plotted)
num_bins = 50

# Plot the histogram of the returns on one explicitly created figure,
# instead of creating a (20, 15) figure and then requesting (12, 6) in the plot call
fig, ax = plt.subplots(figsize = (12, 6))
daily_return_df['BTC'].plot.hist(ax = ax, bins = num_bins, alpha = 0.5)

ax.grid(True)
ax.set_ylabel('Frequency')  # bar heights are raw counts per bin, not probabilities
ax.set_title('Histogram: mu=' + str(mu) + ', sigma=' +str(sigma));
 

**PRACTICE OPPORTUNITY #6 [OPTIONAL]:**
- **Plot the histogram for ETH returns using 30 bins with red color**


# CODING TASK #7. PLOT SCATTERPLOT & COUNTPLOT USING SEABORN

In [None]:
# Updgrade Pandas version
!pip3 install seaborn --upgrade

In [None]:
# Seaborn is a visualization library that sits on top of matplotlib
# Seaborn offers enhanced features compared to matplotlib
# https://seaborn.pydata.org/examples/index.html

# import libraries 
import pandas as pd # Import Pandas for data manipulation using dataframes
import numpy as np # Import Numpy for data statistical analysis 
import matplotlib.pyplot as plt # Import matplotlib for data visualisation
import seaborn as sns # Statistical data visualization

In [None]:
# Load the cancer dataset into a DataFrame
cancer_df = pd.read_csv(filepath_or_buffer = 'cancer.csv')

In [None]:
# Preview the first 25 rows of the dataframe
cancer_df.head(n = 25)

In [None]:
# Preview the last rows of the dataframe (tail shows 5 rows by default)
cancer_df.tail(n = 5)

In [None]:
# Mean area vs. mean smoothness, with points colored by the target class
sns.scatterplot(
    data = cancer_df,
    x = 'mean area',
    y = 'mean smoothness',
    hue = 'target',
);

In [None]:
# Same mean area vs. mean smoothness scatter, this time without coloring by class
sns.scatterplot(
    data = cancer_df,
    x = 'mean area',
    y = 'mean smoothness',
)

In [None]:
# Count plot of the target column: one bar per class (#0 and #1) showing how many samples it holds
sns.countplot(data = cancer_df, x = 'target')

**PRACTICE OPPORTUNITY #7 [OPTIONAL]:**
- **Plot the scatterplot between the mean radius and mean area. Comment on the plot** 


# CODING TASK #8. PLOT PAIRPLOT, DISPLOT, AND HEATMAPS/CORRELATIONS USING SEABORN

In [None]:
# Pairwise scatter/distribution grid over five of the "mean" features, colored by target class
sns.pairplot(
    data = cancer_df,
    hue = 'target',
    vars = [
        'mean radius',
        'mean texture',
        'mean area',
        'mean perimeter',
        'mean smoothness',
    ],
)

In [None]:
# Annotated heatmap of the pairwise column correlations
# Observation: mean radius correlates strongly with mean perimeter, and mean area with mean perimeter
plt.figure(figsize = (20, 10))
sns.heatmap(data = cancer_df.corr(), annot = True)

In [None]:
# Plot the distribution of mean radius as a histogram with a KDE curve on top
# KDE (kernel density estimate) is used to plot the probability density of a continuous variable.
# sns.distplot is deprecated in recent seaborn releases (and this notebook upgrades seaborn),
# so use its axes-level replacement: histplot with kde = True on a density scale.

sns.histplot(x = cancer_df['mean radius'], bins = 5, color = 'blue', kde = True, stat = 'density')

**PRACTICE OPPORTUNITY #8 [OPTIONAL]:**
- **Plot two separate distplot for each target class #0 and target class #1**


# FINAL CAPSTONE PROJECT 

- In this project, we will visualize stock prices using Seaborn and Matplotlib. 3 Stocks are considered including Facebook (FB), Twitter (TWTR) and Netflix (NFLX).
- Using the stock_daily_prices.csv and stocks_daily_returns.csv datasets included in the course/workshop package, please do the following:
    - Import both datasets using Pandas.
    - Using Matplotlib, plot lineplots that display all 3 stocks daily prices on one single figure.
    - Using Matplotlib, plot 3 stocks daily prices on multiple subplots.
    - Using Matplotlib, plot the 3 plots on subplots next to each other (all figures in one row).
    - Using Matplotlib, plot the scatterplot between Facebook and Twitter daily returns. 
    - Using Seaborn, plot similar scatterplot between Facebook and Twitter daily returns. 
    - Assume that you have now expanded your portfolio to include additional stocks such as Amazon (AMZN) and Google (GOOG). You decided to become bullish on Twitter and allocated 60% of your assets to it. You also decided to equally divide the rest of your assets among the other stocks (AMZN, FB, GOOG, NFLX). Using Matplotlib, plot a pie chart that shows these allocations. Use the 'explode' attribute to increase the separation between TWTR and the rest of the portfolio.
    - Using Matplotlib, plot the histogram for FB returns using 40 bins with red color. Display the mean and Standard deviation on top of the figure.
    - Using Seaborn, plot a heatmap that shows the correlations between stocks daily returns.
    - Plot a 3D plot showing all daily returns from FB, TWTR and NFLX [External Research is required].

# EXCELLENT JOB!

# PRACTICE OPPORTUNITY SOLUTIONS

**PRACTICE OPPORTUNITY #1 SOLUTION:**
- **Plot a similar graph for Ethereum instead**
- **Change the line color to red**


In [None]:
# Ethereum price over time, drawn as a thick red line
investments_df.plot(
    x = 'Date',
    y = 'ETH-USD Price',
    label = 'ETH Price',
    color = 'r',
    linewidth = 3,
    figsize = (14, 6),
)
plt.title('Ethereum Price')
plt.ylabel('Price [$]')
plt.legend(loc = "upper right")
plt.grid()


**PRACTICE OPPORTUNITY #2 SOLUTION:**
- **Add Litecoin (LTC) to the list and plot a similar graph showing all three cryptocurrencies**

In [None]:
# Overlay the Bitcoin, Ethereum and Litecoin price columns on one set of axes
investments_df.plot(
    x = 'Date',
    y = ['BTC-USD Price', 'ETH-USD Price', 'LTC-USD Price'],
    linewidth = 2,
    figsize = (14, 6),
)
plt.title('Crypto Prices')
plt.ylabel('Price')
plt.grid()

**PRACTICE OPPORTUNITY #3 SOLUTION:**
- **Try to set subplots = False and examine the output.** 
- **Try to set sharey to True and examine the output. What do you infer?**

In [None]:
# With subplots = False all columns other than Date share a single set of axes
investments_df.plot(
    x = 'Date',
    subplots = False,
    grid = True,
    title = 'Crypto Prices',
    figsize = (14, 6),
);


**PRACTICE OPPORTUNITY #4 SOLUTION:**
- **Plot the daily returns of BTC vs. LTC**

In [None]:
# Scatter the BTC daily returns (horizontal axis) against the LTC daily returns (vertical axis)
daily_return_df.plot.scatter(
    x = 'BTC',
    y = 'LTC',
    figsize = (12, 7),
)

**PRACTICE OPPORTUNITY #5 SOLUTION:**
- **Assume that you became bullish on XRP and decided to allocate 60% of your assets in it. You also decided to equally divide the rest of your assets in other coins (BTC, LTC, ADA, and ETH). Change the allocations and plot the pie chart.**
- **Use 'explode' to increase the separation between XRP and the rest of the portfolio (External Research is Required)**

In [None]:
# 60% goes to XRP; the remaining 40% is split evenly over the other four coins
my_dict = {}
my_dict['allocation %'] = [10, 10, 10, 60, 10]

# One offset per wedge, in index order; only the XRP wedge (4th entry) is pulled out
explode = (0, 0, 0, 0.2, 0)

crypto_df = pd.DataFrame(
    data = my_dict,
    index = ['BTC', 'ETH', 'LTC', 'XRP', 'ADA'],
)
crypto_df.plot.pie(
    y = 'allocation %',
    explode = explode,
    figsize = (8, 8),
)

plt.title('CRYPTO PORTFOLIO PIE CHART')

**PRACTICE OPPORTUNITY #6 SOLUTION:**
- **Plot the histogram for ETH returns using 30 bins with red color**


In [None]:
# Summary statistics of the ETH daily returns, reported in the figure title
mu = daily_return_df['ETH'].mean() # mean of distribution
sigma = daily_return_df['ETH'].std() # standard deviation of distribution

# The exercise asks for 30 bins (previously num_bins was set to 40 but never used)
num_bins = 30

# Plot the histogram of the returns on one explicitly created figure,
# instead of creating a (20, 15) figure and then requesting (12, 6) in the plot call
fig, ax = plt.subplots(figsize = (12, 6))
daily_return_df['ETH'].plot.hist(ax = ax, bins = num_bins, alpha = 0.5, facecolor = 'r')

ax.grid(True)
ax.set_ylabel('Frequency')  # bar heights are raw counts per bin, not probabilities
ax.set_title('Histogram: mu=' + str(mu) + ', sigma=' +str(sigma));

**PRACTICE OPPORTUNITY #7 SOLUTION:**
- **Plot the scatterplot between the mean radius and mean area. Comment on the plot** 


In [None]:
sns.scatterplot(
    data = cancer_df,
    x = 'mean radius',
    y = 'mean area',
    hue = 'target',
)
# Reading the plot:
#   - mean area grows as mean radius grows
#   - class #0 generally shows larger mean radius and mean area than class #1

**PRACTICE OPPORTUNITY #8 SOLUTION:**
- **Plot two separate distplot for each target class #0 and target class #1**


In [None]:
# Split the dataset into one frame per target class using boolean row masks
class_0_df = cancer_df.loc[cancer_df['target'] == 0]
class_1_df = cancer_df.loc[cancer_df['target'] == 1]

In [None]:
# Display the rows of cancer_df whose target equals 0
class_0_df

In [None]:
# Display the rows of cancer_df whose target equals 1
class_1_df

In [None]:
# Plot the distplot for both classes
plt.figure(figsize=(10, 7))
sns.distplot(class_0_df['mean radius'], bins = 25, color = 'blue')
sns.distplot(class_1_df['mean radius'], bins = 25, color = 'red')
# plt.grid()

# FINAL CAPSTONE PROJECT SOLUTION

In [None]:
# Load the daily stock price table used by the capstone solution
stocks_df = pd.read_csv(filepath_or_buffer = 'stock_daily_prices.csv')
# Show the loaded table
stocks_df

In [None]:
# Load the stock daily returns; this rebinds daily_return_df, which held the crypto returns earlier
daily_return_df = pd.read_csv(filepath_or_buffer = 'stocks_daily_returns.csv')
# Show the loaded table
daily_return_df

In [None]:
# All three stock price columns on one set of axes
stocks_df.plot(
    x = 'Date',
    y = ['FB', 'TWTR', 'NFLX'],
    linewidth = 3,
    figsize = (12, 8),
)
plt.title('Stock Prices')
plt.ylabel('Price')
plt.legend(loc = "upper center")
plt.grid()

In [None]:
# One panel per stock (subplots = True returns one axes object per plotted column)
# plt.ylabel / plt.title / plt.legend act only on the current (last) axes, so the figure-level
# title is passed through `title` and the y-label and legend position are set on every panel.
stock_axes = stocks_df.plot(
    x = 'Date',
    y = ['FB', 'TWTR', 'NFLX'],
    subplots = True,
    grid = True,
    linewidth = 3,
    figsize = (12, 8),
    title = 'Stock Prices',
)
for stock_ax in stock_axes:
    stock_ax.set_ylabel('Price')
    stock_ax.legend(loc = "upper center")


In [None]:
# Draw the three price series side by side in a single row.
# Each panel is titled with its ticker so the stocks can be told apart;
# the x-axis is the DataFrame row index, as the series are plotted without an explicit x.
plt.figure(figsize = (20, 10))

# (ticker column, matplotlib format string) for each panel, left to right
panel_styles = [('NFLX', 'r--'), ('FB', 'b.'), ('TWTR', 'g.')]

for position, (ticker, line_format) in enumerate(panel_styles, start = 1):
    plt.subplot(1, 3, position)
    plt.plot(stocks_df[ticker], line_format)
    plt.title(ticker)
    plt.ylabel('Price')
    plt.grid()

In [None]:
# Matplotlib scatter: FB daily returns on the horizontal axis, TWTR daily returns on the vertical axis
X, Y = daily_return_df['FB'], daily_return_df['TWTR']
plt.figure(figsize = (15, 10))
plt.scatter(x = X, y = Y);
plt.grid()

In [None]:
# Seaborn version of the FB vs. TWTR daily-return scatter, drawn on a freshly sized figure
plt.figure(figsize = (15, 8))
sns.scatterplot(
    data = daily_return_df,
    x = 'FB',
    y = 'TWTR',
);
plt.grid()

In [None]:
# The four lists below are parallel: entry i of each describes the same wedge
labels  = ['FB', 'TWTR', 'NFLX', 'GOOG', 'AMZN']
values  = [10, 60, 10, 10, 10]
explode = [0, 0.2, 0, 0, 0]  # only the TWTR wedge (2nd entry) is pulled out
colors  = ['g', 'r', 'y', 'b', 'm']

# Draw the allocation pie with pyplot
plt.figure(figsize = (10, 10))
plt.pie(
    x = values,
    explode = explode,
    labels = labels,
    colors = colors,
)
plt.title('STOCK PORTFOLIO')
plt.show()

In [None]:
# A histogram represents data using bars of various heights.
# Each bar groups numbers into specific ranges.
# Taller bars show that more data falls within that specific range.

# Summary statistics of the FB daily returns, reported in the figure title
mu = daily_return_df['FB'].mean() # mean of distribution
sigma = daily_return_df['FB'].std() # standard deviation of distribution

num_bins = 40

# Plot the histogram of the returns
plt.figure(figsize = (15, 9))
plt.hist(daily_return_df['FB'], num_bins, facecolor = 'red');
plt.grid()
plt.ylabel('Frequency')  # bar heights are raw counts per bin, not probabilities
plt.title('Histogram: mu = ' + str(mu) + ', sigma = ' +str(sigma));
 

In [None]:
# Pairwise correlation matrix of the daily-return columns, drawn as an annotated heatmap
# NOTE(review): on pandas >= 2.0, corr() raises if the frame holds a non-numeric column
# (e.g. a date string) - confirm the returns CSV is all numeric
cm = daily_return_df.corr()
plt.figure(figsize = (12, 10))
sns.heatmap(data = cm, annot = True)

In [None]:
# Toolkits are collections of application-specific functions that extend Matplotlib.
# mpl_toolkits.mplot3d provides tools for basic 3D plotting.
# https://matplotlib.org/mpl_toolkits/index.html

# Importing Axes3D is needed on older Matplotlib releases to make the '3d' projection available; it is harmless otherwise
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure(figsize=(20, 20))
ax = fig.add_subplot(111, projection = '3d')

# One coordinate per stock: FB on x, TWTR on y, NFLX on z
x = daily_return_df['FB'].tolist()
y = daily_return_df['TWTR'].tolist()
z = daily_return_df['NFLX'].tolist()

ax.scatter(x, y, z, c = 'r', marker = 'o', s = 1000)

# Label every axis with the stock it shows instead of placeholder text
ax.set_xlabel('FB daily return')
ax.set_ylabel('TWTR daily return')
ax.set_zlabel('NFLX daily return')

# APPENDIX

In [None]:
# np.c_ is an indexing helper that concatenates its arguments along the second axis
# (1-D inputs end up as the columns of the result)

# First 1-D input: three elements, so its shape is (3,)
x1 = np.array([1, 2, 3])
x1.shape

In [None]:
# Second 1-D input, same length as x1
x2 = np.array([4, 5, 6])
x2.shape

In [None]:
# Place x1 and x2 side by side as the two columns of a 2-D array
z = np.c_[x1, x2]
# Print the stacked array, then its shape, one per line
print(z, z.shape, sep = '\n')
print(z.shape)