<a href="https://colab.research.google.com/github/zuhayerror3i8/AI-ML-Expert-With-Phitron-Batch-01/blob/main/000%20Python%20For%20ML/021_Module_16.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Module 16 — Data Visualization with Seaborn and Plotly

In [None]:
# <--- Importing Required Libraries --->
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [None]:
# <--- Loading Student Data --->
# Read sns_data.csv into a DataFrame and preview its first five rows
student = pd.read_csv(filepath_or_buffer='sns_data.csv')
student.head(n=5)

## Line Plots using Seaborn

In [None]:
# <--- Axes-level Line Plot --->
# Attendance rate per week; errorbar=None switches off the error band
sns.lineplot(
    x='week',
    y='attendance_rate',
    data=student,
    errorbar=None,
)

In [None]:
# Same attendance line plot, with hue drawing one coloured line per gender
sns.lineplot(
    x='week',
    y='attendance_rate',
    hue='gender',
    data=student,
    errorbar=None,
)

In [None]:
# <--- Figure-level Line Plot --->
# relplot is the figure-level counterpart; kind='line' selects a line plot
sns.relplot(
    x='week',
    y='attendance_rate',
    hue='gender',
    kind='line',
    data=student,
    errorbar=None,
)

In [None]:
# Weekly test score trend, one line per gender
sns.relplot(
    x='week',
    y='test_score',
    hue='gender',
    kind='line',
    data=student,
    errorbar=None,
)

In [None]:
# Weekly test score trend, one line per class level
sns.relplot(
    x='week',
    y='test_score',
    hue='class_level',
    kind='line',
    data=student,
    errorbar=None,
)

In [None]:
# Weekly attendance rate trend, one line per class level
sns.relplot(
    x='week',
    y='attendance_rate',
    hue='class_level',
    kind='line',
    data=student,
    errorbar=None,
)

## Scatter Plots using Seaborn

In [None]:
# <--- Basic Scatter Plot --->
# Test score against study hours, one marker per row
sns.scatterplot(
    x='study_hours',
    y='test_score',
    data=student,
)

In [None]:
# <--- Scatter Plot with Multiple Dimensions --->
# Three extra variables are encoded on top of x/y:
#   hue   -> marker colour (gender)
#   style -> marker shape  (subject)
#   size  -> marker size   (Tshirt_size)
sns.scatterplot(
    x='study_hours',
    y='test_score',
    hue='gender',
    style='subject',
    size='Tshirt_size',
    data=student,
)

In [None]:
# Figure-level version of the multi-dimensional scatter plot (via relplot)
sns.relplot(
    x='study_hours',
    y='test_score',
    hue='gender',
    style='subject',
    size='Tshirt_size',
    kind='scatter',
    data=student,
)

In [None]:
# <--- Using Seaborn's Built-in Dataset --->
# Fetch the "tips" example dataset and preview its first five rows
tips_data = sns.load_dataset(name="tips")
tips_data.head(n=5)

In [None]:
# Tip against total bill: colour by sex, marker shape by time, marker size by party size
sns.relplot(
    x='total_bill',
    y='tip',
    hue='sex',
    style='time',
    size='size',
    kind='scatter',
    data=tips_data,
)

In [None]:
# The same tips scatter plot drawn with the axes-level function
sns.scatterplot(
    x='total_bill',
    y='tip',
    hue='sex',
    style='time',
    size='size',
    data=tips_data,
)

In [None]:
# <--- Loading Complete Student Dataset --->
# Rebind `student` to the complete dataset; later cells use this DataFrame
student = pd.read_csv(filepath_or_buffer='student_dataset_complete.csv')
student.head(n=5)

## Facet Plots

In [None]:
# <--- Axes-level Scatter Plot with Hue --->
# Single axes; gender is distinguished by colour only
sns.scatterplot(
    x='study_hours',
    y='test_score',
    hue='gender',
    data=student,
)

In [None]:
# Figure-level equivalent: still one panel, coloured by gender
sns.relplot(
    x='study_hours',
    y='test_score',
    hue='gender',
    kind='scatter',
    data=student,
)

In [None]:
# <--- Facet Plot by Columns --->
# col='gender' draws one subplot per gender, laid out side by side
sns.relplot(
    x='study_hours',
    y='test_score',
    col='gender',
    kind='scatter',
    data=student,
)

In [None]:
# <--- Facet Plot by Rows and Columns --->
# Grid of subplots: hostel values down the rows, gender values across the columns
sns.relplot(
    x='study_hours',
    y='test_score',
    row='hostel',
    col='gender',
    kind='scatter',
    data=student,
)

In [None]:
# <--- Facet Plot with Column Wrap --->
# One subplot per week; col_wrap=2 starts a new row after every two panels
sns.relplot(
    x='study_hours',
    y='test_score',
    col='week',
    col_wrap=2,
    kind='scatter',
    data=student,
)

## Histograms using Seaborn

In [None]:
# <--- Axes-level Histogram --->
# Attendance rate in 10 bins, split by gender; element='step' draws outlines instead of bars
sns.histplot(
    x='attendance_rate',
    hue='gender',
    bins=10,
    element='step',
    data=student,
)

In [None]:
# <--- Figure-level Histogram --->
# displot is the figure-level interface; kind='hist' selects a histogram
sns.displot(
    x='attendance_rate',
    kind='hist',
    data=student,
)

In [None]:
# Attendance rate histogram, one panel per gender
sns.displot(
    x='attendance_rate',
    col='gender',
    kind='hist',
    data=student,
)

In [None]:
# Study hours histogram, one panel per gender
sns.displot(
    x='study_hours',
    col='gender',
    kind='hist',
    data=student,
)

In [None]:
# (Re)load the tips dataset so the histogram cells below can run on their own
tips_data = sns.load_dataset(name="tips")
tips_data.head(n=5)

In [None]:
# Distribution of the total bill, using 20 bins
sns.displot(
    x='total_bill',
    bins=20,
    kind='hist',
    data=tips_data,
)

In [None]:
# Distribution of the tip amount, using 20 bins
sns.displot(
    x='tip',
    bins=20,
    kind='hist',
    data=tips_data,
)

## KDE (Kernel Density Estimate) Plots

In [None]:
# <--- Axes-level KDE Plot --->
# A KDE draws a smooth estimate of the density of a continuous variable
sns.kdeplot(
    x='attendance_rate',
    data=student,
)

In [None]:
# One density curve per sex; fill=True shades the area under each curve
sns.kdeplot(
    x='tip',
    hue='sex',
    fill=True,
    data=tips_data,
)

In [None]:
# <--- Figure-level KDE Plot --->
# displot with kind='kde'; col='gender' gives each gender its own panel
sns.displot(
    x='attendance_rate',
    col='gender',
    kind='kde',
    data=student,
)

## Count Plots

In [None]:
# <--- Count Plot for Categorical Data --->
# Bar height is the number of rows falling in each gender category
sns.countplot(
    x='gender',
    data=student,
)

In [None]:
# Row counts per subject, with hue splitting each subject into per-gender bars
sns.countplot(
    x='subject',
    hue='gender',
    data=student,
)

## Bar Plots

In [None]:
# <--- Bar Plot with Mean Estimator --->
# No estimator is passed, so barplot uses its default (the mean) of Marks per gender
sns.barplot(
    x='gender',
    y='Marks',
    data=student,
    errorbar=None,
)

In [None]:
# <--- Bar Plot with Median Estimator --->
# estimator=np.median swaps the default aggregation for the median of Marks per gender
sns.barplot(
    x='gender',
    y='Marks',
    estimator=np.median,
    data=student,
    errorbar=None,
)

In [None]:
# <--- Bar Plot with Max Estimator --->
# estimator=np.max makes each bar show the highest Marks value per gender
sns.barplot(
    x='gender',
    y='Marks',
    estimator=np.max,
    data=student,
    errorbar=None,
)

## Regression Plots

In [None]:
# <--- Axes-level Regression Plot --->
# Scatter of test score against study hours with a fitted regression line overlaid
sns.regplot(
    x='study_hours',
    y='test_score',
    data=student,
)

In [None]:
# <--- Figure-level Regression Plot --->
# lmplot is the figure-level regression plot; hue fits a separate line per gender
sns.lmplot(
    x='study_hours',
    y='test_score',
    hue='gender',
    data=student,
)

## Pair Plots

In [None]:
# <--- Selecting Columns for Pair Plot --->
# Keep three numeric-looking columns plus gender (used later as the hue variable)
student_marks = student[
    [
        'Marks',
        'study_hours',
        'attendance_rate',
        'gender',
    ]
]
student_marks

In [None]:
# <--- Pair Plot with Scatter Plots --->
# Grid of pairwise plots across the selected columns, coloured by gender
sns.pairplot(
    hue='gender',
    data=student_marks,
)

In [None]:
# <--- Pair Plot with Histograms --->
# kind='hist' replaces the pairwise scatter panels with histograms
sns.pairplot(
    kind='hist',
    data=student_marks,
)

## Joint Plots

In [None]:
# <--- Joint Plot with Scatter and Distributions --->
# Central panel shows Marks against study hours; the margins show each variable's distribution
sns.jointplot(
    x='study_hours',
    y='Marks',
    hue='gender',
    data=student_marks,
)

In [None]:
# <--- Joint Plot with KDE --->
# kind='kde' draws the joint relationship as a 2D kernel density estimate
sns.jointplot(
    x='study_hours',
    y='Marks',
    kind='kde',
    data=student_marks,
)

## Interactive Visualizations with Plotly

In [None]:
# <--- Interactive Scatter Plot with Plotly --->
# Plotly figures are interactive (zoom, pan, hover tooltips).
# NOTE(review): x uses 'time_study' while every other cell uses 'study_hours'
# for this DataFrame — confirm that column exists in student_dataset_complete.csv.
# NOTE(review): plotly's `size` expects numeric values — confirm 'Tshirt_size' is numeric.
fig = px.scatter(
    data_frame=student,
    x='time_study',
    y='Marks',
    color='gender',
    size='Tshirt_size',
    hover_data=['hostel'],
)
fig.show()

## Line Plots using Plotly

In [None]:
# <--- Interactive Line Plot --->
# Load the enrollment figures, then plot the Programming column against Year
enrollment = pd.read_csv(filepath_or_buffer='enrollment_data.csv')
fig = px.line(
    data_frame=enrollment,
    x='Year',
    y='Programming',
)
fig.show()

In [None]:
# <--- Line Plot with Markers --->
# markers=True draws a marker at every data point along the line
fig = px.line(
    data_frame=enrollment,
    x='Year',
    y='Digital Marketing',
    markers=True,
)
fig.show()

In [None]:
# <--- Saving Interactive Plot as HTML --->
# Write whatever figure `fig` currently refers to into an HTML file
fig.write_html(file='digital_marketing_data.html')

## Histograms using Plotly

In [None]:
# <--- Interactive Histogram --->
# Distribution of attendance rate as an interactive Plotly histogram
fig = px.histogram(
    data_frame=student_marks,
    x='attendance_rate',
)
fig.show()

In [None]:
# <--- Interactive Histogram with Color and Custom Bins --->
# color splits the bars by gender; nbins controls the number of bins
fig = px.histogram(
    data_frame=student_marks,
    x='attendance_rate',
    color='gender',
    nbins=5,
)
fig.show()