<a href="https://colab.research.google.com/github/zuhayerror3i8/AI-ML-Expert-With-Phitron-Batch-01/blob/main/000%20Python%20For%20ML/022_Module_16_05_Practice_Day_07.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Module 16.5 — Practice Day 7

In [None]:
# Custom Dataset Generator

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Notebook-level random generator seeded with 42. Note that generate_students
# and generate_daily_sales each build their own local generator from their
# `seed` argument, so they do not draw from this one.
rng = np.random.default_rng(42)

def generate_students(n=200, seed=42):
    """Create a reproducible synthetic table of student study records.

    Parameters
    ----------
    n : int, default 200
        Number of student rows to generate.
    seed : int, default 42
        Seed for the NumPy random generator; the same seed and ``n`` always
        yield the same table.

    Returns
    -------
    pandas.DataFrame
        Columns ``hours_studied`` and ``marks`` (floats rounded to two
        decimals), ``gender``, ``subject``, ``study_method`` (strings) and
        ``attempts`` (integers from 1 to 3).
    """
    gen = np.random.default_rng(seed)

    # Study time: normal around 5 hours, limited to the 1-10 hour range.
    hours = np.clip(gen.normal(5, 2, size=n), 1, 10)

    # Marks grow with study time (8-12 marks per hour) plus Gaussian noise,
    # then get limited to the 30-100 range. The draw order matters for
    # reproducibility, so the rate is sampled before the noise.
    marks_per_hour = gen.uniform(8, 12, size=n)
    jitter = gen.normal(0, 5, size=n)
    bounded_marks = np.clip(hours * marks_per_hour + jitter, 30, 100)

    # Dict values are evaluated top to bottom, which keeps the categorical
    # draws in the order gender -> subject -> study_method -> attempts.
    columns = {
        'hours_studied': np.round(hours, 2),
        'marks': np.round(bounded_marks, 2),
        'gender': gen.choice(['Male', 'Female'], size=n),
        'subject': gen.choice(['Math', 'Science', 'English', 'History'], size=n),
        # Solo study is drawn with probability 0.6, group study with 0.4.
        'study_method': gen.choice(['Solo', 'Group'], size=n, p=[0.6, 0.4]),
        # Upper bound is exclusive, so attempts are 1, 2 or 3.
        'attempts': gen.integers(1, 4, size=n),
    }
    return pd.DataFrame(columns)

def generate_daily_sales(n=60, seed=42, start='2024-01-01'):
    """Build a synthetic daily sales series from trend, seasonality and noise.

    Parameters
    ----------
    n : int, default 60
        Number of consecutive days (rows) to generate.
    seed : int, default 42
        Seed for the NumPy random generator that draws the noise term.
    start : str or datetime-like, default '2024-01-01'
        First date of the series; passed to ``pd.date_range``. The default
        reproduces the previously hard-coded start date.

    Returns
    -------
    pandas.DataFrame
        Column ``date`` with ``n`` daily timestamps beginning at ``start`` and
        column ``sales`` with floats rounded to two decimals.
    """
    rng = np.random.default_rng(seed)
    base = pd.date_range(start=start, periods=n, freq='D')
    # Linear growth from 50 to 80 across the whole window.
    trend = np.linspace(50, 80, n)
    # Sine wave of amplitude 8 spanning 0..3*pi (one and a half periods).
    season = 8 * np.sin(np.linspace(0, 3*np.pi, n))
    # Gaussian day-to-day noise with standard deviation 3.
    noise = rng.normal(0, 3, n)
    sales = (trend + season + noise).round(2)
    return pd.DataFrame({'date': base, 'sales': sales})

# Build the two datasets used by the problems below: 220 student records and
# 90 days of sales. Neither call passes `seed`, so both use the default of 42.
students = generate_students(220)
sales = generate_daily_sales(90)

## Problem 1: Line Plot of Date vs Sales

In [None]:
# Daily sales as a line; axis labels and title are applied through the Axes
# object that seaborn returns instead of separate pyplot calls.
sns.lineplot(
    x='date',
    y='sales',
    data=sales,
).set(
    xlabel='Date',
    ylabel='Sales',
    title='Sales Over Time',
)
# Tilt the date tick labels so neighbouring dates do not overlap.
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

## Problem 2: Scatter Plot Hours Studied vs Marks Colored by Gender

In [None]:
# Marks against study time, with one colour per gender.
sns.scatterplot(
    x='hours_studied',
    y='marks',
    hue='gender',
    data=students,
).set(
    xlabel='Hours Studied',
    ylabel='Marks',
    title='Hours Studied vs Marks by Gender',
)
plt.show()

## Problem 3: Grid of Scatterplots Faceted by Subject

In [None]:
# One panel per subject, wrapped two panels to a row, with height=4 per panel.
g = sns.FacetGrid(students, col='subject', col_wrap=2, height=4)
# Draw an hours-vs-marks scatter plot inside every subject panel.
g.map(sns.scatterplot, 'hours_studied', 'marks')
g.set_axis_labels('Hours Studied', 'Marks')
# Title each panel with just the subject name.
g.set_titles(col_template='{col_name}')
plt.show()

## Problem 4: Histogram of Marks with KDE Overlay

In [None]:
# Histogram of marks in 20 bins with a KDE curve drawn over the bars.
sns.histplot(
    students['marks'],
    bins=20,
    kde=True,
).set(
    xlabel='Marks',
    ylabel='Frequency',
    title='Distribution of Marks with KDE',
)
plt.show()

## Problem 5: KDE Curve for Hours Studied

In [None]:
# Smoothed density estimate of the hours-studied column.
sns.kdeplot(
    students['hours_studied'],
).set(
    xlabel='Hours Studied',
    ylabel='Density',
    title='KDE of Hours Studied',
)
plt.show()

## Problem 6: Count of Records Per Subject

In [None]:
# One bar per subject showing how many student rows belong to it.
sns.countplot(
    x='subject',
    data=students,
).set(
    xlabel='Subject',
    ylabel='Count',
    title='Count of Records Per Subject',
)
plt.show()

## Problem 7: Regression Line for Hours Studied and Marks

In [None]:
# Scatter of marks against study time with a fitted regression line.
sns.regplot(
    x='hours_studied',
    y='marks',
    data=students,
).set(
    xlabel='Hours Studied',
    ylabel='Marks',
    title='Regression: Hours Studied vs Marks',
)
plt.show()

## Problem 8: Pairplot and Jointplot

In [None]:
# Grid of pairwise plots over the students table with seaborn's defaults
# (per the seaborn docs, pairplot uses the numeric columns by default).
sns.pairplot(students)
plt.show()

In [None]:
# Joint view of hours studied and marks: the two-variable plot in the centre
# with each variable's own distribution along the margins (seaborn defaults).
sns.jointplot(data=students, x='hours_studied', y='marks')
plt.show()

## Problem 9: Plotly Scatter Plot Colored by Gender

In [None]:
# Interactive Plotly version of the hours-vs-marks scatter, coloured by gender.
fig = px.scatter(students, x='hours_studied', y='marks', color='gender',
                 title='Hours Studied vs Marks by Gender')
fig.show()

## Problem 10: Plotly Line Chart and Histogram

In [None]:
# Interactive Plotly line chart of daily sales over the date range.
fig = px.line(sales, x='date', y='sales', title='Sales Over Time')
fig.show()

In [None]:
# Interactive Plotly histogram of marks; nbins=20 asks for about 20 bins
# (Plotly documents nbins as a maximum, so the rendered count may be lower).
fig = px.histogram(students, x='marks', nbins=20, title='Distribution of Marks')
fig.show()