In [13]:
# Install seaborn into the environment used by this notebook's kernel.
# NOTE(review): no version is pinned here, so a rerun may install a different release.
%pip install seaborn

In [14]:
# Render matplotlib figures inline in the notebook's cell output.
%matplotlib inline


In [15]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [16]:
# Read the forum page-view CSV; the 'date' column becomes the index and is parsed as dates
df = pd.read_csv(
    'fcc-forum-pageviews.csv',
    parse_dates=True,
    index_col='date',
)

# Preview the leading rows as this cell's output
df.head()


date,value
2016-05-09,1201
2016-05-10,2329
2016-05-11,1716
2016-05-12,10539
2016-05-13,6933


In [17]:
# Step 3: Drop outlier days, i.e. days whose page views fall in the bottom 2.5% or top 2.5%
# Both cut-offs come from a single quantile call on the 'value' column
lower_percentile, upper_percentile = df['value'].quantile([0.025, 0.975])

# Keep only rows whose value lies within the two cut-offs (both bounds inclusive)
within_bounds = df['value'].between(lower_percentile, upper_percentile)
df_cleaned = df[within_bounds]


In [18]:
def draw_line_plot():
    """Plot the cleaned daily page views as a line over time and display the figure.

    Reads the notebook-level ``df_cleaned`` frame (index on the x-axis, its
    'value' column on the y-axis) and works on a copy, so the shared frame is
    left untouched. Returns nothing; the figure is shown via ``plt.show()``.
    """
    # Private snapshot of the series being plotted
    views = df_cleaned.copy()['value']

    # One figure holding a single Axes, drawn through the explicit Axes interface
    _, ax = plt.subplots(figsize=(12, 6))
    ax.plot(views.index, views, color='blue')

    # Title and axis labels
    ax.set_title('Daily freeCodeCamp Forum Page Views 5/2016-12/2019')
    ax.set_xlabel('Date')
    ax.set_ylabel('Page Views')

    # Tighten the layout, then render
    plt.tight_layout()
    plt.show()


In [19]:
def draw_bar_plot():
    """Draw a grouped bar chart of average daily page views per month, grouped by year.

    Reads the notebook-level ``df_cleaned`` frame and works on a copy, so the
    shared frame is not modified. The copy's index supplies the year and month
    of each row; the 'value' column is averaged per (year, month) pair and
    unstacked so that each year is one group of bars and each month is one
    bar within the group.

    Returns nothing; the figure is shown via ``plt.show()``.
    """
    # Legend text for month numbers 1..12
    month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    # Create a copy of the cleaned data for bar plot
    df_bar = df_cleaned.copy()

    # Set the year and month for grouping
    df_bar['year'] = df_bar.index.year
    df_bar['month'] = df_bar.index.month

    # Calculate the average daily page views for each month grouped by year
    df_monthly_avg = df_bar.groupby(['year', 'month'])['value'].mean().unstack()

    # Name every column after the month number it actually holds. Passing a
    # fixed 12-item label list to the legend pairs labels with bars purely by
    # position, which mislabels the bars whenever a calendar month is absent
    # from the data (fewer than 12 columns after unstacking).
    df_monthly_avg.columns = [month_labels[month - 1] for month in df_monthly_avg.columns]

    # Create the bar plot
    ax = df_monthly_avg.plot(kind='bar', figsize=(12, 6))

    # Set the labels and title; legend entries come from the renamed columns
    ax.set_title('Average Daily Page Views per Month')
    ax.set_xlabel('Years')
    ax.set_ylabel('Average Page Views')
    ax.legend(title='Months')

    # Show the plot
    plt.tight_layout()
    plt.show()


In [20]:
import seaborn as sns

def draw_box_plot():
    """Display two box plots of the cleaned page views: one per year, one per month.

    Reads the notebook-level ``df_cleaned`` frame and works on a copy, to which
    'year' and 'month' columns derived from the index are added. Each plot is
    drawn on its own figure and shown immediately. Returns nothing.
    """
    # Working copy with the grouping columns taken from the index
    df_box = df_cleaned.copy()
    dates = df_box.index
    df_box['year'], df_box['month'] = dates.year, dates.month

    # (grouping column, figure title, x-axis label) for each figure, in display order
    panels = (
        ('year', 'Year-wise Box Plot (Trend)', 'Year'),
        ('month', 'Month-wise Box Plot (Seasonality)', 'Month'),
    )

    for column, title, x_label in panels:
        # A fresh figure per panel, shown before the next one is created
        _, ax = plt.subplots(figsize=(12, 6))
        sns.boxplot(x=column, y='value', data=df_box, ax=ax)
        ax.set_title(title)
        ax.set_xlabel(x_label)
        ax.set_ylabel('Page Views')
        plt.show()
