In [29]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Load Dataset
# Read the sales CSV from the working directory; column 0 of the file is used
# as the DataFrame index rather than as a data column.
df = pd.read_csv(
    'Out_Sales_Data_LRDP.csv',  # Replace with actual file path
    index_col=0,
)


In [30]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import streamlit as st

# Streamlit UI
# Single-page EDA report: the user uploads a forecast CSV and the page renders
# eight numbered sections (summary, sample, unique counts, date horizon,
# duplicates, monthly distribution, zero-sales counts, sales outliers).
# Every section that needs a specific column checks for it first, so a file
# that lacks an optional column still produces the remaining sections.
# NOTE(review): this cell uses Streamlit widgets; presumably it is meant to be
# launched with `streamlit run` rather than executed in the notebook kernel — confirm.
st.set_page_config(layout="wide")
st.title("Ralph Lauren Size Curve Forecast EDA")

# Load Data
uploaded_file = st.file_uploader("Upload Forecast CSV File", type="csv")
if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)

    # 1. Overview and Data Completeness
    st.header("1. Data Summary")
    st.write("**Shape of Data:**", df.shape)
    st.write("**Unique Rows:**", df.drop_duplicates().shape[0])
    st.write("**Missing Values:**")
    st.dataframe(df.isnull().sum())
    st.write("**Data Types:**")
    # Show dtype names as plain strings so the table does not depend on
    # serializing numpy/pandas dtype objects.
    st.dataframe(df.dtypes.astype(str))

    # 2. Sample Data
    st.header("2. Sample Data")
    st.dataframe(df.head())

    # 3. Unique Value Counts
    st.header("3. Unique Value Summary by Key Dimensions")
    check_cols = [
        'Global Plan Brand.[Global Plan Brand]', 'Region.[Region]', 'Channel.[Channel]',
        'Global Plan L1.[Global Plan L1]', 'Global Plan L2.[Global Plan L2]',
        'Global Plan L3.[Global Plan L3]', 'Global Plan L4.[Global Plan L4]',
        'Item.[PPL]', 'Evergreen.[Evergreen]'
    ]
    # Columns absent from the uploaded file are skipped silently.
    for col in check_cols:
        if col in df.columns:
            st.write(f"**{col}**: {df[col].nunique()} unique values")

    # 4. Data Horizon Check
    st.header("4. Data Horizon Check")
    month_col = 'Time.[Planning Month]'
    has_month_col = month_col in df.columns
    if has_month_col:
        # Unparseable values become NaT (errors='coerce'); the parsed column
        # replaces the original in df and is reused by section 6.
        df[month_col] = pd.to_datetime(df[month_col], errors='coerce')
        st.write("Min Month:", df[month_col].min())
        st.write("Max Month:", df[month_col].max())
        st.write("Total Months Available:", df[month_col].nunique())

    # 5. Duplicate Check
    # Runs after the month column (if any) has been coerced to datetime above.
    st.header("5. Duplicate Check")
    st.write("Duplicate Rows:", df.duplicated().sum())

    # 6. Monthly Distribution
    st.header("6. Monthly Distribution of Records")
    # Guard the column lookup: without it a file lacking the month column
    # raises KeyError here and sections 7 and 8 never render.
    if has_month_col:
        monthly_counts = df[month_col].value_counts().sort_index()
    else:
        monthly_counts = pd.Series(dtype="int64")
    if not monthly_counts.empty:
        st.bar_chart(monthly_counts)
    else:
        st.write("No data available to plot monthly distribution.")

    # 7. Zero Sales Check
    st.header("7. Zero Sales Check")
    if 'Sales (Units)' in df.columns and 'Item.[PPL]' in df.columns:
        # Number of rows with exactly zero units, per Item.[PPL] value;
        # the five largest counts are shown.
        zero_counts = df['Sales (Units)'].eq(0).groupby(df['Item.[PPL]']).sum()
        st.dataframe(zero_counts.sort_values(ascending=False).head())

    # 8. Outlier Check for Sales Units
    st.header("8. Sales Units Outlier Check")
    if 'Sales (Units)' in df.columns:
        fig, ax = plt.subplots(figsize=(10, 4))
        sns.boxplot(x=df['Sales (Units)'], ax=ax)
        st.pyplot(fig)
        # Release the figure once rendered; pyplot otherwise keeps every
        # figure created by plt.subplots alive until it is explicitly closed.
        plt.close(fig)

    st.success("EDA Completed")
else:
    st.info("Please upload a CSV file to begin EDA.")




In [31]:
# Distinct values present in the 'Evergreen.[Evergreen]' column of df.
df.loc[:, 'Evergreen.[Evergreen]'].unique()

array(['NON EVERGREEN', 'EVERGREEN'], dtype=object)