In [None]:
from ydata_profiling import ProfileReport
import pandas as pd
import os

excel_path = '/Users/yashpadiyar/Downloads/2 PET2 (Hotfill) 2023.xlsx'
output_dir = 'profiling_reports_1'
selected_cols = ['Asset', 'Super Reason']

os.makedirs(output_dir, exist_ok=True)

excel_file = pd.ExcelFile(excel_path)

for sheet in excel_file.sheet_names:
    print(f"Processing sheet: {sheet}")

    df = pd.read_excel(excel_path, sheet_name=sheet)

    if not set(selected_cols).issubset(df.columns):
        print(f"⚠️ Skipping sheet {sheet} (columns missing)")
        continue

    df_selected = df[selected_cols]

    if df_selected.empty:
        print(f"⚠️ Skipping empty sheet: {sheet}")
        continue
    
    print(f"Missing values analysis for {sheet}:")
    print("-" * 50)
    
    total_rows = len(df_selected)
    print(f"Total rows: {total_rows}")
    
    for col in selected_cols:
        missing_count = df_selected[col].isnull().sum()
        missing_pct = (missing_count / total_rows) * 100
        non_missing = total_rows - missing_count
        
        print(f"\n{col}:")
        print(f"  Non-missing values: {non_missing}")
        print(f"  Missing values: {missing_count}")
        print(f"  Missing percentage: {missing_pct:.2f}%")
    
    print("\n" + "="*50 + "\n")

    try:
        profile = ProfileReport(
            df_selected,
            title=f"Missing Values Analysis - {sheet}",
            minimal=True,  
            explorative=False,  
        )

        output_file = os.path.join(output_dir, f"{sheet}_missing_values_report.html")
        profile.to_file(output_file)
        print(f"✅ Report saved to: {output_file}")
        
    except Exception as e:
        print(f"❌ Error generating report for {sheet}: {e}")

print("🎉 Processing complete!")

print("\n" + "="*60)
print("SUMMARY OF MISSING VALUES ACROSS ALL SHEETS")
print("="*60)

summary_data = []

for sheet in excel_file.sheet_names:
    df = pd.read_excel(excel_path, sheet_name=sheet)
    
    if not set(selected_cols).issubset(df.columns):
        continue
        
    df_selected = df[selected_cols]
    
    if df_selected.empty:
        continue
    
    total_rows = len(df_selected)
    
    for col in selected_cols:
        missing_count = df_selected[col].isnull().sum()
        missing_pct = (missing_count / total_rows) * 100
        
        summary_data.append({
            'Sheet': sheet,
            'Column': col,
            'Total_Rows': total_rows,
            'Missing_Count': missing_count,
            'Missing_Percentage': round(missing_pct, 2)
        })

if summary_data:
    summary_df = pd.DataFrame(summary_data)
    print(summary_df.to_string(index=False))
    
    summary_file = os.path.join(output_dir, 'missing_values_summary.csv')
    summary_df.to_csv(summary_file, index=False)
    print(f"\n📊 Summary saved to: {summary_file}")
else:
    print("No data found to summarize.")

Processing sheet: Data_Selection
⚠️ Skipping sheet Data_Selection (columns missing)
Processing sheet: Data
Missing values analysis for Data:
--------------------------------------------------
Total rows: 117959

Asset:
  Non-missing values: 117959
  Missing values: 0
  Missing percentage: 0.00%

Super Reason:
  Non-missing values: 117959
  Missing values: 0
  Missing percentage: 0.00%




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={"index": "df_index"}, inplace=True)


Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

100%|██████████| 2/2 [00:00<00:00, 392.08it/s]


Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

✅ Report saved to: profiling_reports_1/Data_missing_values_report.html
Processing sheet: Hoja2
⚠️ Skipping sheet Hoja2 (columns missing)
Processing sheet: Hoja1
⚠️ Skipping sheet Hoja1 (columns missing)
Processing sheet: Pivot
⚠️ Skipping sheet Pivot (columns missing)
🎉 Processing complete!

SUMMARY OF MISSING VALUES ACROSS ALL SHEETS
Sheet       Column  Total_Rows  Missing_Count  Missing_Percentage
 Data        Asset      117959              0                 0.0
 Data Super Reason      117959              0                 0.0

📊 Summary saved to: profiling_reports_1/missing_values_summary.csv


In [10]:
from ydata_profiling import ProfileReport
import pandas as pd
import os

def analyze_missing_values(file_path, sheet_column_mapping, output_dir='profiling_reports'):
    """
    Analyze missing values for specified columns in Excel sheets.
    
    Parameters:
    -----------
    file_path : str
        Path to the Excel file
    sheet_column_mapping : dict
        Dictionary with sheet names as keys and list of columns as values
        Example: {'Sheet1': ['col1', 'col2'], 'Sheet2': ['col3', 'col4']}
    output_dir : str, default='profiling_reports'
        Directory to save the reports
        
    Returns:
    --------
    dict: Summary of missing values analysis
    """
    
    # Create output directory
    os.makedirs(output_dir, exist_ok=True)
    
    # Validate file exists
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")
    
    excel_file = pd.ExcelFile(file_path)
    summary_data = []
    processed_sheets = []
    
    print(f"Starting missing values analysis for: {file_path}")
    print(f"Output directory: {output_dir}")
    print("="*60)
    
    for sheet_name, columns_to_analyze in sheet_column_mapping.items():
        print(f"\nProcessing sheet: {sheet_name}")
        
        # Check if sheet exists
        if sheet_name not in excel_file.sheet_names:
            print(f"⚠️ Sheet '{sheet_name}' not found in Excel file")
            continue
            
        try:
            df = pd.read_excel(file_path, sheet_name=sheet_name)
            
            # Check if specified columns exist
            missing_cols = [col for col in columns_to_analyze if col not in df.columns]
            if missing_cols:
                print(f"⚠️ Columns not found in sheet '{sheet_name}': {missing_cols}")
                # Use only available columns
                available_cols = [col for col in columns_to_analyze if col in df.columns]
                if not available_cols:
                    print(f"⚠️ No valid columns found for sheet '{sheet_name}', skipping...")
                    continue
                columns_to_analyze = available_cols
                print(f"✓ Using available columns: {available_cols}")
            
            df_selected = df[columns_to_analyze]
            
            if df_selected.empty:
                print(f"⚠️ No data found for sheet '{sheet_name}', skipping...")
                continue
            
            # Print detailed missing value info to console
            print(f"\nMissing values analysis for '{sheet_name}':")
            print("-" * 40)
            
            total_rows = len(df_selected)
            print(f"Total rows: {total_rows:,}")
            
            sheet_summary = []
            for col in columns_to_analyze:
                missing_count = df_selected[col].isnull().sum()
                missing_pct = (missing_count / total_rows) * 100
                non_missing = total_rows - missing_count
                
                print(f"\n  {col}:")
                print(f"    Non-missing: {non_missing:,}")
                print(f"    Missing: {missing_count:,}")
                print(f"    Missing %: {missing_pct:.2f}%")
                
                # Store for summary
                summary_data.append({
                    'Sheet': sheet_name,
                    'Column': col,
                    'Total_Rows': total_rows,
                    'Missing_Count': missing_count,
                    'Missing_Percentage': round(missing_pct, 2),
                    'Non_Missing_Count': non_missing
                })
                
                sheet_summary.append({
                    'column': col,
                    'missing_count': missing_count,
                    'missing_percentage': missing_pct
                })
            
            # Generate HTML profile report
            try:
                print(f"\n  Generating HTML report for '{sheet_name}'...")
                profile = ProfileReport(
                    df_selected,
                    title=f"Missing Values Analysis - {sheet_name}",
                    minimal=True,
                    explorative=False,
                )
                
                output_file = os.path.join(output_dir, f"{sheet_name}_missing_values_report.html")
                profile.to_file(output_file)
                print(f"  ✅ HTML report saved: {output_file}")
                
            except Exception as e:
                print(f"  ❌ Error generating HTML report for '{sheet_name}': {e}")
            
            processed_sheets.append({
                'sheet_name': sheet_name,
                'columns_analyzed': columns_to_analyze,
                'total_rows': total_rows,
                'summary': sheet_summary
            })
            
        except Exception as e:
            print(f"❌ Error processing sheet '{sheet_name}': {e}")
            continue
    
    # Generate summary report
    print("\n" + "="*60)
    print("SUMMARY OF MISSING VALUES ACROSS ALL SHEETS")
    print("="*60)
    
    if summary_data:
        summary_df = pd.DataFrame(summary_data)
        
        # Display summary table
        print("\nDetailed Summary:")
        print(summary_df.to_string(index=False))
        
        # Save summary to CSV
        summary_file = os.path.join(output_dir, 'missing_values_summary.csv')
        summary_df.to_csv(summary_file, index=False)
        print(f"\n📊 Summary CSV saved: {summary_file}")
        
        # Generate aggregated summary by column across sheets
        if len(summary_df) > 0:
            print("\nAggregated Summary by Column:")
            agg_summary = summary_df.groupby('Column').agg({
                'Total_Rows': 'sum',
                'Missing_Count': 'sum',
                'Missing_Percentage': 'mean'
            }).round(2)
            print(agg_summary.to_string())
            
            # Save aggregated summary
            agg_file = os.path.join(output_dir, 'aggregated_missing_summary.csv')
            agg_summary.to_csv(agg_file)
            print(f"📊 Aggregated summary saved: {agg_file}")
        
        return {
            'processed_sheets': processed_sheets,
            'summary_dataframe': summary_df,
            'output_directory': output_dir,
            'total_sheets_processed': len(processed_sheets)
        }
    else:
        print("❌ No data was processed successfully.")
        return {
            'processed_sheets': [],
            'summary_dataframe': None,
            'output_directory': output_dir,
            'total_sheets_processed': 0
        }


# Example usage:
if __name__ == "__main__":
    # Define the file path
    excel_path = '/Users/yashpadiyar/Downloads/2 PET2 (Hotfill) 2023.xlsx'
    
    # Define which columns to analyze for each sheet
    sheet_mapping = {
        'Data': ['Asset', 'Super Reason'],
        'Hoja1': ['Fecha', 'cant. Horas', 'Minutos'],
        # Add more sheets and their respective columns as needed
        # 'AnotherSheet': ['Column A', 'Column B', 'Column C']
    }
    
    # Run the analysis
    try:
        results = analyze_missing_values(
            file_path=excel_path,
            sheet_column_mapping=sheet_mapping,
            output_dir='missing_values_reports'
        )
        
        print(f"\n🎉 Analysis complete! Processed {results['total_sheets_processed']} sheets.")
        
    except Exception as e:
        print(f"❌ Error running analysis: {e}")

Starting missing values analysis for: /Users/yashpadiyar/Downloads/2 PET2 (Hotfill) 2023.xlsx
Output directory: missing_values_reports

Processing sheet: Data

Missing values analysis for 'Data':
----------------------------------------
Total rows: 117,959

  Asset:
    Non-missing: 117,959
    Missing: 0
    Missing %: 0.00%

  Super Reason:
    Non-missing: 117,959
    Missing: 0
    Missing %: 0.00%

  Generating HTML report for 'Data'...


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={"index": "df_index"}, inplace=True)


Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

100%|██████████| 2/2 [00:00<00:00, 46345.90it/s]


Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

  ✅ HTML report saved: missing_values_reports/Data_missing_values_report.html

Processing sheet: Hoja1

Missing values analysis for 'Hoja1':
----------------------------------------
Total rows: 365

  Fecha:
    Non-missing: 365
    Missing: 0
    Missing %: 0.00%

  cant. Horas:
    Non-missing: 365
    Missing: 0
    Missing %: 0.00%

  Minutos:
    Non-missing: 6
    Missing: 359
    Missing %: 98.36%

  Generating HTML report for 'Hoja1'...


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={"index": "df_index"}, inplace=True)


Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

100%|██████████| 3/3 [00:00<00:00, 110.18it/s]


Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

  ✅ HTML report saved: missing_values_reports/Hoja1_missing_values_report.html

SUMMARY OF MISSING VALUES ACROSS ALL SHEETS

Detailed Summary:
Sheet       Column  Total_Rows  Missing_Count  Missing_Percentage  Non_Missing_Count
 Data        Asset      117959              0                0.00             117959
 Data Super Reason      117959              0                0.00             117959
Hoja1        Fecha         365              0                0.00                365
Hoja1  cant. Horas         365              0                0.00                365
Hoja1      Minutos         365            359               98.36                  6

📊 Summary CSV saved: missing_values_reports/missing_values_summary.csv

Aggregated Summary by Column:
              Total_Rows  Missing_Count  Missing_Percentage
Column                                                     
Asset             117959              0                0.00
Fecha                365              0                0.00
Minu

In [12]:
import streamlit as st
import pandas as pd
import os
import tempfile
import zipfile
from io import BytesIO
import json
from ydata_profiling import ProfileReport

def analyze_missing_values_streamlit(excel_file, sheet_column_mapping):
    """
    Analyze missing values for Streamlit app with file downloads
    """
    # Create temporary directory for outputs
    temp_dir = tempfile.mkdtemp()
    summary_data = []
    html_reports = {}
    
    try:
        excel_file_obj = pd.ExcelFile(excel_file)
        
        for sheet_name, columns_to_analyze in sheet_column_mapping.items():
            if sheet_name not in excel_file_obj.sheet_names:
                st.warning(f"Sheet '{sheet_name}' not found in Excel file")
                continue
                
            try:
                df = pd.read_excel(excel_file, sheet_name=sheet_name)
                
                # Check if specified columns exist
                missing_cols = [col for col in columns_to_analyze if col not in df.columns]
                if missing_cols:
                    st.warning(f"Columns not found in sheet '{sheet_name}': {missing_cols}")
                    available_cols = [col for col in columns_to_analyze if col in df.columns]
                    if not available_cols:
                        continue
                    columns_to_analyze = available_cols
                
                df_selected = df[columns_to_analyze]
                
                if df_selected.empty:
                    continue
                
                total_rows = len(df_selected)
                
                # Collect summary data
                for col in columns_to_analyze:
                    missing_count = df_selected[col].isnull().sum()
                    missing_pct = (missing_count / total_rows) * 100
                    non_missing = total_rows - missing_count
                    
                    summary_data.append({
                        'Sheet': sheet_name,
                        'Column': col,
                        'Total_Rows': total_rows,
                        'Missing_Count': missing_count,
                        'Missing_Percentage': round(missing_pct, 2),
                        'Non_Missing_Count': non_missing
                    })
                
                # Generate HTML report
                try:
                    profile = ProfileReport(
                        df_selected,
                        title=f"Missing Values Analysis - {sheet_name}",
                        minimal=True,
                        explorative=False,
                    )
                    
                    html_content = profile.to_html()
                    html_reports[sheet_name] = html_content
                    
                except Exception as e:
                    st.error(f"Error generating HTML report for '{sheet_name}': {e}")
                
            except Exception as e:
                st.error(f"Error processing sheet '{sheet_name}': {e}")
                continue
        
        return summary_data, html_reports
        
    except Exception as e:
        st.error(f"Error analyzing file: {e}")
        return [], {}

def create_download_zip(summary_df, html_reports):
    """Create a zip file with all reports"""
    zip_buffer = BytesIO()
    
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        # Add CSV summary
        csv_buffer = BytesIO()
        summary_df.to_csv(csv_buffer, index=False)
        zip_file.writestr('missing_values_summary.csv', csv_buffer.getvalue())
        
        # Add aggregated summary if multiple sheets
        if len(summary_df) > 0:
            agg_summary = summary_df.groupby('Column').agg({
                'Total_Rows': 'sum',
                'Missing_Count': 'sum',
                'Missing_Percentage': 'mean'
            }).round(2)
            
            agg_csv_buffer = BytesIO()
            agg_summary.to_csv(agg_csv_buffer)
            zip_file.writestr('aggregated_missing_summary.csv', agg_csv_buffer.getvalue())
        
        # Add HTML reports
        for sheet_name, html_content in html_reports.items():
            zip_file.writestr(f'{sheet_name}_missing_values_report.html', html_content)
    
    return zip_buffer.getvalue()

def main():
    st.set_page_config(
        page_title="Missing Values Analysis Tool",
        page_icon="📊",
        layout="wide"
    )
    
    st.title("📊 Missing Values Analysis Tool")
    st.markdown("Upload an Excel file and analyze missing values across sheets and columns")
    
    # File upload
    uploaded_file = st.file_uploader(
        "Choose an Excel file",
        type=['xlsx', 'xls'],
        help="Upload an Excel file to analyze missing values"
    )
    
    if uploaded_file is not None:
        try:
            # Show file info
            st.success(f"✅ File uploaded: {uploaded_file.name}")
            
            # Load Excel file to show available sheets
            excel_file = pd.ExcelFile(uploaded_file)
            available_sheets = excel_file.sheet_names
            
            st.subheader("📋 Available Sheets")
            st.write(f"Found {len(available_sheets)} sheets: {', '.join(available_sheets)}")
            
            # Configuration section
            st.subheader("⚙️ Configuration")
            
            # Method to configure analysis
            config_method = st.radio(
                "Choose configuration method:",
                ["Interactive Setup", "JSON Configuration", "Quick Analysis (All Sheets, Same Columns)"]
            )
            
            sheet_column_mapping = {}
            
            if config_method == "Quick Analysis (All Sheets, Same Columns)":
                # Quick setup - same columns for all sheets
                st.markdown("**Quick Setup:** Analyze the same columns across all sheets")
                
                # Load first sheet to show available columns
                sample_df = pd.read_excel(uploaded_file, sheet_name=available_sheets[0])
                available_columns = list(sample_df.columns)
                
                selected_columns = st.multiselect(
                    f"Select columns to analyze (from {available_sheets[0]}):",
                    available_columns,
                    help="These columns will be analyzed in all sheets where they exist"
                )
                
                if selected_columns:
                    # Apply same columns to all sheets
                    for sheet in available_sheets:
                        sheet_column_mapping[sheet] = selected_columns
            
            elif config_method == "Interactive Setup":
                # Interactive setup - different columns for each sheet
                st.markdown("**Interactive Setup:** Choose specific columns for each sheet")
                
                for sheet in available_sheets:
                    with st.expander(f"📄 Configure Sheet: {sheet}"):
                        # Load sheet to show available columns
                        try:
                            sheet_df = pd.read_excel(uploaded_file, sheet_name=sheet)
                            sheet_columns = list(sheet_df.columns)
                            
                            selected_cols = st.multiselect(
                                f"Select columns for {sheet}:",
                                sheet_columns,
                                key=f"cols_{sheet}"
                            )
                            
                            if selected_cols:
                                sheet_column_mapping[sheet] = selected_cols
                                
                        except Exception as e:
                            st.error(f"Error loading sheet {sheet}: {e}")
            
            elif config_method == "JSON Configuration":
                # JSON configuration
                st.markdown("**JSON Configuration:** Paste or type your configuration")
                
                json_example = {
                    "Sheet1": ["Asset", "Super Reason"],
                    "Sheet2": ["Asset", "Super Reason", "Other Column"]
                }
                
                st.code(json.dumps(json_example, indent=2), language="json")
                
                json_input = st.text_area(
                    "Enter JSON configuration:",
                    height=150,
                    placeholder=json.dumps(json_example, indent=2)
                )
                
                if json_input:
                    try:
                        sheet_column_mapping = json.loads(json_input)
                        st.success("✅ JSON configuration loaded successfully")
                    except json.JSONDecodeError as e:
                        st.error(f"❌ Invalid JSON format: {e}")
            
            # Show current configuration
            if sheet_column_mapping:
                st.subheader("📋 Current Configuration")
                config_df = []
                for sheet, columns in sheet_column_mapping.items():
                    for col in columns:
                        config_df.append({"Sheet": sheet, "Column": col})
                
                if config_df:
                    st.dataframe(pd.DataFrame(config_df), use_container_width=True)
                
                # Analyze button
                if st.button("🚀 Run Missing Values Analysis", type="primary"):
                    with st.spinner("Analyzing missing values..."):
                        summary_data, html_reports = analyze_missing_values_streamlit(
                            uploaded_file, sheet_column_mapping
                        )
                        
                        if summary_data:
                            summary_df = pd.DataFrame(summary_data)
                            
                            # Display results
                            st.subheader("📊 Results")
                            
                            # Summary metrics
                            col1, col2, col3, col4 = st.columns(4)
                            
                            with col1:
                                st.metric("Sheets Processed", len(summary_df['Sheet'].unique()))
                            with col2:
                                st.metric("Columns Analyzed", len(summary_df['Column'].unique()))
                            with col3:
                                total_missing = summary_df['Missing_Count'].sum()
                                st.metric("Total Missing Values", f"{total_missing:,}")
                            with col4:
                                avg_missing_pct = summary_df['Missing_Percentage'].mean()
                                st.metric("Avg Missing %", f"{avg_missing_pct:.1f}%")
                            
                            # Detailed summary table
                            st.subheader("📋 Detailed Summary")
                            st.dataframe(summary_df, use_container_width=True)
                            
                            # Aggregated summary
                            if len(summary_df) > 1:
                                st.subheader("📈 Aggregated Summary by Column")
                                agg_summary = summary_df.groupby('Column').agg({
                                    'Total_Rows': 'sum',
                                    'Missing_Count': 'sum',
                                    'Missing_Percentage': 'mean'
                                }).round(2)
                                st.dataframe(agg_summary, use_container_width=True)
                            
                            # Charts
                            st.subheader("📊 Visualizations")
                            
                            # Missing percentage by column
                            if len(summary_df) > 0:
                                col1, col2 = st.columns(2)
                                
                                with col1:
                                    st.bar_chart(
                                        summary_df.set_index(['Sheet', 'Column'])['Missing_Percentage'],
                                        use_container_width=True
                                    )
                                    st.caption("Missing Percentage by Sheet and Column")
                                
                                with col2:
                                    # Missing count by sheet
                                    sheet_totals = summary_df.groupby('Sheet')['Missing_Count'].sum()
                                    st.bar_chart(sheet_totals, use_container_width=True)
                                    st.caption("Total Missing Values by Sheet")
                            
                            # Download section
                            st.subheader("💾 Download Reports")
                            
                            # Individual downloads
                            col1, col2 = st.columns(2)
                            
                            with col1:
                                # CSV summary download
                                csv_data = summary_df.to_csv(index=False)
                                st.download_button(
                                    label="📄 Download Summary CSV",
                                    data=csv_data,
                                    file_name="missing_values_summary.csv",
                                    mime="text/csv"
                                )
                            
                            with col2:
                                # Aggregated summary download
                                if len(summary_df) > 1:
                                    agg_csv_data = agg_summary.to_csv()
                                    st.download_button(
                                        label="📈 Download Aggregated Summary",
                                        data=agg_csv_data,
                                        file_name="aggregated_missing_summary.csv",
                                        mime="text/csv"
                                    )
                            
                            # HTML reports downloads
                            if html_reports:
                                st.markdown("**Individual HTML Reports:**")
                                cols = st.columns(min(len(html_reports), 3))
                                for idx, (sheet_name, html_content) in enumerate(html_reports.items()):
                                    with cols[idx % 3]:
                                        st.download_button(
                                            label=f"📊 {sheet_name} Report",
                                            data=html_content,
                                            file_name=f"{sheet_name}_missing_values_report.html",
                                            mime="text/html"
                                        )
                            
                            # All reports as ZIP
                            st.markdown("**Download All Reports:**")
                            zip_data = create_download_zip(summary_df, html_reports)
                            st.download_button(
                                label="🗜️ Download All Reports (ZIP)",
                                data=zip_data,
                                file_name=f"missing_values_analysis_{uploaded_file.name.split('.')[0]}.zip",
                                mime="application/zip"
                            )
                            
                            st.success("🎉 Analysis completed successfully!")
                            
                        else:
                            st.error("❌ No data could be processed. Please check your configuration.")
            
            else:
                st.info("👆 Please configure which columns to analyze for each sheet above.")
                
        except Exception as e:
            st.error(f"❌ Error processing file: {e}")
    
    else:
        st.info("👆 Please upload an Excel file to get started.")
        
        # Show example configuration
        st.subheader("💡 Example Usage")
        st.markdown("""
        1. **Upload** your Excel file
        2. **Configure** which columns to analyze for each sheet
        3. **Run analysis** to see missing values statistics  
        4. **Download** detailed reports (CSV summaries and HTML reports)
        
        **Configuration Options:**
        - **Quick Analysis**: Same columns across all sheets
        - **Interactive Setup**: Different columns for each sheet  
        - **JSON Configuration**: Advanced configuration with JSON
        """)

if __name__ == "__main__":
    main()

2025-09-19 14:34:16.523 
  command:

    streamlit run /Users/yashpadiyar/Desktop/data-profiling/venv/lib/python3.11/site-packages/ipykernel_launcher.py [ARGUMENTS]
