## 🧪 Data Quality Report Generator
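This notebook analyzes a clinical dataset and writes a Markdown report (`QUALITY_REPORT.md`) summarizing its data-quality metrics: missing values, duplicate rows, data types, unique values, and summary statistics for numerical columns.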

In [None]:
import pandas as pd

# Relative path to the input CSV; adjust it to point at your own dataset.
csv_file_path = "../data/data_es.csv"
df = pd.read_csv(filepath_or_buffer=csv_file_path)
# Preview the first five rows as the cell output.
df.head(n=5)


#### 🔍 Missing Values per Column

In [None]:
# Count null cells column by column, then display only the columns that have any.
missing_values = df.isna().sum()
missing_values.loc[missing_values.gt(0)]


#### 🔁 Duplicate Rows

In [None]:
# duplicated() flags every row that repeats an earlier one; summing the flags counts them.
duplicate_count = df.duplicated(keep="first").sum()
f"Duplicate rows: {duplicate_count}"


#### 🧬 Data Types

In [None]:
# Display the dtype pandas assigned to each column of df.
df.dtypes


#### 🔢 Unique Values per Column

In [None]:
# Distinct-value count for every column, largest count first.
(
    df
    .nunique()
    .sort_values(ascending=False)
)


#### 📊 Summary Statistics (Numerical Columns)

In [None]:
# Descriptive statistics for the float/int columns, one row per column, rounded to 2 decimals.
df.describe(include=[float, int]).T.round(2)


### 📝 Export Report as Markdown

In [None]:
# Build the Markdown data-quality report for `df` and write it to QUALITY_REPORT.md.
# The pieces are collected in a list and joined once at the end rather than
# repeatedly concatenating onto a growing string inside the loops.
report_parts = ["# Data Quality Report\n\n"]

# One bullet per column, including columns with zero missing values.
report_parts.append("## Missing Values per Column\n")
report_parts.extend(
    f"- **{col}**: {val} missing values\n" for col, val in df.isnull().sum().items()
)

report_parts.append(
    f"\n## Duplicate Rows\n- {df.duplicated().sum()} duplicate rows detected\n"
)

# dtypes are cast to str so each bullet shows the plain dtype name.
report_parts.append("\n## Data Types\n")
report_parts.extend(
    f"- **{col}**: {dtype}\n" for col, dtype in df.dtypes.astype(str).items()
)

report_parts.append("\n## Unique Values per Column\n")
report_parts.extend(
    f"- **{col}**: {val} unique values\n" for col, val in df.nunique().items()
)

report_parts.append("\n## Summary Statistics (Numerical Columns)\n")
# describe(include=[float, int]) raises ValueError when no column matches the
# requested dtypes, which would abort the report before the file is written.
# Check first and emit a placeholder line instead.
if df.select_dtypes(include=[float, int]).columns.empty:
    report_parts.append("_No numerical columns found._\n")
else:
    # NOTE: DataFrame.to_markdown() relies on the optional `tabulate` package.
    report_parts.append(
        df.describe(include=[float, int]).transpose().round(2).to_markdown()
    )

quality_md = "".join(report_parts)

# Written relative to the current working directory; UTF-8 keeps non-ASCII
# column names intact.
with open("QUALITY_REPORT.md", "w", encoding="utf-8") as f:
    f.write(quality_md)

print("✅ QUALITY_REPORT.md generated successfully.")
