# Lab Results Analysis

This notebook analyzes lab results data and generates distribution reports for each field.


## 1. Import Libraries and Create Sample Data


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
# Suppress every warning so library notices do not clutter the cell output.
warnings.filterwarnings(action='ignore')

# Plot defaults: seaborn "whitegrid" theme and a 12x6 default figure size.
sns.set_style(style="whitegrid")
plt.rc('figure', figsize=(12, 6))


In [None]:
# Generate sample lab results data.
#
# The global NumPy RNG is seeded so that patient IDs, lab types, lab values
# and day offsets are the same on every run. The order of the np.random calls
# below therefore matters: reordering or removing one changes all later draws.
np.random.seed(42)

# Lab types
lab_types = ['Complete Blood Count', 'Lipid Panel', 'Liver Function', 'Kidney Function',
             'Thyroid Panel', 'Hemoglobin A1C', 'Vitamin D', 'Cholesterol']

# Generate 500 sample records
n_records = 500

# Patient IDs (100 unique patients): PAT00001 .. PAT00100
patient_ids = [f'PAT{i:05d}' for i in range(1, 101)]

# Anchor for the synthetic lab dates. It is captured exactly once so that every
# record is offset from the same instant; reading the clock per record could
# straddle midnight and stretch the date window beyond the intended 0-364 days.
# The dates are still relative to "today", so they shift from day to day.
reference_date = datetime.now()

# Generate data
data = {
    'Patient ID': np.random.choice(patient_ids, n_records),
    # Sampling weights are positional and line up with lab_types.
    'Lab Type': np.random.choice(lab_types, n_records, p=[0.2, 0.15, 0.15, 0.15, 0.1, 0.1, 0.1, 0.05]),
    # Placeholder values: every lab type has an entry in lab_value_ranges, so
    # all of these are overwritten below. The draw is kept so the RNG stream
    # (and hence the dates and final lab values) stays unchanged.
    'Lab Value': np.round(np.random.normal(100, 30, n_records), 2),
    # Random day within the last 365 days (offsets 0..364), as 'YYYY-MM-DD'.
    'Lab Date': [(reference_date - timedelta(days=np.random.randint(0, 365))).strftime('%Y-%m-%d')
                 for _ in range(n_records)]
}

# Create DataFrame
df = pd.DataFrame(data)

# Adjust lab values based on lab type for more realistic data.
# Each entry is the (min, max) of the uniform range used for that lab type.
lab_value_ranges = {
    'Complete Blood Count': (4.5, 11.0),
    'Lipid Panel': (120, 200),
    'Liver Function': (10, 40),
    'Kidney Function': (0.6, 1.2),
    'Thyroid Panel': (0.5, 5.0),
    'Hemoglobin A1C': (4.0, 6.5),
    'Vitamin D': (20, 50),
    'Cholesterol': (150, 250)
}

for lab_type, (min_val, max_val) in lab_value_ranges.items():
    # Replace the placeholder values of this lab type with uniform draws
    # from its own range, rounded to two decimals.
    mask = df['Lab Type'] == lab_type
    df.loc[mask, 'Lab Value'] = np.round(np.random.uniform(min_val, max_val, mask.sum()), 2)

# Convert Lab Date from 'YYYY-MM-DD' strings to datetime
df['Lab Date'] = pd.to_datetime(df['Lab Date'])

print(f"Dataset created with {len(df)} records")
print("\nFirst few records:")
df.head(10)


## 2. Data Overview


In [None]:
# Headline figures for the dataset: size, distinct patients and lab types,
# and the span of lab dates. Each argument is printed on its own line.
print(
    "Dataset Info:",
    f"Total Records: {len(df)}",
    f"Total Patients: {df['Patient ID'].nunique()}",
    f"Total Lab Types: {df['Lab Type'].nunique()}",
    f"\nDate Range: {df['Lab Date'].min().date()} to {df['Lab Date'].max().date()}",
    sep="\n",
)

# Column dtypes, printed as plain text under their own heading.
print("\nData Types:", df.dtypes, sep="\n")

# Summary statistics; left as the last expression so the notebook renders
# the resulting table as the cell output.
print("\nBasic Statistics:")
df.describe()
