# ALOTT Top 10 Primary Diagnoses Analysis

This notebook analyzes the top 10 primary diagnoses in ALOTT admissions.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide plotting defaults: seaborn's white-grid theme and a
# 12x6 inch default figure size (individual cells may override the size).
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

In [None]:
# Read the admission table; the path is relative to the working directory.
admission_csv_path = 'files/alott/1.0.0/emr-datasets/admission.csv'
admission_df = pd.read_csv(admission_csv_path)

# Quick orientation: number of rows, the column names, then a preview.
row_count = admission_df.shape[0]
column_names = list(admission_df.columns)
print(f"Total admissions: {row_count}")
print(f"\nDataset columns: {column_names}")
print(f"\nFirst few rows:")
# Last expression of the cell, so the preview is rendered as a rich table.
admission_df.head()

In [None]:
# Rank primary diagnoses by number of admissions and keep the ten most common.
# value_counts() sorts in descending order and, by default, leaves out rows
# whose diagnosis is missing.
top_diagnoses = admission_df['Primary_Diagnosis'].value_counts().head(10)

# Percentages below are shares of ALL admissions, so rows with a missing
# diagnosis still count in the denominator. Computed once, outside the loop.
total_admissions = len(admission_df)

print("Top 10 Primary Diagnoses in ALOTT Admissions:")
print("="*60)
for idx, (diagnosis, count) in enumerate(top_diagnoses.items(), 1):
    percentage = (count / total_admissions) * 100
    # `!s` converts the label to text before the fixed-width 's' format is
    # applied; without it a non-string diagnosis value (e.g. a numeric code)
    # would raise ValueError. String labels are printed exactly as before.
    print(f"{idx:2d}. {diagnosis!s:50s} : {count:4d} ({percentage:.1f}%)")

# Last expression of the cell, so the ranked Series is also displayed.
top_diagnoses

In [None]:
# Horizontal bar chart of the ten most frequent primary diagnoses,
# drawn on an explicitly created figure/axes pair.
fig, ax = plt.subplots(figsize=(14, 8))
top_diagnoses.plot(kind='barh', color='steelblue', ax=ax)

ax.set_xlabel('Number of Admissions', fontsize=12)
ax.set_ylabel('Primary Diagnosis', fontsize=12)
ax.set_title('Top 10 Primary Diagnoses in ALOTT Admissions', fontsize=14, fontweight='bold')
ax.invert_yaxis()  # first (most frequent) entry ends up on the top row

# Write each bar's admission count just to the right of the bar's end
# (0.5 data units past it), vertically centred on the bar.
for bar_position, admissions in enumerate(top_diagnoses.values):
    ax.text(admissions + 0.5, bar_position, str(admissions), va='center', fontsize=10)

fig.tight_layout()
plt.show()

In [None]:
# Pie chart of the same ten diagnoses.
# NOTE: the autopct labels show each wedge's share of the values passed in
# (i.e. of the top-10 subtotal), not its share of all admissions.
pie_fig, pie_ax = plt.subplots(figsize=(12, 10))
colors = sns.color_palette('Set3', 10)
pie_ax.pie(
    top_diagnoses.values,
    labels=top_diagnoses.index,
    autopct='%1.1f%%',
    startangle=90,
    colors=colors,
)
pie_ax.set_title(
    'Distribution of Top 10 Primary Diagnoses in ALOTT Admissions',
    fontsize=14,
    fontweight='bold',
    pad=20,
)
pie_ax.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
pie_fig.tight_layout()
plt.show()

In [None]:
# Headline numbers for the diagnosis breakdown. Each value is computed
# right before the line that reports it.
print("\nSummary Statistics:")
print("="*60)

diagnosis_column = admission_df['Primary_Diagnosis']
print(f"Total unique primary diagnoses: {diagnosis_column.nunique()}")

admission_total = len(admission_df)
print(f"Total admissions: {admission_total}")

# Share of all admissions covered by the ten most frequent diagnoses.
top10_total = top_diagnoses.sum()
top10_share = top10_total / admission_total * 100
print(f"Top 10 diagnoses account for: {top10_total} admissions ({top10_share:.1f}%)")

# top_diagnoses is sorted by count, so its first entry is the most common.
leading_diagnosis = top_diagnoses.index[0]
leading_count = top_diagnoses.values[0]
print(f"Most common diagnosis: {leading_diagnosis} ({leading_count} cases)")