# In [None]:
# Import necessary libraries
import os
import pandas as pd
from google.cloud import bigquery
import matplotlib.pyplot as plt
import seaborn as sns


# Patient demographics and clinical characteristics to retrieve, grouped by
# the table expected to hold them.
# NOTE(review): these column names match the MIMIC-III schema, where only
# subject_id / gender / dob live in `patients` and the remaining columns live
# in `admissions`. Selecting all of them from `patients` alone fails with
# "unrecognized name" errors, so the two tables are joined below. Confirm
# against the dataset that `dataset_name` actually points at.
patient_columns = [
    'subject_id',
    'gender',
    'dob',
]
admission_columns = [
    'admittime',
    'dischtime',
    'insurance',
    'ethnicity',
    'hospital_expire_flag',
]

# Combined list, kept under its original name and in its original order.
demographics = patient_columns + admission_columns

# Qualify every column with its table alias so the join is unambiguous; the
# resulting DataFrame columns still carry the bare column names.
select_list = ',\n    '.join(
    [f'p.{column}' for column in patient_columns]
    + [f'a.{column}' for column in admission_columns]
)

# Construct BigQuery SQL query to retrieve demographic data.
# `project_id` and `dataset_name` are not defined in this part of the file and
# must already be set when this code runs.
# The join yields one row per hospital admission, not one row per patient.
query = f"""
SELECT
    {select_list}
FROM
    `{project_id}.{dataset_name}.patients` AS p
JOIN
    `{project_id}.{dataset_name}.admissions` AS a
    ON a.subject_id = p.subject_id
"""

# Create a BigQuery client
client = bigquery.Client(project=project_id)

# Run the query and load the full result set into a pandas DataFrame
demographic_data = client.query(query).to_dataframe()


 Gender distribution
gender_counts = demographic_data['gender'].value_counts()
plt.figure(figsize=(8, 6))
sns.barplot(x=gender_counts.index, y=gender_counts.values)
plt.title('Gender Distribution')
plt.xlabel('Gender')
plt.ylabel('Count')
plt.show()

 Age distribution
demographic_data['dob'] = pd.to_datetime(demographic_data['dob'])
demographic_data['age'] = (demographic_data['admittime'] - demographic_data['dob']).dt.days / 365
plt.figure(figsize=(10, 6))
sns.histplot(demographic_data['age'], bins=20, kde=True)
plt.title('Age Distribution')
plt.xlabel('Age')
plt.ylabel('Count')
plt.show()

 Ethnicity distribution
ethnicity_counts = demographic_data['ethnicity'].value_counts().head(10)
plt.figure(figsize=(12, 6))
sns.barplot(x=ethnicity_counts.index, y=ethnicity_counts.values)
plt.title('Top 10 Ethnicities')
plt.xlabel('Ethnicity')
plt.ylabel('Count')
plt.xticks(rotation=45)
plt.show()

 Hospital mortality analysis
hospital_expire_counts = demographic_data['hospital_expire_flag'].value_counts()
plt.figure(figsize=(6, 6))
sns.barplot(x=hospital_expire_counts.index, y=hospital_expire_counts.values)
plt.title('Hospital Mortality')
plt.xlabel('Expired (1) or Survived (0)')
plt.ylabel('Count')
plt.show()


