In [None]:
#Step 1 
#Read the file & Structure
import pandas as pd

# Load the hospital payment extract into a DataFrame.
df = pd.read_csv("oregon_2022_hospital_payment.csv")

# A notebook cell only renders its *last* bare expression, so a bare
# `df.head()` placed above `df.dtypes` is evaluated and thrown away.
# display() (injected into the namespace by IPython) renders the preview
# explicitly; `df.dtypes` stays last so it is still shown as the cell result.
display(df.head())
df.dtypes


Line of business                 object
Service category                 object
Procedure                        object
Statewide indicator               int64
Hospital type indicator           int64
Region indicator                  int64
Hospital                         object
Hospital type                    object
Region                           object
Number of discharges, 2022        int64
25th percentile, 2022             int64
Median, 2022                      int64
75th percentile, 2022             int64
Total paid, 2022                  int64
Number of discharges, 2021      float64
25th percentile, 2021           float64
Median, 2021                    float64
75th percentile, 2021           float64
Total paid, 2021                float64
Difference in medians           float64
Percent difference from 2021     object
dtype: object

In [None]:
#Step 2
#Compute
# Central-tendency summary (mean, median, mode) of one column, using pandas.
col = "Total paid, 2022"

# Series.mode() may hold several values; [0] keeps the first entry it returns.
mean_val, median_val, mode_val = (
    df[col].mean(),
    df[col].median(),
    df[col].mode()[0],
)

for caption, stat in [("Mean:", mean_val), ("Median:", median_val), ("Mode:", mode_val)]:
    print(caption, stat)

Mean: 923229.6697247706
Median: 115726.0
Mode: 22687


In [5]:
#Step 3
#Repeat in the hard way
# Recompute mean / median / mode of one CSV column with the standard library
# only (no pandas), reading the file row by row.
import csv
import math

filename = "oregon_2022_hospital_payment.csv"
col_name = "Total paid, 2022"

values = []

with open(filename, newline="", encoding="utf-8") as f:
    reader = csv.DictReader(f)
    for row in reader:
        val = row[col_name]
        # Let float() decide what counts as a number: str.isnumeric() rejects
        # valid input such as "-5" or "12.5" and accepts characters like "²"
        # that float() cannot parse (which would raise mid-loop).
        try:
            number = float(val)
        except (TypeError, ValueError):
            continue  # blank, missing (None) or non-numeric cell
        if math.isfinite(number):  # ignore "nan" / "inf" spellings
            values.append(number)

# Fail with a clear message instead of a bare ZeroDivisionError below.
if not values:
    raise ValueError(f"No numeric values found in column {col_name!r} of {filename!r}")

# Mean
mean_val = sum(values) / len(values)

# Median: middle element, or the average of the two middle elements when the
# number of values is even.
sorted_vals = sorted(values)
n = len(sorted_vals)
if n % 2 == 1:
    median_val = sorted_vals[n // 2]
else:
    median_val = (sorted_vals[n // 2 - 1] + sorted_vals[n // 2]) / 2

# Mode: tally occurrences, then take the smallest of the most frequent values.
# pandas' Series.mode() returns its modes sorted, so `mode()[0]` is the
# smallest one; picking the minimum keeps both computations in agreement when
# several values tie (max(counts, key=counts.get) would depend on row order).
counts = {}
for v in values:
    counts[v] = counts.get(v, 0) + 1
top_count = max(counts.values())
mode_val = min(value for value, seen in counts.items() if seen == top_count)

print("Mean:", mean_val)
print("Median:", median_val)
print("Mode:", mode_val)


Mean: 923229.6697247706
Median: 115726.0
Mode: 22687.0


In [9]:
#Step 4
#Visualization
# Text histogram of `values` (built by the Step 3 cell): one line per bucket,
# one bar symbol per BAR_UNIT rows, followed by the exact row count.
BAR_UNIT = 50  # rows represented by a single bar symbol
bins = [0, 10000, 20000, 30000, 40000, 50000, 100000]
labels = [f"{bins[i]}-{bins[i+1]}" for i in range(len(bins)-1)]
# Open-ended top bucket. Without it every value >= bins[-1] matched no range
# and silently vanished from the chart, even though the median printed by the
# earlier cells (115726) lies above that edge.
labels.append(f"{bins[-1]}+")
counts = [0] * len(labels)

for v in values:
    if v >= bins[-1]:
        counts[-1] += 1
        continue
    # Half-open buckets [low, high); a value below bins[0] matches none of
    # them and is left out of the chart.
    for i in range(len(bins)-1):
        if bins[i] <= v < bins[i+1]:
            counts[i] += 1
            break

for label, c in zip(labels, counts):
    # Buckets with fewer than BAR_UNIT rows get an empty bar; the count in
    # parentheses is always exact.
    bar = "🇨🇳" * (c // BAR_UNIT)
    print(f"{label}: {bar} ({c})")


0-10000: ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ (1466)
10000-20000: ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ (978)
20000-30000: ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ (735)
30000-40000: ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ (583)
40000-50000: ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ (507)
50000-100000: ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğŸ‡¨ğŸ‡³ğ

In [11]:
# Optional: Sparkline version
# One line per year: the column total scaled to millions, drawn as one star
# per MILLIONS_PER_STAR millions, followed by the total itself.
# NOTE(review): the frame also carries "Statewide indicator" / "Region
# indicator" columns; if those mark aggregate rows, summing every row
# double-counts them -- confirm before quoting these totals.
MILLIONS_PER_STAR = 1000

# The figures below are divided by 1_000_000 and suffixed with "M", so the
# heading must say millions (it previously claimed thousands).
print("\nOregon Hospital Payments (in millions)\n")

year_cols = ["Total paid, 2021", "Total paid, 2022"]

for col in year_cols:
    # Column names look like "Total paid, 2022"; the year follows the comma.
    year = col.split(",")[1].strip()
    # Series.sum() skips NaN by default, so rows with no 2021 figure simply
    # contribute nothing to that year's total.
    total = df[col].sum() / 1_000_000
    stars = "★" * int(total / MILLIONS_PER_STAR)
    print(f"{year}: {stars} ({round(total, 1)}M)")



Oregon Hospital Payments (in thousands)

2021: ★★★★★★★★★★★ (11459.5M)
2022: ★★★★★★★★★★★ (11773.9M)
