In [4]:
import pandas as pd
import numpy as np
from scipy.optimize import newton
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# (CSV path, continent name) pairs, in the order the continents are processed.
# Written as a mapping so each file sits next to its label; dicts preserve
# insertion order, so the resulting list order matches the literal below.
file_paths = list(
    {
        "Asia_Fortune.csv": "Asia",
        "Europe_Fortune.csv": "Europe",
        "North_America_Fortune.csv": "North America",
    }.items()
)

def calculate_alpha_mad(data, num_iterations=100):
    """Estimate a Pareto shape parameter (alpha) from the mean absolute deviation.

    For each of ``num_iterations`` draws, a random half of the non-missing
    observations is selected with ``train_test_split`` and the equation

        MAD = alpha * beta * (2 ** (1 / alpha) - 1) / (alpha - 1)

    is solved for ``alpha``, where ``MAD`` is the mean absolute deviation of
    the half-sample about its median and ``beta`` is the half-sample minimum.
    The per-draw solutions are averaged.

    Args:
        data: Object exposing ``dropna()`` whose result supports ``len`` and
            NumPy reductions (the visible caller passes a pandas Series).
        num_iterations: Number of random half-samples to average over.

    Returns:
        The mean of the per-draw alpha estimates, or ``nan`` when fewer than
        two non-missing observations are available.

    Raises:
        RuntimeError: Propagated from ``scipy.optimize.newton`` when the root
            search does not converge for a draw.
    """
    # Missing values do not change between draws, so drop them once.
    observations = data.dropna()

    # A 50/50 split of fewer than two rows leaves an empty training half,
    # which train_test_split rejects; report "no estimate" instead of raising.
    if len(observations) < 2:
        return np.nan

    alpha_mad_values = []
    for _ in range(num_iterations):
        train_data, _held_out = train_test_split(observations, test_size=0.5)

        median = np.median(train_data)
        mad = np.mean(np.abs(train_data - median))
        beta_MAD = np.min(train_data)

        def equation(alpha):
            # Residual between the observed MAD and the closed-form expression.
            return mad - (alpha * beta_MAD * (2**(1/alpha) - 1) / (alpha - 1))

        # The expression divides by (alpha - 1), so the search starts just
        # above that singularity.
        alpha_MAD = newton(equation, 1.1)
        alpha_mad_values.append(alpha_MAD)

    return np.mean(alpha_mad_values)

# Estimate a MAD-based Pareto alpha per year for every continent CSV.
# alpha_dfs collects (DataFrame with "Year"/"Alpha" columns, continent name).
alpha_dfs = []

for file_path, continent_name in file_paths:
    data = pd.read_csv(file_path)

    # Fail early, naming the file and its actual header, when an expected
    # column is absent; a bare KeyError from pandas does not say which file.
    missing_columns = [col for col in ("Year", "Worth") if col not in data.columns]
    if missing_columns:
        raise KeyError(
            f"{file_path} is missing column(s) {missing_columns}; "
            f"found {list(data.columns)}"
        )

    # Collect one record per year and build the frame once, rather than
    # concatenating onto an initially empty DataFrame on every iteration.
    yearly_rows = []
    for year in range(2001, 2024):
        year_data = data[data["Year"] == year]
        worth_data = year_data["Worth"]

        # calculate_alpha_mad splits the sample 50/50, which needs at least
        # two non-missing values; record NaN so the year still appears.
        if worth_data.count() < 2:
            alpha_mad = np.nan
        else:
            alpha_mad = calculate_alpha_mad(worth_data)

        yearly_rows.append({"Year": year, "Alpha": alpha_mad})

    alpha_df = pd.DataFrame(yearly_rows, columns=["Year", "Alpha"])
    alpha_dfs.append((alpha_df, continent_name))

# Concatenate all alpha data into one DataFrame: rows are years, columns are
# a (continent, "Alpha") MultiIndex.
all_alpha_data = pd.concat(
    [df.set_index('Year') for df, _ in alpha_dfs],
    axis=1,
    keys=[name for _, name in alpha_dfs],
)

# Plotting
plt.figure(figsize=(12, 6))
colors = ['red', 'green', 'blue']

# Plot each continent's alpha values. Iterating alpha_dfs keeps the plotted
# continents in sync with file_paths; colours repeat if there are more
# continents than colours.
for position, (_, continent_name) in enumerate(alpha_dfs):
    plt.plot(
        all_alpha_data.index,
        all_alpha_data[continent_name]['Alpha'],
        label=f'{continent_name} Alpha (MAD)',
        linestyle='-',
        marker='o',
        color=colors[position % len(colors)],
    )

plt.title("Pareto Distribution Alpha (MAD) Over the Years by Continent")
plt.xlabel("Year")
plt.ylabel("Alpha")
# Rotate x-axis labels vertically
plt.xticks(rotation='vertical')
# Add a legend and a grid
plt.legend()
plt.grid(True)

# Tight layout to prevent label overlapping
plt.tight_layout()

# Show the plot
plt.show()

# Print the table
for df, continent_name in alpha_dfs:
    print(f"\n{continent_name} Alpha (MAD) over the years:\n{df}")


KeyError: 'Year'

<Figure size 1200x600 with 0 Axes>