In [None]:
import pandas as pd
import re

# Sample data: date strings whose day number carries an ordinal suffix
df = pd.DataFrame({
    'raw_date': ['Mon, 28th May 2020 12:34:56 GMT', 'Tue, 1st Jan 2019 05:00:00 GMT']
})


def strip_day_suffix(dates: pd.Series) -> pd.Series:
    """Remove the ordinal suffix ('st', 'nd', 'rd', 'th') that follows a number.

    Uses the vectorised ``.str`` accessor rather than ``apply`` + ``re.sub``,
    so missing entries stay missing instead of raising ``TypeError``.

    Parameters
    ----------
    dates : pd.Series
        Series of date strings (may contain missing values).

    Returns
    -------
    pd.Series
        Same index as ``dates``; every ``<digits><suffix>`` is reduced to ``<digits>``.
    """
    return dates.str.replace(r'(\d+)(st|nd|rd|th)', r'\1', regex=True)


# Remove day suffixes so the day matches the plain '%d' directive below
df['cleaned_date'] = strip_day_suffix(df['raw_date'])

# Parse into datetime format; errors='coerce' turns rows that do not match
# the format into NaT rather than raising
df['parsed_date'] = pd.to_datetime(df['cleaned_date'], format='%a, %d %b %Y %H:%M:%S %Z', errors='coerce')

# Optional: just keep the date part (no time)
df['just_date'] = df['parsed_date'].dt.date

print(df[['raw_date', 'just_date']])


In [None]:
import matplotlib.pyplot as plt

# Prices of three cars at the two points in time being compared
years = ['2010', '2025']
car1_prices = [15000, 18000]
car2_prices = [20000, 25000]
car3_prices = [17000, 22000]

# The top segment starts where the two segments beneath it end
car1_plus_car2 = [sum(pair) for pair in zip(car1_prices, car2_prices)]

fig, ax = plt.subplots()

# Draw the stack bottom-up: car 1 on the axis, car 2 on car 1, car 3 on both
bar1 = ax.bar(years, car1_prices, label='Car 1')
bar2 = ax.bar(years, car2_prices, bottom=car1_prices, label='Car 2')
bar3 = ax.bar(years, car3_prices, bottom=car1_plus_car2, label='Car 3')

# Axis label, title and legend
ax.set(ylabel='Total Price ($)', title='Stacked Car Prices in 2010 vs 2025')
ax.legend()

plt.tight_layout()
plt.show()


In [None]:
import shap

# numpy is required for the ranking below; it is imported here because no
# other visible cell brings `np` into scope.
import numpy as np

# Number of largest-magnitude feature contributions to keep in the plot
TOP_N = 10

# Get SHAP values for the dataset
# NOTE(review): `model` and `X` are not defined in any visible cell; they must
# already exist in the kernel (presumably a fitted tree model and its feature
# matrix — confirm).
explainer = shap.TreeExplainer(model)
shap_values = explainer(X)

# Select the row you're interested in (e.g., index 0)
i = 0
shap_value = shap_values[i]

# Rank features by absolute SHAP value: argsort is ascending, so take the last
# TOP_N indices and reverse them to get largest-first order.
top_inds = np.argsort(np.abs(shap_value.values))[-TOP_N:][::-1]

# Build a reduced Explanation that holds only the selected features.
# NOTE(review): only these TOP_N values are passed to the plot, so the
# contributions of all remaining features are left out — confirm that the
# totals shown on the chart are what you intend.
shap_value_top = shap.Explanation(
    values=shap_value.values[top_inds],
    base_values=shap_value.base_values,
    data=shap_value.data[top_inds],
    feature_names=np.array(shap_value.feature_names)[top_inds]
)

# Plot the waterfall chart; max_display follows TOP_N so every selected
# feature gets its own row if TOP_N is changed.
shap.plots.waterfall(shap_value_top, max_display=TOP_N)
