In [79]:
import numpy as np
import pandas as pd

In [80]:
shark_df = pd.read_csv('shark_final_df.csv')
shark_df.head(5)

Unnamed: 0,date,year,country,activity,sex,age,fatal,datetime_column,string_column,month,season
0,2024-10-11 00:00:00,2024,USA,surfing,M,16,no,2024-10-11 00:00:00,,10,Autumn
1,2024-07-23 00:00:00,2024,Australia,surfing,M,23,no,2024-07-23 00:00:00,,7,Summer
2,2024-07-18 00:00:00,2024,Australia,surfing,M,41,no,2024-07-18 00:00:00,,7,Summer
3,2024-07-08 00:00:00,2024,USA,diving,M,14,no,2024-07-08 00:00:00,,7,Summer
4,2024-07-05 00:00:00,2024,USA,wading,M,26,no,2024-07-05 00:00:00,,7,Summer


In [81]:
shark_df.activity.value_counts()

activity
surfing            336
swimming           181
fishing            171
diving              53
snorkeling          50
wading              38
standing            24
boogie boarding     20
body boarding       17
kayaking            14
Name: count, dtype: int64

# Function Block (Risk Calculation)

In [83]:
# must be updated relative to the cleaned dataframe
coastline_lengths = {
    'USA': 19924,
    'Australia': 25760,
    'South Africa': 2798,
    'Bahamas': 3542,
    'New Zealand': 15134,
    'Mexico': 9330,
    'Brazil': 7491,
    'New Caledonia': 2254,
    'French Polynesia': 2525,
    'Egypt': 2450}

def risk_country(client_country):
    
    top_10_countries = shark_df.country.value_counts().head(10)
    
    # attack risk for the client country = number of attacks in the client country divided by the coastline length 
    attack_risk = round(top_10_countries[client_country] / coastline_lengths[client_country], 3)     
    
    # number of attacks in the client country that were fatal
    number_of_fatal_attacks = len(shark_df[(shark_df.country == client_country) & (shark_df.fatal == 'yes')])
    
    # number of fatal attacks divided by the total number of shark attacks in that country
    fatality_risk = number_of_fatal_attacks / top_10_countries[client_country]
    
    return fatality_risk * attack_risk


def risk_season(client_season):
    
    seasons = shark_df.season.value_counts()
    
    number_of_fatal_attacks = len(shark_df[(shark_df.season == client_season) & (shark_df.fatal == 'yes')])
    fatality_risk = number_of_fatal_attacks / seasons[client_season]
    
    return fatality_risk


def risk_activity(client_activity):
    
    top_10_risks = shark_df.activity.value_counts().head(10)
    
    number_of_fatal_attacks = len(shark_df[(shark_df.activity == client_activity) & (shark_df.fatal == 'yes')])
    fatality_risk = number_of_fatal_attacks / top_10_risks[client_activity]
    
    return fatality_risk


#def risk_age(client_age):
    
    #number_of_fatal_attacks = len(shark_df[(shark_df.age == client_age) & (shark_df.fatal == 'yes')])
    #fatality_risk = number_of_fatal_attacks / shark_df.age.value_counts()[client_age]
    
    #return fatality_risk


def risk_sex(client_sex):
    
    sexs = shark_df.sex.value_counts()
    
    number_of_fatal_attacks = len(shark_df[(shark_df.sex == client_sex) & (shark_df.fatal == 'yes')])
    fatality_risk = number_of_fatal_attacks / sexs[client_sex]
    
    return fatality_risk


def assign_insurance(client_country, client_season, client_activity, client_sex): #client_age
    
    risk = risk_country(client_country) * risk_season(client_season) * risk_activity(client_activity) * risk_sex(client_sex) #* risk_age(client_age)
    
    print(risk)

    if risk >= float(percentiles.loc[0.25]):
        return 'high risk'
    
    elif risk > float(percentiles.loc[0.15]):
        return 'medium risk'  
    else:
        return 'low risk'

In [84]:
# assign a probability to everyone in the dataset
# take the 25% percentile of that as low, 50% percentile as medium and 75% percentile as high

shark_df['country_risk'] = shark_df['country'].apply(lambda country: risk_country(country))
shark_df['season_risk'] = shark_df['season'].apply(lambda season: risk_season(season))
shark_df['activity_risk'] = shark_df['activity'].apply(lambda activity: risk_activity(activity))
#shark_df['age_risk'] = shark_df['age'].apply(lambda age: risk_age(age))
shark_df['sex_risk'] = shark_df['sex'].apply(lambda sex: risk_sex(sex))

shark_df['total_risk'] = shark_df['country_risk'] * shark_df['season_risk'] * shark_df['activity_risk'] * shark_df['sex_risk'] #* shark_df['age_risk']
percentiles = shark_df['total_risk'].quantile([0.15, 0.25]).apply(lambda x: f"{x:.4e}")
print(percentiles.loc[0.15], percentiles.loc[0.25])

1.0969e-07 1.3557e-07


In [85]:
# User input function
def client_entries():
    client_country = input('Please enter the country you travel to: ').strip()
    client_activity = input('Please enter your activity: ').strip()
    #client_age = input('Please enter your age: ').strip()
    client_sex = input('Please enter your sex (M/F): ').strip()
    client_season = input('Please enter the season of your trip: ').strip()
    return client_country, client_season, client_activity, client_sex #client_age

In [86]:
assign_insurance(*client_entries())

Please enter the country you travel to:  Egypt
Please enter your activity:  standing
Please enter your sex (M/F):  F
Please enter the season of your trip:  Autumn


0.0


'low risk'