In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt

In [51]:
df = pd.read_csv('US_Accidents_Dec20_updated.csv')

In [13]:
import plotly.io as pio
print(pio.renderers)
pio.renderers.default = 'notebook'

Renderers configuration
-----------------------
    Default renderer: 'notebook'
    Available renderers:
        ['plotly_mimetype', 'jupyterlab', 'nteract', 'vscode',
         'notebook', 'notebook_connected', 'kaggle', 'azure', 'colab',
         'cocalc', 'databricks', 'json', 'png', 'jpeg', 'jpg', 'svg',
         'pdf', 'browser', 'firefox', 'chrome', 'chromium', 'iframe',
         'iframe_connected', 'sphinx_gallery', 'sphinx_gallery_png']



In [57]:
data = df[df['State'] == 'WY'].copy()

In [58]:
# dropping Number (too many null values)
data.drop(columns=['Number'], inplace=True)
# dropping Description because we don't use it
data.drop(columns=['Description'], inplace=True)


# Adding a Date column and a Weekday column
data['Date'] = pd.to_datetime(data['Start_Time']).dt.date
weekday_dict = dict(zip(range(0,8), ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']))
data['Weekday'] = pd.to_datetime(data.Date).dt.weekday.map(lambda x: weekday_dict[x])
data['Year'] = pd.to_datetime(data.Date).dt.year
data['Month'] = pd.to_datetime(data.Date).dt.month

# Adding Time of day
def time_of_day(t):
    if t < 2:
        t = 24
    slot = int((t-2)/4)
    if slot == 5:
        return '22 to 2'
    return f'{slot*4+2} to {slot*4+6}'

data['Time_of_Day'] = pd.to_datetime(data['Start_Time']).dt.hour.map(lambda x: time_of_day(x))
data.Time_of_Day = pd.Categorical(data.Time_of_Day, 
                                categories=sorted(data.Time_of_Day.unique(), key=lambda x: int(x[:2].strip())))


data['Traffic_Affected_Hrs'] = ((pd.to_datetime(data.End_Time) - pd.to_datetime(data.Start_Time))\
                                .dt.total_seconds()/3600).map(lambda x: round(x,2))

data['Junction%'] = data.groupby('State').Junction.sum() * 100 / data.groupby('State').Junction.count()


In [59]:
data.head()

Unnamed: 0,ID,Severity,Start_Time,End_Time,Start_Lat,Start_Lng,End_Lat,End_Lng,Distance(mi),Street,...,Civil_Twilight,Nautical_Twilight,Astronomical_Twilight,Date,Weekday,Year,Month,Time_of_Day,Traffic_Affected_Hrs,Junction%
12908,A-2729508,2,2016-12-07 11:04:57,2016-12-07 17:04:57,41.11532,-104.84465,41.11345,-104.85679,0.645,I-80 W,...,Day,Day,Day,2016-12-07,Tuesday,2016,12,10 to 14,6.0,
13686,A-2730286,4,2016-12-14 14:22:45,2016-12-14 20:22:45,41.17527,-104.06937,41.15879,-104.25969,9.964,I-80 W,...,Day,Day,Day,2016-12-14,Tuesday,2016,12,14 to 18,6.0,
15476,A-2732076,4,2017-01-04 15:39:32,2017-01-04 21:39:32,41.11698,-104.874138,41.28863,-105.52424,35.816,I-80 W,...,Day,Day,Day,2017-01-04,Tuesday,2017,1,14 to 18,6.0,
15657,A-2732257,2,2017-01-05 11:13:52,2017-01-05 17:13:52,41.11345,-104.85679,41.11699,-104.872687,0.863,I-80 W,...,Day,Day,Day,2017-01-05,Wednesday,2017,1,10 to 14,6.0,
16545,A-2733145,2,2017-01-12 21:17:40,2017-01-13 03:17:40,44.263531,-104.954369,44.242059,-104.910699,2.621,US Highway 14,...,Night,Night,Night,2017-01-12,Wednesday,2017,1,18 to 22,6.0,


In [61]:
data.Junction


12908      False
13686       True
15476      False
15657      False
16545      False
           ...  
1501140    False
1504440    False
1507938    False
1509275    False
1512755     True
Name: Junction, Length: 330, dtype: bool

In [79]:
# Create dimensions
day_night_dim = go.parcats.Dimension(values=data.Sunrise_Sunset, label="Day or Night")

junction_dim = go.parcats.Dimension(values=data.Junction.astype(int), label="Junction"
                                    , categoryarray=[0,1]
                                    , ticktext=['No Junction', 'Junction']
                                   )

dow_dim = go.parcats.Dimension(values=data.Weekday, label="Day of Week",
                              categoryarray=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'])

sev_labels = {
    1: 'Low', 2: 'Moderate', 3: 'High', 4: 'Extreme',
}

severity_dim = go.parcats.Dimension(
    values=data.Severity, label="Severity"
    , categoryarray=sorted(data.Severity.unique())
    , ticktext=[sev_labels[i] for i in sorted(data.Severity.unique())]
)

# Create parcats trace
color = data.Severity;
colorscale = [
    [0, 'rgb(255,255,229)'],
    [0.25, 'rgb(217,240,163)'],
    [0.5, 'rgb(65,171,93)'],
    [1, 'rgb(0,69,41)'],
]

fig = go.Figure(data = [go.Parcats(dimensions=[day_night_dim, dow_dim, junction_dim, severity_dim],
        line={'color': color, 'colorscale': colorscale},
        hoveron='color', hoverinfo='count+probability',
        labelfont={'size': 18, 'family': 'Times'},
        tickfont={'size': 16, 'family': 'Times'},
        arrangement='freeform')])

fig.show()

In [68]:
sorted(data.Severity.unique())

[2, 3, 4]

In [43]:
color

12121      2
12122      2
12123      3
12126      2
12128      2
          ..
1515796    2
1515797    3
1515798    3
1515800    3
1515801    3
Name: Severity, Length: 75142, dtype: int64

In [20]:

titanic_df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/titanic.csv")

# Create dimensions
class_dim = go.parcats.Dimension(
    values=titanic_df.Pclass,
    categoryorder='category ascending', label="Class"
)

gender_dim = go.parcats.Dimension(values=titanic_df.Sex, label="Gender")

survival_dim = go.parcats.Dimension(
    values=titanic_df.Survived, label="Outcome", categoryarray=[0, 1],
    ticktext=['perished', 'survived']
)

# Create parcats trace
color = titanic_df.Survived;
colorscale = [[0, 'lightsteelblue'], [1, 'mediumseagreen']];

fig = go.Figure(data = [go.Parcats(dimensions=[class_dim, gender_dim, survival_dim],
#         line={'color': color, 'colorscale': colorscale},
        hoveron='color', hoverinfo='count+probability',
        labelfont={'size': 18, 'family': 'Times'},
        tickfont={'size': 16, 'family': 'Times'},
        arrangement='freeform')])

fig.show()

In [30]:
import plotly.express as px

df = px.data.tips()
fig = px.parallel_categories(df)

fig.show()

In [75]:
px.colors.sequential.YlGn

['rgb(255,255,229)',
 'rgb(247,252,185)',
 'rgb(217,240,163)',
 'rgb(173,221,142)',
 'rgb(120,198,121)',
 'rgb(65,171,93)',
 'rgb(35,132,67)',
 'rgb(0,104,55)',
 'rgb(0,69,41)']

In [76]:
len(_)

9