# Getting data from a Socrata API

Many governments use Socrata as their platform to serve data to the public. The Socrata API allows direct, real-time access to open data.

For this tutorial, we will use traffic collision data:

https://data.lacity.org/A-Safe-City/Traffic-Collision-Data-from-2010-to-Present/d5tf-ez2w

To access the data, we will use the `sodapy` library:

https://github.com/xmunoz/sodapy



In [None]:
#!/usr/bin/env python

# Make sure these packages are installed before running
# (uncomment the lines below to install them from inside the notebook):
# !pip install pandas
# !pip install sodapy
# !pip install plotly

In [None]:
import pandas as pd
from sodapy import Socrata

# Public datasets can be read anonymously: the second argument (the
# application token) is None and no username or password is supplied.
socrata_domain = "data.lacity.org"
client = Socrata(socrata_domain, None)

# Non-public datasets need an authenticated client, for example:
# client = Socrata("data.lacity.org",
#                  "MyAppToken",
#                  username="user@example.com",
#                  password="AFakePassword")

In [None]:
# Request up to 2000 records of dataset "d5tf-ez2w", ordered by
# date_rptd descending.
query_options = {"limit": 2000, "order": "date_rptd desc"}
results = client.get("d5tf-ez2w", **query_options)

# Load the returned records into a pandas DataFrame.
df = pd.DataFrame.from_records(results)

In [None]:
# Show one randomly chosen row as a quick sanity check of the download.
df.sample()

# Mapping

In [None]:
# Print a summary of the DataFrame: column names, non-null counts and dtypes.
df.info()

In [None]:
# Look at the location_1 value of the first row to see how it is structured
# before pulling coordinates out of it.
df.location_1[0]

In [None]:
# Pull latitude/longitude out of the dict-like values in location_1.
# Rows without a location (e.g. NaN instead of a dict) are treated as empty
# so they produce a missing coordinate instead of raising a TypeError.
locations = [loc if isinstance(loc, dict) else {} for loc in df.location_1]
new_lat_col = [loc.get('latitude') for loc in locations]
new_lon_col = [loc.get('longitude') for loc in locations]

# Store the extracted values as two new columns.
df['latitude'] = new_lat_col
df['longitude'] = new_lon_col



In [None]:
# The coordinate columns need a numeric dtype for plotting: cast both to
# float, then re-print the DataFrame summary to confirm the new dtypes.
for coord in ('latitude', 'longitude'):
    df[coord] = df[coord].astype(float)
df.info()

In [None]:
# Show five random rows, now including the new latitude/longitude columns.
df.sample(5)

In [None]:
import plotly.express as px

In [None]:
# Quick scatter plot of the raw coordinates (longitude on x, latitude on y).
px.scatter(df,x='longitude',y='latitude')

In [None]:

# Display the rows sorted by latitude so the extreme values are easy to spot.
df.sort_values(by='latitude')


In [None]:
# Keep only the rows whose longitude is not 0, then report how many remain.
df2 = df[df['longitude'] != 0]
len(df2)

In [None]:
# Show five random rows of the filtered data.
df2.sample(5)

In [None]:
# IPython help lookup: a trailing "?" displays the documentation of px.scatter.
px.scatter?


In [None]:
# Build df3 (df2 without rows containing missing values) in this cell so it
# runs when the notebook is executed top-to-bottom; df3 is otherwise only
# assigned in a later cell, which makes this plot fail with a NameError.
df3 = df2.dropna()

# One panel per vict_descent value, wrapped four panels to a row, with the
# points coloured by the same column.
px.scatter(
    df3,
    x='longitude',
    y='latitude',
    facet_col="vict_descent",
    facet_col_wrap=4,
    color="vict_descent",
    height=600,
)

In [None]:
# IPython help lookup: displays the documentation of px.scatter_geo.
px.scatter_geo?

In [None]:
# Plot the df3 points on a geographic outline map, coloured by vict_descent.
fig = px.scatter_geo(
    df3,
    lat='latitude',
    lon='longitude',
    color='vict_descent',
)
# Restrict the base map to the USA and fit the view to the plotted points.
fig.update_geos(scope="usa", fitbounds="locations")


In [None]:
# Plot every row of df2 on a tile-based map and render it.
fig = px.scatter_mapbox(
    df2,
    lat='latitude',
    lon='longitude',
)
fig.show()

In [None]:
# Drop every row of df2 that has a missing value in any column, then report
# how many rows are left.
# NOTE(review): this filters on all columns, not only the ones plotted below;
# confirm that discarding rows for unrelated missing fields is intended.
df3 = df2.dropna(axis=0, how="any")
len(df3)

In [None]:
# Animated map of df3: points are coloured by vict_descent and the animation
# steps through one vict_descent value per frame.
fig = px.scatter_mapbox(
    df3,
    lat="latitude",
    lon="longitude",
    color="vict_descent",
    zoom=8,
    height=300,
    animation_frame="vict_descent",
    animation_group="vict_descent",
)
# Use the "carto-darkmatter" base map and remove all figure margins.
fig.update_layout(
    mapbox_style="carto-darkmatter",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.show()