In [13]:
import numpy as np 
import pandas as pd 
from datetime import datetime

In [14]:
def convert(string):
    """Converts from string to datetime
    """
    date = datetime.strptime(string,'%m/%d/%Y') #Accepts date string, converts it to datetime object. 
    return date


def hwarrests(csv):
    """
    Returns dataframe which displays the total number of arrests in Hollywood per 
    day, every day since January 1st, 2017.  
    """
    
    df = pd.read_csv(csv) #Reads csv file and forms a new dataframe 
    
    hdf = df[df['Area ID'] == 6] #Filters dataframe to only include date from Hollywood. 
    hdf['Converted Dates'] = hdf['Arrest Date'].apply(convert, 0) #Applies convert function to all dates in 'Arrest Date Column', and places those dates in new column called 'Converted Dates'
    
    
    start_date = datetime.strptime('1/1/2017','%m/%d/%Y') #Sets start date for filter
    end_date = datetime.strptime('12/31/2019','%m/%d/%Y') # Sets end date for filter      
    hdf = hdf[(hdf['Converted Dates'] >= start_date) & (hdf['Converted Dates'] <= end_date)] #Filters hdf to only include dates between start and end dates. 

    
    datecounts = pd.DataFrame(columns = ['Date', 'Number of Arrests']) #Creates empty dataframe with dates and number of arrests as columns
    dateray = np.array(hdf['Converted Dates'])  #Creates array of arrest dates in hdf
    unique_elements, counts_elements = np.unique(dateray, return_counts = True) #Returns two arrays with (1) unique dates and (2) number of arrests for those dates
    datecounts['Date'] = unique_elements #Sets array for unique dates to 'Date' in datecounts
    datecounts['Number of Arrests'] = counts_elements #sets array for arrest counts per date to 'Number of Arrest' in datecounts
    
    #Checks for dates which are not present in the dataframe and assigns corresponding count to 0. 
    idx = pd.date_range(start_date, end_date) 
    s = pd.Series(datecounts['Number of Arrests'])
    s.index = datecounts['Date']
    s = s.reindex(idx, fill_value = 0) 
    datecounts = pd.DataFrame({'Date':s.index, 'Number of Arrests':s.values})
    
    return datecounts.sort_values(by = 'Date') #Returns datecounts dataframe sorted by chronological order
    
    

hwarrests('arrestdata.csv') #Calls hwarrests functions on LA arrest dataset. 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Date,Number of Arrests
0,2017-01-01,20
1,2017-01-02,4
2,2017-01-03,19
3,2017-01-04,35
4,2017-01-05,41
5,2017-01-06,21
6,2017-01-07,9
7,2017-01-08,14
8,2017-01-09,8
9,2017-01-10,9


In [15]:
def convert(string):
    """Converts from string to datetime
    """
    date = datetime.strptime(string,'%m/%d/%Y') #Accepts date string, converts it to datetime object. 
    return date


def hwvehstops(csv):
    """ 
    Reads csv file and returns dataframe which displays total number of
    vehicle stops in Hollywood for every day between January 1st, 2017 and December 31, 2019.
    Code currently only reads data from 2017. 
    """
    
    df = pd.read_csv(csv)
    hdf = df[(df['Division Description 1'] == 'HOLLYWOOD') & (df['Stop Type'] == 'VEH')] #Filters dataframe to only include Hollywood pedestrian stops. 
    hdf['Converted Dates'] = hdf['Stop Date'].apply(convert, 0)
    
    
    
    start_date = datetime.strptime('1/1/2017','%m/%d/%Y') #Sets start date for filter
    end_date = datetime.strptime('12/31/2019','%m/%d/%Y') # Sets end date for filter
    
    
    
    hdf = hdf[(hdf['Converted Dates'] >= start_date) & (hdf['Converted Dates'] <= end_date)] #Filters hdf to only include relevant dates
    
    
    datecounts = pd.DataFrame(columns = ['Date', 'Number of Vehicle Stops'])
    dateray = np.array(hdf['Converted Dates'])
    unique_elements, counts_elements = np.unique(dateray, return_counts = True)
    
    datecounts['Date'] = unique_elements
    datecounts['Number of Vehicle Stops'] = counts_elements
    
    
    #Checks for dates which are not present in the dataframe and assigns corresponding count to 0. 
    idx = pd.date_range(start_date, end_date) 
    s = pd.Series(datecounts['Number of Vehicle Stops'])
    s.index = datecounts['Date']
    s = s.reindex(idx, fill_value=0) 
    datecounts = pd.DataFrame({'Date':s.index, 'Number of Vehicle Stops':s.values})
    
    return datecounts



def hwpedstops(csv):
    """ 
    Reads csv file and returns dataframe which displays total number of
    pedestrian stops in Hollywood for every day between January 1st, 2017 and December 31, 2019.
    Code currently only reads data from 2017. 
    """
    
    df = pd.read_csv(csv)
    hdf = df[(df['Division Description 1'] == 'HOLLYWOOD') & (df['Stop Type'] == 'PED')] #Filters dataframe to only include Hollywood pedestrian stops. 
    hdf['Converted Dates'] = hdf['Stop Date'].apply(convert, 0)
    
    
    
    start_date = datetime.strptime('1/1/2017','%m/%d/%Y') #Sets start date for filter
    end_date = datetime.strptime('12/31/2019','%m/%d/%Y') # Sets end date for filter
    
    
    
    hdf = hdf[(hdf['Converted Dates'] >= start_date) & (hdf['Converted Dates'] <= end_date)] #Filters hdf to only include relevant dates
    
    
    datecounts = pd.DataFrame(columns = ['Date', 'Number of Pedestrian Stops'])
    dateray = np.array(hdf['Converted Dates'])
    unique_elements, counts_elements = np.unique(dateray, return_counts = True)
    
    datecounts['Date'] = unique_elements
    datecounts['Number of Pedestrian Stops'] = counts_elements
    
    #Checks for dates which are not present in the dataframe and assigns corresponding count to 0. 
    idx = pd.date_range(start_date, end_date) 
    s = pd.Series(datecounts['Number of Pedestrian Stops'])
    s.index = datecounts['Date']
    s = s.reindex(idx, fill_value=0) 
    datecounts = pd.DataFrame({'Date':s.index, 'Number of Pedestrian Stops':s.values})
    
    return datecounts
    
def combined(csv):
    """
    Returns combined dataframe for vehicle and pedestrian counts.
    """
    vehdf = hwvehstops('vehped.csv')
    peddf = hwpedstops('vehped.csv')
        
    peddf['Number of Vehicle Stops'] = vehdf['Number of Vehicle Stops'] #Creates new column in peddf with vehicle stop counts.
    
    return peddf #Returns combined dataframe


combined('vehped.csv')

  exec(code_obj, self.user_global_ns, self.user_ns)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Date,Number of Pedestrian Stops,Number of Vehicle Stops
0,2017-01-01,12,33
1,2017-01-02,4,24
2,2017-01-03,16,33
3,2017-01-04,31,26
4,2017-01-05,24,33
5,2017-01-06,34,46
6,2017-01-07,18,69
7,2017-01-08,22,38
8,2017-01-09,10,27
9,2017-01-10,11,24


In [16]:
def convert(string):
    """Converts from string to datetime
    """
    date = datetime.strptime(string[:10],'%m/%d/%Y') #Accepts date string, converts it to datetime object. 
    return date


def hwcrimes(csv):
    """ 
    Reads csv file and returns dataframe which displays total number of
    crimes committed in Hollywood for every day between January 1st, 2017 and December 31, 2019. 
    Data: https://data.lacity.org/A-Safe-City/Crime-Data-from-2010-to-2019/63jg-8b9z/data
    """
    
    df = pd.read_csv(csv)
    hdf = df[df['AREA '] == 6] #Filters dataframe to only include Hollywood pedestrian stops. 
    hdf['Converted Dates'] = hdf['DATE OCC'].apply(convert, 0)
    
    
    
    start_date = datetime.strptime('1/1/2017','%m/%d/%Y') #Sets start date for filter
    end_date = datetime.strptime('12/31/2019','%m/%d/%Y') # Sets end date for filter
    
    
    
    hdf = hdf[(hdf['Converted Dates'] >= start_date) & (hdf['Converted Dates'] <= end_date)] #Filters hdf to only include relevant dates
    
    
    datecounts = pd.DataFrame(columns = ['Date', 'Number of Crimes Committed'])
    dateray = np.array(hdf['Converted Dates'])
    unique_elements, counts_elements = np.unique(dateray, return_counts = True)
    
    datecounts['Date'] = unique_elements
    datecounts['Number of Crimes Committed'] = counts_elements
    
    #Checks for dates which are not present in the dataframe and assigns corresponding count to 0. 
    idx = pd.date_range(start_date, end_date) 
    s = pd.Series(datecounts['Number of Crimes Committed'])
    s.index = datecounts['Date']
    s = s.reindex(idx, fill_value=0) 
    datecounts = pd.DataFrame({'Date':s.index, 'Number of Crimes Committed':s.values})
    
    return datecounts
    

hwcrimes('hwcrime.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Date,Number of Crimes Committed
0,2017-01-01,66
1,2017-01-02,28
2,2017-01-03,32
3,2017-01-04,19
4,2017-01-05,25
5,2017-01-06,22
6,2017-01-07,23
7,2017-01-08,28
8,2017-01-09,13
9,2017-01-10,28
