In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
import scipy.stats as st
from datetime import datetime
import gmaps
import os

# Import API key
from api_keys import g_key



In [2]:
# Make a reference to the starter csv file path and create df
prisoner_df = pd.read_csv('../Resources/Texas_Prison_Data.csv') 
prisoner_df.head()
# prisoner_df.isna().sum()
# prisoner_df.dtypes

Unnamed: 0,SID Number,TDCJ Number,Name,Current Facility,Gender,Race,Age,Projected Release,Maximum Sentence Date,Parole Eligibility Date,Case Number,County,Offense Code,TDCJ Offense,Sentence Date,Offense Date,Sentence (Years),Last Parole Decision,Next Parole Review Date,Parole Review Status
0,234378,2174505,"LITTLE,AVA JESSUP",Young,F,W,75,10/01/2020,11/15/2025,09/26/2018,068770,Grayson,35990003,POSS CS 4-200G WID METH,12/14/2017,09/13/2017,8.0,Approved on 07/01/2020,,NOT IN REVIEW PROCESS
1,524524,758486,"HARPER,DAVID JOHN JR",Estelle,M,W,91,01/01/9999,01/01/9999,08/03/2031,95-765-C,McLennan,36010001,INDECENCY WITH A CHILD,08/13/1996,09/15/1995,Life,,08/03/2031,NOT IN REVIEW PROCESS
2,703267,841623,"RODRIGUEZ,THEODORE",Beto,M,W,82,08/03/2043,08/03/2043,02/01/2021,97-396-C,McLennan,11990003,AGG SEX ASLT,08/05/1998,06/01/1996,45.0,,02/01/2021,IN PAROLE REVIEW PROCESS
3,708520,256174,"ALONZO,BENITO",Hospital Galveston,M,H,85,01/01/9999,01/01/9999,11/25/1985,49995,Travis,49030000,INTRODUCE DEADLY WPN PENAL INSTI,02/27/1976,06/01/1974,Life,Denied on 12/20/2019,12/2020,IN PAROLE REVIEW PROCESS
4,766785,365547,"BROWN,NARRIES EARL",Jester IV,M,W,84,01/01/9999,01/01/9999,08/24/2003,F83-89728-HI,Dallas,11220000,AGG SEX ABUSE CHILD/U14,08/24/1983,03/15/1983,Life,Denied on 12/05/2016,Unavailable at this time.,NOT IN REVIEW PROCESS


In [3]:
# This df is from the Texas Department of Criminal Justice (TDCJ)
# It has ALL the criminal code assignments - those that are currently being
# used AND those no longer used.  Some individuals have codes assigned that are no
# longer active.  This df has a column (Offense_Category)that can be used as a key 
# to link many different specific, detailed codes (Offense Codes) together.

offense_df = pd.read_csv('../Resources/offenseCodes_ut8.csv', encoding="utf-8")
# offense_df.head()
# offense_df.dtypes

In [4]:
# Create project specific df for merging. 
# This step creates a smaller dataframe that will be used to merge 
# with the prisoner df on the key offense cateogry

offproject_df = offense_df.loc[:,["Offense Code", "Offense_Category"]]
# offproject_df.head()

In [5]:
# This step actually creates the "bins" - the last column "Offense_Category" can be sorted on
# and capture the respective individuals by offense type.  The "ffill" will place a Nan in 
# the "Offense_Category" column if there is no value in that location.  
# The "merge_ordered" maintains the integrity of the elements in the order of the main df,
# prisoner_df, while matching the Offense_Cateogry" with the respective "Offense Code".
# The prisoner df has approximately 208K rows with many duplicated Offense Code.  
# The offproject df has 2,738 rows of 62 unique Offense Cateogries and related Offense Codes
# which results in the 2,738 rows on information in the df.  The Offense Code column is mapped
# to the respective value in the prisoner df and Offense_Category is assigned to the respective row.

binlookup_df = pd.merge_ordered(prisoner_df, offproject_df, fill_method='ffill', left_by='Offense Code')
# binlookup_df.head()
# binlookup_df.isna().sum()
# binlookup_df["Offense_Category"].unique()

In [6]:
# Create bins in which to place values based upon Offense Categories
# The step serves the purpose of assigning a label to the respective offense category
# this is some of the initial segregation steps of the data to see what story can be told
# the columns may or may not be used in the final sorting for visualization
# must have 1 extra bin then actual catorgies - hence the 0 at the beginning

bins = [0, 100, 200, 300, 400, 500, 600, 700, 900, 1100, 1200, 1400, 
        1500, 1600, 1800, 1900, 2100, 2200, 2300, 2400, 2500, 2600, 
        2700, 2900, 3100, 3200, 3300, 3400, 3500, 3600, 3700, 3900, 
        4100, 4200, 4300, 4400, 4500, 4600, 4700, 4900, 5200, 5300, 
        5400, 5500, 6100, 6200]

# Create labels for these bins
group_labels = ["Human_Trafficking", "Murder", "Kidnapping", "Sexual_Assault", "Robbery", "Assault",
                "Coercion_Politics", "Harrassment", "Terrorism", "Arson", "Unlawful_Use", "Burglary", "Theft",
                "Unlawful_Rep", "Fraud_Forgery", "Vandalism", "Drugs", "Sexual_Related_Materials", "Child_Endanger", "Bigamy",
                "Elderly_at_Risk_Adult_Endanger", "Violate_Order", "Prostitution", "Evading_Arrest", "Criminal_Escape",
                "Perjury_Contemp", "Jump_Bail_No_Show", "Interference_Records", "Bribery", "Weapon_Related", 
                "Riot_Obstruction", "Vehicle_Incident", "Alcohol_Incident", "Animal_Related", "Agriculture_Related",
                "Health_Code", "Hazardous_Material", "Discrimination_Civil_Rights", "Record_Tampering", "Tax_Evasion",
                "Pollution", "Money_Laundering", "Negligence", "Business_License", "Organized_Crime"]

In [7]:
# Place the data series (offense category label) into a new (last) column inside of the DataFrame

binlookup_df["Bin_Cat"] = pd.cut(binlookup_df["Offense_Category"], bins, labels=group_labels)
# binlookup_df.head()

In [8]:
# renaming of the columns - rc - in front
# original columns had spaces between names and words with ()
# which may cause issues later when trying to manipulate

rcbinlookup_df = binlookup_df.rename(columns={"SID Number": "SID_Number", 
                            "TDCJ Number": "TDCJ_Number",
                            "Current Facility" : "Current_Facility",
                            "Projected Release" : "Projected_Release",
                            "Maximum Sentence Date" : "Max_Sentence_Date",
                            "Parole Eligibility Date" : "Parole_Eligible_Date",
                            "Case Number" : "Case_Number",
                            "Offense Code" :"Offense_Code",
                            "TDCJ Offense" :"TDCJ_Offence",
                            "Sentence Date" : "Sentence_Date",
                            "Offense Date": "Offense_Date",
                            "Sentence (Years)" : "Sentence_Years",
                            "Last Parole Decision" : "Last_Parole_Decision",
                            "Next Parole Review Date" : "Next_Parole_Review",
                            "Parole Review Status" : "Parole_Review_Status"})

# rcbinlookup_df["Sentence_Years"].unique()
# rcbinlookup_df.dtypes
# rcbinlookup_df["Offense_Code"].unique()

In [9]:
# after doing sentence years unique, identified many that needed to be 
# cleaned up - 1st pass is replacing words in number column.
# By TDCJ definition - person serving Life is elgible for parole after 
# serving 40 years - looked at similar sentences in offense cateegory and 
# selected 50.9 - reason for using .9 was to make easy to find in sort to designate
# value that had been amended
# LWOP is Life without parole - chose 100.9, .9 was to designate value amended
# Capital Life - 101.9 - differeniate between LWOP and Cap Life
# Death - 150.9 to make easy to find

rcbinlookup_df=rcbinlookup_df.replace(to_replace="Life",value="50.9")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="LWOP", value="100.9")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="Capital Life", value = "101.9") 
rcbinlookup_df=rcbinlookup_df.replace(to_replace="Death", value = "150.9")

#Not able to do NaN with this method
# Index 71997 SID_Number 4340086   TDCJ_Number 2319589 is the NaN
# rcbinlookup_df=rcbinlookup_df.replace(to_replace="NaN", value = "3999.9")

rcbinlookup_df.head()

Unnamed: 0,SID_Number,TDCJ_Number,Name,Current_Facility,Gender,Race,Age,Projected_Release,Max_Sentence_Date,Parole_Eligible_Date,...,Offense_Code,TDCJ_Offence,Sentence_Date,Offense_Date,Sentence_Years,Last_Parole_Decision,Next_Parole_Review,Parole_Review_Status,Offense_Category,Bin_Cat
0,234378,2174505,"LITTLE,AVA JESSUP",Young,F,W,75,10/01/2020,11/15/2025,09/26/2018,...,35990003,POSS CS 4-200G WID METH,12/14/2017,09/13/2017,8.0,Approved on 07/01/2020,,NOT IN REVIEW PROCESS,2200,Drugs
1,1116232,1366652,"SIDLE,JOHN ARTHUR",Pack,M,W,78,05/02/2021,06/18/2033,03/08/2016,...,35990003,UNLAW POSS WIT C/S-METHAM,04/06/2006,09/03/2004,25.0,Approved on 09/17/2020,,NOT IN REVIEW PROCESS,2200,Drugs
2,1409034,729430,"LERMA,GILBERT LOPEZ",Wynne,M,H,70,01/01/9999,01/01/9999,08/22/2020,...,35990003,DEL C/S,09/19/1995,09/28/1994,50.9,Denied on 07/16/2020,07/2021,NOT IN REVIEW PROCESS,2200,Drugs
3,1468825,2205051,"EVANS,DAYTON BUD",Pack,M,W,70,04/21/2028,03/11/2043,10/20/2020,...,35990003,POSS CONT SUB PG1 4-200 WITD,03/12/2018,03/31/2017,25.0,Denied on 08/13/2020,08/2021,NOT IN REVIEW PROCESS,2200,Drugs
4,1473183,2038325,"GREEN,ROGER DALE SR",Duncan,M,W,73,08/16/2025,06/27/2040,02/14/2018,...,35990003,DEL CONT SUB 4G-200G,11/10/2015,05/05/2015,25.0,Denied on 02/06/2020,02/2021,NOT IN REVIEW PROCESS,2200,Drugs


In [10]:
# the original dataset from the State of Texas had year designation for sentence length.  Had to 
# convert this value to years to identify sentence length.  This was completed by
# looking at the calender year the sentencing occured and then taking the difference
# between that value and the year identified below.  The majority of this coding issue was from
# a prison hospital location.

rcbinlookup_df=rcbinlookup_df.replace(to_replace="2024.9",value="6.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2092.7", value="8.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2027.0", value = "10.0") 
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2026.4", value = "11.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2027.1",  value = "10.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2024.8",value="6.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2024.7", value="7.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2023.5", value = "5.0") 
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2026.3", value = "8.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2020.8",  value = "5.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2022.4",value="8.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2019.8", value="10.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2022.2", value = "5.0") 
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2024.0", value = "6.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2029.9",  value = "10.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2022.3",value="6.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2027.9", value="8.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2023.4", value = "6.0") 
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2021.4", value = "6.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2021.5",value="2.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2022.9", value="3.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2022.8", value = "4.0") 
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2023.0", value = "5.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2024.3",  value = "6.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2021.6",value="4.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2021.2", value="5.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2022.7", value = "5.0") 
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2023.9", value = "4.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2025.1",  value = "9.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2020.5",value="1.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2019.5", value="4.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2029.5", value = "10.0") 
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2021.9", value = "10.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2025.0",  value = "6.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2024.5",value="5.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2022.6", value="3.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2020.2", value = "2.0") 
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2030.0", value = "11.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2023.6",  value = "5.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2024.2",value="6.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2029.7",  value = "8.0")
rcbinlookup_df=rcbinlookup_df.replace(to_replace="2020.9",value="2.0")

# rcbinlookup_df
# rcbinlookup_df.loc[rcbinlookup_df["Sentence_Years"] == "800.0"]

In [11]:
# Assigning value to NaN to location.  Other sentence lengths in bin were taken into account
# when assigning number

# this did not work
# nanloc_df = rcbinlookup_df[pd.to_numeric(rcbinlookup_df['Sentence_Years'], errors='coerce').isnull()]
# nanloc_df

# 	SID_Number	TDCJ_Number	Name	Current_Facility	Gender	Race	Age	Projected_Release	Max_Sentence_Date	Parole_Eligible_Date	...	Offense_Code	TDCJ_Offence	Sentence_Date	Offense_Date	Sentence_Years	Last_Parole_Decision	Next_Parole_Review	Parole_Review_Status	Offense_Category	Bin_Cat
# 71997	4340086	2319589	DAVIS,CHARLES ELLIS	Estelle	M	W	49	06/29/2021	NaN	NaN	...	35990015	POSS CONT SUB 1-4G	04/14/2020	07/30/2019	NaN	NaN	NaN	NaN	2200	Drugs

rcbinlookup_df.loc[71997,"Sentence_Years"] = 10.0
# rcbinlookup_df["Sentence_Years"].unique()

In [12]:
# Create age bins
# as noted with other bin creation - created to potentially help with sorting
# when planning ways to visualize the data
# must have 1 extra bin then actual catorgies - hence the 0 at the beginning

agebins = [0, 19.9, 29.9, 39.9, 49.9, 59.9, 69.9, 79.9, 89.9, 99.9, 199.9]

# Create labels for these bins
agegroup_labels = ["Less_Than_20", "20_to_30", "30_to_40",
                   "40_to_50", "50_to_60", "60_to_70",
                   "70_to_80", "80_to_90", "90_to_100", "Greater_than_100"]   

In [13]:
# Create new column - Age_Bin

rcbinlookup_df["Age_Bin"] = pd.cut(rcbinlookup_df["Age"], agebins, labels=agegroup_labels)
rcbinlookup_df.head()

Unnamed: 0,SID_Number,TDCJ_Number,Name,Current_Facility,Gender,Race,Age,Projected_Release,Max_Sentence_Date,Parole_Eligible_Date,...,TDCJ_Offence,Sentence_Date,Offense_Date,Sentence_Years,Last_Parole_Decision,Next_Parole_Review,Parole_Review_Status,Offense_Category,Bin_Cat,Age_Bin
0,234378,2174505,"LITTLE,AVA JESSUP",Young,F,W,75,10/01/2020,11/15/2025,09/26/2018,...,POSS CS 4-200G WID METH,12/14/2017,09/13/2017,8.0,Approved on 07/01/2020,,NOT IN REVIEW PROCESS,2200,Drugs,70_to_80
1,1116232,1366652,"SIDLE,JOHN ARTHUR",Pack,M,W,78,05/02/2021,06/18/2033,03/08/2016,...,UNLAW POSS WIT C/S-METHAM,04/06/2006,09/03/2004,25.0,Approved on 09/17/2020,,NOT IN REVIEW PROCESS,2200,Drugs,70_to_80
2,1409034,729430,"LERMA,GILBERT LOPEZ",Wynne,M,H,70,01/01/9999,01/01/9999,08/22/2020,...,DEL C/S,09/19/1995,09/28/1994,50.9,Denied on 07/16/2020,07/2021,NOT IN REVIEW PROCESS,2200,Drugs,70_to_80
3,1468825,2205051,"EVANS,DAYTON BUD",Pack,M,W,70,04/21/2028,03/11/2043,10/20/2020,...,POSS CONT SUB PG1 4-200 WITD,03/12/2018,03/31/2017,25.0,Denied on 08/13/2020,08/2021,NOT IN REVIEW PROCESS,2200,Drugs,70_to_80
4,1473183,2038325,"GREEN,ROGER DALE SR",Duncan,M,W,73,08/16/2025,06/27/2040,02/14/2018,...,DEL CONT SUB 4G-200G,11/10/2015,05/05/2015,25.0,Denied on 02/06/2020,02/2021,NOT IN REVIEW PROCESS,2200,Drugs,70_to_80


In [14]:
# starting cleanup on Data Column for Counties 
# file sent to CSV for easier review

rc_un = rcbinlookup_df['County'].unique()
rc_un = np.sort(rc_un)
rc_un = pd.DataFrame(rc_un)
rc_un.to_csv('../Resources/rc_counties.csv', index=False)

In [15]:
# Replace Lasalle with La Salle

rcbinlookup_df = rcbinlookup_df.replace(to_replace="Lasalle", value= "La Salle")

In [79]:
# Loading and manipulating population data for 2020 pop data
# the original data file was from state of Texas site and had out year projections
# needed to extract year 2020 values

countyall_df = pd.read_csv('../Resources/pop_data_with_projection.csv')
texas2020_df = countyall_df.query('year == 2020')
county2020_df = texas2020_df.loc[texas2020_df['age_group'] == "ALL"]
notex_df = county2020_df.drop([60])
proj_county2020 = notex_df.loc[:,["area_name", "total"]]
rcproj_county2020_df = proj_county2020.rename(columns={"area_name": "County", "total" : "Population_2020"})

pd.set_option('display.max_columns', None)
rcproj_county2020_df



Unnamed: 0,County,Population_2020
306,Anderson,58199
552,Andrews,22269
798,Angelina,90437
1044,Aransas,27699
1290,Archer,8344
...,...,...
61560,Wood,45292
61806,Yoakum,9225
62052,Young,18712
62298,Zapata,14409


In [82]:
# write renamed column county file to csv for use in analysis notebook
rcproj_county2020_df.to_csv('../Resources/rcproj_county2020_df.csv', index=False)

In [17]:
# Merge Prison and Population database
mother_ship = pd.merge_ordered(rcbinlookup_df, rcproj_county2020_df, fill_method='ffill', left_by='County')
# mother_ship.isna().sum()

# found NaN that need to be corrected
# looked up county pop and corrected

values = {"Population_2020": 21737}
mother_ship.fillna(value=values)

# mother_ship.isna().sum()
#county_error = mother_ship[mother_ship['Population_2020'].isnull()]
#county_error.head()
#county_error = county_error['County'].unique()

# mother_ship.head()
# mother_ship.isna().sum()

# writing mothership to resource folder for importing into analysis file
mother_ship.to_csv('../Resources/mother_ship.csv', index=False)


In [18]:
# Read in county coordinates
# this info will be used in heat map creation
# for location of county seat within each county

# --------NOTE -------- lAT AND lNG ARE FLIPPED -----NEED TO RENAME BELOW

countycoordfull_df = pd.read_csv('../Resources/Texas_Counties_Centroid_Map.csv')
countycoordfull_df

Unnamed: 0,Lat,Lng,County,CNTY_NBR,FIPS,Shape_Leng,Shape_Area,County Location
0,-97.492799,29.456415,Gonzales,90,48177,2.124911,0.257805,"(-97.492799, 29.456415)"
1,-98.697292,27.043405,Jim Hogg,125,48247,2.271751,0.267624,"(-98.697292, 27.043405)"
2,-97.681378,26.924094,Kenedy,66,48261,5.067864,0.389397,"(-97.681378, 26.924094)"
3,-96.965687,30.310651,Lee,144,48287,2.213853,0.153990,"(-96.965687, 30.310651)"
4,-95.853568,32.211881,Henderson,108,48213,3.152909,0.235056,"(-95.853568, 32.211881)"
...,...,...,...,...,...,...,...,...
249,-94.371557,32.548149,Harrison,103,48203,2.563571,0.227726,"(-94.371557, 32.548149)"
250,-99.976248,31.831054,Runnels,200,48399,2.040586,0.260770,"(-99.976248, 31.831054)"
251,-97.116812,33.205574,Denton,61,48121,1.969324,0.238359,"(-97.116812, 33.205574)"
252,-98.086210,29.173159,Wilson,247,48493,1.917176,0.193944,"(-98.08621, 29.173159)"


In [19]:
# Creating df to start gathering data for plotting - as noted above - need to switch lat lng

countycoord_df = countycoordfull_df.loc[:,["County", "Lng", "Lat"]]
countycoord_df = countycoord_df.rename(columns={"Lat": "Longitude", "Lng" : "Latitude"})
countycoord_df

Unnamed: 0,County,Latitude,Longitude
0,Gonzales,29.456415,-97.492799
1,Jim Hogg,27.043405,-98.697292
2,Kenedy,26.924094,-97.681378
3,Lee,30.310651,-96.965687
4,Henderson,32.211881,-95.853568
...,...,...,...
249,Harrison,32.548149,-94.371557
250,Runnels,31.831054,-99.976248
251,Denton,33.205574,-97.116812
252,Wilson,29.173159,-98.086210


In [76]:
tymothership = mother_ship.loc[:,["SID_Number", "TDCJ_Number", "Current_Facility", 
                                  "Gender", "Race", "Age", "County", "Offense_Code", 
                                  "TDCJ_Offence", "Sentence_Years", "Offense_Category", 
                                  "Bin_Cat", "Age_Bin", "Population_2020"]]
# tymothership

In [84]:
typlot = pd.merge_ordered(tymothership, countycoord_df, fill_method='ffill', left_by='County')
typlot
# typlot.isna().sum()

Unnamed: 0,SID_Number,TDCJ_Number,Current_Facility,Gender,Race,Age,County,Offense_Code,TDCJ_Offence,Sentence_Years,Offense_Category,Bin_Cat,Age_Bin,Population_2020,Latitude,Longitude
0,234378,2174505,Young,F,W,75,Grayson,35990003,POSS CS 4-200G WID METH,8.0,2200,Drugs,70_to_80,131710,33.626844,-96.677649
1,1468825,2205051,Pack,M,W,70,Grayson,35990003,POSS CONT SUB PG1 4-200 WITD,25.0,2200,Drugs,70_to_80,131710,33.626844,-96.677649
2,1505838,2119905,Pack,M,W,68,Grayson,35990003,POSS CS WITD-METH 4-200G,12.0,2200,Drugs,60_to_70,131710,33.626844,-96.677649
3,2141308,2153019,Ellis,M,W,64,Grayson,35990003,POSS CS 4-200G WID METH,16.0,2200,Drugs,60_to_70,131710,33.626844,-96.677649
4,2360914,2290868,Gurney,M,W,62,Grayson,35990003,MAN DEL CS PG1 4-200G METH,30.0,2200,Drugs,60_to_70,131710,33.626844,-96.677649
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120702,5826119,2251746,Polunsky,M,H,40,McMullen,48990012,TAMPERING W/A WITNESS,8.0,3100,Evading_Arrest,40_to_50,783,28.353511,-98.568423
120703,4231127,2043573,Wynne,M,H,61,McMullen,35620013,POSS MARIJUANA M/2000LBS,45.0,2200,Drugs,60_to_70,783,28.353511,-98.568423
120704,7987519,2284322,Dominguez,M,W,31,McMullen,23990008,(LIO) THEFT OF FIREARM,2.0,1600,Theft,30_to_40,783,28.353511,-98.568423
120705,7273574,2311921,Willacy County,M,H,30,McMullen,64110001,SMUGGLING OF PERSONS,8.0,100,Human_Trafficking,30_to_40,783,28.353511,-98.568423


In [78]:
# Running nunique identified 252 unique populations for 253 counties 
# need to investigate
# typlot.nunique()

In [23]:
# looking for reason of 252 unique populations - possible dup
dupPop = rcproj_county2020_df[rcproj_county2020_df.duplicated(["Population_2020"])]
# dupPop

In [24]:
# Found dup populations and identified the 2 county names - both are 
# relatively small so will leave values as presented by owner of data

rcproj_county2020_df.loc[rcproj_county2020_df['Population_2020'] == 13592]

Unnamed: 0,County,Population_2020
14328,Dawson,13592
58608,Ward,13592


In [25]:
# The are 254 counties in Texas.  nunique identified 253
# so either there is a dup or 1 county does not have person in 
# prisoner database issued by State.  Database is upated,
# so this is a snap shot in time.

# Found a numpy function that will subject 1 array from another - 
# proably could have used one that compares names - but chose
# one that will subtract one from another - thus yielding the population
# of the missing county.  Once that is obtained, just match the pop with the county

# The creates the array of counties populations in Texas

pop_original = rcproj_county2020_df["Population_2020"].unique()
pop_original
spop_original = np.sort(pop_original)
# spop_original

In [26]:
# This creates the array of county populations identified in the Texas prison file

pop_typlot = typlot["Population_2020"].unique()
spop_typlot = np.sort(pop_typlot)
# spop_typlot

In [27]:
# This compares the 2 arrays and provides the outlier
# population number

result = np.setxor1d(spop_original, spop_typlot)
result

array([309], dtype=int64)

In [28]:
# This is the lookup for the county that has a population equal to 309
#  this gives the name of the county that as of the time of the dataset from Texas
# does not have a person in the system

countynotlisted = rcproj_county2020_df.loc[rcproj_county2020_df['Population_2020'] == 309]
countynotlisted

Unnamed: 0,County,Population_2020
33270,King,309


In [29]:
# Creates adjusted listing of counties with population (removing King)
# so it can be merged with grouped _cobincat and then per pop values can be determined
# Dropping the county to ensure NaN's do not show up in future eforts
# This will be used in developing the database for creating heat maps 
# to present data visualization

adrcproj_county2020_df = rcproj_county2020_df.drop([33270])
# adrcproj_county2020_df

In [30]:
# Create baseline population distribution heat map
# this will allow comparison to offender count heat maps

baselineheat_df = pd.merge_ordered(adrcproj_county2020_df, countycoord_df, fill_method='ffill', left_by='County')
baselineheat_df

Unnamed: 0,County,Population_2020,Latitude,Longitude
0,Anderson,58199,31.813215,-95.652518
1,Andrews,22269,32.305063,-102.637884
2,Angelina,90437,31.254759,-94.611742
3,Aransas,27699,28.127096,-96.992590
4,Archer,8344,33.615392,-98.687834
...,...,...,...,...
248,Wood,45292,32.786344,-95.382065
249,Yoakum,9225,33.172935,-102.827879
250,Young,18712,33.176708,-98.687814
251,Zapata,14409,27.000891,-99.168601


In [31]:
gmaps.configure(api_key=g_key)

In [32]:
# data from baseline heat df

# # Store 'Lat' and 'Lng' into  locations
locationsbase = baselineheat_df[["Latitude", "Longitude"]]

# Weight
baseweight = baselineheat_df["Population_2020"]

figure_layout = {
    'width': '800px',
    'height': '600px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'}

# Plot Heatmap
fig = gmaps.figure(layout=figure_layout,zoom_level=6,center=(31.3,-99.5))

# Create heat layer
heat_layerbase = gmaps.heatmap_layer(locationsbase, weights=baseweight, 
                                 dissipating=False, max_intensity=1500000,
                                 point_radius=1)

# Add layer
fig.add_layer(heat_layerbase)

# Display figure
fig

Figure(layout=FigureLayout(border='1px solid black', height='600px', margin='0 auto 0 auto', padding='1px', wi…

In [83]:
# Groupby function to collect information by County and Bin_Cat aggregating on Offense_Category
# Another step in merging dfs to create plot dataset

# Task was assigned to present heat maps based on County Outliers - the term was
# open ended to allow for creating more specific visualizations as the data 
# manipulation presented itself

# df created ties county to number of individuals in prison for that 
# respective offense (Bin_Cat)

grouped_cobincat = typlot.groupby(["County", "Bin_Cat"]).agg({'Offense_Category': ['count']}).reset_index()
grouped_cobincat.columns = ["County", "Bin_Cat", "Count"]
grouped_cobincat
# grouped_cobincat.dtypes

# Groupby that was not used - data not presented well - added additional parameter
# grouped_bincat = typlot.groupby(["County"]).agg({'Offense_Category': ['count']})
# grouped_bincat.head(50)


Unnamed: 0,County,Bin_Cat,Count
0,Anderson,Human_Trafficking,0
1,Anderson,Murder,38
2,Anderson,Kidnapping,6
3,Anderson,Sexual_Assault,75
4,Anderson,Robbery,28
...,...,...,...
11380,Zavala,Pollution,0
11381,Zavala,Money_Laundering,0
11382,Zavala,Negligence,0
11383,Zavala,Business_License,0


In [34]:
# Merging step and creating additional columns to allow for normalizing data by population
# Kept overall poulation column for reference, used per 1000 to create maps

typlotsub = pd.merge_ordered(grouped_cobincat , adrcproj_county2020_df, fill_method='ffill', left_by='County')
typlotsub["Pop_per_1000"] = (typlotsub["Population_2020"] / 1000).round(3)
typlotsub["Offenders_per_Pop"] = (typlotsub["Count"] / typlotsub["Population_2020"]).round(4)
typlotsub["Offenders_per_1000"] = (typlotsub["Count"] / typlotsub["Pop_per_1000"]).round(3)
typlotsub

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000
0,Anderson,Human_Trafficking,0,58199,58.199,0.0000,0.000
1,Anderson,Murder,38,58199,58.199,0.0007,0.653
2,Anderson,Kidnapping,6,58199,58.199,0.0001,0.103
3,Anderson,Sexual_Assault,75,58199,58.199,0.0013,1.289
4,Anderson,Robbery,28,58199,58.199,0.0005,0.481
...,...,...,...,...,...,...,...
11380,Zavala,Pollution,0,12682,12.682,0.0000,0.000
11381,Zavala,Money_Laundering,0,12682,12.682,0.0000,0.000
11382,Zavala,Negligence,0,12682,12.682,0.0000,0.000
11383,Zavala,Business_License,0,12682,12.682,0.0000,0.000


In [35]:
# this is only for informational purposes
# total per top 5 bins
typlotsubcounts = typlotsub.groupby(["Bin_Cat"]).agg({'Count': ['sum']})
typlotsubcounts.columns = typlotsubcounts.columns.droplevel(0)
typcss = typlotsubcounts.sort_values(by=['sum'], ascending=False)
typcss.head()

Unnamed: 0_level_0,sum
Bin_Cat,Unnamed: 1_level_1
Sexual_Assault,22052
Assault,17812
Robbery,17630
Drugs,17151
Murder,14937


In [85]:
# this step merges df that has coordinates of each county seat for the 
# respective county - this will be used to create the heat maps

typlotalldata = pd.merge_ordered(typlotsub, countycoord_df, fill_method='ffill', left_by='County')
typlotalldata

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude
0,Anderson,Human_Trafficking,0,58199,58.199,0.0000,0.000,31.813215,-95.652518
1,Anderson,Murder,38,58199,58.199,0.0007,0.653,31.813215,-95.652518
2,Anderson,Kidnapping,6,58199,58.199,0.0001,0.103,31.813215,-95.652518
3,Anderson,Sexual_Assault,75,58199,58.199,0.0013,1.289,31.813215,-95.652518
4,Anderson,Robbery,28,58199,58.199,0.0005,0.481,31.813215,-95.652518
...,...,...,...,...,...,...,...,...,...
11380,Zavala,Pollution,0,12682,12.682,0.0000,0.000,28.865306,-99.761020
11381,Zavala,Money_Laundering,0,12682,12.682,0.0000,0.000,28.865306,-99.761020
11382,Zavala,Negligence,0,12682,12.682,0.0000,0.000,28.865306,-99.761020
11383,Zavala,Business_License,0,12682,12.682,0.0000,0.000,28.865306,-99.761020


In [74]:
# Create pop bins - doing this now to allow for evaluting data - 
# created because larger counties will wash out their rates
# these are very granular bins that may be used for creating data visualization on 
# county size range basis - as noted above  - may or may not use based on final eval

# must have 1 extra bin then actual catorgies - reason for the 0 at the beginning

agebins = [0, 1000, 10000, 50000, 100000, 250000, 500000, 750000, 1000000, 10000000]

# Create labels for these bins
agegroup_labels = ["LT1K", "1K_to_10K", "10K_to_50K", "50K_to_100K",
                   "100K_to_250K", "250K_to_500K", "500K_to_750K",
                   "750K_to_1M", "GT1M"]  

# Create new column - Pop_Bin

typlotalldata["Pop_Bin"] = pd.cut(typlotalldata["Population_2020"], agebins, labels=agegroup_labels)
typlotalldata

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin
0,Anderson,Human_Trafficking,0,58199,58.199,0.0000,0.000,31.813215,-95.652518,50K_to_100K
1,Anderson,Murder,38,58199,58.199,0.0007,0.653,31.813215,-95.652518,50K_to_100K
2,Anderson,Kidnapping,6,58199,58.199,0.0001,0.103,31.813215,-95.652518,50K_to_100K
3,Anderson,Sexual_Assault,75,58199,58.199,0.0013,1.289,31.813215,-95.652518,50K_to_100K
4,Anderson,Robbery,28,58199,58.199,0.0005,0.481,31.813215,-95.652518,50K_to_100K
...,...,...,...,...,...,...,...,...,...,...
11380,Zavala,Pollution,0,12682,12.682,0.0000,0.000,28.865306,-99.761020,10K_to_50K
11381,Zavala,Money_Laundering,0,12682,12.682,0.0000,0.000,28.865306,-99.761020,10K_to_50K
11382,Zavala,Negligence,0,12682,12.682,0.0000,0.000,28.865306,-99.761020,10K_to_50K
11383,Zavala,Business_License,0,12682,12.682,0.0000,0.000,28.865306,-99.761020,10K_to_50K


In [38]:
#total counties per  pop bin counts - 
# this is done to get an idea of the number of 
# counties per bin  - the count is number of times the Pop_bin
# was hit - df frame was summary of overall which has county listing with all bins
# could have approached different ways - choose to create df and then the math
# 45 is number of offense bins being currently used

# -------------------------------------------------------------------------

# AFTER COMPLETING SOME INTITIAL DATA VISUALIZATIONS USING THE ABOVE BINS
# DID NOT USE THESE BINS IN THE FINAL ANALYSIS - DATA DID NOT PRESENT WELL
# LEFT CODE FOR REFERENCE PURPOSES

# ENDED UP WITH 4 POP BINS - LESS THAN 10K; POP BETWEEN 10K AND 100K;
#                            POP BETWEEN 100K AND 500K; GREATER THAN 500K

# --------------------------------------------------------------------------

ctytyplotsubcounts = typlotalldata.groupby(["Pop_Bin"]).agg({'Count': ['count']})
ctytyplotsubcounts.columns = ctytyplotsubcounts.columns.droplevel(0)
ctytyplotsubcounts['number of counties'] = (ctytyplotsubcounts["count"] / 45)
ctytyplotsubcounts

Unnamed: 0_level_0,count,number of counties
Pop_Bin,Unnamed: 1_level_1,Unnamed: 2_level_1
LT1K,270,6.0
1K_to_10K,3645,81.0
10K_to_50K,4410,98.0
50K_to_100K,1260,28.0
100K_to_250K,855,19.0
250K_to_500K,405,9.0
500K_to_750K,90,2.0
750K_to_1M,180,4.0
GT1M,270,6.0


In [39]:
#Sort 1 - overall sorted by Offense per 1000
# there is so much data  - chose to look at top 5 offense categories
# Indiviual dfs will be created to then create heatmaps and how
# dispersed or concentrated the respective top 5 categories are
# The top 5 are Sexual Assault, Assault, Murder, Robbery, and Drugs

# These 5 categories represent ~74% of the prison population
# if Theft and Burglary were combined with Roberry - value would approach 80%

typadsortb1k = typlotalldata.sort_values(by=['Count'], ascending=False)
typadsortb1k.head()

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin
4504,Harris,Robbery,4849,4978845,4978.845,0.001,0.974,29.859671,-95.397821,GT1M
4501,Harris,Murder,3192,4978845,4978.845,0.0006,0.641,29.859671,-95.397821,GT1M
2524,Dallas,Robbery,2595,2734111,2734.111,0.0009,0.949,32.766537,-96.777819,GT1M
4503,Harris,Sexual_Assault,2520,4978845,4978.845,0.0005,0.506,29.859671,-95.397821,GT1M
2523,Dallas,Sexual_Assault,2305,2734111,2734.111,0.0008,0.843,32.766537,-96.777819,GT1M


In [40]:
# sort verifies largest pop counties have the hights total counts
# no anomilies identified - df not used but maintined for review as needed
#Sort 2 - overall sorted by highest number count

typadsortcou = typlotalldata.sort_values(by=['Count'], ascending=False)
# typadsortcou.head(20)

In [41]:
# The following data manipulation serves the purpose of 
# creating total population based dfs that can be used to compare rates
# for the top 5 offense categories.  There is a df for each pop and offense
# category combination - understand should have used a loop - time to trouble shot
# a loop would take longer since I am learning than to bang it out manually  - cut n, paste
# future goal is a loop

# Not all datasets will be presented for class project because of time restraints
# more interesting heat maps will be selected and presented

In [42]:
# Data for Heat map 1
# Sort 12  - Identifies each counties worst Bin per 1000

countysworst = typlotalldata.loc[typlotalldata.groupby('County').Count.idxmax().values]
countysworst
cws = countysworst.sort_values(by=['Count'], ascending=False)
cws.head()

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin
4504,Harris,Robbery,4849,4978845,4978.845,0.001,0.974,29.859671,-95.397821,GT1M
2524,Dallas,Robbery,2595,2734111,2734.111,0.0009,0.949,32.766537,-96.777819,GT1M
9813,Tarrant,Sexual_Assault,1715,2143755,2143.755,0.0008,0.8,32.771852,-97.291165,GT1M
634,Bexar,Robbery,1492,2093502,2093.502,0.0007,0.713,29.448845,-98.519663,GT1M
10128,Travis,Sexual_Assault,691,1291502,1291.502,0.0005,0.535,30.334233,-97.781947,GT1M


In [43]:
# Heat Map 1 - using data from Sort 12
# this gives an overall heat map for each county and their "worst" bin
# based on total number of individuals in prision - the heat map matches 
# population distribution


gmaps.configure(api_key=g_key)

# Store 'Lat' and 'Lng' into  locations 
locations = cws[["Latitude", "Longitude"]]

# Weight
bincount = cws["Count"]

figure_layout = {
    'width': '800px',
    'height': '600px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'}

# Plot Heatmap
fig = gmaps.figure(layout=figure_layout,zoom_level=6,center=(31.3,-99.5))

# Create heat layer
heat_layer = gmaps.heatmap_layer(locations, weights=bincount, 
                                 dissipating=False, max_intensity=1000,
                                 point_radius=1)


# Add layer
fig.add_layer(heat_layer)

# Display figure
fig


Figure(layout=FigureLayout(border='1px solid black', height='600px', margin='0 auto 0 auto', padding='1px', wi…

In [44]:
# Heat Map 2  - using data from Sort 12
# this gives an overall heat map for each county and their "worst" bin
# per 1000 pop - this removes the larger county dilution - presents
# different look and allows for inclusion and evaluation of the smaller pop
# size counties in the discussion

gmaps.configure(api_key=g_key)

# Store 'Lat' and 'Lng' into  locations 
locations = cws[["Latitude", "Longitude"]]

# Weight
bincount = cws["Offenders_per_1000"]

figure_layout = {
    'width': '800px',
    'height': '600px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'}

# Plot Heatmap
fig = gmaps.figure(layout=figure_layout,zoom_level=6,center=(31.3,-99.5))

# Create heat layer
heat_layer = gmaps.heatmap_layer(locations, weights=bincount, 
                                 dissipating=False, max_intensity=30,
                                 point_radius=1)


# Add layer
fig.add_layer(heat_layer)

# Display figure
fig


Figure(layout=FigureLayout(border='1px solid black', height='600px', margin='0 auto 0 auto', padding='1px', wi…

In [45]:
# sort 20 - highest rates - top 10  used for heat map6
countysworst3 = typlotalldata.loc[typlotalldata.groupby('County').Count.idxmax().values]
# countysworst3
cws3 = countysworst3.sort_values(by=['Offenders_per_1000'], ascending=False)


cws3plot = cws3.loc[cws3['Offenders_per_1000'] > 3.845]
# cws3plot.to_csv('../Resources/map6data.csv', index=False)
cws3plot

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin
5866,Kenedy,Drugs,8,476,0.476,0.0168,16.807,26.924094,-97.681378,LT1K
6721,Loving,Drugs,1,92,0.092,0.0109,10.87,31.84913,-103.579906,LT1K
1456,Carson,Drugs,42,5799,5.799,0.0072,7.243,35.403468,-101.354204,1K_to_10K
1096,Brown,Drugs,220,38923,38.923,0.0057,5.652,31.774323,-98.999896,10K_to_50K
2986,Eastland,Drugs,87,18205,18.205,0.0048,4.779,32.327393,-98.832571,10K_to_50K
723,Borden,Sexual_Assault,3,685,0.685,0.0044,4.38,32.743692,-101.431753,LT1K
4280,Hall,Assault,14,3305,3.305,0.0042,4.236,34.530741,-100.680988,1K_to_10K
2255,Cottle,Assault,6,1510,1.51,0.004,3.974,34.077657,-100.278698,1K_to_10K
8116,Palo Pinto,Drugs,108,27859,27.859,0.0039,3.877,32.753169,-98.312995,10K_to_50K
5956,Kerr,Drugs,201,52267,52.267,0.0038,3.846,30.06153,-99.35017,50K_to_100K


In [46]:
# Used to find approximate max_intensity for heap map 6
# cws3["Offenders_per_1000"].max()

In [47]:
# Create table for heat map support - placed on chart to provide county info
# for each blue dot location

map6_df = cws3plot[["County", "Bin_Cat", "Count", "Population_2020", "Offenders_per_1000"]]
map6_df = map6_df.rename(columns={"Bin_Cat": "Offense"})
# map6_df

map6_df.style.format({'Offenders_per_1000': '{:.2f}'})


Unnamed: 0,County,Offense,Count,Population_2020,Offenders_per_1000
5866,Kenedy,Drugs,8,476,16.81
6721,Loving,Drugs,1,92,10.87
1456,Carson,Drugs,42,5799,7.24
1096,Brown,Drugs,220,38923,5.65
2986,Eastland,Drugs,87,18205,4.78
723,Borden,Sexual_Assault,3,685,4.38
4280,Hall,Assault,14,3305,4.24
2255,Cottle,Assault,6,1510,3.97
8116,Palo Pinto,Drugs,108,27859,3.88
5956,Kerr,Drugs,201,52267,3.85


In [48]:
# # Heat Map 6 - using data from Sort 20
# # this gives an overall heat map for each county and their "worst" bin
# # based on total number of individuals in prision - the heat map matches 
# # population distribution

# -------------------------------

gmaps.configure(api_key=g_key)

# Store 'Lat' and 'Lng' into  locations 
locationsw = countysworst3[["Latitude", "Longitude"]]

# Weight
bincountw = countysworst3["Offenders_per_1000"]

figure_layout = {
    'width': '800px',
    'height': '600px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'}

# Plot Heatmap
fig = gmaps.figure(layout=figure_layout,zoom_level=6,center=(31.3,-99.5))

# Create heat layer
heat_layerw = gmaps.heatmap_layer(locationsw, weights=bincountw, 
                                 dissipating=False, max_intensity=12,
                                 point_radius=1)

# --------------------------------


# Add layer
# fig.add_layer(heat_layerw)

# # Display figure
# fig


In [49]:
# Heat Map 6  - using data from Sort 20

# Using the template add the marks to the heatmap
# this is top5 for county - smaller df
info_box_templatewc = """
<dl>
<dt>County</dt><dd>{County}</dd>
<dt>Offense</dt><dd>{Bin_Cat}</dd>
<dt>Prisoners_per_1000_pop</dt><dd>{Offenders_per_1000}</dd>
</dl>
"""

county_infowc = [info_box_templatewc.format(**row) for index, row in cws3plot.iterrows()]
locationswc = cws3plot[["Latitude", "Longitude"]]
marker_locations = [1,2,3,4,5,6,7,8,9,10]


# Add marker layer ontop of heat map
topc_layerwc = gmaps.symbol_layer(
    locationswc, fill_color='rgba(0, 150, 0, 0.4)',
    stroke_color='rgba(0, 0, 150, 0.4)', scale=6)


markerswc = gmaps.marker_layer(locations = locationswc, 
                             label = [f" {x}" for x in marker_locations],
                             info_box_content=[f"{county}" for county in county_infowc])

figure_layout = {
    'width': '800px',
    'height': '600px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'}

fig = gmaps.figure(layout=figure_layout,zoom_level=6,center=(31.3,-99.5))
      

fig.add_layer(heat_layerw)
fig.add_layer(topc_layerwc)
fig.add_layer(markerswc)

fig

Figure(layout=FigureLayout(border='1px solid black', height='600px', margin='0 auto 0 auto', padding='1px', wi…

In [50]:
# Data collection for visualization
# learned original pop_bins created were 
# actually to granular - therefore amended direction
# and went less detailed.  Detailed data
# did provide guidance for the final "slicing"
# for note - lower "Sort numbers" were for sorts not used - 

# Sort 21 - Cities pop 100,000 < X > 10,000

# creates single column with True or False meeting criteria
inihm7 = (typlotalldata["Population_2020"] >= 10000) & (typlotalldata["Population_2020"] < 100000)
# inihm7
# converting to dataframe with column name crit
inihm7 = inihm7.to_frame('crit')
# merging criteria dataframe with bigger dataframe
hm7cri = typlotalldata.merge(inihm7, how = "outer", left_index=True, right_index=True)
# converting crit column to value so it can be sorted upon - True becomes 1 and False becomes 0
hm7cri["crit"] = hm7cri["crit"]*1
hm7 = hm7cri.loc[hm7cri['crit'] == 1]
# hm7["County"].unique()
# hm7

# Sexual Assault
sexaslthm7_df = hm7.loc[hm7['Bin_Cat'] == "Sexual_Assault"]

# Assault
aslthm7_df = hm7.loc[hm7['Bin_Cat'] == "Assault"]

# Murder
murhm7_df = hm7.loc[hm7['Bin_Cat'] == "Murder"]

# Robbery
robhm7_df = hm7.loc[hm7['Bin_Cat'] == "Robbery"]

# Drugs
drugshm7_df = hm7.loc[hm7['Bin_Cat'] == "Drugs"]


frameshm7 = [sexaslthm7_df, aslthm7_df, murhm7_df, robhm7_df, drugshm7_df]
top5hm7 = pd.concat(frameshm7)
top5hm7

# coupophm7 = hm7.loc[:,"Population_2020"]
# nduphm7 = coupophm7.drop_duplicates().sum()
# # ndupgt50

sorttop5hm7 = top5hm7.sort_values(by=['Offenders_per_1000'], ascending=False)
sorttop5hm7.head()

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin,crit
1096,Brown,Drugs,220,38923,38.923,0.0057,5.652,31.774323,-98.999896,10K_to_50K,1
2986,Eastland,Drugs,87,18205,18.205,0.0048,4.779,32.327393,-98.832571,10K_to_50K,1
8116,Palo Pinto,Drugs,108,27859,27.859,0.0039,3.877,32.753169,-98.312995,10K_to_50K,1
5956,Kerr,Drugs,201,52267,52.267,0.0038,3.846,30.06153,-99.35017,50K_to_100K,1
1085,Brown,Assault,129,38923,38.923,0.0033,3.314,31.774323,-98.999896,10K_to_50K,1


In [51]:
t5sorttop5hm7dups = sorttop5hm7.loc[sorttop5hm7['Offenders_per_1000'] > 2.645]
t5sorttop5hm7dups

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin,crit
1096,Brown,Drugs,220,38923,38.923,0.0057,5.652,31.774323,-98.999896,10K_to_50K,1
2986,Eastland,Drugs,87,18205,18.205,0.0048,4.779,32.327393,-98.832571,10K_to_50K,1
8116,Palo Pinto,Drugs,108,27859,27.859,0.0039,3.877,32.753169,-98.312995,10K_to_50K,1
5956,Kerr,Drugs,201,52267,52.267,0.0038,3.846,30.06153,-99.35017,50K_to_100K,1
1085,Brown,Assault,129,38923,38.923,0.0033,3.314,31.774323,-98.999896,10K_to_50K,1
5371,Jackson,Drugs,49,15899,15.899,0.0031,3.082,28.956031,-96.578833,10K_to_50K,1
2973,Eastland,Sexual_Assault,55,18205,18.205,0.003,3.021,32.327393,-98.832571,10K_to_50K,1
8643,Red River,Sexual_Assault,38,12610,12.61,0.003,3.013,33.620745,-95.050189,10K_to_50K,1
6226,Lamar,Drugs,149,50014,50.014,0.003,2.979,33.667462,-95.571108,50K_to_100K,1
5011,Hopkins,Drugs,109,37040,37.04,0.0029,2.943,33.149338,-95.564142,10K_to_50K,1


In [52]:
# need to drop counties listed more than once to get highest value to present in heat map
t5sorttop5hm7 =  t5sorttop5hm7dups.drop_duplicates(subset=['County'])
t5sorttop5hm7 

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin,crit
1096,Brown,Drugs,220,38923,38.923,0.0057,5.652,31.774323,-98.999896,10K_to_50K,1
2986,Eastland,Drugs,87,18205,18.205,0.0048,4.779,32.327393,-98.832571,10K_to_50K,1
8116,Palo Pinto,Drugs,108,27859,27.859,0.0039,3.877,32.753169,-98.312995,10K_to_50K,1
5956,Kerr,Drugs,201,52267,52.267,0.0038,3.846,30.06153,-99.35017,50K_to_100K,1
5371,Jackson,Drugs,49,15899,15.899,0.0031,3.082,28.956031,-96.578833,10K_to_50K,1
8643,Red River,Sexual_Assault,38,12610,12.61,0.003,3.013,33.620745,-95.050189,10K_to_50K,1
6226,Lamar,Drugs,149,50014,50.014,0.003,2.979,33.667462,-95.571108,50K_to_100K,1
5011,Hopkins,Drugs,109,37040,37.04,0.0029,2.943,33.149338,-95.564142,10K_to_50K,1
3965,Gonzales,Assault,60,21347,21.347,0.0028,2.811,29.456415,-97.492799,10K_to_50K,1
2626,Deaf Smith,Drugs,48,18143,18.143,0.0026,2.646,34.96602,-102.604816,10K_to_50K,1


In [53]:
# Create table for visualization
map7_df = t5sorttop5hm7[["County", "Bin_Cat", "Count", "Population_2020", "Offenders_per_1000"]]
map7_df = map7_df.rename(columns={"Bin_Cat": "Offense"})
# map6_df
# map7_df.to_csv('../Resources/map7_df.csv', index=False)
# map6_df.style.set_properties(align="center")
map7_df.style.format({'Offenders_per_1000': '{:.2f}'})

Unnamed: 0,County,Offense,Count,Population_2020,Offenders_per_1000
1096,Brown,Drugs,220,38923,5.65
2986,Eastland,Drugs,87,18205,4.78
8116,Palo Pinto,Drugs,108,27859,3.88
5956,Kerr,Drugs,201,52267,3.85
5371,Jackson,Drugs,49,15899,3.08
8643,Red River,Sexual_Assault,38,12610,3.01
6226,Lamar,Drugs,149,50014,2.98
5011,Hopkins,Drugs,109,37040,2.94
3965,Gonzales,Assault,60,21347,2.81
2626,Deaf Smith,Drugs,48,18143,2.65


In [54]:
# Heat Map 7 - using data from Sort 21
# this gives a heat map for counties with population between 10k and 100k

# --------------------------

gmaps.configure(api_key=g_key)

# Store 'Lat' and 'Lng' into  locations 
locationshm7 = sorttop5hm7[["Latitude", "Longitude"]]

# Weight
bincounthm7 = sorttop5hm7["Offenders_per_1000"]

figure_layout = {
    'width': '800px',
    'height': '600px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'}

# Plot Heatmap
fig = gmaps.figure(layout=figure_layout,zoom_level=6,center=(31.3,-99.5))

# Create heat layer
heat_layerhm7 = gmaps.heatmap_layer(locationshm7, weights=bincounthm7, 
                                 dissipating=False, max_intensity=30,
                                 point_radius=1)

# ------------------------------------------

# Add layer
# fig.add_layer(heat_layerhm7)

# # Display figure
# fig


In [55]:
# Heat Map 7  - using data from Sort 21

# Using the template add the marks to the heatmap


info_box_templatehm7 = """
<dl>
<dt>County</dt><dd>{County}</dd>
<dt>Offense</dt><dd>{Bin_Cat}</dd>
<dt>Prisoners_per_1000_pop</dt><dd>{Offenders_per_1000}</dd>
</dl>
"""

county_infohm7 = [info_box_templatehm7.format(**row) for index, row in t5sorttop5hm7.iterrows()]
locationshm7 = t5sorttop5hm7 [["Latitude", "Longitude"]]
marker_locations = [1,2,3,4,5,6,7,8,9,10]


# Add marker layer ontop of heat map
topc_layerhm7 = gmaps.symbol_layer(
    locationshm7, fill_color='rgba(0, 150, 0, 0.4)',
    stroke_color='rgba(0, 0, 150, 0.4)', scale=6)


markershm7 = gmaps.marker_layer(locations = locationshm7, 
                             label = [f" {x}" for x in marker_locations],
                             info_box_content=[f"{county}" for county in county_infohm7])

figure_layout = {
    'width': '800px',
    'height': '600px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'}

fig = gmaps.figure(layout=figure_layout,zoom_level=6,center=(31.3,-99.5))

fig.add_layer(heat_layerhm7)
fig.add_layer(markershm7)
fig.add_layer(topc_layerhm7)

fig

Figure(layout=FigureLayout(border='1px solid black', height='600px', margin='0 auto 0 auto', padding='1px', wi…

In [56]:
# Data collection for visualization
# learned original pop_bins created were 
# actually to granular - therefore amended direction
# and went less detailed.  Detailed data
# did provide guidance for the final "slicing"
# for note - lower sort numbers were for sorts not used - 

# Sort 22 - Cities pop > 500,000

inihm8 = (typlotalldata["Population_2020"] >= 500000) 
inihm8 = inihm8.to_frame('crit')
hm8cri = typlotalldata.merge(inihm8, how = "outer", left_index=True, right_index=True)
hm8cri["crit"] = hm8cri["crit"]*1
hm8 = hm8cri.loc[hm8cri['crit'] == 1]
# hm8["County"].unique()
# hm8

# Sexual Assault
sexaslthm8_df = hm8.loc[hm8['Bin_Cat'] == "Sexual_Assault"]


# Assault
aslthm8_df = hm8.loc[hm8['Bin_Cat'] == "Assault"]

# Murder
murhm8_df = hm8.loc[hm8['Bin_Cat'] == "Murder"]

# Robbery
robhm8_df = hm8.loc[hm8['Bin_Cat'] == "Robbery"]


# Drugs
drugshm8_df = hm8.loc[hm8['Bin_Cat'] == "Drugs"]


frameshm8 = [sexaslthm8_df, aslthm8_df, murhm8_df, robhm8_df, drugshm8_df]
top5hm8 = pd.concat(frameshm8)
top5hm8

# coupophm7 = hm7.loc[:,"Population_2020"]
# nduphm7 = coupophm7.drop_duplicates().sum()
# # ndupgt50

sorttop5hm8 = top5hm8.sort_values(by=['Offenders_per_1000'], ascending=False)
sorttop5hm8.head()

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin,crit
4504,Harris,Robbery,4849,4978845,4978.845,0.001,0.974,29.859671,-95.397821,GT1M,1
2524,Dallas,Robbery,2595,2734111,2734.111,0.0009,0.949,32.766537,-96.777819,GT1M,1
2523,Dallas,Sexual_Assault,2305,2734111,2734.111,0.0008,0.843,32.766537,-96.777819,GT1M,1
9813,Tarrant,Sexual_Assault,1715,2143755,2143.755,0.0008,0.8,32.771852,-97.291165,GT1M,1
2521,Dallas,Murder,2109,2734111,2734.111,0.0008,0.771,32.766537,-96.777819,GT1M,1


In [57]:
t5sorttop5hm8dups = sorttop5hm8.loc[sorttop5hm8['Offenders_per_1000'] > 0.35]
t5sorttop5hm8dups.head(20)

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin,crit
4504,Harris,Robbery,4849,4978845,4978.845,0.001,0.974,29.859671,-95.397821,GT1M,1
2524,Dallas,Robbery,2595,2734111,2734.111,0.0009,0.949,32.766537,-96.777819,GT1M,1
2523,Dallas,Sexual_Assault,2305,2734111,2734.111,0.0008,0.843,32.766537,-96.777819,GT1M,1
9813,Tarrant,Sexual_Assault,1715,2143755,2143.755,0.0008,0.8,32.771852,-97.291165,GT1M,1
2521,Dallas,Murder,2109,2734111,2734.111,0.0008,0.771,32.766537,-96.777819,GT1M,1
7563,Montgomery,Sexual_Assault,459,613951,613.951,0.0007,0.748,30.300224,-95.503014,500K_to_750K,1
634,Bexar,Robbery,1492,2093502,2093.502,0.0007,0.713,29.448845,-98.519663,GT1M,1
9814,Tarrant,Robbery,1394,2143755,2143.755,0.0007,0.65,32.771852,-97.291165,GT1M,1
635,Bexar,Assault,1358,2093502,2093.502,0.0006,0.649,29.448845,-98.519663,GT1M,1
4501,Harris,Murder,3192,4978845,4978.845,0.0006,0.641,29.859671,-95.397821,GT1M,1


In [58]:
# need to drop counties listed more than once to get highest value to present in heat map
#hm8
t5sorttop5hm8 =  t5sorttop5hm8dups.drop_duplicates(subset=['County'])
t5sorttop5hm8 

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin,crit
4504,Harris,Robbery,4849,4978845,4978.845,0.001,0.974,29.859671,-95.397821,GT1M,1
2524,Dallas,Robbery,2595,2734111,2734.111,0.0009,0.949,32.766537,-96.777819,GT1M,1
9813,Tarrant,Sexual_Assault,1715,2143755,2143.755,0.0008,0.8,32.771852,-97.291165,GT1M,1
7563,Montgomery,Sexual_Assault,459,613951,613.951,0.0007,0.748,30.300224,-95.503014,500K_to_750K,1
634,Bexar,Robbery,1492,2093502,2093.502,0.0007,0.713,29.448845,-98.519663,GT1M,1
1893,Collin,Sexual_Assault,566,1039369,1039.369,0.0005,0.545,33.187891,-96.572489,GT1M,1
10128,Travis,Sexual_Assault,691,1291502,1291.502,0.0005,0.535,30.334233,-97.781947,GT1M,1
4818,Hidalgo,Sexual_Assault,422,870366,870.366,0.0005,0.485,26.396627,-98.180887,750K_to_1M,1
3110,El Paso,Assault,352,876120,876.12,0.0004,0.402,31.768623,-106.235223,750K_to_1M,1
10983,Williamson,Sexual_Assault,235,589914,589.914,0.0004,0.398,30.648349,-97.601055,500K_to_750K,1


In [59]:
# Create table for visualization
map8_df = t5sorttop5hm8[["County", "Bin_Cat", "Count", "Population_2020", "Offenders_per_1000"]]
map8_df = map8_df.rename(columns={"Bin_Cat": "Offense"})
# map6_df
# map8_df.to_csv('../Resources/map8_df.csv', index=False)
# map6_df.style.set_properties(align="center")
map8_df.style.format({'Offenders_per_1000': '{:.2f}'})

Unnamed: 0,County,Offense,Count,Population_2020,Offenders_per_1000
4504,Harris,Robbery,4849,4978845,0.97
2524,Dallas,Robbery,2595,2734111,0.95
9813,Tarrant,Sexual_Assault,1715,2143755,0.8
7563,Montgomery,Sexual_Assault,459,613951,0.75
634,Bexar,Robbery,1492,2093502,0.71
1893,Collin,Sexual_Assault,566,1039369,0.55
10128,Travis,Sexual_Assault,691,1291502,0.54
4818,Hidalgo,Sexual_Assault,422,870366,0.48
3110,El Paso,Assault,352,876120,0.4
10983,Williamson,Sexual_Assault,235,589914,0.4


In [60]:
# Heat Map 8 - using data from Sort 22
# this gives a heat map for counties with pops x > 500k

# ------------------------------

gmaps.configure(api_key=g_key)

# Store 'Lat' and 'Lng' into  locations 
locationshm8 = sorttop5hm8[["Latitude", "Longitude"]]

# Weight
bincounthm8 = sorttop5hm8["Offenders_per_1000"]

figure_layout = {
    'width': '800px',
    'height': '600px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'}

# Plot Heatmap
fig = gmaps.figure(layout=figure_layout,zoom_level=6,center=(31.3,-99.5))

# Create heat layer
heat_layerhm8 = gmaps.heatmap_layer(locationshm8, weights=bincounthm8, 
                                 dissipating=False, max_intensity=5,
                                 point_radius=1)


# --------------------------------

# Add layer
# fig.add_layer(heat_layerhm7)

# # Display figure
# fig


In [61]:
# Heat Map 8  - using data from Sort 22

# Using the template add the marks to the heatmap

info_box_templatehm8 = """
<dl>
<dt>County</dt><dd>{County}</dd>
<dt>Offense</dt><dd>{Bin_Cat}</dd>
<dt>Prisoners_per_1000_pop</dt><dd>{Offenders_per_1000}</dd>
</dl>
"""

county_infohm8 = [info_box_templatehm8.format(**row) for index, row in t5sorttop5hm8.iterrows()]
locationshm8 = t5sorttop5hm8 [["Latitude", "Longitude"]]
marker_locations = [1,2,3,4,5,6,7,8,9,10]


# Add marker layer ontop of heat map
topc_layerhm8 = gmaps.symbol_layer(
    locationshm8, fill_color='rgba(0, 150, 0, 0.4)',
    stroke_color='rgba(0, 0, 150, 0.4)', scale=6)


markershm8 = gmaps.marker_layer(locations = locationshm8, 
                             label = [f" {x}" for x in marker_locations],
                             info_box_content=[f"{county}" for county in county_infohm8])

figure_layout = {
    'width': '800px',
    'height': '600px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'}

fig = gmaps.figure(layout=figure_layout,zoom_level=6,center=(31.3,-99.5))

fig.add_layer(heat_layerhm8)
fig.add_layer(markershm8)
fig.add_layer(topc_layerhm8)

fig

Figure(layout=FigureLayout(border='1px solid black', height='600px', margin='0 auto 0 auto', padding='1px', wi…

In [62]:
# Data collection for visualization
# learned original pop_bins created were 
# actually to granular - therefore amended direction
# and went less detailed.  Detailed data
# did provide guidance for the final "slicing"
# for note - lower sort numbers were for sorts not used - 


# Sort 23 - Cities pop > 500,000 and > 100,000

inihm9 = (typlotalldata["Population_2020"] >= 100000) & (typlotalldata["Population_2020"] < 500000)
inihm9 = inihm9.to_frame('crit')
hm9cri = typlotalldata.merge(inihm9, how = "outer", left_index=True, right_index=True)
hm9cri["crit"] = hm9cri["crit"]*1
hm9 = hm9cri.loc[hm9cri['crit'] == 1]
# hm9["County"].unique()
# hm9

# Sexual Assault
sexaslthm9_df = hm9.loc[hm9['Bin_Cat'] == "Sexual_Assault"]

# Assault
aslthm9_df = hm9.loc[hm9['Bin_Cat'] == "Assault"]

# Murder
murhm9_df = hm9.loc[hm9['Bin_Cat'] == "Murder"]

# Robbery
robhm9_df = hm9.loc[hm9['Bin_Cat'] == "Robbery"]

# Drugs
drugshm9_df = hm9.loc[hm9['Bin_Cat'] == "Drugs"]

frameshm9 = [sexaslthm9_df, aslthm9_df, murhm9_df, robhm9_df, drugshm9_df]
top5hm9 = pd.concat(frameshm9)
top5hm9

# coupophm7 = hm7.loc[:,"Population_2020"]
# nduphm7 = coupophm7.drop_duplicates().sum()
# # ndupgt50

sorttop5hm9 = top5hm9.sort_values(by=['Offenders_per_1000'], ascending=False)
sorttop5hm9.head(20)

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin,crit
9871,Taylor,Drugs,400,139457,139.457,0.0029,2.868,32.301382,-99.890039,100K_to_250K,1
8386,Potter,Drugs,303,122706,122.706,0.0025,2.469,35.401289,-101.894048,100K_to_250K,1
8373,Potter,Sexual_Assault,282,122706,122.706,0.0023,2.298,35.401289,-101.894048,100K_to_250K,1
7158,McLennan,Sexual_Assault,539,253066,253.066,0.0021,2.13,31.552345,-97.201849,250K_to_500K,1
8375,Potter,Assault,259,122706,122.706,0.0021,2.111,35.401289,-101.894048,100K_to_250K,1
10096,Tom Green,Drugs,254,123276,123.276,0.0021,2.06,31.404444,-100.462068,100K_to_250K,1
7171,McLennan,Drugs,516,253066,253.066,0.002,2.039,31.552345,-97.201849,250K_to_500K,1
4066,Grayson,Drugs,262,131710,131.71,0.002,1.989,33.626844,-96.677649,100K_to_250K,1
5641,Johnson,Drugs,301,171701,171.701,0.0018,1.753,32.378999,-97.366605,100K_to_250K,1
9466,Smith,Drugs,401,235143,235.143,0.0017,1.705,32.375146,-95.269095,100K_to_250K,1


In [63]:
t5sorttop5hm9dups = sorttop5hm9.loc[sorttop5hm9['Offenders_per_1000'] > 1.3]
t5sorttop5hm9dups.head()

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin,crit
9871,Taylor,Drugs,400,139457,139.457,0.0029,2.868,32.301382,-99.890039,100K_to_250K,1
8386,Potter,Drugs,303,122706,122.706,0.0025,2.469,35.401289,-101.894048,100K_to_250K,1
8373,Potter,Sexual_Assault,282,122706,122.706,0.0023,2.298,35.401289,-101.894048,100K_to_250K,1
7158,McLennan,Sexual_Assault,539,253066,253.066,0.0021,2.13,31.552345,-97.201849,250K_to_500K,1
8375,Potter,Assault,259,122706,122.706,0.0021,2.111,35.401289,-101.894048,100K_to_250K,1


In [64]:
# need to drop counties listed more than once to get highest value to present in heat map
t5sorttop5hm9 =  t5sorttop5hm9dups.drop_duplicates(subset=['County'])
t5sorttop5hm9

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin,crit
9871,Taylor,Drugs,400,139457,139.457,0.0029,2.868,32.301382,-99.890039,100K_to_250K,1
8386,Potter,Drugs,303,122706,122.706,0.0025,2.469,35.401289,-101.894048,100K_to_250K,1
7158,McLennan,Sexual_Assault,539,253066,253.066,0.0021,2.13,31.552345,-97.201849,250K_to_500K,1
10096,Tom Green,Drugs,254,123276,123.276,0.0021,2.06,31.404444,-100.462068,100K_to_250K,1
4066,Grayson,Drugs,262,131710,131.71,0.002,1.989,33.626844,-96.677649,100K_to_250K,1
5641,Johnson,Drugs,301,171701,171.701,0.0018,1.753,32.378999,-97.366605,100K_to_250K,1
9466,Smith,Drugs,401,235143,235.143,0.0017,1.705,32.375146,-95.269095,100K_to_250K,1
4100,Gregg,Assault,191,125730,125.73,0.0015,1.519,32.480365,-94.817237,100K_to_250K,1
5494,Jefferson,Robbery,377,258678,258.678,0.0015,1.457,29.884258,-94.170878,250K_to_500K,1
8206,Parker,Drugs,189,135621,135.621,0.0014,1.394,32.778523,-97.804722,100K_to_250K,1


In [65]:
# Create table for visualization
map9_df = t5sorttop5hm9[["County", "Bin_Cat", "Count", "Population_2020", "Offenders_per_1000"]]
map9_df = map9_df.rename(columns={"Bin_Cat": "Offense"})
# map6_df
# map9_df.to_csv('../Resources/map9_df.csv', index=False)
# map6_df.style.set_properties(align="center")
map9_df.style.format({'Offenders_per_1000': '{:.2f}'})

Unnamed: 0,County,Offense,Count,Population_2020,Offenders_per_1000
9871,Taylor,Drugs,400,139457,2.87
8386,Potter,Drugs,303,122706,2.47
7158,McLennan,Sexual_Assault,539,253066,2.13
10096,Tom Green,Drugs,254,123276,2.06
4066,Grayson,Drugs,262,131710,1.99
5641,Johnson,Drugs,301,171701,1.75
9466,Smith,Drugs,401,235143,1.71
4100,Gregg,Assault,191,125730,1.52
5494,Jefferson,Robbery,377,258678,1.46
8206,Parker,Drugs,189,135621,1.39


In [66]:
# Heat Map 9 - using data from Sort 23
# this gives a heat map for counties with pops  100k < x < 500k

# ---------------------

gmaps.configure(api_key=g_key)

# Store 'Lat' and 'Lng' into  locations 
locationshm9 = sorttop5hm9[["Latitude", "Longitude"]]

# Weight
bincounthm9 = sorttop5hm9["Offenders_per_1000"]

figure_layout = {
    'width': '800px',
    'height': '600px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'}

# Plot Heatmap
fig = gmaps.figure(layout=figure_layout,zoom_level=6,center=(31.3,-99.5))

# Create heat layer
heat_layerhm9 = gmaps.heatmap_layer(locationshm9, weights=bincounthm9, 
                                 dissipating=False, max_intensity=10,
                                 point_radius=1)

# ---------------------------

# Add layer
# fig.add_layer(heat_layerhm9)

# # Display figure
# fig


In [67]:
# Heat Map 9  - using data from Sort 23

# Using the template add the marks to the heatmap

info_box_templatehm9 = """
<dl>
<dt>County</dt><dd>{County}</dd>
<dt>Offense</dt><dd>{Bin_Cat}</dd>
<dt>Prisoners_per_1000_pop</dt><dd>{Offenders_per_1000}</dd>
</dl>
"""
county_infohm9 = [info_box_templatehm9.format(**row) for index, row in t5sorttop5hm9.iterrows()]
locationshm9 = t5sorttop5hm9 [["Latitude", "Longitude"]]
marker_locations = [1,2,3,4,5,6,7,8,9,10]


# Add marker layer ontop of heat map
topc_layerhm9 = gmaps.symbol_layer(
    locationshm9, fill_color='rgba(0, 150, 0, 0.4)',
    stroke_color='rgba(0, 0, 150, 0.4)', scale=6)


markershm9 = gmaps.marker_layer(locations = locationshm9, 
                             label = [f" {x}" for x in marker_locations],
                             info_box_content=[f"{county}" for county in county_infohm9])

figure_layout = {
    'width': '800px',
    'height': '600px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'}

fig = gmaps.figure(layout=figure_layout,zoom_level=6,center=(31.3,-99.5))

fig.add_layer(heat_layerhm9)
fig.add_layer(markershm9)
fig.add_layer(topc_layerhm9)

fig

Figure(layout=FigureLayout(border='1px solid black', height='600px', margin='0 auto 0 auto', padding='1px', wi…

In [68]:
# Data collection for visualization
# learned original pop_bins created were 
# actually to granular - therefore amended direction
# and went less detailed.  Detailed data
# did provide guidance for the final "slicing"
# for note - lower sort numbers were for sorts not used - 


# Sort 24 - Cities pop X < 10,000

inihm10 = (typlotalldata["Population_2020"] < 10000) 
inihm10 = inihm10.to_frame('crit')
hm10cri = typlotalldata.merge(inihm10, how = "outer", left_index=True, right_index=True)
hm10cri["crit"] = hm10cri["crit"]*1
hm10 = hm10cri.loc[hm10cri['crit'] == 1]
# hm9["County"].unique()
# hm9

# Sexual Assault
sexaslthm10_df = hm10.loc[hm10['Bin_Cat'] == "Sexual_Assault"]

# Assault
aslthm10_df = hm10.loc[hm10['Bin_Cat'] == "Assault"]

# Murder
murhm10_df = hm10.loc[hm10['Bin_Cat'] == "Murder"]

# Robbery
robhm10_df = hm10.loc[hm10['Bin_Cat'] == "Robbery"]

# Drugs
drugshm10_df = hm10.loc[hm10['Bin_Cat'] == "Drugs"]


frameshm10 = [sexaslthm10_df, aslthm10_df, murhm10_df, robhm10_df, drugshm10_df]
top5hm10 = pd.concat(frameshm10)
top5hm10

# coupophm7 = hm7.loc[:,"Population_2020"]
# nduphm7 = coupophm7.drop_duplicates().sum()
# # ndupgt50

sorttop5hm10 = top5hm10.sort_values(by=['Offenders_per_1000'], ascending=False)
sorttop5hm10.head(20)

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin,crit
5866,Kenedy,Drugs,8,476,0.476,0.0168,16.807,26.924094,-97.681378,LT1K,1
6721,Loving,Drugs,1,92,0.092,0.0109,10.87,31.84913,-103.579906,LT1K,1
1456,Carson,Drugs,42,5799,5.799,0.0072,7.243,35.403468,-101.354204,1K_to_10K,1
723,Borden,Sexual_Assault,3,685,0.685,0.0044,4.38,32.743692,-101.431753,LT1K,1
4280,Hall,Assault,14,3305,3.305,0.0042,4.236,34.530741,-100.680988,1K_to_10K,1
2255,Cottle,Assault,6,1510,1.51,0.004,3.974,34.077657,-100.278698,1K_to_10K,1
7441,Mills,Drugs,18,4870,4.87,0.0037,3.696,31.495243,-98.595527,1K_to_10K,1
1861,Coleman,Drugs,31,8478,8.478,0.0037,3.657,31.773294,-99.453626,1K_to_10K,1
5988,Kimble,Sexual_Assault,15,4344,4.344,0.0035,3.453,30.486763,-99.748927,1K_to_10K,1
1940,Collingsworth,Assault,11,3210,3.21,0.0034,3.427,34.964985,-100.270073,1K_to_10K,1


In [69]:
t5sorttop5hm10dups = sorttop5hm10.loc[sorttop5hm10['Offenders_per_1000'] > 3.3]
t5sorttop5hm10dups.head(20)

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin,crit
5866,Kenedy,Drugs,8,476,0.476,0.0168,16.807,26.924094,-97.681378,LT1K,1
6721,Loving,Drugs,1,92,0.092,0.0109,10.87,31.84913,-103.579906,LT1K,1
1456,Carson,Drugs,42,5799,5.799,0.0072,7.243,35.403468,-101.354204,1K_to_10K,1
723,Borden,Sexual_Assault,3,685,0.685,0.0044,4.38,32.743692,-101.431753,LT1K,1
4280,Hall,Assault,14,3305,3.305,0.0042,4.236,34.530741,-100.680988,1K_to_10K,1
2255,Cottle,Assault,6,1510,1.51,0.004,3.974,34.077657,-100.278698,1K_to_10K,1
7441,Mills,Drugs,18,4870,4.87,0.0037,3.696,31.495243,-98.595527,1K_to_10K,1
1861,Coleman,Drugs,31,8478,8.478,0.0037,3.657,31.773294,-99.453626,1K_to_10K,1
5988,Kimble,Sexual_Assault,15,4344,4.344,0.0035,3.453,30.486763,-99.748927,1K_to_10K,1
1940,Collingsworth,Assault,11,3210,3.21,0.0034,3.427,34.964985,-100.270073,1K_to_10K,1


In [70]:
# need to drop counties listed more than once to get highest value to present in heat map
t5sorttop5hm10 =  t5sorttop5hm10dups.drop_duplicates(subset=['County'])
t5sorttop5hm10

Unnamed: 0,County,Bin_Cat,Count,Population_2020,Pop_per_1000,Offenders_per_Pop,Offenders_per_1000,Latitude,Longitude,Pop_Bin,crit
5866,Kenedy,Drugs,8,476,0.476,0.0168,16.807,26.924094,-97.681378,LT1K,1
6721,Loving,Drugs,1,92,0.092,0.0109,10.87,31.84913,-103.579906,LT1K,1
1456,Carson,Drugs,42,5799,5.799,0.0072,7.243,35.403468,-101.354204,1K_to_10K,1
723,Borden,Sexual_Assault,3,685,0.685,0.0044,4.38,32.743692,-101.431753,LT1K,1
4280,Hall,Assault,14,3305,3.305,0.0042,4.236,34.530741,-100.680988,1K_to_10K,1
2255,Cottle,Assault,6,1510,1.51,0.004,3.974,34.077657,-100.278698,1K_to_10K,1
7441,Mills,Drugs,18,4870,4.87,0.0037,3.696,31.495243,-98.595527,1K_to_10K,1
1861,Coleman,Drugs,31,8478,8.478,0.0037,3.657,31.773294,-99.453626,1K_to_10K,1
5988,Kimble,Sexual_Assault,15,4344,4.344,0.0035,3.453,30.486763,-99.748927,1K_to_10K,1
1940,Collingsworth,Assault,11,3210,3.21,0.0034,3.427,34.964985,-100.270073,1K_to_10K,1


In [71]:
# Create table for visualization
map10_df = t5sorttop5hm10[["County", "Bin_Cat", "Count", "Population_2020", "Offenders_per_1000"]]
map10_df = map10_df.rename(columns={"Bin_Cat": "Offense"})
# map6_df
# map10_df.to_csv('../Resources/map10_df.csv', index=False)
# map6_df.style.set_properties(align="center")
map10_df.style.format({'Offenders_per_1000': '{:.2f}'})

Unnamed: 0,County,Offense,Count,Population_2020,Offenders_per_1000
5866,Kenedy,Drugs,8,476,16.81
6721,Loving,Drugs,1,92,10.87
1456,Carson,Drugs,42,5799,7.24
723,Borden,Sexual_Assault,3,685,4.38
4280,Hall,Assault,14,3305,4.24
2255,Cottle,Assault,6,1510,3.97
7441,Mills,Drugs,18,4870,3.7
1861,Coleman,Drugs,31,8478,3.66
5988,Kimble,Sexual_Assault,15,4344,3.45
1940,Collingsworth,Assault,11,3210,3.43


In [72]:
# Heat Map 10 - using data from Sort 24
# this gives a heat map for counties with pops  x < 1000

# ----------------

gmaps.configure(api_key=g_key)

# Store 'Lat' and 'Lng' into  locations 
locationshm10 = sorttop5hm10[["Latitude", "Longitude"]]

# Weight
bincounthm10 = sorttop5hm10["Offenders_per_1000"]

figure_layout = {
    'width': '800px',
    'height': '600px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'}

# Plot Heatmap
fig = gmaps.figure(layout=figure_layout,zoom_level=6,center=(31.3,-99.5))

# Create heat layer
heat_layerhm10 = gmaps.heatmap_layer(locationshm10, weights=bincounthm10, 
                                 dissipating=False, max_intensity=20,
                                 point_radius=1)

# ---------------------------

# Add layer
# fig.add_layer(heat_layerhm9)

# # Display figure
# fig


In [73]:
# Heat Map 10  - using data from Sort 24

# Using the template add the marks to the heatmap

info_box_templatehm10 = """
<dl>
<dt>County</dt><dd>{County}</dd>
<dt>Offense</dt><dd>{Bin_Cat}</dd>
<dt>Prisoners_per_1000_pop</dt><dd>{Offenders_per_1000}</dd>
</dl>
"""
county_infohm10 = [info_box_templatehm10.format(**row) for index, row in t5sorttop5hm10.iterrows()]
locationshm10 = t5sorttop5hm10 [["Latitude", "Longitude"]]
marker_locations = [1,2,3,4,5,6,7,8,9,10]


# Add marker layer ontop of heat map
topc_layerhm10 = gmaps.symbol_layer(
    locationshm10, fill_color='rgba(0, 150, 0, 0.4)',
    stroke_color='rgba(0, 0, 150, 0.4)', scale=6)


markershm10 = gmaps.marker_layer(locations = locationshm10, 
                             label = [f" {x}" for x in marker_locations],
                             info_box_content=[f"{county}" for county in county_infohm10])

figure_layout = {
    'width': '800px',
    'height': '600px',
    'border': '1px solid black',
    'padding': '1px',
    'margin': '0 auto 0 auto'}

fig = gmaps.figure(layout=figure_layout,zoom_level=6,center=(31.3,-99.5))

fig.add_layer(heat_layerhm10)
fig.add_layer(markershm10)
fig.add_layer(topc_layerhm10)

fig

Figure(layout=FigureLayout(border='1px solid black', height='600px', margin='0 auto 0 auto', padding='1px', wi…