#!pip install sodapy
import pandas as pd
import numpy as np
from sodapy import Socrata
import time
import warnings
warnings.filterwarnings('ignore')
client = Socrata("data.cityofchicago.org",
app_token="Your App Token",
username="example@domain.com",
password="Your Password")
start = time.time()
results = client.get("ijzp-q8t2", limit=5000000, where = "year>2015")
end = time.time()
print(end-start)
results_df = pd.DataFrame.from_records(results)
set(results_df["year"])
results_df.to_csv("Chicago_Crime_16_21.csv", index = False)
data = pd.read_csv("Chicago_Crime_16_21.csv")
data.head(3)
id | case_number | date | block | iucr | primary_type | description | arrest | domestic | beat | ... | community_area | fbi_code | year | updated_on | location_description | x_coordinate | y_coordinate | latitude | longitude | location | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 11645836 | JC212333 | 2016-05-01T00:25:00.000 | 055XX S ROCKWELL ST | 1153 | DECEPTIVE PRACTICE | FINANCIAL IDENTITY THEFT OVER $ 300 | False | False | 824 | ... | 63.0 | 11 | 2016 | 2019-04-06T16:04:43.000 | NaN | NaN | NaN | NaN | NaN | NaN |
1 | 11043021 | JA367631 | 2016-10-19T19:00:00.000 | 075XX S YATES BLVD | 0610 | BURGLARY | FORCIBLE ENTRY | False | False | 421 | ... | 43.0 | 05 | 2016 | 2017-08-05T15:50:08.000 | RESTAURANT | NaN | NaN | NaN | NaN | NaN |
2 | 11243066 | JB168427 | 2016-03-29T07:00:00.000 | 067XX S RIDGELAND AVE | 1153 | DECEPTIVE PRACTICE | FINANCIAL IDENTITY THEFT OVER $ 300 | False | False | 332 | ... | 43.0 | 11 | 2016 | 2018-03-01T15:54:55.000 | OTHER | NaN | NaN | NaN | NaN | NaN |
3 rows × 22 columns
data.date = data.date.str.replace("T", " ")
data.date = pd.to_datetime(data.date, format="%Y-%m-%d %H:%M:%S.%f")
data.updated_on = data.updated_on.str.replace("T", " ")
data.updated_on = pd.to_datetime(data.updated_on, format="%Y-%m-%d %H:%M:%S.%f")
data.dropna(how="any", inplace=True)
data["hour"] = data.date.dt.hour
data=data[data.year!=2022]
data.district = data.district.astype("int")
data.community_area=data.community_area.astype("int")
data.drop(columns=["id","case_number", "iucr", "beat", "ward", "fbi_code", "x_coordinate", "y_coordinate","location"], inplace=True)
data.reset_index(drop=True, inplace=True)
data.shape
(1461450, 14)
data.primary_type.replace("CRIM SEXUAL ASSAULT", "CRIMINAL SEXUAL ASSAULT", inplace=True)
data.primary_type.replace("NON-CRIMINAL (SUBJECT SPECIFIED)", "NON-CRIMINAL", inplace=True)
data.primary_type.replace("NON - CRIMINAL", "NON-CRIMINAL", inplace=True)
data.primary_type.replace("CRIMINAL SEXUAL ASSAULT", "SEX OFFENSE", inplace=True)
data.to_csv("crime_clean.csv", index=False)
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
data=pd.read_csv('crime_clean.csv')
data.head()
date | block | primary_type | description | arrest | domestic | district | community_area | year | updated_on | location_description | latitude | longitude | hour | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2016-05-03 09:45:00 | 055XX W VAN BUREN ST | ASSAULT | AGGRAVATED - HANDGUN | False | False | 15 | 25 | 2016 | 2020-03-25 15:45:43 | STREET | 41.874758 | -87.762379 | 9 |
1 | 2016-05-19 00:00:00 | 026XX W NORTH AVE | BATTERY | AGGRAVATED - HANDGUN | False | False | 14 | 24 | 2016 | 2020-03-25 15:45:43 | STREET | 41.910261 | -87.692759 | 0 |
2 | 2016-05-22 21:25:00 | 057XX W BLOOMINGDALE AVE | BATTERY | AGGRAVATED - HANDGUN | False | False | 25 | 25 | 2016 | 2020-03-25 15:45:43 | PARK PROPERTY | 41.912628 | -87.768864 | 21 |
3 | 2016-06-01 01:22:00 | 013XX W LELAND AVE | BATTERY | AGGRAVATED - HANDGUN | False | False | 19 | 3 | 2016 | 2020-03-25 15:45:43 | SIDEWALK | 41.967145 | -87.663292 | 1 |
4 | 2016-06-10 15:00:00 | 007XX S HOMAN AVE | ASSAULT | AGGRAVATED - HANDGUN | False | False | 11 | 27 | 2016 | 2020-03-25 15:45:43 | STREET | 41.872253 | -87.710733 | 15 |
#Group by year and count crime types
Count_by_year=pd.DataFrame(data.groupby('year').primary_type.value_counts())
#Reformat the dataframe
Count_by_year['Year']=[year for year,crimetype in Count_by_year.index]
Count_by_year['Crime_Type']=[crimetype for year,crimetype in Count_by_year.index]
Count_by_year.index=range(Count_by_year.shape[0])
Count_by_year.rename(columns={'primary_type':'Count'},inplace=True)
Count_by_year=Count_by_year.loc[:,['Crime_Type','Year','Count']]
Count_by_year.Year=Count_by_year.Year.astype('int')
Crimetypes=Count_by_year.Crime_Type.unique()
Count_by_year
Crime_Type | Year | Count | |
---|---|---|---|
0 | THEFT | 2016 | 61037 |
1 | BATTERY | 2016 | 50246 |
2 | CRIMINAL DAMAGE | 2016 | 30931 |
3 | ASSAULT | 2016 | 18720 |
4 | DECEPTIVE PRACTICE | 2016 | 17374 |
... | ... | ... | ... |
176 | GAMBLING | 2021 | 13 |
177 | HUMAN TRAFFICKING | 2021 | 12 |
178 | NON-CRIMINAL | 2021 | 4 |
179 | PUBLIC INDECENCY | 2021 | 4 |
180 | OTHER NARCOTIC VIOLATION | 2021 | 2 |
181 rows × 3 columns
Count_by_year.sort_values(by = ["Year", "Count"], ascending=False, inplace=True)
output_list = []
for year in [2016,2017,2018,2019,2020,2021]:
temp = []
for i in Count_by_year[Count_by_year.Year==2020].values:
temp.append([i[0],i[2]])
output_list.append(temp)
output_list
[[['BATTERY', 41413], ['THEFT', 40225], ['CRIMINAL DAMAGE', 24694], ['ASSAULT', 18207], ['DECEPTIVE PRACTICE', 15906], ['OTHER OFFENSE', 12396], ['MOTOR VEHICLE THEFT', 9896], ['BURGLARY', 8703], ['WEAPONS VIOLATION', 8416], ['ROBBERY', 7847], ['NARCOTICS', 7269], ['CRIMINAL TRESPASS', 4151], ['SEX OFFENSE', 2033], ['OFFENSE INVOLVING CHILDREN', 1842], ['PUBLIC PEACE VIOLATION', 1266], ['HOMICIDE', 791], ['INTERFERENCE WITH PUBLIC OFFICER', 654], ['ARSON', 587], ['PROSTITUTION', 275], ['STALKING', 194], ['INTIMIDATION', 161], ['CONCEALED CARRY LICENSE VIOLATION', 148], ['LIQUOR LAW VIOLATION', 140], ['KIDNAPPING', 118], ['OBSCENITY', 50], ['GAMBLING', 25], ['PUBLIC INDECENCY', 9], ['OTHER NARCOTIC VIOLATION', 6], ['HUMAN TRAFFICKING', 5], ['NON-CRIMINAL', 1], ['RITUALISM', 1]], [['BATTERY', 41413], ['THEFT', 40225], ['CRIMINAL DAMAGE', 24694], ['ASSAULT', 18207], ['DECEPTIVE PRACTICE', 15906], ['OTHER OFFENSE', 12396], ['MOTOR VEHICLE THEFT', 9896], ['BURGLARY', 8703], ['WEAPONS VIOLATION', 8416], ['ROBBERY', 7847], ['NARCOTICS', 7269], ['CRIMINAL TRESPASS', 4151], ['SEX OFFENSE', 2033], ['OFFENSE INVOLVING CHILDREN', 1842], ['PUBLIC PEACE VIOLATION', 1266], ['HOMICIDE', 791], ['INTERFERENCE WITH PUBLIC OFFICER', 654], ['ARSON', 587], ['PROSTITUTION', 275], ['STALKING', 194], ['INTIMIDATION', 161], ['CONCEALED CARRY LICENSE VIOLATION', 148], ['LIQUOR LAW VIOLATION', 140], ['KIDNAPPING', 118], ['OBSCENITY', 50], ['GAMBLING', 25], ['PUBLIC INDECENCY', 9], ['OTHER NARCOTIC VIOLATION', 6], ['HUMAN TRAFFICKING', 5], ['NON-CRIMINAL', 1], ['RITUALISM', 1]], [['BATTERY', 41413], ['THEFT', 40225], ['CRIMINAL DAMAGE', 24694], ['ASSAULT', 18207], ['DECEPTIVE PRACTICE', 15906], ['OTHER OFFENSE', 12396], ['MOTOR VEHICLE THEFT', 9896], ['BURGLARY', 8703], ['WEAPONS VIOLATION', 8416], ['ROBBERY', 7847], ['NARCOTICS', 7269], ['CRIMINAL TRESPASS', 4151], ['SEX OFFENSE', 2033], ['OFFENSE INVOLVING CHILDREN', 1842], ['PUBLIC PEACE VIOLATION', 1266], ['HOMICIDE', 791], ['INTERFERENCE WITH PUBLIC OFFICER', 654], ['ARSON', 587], ['PROSTITUTION', 275], ['STALKING', 194], ['INTIMIDATION', 161], ['CONCEALED CARRY LICENSE VIOLATION', 148], ['LIQUOR LAW VIOLATION', 140], ['KIDNAPPING', 118], ['OBSCENITY', 50], ['GAMBLING', 25], ['PUBLIC INDECENCY', 9], ['OTHER NARCOTIC VIOLATION', 6], ['HUMAN TRAFFICKING', 5], ['NON-CRIMINAL', 1], ['RITUALISM', 1]], [['BATTERY', 41413], ['THEFT', 40225], ['CRIMINAL DAMAGE', 24694], ['ASSAULT', 18207], ['DECEPTIVE PRACTICE', 15906], ['OTHER OFFENSE', 12396], ['MOTOR VEHICLE THEFT', 9896], ['BURGLARY', 8703], ['WEAPONS VIOLATION', 8416], ['ROBBERY', 7847], ['NARCOTICS', 7269], ['CRIMINAL TRESPASS', 4151], ['SEX OFFENSE', 2033], ['OFFENSE INVOLVING CHILDREN', 1842], ['PUBLIC PEACE VIOLATION', 1266], ['HOMICIDE', 791], ['INTERFERENCE WITH PUBLIC OFFICER', 654], ['ARSON', 587], ['PROSTITUTION', 275], ['STALKING', 194], ['INTIMIDATION', 161], ['CONCEALED CARRY LICENSE VIOLATION', 148], ['LIQUOR LAW VIOLATION', 140], ['KIDNAPPING', 118], ['OBSCENITY', 50], ['GAMBLING', 25], ['PUBLIC INDECENCY', 9], ['OTHER NARCOTIC VIOLATION', 6], ['HUMAN TRAFFICKING', 5], ['NON-CRIMINAL', 1], ['RITUALISM', 1]], [['BATTERY', 41413], ['THEFT', 40225], ['CRIMINAL DAMAGE', 24694], ['ASSAULT', 18207], ['DECEPTIVE PRACTICE', 15906], ['OTHER OFFENSE', 12396], ['MOTOR VEHICLE THEFT', 9896], ['BURGLARY', 8703], ['WEAPONS VIOLATION', 8416], ['ROBBERY', 7847], ['NARCOTICS', 7269], ['CRIMINAL TRESPASS', 4151], ['SEX OFFENSE', 2033], ['OFFENSE INVOLVING CHILDREN', 1842], ['PUBLIC PEACE VIOLATION', 1266], ['HOMICIDE', 791], ['INTERFERENCE WITH PUBLIC OFFICER', 654], ['ARSON', 587], ['PROSTITUTION', 275], ['STALKING', 194], ['INTIMIDATION', 161], ['CONCEALED CARRY LICENSE VIOLATION', 148], ['LIQUOR LAW VIOLATION', 140], ['KIDNAPPING', 118], ['OBSCENITY', 50], ['GAMBLING', 25], ['PUBLIC INDECENCY', 9], ['OTHER NARCOTIC VIOLATION', 6], ['HUMAN TRAFFICKING', 5], ['NON-CRIMINAL', 1], ['RITUALISM', 1]], [['BATTERY', 41413], ['THEFT', 40225], ['CRIMINAL DAMAGE', 24694], ['ASSAULT', 18207], ['DECEPTIVE PRACTICE', 15906], ['OTHER OFFENSE', 12396], ['MOTOR VEHICLE THEFT', 9896], ['BURGLARY', 8703], ['WEAPONS VIOLATION', 8416], ['ROBBERY', 7847], ['NARCOTICS', 7269], ['CRIMINAL TRESPASS', 4151], ['SEX OFFENSE', 2033], ['OFFENSE INVOLVING CHILDREN', 1842], ['PUBLIC PEACE VIOLATION', 1266], ['HOMICIDE', 791], ['INTERFERENCE WITH PUBLIC OFFICER', 654], ['ARSON', 587], ['PROSTITUTION', 275], ['STALKING', 194], ['INTIMIDATION', 161], ['CONCEALED CARRY LICENSE VIOLATION', 148], ['LIQUOR LAW VIOLATION', 140], ['KIDNAPPING', 118], ['OBSCENITY', 50], ['GAMBLING', 25], ['PUBLIC INDECENCY', 9], ['OTHER NARCOTIC VIOLATION', 6], ['HUMAN TRAFFICKING', 5], ['NON-CRIMINAL', 1], ['RITUALISM', 1]]]
Count_by_year.sort_values(by = ["Year","Count"], ascending=[True,False], inplace=True)
Count_by_year
Crime_Type | Year | Count | |
---|---|---|---|
0 | THEFT | 2016 | 61037 |
1 | BATTERY | 2016 | 50246 |
2 | CRIMINAL DAMAGE | 2016 | 30931 |
3 | ASSAULT | 2016 | 18720 |
4 | DECEPTIVE PRACTICE | 2016 | 17374 |
... | ... | ... | ... |
176 | GAMBLING | 2021 | 13 |
177 | HUMAN TRAFFICKING | 2021 | 12 |
178 | NON-CRIMINAL | 2021 | 4 |
179 | PUBLIC INDECENCY | 2021 | 4 |
180 | OTHER NARCOTIC VIOLATION | 2021 | 2 |
181 rows × 3 columns
fig=px.bar(
Count_by_year,x='Crime_Type',y='Count',animation_frame='Year',
category_orders={'Crime_Type':Count_by_year.Crime_Type.unique()},
color='Count',color_continuous_scale='Agsunset_r',log_y=True)
fig.update_layout(
font={"family":"sans-serif", "color":"black"},
title={
'text': "Crimes Cases of Different Crime Types: 2016-2021",
'font': dict(size=20),
'y':0.975,
'x':0,
'yanchor': 'top'},
template="plotly_white",
yaxis=dict(type="log",title='Crime Cases'),
xaxis = dict(title='Select Year with Slider', tickangle=-45)
)
fig["layout"].pop("updatemenus")
fig.update(layout_coloraxis_showscale=False)
fig['layout']['sliders'][0]['pad']=dict(r= 10, t= 200,l=-70)
fig.write_html('barplot_trend.html',config={'responsive':False})
fig.show()