#!pip install sodapy
import pandas as pd
import numpy as np
from sodapy import Socrata
import time
import warnings
warnings.filterwarnings('ignore')
# Open a Socrata session against data.cityofchicago.org.
# The three credential strings are placeholders that the user must fill in.
client = Socrata(
    "data.cityofchicago.org",
    app_token="Your App Token",
    username="example@domain.com",
    password="Your Password",
)

# Fetch every row of dataset "ijzp-q8t2" whose year is after 2015
# (capped at 5,000,000 rows) and print how many seconds the request took.
start = time.time()
results = client.get("ijzp-q8t2", limit=5000000, where="year>2015")
end = time.time()
print(end - start)

# Turn the returned records into a DataFrame, look at the distinct years,
# and cache the raw download on disk.
results_df = pd.DataFrame.from_records(results)
set(results_df["year"])
results_df.to_csv("Chicago_Crime_16_21.csv", index=False)
# Reload the cached raw download and preview its first three rows.
data = pd.read_csv("Chicago_Crime_16_21.csv")
data.head(n=3)
id | case_number | date | block | iucr | primary_type | description | arrest | domestic | beat | ... | community_area | fbi_code | year | updated_on | location_description | x_coordinate | y_coordinate | latitude | longitude | location | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 11645836 | JC212333 | 2016-05-01T00:25:00.000 | 055XX S ROCKWELL ST | 1153 | DECEPTIVE PRACTICE | FINANCIAL IDENTITY THEFT OVER $ 300 | False | False | 824 | ... | 63.0 | 11 | 2016 | 2019-04-06T16:04:43.000 | NaN | NaN | NaN | NaN | NaN | NaN |
1 | 11043021 | JA367631 | 2016-10-19T19:00:00.000 | 075XX S YATES BLVD | 0610 | BURGLARY | FORCIBLE ENTRY | False | False | 421 | ... | 43.0 | 05 | 2016 | 2017-08-05T15:50:08.000 | RESTAURANT | NaN | NaN | NaN | NaN | NaN |
2 | 11243066 | JB168427 | 2016-03-29T07:00:00.000 | 067XX S RIDGELAND AVE | 1153 | DECEPTIVE PRACTICE | FINANCIAL IDENTITY THEFT OVER $ 300 | False | False | 332 | ... | 43.0 | 11 | 2016 | 2018-03-01T15:54:55.000 | OTHER | NaN | NaN | NaN | NaN | NaN |
3 rows × 22 columns
# Both timestamp columns hold strings such as "2016-05-01T00:25:00.000":
# swap the "T" separator for a space, then parse them into datetimes.
timestamp_format = "%Y-%m-%d %H:%M:%S.%f"
for timestamp_column in ("date", "updated_on"):
    data[timestamp_column] = pd.to_datetime(
        data[timestamp_column].str.replace("T", " "),
        format=timestamp_format,
    )

# Keep only rows with no missing value in any column.
data.dropna(how="any", inplace=True)

# Hour of day of each incident.
data["hour"] = data["date"].dt.hour

# Exclude records dated 2022.
data = data[data["year"] != 2022]

# The two area identifiers become plain integers.
data["district"] = data["district"].astype("int")
data["community_area"] = data["community_area"].astype("int")

# Discard the columns the later analysis does not use, then renumber the rows.
unused_columns = [
    "id", "case_number", "iucr", "beat", "ward", "fbi_code",
    "x_coordinate", "y_coordinate", "location",
]
data.drop(columns=unused_columns, inplace=True)
data.reset_index(drop=True, inplace=True)
data.shape
(1461450, 14)
# Collapse spelling variants of the crime categories into one label each, then
# save the cleaned table.
#
# The mapping points straight at the final labels: the original chain first
# rewrote "CRIM SEXUAL ASSAULT" to "CRIMINAL SEXUAL ASSAULT" and then folded
# "CRIMINAL SEXUAL ASSAULT" into "SEX OFFENSE".
#
# The result is assigned back to the column instead of calling
# `data.primary_type.replace(..., inplace=True)`: that form mutates a Series
# obtained by attribute access, which under pandas Copy-on-Write leaves `data`
# itself unchanged.
primary_type_aliases = {
    "CRIM SEXUAL ASSAULT": "SEX OFFENSE",
    "CRIMINAL SEXUAL ASSAULT": "SEX OFFENSE",
    "NON-CRIMINAL (SUBJECT SPECIFIED)": "NON-CRIMINAL",
    "NON - CRIMINAL": "NON-CRIMINAL",
}
data["primary_type"] = data["primary_type"].replace(primary_type_aliases)
data.to_csv("crime_clean.csv", index=False)
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
# Load the cleaned crime table from disk and preview its first five rows.
data = pd.read_csv("crime_clean.csv")
data.head(n=5)
date | block | primary_type | description | arrest | domestic | district | community_area | year | updated_on | location_description | latitude | longitude | hour | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2016-05-03 09:45:00 | 055XX W VAN BUREN ST | ASSAULT | AGGRAVATED - HANDGUN | False | False | 15 | 25 | 2016 | 2020-03-25 15:45:43 | STREET | 41.874758 | -87.762379 | 9 |
1 | 2016-05-19 00:00:00 | 026XX W NORTH AVE | BATTERY | AGGRAVATED - HANDGUN | False | False | 14 | 24 | 2016 | 2020-03-25 15:45:43 | STREET | 41.910261 | -87.692759 | 0 |
2 | 2016-05-22 21:25:00 | 057XX W BLOOMINGDALE AVE | BATTERY | AGGRAVATED - HANDGUN | False | False | 25 | 25 | 2016 | 2020-03-25 15:45:43 | PARK PROPERTY | 41.912628 | -87.768864 | 21 |
3 | 2016-06-01 01:22:00 | 013XX W LELAND AVE | BATTERY | AGGRAVATED - HANDGUN | False | False | 19 | 3 | 2016 | 2020-03-25 15:45:43 | SIDEWALK | 41.967145 | -87.663292 | 1 |
4 | 2016-06-10 15:00:00 | 007XX S HOMAN AVE | ASSAULT | AGGRAVATED - HANDGUN | False | False | 11 | 27 | 2016 | 2020-03-25 15:45:43 | STREET | 41.872253 | -87.710733 | 15 |
# Count the records of each crime type within each year.
# value_counts() lists the types in descending order of count inside each year.
#
# The counts Series is renamed to "Count" before reset_index() because its
# default name depends on the pandas version ("primary_type" before 2.0,
# "count" from 2.0 on); relying on the old name makes the later column
# selection fail with a KeyError on current pandas.
Count_by_year = (
    data.groupby("year")["primary_type"]
    .value_counts()
    .rename("Count")
    .reset_index()
    .rename(columns={"primary_type": "Crime_Type", "year": "Year"})
)

# Fix the column order and make sure the year is an integer.
Count_by_year = Count_by_year.loc[:, ["Crime_Type", "Year", "Count"]]
Count_by_year.Year = Count_by_year.Year.astype("int")

# Distinct crime types, in order of first appearance.
Crimetypes = Count_by_year.Crime_Type.unique()
Count_by_year
Crime_Type | Year | Count | |
---|---|---|---|
0 | THEFT | 2016 | 61037 |
1 | BATTERY | 2016 | 50246 |
2 | CRIMINAL DAMAGE | 2016 | 30931 |
3 | ASSAULT | 2016 | 18720 |
4 | DECEPTIVE PRACTICE | 2016 | 17374 |
... | ... | ... | ... |
176 | GAMBLING | 2021 | 13 |
177 | HUMAN TRAFFICKING | 2021 | 12 |
178 | NON-CRIMINAL | 2021 | 4 |
179 | PUBLIC INDECENCY | 2021 | 4 |
180 | OTHER NARCOTIC VIOLATION | 2021 | 2 |
181 rows × 3 columns
# Order by year and count, both descending, so each year's rows run from the
# most to the least frequent crime type.
Count_by_year.sort_values(by=["Year", "Count"], ascending=False, inplace=True)

# Build one list per year of [crime type, count] pairs.
# The filter must use the loop variable: the earlier version compared against
# the literal 2020, so all six entries were copies of the 2020 ranking.
output_list = []
for year in [2016, 2017, 2018, 2019, 2020, 2021]:
    rows_for_year = Count_by_year[Count_by_year.Year == year]
    output_list.append(rows_for_year[["Crime_Type", "Count"]].values.tolist())
output_list
[[['BATTERY', 41413], ['THEFT', 40225], ['CRIMINAL DAMAGE', 24694], ['ASSAULT', 18207], ['DECEPTIVE PRACTICE', 15906], ['OTHER OFFENSE', 12396], ['MOTOR VEHICLE THEFT', 9896], ['BURGLARY', 8703], ['WEAPONS VIOLATION', 8416], ['ROBBERY', 7847], ['NARCOTICS', 7269], ['CRIMINAL TRESPASS', 4151], ['SEX OFFENSE', 2033], ['OFFENSE INVOLVING CHILDREN', 1842], ['PUBLIC PEACE VIOLATION', 1266], ['HOMICIDE', 791], ['INTERFERENCE WITH PUBLIC OFFICER', 654], ['ARSON', 587], ['PROSTITUTION', 275], ['STALKING', 194], ['INTIMIDATION', 161], ['CONCEALED CARRY LICENSE VIOLATION', 148], ['LIQUOR LAW VIOLATION', 140], ['KIDNAPPING', 118], ['OBSCENITY', 50], ['GAMBLING', 25], ['PUBLIC INDECENCY', 9], ['OTHER NARCOTIC VIOLATION', 6], ['HUMAN TRAFFICKING', 5], ['NON-CRIMINAL', 1], ['RITUALISM', 1]], [['BATTERY', 41413], ['THEFT', 40225], ['CRIMINAL DAMAGE', 24694], ['ASSAULT', 18207], ['DECEPTIVE PRACTICE', 15906], ['OTHER OFFENSE', 12396], ['MOTOR VEHICLE THEFT', 9896], ['BURGLARY', 8703], ['WEAPONS VIOLATION', 8416], ['ROBBERY', 7847], ['NARCOTICS', 7269], ['CRIMINAL TRESPASS', 4151], ['SEX OFFENSE', 2033], ['OFFENSE INVOLVING CHILDREN', 1842], ['PUBLIC PEACE VIOLATION', 1266], ['HOMICIDE', 791], ['INTERFERENCE WITH PUBLIC OFFICER', 654], ['ARSON', 587], ['PROSTITUTION', 275], ['STALKING', 194], ['INTIMIDATION', 161], ['CONCEALED CARRY LICENSE VIOLATION', 148], ['LIQUOR LAW VIOLATION', 140], ['KIDNAPPING', 118], ['OBSCENITY', 50], ['GAMBLING', 25], ['PUBLIC INDECENCY', 9], ['OTHER NARCOTIC VIOLATION', 6], ['HUMAN TRAFFICKING', 5], ['NON-CRIMINAL', 1], ['RITUALISM', 1]], [['BATTERY', 41413], ['THEFT', 40225], ['CRIMINAL DAMAGE', 24694], ['ASSAULT', 18207], ['DECEPTIVE PRACTICE', 15906], ['OTHER OFFENSE', 12396], ['MOTOR VEHICLE THEFT', 9896], ['BURGLARY', 8703], ['WEAPONS VIOLATION', 8416], ['ROBBERY', 7847], ['NARCOTICS', 7269], ['CRIMINAL TRESPASS', 4151], ['SEX OFFENSE', 2033], ['OFFENSE INVOLVING CHILDREN', 1842], ['PUBLIC PEACE VIOLATION', 1266], ['HOMICIDE', 791], ['INTERFERENCE 
WITH PUBLIC OFFICER', 654], ['ARSON', 587], ['PROSTITUTION', 275], ['STALKING', 194], ['INTIMIDATION', 161], ['CONCEALED CARRY LICENSE VIOLATION', 148], ['LIQUOR LAW VIOLATION', 140], ['KIDNAPPING', 118], ['OBSCENITY', 50], ['GAMBLING', 25], ['PUBLIC INDECENCY', 9], ['OTHER NARCOTIC VIOLATION', 6], ['HUMAN TRAFFICKING', 5], ['NON-CRIMINAL', 1], ['RITUALISM', 1]], [['BATTERY', 41413], ['THEFT', 40225], ['CRIMINAL DAMAGE', 24694], ['ASSAULT', 18207], ['DECEPTIVE PRACTICE', 15906], ['OTHER OFFENSE', 12396], ['MOTOR VEHICLE THEFT', 9896], ['BURGLARY', 8703], ['WEAPONS VIOLATION', 8416], ['ROBBERY', 7847], ['NARCOTICS', 7269], ['CRIMINAL TRESPASS', 4151], ['SEX OFFENSE', 2033], ['OFFENSE INVOLVING CHILDREN', 1842], ['PUBLIC PEACE VIOLATION', 1266], ['HOMICIDE', 791], ['INTERFERENCE WITH PUBLIC OFFICER', 654], ['ARSON', 587], ['PROSTITUTION', 275], ['STALKING', 194], ['INTIMIDATION', 161], ['CONCEALED CARRY LICENSE VIOLATION', 148], ['LIQUOR LAW VIOLATION', 140], ['KIDNAPPING', 118], ['OBSCENITY', 50], ['GAMBLING', 25], ['PUBLIC INDECENCY', 9], ['OTHER NARCOTIC VIOLATION', 6], ['HUMAN TRAFFICKING', 5], ['NON-CRIMINAL', 1], ['RITUALISM', 1]], [['BATTERY', 41413], ['THEFT', 40225], ['CRIMINAL DAMAGE', 24694], ['ASSAULT', 18207], ['DECEPTIVE PRACTICE', 15906], ['OTHER OFFENSE', 12396], ['MOTOR VEHICLE THEFT', 9896], ['BURGLARY', 8703], ['WEAPONS VIOLATION', 8416], ['ROBBERY', 7847], ['NARCOTICS', 7269], ['CRIMINAL TRESPASS', 4151], ['SEX OFFENSE', 2033], ['OFFENSE INVOLVING CHILDREN', 1842], ['PUBLIC PEACE VIOLATION', 1266], ['HOMICIDE', 791], ['INTERFERENCE WITH PUBLIC OFFICER', 654], ['ARSON', 587], ['PROSTITUTION', 275], ['STALKING', 194], ['INTIMIDATION', 161], ['CONCEALED CARRY LICENSE VIOLATION', 148], ['LIQUOR LAW VIOLATION', 140], ['KIDNAPPING', 118], ['OBSCENITY', 50], ['GAMBLING', 25], ['PUBLIC INDECENCY', 9], ['OTHER NARCOTIC VIOLATION', 6], ['HUMAN TRAFFICKING', 5], ['NON-CRIMINAL', 1], ['RITUALISM', 1]], [['BATTERY', 41413], ['THEFT', 40225], ['CRIMINAL 
DAMAGE', 24694], ['ASSAULT', 18207], ['DECEPTIVE PRACTICE', 15906], ['OTHER OFFENSE', 12396], ['MOTOR VEHICLE THEFT', 9896], ['BURGLARY', 8703], ['WEAPONS VIOLATION', 8416], ['ROBBERY', 7847], ['NARCOTICS', 7269], ['CRIMINAL TRESPASS', 4151], ['SEX OFFENSE', 2033], ['OFFENSE INVOLVING CHILDREN', 1842], ['PUBLIC PEACE VIOLATION', 1266], ['HOMICIDE', 791], ['INTERFERENCE WITH PUBLIC OFFICER', 654], ['ARSON', 587], ['PROSTITUTION', 275], ['STALKING', 194], ['INTIMIDATION', 161], ['CONCEALED CARRY LICENSE VIOLATION', 148], ['LIQUOR LAW VIOLATION', 140], ['KIDNAPPING', 118], ['OBSCENITY', 50], ['GAMBLING', 25], ['PUBLIC INDECENCY', 9], ['OTHER NARCOTIC VIOLATION', 6], ['HUMAN TRAFFICKING', 5], ['NON-CRIMINAL', 1], ['RITUALISM', 1]]]
# Reorder for plotting: years ascending, and within each year the crime type
# with the highest count first.
Count_by_year = Count_by_year.sort_values(
    ["Year", "Count"],
    ascending=[True, False],
)
Count_by_year
Crime_Type | Year | Count | |
---|---|---|---|
0 | THEFT | 2016 | 61037 |
1 | BATTERY | 2016 | 50246 |
2 | CRIMINAL DAMAGE | 2016 | 30931 |
3 | ASSAULT | 2016 | 18720 |
4 | DECEPTIVE PRACTICE | 2016 | 17374 |
... | ... | ... | ... |
176 | GAMBLING | 2021 | 13 |
177 | HUMAN TRAFFICKING | 2021 | 12 |
178 | NON-CRIMINAL | 2021 | 4 |
179 | PUBLIC INDECENCY | 2021 | 4 |
180 | OTHER NARCOTIC VIOLATION | 2021 | 2 |
181 rows × 3 columns
# Animated bar chart of case counts per crime type: one animation frame per
# year, bars coloured by count, counts drawn on a logarithmic axis.
crime_type_order = Count_by_year.Crime_Type.unique()
fig = px.bar(
    Count_by_year,
    x="Crime_Type",
    y="Count",
    animation_frame="Year",
    category_orders={"Crime_Type": crime_type_order},
    color="Count",
    color_continuous_scale="Agsunset_r",
    log_y=True,
)

bar_title = {
    "text": "Crimes Cases of Different Crime Types: 2016-2021",
    "font": {"size": 20},
    "y": 0.975,
    "x": 0,
    "yanchor": "top",
}
fig.update_layout(
    font=dict(family="sans-serif", color="black"),
    title=bar_title,
    template="plotly_white",
    yaxis={"type": "log", "title": "Crime Cases"},
    xaxis={"title": "Select Year with Slider", "tickangle": -45},
)

# Drop the layout's `updatemenus` entry so only the year slider is left
# (presumably this holds the animation play/stop buttons - confirm).
fig["layout"].pop("updatemenus")
# Hide the colour bar.
fig.update(layout_coloraxis_showscale=False)
# Adjust the padding around the slider.
fig["layout"]["sliders"][0]["pad"] = {"r": 10, "t": 200, "l": -70}

fig.write_html("barplot_trend.html", config={"responsive": False})
fig.show()
#!pip install plotly_calplot
#!pip install calmap
from plotly_calplot import calplot
import calmap
# `date` was read back from crime_clean.csv as text such as
# "2016-05-03 09:45:00", with no fractional seconds. The format therefore must
# not demand a ".%f" part: recent pandas versions match the format strictly
# and reject such values.
data.date = pd.to_datetime(data.date, format="%Y-%m-%d %H:%M:%S")

# Calendar day of each incident (time of day removed).
data["day"] = data["date"].dt.date

# Daily number of records for the five crime types shown in the calendar plot.
calendar_crime_types = ["THEFT", "BATTERY", "CRIMINAL DAMAGE", "ASSAULT", "ROBBERY"]
df_calendar = (
    data.loc[data.primary_type.isin(calendar_crime_types), ["day", "date"]]
    .groupby(["day"])
    .count()
    .reset_index()
    .rename(columns={"date": "count"})
)

# Convert the day column to datetimes.
df_calendar.day = pd.to_datetime(df_calendar.day, format="%Y-%m-%d")
df_calendar
day | count | |
---|---|---|
0 | 2016-01-01 | 533 |
1 | 2016-01-02 | 360 |
2 | 2016-01-03 | 435 |
3 | 2016-01-04 | 383 |
4 | 2016-01-05 | 405 |
... | ... | ... |
2187 | 2021-12-27 | 319 |
2188 | 2021-12-28 | 292 |
2189 | 2021-12-29 | 304 |
2190 | 2021-12-30 | 350 |
2191 | 2021-12-31 | 351 |
2192 rows × 2 columns
# Calendar heatmap of the daily counts in df_calendar.
calendar_plot = calplot(
    df_calendar,
    x="day",
    y="count",
    years_title=True,
    title='Crime Case Calendar of Chicago: 2016-2021 ',
    dark_theme=False,
    month_lines_width=3,
    month_lines_color="#fff",
    colorscale="Agsunset_r",
)
fig = go.Figure(data=calendar_plot)

# Give every heatmap trace the same colour range (overall min/max of the daily
# counts) and show the colour bar.
lowest_count = df_calendar["count"].min()
highest_count = df_calendar["count"].max()
fig.update_traces(
    selector={"type": "heatmap"},
    zmax=highest_count,
    zmin=lowest_count,
)
fig.update_traces(
    selector={"type": "heatmap"},
    showscale=True,
)

# Title, margins and hover-label styling.
fig.update_layout(
    title={"font": {"size": 20, "family": "sans-serif", "color": "black"}},
    title_x=0,
    title_y=0.98,
    margin={"t": 70, "b": 50},
    hoverlabel={
        "bgcolor": "white",
        "font_size": 13,
        "font_family": "sans-serif",
    },
)
fig.show()
fig.write_html("calendar_plot.html", config={"responsive": False})
# Number of records for every (hour of day, crime type) combination.
df_radar = (
    data[["hour", "date", "primary_type"]]
    .groupby(["hour", "primary_type"])
    .count()
    .reset_index()
    .rename(columns={"date": "count"})
)

# Turn the hour into a two-character text label ("00", "01", ..., "23").
df_radar.hour = df_radar.hour.astype(str).str.zfill(2)
df_radar
hour | primary_type | count | |
---|---|---|---|
0 | 00 | ARSON | 205 |
1 | 00 | ASSAULT | 3859 |
2 | 00 | BATTERY | 14428 |
3 | 00 | BURGLARY | 3065 |
4 | 00 | CONCEALED CARRY LICENSE VIOLATION | 42 |
... | ... | ... | ... |
693 | 23 | ROBBERY | 3227 |
694 | 23 | SEX OFFENSE | 716 |
695 | 23 | STALKING | 48 |
696 | 23 | THEFT | 10420 |
697 | 23 | WEAPONS VIOLATION | 2941 |
698 rows × 3 columns
# One colour per radar trace, in the same order as radar_crime_types below.
palette = ["#9b5de5", "#c65ccd", "#f15bb5", "#f8a07b", "#fbc25e",
           "#fee440", "#7fd09d", "#00bbf9", "#00d8e7", "#00f5d4"]

# Polar layout: angular axis rotated by 90 and running clockwise.
layout = go.Layout(
    polar=dict(
        angularaxis=dict(rotation=90, direction="clockwise", tickfont=dict(size=15))
    )
)

# Crime types drawn on the radar chart, one trace each.
radar_crime_types = [
    "THEFT", "BATTERY", "CRIMINAL DAMAGE", "ASSAULT", "ROBBERY",
    "DECEPTIVE PRACTICE", "NARCOTICS", "MOTOR VEHICLE THEFT", "BURGLARY",
    "WEAPONS VIOLATION",
]

# Build the traces in a loop instead of ten copy-pasted stanzas.
# Each trace repeats its "00" row at the end so the line closes the circle.
# pd.concat is used for that because Series.append no longer exists in
# pandas 2.0 and later.
radar_traces = []
for position, crime_type in enumerate(radar_crime_types):
    hourly = df_radar[df_radar.primary_type == crime_type]
    closed = pd.concat([hourly, hourly[hourly.hour == "00"]])
    radar_traces.append(
        go.Scatterpolar(
            name=crime_type,
            r=closed["count"],
            theta=closed["hour"],
            line=dict(
                color=palette[position],
                shape="spline",
                # Kept as before: the first trace (THEFT) uses smoothing 1,
                # all the others 0.7.
                smoothing=1 if position == 0 else 0.7,
            ),
        )
    )

fig = go.Figure(data=radar_traces, layout=layout)
# Dropdown menu for the radar chart: "ALL" shows every trace, and each other
# entry shows exactly one trace and retitles the figure accordingly.
# The labels are listed in the same order as the figure's ten traces.
radar_menu_labels = [
    "THEFT", "BATTERY", "CRIMINAL DAMAGE", "ASSAULT", "ROBBERY",
    "DECEPTIVE PRACTICE", "NARCOTICS", "MOTOR VEHICLE THEFT", "BURGLARY",
    "WEAPONS VIOLATION",
]
radar_trace_total = len(radar_menu_labels)

radar_buttons = [
    dict(
        label="ALL",
        method="update",
        args=[
            dict(visible=[True] * radar_trace_total),
            dict(title="Top 10 Crime Hourly Distribution: 2016-2021"),
        ],
    )
]
for shown_index, menu_label in enumerate(radar_menu_labels):
    # Visibility mask that is True only at this trace's position.
    only_this_trace = [
        trace_index == shown_index for trace_index in range(radar_trace_total)
    ]
    radar_buttons.append(
        dict(
            label=menu_label,
            method="update",
            args=[
                dict(visible=only_this_trace),
                dict(title=f"{menu_label} Hourly Distribution: 2016-2021"),
            ],
        )
    )

fig.update_layout(
    updatemenus=[
        dict(
            font=dict(size=13),
            active=0,
            buttons=radar_buttons,
        )
    ]
)
# Overall look of the radar chart: fonts, radial axis, legend and title
# placement, dropdown position and hover-label styling.
fig.update_layout(
    font={"family": "sans-serif", "color": "black"},
    polar={"radialaxis": {"visible": True}},
    showlegend=True,
    template="plotly_white",
    title_text="Top 10 Crime Hourly Distribution: 2016-2021",
    title_x=0,
    legend={
        "font": {"size": 13},
        "yanchor": "top",
        "y": 0.9,
        "xanchor": "left",
        "x": -0.1,
    },
    title={"font": {"size": 20}},
    updatemenus=[{"x": -0.1, "xanchor": "left"}],
    hoverlabel={
        "bgcolor": "white",
        "font_size": 13,
        "font_family": "sans-serif",
    },
)

# Draw every trace as a line only and show hour and count on hover.
fig.update_traces(
    mode="lines",
    hovertemplate='Hour: %{theta}<br>Count: %{r}',
)
fig.show()
fig.write_html("radar_plot.html", config={"responsive": False})
import altair as alt
# Number of records per calendar day and crime type.
data_sum = (
    data[["day", "primary_type", "year"]]
    .groupby(["day", "primary_type"])
    .count()
    .reset_index()
    .rename(columns={"year": "num", "day": "date"})
)
data_sum["date"] = pd.to_datetime(data_sum.date)

# Keep the five crime types used in the linked chart. The explicit copy makes
# the column assignments below act on an independent frame, not on a slice.
linked_crime_types = ["THEFT", "BATTERY", "CRIMINAL DAMAGE", "ASSAULT", "ROBBERY"]
data_sum = data_sum[data_sum.primary_type.isin(linked_crime_types)].copy()
data_sum["day"] = data_sum.date.dt.day
data_sum["month"] = data_sum.date.dt.month

# Average the daily count for each (crime type, month, day) over the years.
# Only `num` is averaged: a bare .mean() would also process the datetime
# `date` column, which is version-dependent and is rebuilt just below anyway.
df = data_sum.groupby(["primary_type", "month", "day"])["num"].mean().reset_index()

# Place every month/day on a single reference year (2021) so the curves share
# one time axis; 29 February does not exist in 2021 and is dropped.
df["date"] = "2021-" + df["month"].astype("str") + "-" + df["day"].astype("str")
df = df[df.date != "2021-2-29"].copy()
df.date = pd.to_datetime(df.date)
df
primary_type | month | day | num | date | |
---|---|---|---|---|---|
0 | ASSAULT | 1 | 1 | 47.333333 | 2021-01-01 |
1 | ASSAULT | 1 | 2 | 35.333333 | 2021-01-02 |
2 | ASSAULT | 1 | 3 | 45.666667 | 2021-01-03 |
3 | ASSAULT | 1 | 4 | 40.166667 | 2021-01-04 |
4 | ASSAULT | 1 | 5 | 43.000000 | 2021-01-05 |
... | ... | ... | ... | ... | ... |
1825 | THEFT | 12 | 27 | 128.166667 | 2021-12-27 |
1826 | THEFT | 12 | 28 | 135.666667 | 2021-12-28 |
1827 | THEFT | 12 | 29 | 121.833333 | 2021-12-29 |
1828 | THEFT | 12 | 30 | 130.333333 | 2021-12-30 |
1829 | THEFT | 12 | 31 | 109.833333 | 2021-12-31 |
1825 rows × 5 columns
# Crime types in display order; used for the colour domain and for the row
# order of the two lower panels.
crime_order = ["THEFT", "BATTERY", "CRIMINAL DAMAGE", "ASSAULT", "ROBBERY"]

scale = alt.Scale(
    domain=crime_order,
    range=["#9467bd", "#1f77b4", "#aec7e8", "#95d5b2", "#e7ba52"],
)
color = alt.Color(
    "primary_type:N",
    scale=scale,
    legend=alt.Legend(title="Crime Primary Type"),
)

# Selections. Only `click` (single selection on primary_type, everything
# selected while empty) is attached to the charts below; `brush` and `click2`
# are created but not used in this section.
brush = alt.selection_interval(encodings=["x"])
click = alt.selection_single(empty="all", fields=["primary_type"])
click2 = alt.selection_multi(encodings=["color"])

# Shared base chart carrying the data and the click selection.
base = alt.Chart(df).properties(width=550).add_selection(click)

# Top panel: one line per crime type along the month/day axis; types that are
# not selected turn light gray.
points = base.mark_line().encode(
    x=alt.X("monthdate(date):T", title=""),
    y=alt.Y(
        "num:Q",
        title="Number of Crimes",
        scale=alt.Scale(domain=[0, 250]),
    ),
    color=alt.condition(click, color, alt.value("lightgray")),
).properties(height=300)

# Bottom-left panel: summed `num` per crime type, limited to the selection.
bars = base.mark_bar().encode(
    x=alt.X("sum(num):Q", title="Yearly Total Crimes"),
    y=alt.Y("primary_type:N", title="", sort=crime_order),
    color=alt.condition(click, color, alt.value("lightgray")),
).transform_filter(click).properties(width=260)

# Bottom-right panel: box plot of `num` per crime type, limited to the
# selection; its y axis is hidden.
boxs = base.mark_boxplot().encode(
    x=alt.X("num:Q", title="Daily Crimes"),
    y=alt.Y("primary_type:N", sort=crime_order, axis=None),
    color=alt.condition(click, color, alt.value("lightgray")),
).transform_filter(click).properties(width=260)

# Stack the line chart above the bar chart and box plot, which sit side by side.
fig = points & (bars | boxs)

# Text styling shared by the axes and the legend.
label_and_title_style = dict(
    labelColor="black", labelFontSize=13, labelFont="sans-serif", labelFontWeight="normal",
    titleColor="black", titleFontSize=15, titleFont="sans-serif", titleFontWeight="normal",
)
fig = (
    fig.properties(title="Daily Crime Trend - Average of 6 Years: 2016-2021")
    .configure_axis(grid=False, **label_and_title_style)
    .configure_legend(**label_and_title_style)
    .configure_title(color="black", fontSize=20, font="sans-serif", fontWeight="normal")
    .configure_view(strokeWidth=0)
)
fig.save("linked_plot.html")
fig
#!pip install folium
#! pip install geopandas
import folium
import geopandas as gpd
import json
# Work on a copy of the crime table.
df_agg = data.copy()

# Records per (year, community area), with display-friendly column names.
community_crime = (
    df_agg.groupby(["year", "community_area"])
    .count()
    .reset_index()[["year", "community_area", "date"]]
    .rename(columns={"community_area": "Community Area", "date": "Crime Count"})
)

# Store the community area number as text.
community_crime["Community Area"] = community_crime["Community Area"].astype("str")
community_crime
year | Community Area | Crime Count | |
---|---|---|---|
0 | 2016 | 1 | 3637 |
1 | 2016 | 2 | 3256 |
2 | 2016 | 3 | 3515 |
3 | 2016 | 4 | 1957 |
4 | 2016 | 5 | 1369 |
... | ... | ... | ... |
457 | 2021 | 73 | 2498 |
458 | 2021 | 74 | 490 |
459 | 2021 | 75 | 1713 |
460 | 2021 | 76 | 1394 |
461 | 2021 | 77 | 2365 |
462 rows × 3 columns
# Lookup table from community area number to community name, read from
# CommAreas.csv; the number is stored as text, like in community_crime.
com_name = pd.read_csv("CommAreas.csv")[["AREA_NUMBE", "COMMUNITY"]]
com_name.columns = ["Community Area", "Community"]
com_name["Community Area"] = com_name["Community Area"].astype("str")

# Attach the community name to every (year, community area) row; the left
# join keeps rows that have no match in the lookup table.
community_crime = community_crime.merge(
    com_name,
    on=["Community Area"],
    how="left",
)
# Read the GeoJSON file. The `with` block closes the file handle: the earlier
# `fh.close` lacked the call parentheses, so it only referenced the method and
# the file stayed open.
with open("chicago_geo.geojson") as fh:
    commu_geo_json = json.load(fh)

# Copy each feature's `area_num_1` property to a top-level "id" key.
for feature in commu_geo_json["features"]:
    feature["id"] = feature["properties"]["area_num_1"]
# Animated choropleth of crime counts per community area, one frame per year.
fig = px.choropleth_mapbox(
    community_crime,
    geojson=commu_geo_json,
    locations="Community Area",
    color="Crime Count",
    animation_frame="year",
    color_continuous_scale="Agsunset_r",
    mapbox_style="carto-positron",
    range_color=[1000, 8000],
    zoom=9,
    center={"lat": 41.85, "lon": -87.7},
    opacity=0.6,
    # Hover shows the community name and the count, not the area number.
    hover_data={
        "Community Area": False,
        "Community": True,
        "Crime Count": True,
    },
    title="Yearly Crime Cases Map - Communities in Chicago: 2016-2021",
)

# Margins, fonts, title placement and hover-label styling in a single call.
fig.update_layout(
    margin={"r": 0, "t": 80, "l": 10, "b": 10},
    font_family="sans-serif",
    title={"font": {"size": 20, "color": "black"}, "x": 0.01},
    hoverlabel={
        "bgcolor": "white",
        "font_size": 13,
        "font_family": "sans-serif",
    },
)
fig.show()
fig.write_html("geomap_community.html", config={"responsive": False})