Data Science (CSCI 39542) Fall 2021 Project
import pandas as pd
import pandasql as psql
df = pd.read_csv('NYPD_Complaint_Data_Current_Year_To_Date_.csv')
# Using pandasql to pick out the desire columns
newDF = psql.sqldf("""
SELECT BORO_NM, CMPLNT_FR_DT, LAW_CAT_CD, OFNS_DESC, X_COORD_CD, Y_COORD_CD, Lat_Lon
FROM df
ORDER BY CMPLNT_FR_DT ASC
""")
newDF.to_csv("Specific_Cols.csv", index = False)
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv('Specific_Cols.csv')
print(df['BORO_NM'].value_counts())
# Making Bar Graph of the Total # of Crimes from each Borough
print(df['BORO_NM'].value_counts().plot(kind = "bar"))
plt.xticks(rotation=0, horizontalalignment="center")
plt.title("Total Number of Crimes in each Borough from 2019 to 2021")
plt.xlabel("Borough", fontsize=12, labelpad=10)
plt.ylabel('Number of Crimes',fontsize=12, labelpad=10)
# Putting values above each bar of bar graph
boroNumsY = [91122, 79229, 69912, 67724, 13801]
for i in range(5):
plt.text(i, boroNumsY[i], boroNumsY[i], ha='center', va='bottom')
plt.show()
#GeoJSON: https://data.cityofnewyork.us/City-Government/Borough-Boundaries/tqmj-j8zm
import pandas as pd
import json
import plotly.express as px
df = pd.read_csv('Specific_Cols.csv')
#Making two columns, one is the borough name and the second is the count of crimes in each borough
newDF = pd.DataFrame()
newDF = df
newDF['CMPLNT_FR_DT'] = pd.to_datetime(newDF['CMPLNT_FR_DT'])
#Filtering only 2019
mask = (newDF['CMPLNT_FR_DT'] < '2020-1-1')
newDF = newDF.loc[mask]
newDF = newDF['BORO_NM'].value_counts().reset_index()
newDF = newDF.rename({'index':'BORO_NM','BORO_NM':'Count'}, axis=1)
newDF['BORO_NM'] = newDF['BORO_NM'].str.title()
#Load geojson to get the geographical features of the boroughs
nyBoro = json.load(open('Borough Boundaries.geojson', 'r'))
#See the id of each borough in geojson
boro_id_map = {}
for feature in nyBoro['features']:
feature['id'] = feature['properties']['boro_code']
boro_id_map[feature['properties']['boro_name']] = feature['id']
#Getting the id number of each borough
newDF['id'] = newDF['BORO_NM'].apply(lambda x: boro_id_map[x])
#Making the choropleth map
fig = px.choropleth_mapbox(newDF, locations='id', geojson=nyBoro, color='Count', hover_name='BORO_NM',
hover_data=['Count'], mapbox_style='carto-positron', center={'lat': 40.7, 'lon': -74}, opacity=0.5)
fig.show()
fig.write_html('choropleth2019.html')
#GeoJSON: https://data.cityofnewyork.us/City-Government/Borough-Boundaries/tqmj-j8zm
import pandas as pd
import json
import plotly.express as px
df = pd.read_csv('Specific_Cols.csv')
#Making two columns, one is the borough name and the second is the count of crimes in each borough
newDF = pd.DataFrame()
newDF = df
newDF['CMPLNT_FR_DT'] = pd.to_datetime(newDF['CMPLNT_FR_DT'])
#Filtering only 2020
mask = (newDF['CMPLNT_FR_DT'] >= '2020-1-1') & (newDF['CMPLNT_FR_DT'] < '2021-1-1')
newDF = newDF.loc[mask]
newDF = newDF['BORO_NM'].value_counts().reset_index()
newDF = newDF.rename({'index':'BORO_NM','BORO_NM':'Count'}, axis=1)
newDF['BORO_NM'] = newDF['BORO_NM'].str.title()
#Load geojson to get the geographical features of the boroughs
nyBoro = json.load(open('Borough Boundaries.geojson', 'r'))
#See the id of each borough in geojson
boro_id_map = {}
for feature in nyBoro['features']:
feature['id'] = feature['properties']['boro_code']
boro_id_map[feature['properties']['boro_name']] = feature['id']
#Getting the id number of each borough
newDF['id'] = newDF['BORO_NM'].apply(lambda x: boro_id_map[x])
#Making the choropleth map
fig = px.choropleth_mapbox(newDF, locations='id', geojson=nyBoro, color='Count', hover_name='BORO_NM',
hover_data=['Count'], mapbox_style='carto-positron', center={'lat': 40.7, 'lon': -74}, opacity=0.5)
fig.show()
fig.write_html('choropleth2020.html')
#GeoJSON: https://data.cityofnewyork.us/City-Government/Borough-Boundaries/tqmj-j8zm
import pandas as pd
import json
import plotly.express as px
df = pd.read_csv('Specific_Cols.csv')
#Making two columns, one is the borough name and the second is the count of crimes in each borough
newDF = pd.DataFrame()
newDF = df
newDF['CMPLNT_FR_DT'] = pd.to_datetime(newDF['CMPLNT_FR_DT'])
#Filtering only 2021
mask = (newDF['CMPLNT_FR_DT'] >= '2021-1-1')
newDF = newDF.loc[mask]
newDF = newDF['BORO_NM'].value_counts().reset_index()
newDF = newDF.rename({'index':'BORO_NM','BORO_NM':'Count'}, axis=1)
newDF['BORO_NM'] = newDF['BORO_NM'].str.title()
#Load geojson to get the geographical features of the boroughs
nyBoro = json.load(open('Borough Boundaries.geojson', 'r'))
#See the id of each borough in geojson
boro_id_map = {}
for feature in nyBoro['features']:
feature['id'] = feature['properties']['boro_code']
boro_id_map[feature['properties']['boro_name']] = feature['id']
#Getting the id number of each borough
newDF['id'] = newDF['BORO_NM'].apply(lambda x: boro_id_map[x])
#Making the choropleth map
fig = px.choropleth_mapbox(newDF, locations='id', geojson=nyBoro, color='Count', hover_name='BORO_NM',
hover_data=['Count'], mapbox_style='carto-positron', center={'lat': 40.7, 'lon': -74}, opacity=0.5)
fig.show()
fig.write_html('choropleth2021.html')
#Reference: https://medium.com/analytics-vidhya/generating-maps-with-python-maps-with-markers-part-2-2e291d987821
import pandas as pd
import folium
from folium import plugins
df = pd.read_csv('Specific_Cols.csv')
df['CMPLNT_FR_DT'] = pd.to_datetime(df['CMPLNT_FR_DT'])
#Filtering only 2019
mask = (df['CMPLNT_FR_DT'] < '2020-1-1')
df = df.loc[mask]
nyc_map = folium.Map(location=[40.7128, -74.006], zoom_start=11)
incidents = plugins.MarkerCluster().add_to(nyc_map)
for lat, lng, label, in zip(df.Latitude, df.Longitude, df.OFNS_DESC):
folium.Marker(
location=[lat, lng],
icon=None,
popup=label,
).add_to(incidents)
nyc_map.save('Folium2021MarkerCluster.html')
#Reference: https://medium.com/analytics-vidhya/generating-maps-with-python-maps-with-markers-part-2-2e291d987821
import pandas as pd
import folium
from folium import plugins
df = pd.read_csv('Specific_Cols.csv')
df['CMPLNT_FR_DT'] = pd.to_datetime(df['CMPLNT_FR_DT'])
#Filtering only 2020
mask = (df['CMPLNT_FR_DT'] >= '2020-1-1') & (df['CMPLNT_FR_DT'] < '2021-1-1')
nyc_map = folium.Map(location=[40.7128, -74.006], zoom_start=11)
incidents = plugins.MarkerCluster().add_to(nyc_map)
for lat, lng, label, in zip(df.Latitude, df.Longitude, df.OFNS_DESC):
folium.Marker(
location=[lat, lng],
icon=None,
popup=label,
).add_to(incidents)
nyc_map.save('Folium2021MarkerCluster.html')