Restaurant Inspections (Delaware)¶
Mitch Fawcett November 9, 2023
This is an interactive map showing the results of health inspections of food prep and serving establishments in Delaware
In [38]:
%%capture --no-display
# Previous line supresses a warning about package versioning
# Load libraries
import pandas as pd # for dataframe manipulation
import numpy as np # for numerical analysis
import matplotlib.pyplot as plt # for generating plots and graphs
from matplotlib.pyplot import figure # for modifying appearance of plots & graphs
import requests # to make http post requests to the US Census geocoder
import io # for working with I/O streams and allow conversion of geocode response to dataframe
import csv # reading/writing csv files
import pickle as pk # to store and retrieve dataframes on disk
import csv # to read text files
import requests # to make http requests for data using census web API
import os # to list contents of disk drive folders
import sys # for managing system options
import folium # the mapping package
from folium import plugins # to allow cluster markers on maps
import seaborn as sns # for fancy plotting
from IPython.display import Markdown as md # for embedding variables in markdown cells
from IPython.display import Image # to embeg jpg images in notebook
from datetime import datetime
In [39]:
# Settings to improve the display of tabular results
pd.set_option('max_colwidth', 4000)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
np.set_printoptions(threshold = sys.maxsize)
IMPORT THE RAW DATASET FROM DELAWARE.GOV¶
In [40]:
# Import inspection data into a dataframe. Data provided by Data.Delaware.Gov Website
# https://data.delaware.gov/Health/Restaurant-Inspection-Violations/384s-wygj
inspections_df = pd.read_table('Restaurant_Inspection_Violations_20231105.tsv', delimiter = '\t')
# Replaces spaces with '_' in column names
inspections_df.columns = [c.replace(' ', '_') for c in inspections_df.columns]
# Convert the inspection date to a real date value so they can be sorted later
inspections_df['Inspection_Date']= pd.to_datetime(inspections_df['Inspection_Date'])
In [41]:
# Inspect the raw data
inspections_df.head()
Out[41]:
Food_Establishment_Name | Food_Establishment_Street_Address | Food_Establishment_City | Food_Establishment_Zip_Code | Inspection_Date | Inspection_Type | Violation_Code | Violation_Description | Geocoded_Location | |
---|---|---|---|---|---|---|---|---|---|
0 | 1 In Town African Restaurant | 240 S Dupont Hwy Suite # 300 | New Castle | 19720.0 | 2022-03-04 | Routine | NaN | NaN | 240 S Dupont Hwy Suite # 300\nNew Castle, DE 19720\n(39.653333979, -75.616564004) |
1 | 1000 Degree Pizza | 4526 New Linden Hill Road | Wilmington | 19808.0 | 2022-07-08 | Routine | 2-102.12 | Certified Food Protection Manager | 4526 New Linden Hill Road\nWilmington, DE 19808\n(39.737731167, -75.690144456) |
2 | 1000 Degree Pizza | 4526 New Linden Hill Road | Wilmington | 19808.0 | 2022-07-08 | Routine | 5-202.12 | Handwashing Sinks, Installation | 4526 New Linden Hill Road\nWilmington, DE 19808\n(39.737731167, -75.690144456) |
3 | 1000 Degree Pizza | 4526 New Linden Hill Road | Wilmington | 19808.0 | 2022-07-08 | Routine | 6-301.12 | Hand Drying Provision | 4526 New Linden Hill Road\nWilmington, DE 19808\n(39.737731167, -75.690144456) |
4 | 1000 Degree Pizza | 4526 New Linden Hill Road | Wilmington | 19808.0 | 2022-07-08 | Routine | 3-501.16 | Time/Temperature Control for Safety Food, Hot and Cold Holding | 4526 New Linden Hill Road\nWilmington, DE 19808\n(39.737731167, -75.690144456) |
CREATE COLUMNS FOR LONGITUDE & LATITUDE OF ESTABLISHMENTS¶
In [42]:
# Locations will be mapped using the longitude/latitude values in the 'Geocoded_Location column'.
# These numbers need to be extracted out of Geocoded_Location and added as a new column.
# Split the Geocoded_Location column on newline character \n
coordinates_df = inspections_df.Geocoded_Location.str.split(pat="\n", expand=True)
# Give the cooridnates column a better name than '2'
coordinates_df.rename(columns={0:'col_1', 1:'col_2', 2:'Lat_Long'},inplace=True)
# Inspect the result
coordinates_df["Lat_Long"].head()
Out[42]:
0 (39.653333979, -75.616564004) 1 (39.737731167, -75.690144456) 2 (39.737731167, -75.690144456) 3 (39.737731167, -75.690144456) 4 (39.737731167, -75.690144456) Name: Lat_Long, dtype: object
In [43]:
# Add it as another column to the dataset
inspections_df['Lat_Long'] = coordinates_df["Lat_Long"]
In [44]:
# Separate the cooordinate pairs into two columns for Longitude and Latitude
inspections_df[['Latitude', 'Longitude']] = inspections_df['Lat_Long'].str.rsplit(', ', expand=True)
# Get rid of '(' and ')' characters in the new columns
inspections_df['Latitude'] = inspections_df['Latitude'].str.replace('(','')
inspections_df['Longitude'] = inspections_df['Longitude'].str.replace(')','')
In [45]:
# Inspect the data now has a dedicated column for coordinates
inspections_df.head()
Out[45]:
Food_Establishment_Name | Food_Establishment_Street_Address | Food_Establishment_City | Food_Establishment_Zip_Code | Inspection_Date | Inspection_Type | Violation_Code | Violation_Description | Geocoded_Location | Lat_Long | Latitude | Longitude | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 In Town African Restaurant | 240 S Dupont Hwy Suite # 300 | New Castle | 19720.0 | 2022-03-04 | Routine | NaN | NaN | 240 S Dupont Hwy Suite # 300\nNew Castle, DE 19720\n(39.653333979, -75.616564004) | (39.653333979, -75.616564004) | 39.653333979 | -75.616564004 |
1 | 1000 Degree Pizza | 4526 New Linden Hill Road | Wilmington | 19808.0 | 2022-07-08 | Routine | 2-102.12 | Certified Food Protection Manager | 4526 New Linden Hill Road\nWilmington, DE 19808\n(39.737731167, -75.690144456) | (39.737731167, -75.690144456) | 39.737731167 | -75.690144456 |
2 | 1000 Degree Pizza | 4526 New Linden Hill Road | Wilmington | 19808.0 | 2022-07-08 | Routine | 5-202.12 | Handwashing Sinks, Installation | 4526 New Linden Hill Road\nWilmington, DE 19808\n(39.737731167, -75.690144456) | (39.737731167, -75.690144456) | 39.737731167 | -75.690144456 |
3 | 1000 Degree Pizza | 4526 New Linden Hill Road | Wilmington | 19808.0 | 2022-07-08 | Routine | 6-301.12 | Hand Drying Provision | 4526 New Linden Hill Road\nWilmington, DE 19808\n(39.737731167, -75.690144456) | (39.737731167, -75.690144456) | 39.737731167 | -75.690144456 |
4 | 1000 Degree Pizza | 4526 New Linden Hill Road | Wilmington | 19808.0 | 2022-07-08 | Routine | 3-501.16 | Time/Temperature Control for Safety Food, Hot and Cold Holding | 4526 New Linden Hill Road\nWilmington, DE 19808\n(39.737731167, -75.690144456) | (39.737731167, -75.690144456) | 39.737731167 | -75.690144456 |
PREPARE A LIST OF ESTABLISHMENT NAMES AND LOCATIONS¶
In [46]:
# Create a dataframe that contains a unique list of food establishment names and coordinates.
# Location values repeat in the dataset for each inspection & violation. Need to get the
# unique values so only one marker is added to the map for each food establishment.
unique_locations_df = inspections_df[['Food_Establishment_Name', 'Latitude', 'Longitude']].drop_duplicates()
In [47]:
# Review some results
unique_locations_df.head()
Out[47]:
Food_Establishment_Name | Latitude | Longitude | |
---|---|---|---|
0 | 1 In Town African Restaurant | 39.653333979 | -75.616564004 |
1 | 1000 Degree Pizza | 39.737731167 | -75.690144456 |
8 | 11-Jul | 39.178200014 | -75.527233032 |
11 | 13 Wingz | 38.645889987 | -75.625000976 |
15 | 1300 Bar & Restaurant | 39.748308989 | -75.545611026 |
COUNT NUMBER OF VIOLATIONS FOR EACH INSPECTION¶
In [48]:
# Create a dataset that has the count of violations for each inspection
location_inspection_df = inspections_df[['Food_Establishment_Name', 'Inspection_Date', 'Violation_Code']].drop_duplicates()
grouped_df = location_inspection_df.groupby(['Food_Establishment_Name', 'Inspection_Date'])
inspection_violation_counts_df = grouped_df.count()
# Make the Food_Establishment_Name and Inspection_Date columns regular columns, not indexes
inspection_violation_counts_df = inspection_violation_counts_df.reset_index()
# Change a column name
inspection_violation_counts_df.rename(columns = {'Violation_Code': 'Violation_Count'}, inplace = True)
In [49]:
inspection_violation_counts_df.head()
Out[49]:
Food_Establishment_Name | Inspection_Date | Violation_Count | |
---|---|---|---|
0 | 1 In Town African Restaurant | 2022-03-04 | 0 |
1 | 1000 Degree Pizza | 2022-07-08 | 5 |
2 | 1000 Degree Pizza | 2022-08-01 | 1 |
3 | 1000 Degree Pizza | 2022-08-11 | 0 |
4 | 11-Jul | 2021-11-18 | 1 |
COUNT NUMBER OF INSPECTIONS FOR EACH ESTABLISHMENT¶
In [50]:
# Count rows in prior result grouping by establishment name to get the count of inspection dates
establishment_inspection_counts_df = inspection_violation_counts_df.groupby(['Food_Establishment_Name']).count().reset_index()
establishment_inspection_counts_df.rename(columns = {'Inspection_Date': 'Inspection_Count'}, inplace = True)
establishment_inspection_counts_df = establishment_inspection_counts_df.drop(columns=['Violation_Count'])
In [51]:
establishment_inspection_counts_df.head(10)
Out[51]:
Food_Establishment_Name | Inspection_Count | |
---|---|---|
0 | 1 In Town African Restaurant | 1 |
1 | 1000 Degree Pizza | 3 |
2 | 11-Jul | 2 |
3 | 13 Wingz | 2 |
4 | 1300 Bar & Restaurant | 7 |
5 | 14 Global LLC | 2 |
6 | 1776 Restaurant | 2 |
7 | 1857 Jackson House | 3 |
8 | 1861 | 2 |
9 | 2 Angel's Ice Cream | 3 |
TOTAL NUMBER OF VIOLATIONS FOR EACH ESTABLISHMENT¶
In [52]:
# Use previous result to calculate the total number of violations for each establishment
establishment_violation_counts_df = inspection_violation_counts_df.groupby(['Food_Establishment_Name'])['Violation_Count'].sum().reset_index()
establishment_violation_counts_df.rename(columns = {'Violation_Count': 'Total_Violation_Count'}, inplace = True)
In [53]:
establishment_violation_counts_df.head()
Out[53]:
Food_Establishment_Name | Total_Violation_Count | |
---|---|---|
0 | 1 In Town African Restaurant | 0 |
1 | 1000 Degree Pizza | 6 |
2 | 11-Jul | 3 |
3 | 13 Wingz | 3 |
4 | 1300 Bar & Restaurant | 25 |
GET THE DATE OF THE MOST RECENT INSPECTION FOR EACH ESTABLISHMENT¶
In [54]:
# Find the most recent inspection date for each establishment
# Get a list of all inspection dates for each estalishment
inspection_dates_df = inspections_df[['Food_Establishment_Name', 'Inspection_Date']].drop_duplicates()
# Get the most recent inspection date for each place.
latest_inspection_date_df = inspection_dates_df.loc[inspection_dates_df
.groupby('Food_Establishment_Name').Inspection_Date.idxmax()]
# Change the date column name
latest_inspection_date_df.rename(columns = {'Inspection_Date': 'Latest_Inspection_Date'}, inplace = True)
In [55]:
latest_inspection_date_df.head()
Out[55]:
Food_Establishment_Name | Latest_Inspection_Date | |
---|---|---|
0 | 1 In Town African Restaurant | 2022-03-04 |
7 | 1000 Degree Pizza | 2022-08-11 |
9 | 11-Jul | 2023-05-26 |
12 | 13 Wingz | 2023-02-08 |
36 | 1300 Bar & Restaurant | 2023-08-10 |
NUMBER OF VIOLATIONS IN THE MOST RECENT INSPECTION FOR EACH ESTABLISHMENT¶
In [56]:
latest_inspection_violations_df = latest_inspection_date_df.merge(
inspection_violation_counts_df,
left_on=['Latest_Inspection_Date', 'Food_Establishment_Name'],
right_on=['Inspection_Date', 'Food_Establishment_Name'],
how='inner')
In [57]:
latest_inspection_violations_df.head(10)
Out[57]:
Food_Establishment_Name | Latest_Inspection_Date | Inspection_Date | Violation_Count | |
---|---|---|---|---|
0 | 1 In Town African Restaurant | 2022-03-04 | 2022-03-04 | 0 |
1 | 1000 Degree Pizza | 2022-08-11 | 2022-08-11 | 0 |
2 | 11-Jul | 2023-05-26 | 2023-05-26 | 2 |
3 | 13 Wingz | 2023-02-08 | 2023-02-08 | 3 |
4 | 1300 Bar & Restaurant | 2023-08-10 | 2023-08-10 | 4 |
5 | 14 Global LLC | 2023-09-01 | 2023-09-01 | 2 |
6 | 1776 Restaurant | 2022-08-31 | 2022-08-31 | 5 |
7 | 1857 Jackson House | 2023-08-10 | 2023-08-10 | 2 |
8 | 1861 | 2023-02-10 | 2023-02-10 | 1 |
9 | 2 Angel's Ice Cream | 2022-07-20 | 2022-07-20 | 2 |
IDENTIFY ESTABLISHMENTS THAT HAD NO VIOLATIONS IN THEIR LATEST INSPECTION¶
In [58]:
# Find places that had no violations on most recent inspection.
latest_inspection_no_violations_df = latest_inspection_violations_df.loc[latest_inspection_violations_df['Violation_Count'] == 0]
In [59]:
latest_inspection_no_violations_df.head()
Out[59]:
Food_Establishment_Name | Latest_Inspection_Date | Inspection_Date | Violation_Count | |
---|---|---|---|---|
0 | 1 In Town African Restaurant | 2022-03-04 | 2022-03-04 | 0 |
1 | 1000 Degree Pizza | 2022-08-11 | 2022-08-11 | 0 |
11 | 2S Pizza Company | 2023-02-07 | 2023-02-07 | 0 |
12 | 2nd To None | 2023-09-13 | 2023-09-13 | 0 |
13 | 3 Countries Pizzeria | 2023-05-18 | 2023-05-18 | 0 |
CREATE SOME FUNCTIONS TO SET MAP MARKER APPEARANCE¶
In [60]:
# Create a function that sets a marker icon based on inspection results
# If the establishment had no violations in its most recent inspection, use "smile" icon,
# Otherwise use 'frown'
def get_status_icon(location_name):
if location_name in latest_inspection_no_violations_df['Food_Establishment_Name'].values:
return 'smile'
else:
return 'frown'
In [61]:
# Create a function that retuns the most recent inspection date
def get_latest_inspection_date(location_name):
latest_date_df = latest_inspection_date_df.loc[latest_inspection_date_df['Food_Establishment_Name'] == location_name]
return str(latest_date_df['Latest_Inspection_Date'].item())[:10]
In [62]:
# Create a function that returns the total number of inspections for the establishment
def inspection_count(location_name):
try:
inspect_count_df = establishment_inspection_counts_df.loc[establishment_inspection_counts_df['Food_Establishment_Name'] == location_name]
return str(inspect_count_df['Inspection_Count'].item())
except:
return 'unknown'
In [63]:
# Create a function that returns the number of violations to-date for the establishment
def violations_total(location_name):
try:
violation_count_df = establishment_violation_counts_df.loc[establishment_violation_counts_df['Food_Establishment_Name'] == location_name]
return str(violation_count_df['Total_Violation_Count'].item())
except:
return 'unknown'
In [64]:
# Create a function that returns the number of violations from the most recent inspection
# for the establishment
def violations_recent(location_name):
try:
violation_count_df = latest_inspection_violations_df.loc[latest_inspection_violations_df['Food_Establishment_Name'] == location_name]
return str(violation_count_df['Violation_Count'].item())
except:
return 'unknown'
In [99]:
# Create a function that sets a color indicating the average number of violations per inspection
# Colors: BLACK = more than 10 violations, DARK BLUE = 6 to 10 violations, BLUE = 3 to 5 violations,
# LIGHT BLUE = 1 to 2 violations, WHITE = No violations
def get_icon_color(inspection_count, violation_count):
# print(float(violation_count) / float(inspection_count))
try:
average_violation_count = (float(violation_count) / float(inspection_count))
if average_violation_count == 0:
return 'white'
elif average_violation_count >0 and average_violation_count <=2:
return 'lightblue'
elif average_violation_count >2 and average_violation_count <=5:
return 'blue'
elif average_violation_count >5 and average_violation_count <=10:
return 'darkblue'
elif average_violation_count >10:
return 'black'
except:
return 'red'
In [100]:
# Start a map of Delaware centered on Bear, DE
# 39.1582°, -75.5244° # coordinates of Dover, DE
# 39.620198 -75.670477 # coordinates of Bear, DE
mp = folium.Map(location=[39.620198, -75.670477], zoom_start=14, width=900, height=550, control_scale=True)
# mp is the map object. I will add a marker to it for each food location.
In [103]:
# Add the markers to the map
for index, row in unique_locations_df.iterrows():
# print(row['Food_Establishment_Name'],row['Latitude'], row['Longitude'])
# Get status icon
icon_name = get_status_icon(row['Food_Establishment_Name'])
total_inspections = inspection_count(row['Food_Establishment_Name'])
total_violations = violations_total(row['Food_Establishment_Name'])
recent_violations = violations_recent(row['Food_Establishment_Name'])
latest_inspection_date = get_latest_inspection_date(row['Food_Establishment_Name'])
location_name = row['Food_Establishment_Name']
icon_color = get_icon_color(total_inspections, total_violations)
try:
folium.Marker(
location = [row['Latitude'], row['Longitude']],
popup = row['Food_Establishment_Name'],
icon = folium.Icon(color = icon_color, #'blue',
icon_color = 'grey',
icon = icon_name,
prefix = 'fa'),
tooltip = location_name[0:20]
+ '<br>' + 'Number of inspections: ' + str(total_inspections)
+ '<br>' + 'Latest inspection date: ' + str(latest_inspection_date)
+ '<br>' + 'Total violations: ' + str(total_violations)
+ '<br>' + 'Recent violations; ' + str(recent_violations)
).add_to(mp)
except: # not all locations are geocoded in the dataset, skip them
print(row['Food_Establishment_Name'], ' No coordinates available')
pass
Aspira Academy High School No coordinates available Eat BAP Mobile Unit No coordinates available Newark Charter School No coordinates available
In [104]:
# Display the map
mp
Out[104]:
Make this Notebook Trusted to load map: File -> Trust Notebook