Study of LFB incidents¶
By Emile Hazard
This project aims to visualize the incidents attended by the London Fire Brigade (LFB) from January 2009 to June 2022, using the dataset found at https://www.kaggle.com/datasets/jonbown/london-fire-brigade-incidents. The hope is to surface insights that may ultimately help London firefighters.
The map data is from https://data.london.gov.uk/dataset/statistical-gis-boundary-files-london.
The data uses a few acronyms; a list of those I encountered is included at the end.
Data preparation¶
Importing libraries and data¶
# Data
import numpy as np
import pandas as pd
# Show all columns in pandas DataFrames
pd.set_option('display.max_columns', 100)
# Maps and coordinates
import geopandas as gpd
from OSGridConverter import latlong2grid
# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.ticker as ticker
# Date and time handling
from datetime import datetime
import calendar
# Linear regression
from sklearn.linear_model import LinearRegression
# Clustering
from sklearn.cluster import KMeans
# Scraping
from bs4 import BeautifulSoup
# Useful functions
import utils
# Progress bar
from tqdm import tqdm
# Dealing with unnecessary warnings
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
# For reproducibility
RANDOM_STATE=42
#%matplotlib widget
data_path = "data/lfb_incident.csv"
london_map_path = ("data/statistical-gis-boundaries-london/ESRI/"
"London_Borough_Excluding_MHW.dbf")
# Specifying dtype to avoid warning due to mixed numbers and strings (XXXX-XXXX)
incidents = pd.read_csv(data_path, dtype={0:'str'})
london_map = gpd.read_file(london_map_path)
First, let us look at the type of information the dataset contains.
incidents.head(2)
IncidentNumber | DateOfCall | CalYear | TimeOfCall | HourOfCall | IncidentGroup | StopCodeDescription | SpecialServiceType | PropertyCategory | PropertyType | AddressQualifier | Postcode_full | Postcode_district | UPRN | USRN | IncGeo_BoroughCode | IncGeo_BoroughName | ProperCase | IncGeo_WardCode | IncGeo_WardName | IncGeo_WardNameNew | Easting_m | Northing_m | Easting_rounded | Northing_rounded | Latitude | Longitude | FRS | IncidentStationGround | FirstPumpArriving_AttendanceTime | FirstPumpArriving_DeployedFromStation | SecondPumpArriving_AttendanceTime | SecondPumpArriving_DeployedFromStation | NumStationsWithPumpsAttending | NumPumpsAttending | PumpCount | PumpHoursRoundUp | Notional Cost (£) | NumCalls | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 235138081 | 01 Jan 2009 | 2009 | 00:00:37 | 0 | Special Service | Special Service | RTC | Road Vehicle | Car | In street close to gazetteer location | SW11 4LB | SW11 | NaN | NaN | E09000032 | WANDSWORTH | Wandsworth | E05000620 | Queenstown | Queenstown | 528652.0 | 176830.0 | 528650 | 176850 | 51.475812 | -0.148894 | London | Battersea | 319.0 | Battersea | 342.0 | Clapham | 2.0 | 2.0 | 2.0 | 1.0 | 255.0 | 1.0 |
1 | 1091 | 01 Jan 2009 | 2009 | 00:00:46 | 0 | Special Service | Special Service | Assist other agencies | Outdoor | Lake/pond/reservoir | Open land/water - nearest gazetteer location | SE1 7SG | SE1 | NaN | NaN | E09000022 | LAMBETH | Lambeth | E05000416 | Bishop's | Bishop's | 530485.0 | 179007.0 | 530450 | 179050 | 51.494957 | -0.121712 | London | Lambeth | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 |
For now, the columns we will focus on are the ones specifying the time of the call (`DateOfCall` and `TimeOfCall`), the ones describing the incident (`IncidentGroup`, `StopCodeDescription`, and `SpecialServiceType`), and the ones allowing for (x, y) plotting on a map (`Easting_m` and `Northing_m`).
Adding datetime column¶
The date and time can be combined into a single `datetime` column, which will be helpful for chronological plotting later on. We also add a column for the time of day, expressed in fractional hours (1-minute precision), for easy histograms of event density during the day.
incidents['datetime'] = [datetime.strptime(date + time, '%d %b %Y%H:%M:%S')
for (date, time) in
incidents.loc[:,['DateOfCall','TimeOfCall']].to_numpy()
]
incidents['time'] = incidents['datetime'].map(lambda x: x.hour + x.minute / 60)
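As a side note, the same parsing can be done in a vectorized way with `pd.to_datetime`, which is usually much faster than a Python-level loop over rows. A minimal sketch on a toy frame mimicking the `DateOfCall`/`TimeOfCall` columns:

```python
import pandas as pd

# Toy frame mimicking the DateOfCall / TimeOfCall columns of the dataset
df = pd.DataFrame({'DateOfCall': ['01 Jan 2009', '15 Jun 2010'],
                   'TimeOfCall': ['00:00:37', '13:45:00']})

# Vectorized parsing: one call instead of one strptime per row
df['datetime'] = pd.to_datetime(df['DateOfCall'] + ' ' + df['TimeOfCall'],
                                format='%d %b %Y %H:%M:%S')
# Fractional hour of the day, 1-minute precision
df['time'] = df['datetime'].dt.hour + df['datetime'].dt.minute / 60
```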
Map visualizations of incidents¶
# Color palette for maps
color_background = "aliceblue"
color_city = "#e9dfd8"
color_incidents = "#a64253"
Density of incidents over the period¶
plt.close()
fig, ax = plt.subplots(facecolor=color_background, figsize=(10,8))
london_map.plot(color=color_city, edgecolor='black', ax=ax)
ax.scatter(incidents.loc[:, 'Easting_m'], incidents.loc[:, 'Northing_m'],
alpha=0.01, color=color_incidents, s=0.4)
ax.set_axis_off()
ax.set_title("Map of incident density between 2009 and 2022")
plt.show()
As we can see, the city centre is definitely denser in terms of incidents, which could be expected since it is densely populated and contains most of the activity.
Map of road traffic collisions¶
Let us now isolate road traffic collisions (RTCs) to see whether they follow the main road axes or also happen frequently on smaller streets.
plt.close()
fig, ax = plt.subplots(facecolor=color_background, figsize=(10,8))
london_map.plot(color=color_city, ax=ax)
ax.scatter(incidents.loc[incidents['SpecialServiceType']=='RTC', 'Easting_m'],
incidents.loc[incidents['SpecialServiceType']=='RTC', 'Northing_m'],
alpha=0.1,
color=color_incidents,
s=0.4)
ax.set_axis_off()
ax.set_title("Map of RTCs between 2009 and 2022")
plt.show()
We can indeed see the outline of most main roads: it seems like most RTCs occur there, which makes sense since those roads tend to have more traffic and higher average speeds.
Another thing to note here is that the higher density observed in the city centre for the previous map is no longer there: what matters is the traffic on an axis, not the density of inhabitants. The City of London made efforts towards reducing traffic in the centre, which explains this observation. This is good from the LFB point of view, as it is that much less work for central stations, which already have a lot of incidents to deal with.
Types of incidents¶
Reasons for each call¶
fig, ax = plt.subplots(2, 2, figsize=(14,12))
# Main pie chart: 3 categories of calls
fig.suptitle("Reasons for calls detailed", fontsize=18)
s = incidents.loc[:, 'IncidentGroup'].value_counts()
colors = ["#8bcd50","#0e86d4","#ff9636"]
ax[0,0].pie(s, labels=s.index, autopct='%1.0f%%',
startangle=-20, colors=colors)
ax[0,0].set_title('Main reason for call')
# Subdivision of the 1st category: false alarms
s = incidents.loc[incidents['IncidentGroup']=="False Alarm",
'StopCodeDescription'].value_counts()
colors = ["#ded93e","#8bcd50","#8fa01f"]
ax[0,1].pie(s, labels = s.index, autopct='%1.0f%%',
startangle=120, colors = colors)
ax[0,1].set_title('Types of false alarm')
# Subdivision of the 2nd category: special services
s = pd.DataFrame(incidents.loc[incidents['IncidentGroup'] == "Special Service",
'SpecialServiceType'].value_counts()).reset_index()
s.loc[s['count']<10_000, 'SpecialServiceType'] = 'Other'
s = s.groupby('SpecialServiceType')['count'].sum().reset_index()
colors = ["#68bbe3","#0e86d4","#055c9d","#0e86d4","#68bbe3","#0e86d4","#055c9d"]
explode = [0]*6+[0.1]+[0]*2
ax[1,0].pie(s.loc[:, 'count'], labels = s.loc[:, 'SpecialServiceType'],
autopct='%1.0f%%', startangle=-120, colors = colors,
explode = explode)
ax[1,0].set_title('Types of special service')
# Subdivision of the 3rd category: fires
s = pd.DataFrame(incidents.loc[incidents['IncidentGroup']=="Fire",
'StopCodeDescription'].value_counts())
s['sort'] = [0,2,1,3]
s = s.sort_values('sort')
colors = ["#ff5c4d","#ffcd58","#ff9636","#dad870","#ff5c4d","#ff9636"]
ax[1,1].pie(s.loc[:, 'count'], labels = s.index, autopct='%1.1f%%',
startangle=120, colors = colors, explode = [0.06]*4)
ax[1,1].set_title('Types of fire')
utils.plotarrow(fig, ax[0,0], ax[0,1], 1, .3, -1, .3, "#8bcd50", curve = -0.1)
utils.plotarrow(fig, ax[0,0], ax[1,1], 0.83, -0.96, -0.86, 0.86, "#ff9636")
utils.plotarrow(fig, ax[0,0], ax[1,0], -0.86, -0.86, -0.86, 1.2, "#0e86d4",
curve = 0.1)
plt.show()
The upper left chart shows the three main types of calls answered by the LFB: fires, special services, and false alarms. The other charts provide details for each of these three categories. Primary fires are the most serious ones, involving significant risk to people or property.
For clarity, the least frequent types of special services are grouped together under 'Other' in this plot. The corresponding list can be found below.
s = pd.DataFrame(incidents.loc[incidents['IncidentGroup'] == "Special Service",
'SpecialServiceType'].value_counts()).reset_index()
s.loc[s['count']<10_000]
SpecialServiceType | count | |
---|---|---|
8 | Animal assistance incidents | 8595 |
9 | Hazardous Materials incident | 6865 |
10 | Medical Incident | 6801 |
11 | Advice Only | 6747 |
12 | Other rescue/release of persons | 5588 |
13 | Removal of objects from people | 5238 |
14 | Other Transport incident | 4344 |
15 | Evacuation (no fire) | 3134 |
16 | Suicide/attempts | 3094 |
17 | Medical Incident - Co-responder | 2197 |
18 | Stand By | 1097 |
19 | Rescue or evacuation from water | 687 |
20 | Water provision | 148 |
Types of incident depending on the time of day¶
def hist_incidents_day(incidents, column, palette, ax, bins=48,
                       show_watches=True):
    # One list of call times per category, as expected by sns.histplot
    dic_bars = {value: incidents.loc[incidents[column] == value, 'time'].tolist()
                for value in incidents.loc[:, column].unique()}
    # Shade the night watch (dark) and day watch (light) backgrounds
    if show_watches:
        ax.axvspan(0, 9.5, color='darkblue', alpha=0.1)
        ax.axvspan(9.5, 20, color='yellow', alpha=0.1)
        ax.axvspan(20, 24, color='darkblue', alpha=0.1)
    sns.histplot(dic_bars, multiple="stack", bins=bins, palette=palette,
                 binrange=(0, 24), ax=ax)
    ax.set_xlabel("Time of the day")
    ax.set_xticks(range(0, 25, 3))
fig, ax = plt.subplots()
hist_incidents_day(incidents, 'IncidentGroup',
["#0e86d4","#ff9636","#8bcd50"],
ax)
ax.set_title("Distribution of calls by time of day")
plt.show()
As we can observe, the frequency of calls varies widely with the time of day: it is significantly higher during the day watch (lighter background) than the night watch (darker background). This is true for every category of call: special services, fires and false alarms. We can note that false alarms are spread fairly evenly across the day watch, while fires peak around dinner time, when most people are cooking.
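The day/night imbalance can be quantified directly from the `time` column; a minimal sketch on toy fractional-hour values (the 09:30-20:00 day-watch bounds are the ones shaded in the plot):

```python
import pandas as pd

# Toy fractional-hour values (0-24h), standing in for incidents['time']
time = pd.Series([0.5, 3.0, 10.25, 12.0, 15.5, 18.75, 21.0, 23.5])

# Day watch roughly 09:30-20:00, night watch the rest
day = time.between(9.5, 20, inclusive='left')
print(f"day watch: {day.sum()} calls, night watch: {(~day).sum()} calls")
```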
Incident distribution over the year¶
All types of incidents¶
def hist_incidents_year(incidents, column, palette, ax, bins=183):
    # One list per category, with each call mapped to its fraction of the year
    dic_bars = {value: [] for value in incidents.loc[:, column].unique()}
    for i in incidents.index:
        time = incidents.loc[i, 'datetime'].timetuple()
        frac_year = ((time.tm_yday * 24 + time.tm_hour)
                     / (24 * (365 + calendar.isleap(time.tm_year))))
        dic_bars[incidents.loc[i, column]].append(frac_year * 12 + 1)
    sns.histplot(dic_bars, multiple="stack", bins=bins, palette=palette,
                 binrange=(1, 13), ax=ax)
    ax.set_xlabel("Time of the year")
    ax.set_xticks(range(1, 13))
    # Month labels centred between the major ticks
    ax.xaxis.set_major_formatter(ticker.NullFormatter())
    ax.xaxis.set_minor_locator(ticker.FixedLocator(np.arange(1.5, 13.5, 1)))
    ax.xaxis.set_minor_formatter(ticker.FixedFormatter(['Jan','Feb','Mar','Apr',
                                                        'May','Jun','Jul','Aug',
                                                        'Sep','Oct','Nov','Dec']))
plt.close()
fig, ax = plt.subplots()
hist_incidents_year(incidents,
'IncidentGroup',
["#0e86d4","#ff9636","#8bcd50"],
ax, bins = 52)
ax.set_title("Distribution of calls over the year")
plt.show()
The type that seems to vary the most is fires. Let us observe those incidents separately.
Fires during the year¶
plt.close()
fig, ax = plt.subplots()
hist_incidents_year(incidents.loc[incidents['IncidentGroup']=='Fire'],
'StopCodeDescription',
["#ff5c4d","#ffcd58","#ff9636","#dad870"],
ax, bins = 52)
ax.set_title("Distribution of fire calls over the year")
ax.xaxis.set_major_formatter(ticker.NullFormatter())
ax.xaxis.set_minor_locator(ticker.FixedLocator(np.arange(1.5,13.5,1)))
ax.xaxis.set_minor_formatter(ticker.FixedFormatter(['Jan','Feb','Mar','Apr',
'May','Jun','Jul','Aug',
'Sep','Oct','Nov','Dec']))
plt.show()
Interestingly, primary fires occur at roughly the same frequency throughout the year, while secondary ones are significantly more frequent in summer. One possible explanation is that warm weather mainly increases outdoor fire starts, which are less likely to endanger people in an urban environment, making them secondary.
Pump mobilisation¶
The previous dataset only records the attendance of the first two pumps at an incident. However, the LFB did keep track of all pump mobilisations: this data can be found at https://data.london.gov.uk/dataset/london-fire-brigade-mobilisation-records.
pump_09_14_path = "data/LFB Mobilisation data from January 2009 - 2014.xlsx"
pump_15_20_path = "data/LFB Mobilisation data from 2015 - 2020.xlsx"
pump_21_24_path = "data/LFB Mobilisation data 2021 - 2024.xlsx"
print("0/3", end="\r")
pump_09_14 = pd.read_excel(pump_09_14_path)
print("1/3", end="\r")
pump_15_20 = pd.read_excel(pump_15_20_path)
print("2/3", end="\r")
pump_21_24 = pd.read_excel(pump_21_24_path)
print("3/3", end="\r")
pump = pd.concat((pump_09_14, pump_15_20, pump_21_24))
del pump_09_14, pump_15_20, pump_21_24
pump = pump.reset_index(drop=True)
3/3
Appliance mobilisation for each incident¶
plt.close()
fig, ax = plt.subplots(figsize=(10,6))
sns.histplot(pump.loc[:, ['IncidentNumber', 'DateAndTimeMobilised']]
.groupby('IncidentNumber').count(), discrete=True, ax=ax, legend=False)
ax.set_yscale('log')
ax.set_ylabel("Number of incidents")
ax.set_xlabel("Number of pumps mobilised")
ax.set_xticks(range(1,14))
ax.set_title("Pumps mobilised at each incident")
for i in ax.containers:
    ax.bar_label(i)
plt.show()
Note that the data might be incomplete: the Grenfell Tower fire mobilised more pumps than the maximum shown here.
Appliances available at each station¶
Each station has either one or two pumping appliances. We can get the list below.
year_min, year_max = 2009, 2024
pumps_station = pump.loc[(pump['DeployedFromLocation'] == 'Home Station')
& (pump['CalYear'] >= year_min)
& (pump['CalYear'] <= year_max),
['Resource_Code', 'DeployedFromStation_Name']
].groupby('DeployedFromStation_Name').nunique()
pumps_station.head()
Resource_Code | |
---|---|
DeployedFromStation_Name | |
Acton | 1 |
Addington | 2 |
Barking | 2 |
Barnet | 1 |
Battersea | 1 |
Observation of incidents over all stations, and consequences of 2014 closures¶
Incidents per station¶
numb_incidents = pump.groupby('DeployedFromStation_Name').count().sort_values('IncidentNumber')
len(numb_incidents)
126
The following plots let us visualize the outliers among the stations in the database: some handled only a few calls (fewer than 10 over the whole period), and others were closed, notably during the 2014 closures.
plt.close()
fig, ax = plt.subplots(21,6, figsize=(14,40))
fig.tight_layout(pad=2, rect=[0, 0, 1, 0.97])
fig.suptitle("Pumps mobilised for each station during the period")
for i, station in enumerate(numb_incidents.index):
    sns.histplot(pump.loc[pump['DeployedFromStation_Name'] == station, 'CalYear'],
                 discrete=True, ax=ax[i//6, i%6], bins=16, binrange=(2009, 2024))
    ax[i//6, i%6].set_title(station)
    ax[i//6, i%6].set_xlabel(None)
    ax[i//6, i%6].set_ylabel(None)
plt.show()
Response time¶
The travel time is mostly below 10 minutes, but we see that there are outliers.
plt.close()
fig, ax = plt.subplots()
ax.hist(pump['TravelTimeSeconds']/60, bins = 20)
ax.set_yscale('log')
ax.set_xlabel("Travel time in minutes")
ax.set_ylabel("Count (log scale)")
plt.show()
Let us see if the 2014 closures had an impact on the average response time:
plt.close()
fig, ax = plt.subplots(figsize=(12,6))
sns.violinplot(pump, x='CalYear', y='TravelTimeSeconds', ax=ax)
ax.set_ylim(0,800)
plt.show()
Overall, the increase after 2014 is quite small and even reverts in 2016, but it is worth looking at the most affected stations, the neighbours of the closed ones. A few steps are needed before we can get there. We need the list of stations closed in 2014, which can be obtained by looking at each station's histogram in the plot above. We also remove the stations that are not part of the LFB but only participated in a few incidents. We keep stations that only closed for a short time, like Purley, and remove for now those that were definitely closed.
After that, we will use the station locations to find the closest station to each closed one, and see how it was affected.
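The nearest-station search boils down to minimising squared distance over a dictionary of coordinates. A minimal sketch with hypothetical easting/northing values (the station names and coordinates below are made up for illustration):

```python
# Hypothetical coordinates (easting, northing in metres) for three stations
stations = {'A': (530000, 180000), 'B': (540000, 180000), 'C': (533000, 184000)}

def closest_station(target, stations):
    """Return the station other than `target` with the smallest squared distance."""
    tx, ty = stations[target]
    return min((s for s in stations if s != target),
               key=lambda s: (stations[s][0] - tx) ** 2 + (stations[s][1] - ty) ** 2)

closest_station('A', stations)  # 'C' is 5 km away, 'B' is 10 km away
```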
london_stations = numb_incidents.loc[numb_incidents['IncidentNumber']>10].index
closed_stations = [station for station in london_stations
                   if pump.loc[pump['DeployedFromStation_Name'] == station,
                               'CalYear'].max() < 2024]
closed_stations
['Silvertown', 'Woolwich', 'Downham', 'Knightsbridge', 'Kingsland', 'Southwark', 'Bow', 'Belsize', 'Westminster', 'Clerkenwell']
A bit of web scraping: extracting LFB station coordinates from a KML file¶
The file was downloaded from https://www.google.com/maps/d/u/0/viewer?mid=1rSai4zdG8uSujX8QxY1i0cwgNAU&hl=en_US&ll=51.50968746402844%2C-0.09302897119141562&z=11.
It contains more information than we need. Let us extract only the coordinates of each station, and clean the name associated with each.
def get_coord(bs):
    """
    Takes a soup <coordinates> entry and returns easting / northing coordinates
    """
    long, lat = (float(x) for x in bs.text.strip().split(',')[:2])
    g = latlong2grid(lat, long)  # Converting to easting / northing
    return g.E, g.N

def clean_name(bs):
    """
    Takes a soup <name> entry and returns the cleaned station name
    """
    s = bs.text
    s = s[:s.lower().index(' fire')]
    for char in [' (', ' -', ' /', ' and']:
        if char in s:
            s = s[:s.index(char)]
    return s
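As a self-contained illustration of this kind of extraction, here is the same idea on a toy KML snippet (the station name and coordinates below are made up):

```python
from bs4 import BeautifulSoup

# Toy KML snippet, standing in for the downloaded file
snippet = """<kml><Placemark>
  <name>Soho Fire Station (old)</name>
  <coordinates>
    -0.133,51.513,0
  </coordinates>
</Placemark></kml>"""

soup = BeautifulSoup(snippet, "html.parser")
# KML stores longitude first, then latitude, then altitude
lon, lat, _alt = map(float, soup.find("coordinates").text.strip().split(","))
name = soup.find("name").text
name = name[:name.lower().index(" fire")]  # keep only the station name
print(name, lat, lon)
```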
map_path = "data/London UK Fire Stations & More.kml"
with open(map_path) as f:
    soup = BeautifulSoup(f, 'xml')
stations_coord = {station: coord for (station, coord) in
zip(map(clean_name, soup.find_all('name')[3:117]),
map(get_coord, soup.find_all('coordinates')[1:115]))}
# Adding missing stations (part of those closed in 2014)
stations_coord.update({'Silvertown': (540772, 180114),
'Bow': (540614, 183589),
'Woolwich': (542943, 179005),
'Downham': (539999, 172508),
'Kingsland': (533466, 183998)})
# Removing stations absent from initial database
list_names = pump.loc[:,'DeployedFromStation_Name'].unique()
to_remove = [station for station in stations_coord
             if station not in london_stations]
for station in to_remove:
    del stations_coord[station]
print(f"Removed: {to_remove}")
Removed: ['Epsom', 'Lambeth River']
Incident response time after stations closures¶
For each station closed in 2014, we select the closest one using `stations_coord`.
def distance_sq(s0, s1, stations_coord=stations_coord):
    """
    Computes the squared distance between two stations
    """
    p0, p1 = stations_coord[s0], stations_coord[s1]
    return (p0[0] - p1[0]) ** 2 + (p0[1] - p1[1]) ** 2
plt.close()
fig, ax = plt.subplots(2,5, figsize=(12,5))
fig.tight_layout(pad=2, rect=[0, 0, 1, 0.95])
fig.suptitle("Average response time for stations closest to closed ones")
for i, station in enumerate(closed_stations):
    # Closest remaining station (squared distance is enough for comparison)
    station_closest = min((s for s in stations_coord if s != station),
                          key=lambda s: distance_sq(s, station))
    ax[i//5, i%5].plot(pump.loc[pump['DeployedFromStation_Name'] == station_closest,
                                ['CalYear', 'TravelTimeSeconds']]
                       .groupby('CalYear').mean())
    ax[i//5, i%5].axvline(x=2013.5, c='r')
    ax[i//5, i%5].set_title(station_closest)
plt.show()
We do observe an increase in each station the year after the closure of its neighbouring station (marked by the red line), although the overall response time remains low. East Greenwich seems less affected than the others, but this can be explained by the fact that it lies on the other side of the Thames from Silvertown, where the station was closed. Those calls are likely going to another station, further away in absolute distance but closer by road.
Suicide attempts calls¶
A first element we can look into when studying LFB suicide calls is their location. Here is a map of suicide attempts between 2009 and 2022.
plt.close()
fig, ax = plt.subplots(facecolor = color_background, figsize = (10,8))
london_map.plot(color = color_city, edgecolor='black', ax = ax)
ax.scatter(incidents.loc[incidents['SpecialServiceType'] == 'Suicide/attempts', 'Easting_m'],
incidents.loc[incidents['SpecialServiceType'] == 'Suicide/attempts', 'Northing_m'],
alpha = 0.2, color = color_incidents, s = 4)
ax.set_axis_off()
ax.set_title("Map of suicide attempt calls between 2009 and 2022")
plt.show()
It is no surprise that there are clusters around bridges.
plt.close()
fig, ax = plt.subplots()
hist_incidents_year(incidents.loc[incidents['SpecialServiceType'] == 'Suicide/attempts'], 'SpecialServiceType', ['b'], ax, bins = 12)
ax.set_title("Suicide attempts depending on the time of the year")
ax.get_legend().remove()
ax.xaxis.set_major_formatter(ticker.NullFormatter())
ax.xaxis.set_minor_locator(ticker.FixedLocator(np.arange(1.5,13.5,1)))
ax.xaxis.set_minor_formatter(ticker.FixedFormatter(['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']))
plt.show()
plt.close()
fig, ax = plt.subplots(4,3, sharex=True, sharey=True, figsize=(12,9))
fig.tight_layout(pad = 2)
for i in range(4):
    for j in range(3):
        year = 2010 + 3*i + j
        hist_incidents_year(incidents.loc[(incidents['SpecialServiceType'] == 'Suicide/attempts')
                                          & (incidents['CalYear'] == year)],
                            'SpecialServiceType', ['b'], ax[i,j], bins=12)
        ax[i,j].set_title(f"Year {year}")
        ax[i,j].get_legend().remove()
        ax[i,j].xaxis.set_major_formatter(ticker.NullFormatter())
plt.show()
plt.close()
fig, ax = plt.subplots()
sns.histplot(incidents.loc[(incidents['SpecialServiceType']=='Suicide/attempts') & (incidents['CalYear']<2022)], x='CalYear', ax=ax, discrete=True)
plt.show()
There is a clear increase in 2021. Another way to visualise it is the following plot, which includes a linear regression fitted on the period before 2021.
plt.close()
fig, ax = plt.subplots()
sns.histplot(incidents.loc[incidents['SpecialServiceType']=='Suicide/attempts'], x='datetime', ax=ax, bins=50, cumulative=True)
ax.set_title('Cumulative number of suicide attempt calls')
ax.set_xlabel("Date")
# Linear regression before 2021:
y = np.arange(1, len(incidents.loc[(incidents['SpecialServiceType']=='Suicide/attempts') & (incidents['CalYear']<2021)])+1)
X = incidents.loc[(incidents['SpecialServiceType']=='Suicide/attempts') & (incidents['CalYear']<2021), 'datetime'].map(datetime.toordinal).to_numpy().reshape(-1,1)
model = LinearRegression()
model.fit(X, y)
xplot = np.array([min(incidents.loc[:, 'datetime']), max(incidents.loc[:, 'datetime'])])
vfunc = np.vectorize(datetime.toordinal)
yplot = model.predict(vfunc(xplot).reshape(-1,1))
plt.plot(xplot, yplot, c='r')
plt.show()
We observe an increase starting around the end of 2020. It would be interesting to add more recent data to see if this tendency is confirmed after June 2022, which would indicate a need to address this issue.
Clustering for optimal stations positioning¶
Here is a map of the LFB stations today.
plt.close()
fig, ax = plt.subplots(facecolor = color_background, figsize = (10,8))
london_map.plot(color = color_city, edgecolor='black', ax = ax)
london_stations_open = [station for station in london_stations if station not in
closed_stations]
for station in london_stations_open:
    x, y = stations_coord[station]
    ax.scatter(x, y, marker='x', c='r')
ax.set_axis_off()
ax.set_title("Map of LFB stations")
plt.show()
Let us try to minimise the average squared distance from an incident to its closest station, using the same number of stations, to see if we end up with a similar positioning.
n_stations = len(london_stations_open)
kmeans = KMeans(n_clusters = n_stations, n_init = 10, random_state = RANDOM_STATE)
X = incidents.loc[:, ['Easting_m', 'Northing_m']].dropna().to_numpy()
kmeans.fit(X)
plt.close()
fig, ax = plt.subplots(facecolor = color_background, figsize = (10,8))
london_map.plot(color = color_city, edgecolor='black', ax = ax)
coords_real = np.array([list(stations_coord[station]) for station in
london_stations_open])
coords_ideal = kmeans.cluster_centers_
ax.scatter(x=coords_real[:, 0], y=coords_real[:, 1], marker='x', c='r',
label="Actual stations")
ax.scatter(x=coords_ideal[:, 0], y=coords_ideal[:, 1],
marker='x', c='b', label="Ideal stations")
ax.set_axis_off()
ax.legend()
ax.set_title("Map of LFB stations with ideal positioning")
plt.show()
The two sets of positions look similar. We can quantify this using the average squared distance from an incident to its closest station.
def smallest_sq_dist(x, y, coords):
    """
    Returns the squared distance to the closest station in `coords`
    """
    return min((x - xt) ** 2 + (y - yt) ** 2 for xt, yt in coords)
total_real, total_ideal = 0, 0
for x, y in tqdm(X):
    total_real += smallest_sq_dist(x, y, coords_real)
    total_ideal += smallest_sq_dist(x, y, coords_ideal)
print(f"""Mean square distance for actual stations: {total_real / len(X):.0f}
Mean square distance for ideal stations: {total_ideal / len(X):.0f}
Decrease of {(total_real - total_ideal) / total_real * 100:.1f}%""")
100%|██████████| 758683/758683 [02:39<00:00, 4748.88it/s]
Mean square distance for actual stations: 2491213
Mean square distance for ideal stations: 1758422
Decrease of 29.4%
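For hundreds of thousands of incidents the Python double loop is slow; the same quantity can be computed in one shot with NumPy broadcasting. A sketch on toy coordinates:

```python
import numpy as np

# Toy incident points and station coordinates (same units as easting/northing)
pts = np.array([[0.0, 0.0], [3.0, 4.0], [10.0, 0.0]])
stations = np.array([[0.0, 0.0], [9.0, 0.0]])

# Broadcast (n_pts, 1, 2) against (1, n_stations, 2), then reduce over axes
sq_dists = ((pts[:, None, :] - stations[None, :, :]) ** 2).sum(axis=2)
nearest = sq_dists.min(axis=1)   # squared distance to the closest station
nearest.mean()
```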
This does not mean that station locations could be significantly improved by following the blue map: this is a simplified model that accounts for neither roads, traffic, nor the feasibility of building a station at a given position. One way to refine it would be to use a clustering algorithm that accepts a custom distance, computed as the actual road distance between points. This is, however, significantly more demanding computationally.
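As an illustration of that direction (not something done in this notebook), here is a minimal k-medoids sketch: because the centres are actual data points, any pairwise distance function can be plugged in, including a precomputed road-network distance.

```python
import numpy as np

def k_medoids(X, k, dist, n_iter=20, seed=42):
    """Naive k-medoids: centres are data points, so `dist` can be any metric
    (e.g. a road-network distance instead of the Euclidean one)."""
    rng = np.random.default_rng(seed)
    medoids = rng.choice(len(X), size=k, replace=False)
    for _ in range(n_iter):
        # Assign each point to its nearest medoid
        D = np.array([[dist(x, X[m]) for m in medoids] for x in X])
        labels = D.argmin(axis=1)
        # Move each medoid to the in-cluster point minimising total distance
        new = []
        for j in range(k):
            members = np.where(labels == j)[0]
            costs = [sum(dist(X[a], X[b]) for b in members) for a in members]
            new.append(members[int(np.argmin(costs))])
        if set(new) == set(medoids):
            break
        medoids = np.array(new)
    return X[medoids], labels

# Toy run with a Euclidean distance on two well-separated groups
X = np.array([[0, 0], [0, 1], [1, 0], [10, 10], [10, 11], [11, 10]], float)
centres, labels = k_medoids(X, 2, dist=lambda a, b: np.hypot(*(a - b)))
```

Running this on the incident coordinates with a road-distance function would be far heavier, since each iteration needs all point-to-medoid distances.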
Acronyms:¶
AFA: Automatic Fire Alarm
FRS: Fire and Rescue Service
LFB: London Fire Brigade
RTC: Road Traffic Collision
UPRN: Unique Property Reference Number
USRN: Unique Street Reference Number