Model Run and Data Storage Application Notebook
Imports and carry-overs
The notebook begins with the same library imports and settings as before. It then carries over several functions and classes from the previous application, since this application is a modification of that original, larger one. Carrying over the functions and classes allows the loaded distributions to be parsed for their attributes, and also lets new distributions be created on the fly as needed.
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import math
import numpy as np
import random
import seaborn as sns
import pandas as pd
from scipy import stats
from scipy.stats import beta
import ipywidgets as widgets
from IPython.display import display
import pickle
sns.set()
plt.rcParams["figure.figsize"] = (20,10)
%matplotlib notebook
N = 10_000 # population size
def pert_dist(peak, low, high, sample_size=N, temp=4):
    # Rescale a beta distribution into a PERT distribution over [low, high]
    pert_loc = low
    pert_scale = high - low
    con_1 = 1 + temp * (peak - low) / (high - low)
    con_0 = 1 + temp * (high - peak) / (high - low)
    pert = np.random.beta(con_1, con_0, sample_size)
    pert = pert_loc + pert_scale * pert
    return pert
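As a quick sanity check (not part of the original notebook, with hypothetical inputs), a PERT draw peaked at 5 between a low of 2 and a high of 12 should stay inside those bounds, with most of the mass near the peak:
# Illustrative check of pert_dist with hypothetical parameters
sample = pert_dist(peak=5, low=2, high=12, sample_size=1000)
print(f'min: {sample.min():.2f}, mean: {sample.mean():.2f}, max: {sample.max():.2f}')
plt.hist(sample, 25)
plt.show()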
def find_beta(exp_mu, exp_low, exp_high, exp_lda=4):
    # Grid-search PERT parameters so the sampled mean and 10th/90th
    # percentiles land near the expected values
    mu_range = np.linspace(exp_low, exp_high, 40)
    low_range = np.linspace(0, exp_mu, 40)
    high_range = np.linspace(exp_mu, (exp_high * 2), 40)
    test_res, mu_res, low_res, high_res, mean_err_rec, exp_mu_rec = [], [], [], [], [], []
    for mu in mu_range:
        for low in low_range:
            for high in high_range:
                if low >= mu:
                    low = mu * .8
                if high <= mu:
                    high = mu * 1.2
                test_dist = pert_dist(mu, low, high, 1000, exp_lda)
                test_mean = np.mean(test_dist)
                test_low = np.percentile(test_dist, 10)
                test_high = np.percentile(test_dist, 90)
                mean_err = (test_mean - exp_mu)**2
                low_err = (test_low - exp_low)**2
                high_err = (test_high - exp_high)**2
                test_res.append(mean_err + np.sqrt(mean_err + low_err + high_err))
                mu_res.append(mu)
                low_res.append(low)
                high_res.append(high)
                mean_err_rec.append(np.sqrt(mean_err))
                exp_mu_rec.append(exp_mu)
    full_results = pd.DataFrame({
        'Test Results': pd.Series(test_res),
        'Mode Value': pd.Series(mu_res),
        'Low Value': pd.Series(low_res),
        'High Value': pd.Series(high_res),
        'mean_err': pd.Series(mean_err_rec),
        'expected mu': pd.Series(exp_mu_rec)
    })
    return full_results
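For illustration, the grid search can also be run on its own; the target values below are hypothetical, and the lowest 'Test Results' scores mark the best parameter candidates:
# Hypothetical search: mean near 5 with 10th/90th percentiles near 2 and 12
search_df = find_beta(exp_mu=5, exp_low=2, exp_high=12)
print(search_df.sort_values(by=['Test Results']).head())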
class df_dist:
    def __init__(self, dist_mu, dist_low,
                 dist_high, dist_size=10_000,
                 dist_temp=4, name='Unknown'):
        self.mu = dist_mu
        self.low = dist_low
        self.high = dist_high
        self.temp = dist_temp
        self.dsize = dist_size
        self.name = name
        # Search for the best-fitting PERT parameters, then sample from them
        self.df = find_beta(self.mu, self.low,
                            self.high, self.temp)
        self.df.sort_values(by=['Test Results'],
                            inplace=True)
        self.new_mu, self.new_low, self.new_high = list(self.df.iloc[0, 1:4])
        self.pert = pert_dist(self.new_mu, self.new_low,
                              self.new_high, self.dsize, self.temp)

    def checkParams(self):
        print(f'Pert vars: mu: {self.new_mu}, low: {self.new_low}, high: {self.new_high}')
        plt.hist(self.pert, 25)
        plt.show()
        print(f'Original inputs: Mean={self.mu}, Low={self.low}, High={self.high}')
        print(f'New outputs: Mean={np.mean(self.pert)}, Low={np.percentile(self.pert, 10)}, High={np.percentile(self.pert, 90)}')

    def getSample(self):
        return random.sample(list(self.pert), 1)[0]
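Putting the two together, a df_dist instance runs the search and stores a ready-to-sample distribution. A hypothetical example (not one of the saved distributions, and slow to build, which is exactly why the fitted objects are pickled):
# Hypothetical distribution: mean of 5, 10th percentile near 2, 90th near 12
example_dist = df_dist(5, 2, 12, name='example')
example_dist.checkParams()       # histogram plus fitted vs. requested parameters
print(example_dist.getSample())  # one random draw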
def dangerPerception(dp_c, dp_q):
    # Ratio of confirmed cases to the behavioral trigger, clamped to [0.01, 0.99]
    if (dp_c/dp_q) <= 0.01:
        return .01
    elif (dp_c/dp_q) <= 0.99:
        return dp_c / dp_q
    else:
        return 0.99

def behaviorModifier(total_conf, conf_trigg):
    # Draw a contact-reduction multiplier that falls as perceived danger rises
    bm_dp = dangerPerception(total_conf, conf_trigg)
    beta_a = 10 - (10 * bm_dp)
    beta_b = 10 - beta_a/(total_conf+1)
    return np.random.beta(beta_a, beta_b)
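The modifier should fall toward zero as confirmed cases approach the trigger; an illustrative check (averaging many draws to smooth out the beta sampling noise):
# Average behavior modifier as confirmed cases rise toward a trigger of 50
for confirmed in [0, 10, 25, 50]:
    draws = [behaviorModifier(confirmed, 50) for _ in range(1000)]
    print(f'confirmed={confirmed}: mean modifier = {np.mean(draws):.3f}')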
Loading in the previously generated distributions
Originally, the following distributions would be generated using the classes and methods above; to save time and computational resources, the generated class instances were previously pickled and are now loaded back into this modified version of the application.
file_list = ['e2i_object', 'i2r_object', 'c2i_object',
             'h2i_object', 'freq_cont_object',
             'infreq_cont_object', 'cfr_object']
loaded = {}
for filename in file_list:
    with open(filename, 'rb') as infile:
        loaded[filename] = pickle.load(infile)

e2i = loaded['e2i_object']
i2r = loaded['i2r_object']
c2i = loaded['c2i_object']
h2i = loaded['h2i_object']
freq_cont = loaded['freq_cont_object']
infreq_cont = loaded['infreq_cont_object']
cfr = loaded['cfr_object']
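As a brief hedged check that the unpickling worked, each loaded object should expose the df_dist attributes defined above:
# Confirm each loaded distribution carries its fitted parameters
for dist in [e2i, i2r, c2i, h2i, freq_cont, infreq_cont, cfr]:
    print(f'{dist.name}: mu={dist.new_mu:.3f}, low={dist.new_low:.3f}, high={dist.new_high:.3f}')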
The ModelRun class
This is the main class of the application. The ModelRun class generates SEIR model run instances, each of which requires only an ID as input, though the class also accepts custom population sizes, durations, behavioral-change triggers, and starting exposed and infectious counts. Additionally, the class has a simDay method that simulates the next day of a model run, appending the results to the existing attributes of the ModelRun instance.
class ModelRun:
    def __init__(self, id, population=10_000, duration=180, conf_trigger=50, exposed=1, infectious=0):
        self.id = id
        self.pop = population
        self.dur = duration
        self.conf_trigg = conf_trigger
        self.day = []
        self.beh_mod = []
        self.S, self.R, self.D = [population], [0], [0]
        self.NR, self.ND = [0], [0]
        self.E, self.I = [exposed], [infectious]
        self.NE, self.NI = [0], [0]
        self.E_C, self.I_C = [0], [0]
        self.IC, self.IH = [0], [0]
        self.NIC, self.NIH = [0], [0]
        # Sample this run's epidemiological parameters from the loaded distributions
        self.inc_per = e2i.getSample()
        self.inf_per = i2r.getSample()
        self.c2i = c2i.getSample()
        self.h2i = h2i.getSample()
        self.freq_cont = freq_cont.getSample()
        self.infreq_cont = infreq_cont.getSample()
        self.cfr = cfr.getSample()
        self.lat_rate = 1/self.inc_per
        self.rec_rate = 1/self.inf_per

    def simDay(self, day):
        self.day.append(day)
        total_conf = self.IC[day]
        self.beh_mod.append(behaviorModifier(total_conf, self.conf_trigg))
        # New exposures from newly and currently infectious contacts,
        # capped by the remaining susceptible population
        next_NE = ((self.beh_mod[day] * self.freq_cont * self.NI[day])
                   + (self.beh_mod[day] * self.infreq_cont * self.I[day]))
        self.NE.append(min(next_NE, self.S[day]))
        # Exposed individuals become infectious at the latency rate
        next_NI = self.E[day] * self.lat_rate
        self.NI.append(max(next_NI, 0))
        self.I_C.append(self.I_C[day] + self.NI[day+1])
        next_E = self.E[day] + self.NE[day+1] - self.NI[day+1]
        self.E.append(max(next_E, 0))
        self.E_C.append(self.E_C[day] + self.NE[day+1])
        # Split new infectious cases into confirmed and hospitalized counts
        next_NIC = self.c2i * self.NI[day+1]
        self.NIC.append(next_NIC)
        self.IC.append(self.IC[day] + next_NIC)
        next_NIH = self.h2i * self.NI[day+1]
        self.NIH.append(next_NIH)
        self.IH.append(self.IH[day] + next_NIH)
        # Recoveries and deaths
        next_NR = self.rec_rate * self.I[day]
        self.NR.append(next_NR)
        self.R.append(self.R[day] + next_NR)
        next_ND = self.cfr * next_NIC
        self.ND.append(next_ND)
        self.D.append(self.D[day] + next_ND)
        next_I = self.I[day] + self.NI[day+1] - next_NR - next_ND
        self.I.append(max(next_I, 0))
        # Whatever remains of the population is still susceptible
        next_S = self.pop - self.E[day+1] - self.I[day+1] - self.R[day+1] - self.D[day+1]
        self.S.append(max(next_S, 0))
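Before generating the full ensembles below, a single run can be simulated and plotted as a smoke test (an illustrative snippet, not part of the original notebook):
# Smoke test: one run with the default 180-day duration and trigger of 50
test_run = ModelRun(0)
for day in range(test_run.dur):
    test_run.simDay(day)
plt.plot(test_run.S, label='Susceptible')
plt.plot(test_run.I, label='Infectious')
plt.plot(test_run.R, label='Recovered')
plt.legend()
plt.show()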
data_dict = {}
properties_dict = {'Model Sets': {}}
conf_trigger_list = [3, 5, 10, 15, 20, 35, 50, 75, 100]
for modifier in conf_trigger_list:
    model_dict = {}
    for i in range(1000):
        model_dict[f'model_{i}'] = ModelRun(i, conf_trigger=modifier)
        for day in range(model_dict[f'model_{i}'].dur):
            model_dict[f'model_{i}'].simDay(day)
    data_dict[f'conf_trigger_{modifier}'] = model_dict
    properties_dict['Model Sets'].update({f'Confirmed Infectious Behavior Max = {modifier}': {'id': f'conf_trigger_{modifier}'}})
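The resulting structure nests one dictionary of 1,000 ModelRun instances per trigger value, so individual runs can be pulled out directly, for example:
# Inspect a single run from the trigger-50 ensemble
single_run = data_dict['conf_trigger_50']['model_0']
print(len(single_run.I))  # 181 entries: day 0 plus 180 simulated days
print(single_run.cfr)     # the case fatality rate sampled for this run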
Create a properties dictionary for future reference and labeling tasks
This is created primarily for outward-facing visualization and display applications, though it can also be useful for reference in the various results calculations.
properties_dict.update({'Results': {'Susceptible (Running Total)': {'id': 'S'},
                                    'Exposed (Concurrent)': {'id': 'E'},
                                    'Infectious (Concurrent)': {'id': 'I'},
                                    'Recovered (Running Total)': {'id': 'R'},
                                    'Deaths (Current Total)': {'id': 'D'},
                                    'Recovered (New/Daily)': {'id': 'NR'},
                                    'Deaths (New/Daily)': {'id': 'ND'},
                                    'Exposed (New/Daily)': {'id': 'NE'},
                                    'Infectious (New/Daily)': {'id': 'NI'},
                                    'Confirmed Infectious (Cumulative)': {'id': 'IC'},
                                    'Confirmed Infectious (New/Daily)': {'id': 'NIC'},
                                    'Hospitalized Infectious (Cumulative)': {'id': 'IH'},
                                    'Hospitalized Infectious (New/Daily)': {'id': 'NIH'},
                                    'Exposed (Cumulative)': {'id': 'E_C'},
                                    'Infectious (Cumulative)': {'id': 'I_C'},
                                    'Behavioral Modifier (Daily)': {'id': 'beh_mod'},
                                    }
                        })
Export the properties dictionary
filename = 'properties_dict'
with open(filename, 'wb') as outfile:
    pickle.dump(properties_dict, outfile)
Create a function that will return a key if a value is known
Be careful when using this with non-unique values, as the function returns the first match encountered. We advise using it only with dictionaries whose values are unique.
def getKeyWithID(in_dict, in_id, in_value):
    # Return the first key whose value matches {in_id: in_value}
    for key in in_dict:
        if in_dict[key] == {in_id: in_value}:
            return key

getKeyWithID(properties_dict['Results'], 'id', 'I_C')
'Infectious (Cumulative)'
Create a results dictionary repository that bundles results by type
This allows for quick and easy statistics calculations by type (mean, percentiles, standard deviation, etc.), as well as plotting and visualization.
results_dict = {'S': {},
                'E': {},
                'I': {},
                'R': {},
                'D': {},
                'NR': {},
                'ND': {},
                'NE': {},
                'NI': {},
                'IC': {},
                'NIC': {},
                'IH': {},
                'NIH': {},
                'E_C': {},
                'I_C': {},
                'beh_mod': {}
                }
for conf_trigger_set in data_dict:
    for type_key in results_dict:
        # The range must cover all 1,000 models in the set
        for x in range(len(data_dict[conf_trigger_set])):
            value = getattr(data_dict[conf_trigger_set][f'model_{x}'], type_key)
            results_dict[type_key].setdefault(conf_trigger_set, {})[f'model_{x}'] = value
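Each result type should now hold one entry per trigger set, with all 1,000 model series inside; a brief check:
# Verify the bundle: 9 trigger sets per type, 1,000 models per set
print(len(results_dict['I']))                     # expected: 9
print(len(results_dict['I']['conf_trigger_50']))  # expected: 1000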
Export and save the results dictionary
Note that due to the number of ensembles, models, and result types, this file will be quite large with the default number of models (roughly 1.5 GB).
filename = 'results_dict'
with open(filename, 'wb') as outfile:
    pickle.dump(results_dict, outfile)
Create a separate statistics dictionary
This dictionary stores summarized statistical calculations for all ensemble model results.
# One empty bucket per result type, mirroring the keys of results_dict
stats_dict = {result_type: {} for result_type in results_dict}
for result_type in results_dict:
    for conf_trig in results_dict[result_type]:
        # Rows are days, columns are models; describe across models for each day
        temp_df = pd.DataFrame.from_dict(results_dict[result_type][conf_trig])
        temp_stats = temp_df.apply(pd.Series.describe, percentiles=[.05, .1, .9, .95], axis=1)
        stats_dict[result_type][conf_trig] = temp_stats
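With the daily summaries in place, an ensemble can be visualized as a mean line with a percentile band; a sketch using the trigger-50 infectious curves and the 5th/95th percentile columns produced by describe:
# Ensemble mean of concurrent infectious cases with a 5-95% band
ens = stats_dict['I']['conf_trigger_50']
plt.plot(ens['mean'], label='Ensemble mean')
plt.fill_between(ens.index, ens['5%'], ens['95%'], alpha=0.3, label='5th-95th percentile')
plt.legend()
plt.show()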
Finding a best-fit model run by comparing each run against the calculated mean of the 1,000 models
This is done for two reasons:
- Curiosity. It is genuinely interesting to compare the mean against the individual models and see how they differ.
- To attempt to capture inconsistencies. If the mean is not well represented by any single model run (out of 1,000 total), care should be taken in how the mean is used. The mean can still provide insight into trends and potential outcomes, but the less the mean resembles actual model runs, the more the individual runs should be examined to better understand the expected nature of real-world results.
from sklearn.metrics import mean_squared_error

for cat in stats_dict:
    for conf_trig in stats_dict[cat]:
        check_rms = None
        for x in range(len(results_dict[cat][conf_trig])):
            test_model_col = results_dict[cat][conf_trig][f'model_{x}']
            test_mean_col = list(stats_dict[cat][conf_trig]['mean'])
            test_rms = np.sqrt(mean_squared_error(test_model_col, test_mean_col))
            if check_rms is None or test_rms < check_rms:
                check_rms = test_rms
                check_col = test_model_col
        print(cat, conf_trig, check_rms)  # RMSE of the closest-fitting run
        stats_dict[cat][conf_trig]['best_fit'] = check_col
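To see how well the chosen run actually tracks the mean (the comparison motivating this step), the two columns can be plotted together, for example:
# Compare the ensemble mean against its closest single model run
fit_df = stats_dict['I']['conf_trigger_50']
plt.plot(fit_df['mean'], label='Ensemble mean')
plt.plot(fit_df['best_fit'], label='Best-fit model run')
plt.legend()
plt.show()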
Exporting the much smaller statistics dictionary for later use in visualizations or other applications
Compared with the results dictionary/file, the statistics dictionary is a tiny fraction of the size and much easier to export, import, store, and share.
filename = 'stats_dict'
with open(filename, 'wb') as outfile:
    pickle.dump(stats_dict, outfile)