Setup
library(outbreaks) # for data
library(trending) # for trend fitting
library(dplyr) # for data manipulation
# load data
data(covid19_england_nhscalls_2020)
# select 6 weeks of data (from a period when the prevalence was decreasing)
last_date <- as.Date("2020-05-28")
first_date <- last_date - 8*7
pathways_recent <-
covid19_england_nhscalls_2020 %>%
filter(date >= first_date, date <= last_date) %>%
group_by(date, day, weekday) %>%
summarise(count = sum(count), .groups = "drop")
# split data for fitting and prediction
dat <-
pathways_recent %>%
group_by(date <= first_date + 6*7) %>%
group_split()
fitting_data <- dat[[2]]
pred_data <- select(dat[[1]], date, day, weekday)
A succesful model fit
(model <- glm_nb_model(count ~ day + weekday))
#> Untrained trending model:
#> glm.nb(formula = count ~ day + weekday)
(fitted_model <- fit(model, fitting_data))
#> <trending_fit_tbl> 1 x 3
#> result warnings errors
#> <list> <list> <list>
#> 1 <negbin> <NULL> <NULL>
fitted_model %>% get_result()
#> [[1]]
#>
#> Call: glm.nb(formula = count ~ day + weekday, data = fitting_data,
#> init.theta = 39.34595795, link = log)
#>
#> Coefficients:
#> (Intercept) day weekdaymonday weekdayweekend
#> 11.46417 -0.03476 0.19162 -0.14224
#>
#> Degrees of Freedom: 42 Total (i.e. Null); 39 Residual
#> Null Deviance: 395.9
#> Residual Deviance: 43.17 AIC: 850.6
# default
fitted_model %>%
predict(pred_data) %>%
get_result()
#> [[1]]
#> <trending_prediction> 14 x 8
#> date day weekday estimate lower_ci upper_ci lower_pi upper_pi
#> <date> <int> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2020-05-15 58 rest_of_week 12682. 11390. 14122. 8107 18870
#> 2 2020-05-16 59 weekend 10625. 9299. 12140. 6618 16223
#> 3 2020-05-17 60 weekend 10262. 8956. 11759. 6373 15714
#> 4 2020-05-18 61 monday 13840. 11749. 16303. 8363 21784
#> 5 2020-05-19 62 rest_of_week 11036. 9782. 12450. 6962 16638
#> 6 2020-05-20 63 rest_of_week 10659. 9416. 12066. 6701 16124
#> 7 2020-05-21 64 rest_of_week 10295. 9064. 11693. 6450 15626
#> 8 2020-05-22 65 rest_of_week 9943. 8724. 11333. 6208 15145
#> 9 2020-05-23 66 weekend 8330. 7138. 9721. 5079 12992
#> 10 2020-05-24 67 weekend 8045. 6872. 9419. 4889 12588
#> 11 2020-05-25 68 monday 10851. 9048. 13012. 6439 17388
#> 12 2020-05-26 69 rest_of_week 8652. 7486. 10001. 5326 13366
#> 13 2020-05-27 70 rest_of_week 8357. 7204. 9694. 5126 12955
#> 14 2020-05-28 71 rest_of_week 8071. 6933. 9396. 4933 12558
# without uncertainty
fitted_model %>%
predict(pred_data, uncertain = FALSE) %>%
get_result()
#> [[1]]
#> <trending_prediction> 14 x 8
#> date day weekday estimate lower_ci upper_ci lower_pi upper_pi
#> <date> <int> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2020-05-15 58 rest_of_week 12682. 11390. 14122. 9027 16948
#> 2 2020-05-16 59 weekend 10625. 9299. 12140. 7562 14199
#> 3 2020-05-17 60 weekend 10262. 8956. 11759. 7304 13715
#> 4 2020-05-18 61 monday 13840. 11749. 16303. 9852 18494
#> 5 2020-05-19 62 rest_of_week 11036. 9782. 12450. 7855 14748
#> 6 2020-05-20 63 rest_of_week 10659. 9416. 12066. 7586 14245
#> 7 2020-05-21 64 rest_of_week 10295. 9064. 11693. 7327 13758
#> 8 2020-05-22 65 rest_of_week 9943. 8724. 11333. 7076 13289
#> 9 2020-05-23 66 weekend 8330. 7138. 9721. 5928 11134
#> 10 2020-05-24 67 weekend 8045. 6872. 9419. 5725 10754
#> 11 2020-05-25 68 monday 10851. 9048. 13012. 7723 14501
#> 12 2020-05-26 69 rest_of_week 8652. 7486. 10001. 6157 11564
#> 13 2020-05-27 70 rest_of_week 8357. 7204. 9694. 5947 11170
#> 14 2020-05-28 71 rest_of_week 8071. 6933. 9396. 5743 10788
# without prediction intervals
fitted_model %>%
predict(pred_data, add_pi = FALSE) %>%
get_result()
#> [[1]]
#> <trending_prediction> 14 x 6
#> date day weekday estimate lower_ci upper_ci
#> <date> <int> <fct> <dbl> <dbl> <dbl>
#> 1 2020-05-15 58 rest_of_week 12682. 11390. 14122.
#> 2 2020-05-16 59 weekend 10625. 9299. 12140.
#> 3 2020-05-17 60 weekend 10262. 8956. 11759.
#> 4 2020-05-18 61 monday 13840. 11749. 16303.
#> 5 2020-05-19 62 rest_of_week 11036. 9782. 12450.
#> 6 2020-05-20 63 rest_of_week 10659. 9416. 12066.
#> 7 2020-05-21 64 rest_of_week 10295. 9064. 11693.
#> 8 2020-05-22 65 rest_of_week 9943. 8724. 11333.
#> 9 2020-05-23 66 weekend 8330. 7138. 9721.
#> 10 2020-05-24 67 weekend 8045. 6872. 9419.
#> 11 2020-05-25 68 monday 10851. 9048. 13012.
#> 12 2020-05-26 69 rest_of_week 8652. 7486. 10001.
#> 13 2020-05-27 70 rest_of_week 8357. 7204. 9694.
#> 14 2020-05-28 71 rest_of_week 8071. 6933. 9396.
# bootstraped prediction intervals
fitted_model %>%
predict(pred_data, simulate_pi = TRUE) %>%
get_result()
#> [[1]]
#> <trending_prediction> 14 x 8
#> date day weekday estimate lower_ci upper_ci lower_pi upper_pi
#> <date> <int> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2020-05-15 58 rest_of_week 12682. 11390. 14122. 8947 17388
#> 2 2020-05-16 59 weekend 10625. 9299. 12140. 7256 14514
#> 3 2020-05-17 60 weekend 10262. 8956. 11759. 7206 14222
#> 4 2020-05-18 61 monday 13840. 11749. 16303. 9656 19163
#> 5 2020-05-19 62 rest_of_week 11036. 9782. 12450. 7685 15066
#> 6 2020-05-20 63 rest_of_week 10659. 9416. 12066. 7340 14489
#> 7 2020-05-21 64 rest_of_week 10295. 9064. 11693. 7127 14346
#> 8 2020-05-22 65 rest_of_week 9943. 8724. 11333. 6815 13563
#> 9 2020-05-23 66 weekend 8330. 7138. 9721. 5741 11425
#> 10 2020-05-24 67 weekend 8045. 6872. 9419. 5618 11199
#> 11 2020-05-25 68 monday 10851. 9048. 13012. 7461 15158
#> 12 2020-05-26 69 rest_of_week 8652. 7486. 10001. 5990 11973
#> 13 2020-05-27 70 rest_of_week 8357. 7204. 9694. 5673 11491
#> 14 2020-05-28 71 rest_of_week 8071. 6933. 9396. 5525 10976
Multiple models
The fit function also works with a list input of multiple models.
models <- list(
simple = lm_model(count ~ day),
glm_poisson = glm_model(count ~ day, family = "poisson"),
glm_negbin = glm_nb_model(count ~ day + weekday),
will_error = glm_nb_model(count ~ day + nonexistant)
)
(fitted_tbl <- fit(models, fitting_data))
#> <trending_fit_tbl> 4 x 4
#> model_name result warnings errors
#> <chr> <named list> <named list> <named list>
#> 1 simple <lm> <NULL> <NULL>
#> 2 glm_poisson <glm> <NULL> <NULL>
#> 3 glm_negbin <negbin> <NULL> <NULL>
#> 4 will_error <NULL> <NULL> <chr [1]>
get_result(fitted_tbl)
#> $simple
#>
#> Call:
#> lm(formula = count ~ day, data = fitting_data)
#>
#> Coefficients:
#> (Intercept) day
#> 69202 -1093
#>
#>
#> $glm_poisson
#>
#> Call: glm(formula = count ~ day, family = "poisson", data = fitting_data)
#>
#> Coefficients:
#> (Intercept) day
#> 11.57014 -0.03822
#>
#> Degrees of Freedom: 42 Total (i.e. Null); 41 Residual
#> Null Deviance: 321600
#> Residual Deviance: 50970 AIC: 51490
#>
#> $glm_negbin
#>
#> Call: glm.nb(formula = count ~ day + weekday, data = fitting_data,
#> init.theta = 39.34595795, link = log)
#>
#> Coefficients:
#> (Intercept) day weekdaymonday weekdayweekend
#> 11.46417 -0.03476 0.19162 -0.14224
#>
#> Degrees of Freedom: 42 Total (i.e. Null); 39 Residual
#> Null Deviance: 395.9
#> Residual Deviance: 43.17 AIC: 850.6
#>
#> $will_error
#> NULL
This can also then be used with predict()
(pred <- predict(fitted_tbl, pred_data))
#> <trending_predict_tbl> 4 x 4
#> model_name result warnings errors
#> <chr> <named list> <named list> <named list>
#> 1 simple <trndng_p [14 × 8]> <NULL> <NULL>
#> 2 glm_poisson <trndng_p [14 × 8]> <NULL> <NULL>
#> 3 glm_negbin <trndng_p [14 × 8]> <NULL> <NULL>
#> 4 will_error <NULL> <NULL> <chr [1]>
get_result(pred)
#> $simple
#> <trending_prediction> 14 x 8
#> date day weekday estimate lower_ci upper_ci lower_pi upper_pi
#> <date> <int> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2020-05-15 58 rest_of_week 5813. 234. 11392. -13007. 24632.
#> 2 2020-05-16 59 weekend 4720. -1053. 10492. -14158. 23597.
#> 3 2020-05-17 60 weekend 3627. -2341. 9594. -15311. 22565.
#> 4 2020-05-18 61 monday 2534. -3631. 8699. -16467. 21535.
#> 5 2020-05-19 62 rest_of_week 1441. -4922. 7804. -17626. 20508.
#> 6 2020-05-20 63 rest_of_week 348. -6215. 6911. -18786. 19482.
#> 7 2020-05-21 64 rest_of_week -745. -7509. 6020. -19949. 18459.
#> 8 2020-05-22 65 rest_of_week -1838. -8805. 5129. -21114. 17439.
#> 9 2020-05-23 66 weekend -2931. -10101. 4240. -22282. 16420.
#> 10 2020-05-24 67 weekend -4024. -11399. 3352. -23451. 15404.
#> 11 2020-05-25 68 monday -5116. -12697. 2464. -24623. 14390.
#> 12 2020-05-26 69 rest_of_week -6209. -13996. 1578. -25797. 13378.
#> 13 2020-05-27 70 rest_of_week -7302. -15296. 692. -26973. 12369.
#> 14 2020-05-28 71 rest_of_week -8395. -16597. -193. -28152. 11361.
#>
#> $glm_poisson
#> <trending_prediction> 14 x 8
#> date day weekday estimate lower_ci upper_ci lower_pi upper_pi
#> <date> <int> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2020-05-15 58 rest_of_week 11535. 11484. 11587. 11275 11798
#> 2 2020-05-16 59 weekend 11103. 11052. 11154. 10846 11361
#> 3 2020-05-17 60 weekend 10686. 10636. 10737. 10434 10941
#> 4 2020-05-18 61 monday 10286. 10236. 10336. 10038 10536
#> 5 2020-05-19 62 rest_of_week 9900. 9850. 9950. 9656 10146
#> 6 2020-05-20 63 rest_of_week 9529. 9480. 9578. 9289 9770
#> 7 2020-05-21 64 rest_of_week 9171. 9123. 9220. 8936 9409
#> 8 2020-05-22 65 rest_of_week 8827. 8780. 8876. 8596 9061
#> 9 2020-05-23 66 weekend 8496. 8449. 8544. 8269 8726
#> 10 2020-05-24 67 weekend 8178. 8131. 8225. 7955 8403
#> 11 2020-05-25 68 monday 7871. 7825. 7917. 7652 8092
#> 12 2020-05-26 69 rest_of_week 7576. 7531. 7621. 7361 7793
#> 13 2020-05-27 70 rest_of_week 7292. 7247. 7337. 7081 7505
#> 14 2020-05-28 71 rest_of_week 7018. 6974. 7063. 6811 7228
#>
#> $glm_negbin
#> <trending_prediction> 14 x 8
#> date day weekday estimate lower_ci upper_ci lower_pi upper_pi
#> <date> <int> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 2020-05-15 58 rest_of_week 12682. 11390. 14122. 8107 18870
#> 2 2020-05-16 59 weekend 10625. 9299. 12140. 6618 16223
#> 3 2020-05-17 60 weekend 10262. 8956. 11759. 6373 15714
#> 4 2020-05-18 61 monday 13840. 11749. 16303. 8363 21784
#> 5 2020-05-19 62 rest_of_week 11036. 9782. 12450. 6962 16638
#> 6 2020-05-20 63 rest_of_week 10659. 9416. 12066. 6701 16124
#> 7 2020-05-21 64 rest_of_week 10295. 9064. 11693. 6450 15626
#> 8 2020-05-22 65 rest_of_week 9943. 8724. 11333. 6208 15145
#> 9 2020-05-23 66 weekend 8330. 7138. 9721. 5079 12992
#> 10 2020-05-24 67 weekend 8045. 6872. 9419. 4889 12588
#> 11 2020-05-25 68 monday 10851. 9048. 13012. 6439 17388
#> 12 2020-05-26 69 rest_of_week 8652. 7486. 10001. 5326 13366
#> 13 2020-05-27 70 rest_of_week 8357. 7204. 9694. 5126 12955
#> 14 2020-05-28 71 rest_of_week 8071. 6933. 9396. 4933 12558
#>
#> $will_error
#> NULL