neun_euro_ticket_demonstrator
library(ubair)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
effect_title <- "neun_euro_ticket"
model_types <- c("rf", "lightgbm", "dynamic_regression") # , "fnn")
station <- "DESN025"
station_name <- "Leipzig-Mitte"
window_size <- 14
trend <- "linear"
# set dates
training_start <- lubridate::ymd("20100101")
training_end <- lubridate::ymd("20220301")
application_start <- lubridate::ymd("20220301")
application_end <- lubridate::ymd("20220831")
date_effect_start <- lubridate::ymd_hm("20220601 00:00")
buffer <- 0
# load data
params <- load_params()
env_data <- sample_data_DESN025
dt_prepared <- prepare_data_for_modelling(env_data, params)
dt_prepared <- dt_prepared[complete.cases(dt_prepared)]
split_data <- split_data_counterfactual(
dt_prepared, application_start,
application_end
)
predictions_all <- data.table::data.table()
metrics_all <- data.table::data.table()
for (model_type in model_types) {
print(paste("model = ", model_type))
print(paste("trend = ", trend))
res <- run_counterfactual(split_data,
params,
detrending_function = trend,
model_type = model_type,
alpha = 0.9,
log_transform = TRUE
)
counterfactual_plot <- plot_counterfactual(res$prediction, params,
window_size = window_size,
date_effect_start,
buffer = buffer
)
print(counterfactual_plot)
ggplot2::ggsave(paste0(model_type, "_", station, effect_title, ".png"),
plot = counterfactual_plot, width = 12, height = 4
)
# evaluation
metrics <- round(
calc_performance_metrics(res$prediction,
date_effect_start,
buffer = buffer
),
2
)
metrics["effect_size"] <- estimate_effect_size(res$prediction,
date_effect_start,
buffer = buffer,
verbose = FALSE
)["absolute_effect"]
print(metrics["effect_size"])
metrics["model"] <- model_type
metrics["trend"] <- trend
metrics["station"] <- station
metrics["station_name"] <- station_name
metrics["buffer_start"] <- format(
date_effect_start - as.difftime(buffer,
units = "hours"
),
"%Y-%m-%d"
)
metrics["effect_start"] <- format(date_effect_start, "%Y-%m-%d")
metrics_dt <- data.table::as.data.table(t(metrics))
predictions <- data.table::copy(res$prediction)
predictions[, station := station]
predictions[, model := model_type]
predictions[, trend := trend]
predictions_all <- rbind(predictions_all, predictions)
metrics_all <- rbind(metrics_all, metrics_dt)
}
#> [1] "model = rf"
#> [1] "trend = linear"

#> $effect_size
#> [1] -3.054844
#>
#> [1] "model = lightgbm"
#> [1] "trend = linear"
#> [LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002633 seconds.
#> You can set `force_col_wise=true` to remove the overhead.
#> [LightGBM] [Info] Total Bins 1557
#> [LightGBM] [Info] Number of data points in the train set: 103965, number of used features: 9
#> [LightGBM] [Info] Start training from score 0.000000

#> $effect_size
#> [1] -3.058521
#>
#> [1] "model = dynamic_regression"
#> [1] "trend = linear"
#> Using data for dynamic regression training from 2021-02-12 05:00:00 to 2022-02-28 23:00:00. Too long training series can lead to worse performance. Adjust this via the dynamic_regression$ntrain hyperparameter.

#> $effect_size
#> [1] -2.50006
predictions_save <- dplyr::select(
predictions_all,
c(
date,
value,
prediction,
prediction_lower,
prediction_upper,
station, model,
trend
)
)
predictions_save[, date := as.Date(date)]
# Calculate the window_size rolling mean on these daily aggregated values
predictions_save[, (names(predictions_save)[sapply(
predictions_save,
is.numeric
)]) :=
lapply(.SD, function(x) {
data.table::frollmean(x,
n = window_size * 24,
align = "center",
na.rm = TRUE
)
}),
by = .(model, trend),
.SDcols = names(predictions_save)[sapply(predictions_save, is.numeric)]
]
# reduce to daily mean for demonstrator export
daily_data <- predictions_save[, lapply(.SD, mean, na.rm = TRUE),
by = .(model, trend, date),
.SDcols = names(predictions_save)[sapply(predictions_save, is.numeric)]
]
data.table::fwrite(daily_data, file = paste0(
"preds_NO2_",
effect_title, "_",
station_name,
window_size,
"d_mean.csv"
))
data.table::fwrite(metrics_all, file = paste0(
"metrics_NO2_",
effect_title,
"_",
station_name,
".csv"
))