neun_euro_ticket_demonstrator
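
This demonstrator trains counterfactual models (random forest, LightGBM, and dynamic regression) on NO2 measurements from the Leipzig-Mitte station (DESN025) and estimates the effect of the 9-Euro-Ticket period (June to August 2022) on the measured concentrations, exporting smoothed daily predictions and evaluation metrics for the demonstrator.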

library(ubair)
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
effect_title <- "neun_euro_ticket"
model_types <- c("rf", "lightgbm", "dynamic_regression") # "fnn" can be added as a fourth model
station <- "DESN025"
station_name <- "Leipzig-Mitte"
window_size <- 14 # days; rolling-mean window for plots and the exported series
trend <- "linear" # detrending function passed to run_counterfactual()

# set training and application periods and the start of the expected effect
training_start <- lubridate::ymd("20100101")
training_end <- lubridate::ymd("20220301")
application_start <- lubridate::ymd("20220301")
application_end <- lubridate::ymd("20220831")
# the 9-Euro-Ticket was valid from 1 June to 31 August 2022
date_effect_start <- lubridate::ymd_hm("20220601 00:00")
buffer <- 0 # hours before the effect start excluded from the evaluation
# load default parameters and the packaged sample data for station DESN025
params <- load_params()
env_data <- sample_data_DESN025
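# Optional: inspect the raw sample data before preparing it for modelling
str(env_data)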

dt_prepared <- prepare_data_for_modelling(env_data, params)
dt_prepared <- dt_prepared[complete.cases(dt_prepared)] # keep only complete rows
split_data <- split_data_counterfactual(
  dt_prepared, application_start,
  application_end
)
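# Optional sanity check on the split. A minimal sketch assuming
# split_data_counterfactual() returns a list of training and application
# data.tables, each carrying the "date" column used further below.
lapply(split_data, function(dt) range(dt$date))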
# collect predictions and evaluation metrics across all models
predictions_all <- data.table::data.table()
metrics_all <- data.table::data.table()
for (model_type in model_types) {
  print(paste("model = ", model_type))
  print(paste("trend = ", trend))

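  # train the selected model on the (linearly detrended, log-transformed)
  # training data and predict the counterfactual NO2 concentration for the
  # application period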
  res <- run_counterfactual(split_data,
    params,
    detrending_function = trend,
    model_type = model_type,
    alpha = 0.9,
    log_transform = TRUE
  )
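  # res$prediction holds the observed values together with the counterfactual
  # prediction and its lower/upper bounds for the application period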
  counterfactual_plot <- plot_counterfactual(res$prediction, params,
    window_size = window_size,
    date_effect_start = date_effect_start,
    buffer = buffer
  )
  print(counterfactual_plot)
  ggplot2::ggsave(paste0(model_type, "_", station, "_", effect_title, ".png"),
    plot = counterfactual_plot, width = 12, height = 4
  )

  # evaluation
  metrics <- round(
    calc_performance_metrics(res$prediction,
      date_effect_start,
      buffer = buffer
    ),
    2
  )
  metrics["effect_size"] <- estimate_effect_size(res$prediction,
    date_effect_start,
    buffer = buffer,
    verbose = FALSE
  )["absolute_effect"]
  print(metrics["effect_size"])
  metrics["model"] <- model_type
  metrics["trend"] <- trend
  metrics["station"] <- station
  metrics["station_name"] <- station_name
  metrics["buffer_start"] <- format(
    date_effect_start - as.difftime(buffer,
      units = "hours"
    ),
    "%Y-%m-%d"
  )
  metrics["effect_start"] <- format(date_effect_start, "%Y-%m-%d")
  metrics_dt <- data.table::as.data.table(t(metrics))

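  # label the predictions with station, model, and trend before collecting them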
  predictions <- data.table::copy(res$prediction)
  predictions[, station := station]
  predictions[, model := model_type]
  predictions[, trend := trend]

  predictions_all <- rbind(predictions_all, predictions)
  metrics_all <- rbind(metrics_all, metrics_dt)
}
#> [1] "model =  rf"
#> [1] "trend =  linear"

#> $effect_size
#> [1] -3.054844
#> 
#> [1] "model =  lightgbm"
#> [1] "trend =  linear"
#> [LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002633 seconds.
#> You can set `force_col_wise=true` to remove the overhead.
#> [LightGBM] [Info] Total Bins 1557
#> [LightGBM] [Info] Number of data points in the train set: 103965, number of used features: 9
#> [LightGBM] [Info] Start training from score 0.000000

#> $effect_size
#> [1] -3.058521
#> 
#> [1] "model =  dynamic_regression"
#> [1] "trend =  linear"
#> Using data for dynamic regression training from  2021-02-12 05:00:00 to  2022-02-28 23:00:00. Too long training series can lead to worse performance. Adjust this via the dynamic_regression$ntrain hyperparameter.

#> $effect_size
#> [1] -2.50006
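# Compare the models side by side on the columns set explicitly in the loop
# above; the remaining metric columns come from calc_performance_metrics()
print(metrics_all[, .(model, trend, effect_size)])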
# select the columns needed for the demonstrator export
predictions_save <- dplyr::select(
  predictions_all,
  c(
    date,
    value,
    prediction,
    prediction_lower,
    prediction_upper,
    station, model,
    trend
  )
)
predictions_save[, date := as.Date(date)]
# Calculate a centred window_size-day rolling mean on the hourly values
# (window_size * 24 hours) for each model
predictions_save[, (names(predictions_save)[sapply(
  predictions_save,
  is.numeric
)]) :=
  lapply(.SD, function(x) {
    data.table::frollmean(x,
      n = window_size * 24,
      align = "center",
      na.rm = TRUE
    )
  }),
by = .(model, trend),
.SDcols = names(predictions_save)[sapply(predictions_save, is.numeric)]
]
# reduce to daily mean for demonstrator export
daily_data <- predictions_save[, lapply(.SD, mean, na.rm = TRUE),
  by = .(model, trend, date),
  .SDcols = names(predictions_save)[sapply(predictions_save, is.numeric)]
]
data.table::fwrite(daily_data, file = paste0(
  "preds_NO2_",
  effect_title, "_",
  station_name, "_",
  window_size,
  "d_mean.csv"
))
data.table::fwrite(metrics_all, file = paste0(
  "metrics_NO2_",
  effect_title,
  "_",
  station_name,
  ".csv"
))
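# Optional: a quick visual check of the exported daily series, plotting the
# observed (grey) and counterfactual (blue) rolling means for each model. A
# minimal sketch based on the columns created above; colours and layout are
# illustrative only.
demo_plot <- ggplot2::ggplot(daily_data, ggplot2::aes(x = date)) +
  ggplot2::geom_line(ggplot2::aes(y = value), colour = "grey40") +
  ggplot2::geom_line(ggplot2::aes(y = prediction), colour = "steelblue") +
  ggplot2::geom_vline(
    xintercept = as.Date(date_effect_start),
    linetype = "dashed"
  ) +
  ggplot2::facet_wrap(~model, ncol = 1) +
  ggplot2::labs(x = NULL, y = "NO2 (rolling mean)")
print(demo_plot)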