From b885294b77a5bc22d5681b0cd0f219acd08a662e Mon Sep 17 00:00:00 2001 From: Vivien Kraus Date: Fri, 19 Mar 2021 16:59:39 +0100 Subject: Initial commit --- R/strats.R | 377 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 377 insertions(+) create mode 100644 R/strats.R (limited to 'R/strats.R') diff --git a/R/strats.R b/R/strats.R new file mode 100644 index 0000000..3b22d5c --- /dev/null +++ b/R/strats.R @@ -0,0 +1,377 @@ +`%>%` <- magrittr::`%>%` + +#' @useDynLib strats strats_compute_laplacian_matrix_product +strats_laplacian_matrix_product <- function (temporal_dependency, X) { + out <- matrix (0, nrow (X), ncol (X)) + n <- as.integer (nrow (X)) + d <- as.integer (ncol (X)) + temporal_dependency <- as.numeric (temporal_dependency) + omega <- as.integer (length (temporal_dependency)) + x <- as.numeric (X) + .C (strats_compute_laplacian_matrix_product, + n, d, omega, temporal_dependency, x, out)[[6]] +} + +## Check the matrix product +check_matrix_product <- function () { + X <- matrix (rnorm (24), 6, 4) + temporal_dependency <- c (0.5, 0.2, 0.1) + M <- matrix (0, 6, 6) + for (omega in 0:3) { + mij <- 1 + if (omega > 0) { + mij <- temporal_dependency[omega] + } + for (i in 1:6) { + j_plus <- i + omega + j_minus <- i - omega + if (j_plus <= 6) { + M[i, j_plus] <- mij + } + if (j_minus >= 1) { + M[i, j_minus] <- mij + } + } + } + D <- diag (rowSums (M), 6, 6) + L <- D - M + Y <- L %*% X + Yhat <- strats_laplacian_matrix_product (temporal_dependency, X) + stopifnot (nrow (Yhat) == nrow (Y)) + stopifnot (ncol (Yhat) == ncol (Y)) + stopifnot (max (abs (Yhat - Y)) < 1e-8) +} + +#' Compute the STRATS loss +#' +#' @param T_normal a normal subset of the time series +#' @param T the time series +#' @param alpha the global detection regularizer +#' @param beta the residual analysis regularizer +#' @param gamma the contextual detection regularizer +#' @param dependency a vector giving the temporal dependency between +#' an observation and the next observations (the temporal +#' dependency is symmetric) +#' @param W the reconstruction matrix +#' @param R the residual matrix +#' @return the loss +#' @export +strats_loss <- function (T_normal, T, alpha, beta, gamma, dependency, W, R) { + non_residual <- R - T + (sum ((T - crossprod (W, T_normal) - R) ^ 2) + + alpha * sum (sqrt (rowSums (W ^ 2))) + + beta * sum (sqrt (rowSums (R ^ 2))) + + gamma * sum (non_residual * strats_laplacian_matrix_product (dependency, non_residual))) +} + +#' Compute the STRATS gradient wrt W +#' +#' @param T_normal a normal subset of the time series +#' @param T the time series +#' @param alpha the global detection regularizer +#' @param beta the residual analysis regularizer +#' @param gamma the contextual detection regularizer +#' @param dependency a vector giving the temporal dependency between +#' an observation and the next observations (the temporal +#' dependency is symmetric) +#' @param W the reconstruction matrix +#' @param R the residual matrix +#' @return the gradient wrt W +#' @export +strats_gradient_w <- function (T_normal, T, alpha, beta, gamma, dependency, W, R) { + Dw <- 1 / (2 * sqrt (rowSums (W ^ 2))) + Dw[!is.finite (Dw)] <- 0 + M <- tcrossprod (T_normal) + diag (M) <- diag (M) + alpha * Dw + 2 * (M %*% W - tcrossprod (T_normal, T) + tcrossprod (T_normal, R)) +} + +#' Compute the STRATS gradient wrt R +#' +#' @param T_normal a normal subset of the time series +#' @param T the time series +#' @param alpha the global detection regularizer +#' @param beta the residual analysis regularizer +#' @param gamma the contextual detection regularizer +#' @param dependency a vector giving the temporal dependency between +#' an observation and the next observations (the temporal +#' dependency is symmetric) +#' @param W the reconstruction matrix +#' @param R the residual matrix +#' @return the gradient wrt R +#' @export +strats_gradient_r <- function (T_normal, T, alpha, beta, gamma, dependency, W, R) { + Dr <- 1 / (2 * sqrt (rowSums (R ^ 2))) + Dr[!is.finite (Dr)] <- 0 + 2 * (crossprod (W, T_normal) - T + R + beta * sweep (R, 1, Dr, `*`) + gamma * strats_laplacian_matrix_product (dependency, R - T)) +} + +gradient_error <- function (T_normal, T, alpha, beta, gamma, dependency, W, R) { + gW <- matrix (0, nrow (T_normal), nrow (T)) + gR <- matrix (0, nrow (T), ncol (T)) + for (i in 1:nrow (T_normal)) { + for (j in 1:nrow (T)) { + W_minus <- W + W_plus <- W + W_minus[i, j] <- W_minus[i, j] - 1e-4 + W_plus[i, j] <- W_plus[i, j] + 1e-4 + l_minus <- strats_loss (T_normal, T, alpha, beta, gamma, dependency, + W_minus, R) + l_plus <- strats_loss (T_normal, T, alpha, beta, gamma, dependency, + W_plus, R) + gW[i, j] <- (l_plus - l_minus) / (2 * 1e-4) + } + } + for (i in 1:nrow (T)) { + for (j in 1:ncol (T)) { + R_minus <- R + R_plus <- R + R_minus[i, j] <- R_minus[i, j] - 1e-4 + R_plus[i, j] <- R_plus[i, j] + 1e-4 + l_minus <- strats_loss (T_normal, T, alpha, beta, gamma, dependency, + W, R_minus) + l_plus <- strats_loss (T_normal, T, alpha, beta, gamma, dependency, + W, R_plus) + gR[i, j] <- (l_plus - l_minus) / (2 * 1e-4) + } + } + gW_true <- strats_gradient_w (T_normal, T, alpha, beta, gamma, dependency, W, R) + gR_true <- strats_gradient_r (T_normal, T, alpha, beta, gamma, dependency, W, R) + max (max (abs (gW - gW_true)), max (abs (gR - gR_true))) +} + +check_gradient <- function () { + set.seed (1) + for (i in 1:100) { + n <- 100 + n_train <- 25 + d <- 10 + dependency <- c (0.5, 0.2, 0.1, 0.05, 0.02, 0.01, 0.005, 0.002, 0.001) + T_normal <- matrix (rnorm (n_train * d), n_train, d) + T <- matrix (rnorm (n * d), n, d) + W <- matrix (rnorm (n_train * n), n_train, n) + R <- matrix (rnorm (n * d), n, d) + stopifnot (gradient_error (T_normal, T, 1000, 1000, 1000, dependency, W, R) + <= 1e-4) + } +} + +#' Check that the step size is adapted. +#' +#' @param T_normal a normal subset of the time series +#' @param T the time series +#' @param alpha the global detection regularizer +#' @param beta the residual analysis regularizer +#' @param gamma the contextual detection regularizer +#' @param dependency a vector giving the temporal dependency between +#' an observation and the next observations (the temporal +#' dependency is symmetric) +#' @param W the reconstruction matrix +#' @param R the residual matrix +#' @param gW the gradient wrt W at W +#' @param gR the gradient wrt R at R +#' @param step_w the step size for W (may be 0 to only update R) +#' @param step_r the step size for R (may be 0 to only update W) +#' @param loss_value the loss value at (W, R) +#' @return whether the step sizes for W and R are correct +#' @export +armijo <- function (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step_w, step_r, loss_value) { + nxt_w <- W - step_w * gW + nxt_r <- R - step_r * gR + loss_next <- strats_loss (T_normal, T, alpha, beta, gamma, dependency, nxt_w, nxt_r) + loss_limit <- (loss_value + + sum ((nxt_w - W) * gW) + + sum ((nxt_r - R) * gR) + + ifelse (step_w == 0, 0, 0.5 * sum ((nxt_w - W) ^ 2) / step_w) + + ifelse (step_r == 0, 0, 0.5 * sum ((nxt_r - R) ^ 2) / step_r)) + (loss_next <= loss_limit) +} + +line_search_w <- function (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step, loss_value) { + if (armijo (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step, 0, loss_value)) { + step + } else { + line_search_w (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step / 2, loss_value) + } +} + +line_search_r <- function (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step, loss_value) { + if (armijo (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, 0, step, loss_value)) { + step + } else { + line_search_r (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step / 2, loss_value) + } +} + +strats_iteration <- function (T_normal, T, alpha, beta, gamma, dependency, + loss_value = NULL, + W = NULL, R = NULL, step_w = NULL, step_r = NULL) { + if (is.null (W)) { + W <- diag (1, nrow (T_normal), nrow (T)) + row.names (W) <- row.names (T_normal) + colnames (W) <- row.names (T) + } + if (is.null (R)) { + R <- matrix (0, nrow (T), ncol (T)) + row.names (R) <- row.names (T) + colnames (R) <- colnames (T) + } + if (is.null (loss_value)) { + loss_value <- strats_loss (T_normal, T, alpha, beta, gamma, dependency, W, R) + } + if (is.null (step_w)) { + step_w <- 1 + } + if (is.null (step_r)) { + step_r <- 1 + } + ## Optimize W: + gW <- strats_gradient_w (T_normal, T, alpha, beta, gamma, dependency, W, R) + gR <- matrix (0, nrow (T), ncol (T)) + step_w <- line_search_w (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step_w, loss_value) + W <- W - step_w * gW + loss_value <- strats_loss (T_normal, T, alpha, beta, gamma, dependency, W, R) + ## Optimize R: + gW <- matrix (0, nrow (T_normal), nrow (T)) + gR <- strats_gradient_r (T_normal, T, alpha, beta, gamma, dependency, W, R) + step_r <- line_search_r (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step_r, loss_value) + R <- R - step_r * gR + loss_value <- strats_loss (T_normal, T, alpha, beta, gamma, dependency, W, R) + list (loss_value = loss_value, + W = W, R = R, step_w = step_w, step_r = step_r) +} + +strats_sample <- function (T, n_points) { + i <- ceiling (nrow (T) * ((1:n_points) / n_points)) + T[i,, drop = FALSE] +} + +#' Apply the STRATS algorithm. +#' +#' The algorithm uses a normal time series, it is either passed as +#' T_normal, or sampled from T with n_normal points. +#' +#' @param T the time series +#' @param alpha the global detection regularizer +#' @param beta the residual analysis regularizer +#' @param gamma the contextual detection regularizer +#' @param dependency a vector giving the temporal dependency between +#' an observation and the next observations (the temporal +#' dependency is symmetric) +#' @param T_normal a time series with normal data +#' @param n_normal the number of normal points to sample +#' @param maxiter how many gradient descent iterations to make +#' @return the STRATS model, as a list with key 'anomalies' bound to a +#' tibble with 'observation' (the index in T), 'score' (the +#' anomaly score) and 'rank' (the anomaly rank). +#' @export +strats <- function (T, alpha, beta, gamma, dependency, + T_normal = NULL, + n_normal = NULL, + maxiter = 100) { + if (is.null (T_normal) + && is.null (n_normal)) { + stop ("Either pas n_normal, the number of samples to take for normal, or pass T_normal") + } + if (is.null (T_normal)) { + T_normal <- strats_sample (T, n_normal) + } + T <- as.matrix (T) + T_normal <- as.matrix (T_normal) + dependency <- c (dependency) + stopifnot (is.finite (alpha)) + stopifnot (is.finite (beta)) + stopifnot (is.finite (gamma)) + stopifnot (alpha >= 0) + stopifnot (beta >= 0) + stopifnot (gamma >= 0) + stopifnot (all (is.finite (dependency))) + stopifnot (all (dependency >= 0)) + stopifnot (all (is.finite (T))) + stopifnot (all (is.finite (T_normal))) + stopifnot (ncol (T) == ncol (T_normal)) + model <- list () + for (i in 1:maxiter) { + last_loss <- model$loss_value + model <- strats_iteration (T_normal, T, alpha, beta, gamma, dependency, + model$loss_value, + model$W, model$R, model$step_w, model$step_r) + if (!is.null (last_loss)) { + stopifnot (model$loss_value <= last_loss) + } + } + model$anomalies <- (tibble::tibble (observation = seq_len (nrow (model$R)), + score = rowSums (model$R ^ 2)) + %>% dplyr::arrange (-score) + %>% dplyr::mutate (rank = seq_len (nrow (model$R)))) + model +} + +#' Test a configuration for STRATS with synthetic anomaly injection +#' +#' @param T the time series +#' @param alpha the global detection regularizer +#' @param beta the residual analysis regularizer +#' @param gamma the contextual detection regularizer +#' @param dependency a vector giving the temporal dependency between +#' an observation and the next observations (the temporal +#' dependency is symmetric) +#' @param T_normal a time series with normal data +#' @param n_normal the number of normal points to sample +#' @param maxiter how many gradient descent iterations to make +#' @return the mean rank of the injected anomalies. Lower is better. +#' @export +strats_test <- function (T, alpha, beta, gamma, dependency, + T_normal = NULL, + n_normal = NULL, + maxiter = 100) { + ## Inject 1 anomaly, learn STRATS, and check the rank of the + ## anomaly. Repeat 10 times with different anomalies. Return the + ## mean. + do.call (mean, lapply (1:10, function (seed) { + set.seed (seed) + i_base <- sample (seq_len (nrow (T)), 1) + i_anomaly <- sample (seq_len (nrow (T)), 1) + which_swapped <- sample (seq_len (ncol (T)), ceiling (ncol (T) / 2)) + t <- T[i_anomaly,] + t_base <- T[i_base,] + t[which_swapped] <- t_base[which_swapped] + T[i_anomaly,] <- t + model <- strats (T, alpha, beta, gamma, dependency, + T_normal = T_normal, n_normal = n_normal, + maxiter = maxiter) + (model$anomalies + %>% dplyr::filter (observation == i_anomaly))$rank[1] + })) +} + +#' Tune STRATS with synthetic anomalies +#' +#' @param T the time series +#' @param dependency a vector giving the temporal dependency between +#' an observation and the next observations (the temporal +#' dependency is symmetric) +#' @param T_normal a time series with normal data +#' @param n_normal the number of normal points to sample +#' @param maxiter how many gradient descent iterations to make +#' @return a tibble, with columns alpha, beta, gamma, and error (the error +#' on synthetic anomalies). +#' @export +strats_tune <- function (T, dependency, + T_normal = NULL, + n_normal = NULL, + maxiter = 100) { + set.seed (1) + do.call (rbind, lapply (1:20, function (seed) { + set.seed (seed) + alpha <- signif (10^runif (1, -3, 3), 1) + beta <- signif (10^runif (1, -3, 3), 1) + gamma <- signif (10^runif (1, -3, 3), 1) + error <- strats_test (T, alpha, beta, gamma, dependency, + T_normal = T_normal, + n_normal = n_normal, + maxiter = maxiter) + tibble::tibble (alpha = alpha, beta = beta, gamma = gamma, + error = error) + })) +} -- cgit v1.2.3