From b885294b77a5bc22d5681b0cd0f219acd08a662e Mon Sep 17 00:00:00 2001
From: Vivien Kraus <vivien@planete-kraus.eu>
Date: Fri, 19 Mar 2021 16:59:39 +0100
Subject: Initial commit

---
 R/strats.R | 377 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 377 insertions(+)
 create mode 100644 R/strats.R

(limited to 'R/strats.R')

diff --git a/R/strats.R b/R/strats.R
new file mode 100644
index 0000000..3b22d5c
--- /dev/null
+++ b/R/strats.R
@@ -0,0 +1,377 @@
+`%>%` <- magrittr::`%>%`
+
+#' @useDynLib strats strats_compute_laplacian_matrix_product
+strats_laplacian_matrix_product <- function (temporal_dependency, X) {
+    out <- matrix (0, nrow (X), ncol (X))
+    n <- as.integer (nrow (X))
+    d <- as.integer (ncol (X))
+    temporal_dependency <- as.numeric (temporal_dependency)
+    omega <- as.integer (length (temporal_dependency))
+    x <- as.numeric (X)
+    .C (strats_compute_laplacian_matrix_product,
+        n, d, omega, temporal_dependency, x, out)[[6]]
+}
+
+## Check the matrix product
+check_matrix_product <- function () {
+    X <- matrix (rnorm (24), 6, 4)
+    temporal_dependency <- c (0.5, 0.2, 0.1)
+    M <- matrix (0, 6, 6)
+    for (omega in 0:3) {
+        mij <- 1
+        if (omega > 0) {
+            mij <- temporal_dependency[omega]
+        }
+        for (i in 1:6) {
+            j_plus <- i + omega
+            j_minus <- i - omega
+            if (j_plus <= 6) {
+                M[i, j_plus] <- mij
+            }
+            if (j_minus >= 1) {
+                M[i, j_minus] <- mij
+            }
+        }
+    }
+    D <- diag (rowSums (M), 6, 6)
+    L <- D - M
+    Y <- L %*% X
+    Yhat <- strats_laplacian_matrix_product (temporal_dependency, X)
+    stopifnot (nrow (Yhat) == nrow (Y))
+    stopifnot (ncol (Yhat) == ncol (Y))
+    stopifnot (max (abs (Yhat - Y)) < 1e-8)
+}
+
+#' Compute the STRATS loss
+#'
+#' @param T_normal a normal subset of the time series
+#' @param T the time series
+#' @param alpha the global detection regularizer
+#' @param beta the residual analysis regularizer
+#' @param gamma the contextual detection regularizer
+#' @param dependency a vector giving the temporal dependency between
+#'     an observation and the next observations (the temporal
+#'     dependency is symmetric)
+#' @param W the reconstruction matrix
+#' @param R the residual matrix
+#' @return the loss
+#' @export
+strats_loss <- function (T_normal, T, alpha, beta, gamma, dependency, W, R) {
+    non_residual <- R - T
+    (sum ((T - crossprod (W, T_normal) - R) ^ 2)
+        + alpha * sum (sqrt (rowSums (W ^ 2)))
+        + beta * sum (sqrt (rowSums (R ^ 2)))
+        + gamma * sum (non_residual * strats_laplacian_matrix_product (dependency, non_residual)))
+}
+
+#' Compute the STRATS gradient wrt W
+#'
+#' @param T_normal a normal subset of the time series
+#' @param T the time series
+#' @param alpha the global detection regularizer
+#' @param beta the residual analysis regularizer
+#' @param gamma the contextual detection regularizer
+#' @param dependency a vector giving the temporal dependency between
+#'     an observation and the next observations (the temporal
+#'     dependency is symmetric)
+#' @param W the reconstruction matrix
+#' @param R the residual matrix
+#' @return the gradient wrt W
+#' @export
+strats_gradient_w <- function (T_normal, T, alpha, beta, gamma, dependency, W, R) {
+    Dw <- 1 / (2 * sqrt (rowSums (W ^ 2)))
+    Dw[!is.finite (Dw)] <- 0
+    M <- tcrossprod (T_normal)
+    diag (M) <- diag (M) + alpha * Dw
+    2 * (M %*% W - tcrossprod (T_normal, T) + tcrossprod (T_normal, R))
+}
+
+#' Compute the STRATS gradient wrt R
+#'
+#' @param T_normal a normal subset of the time series
+#' @param T the time series
+#' @param alpha the global detection regularizer
+#' @param beta the residual analysis regularizer
+#' @param gamma the contextual detection regularizer
+#' @param dependency a vector giving the temporal dependency between
+#'     an observation and the next observations (the temporal
+#'     dependency is symmetric)
+#' @param W the reconstruction matrix
+#' @param R the residual matrix
+#' @return the gradient wrt R
+#' @export
+strats_gradient_r <- function (T_normal, T, alpha, beta, gamma, dependency, W, R) {
+    Dr <- 1 / (2 * sqrt (rowSums (R ^ 2)))
+    Dr[!is.finite (Dr)] <- 0
+    2 * (crossprod (W, T_normal) - T + R + beta * sweep (R, 1, Dr, `*`) + gamma * strats_laplacian_matrix_product (dependency, R - T))
+}
+
+gradient_error <- function (T_normal, T, alpha, beta, gamma, dependency, W, R) {
+    gW <- matrix (0, nrow (T_normal), nrow (T))
+    gR <- matrix (0, nrow (T), ncol (T))
+    for (i in 1:nrow (T_normal)) {
+        for (j in 1:nrow (T)) {
+            W_minus <- W
+            W_plus <- W
+            W_minus[i, j] <- W_minus[i, j] - 1e-4
+            W_plus[i, j] <- W_plus[i, j] + 1e-4
+            l_minus <- strats_loss (T_normal, T, alpha, beta, gamma, dependency,
+                                    W_minus, R)
+            l_plus <- strats_loss (T_normal, T, alpha, beta, gamma, dependency,
+                                   W_plus, R)
+            gW[i, j] <- (l_plus - l_minus) / (2 * 1e-4)
+        }
+    }
+    for (i in 1:nrow (T)) {
+        for (j in 1:ncol (T)) {
+            R_minus <- R
+            R_plus <- R
+            R_minus[i, j] <- R_minus[i, j] - 1e-4
+            R_plus[i, j] <- R_plus[i, j] + 1e-4
+            l_minus <- strats_loss (T_normal, T, alpha, beta, gamma, dependency,
+                                    W, R_minus)
+            l_plus <- strats_loss (T_normal, T, alpha, beta, gamma, dependency,
+                                   W, R_plus)
+            gR[i, j] <- (l_plus - l_minus) / (2 * 1e-4)
+        }
+    }
+    gW_true <- strats_gradient_w (T_normal, T, alpha, beta, gamma, dependency, W, R)
+    gR_true <- strats_gradient_r (T_normal, T, alpha, beta, gamma, dependency, W, R)
+    max (max (abs (gW - gW_true)), max (abs (gR - gR_true)))
+}
+
+check_gradient <- function () {
+    set.seed (1)
+    for (i in 1:100) {
+        n <- 100
+        n_train <- 25
+        d <- 10
+        dependency <- c (0.5, 0.2, 0.1, 0.05, 0.02, 0.01, 0.005, 0.002, 0.001)
+        T_normal <- matrix (rnorm (n_train * d), n_train, d)
+        T <- matrix (rnorm (n * d), n, d)
+        W <- matrix (rnorm (n_train * n), n_train, n)
+        R <- matrix (rnorm (n * d), n, d)
+        stopifnot (gradient_error (T_normal, T, 1000, 1000, 1000, dependency, W, R)
+                   <= 1e-4)
+    }
+}
+
+#' Check that the step size is adapted.
+#'
+#' @param T_normal a normal subset of the time series
+#' @param T the time series
+#' @param alpha the global detection regularizer
+#' @param beta the residual analysis regularizer
+#' @param gamma the contextual detection regularizer
+#' @param dependency a vector giving the temporal dependency between
+#'     an observation and the next observations (the temporal
+#'     dependency is symmetric)
+#' @param W the reconstruction matrix
+#' @param R the residual matrix
+#' @param gW the gradient wrt W at W
+#' @param gR the gradient wrt R at R
+#' @param step_w the step size for W (may be 0 to only update R)
+#' @param step_r the step size for R (may be 0 to only update W)
+#' @param loss_value the loss value at (W, R)
+#' @return whether the step sizes for W and R are correct
+#' @export
+armijo <- function (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step_w, step_r, loss_value) {
+    nxt_w <- W - step_w * gW
+    nxt_r <- R - step_r * gR
+    loss_next <- strats_loss (T_normal, T, alpha, beta, gamma, dependency, nxt_w, nxt_r)
+    loss_limit <- (loss_value
+        + sum ((nxt_w - W) * gW)
+        + sum ((nxt_r - R) * gR)
+        + ifelse (step_w == 0, 0, 0.5 * sum ((nxt_w - W) ^ 2) / step_w)
+        + ifelse (step_r == 0, 0, 0.5 * sum ((nxt_r - R) ^ 2) / step_r))
+    (loss_next <= loss_limit)
+}
+
+line_search_w <- function (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step, loss_value) {
+    if (armijo (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step, 0, loss_value)) {
+        step
+    } else {
+        line_search_w (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step / 2, loss_value)
+    }
+}
+
+line_search_r <- function (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step, loss_value) {
+    if (armijo (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, 0, step, loss_value)) {
+        step
+    } else {
+        line_search_r (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step / 2, loss_value)
+    }
+}
+
+strats_iteration <- function (T_normal, T, alpha, beta, gamma, dependency,
+                              loss_value = NULL,
+                              W = NULL, R = NULL, step_w = NULL, step_r = NULL) {
+    if (is.null (W)) {
+        W <- diag (1, nrow (T_normal), nrow (T))
+        row.names (W) <- row.names (T_normal)
+        colnames (W) <- row.names (T)
+    }
+    if (is.null (R)) {
+        R <- matrix (0, nrow (T), ncol (T))
+        row.names (R) <- row.names (T)
+        colnames (R) <- colnames (T)
+    }
+    if (is.null (loss_value)) {
+        loss_value <- strats_loss (T_normal, T, alpha, beta, gamma, dependency, W, R)
+    }
+    if (is.null (step_w)) {
+        step_w <- 1
+    }
+    if (is.null (step_r)) {
+        step_r <- 1
+    }
+    ## Optimize W:
+    gW <- strats_gradient_w (T_normal, T, alpha, beta, gamma, dependency, W, R)
+    gR <- matrix (0, nrow (T), ncol (T))
+    step_w <- line_search_w (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step_w, loss_value)
+    W <- W - step_w * gW
+    loss_value <- strats_loss (T_normal, T, alpha, beta, gamma, dependency, W, R)
+    ## Optimize R:
+    gW <- matrix (0, nrow (T_normal), nrow (T))
+    gR <- strats_gradient_r (T_normal, T, alpha, beta, gamma, dependency, W, R)
+    step_r <- line_search_r (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step_r, loss_value)
+    R <- R - step_r * gR
+    loss_value <- strats_loss (T_normal, T, alpha, beta, gamma, dependency, W, R)
+    list (loss_value = loss_value,
+          W = W, R = R, step_w = step_w, step_r = step_r)
+}
+
+strats_sample <- function (T, n_points) {
+    i <- ceiling (nrow (T) * ((1:n_points) / n_points))
+    T[i,, drop = FALSE]
+}
+
+#' Apply the STRATS algorithm.
+#'
+#' The algorithm uses a normal time series, it is either passed as
+#' T_normal, or sampled from T with n_normal points.
+#'
+#' @param T the time series
+#' @param alpha the global detection regularizer
+#' @param beta the residual analysis regularizer
+#' @param gamma the contextual detection regularizer
+#' @param dependency a vector giving the temporal dependency between
+#'     an observation and the next observations (the temporal
+#'     dependency is symmetric)
+#' @param T_normal a time series with normal data
+#' @param n_normal the number of normal points to sample
+#' @param maxiter how many gradient descent iterations to make
+#' @return the STRATS model, as a list with key 'anomalies' bound to a
+#'     tibble with 'observation' (the index in T), 'score' (the
+#'     anomaly score) and 'rank' (the anomaly rank).
+#' @export
+strats <- function (T, alpha, beta, gamma, dependency,
+                    T_normal = NULL,
+                    n_normal = NULL,
+                    maxiter = 100) {
+    if (is.null (T_normal)
+        && is.null (n_normal)) {
+        stop ("Either pas n_normal, the number of samples to take for normal, or pass T_normal")
+    }
+    if (is.null (T_normal)) {
+        T_normal <- strats_sample (T, n_normal)
+    }
+    T <- as.matrix (T)
+    T_normal <- as.matrix (T_normal)
+    dependency <- c (dependency)
+    stopifnot (is.finite (alpha))
+    stopifnot (is.finite (beta))
+    stopifnot (is.finite (gamma))
+    stopifnot (alpha >= 0)
+    stopifnot (beta >= 0)
+    stopifnot (gamma >= 0)
+    stopifnot (all (is.finite (dependency)))
+    stopifnot (all (dependency >= 0))
+    stopifnot (all (is.finite (T)))
+    stopifnot (all (is.finite (T_normal)))
+    stopifnot (ncol (T) == ncol (T_normal))
+    model <- list ()
+    for (i in 1:maxiter) {
+        last_loss <- model$loss_value
+        model <- strats_iteration (T_normal, T, alpha, beta, gamma, dependency,
+                                   model$loss_value,
+                                   model$W, model$R, model$step_w, model$step_r)
+        if (!is.null (last_loss)) {
+            stopifnot (model$loss_value <= last_loss)
+        }
+    }
+    model$anomalies <- (tibble::tibble (observation = seq_len (nrow (model$R)),
+                                        score = rowSums (model$R ^ 2))
+        %>% dplyr::arrange (-score)
+        %>% dplyr::mutate (rank = seq_len (nrow (model$R))))
+    model
+}
+
+#' Test a configuration for STRATS with synthetic anomaly injection
+#'
+#' @param T the time series
+#' @param alpha the global detection regularizer
+#' @param beta the residual analysis regularizer
+#' @param gamma the contextual detection regularizer
+#' @param dependency a vector giving the temporal dependency between
+#'     an observation and the next observations (the temporal
+#'     dependency is symmetric)
+#' @param T_normal a time series with normal data
+#' @param n_normal the number of normal points to sample
+#' @param maxiter how many gradient descent iterations to make
+#' @return the mean rank of the injected anomalies. Lower is better.
+#' @export
+strats_test <- function (T, alpha, beta, gamma, dependency,
+                         T_normal = NULL,
+                         n_normal = NULL,
+                         maxiter = 100) {
+    ## Inject 1 anomaly, learn STRATS, and check the rank of the
+    ## anomaly. Repeat 10 times with different anomalies. Return the
+    ## mean.
+    do.call (mean, lapply (1:10, function (seed) {
+        set.seed (seed)
+        i_base <- sample (seq_len (nrow (T)), 1)
+        i_anomaly <- sample (seq_len (nrow (T)), 1)
+        which_swapped <- sample (seq_len (ncol (T)), ceiling (ncol (T) / 2))
+        t <- T[i_anomaly,]
+        t_base <- T[i_base,]
+        t[which_swapped] <- t_base[which_swapped]
+        T[i_anomaly,] <- t
+        model <- strats (T, alpha, beta, gamma, dependency,
+                         T_normal = T_normal, n_normal = n_normal,
+                         maxiter = maxiter)
+        (model$anomalies
+            %>% dplyr::filter (observation == i_anomaly))$rank[1]
+    }))
+}
+
+#' Tune STRATS with synthetic anomalies
+#'
+#' @param T the time series
+#' @param dependency a vector giving the temporal dependency between
+#'     an observation and the next observations (the temporal
+#'     dependency is symmetric)
+#' @param T_normal a time series with normal data
+#' @param n_normal the number of normal points to sample
+#' @param maxiter how many gradient descent iterations to make
+#' @return a tibble, with columns alpha, beta, gamma, and error (the error
+#'     on synthetic anomalies).
+#' @export
+strats_tune <- function (T, dependency,
+                         T_normal = NULL,
+                         n_normal = NULL,
+                         maxiter = 100) {
+    set.seed (1)
+    do.call (rbind, lapply (1:20, function (seed) {
+        set.seed (seed)
+        alpha <- signif (10^runif (1, -3, 3), 1)
+        beta <- signif (10^runif (1, -3, 3), 1)
+        gamma <- signif (10^runif (1, -3, 3), 1)
+        error <- strats_test (T, alpha, beta, gamma, dependency,
+                              T_normal = T_normal,
+                              n_normal = n_normal,
+                              maxiter = maxiter)
+        tibble::tibble (alpha = alpha, beta = beta, gamma = gamma,
+                        error = error)
+    }))
+}
-- 
cgit v1.2.3