From b885294b77a5bc22d5681b0cd0f219acd08a662e Mon Sep 17 00:00:00 2001
From: Vivien Kraus <vivien@planete-kraus.eu>
Date: Fri, 19 Mar 2021 16:59:39 +0100
Subject: Initial commit

---
 R/strats.R                      | 377 ++++++++++++++++++++++++++++++++++++++++
 bootstrap                       |  38 ++++
 inst/bin/strats                 |  42 +++++
 src/Makevars                    |   1 +
 src/strats.c                    |  52 ++++++
 tests/testthat/test-gradients.R |   3 +
 tests/testthat/test-product.R   |   3 +
 7 files changed, 516 insertions(+)
 create mode 100644 R/strats.R
 create mode 100755 bootstrap
 create mode 100755 inst/bin/strats
 create mode 100644 src/Makevars
 create mode 100644 src/strats.c
 create mode 100644 tests/testthat/test-gradients.R
 create mode 100644 tests/testthat/test-product.R

diff --git a/R/strats.R b/R/strats.R
new file mode 100644
index 0000000..3b22d5c
--- /dev/null
+++ b/R/strats.R
@@ -0,0 +1,377 @@
+`%>%` <- magrittr::`%>%`
+
+#' @useDynLib strats strats_compute_laplacian_matrix_product
+strats_laplacian_matrix_product <- function (temporal_dependency, X) {
+    out <- matrix (0, nrow (X), ncol (X))
+    n <- as.integer (nrow (X))
+    d <- as.integer (ncol (X))
+    temporal_dependency <- as.numeric (temporal_dependency)
+    omega <- as.integer (length (temporal_dependency))
+    x <- as.numeric (X)
+    .C (strats_compute_laplacian_matrix_product,
+        n, d, omega, temporal_dependency, x, out)[[6]]
+}
+
+## Check the matrix product
+check_matrix_product <- function () {
+    X <- matrix (rnorm (24), 6, 4)
+    temporal_dependency <- c (0.5, 0.2, 0.1)
+    M <- matrix (0, 6, 6)
+    for (omega in 0:3) {
+        mij <- 1
+        if (omega > 0) {
+            mij <- temporal_dependency[omega]
+        }
+        for (i in 1:6) {
+            j_plus <- i + omega
+            j_minus <- i - omega
+            if (j_plus <= 6) {
+                M[i, j_plus] <- mij
+            }
+            if (j_minus >= 1) {
+                M[i, j_minus] <- mij
+            }
+        }
+    }
+    D <- diag (rowSums (M), 6, 6)
+    L <- D - M
+    Y <- L %*% X
+    Yhat <- strats_laplacian_matrix_product (temporal_dependency, X)
+    stopifnot (nrow (Yhat) == nrow (Y))
+    stopifnot (ncol (Yhat) == ncol (Y))
+    stopifnot (max (abs (Yhat - Y)) < 1e-8)
+}
+
+#' Compute the STRATS loss
+#'
+#' @param T_normal a normal subset of the time series
+#' @param T the time series
+#' @param alpha the global detection regularizer
+#' @param beta the residual analysis regularizer
+#' @param gamma the contextual detection regularizer
+#' @param dependency a vector giving the temporal dependency between
+#'     an observation and the next observations (the temporal
+#'     dependency is symmetric)
+#' @param W the reconstruction matrix
+#' @param R the residual matrix
+#' @return the loss
+#' @export
+strats_loss <- function (T_normal, T, alpha, beta, gamma, dependency, W, R) {
+    non_residual <- R - T
+    (sum ((T - crossprod (W, T_normal) - R) ^ 2)
+        + alpha * sum (sqrt (rowSums (W ^ 2)))
+        + beta * sum (sqrt (rowSums (R ^ 2)))
+        + gamma * sum (non_residual * strats_laplacian_matrix_product (dependency, non_residual)))
+}
+
+#' Compute the STRATS gradient wrt W
+#'
+#' @param T_normal a normal subset of the time series
+#' @param T the time series
+#' @param alpha the global detection regularizer
+#' @param beta the residual analysis regularizer
+#' @param gamma the contextual detection regularizer
+#' @param dependency a vector giving the temporal dependency between
+#'     an observation and the next observations (the temporal
+#'     dependency is symmetric)
+#' @param W the reconstruction matrix
+#' @param R the residual matrix
+#' @return the gradient wrt W
+#' @export
+strats_gradient_w <- function (T_normal, T, alpha, beta, gamma, dependency, W, R) {
+    Dw <- 1 / (2 * sqrt (rowSums (W ^ 2)))
+    Dw[!is.finite (Dw)] <- 0
+    M <- tcrossprod (T_normal)
+    diag (M) <- diag (M) + alpha * Dw
+    2 * (M %*% W - tcrossprod (T_normal, T) + tcrossprod (T_normal, R))
+}
+
+#' Compute the STRATS gradient wrt R
+#'
+#' @param T_normal a normal subset of the time series
+#' @param T the time series
+#' @param alpha the global detection regularizer
+#' @param beta the residual analysis regularizer
+#' @param gamma the contextual detection regularizer
+#' @param dependency a vector giving the temporal dependency between
+#'     an observation and the next observations (the temporal
+#'     dependency is symmetric)
+#' @param W the reconstruction matrix
+#' @param R the residual matrix
+#' @return the gradient wrt R
+#' @export
+strats_gradient_r <- function (T_normal, T, alpha, beta, gamma, dependency, W, R) {
+    Dr <- 1 / (2 * sqrt (rowSums (R ^ 2)))
+    Dr[!is.finite (Dr)] <- 0
+    2 * (crossprod (W, T_normal) - T + R + beta * sweep (R, 1, Dr, `*`) + gamma * strats_laplacian_matrix_product (dependency, R - T))
+}
+
+gradient_error <- function (T_normal, T, alpha, beta, gamma, dependency, W, R) {
+    gW <- matrix (0, nrow (T_normal), nrow (T))
+    gR <- matrix (0, nrow (T), ncol (T))
+    for (i in 1:nrow (T_normal)) {
+        for (j in 1:nrow (T)) {
+            W_minus <- W
+            W_plus <- W
+            W_minus[i, j] <- W_minus[i, j] - 1e-4
+            W_plus[i, j] <- W_plus[i, j] + 1e-4
+            l_minus <- strats_loss (T_normal, T, alpha, beta, gamma, dependency,
+                                    W_minus, R)
+            l_plus <- strats_loss (T_normal, T, alpha, beta, gamma, dependency,
+                                   W_plus, R)
+            gW[i, j] <- (l_plus - l_minus) / (2 * 1e-4)
+        }
+    }
+    for (i in 1:nrow (T)) {
+        for (j in 1:ncol (T)) {
+            R_minus <- R
+            R_plus <- R
+            R_minus[i, j] <- R_minus[i, j] - 1e-4
+            R_plus[i, j] <- R_plus[i, j] + 1e-4
+            l_minus <- strats_loss (T_normal, T, alpha, beta, gamma, dependency,
+                                    W, R_minus)
+            l_plus <- strats_loss (T_normal, T, alpha, beta, gamma, dependency,
+                                   W, R_plus)
+            gR[i, j] <- (l_plus - l_minus) / (2 * 1e-4)
+        }
+    }
+    gW_true <- strats_gradient_w (T_normal, T, alpha, beta, gamma, dependency, W, R)
+    gR_true <- strats_gradient_r (T_normal, T, alpha, beta, gamma, dependency, W, R)
+    max (max (abs (gW - gW_true)), max (abs (gR - gR_true)))
+}
+
+check_gradient <- function () {
+    set.seed (1)
+    for (i in 1:100) {
+        n <- 100
+        n_train <- 25
+        d <- 10
+        dependency <- c (0.5, 0.2, 0.1, 0.05, 0.02, 0.01, 0.005, 0.002, 0.001)
+        T_normal <- matrix (rnorm (n_train * d), n_train, d)
+        T <- matrix (rnorm (n * d), n, d)
+        W <- matrix (rnorm (n_train * n), n_train, n)
+        R <- matrix (rnorm (n * d), n, d)
+        stopifnot (gradient_error (T_normal, T, 1000, 1000, 1000, dependency, W, R)
+                   <= 1e-4)
+    }
+}
+
+#' Check that the step size is adapted.
+#'
+#' @param T_normal a normal subset of the time series
+#' @param T the time series
+#' @param alpha the global detection regularizer
+#' @param beta the residual analysis regularizer
+#' @param gamma the contextual detection regularizer
+#' @param dependency a vector giving the temporal dependency between
+#'     an observation and the next observations (the temporal
+#'     dependency is symmetric)
+#' @param W the reconstruction matrix
+#' @param R the residual matrix
+#' @param gW the gradient wrt W at W
+#' @param gR the gradient wrt R at R
+#' @param step_w the step size for W (may be 0 to only update R)
+#' @param step_r the step size for R (may be 0 to only update W)
+#' @param loss_value the loss value at (W, R)
+#' @return whether the step sizes for W and R are correct
+#' @export
+armijo <- function (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step_w, step_r, loss_value) {
+    nxt_w <- W - step_w * gW
+    nxt_r <- R - step_r * gR
+    loss_next <- strats_loss (T_normal, T, alpha, beta, gamma, dependency, nxt_w, nxt_r)
+    loss_limit <- (loss_value
+        + sum ((nxt_w - W) * gW)
+        + sum ((nxt_r - R) * gR)
+        + ifelse (step_w == 0, 0, 0.5 * sum ((nxt_w - W) ^ 2) / step_w)
+        + ifelse (step_r == 0, 0, 0.5 * sum ((nxt_r - R) ^ 2) / step_r))
+    (loss_next <= loss_limit)
+}
+
+line_search_w <- function (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step, loss_value) {
+    if (armijo (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step, 0, loss_value)) {
+        step
+    } else {
+        line_search_w (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step / 2, loss_value)
+    }
+}
+
+line_search_r <- function (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step, loss_value) {
+    if (armijo (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, 0, step, loss_value)) {
+        step
+    } else {
+        line_search_r (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step / 2, loss_value)
+    }
+}
+
+strats_iteration <- function (T_normal, T, alpha, beta, gamma, dependency,
+                              loss_value = NULL,
+                              W = NULL, R = NULL, step_w = NULL, step_r = NULL) {
+    if (is.null (W)) {
+        W <- diag (1, nrow (T_normal), nrow (T))
+        row.names (W) <- row.names (T_normal)
+        colnames (W) <- row.names (T)
+    }
+    if (is.null (R)) {
+        R <- matrix (0, nrow (T), ncol (T))
+        row.names (R) <- row.names (T)
+        colnames (R) <- colnames (T)
+    }
+    if (is.null (loss_value)) {
+        loss_value <- strats_loss (T_normal, T, alpha, beta, gamma, dependency, W, R)
+    }
+    if (is.null (step_w)) {
+        step_w <- 1
+    }
+    if (is.null (step_r)) {
+        step_r <- 1
+    }
+    ## Optimize W:
+    gW <- strats_gradient_w (T_normal, T, alpha, beta, gamma, dependency, W, R)
+    gR <- matrix (0, nrow (T), ncol (T))
+    step_w <- line_search_w (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step_w, loss_value)
+    W <- W - step_w * gW
+    loss_value <- strats_loss (T_normal, T, alpha, beta, gamma, dependency, W, R)
+    ## Optimize R:
+    gW <- matrix (0, nrow (T_normal), nrow (T))
+    gR <- strats_gradient_r (T_normal, T, alpha, beta, gamma, dependency, W, R)
+    step_r <- line_search_r (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step_r, loss_value)
+    R <- R - step_r * gR
+    loss_value <- strats_loss (T_normal, T, alpha, beta, gamma, dependency, W, R)
+    list (loss_value = loss_value,
+          W = W, R = R, step_w = step_w, step_r = step_r)
+}
+
+strats_sample <- function (T, n_points) {
+    i <- ceiling (nrow (T) * ((1:n_points) / n_points))
+    T[i,, drop = FALSE]
+}
+
+#' Apply the STRATS algorithm.
+#'
+#' The algorithm uses a normal time series, it is either passed as
+#' T_normal, or sampled from T with n_normal points.
+#'
+#' @param T the time series
+#' @param alpha the global detection regularizer
+#' @param beta the residual analysis regularizer
+#' @param gamma the contextual detection regularizer
+#' @param dependency a vector giving the temporal dependency between
+#'     an observation and the next observations (the temporal
+#'     dependency is symmetric)
+#' @param T_normal a time series with normal data
+#' @param n_normal the number of normal points to sample
+#' @param maxiter how many gradient descent iterations to make
+#' @return the STRATS model, as a list with key 'anomalies' bound to a
+#'     tibble with 'observation' (the index in T), 'score' (the
+#'     anomaly score) and 'rank' (the anomaly rank).
+#' @export
+strats <- function (T, alpha, beta, gamma, dependency,
+                    T_normal = NULL,
+                    n_normal = NULL,
+                    maxiter = 100) {
+    if (is.null (T_normal)
+        && is.null (n_normal)) {
+        stop ("Either pas n_normal, the number of samples to take for normal, or pass T_normal")
+    }
+    if (is.null (T_normal)) {
+        T_normal <- strats_sample (T, n_normal)
+    }
+    T <- as.matrix (T)
+    T_normal <- as.matrix (T_normal)
+    dependency <- c (dependency)
+    stopifnot (is.finite (alpha))
+    stopifnot (is.finite (beta))
+    stopifnot (is.finite (gamma))
+    stopifnot (alpha >= 0)
+    stopifnot (beta >= 0)
+    stopifnot (gamma >= 0)
+    stopifnot (all (is.finite (dependency)))
+    stopifnot (all (dependency >= 0))
+    stopifnot (all (is.finite (T)))
+    stopifnot (all (is.finite (T_normal)))
+    stopifnot (ncol (T) == ncol (T_normal))
+    model <- list ()
+    for (i in 1:maxiter) {
+        last_loss <- model$loss_value
+        model <- strats_iteration (T_normal, T, alpha, beta, gamma, dependency,
+                                   model$loss_value,
+                                   model$W, model$R, model$step_w, model$step_r)
+        if (!is.null (last_loss)) {
+            stopifnot (model$loss_value <= last_loss)
+        }
+    }
+    model$anomalies <- (tibble::tibble (observation = seq_len (nrow (model$R)),
+                                        score = rowSums (model$R ^ 2))
+        %>% dplyr::arrange (-score)
+        %>% dplyr::mutate (rank = seq_len (nrow (model$R))))
+    model
+}
+
+#' Test a configuration for STRATS with synthetic anomaly injection
+#'
+#' @param T the time series
+#' @param alpha the global detection regularizer
+#' @param beta the residual analysis regularizer
+#' @param gamma the contextual detection regularizer
+#' @param dependency a vector giving the temporal dependency between
+#'     an observation and the next observations (the temporal
+#'     dependency is symmetric)
+#' @param T_normal a time series with normal data
+#' @param n_normal the number of normal points to sample
+#' @param maxiter how many gradient descent iterations to make
+#' @return the mean rank of the injected anomalies. Lower is better.
+#' @export
+strats_test <- function (T, alpha, beta, gamma, dependency,
+                         T_normal = NULL,
+                         n_normal = NULL,
+                         maxiter = 100) {
+    ## Inject 1 anomaly, learn STRATS, and check the rank of the
+    ## anomaly. Repeat 10 times with different anomalies. Return the
+    ## mean.
+    do.call (mean, lapply (1:10, function (seed) {
+        set.seed (seed)
+        i_base <- sample (seq_len (nrow (T)), 1)
+        i_anomaly <- sample (seq_len (nrow (T)), 1)
+        which_swapped <- sample (seq_len (ncol (T)), ceiling (ncol (T) / 2))
+        t <- T[i_anomaly,]
+        t_base <- T[i_base,]
+        t[which_swapped] <- t_base[which_swapped]
+        T[i_anomaly,] <- t
+        model <- strats (T, alpha, beta, gamma, dependency,
+                         T_normal = T_normal, n_normal = n_normal,
+                         maxiter = maxiter)
+        (model$anomalies
+            %>% dplyr::filter (observation == i_anomaly))$rank[1]
+    }))
+}
+
+#' Tune STRATS with synthetic anomalies
+#'
+#' @param T the time series
+#' @param dependency a vector giving the temporal dependency between
+#'     an observation and the next observations (the temporal
+#'     dependency is symmetric)
+#' @param T_normal a time series with normal data
+#' @param n_normal the number of normal points to sample
+#' @param maxiter how many gradient descent iterations to make
+#' @return a tibble, with columns alpha, beta, gamma, and error (the error
+#'     on synthetic anomalies).
+#' @export
+strats_tune <- function (T, dependency,
+                         T_normal = NULL,
+                         n_normal = NULL,
+                         maxiter = 100) {
+    set.seed (1)
+    do.call (rbind, lapply (1:20, function (seed) {
+        set.seed (seed)
+        alpha <- signif (10^runif (1, -3, 3), 1)
+        beta <- signif (10^runif (1, -3, 3), 1)
+        gamma <- signif (10^runif (1, -3, 3), 1)
+        error <- strats_test (T, alpha, beta, gamma, dependency,
+                              T_normal = T_normal,
+                              n_normal = n_normal,
+                              maxiter = maxiter)
+        tibble::tibble (alpha = alpha, beta = beta, gamma = gamma,
+                        error = error)
+    }))
+}
diff --git a/bootstrap b/bootstrap
new file mode 100755
index 0000000..1e5ddb9
--- /dev/null
+++ b/bootstrap
@@ -0,0 +1,38 @@
+#!/usr/bin/env Rscript
+
+git_version <- tryCatch ({
+    readLines (".tarball-version")
+}, error = function (cond) {
+    "0.0.0.9000"
+})
+parts <- strsplit (git_version, "-", fixed = TRUE)[[1]]
+tag <- parts[1]
+commit <- NA
+if (length (parts) > 1) {
+    commit <- 9000 + as.numeric (parts[2])
+}
+r_version <- tag
+if (!is.na (commit)) {
+    r_version <- sprintf ("%s.%d", tag, commit)
+}
+
+library ("devtools")
+writeLines (c (
+    "Package: strats",
+    "Type: Package",
+    "Title: Semi-supervised Transductive Residual Analysis of Time Series",
+    sprintf ("Version: %s", r_version),
+    "Author: Vivien Kraus <vivien@planete-kraus.eu>",
+    "Maintainer: Vivien Kraus <vivien@planete-kraus.eu>",
+    "Description: Implementation of for anomaly detection.",
+    "License: Proprietary",
+    "LazyData: true"),
+    "DESCRIPTION")
+use_namespace (roxygen = TRUE)
+document ()
+load_all ()
+
+check_matrix_product ()
+check_gradient ()
+
+build ()
diff --git a/inst/bin/strats b/inst/bin/strats
new file mode 100755
index 0000000..13e5994
--- /dev/null
+++ b/inst/bin/strats
@@ -0,0 +1,42 @@
+#!/usr/bin/env Rscript
+
+files <- commandArgs (trailingOnly = TRUE)
+
+`%>%` <- magrittr::`%>%`
+
+for (f in files) {
+    n_train <- basename (f)
+    splits <- strsplit (n_train, ".", fixed = TRUE)[[1]]
+    if (length (splits) != 2) {
+        cat (sprintf ("Skipping file %s.\n", f), file = stderr ())
+        next
+    }
+    n_train <- splits[1]
+    n_train <- as.numeric (rev (strsplit (n_train, "_", fixed = TRUE)[[1]])[1])
+    if (is.na (n_train)) {
+        cat (sprintf ("Skipping file %s.\n", f), file = stderr ())
+        next
+    }
+    data <- as.matrix (readr::read_csv (f, col_names = FALSE))
+    n <- nrow (data)
+    if (ncol (data) == 1) {
+        data <- cbind (data, data ^ 2, c (data[n], data[1:(n - 1)]))
+    }
+    data <- scale (data)
+    dependency <- exp (-0.1 * 1:100)
+    T_normal <- data[seq_len (n_train),, drop = FALSE]
+    T <- data[n_train - 1 + seq_len (nrow (data) - n_train),, drop = FALSE]
+    if (nrow (T_normal) > 1000) {
+        i <- ceiling (nrow (T_normal) * ((1:1000) / 1000))
+        T_normal <- T_normal[i,, drop = FALSE]
+    }
+    tuning <- (strats::strats_tune (T, dependency, T_normal)
+        %>% dplyr::arrange (error))
+    alpha <- tuning$alpha[1]
+    beta <- tuning$beta[1]
+    gamma <- tuning$gamma[1]
+    model <- strats::strats (T, alpha, beta, gamma, dependency, T_normal)
+    cat (sprintf ("%s: %s (%g, %g, %g)\n", f,
+                  model$anomalies$observation[1],
+                  alpha, beta, gamma))
+}
diff --git a/src/Makevars b/src/Makevars
new file mode 100644
index 0000000..0d3513b
--- /dev/null
+++ b/src/Makevars
@@ -0,0 +1 @@
+PKG_CFLAGS = $(SHLIB_OPENMP_CFLAGS)
diff --git a/src/strats.c b/src/strats.c
new file mode 100644
index 0000000..1c5ce53
--- /dev/null
+++ b/src/strats.c
@@ -0,0 +1,52 @@
+#include <stdlib.h>
+
+static inline void
+product (size_t n,
+	 size_t omega,
+	 const double *restrict temporal_dependency,
+	 const double *restrict vector,
+	 double *restrict out)
+{
+  size_t i;
+  #pragma omp parallel for
+  for (i = 0; i < n; i++)
+    {
+      size_t j;
+      out[i] = 0;
+      /* Backward: */
+      for (j = 0; j < omega; j++)
+	{
+	  if (i > j)
+	    {
+	      const double x = vector[i - j - 1];
+	      const double coefficient = temporal_dependency[j];
+	      out[i] += coefficient * (vector[i] - x);
+	    }
+	}
+      /* Forward: */
+      for (j = 0; j < omega; j++)
+	{
+	  if (i + j + 1 < n)
+	    {
+	      const double x = vector[i + j + 1];
+	      const double coefficient = temporal_dependency[j];
+	      out[i] += coefficient * (vector[i] - x);
+	    }
+	}
+    }
+}
+
+void
+strats_compute_laplacian_matrix_product (int *n,
+					 int *d,
+					 int *omega,
+					 double *temporal_dependency,
+					 double *x,
+					 double *out)
+{
+  int j;
+  for (j = 0; j < *d; j++)
+    {
+      product (*n, *omega, temporal_dependency, x + j * (*n), out + j * (*n));
+    }
+}
diff --git a/tests/testthat/test-gradients.R b/tests/testthat/test-gradients.R
new file mode 100644
index 0000000..d93a09f
--- /dev/null
+++ b/tests/testthat/test-gradients.R
@@ -0,0 +1,3 @@
+test_that ("The gradient is correct", {
+    check_gradient ()
+})
diff --git a/tests/testthat/test-product.R b/tests/testthat/test-product.R
new file mode 100644
index 0000000..4f01e4e
--- /dev/null
+++ b/tests/testthat/test-product.R
@@ -0,0 +1,3 @@
+test_that ("The Laplacian matrix product is correct", {
+    check_matrix_product ()
+})
-- 
cgit v1.2.3