From b885294b77a5bc22d5681b0cd0f219acd08a662e Mon Sep 17 00:00:00 2001 From: Vivien Kraus Date: Fri, 19 Mar 2021 16:59:39 +0100 Subject: Initial commit --- R/strats.R | 377 ++++++++++++++++++++++++++++++++++++++++ bootstrap | 38 ++++ inst/bin/strats | 42 +++++ src/Makevars | 1 + src/strats.c | 52 ++++++ tests/testthat/test-gradients.R | 3 + tests/testthat/test-product.R | 3 + 7 files changed, 516 insertions(+) create mode 100644 R/strats.R create mode 100755 bootstrap create mode 100755 inst/bin/strats create mode 100644 src/Makevars create mode 100644 src/strats.c create mode 100644 tests/testthat/test-gradients.R create mode 100644 tests/testthat/test-product.R diff --git a/R/strats.R b/R/strats.R new file mode 100644 index 0000000..3b22d5c --- /dev/null +++ b/R/strats.R @@ -0,0 +1,377 @@ +`%>%` <- magrittr::`%>%` + +#' @useDynLib strats strats_compute_laplacian_matrix_product +strats_laplacian_matrix_product <- function (temporal_dependency, X) { + out <- matrix (0, nrow (X), ncol (X)) + n <- as.integer (nrow (X)) + d <- as.integer (ncol (X)) + temporal_dependency <- as.numeric (temporal_dependency) + omega <- as.integer (length (temporal_dependency)) + x <- as.numeric (X) + .C (strats_compute_laplacian_matrix_product, + n, d, omega, temporal_dependency, x, out)[[6]] +} + +## Check the matrix product +check_matrix_product <- function () { + X <- matrix (rnorm (24), 6, 4) + temporal_dependency <- c (0.5, 0.2, 0.1) + M <- matrix (0, 6, 6) + for (omega in 0:3) { + mij <- 1 + if (omega > 0) { + mij <- temporal_dependency[omega] + } + for (i in 1:6) { + j_plus <- i + omega + j_minus <- i - omega + if (j_plus <= 6) { + M[i, j_plus] <- mij + } + if (j_minus >= 1) { + M[i, j_minus] <- mij + } + } + } + D <- diag (rowSums (M), 6, 6) + L <- D - M + Y <- L %*% X + Yhat <- strats_laplacian_matrix_product (temporal_dependency, X) + stopifnot (nrow (Yhat) == nrow (Y)) + stopifnot (ncol (Yhat) == ncol (Y)) + stopifnot (max (abs (Yhat - Y)) < 1e-8) +} + +#' Compute the STRATS loss +#' +#' @param T_normal a normal subset of the time series +#' @param T the time series +#' @param alpha the global detection regularizer +#' @param beta the residual analysis regularizer +#' @param gamma the contextual detection regularizer +#' @param dependency a vector giving the temporal dependency between +#' an observation and the next observations (the temporal +#' dependency is symmetric) +#' @param W the reconstruction matrix +#' @param R the residual matrix +#' @return the loss +#' @export +strats_loss <- function (T_normal, T, alpha, beta, gamma, dependency, W, R) { + non_residual <- R - T + (sum ((T - crossprod (W, T_normal) - R) ^ 2) + + alpha * sum (sqrt (rowSums (W ^ 2))) + + beta * sum (sqrt (rowSums (R ^ 2))) + + gamma * sum (non_residual * strats_laplacian_matrix_product (dependency, non_residual))) +} + +#' Compute the STRATS gradient wrt W +#' +#' @param T_normal a normal subset of the time series +#' @param T the time series +#' @param alpha the global detection regularizer +#' @param beta the residual analysis regularizer +#' @param gamma the contextual detection regularizer +#' @param dependency a vector giving the temporal dependency between +#' an observation and the next observations (the temporal +#' dependency is symmetric) +#' @param W the reconstruction matrix +#' @param R the residual matrix +#' @return the gradient wrt W +#' @export +strats_gradient_w <- function (T_normal, T, alpha, beta, gamma, dependency, W, R) { + Dw <- 1 / (2 * sqrt (rowSums (W ^ 2))) + Dw[!is.finite (Dw)] <- 0 + M <- tcrossprod (T_normal) + diag (M) <- diag (M) + alpha * Dw + 2 * (M %*% W - tcrossprod (T_normal, T) + tcrossprod (T_normal, R)) +} + +#' Compute the STRATS gradient wrt R +#' +#' @param T_normal a normal subset of the time series +#' @param T the time series +#' @param alpha the global detection regularizer +#' @param beta the residual analysis regularizer +#' @param gamma the contextual detection regularizer +#' @param dependency a vector giving the temporal dependency between +#' an observation and the next observations (the temporal +#' dependency is symmetric) +#' @param W the reconstruction matrix +#' @param R the residual matrix +#' @return the gradient wrt R +#' @export +strats_gradient_r <- function (T_normal, T, alpha, beta, gamma, dependency, W, R) { + Dr <- 1 / (2 * sqrt (rowSums (R ^ 2))) + Dr[!is.finite (Dr)] <- 0 + 2 * (crossprod (W, T_normal) - T + R + beta * sweep (R, 1, Dr, `*`) + gamma * strats_laplacian_matrix_product (dependency, R - T)) +} + +gradient_error <- function (T_normal, T, alpha, beta, gamma, dependency, W, R) { + gW <- matrix (0, nrow (T_normal), nrow (T)) + gR <- matrix (0, nrow (T), ncol (T)) + for (i in 1:nrow (T_normal)) { + for (j in 1:nrow (T)) { + W_minus <- W + W_plus <- W + W_minus[i, j] <- W_minus[i, j] - 1e-4 + W_plus[i, j] <- W_plus[i, j] + 1e-4 + l_minus <- strats_loss (T_normal, T, alpha, beta, gamma, dependency, + W_minus, R) + l_plus <- strats_loss (T_normal, T, alpha, beta, gamma, dependency, + W_plus, R) + gW[i, j] <- (l_plus - l_minus) / (2 * 1e-4) + } + } + for (i in 1:nrow (T)) { + for (j in 1:ncol (T)) { + R_minus <- R + R_plus <- R + R_minus[i, j] <- R_minus[i, j] - 1e-4 + R_plus[i, j] <- R_plus[i, j] + 1e-4 + l_minus <- strats_loss (T_normal, T, alpha, beta, gamma, dependency, + W, R_minus) + l_plus <- strats_loss (T_normal, T, alpha, beta, gamma, dependency, + W, R_plus) + gR[i, j] <- (l_plus - l_minus) / (2 * 1e-4) + } + } + gW_true <- strats_gradient_w (T_normal, T, alpha, beta, gamma, dependency, W, R) + gR_true <- strats_gradient_r (T_normal, T, alpha, beta, gamma, dependency, W, R) + max (max (abs (gW - gW_true)), max (abs (gR - gR_true))) +} + +check_gradient <- function () { + set.seed (1) + for (i in 1:100) { + n <- 100 + n_train <- 25 + d <- 10 + dependency <- c (0.5, 0.2, 0.1, 0.05, 0.02, 0.01, 0.005, 0.002, 0.001) + T_normal <- matrix (rnorm (n_train * d), n_train, d) + T <- matrix (rnorm (n * d), n, d) + W <- matrix (rnorm (n_train * n), n_train, n) + R <- matrix (rnorm (n * d), n, d) + stopifnot (gradient_error (T_normal, T, 1000, 1000, 1000, dependency, W, R) + <= 1e-4) + } +} + +#' Check that the step size is adapted. +#' +#' @param T_normal a normal subset of the time series +#' @param T the time series +#' @param alpha the global detection regularizer +#' @param beta the residual analysis regularizer +#' @param gamma the contextual detection regularizer +#' @param dependency a vector giving the temporal dependency between +#' an observation and the next observations (the temporal +#' dependency is symmetric) +#' @param W the reconstruction matrix +#' @param R the residual matrix +#' @param gW the gradient wrt W at W +#' @param gR the gradient wrt R at R +#' @param step_w the step size for W (may be 0 to only update R) +#' @param step_r the step size for R (may be 0 to only update W) +#' @param loss_value the loss value at (W, R) +#' @return whether the step sizes for W and R are correct +#' @export +armijo <- function (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step_w, step_r, loss_value) { + nxt_w <- W - step_w * gW + nxt_r <- R - step_r * gR + loss_next <- strats_loss (T_normal, T, alpha, beta, gamma, dependency, nxt_w, nxt_r) + loss_limit <- (loss_value + + sum ((nxt_w - W) * gW) + + sum ((nxt_r - R) * gR) + + ifelse (step_w == 0, 0, 0.5 * sum ((nxt_w - W) ^ 2) / step_w) + + ifelse (step_r == 0, 0, 0.5 * sum ((nxt_r - R) ^ 2) / step_r)) + (loss_next <= loss_limit) +} + +line_search_w <- function (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step, loss_value) { + if (armijo (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step, 0, loss_value)) { + step + } else { + line_search_w (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step / 2, loss_value) + } +} + +line_search_r <- function (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step, loss_value) { + if (armijo (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, 0, step, loss_value)) { + step + } else { + line_search_r (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step / 2, loss_value) + } +} + +strats_iteration <- function (T_normal, T, alpha, beta, gamma, dependency, + loss_value = NULL, + W = NULL, R = NULL, step_w = NULL, step_r = NULL) { + if (is.null (W)) { + W <- diag (1, nrow (T_normal), nrow (T)) + row.names (W) <- row.names (T_normal) + colnames (W) <- row.names (T) + } + if (is.null (R)) { + R <- matrix (0, nrow (T), ncol (T)) + row.names (R) <- row.names (T) + colnames (R) <- colnames (T) + } + if (is.null (loss_value)) { + loss_value <- strats_loss (T_normal, T, alpha, beta, gamma, dependency, W, R) + } + if (is.null (step_w)) { + step_w <- 1 + } + if (is.null (step_r)) { + step_r <- 1 + } + ## Optimize W: + gW <- strats_gradient_w (T_normal, T, alpha, beta, gamma, dependency, W, R) + gR <- matrix (0, nrow (T), ncol (T)) + step_w <- line_search_w (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step_w, loss_value) + W <- W - step_w * gW + loss_value <- strats_loss (T_normal, T, alpha, beta, gamma, dependency, W, R) + ## Optimize R: + gW <- matrix (0, nrow (T_normal), nrow (T)) + gR <- strats_gradient_r (T_normal, T, alpha, beta, gamma, dependency, W, R) + step_r <- line_search_r (T_normal, T, alpha, beta, gamma, dependency, W, R, gW, gR, step_r, loss_value) + R <- R - step_r * gR + loss_value <- strats_loss (T_normal, T, alpha, beta, gamma, dependency, W, R) + list (loss_value = loss_value, + W = W, R = R, step_w = step_w, step_r = step_r) +} + +strats_sample <- function (T, n_points) { + i <- ceiling (nrow (T) * ((1:n_points) / n_points)) + T[i,, drop = FALSE] +} + +#' Apply the STRATS algorithm. +#' +#' The algorithm uses a normal time series, it is either passed as +#' T_normal, or sampled from T with n_normal points. +#' +#' @param T the time series +#' @param alpha the global detection regularizer +#' @param beta the residual analysis regularizer +#' @param gamma the contextual detection regularizer +#' @param dependency a vector giving the temporal dependency between +#' an observation and the next observations (the temporal +#' dependency is symmetric) +#' @param T_normal a time series with normal data +#' @param n_normal the number of normal points to sample +#' @param maxiter how many gradient descent iterations to make +#' @return the STRATS model, as a list with key 'anomalies' bound to a +#' tibble with 'observation' (the index in T), 'score' (the +#' anomaly score) and 'rank' (the anomaly rank). +#' @export +strats <- function (T, alpha, beta, gamma, dependency, + T_normal = NULL, + n_normal = NULL, + maxiter = 100) { + if (is.null (T_normal) + && is.null (n_normal)) { + stop ("Either pas n_normal, the number of samples to take for normal, or pass T_normal") + } + if (is.null (T_normal)) { + T_normal <- strats_sample (T, n_normal) + } + T <- as.matrix (T) + T_normal <- as.matrix (T_normal) + dependency <- c (dependency) + stopifnot (is.finite (alpha)) + stopifnot (is.finite (beta)) + stopifnot (is.finite (gamma)) + stopifnot (alpha >= 0) + stopifnot (beta >= 0) + stopifnot (gamma >= 0) + stopifnot (all (is.finite (dependency))) + stopifnot (all (dependency >= 0)) + stopifnot (all (is.finite (T))) + stopifnot (all (is.finite (T_normal))) + stopifnot (ncol (T) == ncol (T_normal)) + model <- list () + for (i in 1:maxiter) { + last_loss <- model$loss_value + model <- strats_iteration (T_normal, T, alpha, beta, gamma, dependency, + model$loss_value, + model$W, model$R, model$step_w, model$step_r) + if (!is.null (last_loss)) { + stopifnot (model$loss_value <= last_loss) + } + } + model$anomalies <- (tibble::tibble (observation = seq_len (nrow (model$R)), + score = rowSums (model$R ^ 2)) + %>% dplyr::arrange (-score) + %>% dplyr::mutate (rank = seq_len (nrow (model$R)))) + model +} + +#' Test a configuration for STRATS with synthetic anomaly injection +#' +#' @param T the time series +#' @param alpha the global detection regularizer +#' @param beta the residual analysis regularizer +#' @param gamma the contextual detection regularizer +#' @param dependency a vector giving the temporal dependency between +#' an observation and the next observations (the temporal +#' dependency is symmetric) +#' @param T_normal a time series with normal data +#' @param n_normal the number of normal points to sample +#' @param maxiter how many gradient descent iterations to make +#' @return the mean rank of the injected anomalies. Lower is better. +#' @export +strats_test <- function (T, alpha, beta, gamma, dependency, + T_normal = NULL, + n_normal = NULL, + maxiter = 100) { + ## Inject 1 anomaly, learn STRATS, and check the rank of the + ## anomaly. Repeat 10 times with different anomalies. Return the + ## mean. + do.call (mean, lapply (1:10, function (seed) { + set.seed (seed) + i_base <- sample (seq_len (nrow (T)), 1) + i_anomaly <- sample (seq_len (nrow (T)), 1) + which_swapped <- sample (seq_len (ncol (T)), ceiling (ncol (T) / 2)) + t <- T[i_anomaly,] + t_base <- T[i_base,] + t[which_swapped] <- t_base[which_swapped] + T[i_anomaly,] <- t + model <- strats (T, alpha, beta, gamma, dependency, + T_normal = T_normal, n_normal = n_normal, + maxiter = maxiter) + (model$anomalies + %>% dplyr::filter (observation == i_anomaly))$rank[1] + })) +} + +#' Tune STRATS with synthetic anomalies +#' +#' @param T the time series +#' @param dependency a vector giving the temporal dependency between +#' an observation and the next observations (the temporal +#' dependency is symmetric) +#' @param T_normal a time series with normal data +#' @param n_normal the number of normal points to sample +#' @param maxiter how many gradient descent iterations to make +#' @return a tibble, with columns alpha, beta, gamma, and error (the error +#' on synthetic anomalies). +#' @export +strats_tune <- function (T, dependency, + T_normal = NULL, + n_normal = NULL, + maxiter = 100) { + set.seed (1) + do.call (rbind, lapply (1:20, function (seed) { + set.seed (seed) + alpha <- signif (10^runif (1, -3, 3), 1) + beta <- signif (10^runif (1, -3, 3), 1) + gamma <- signif (10^runif (1, -3, 3), 1) + error <- strats_test (T, alpha, beta, gamma, dependency, + T_normal = T_normal, + n_normal = n_normal, + maxiter = maxiter) + tibble::tibble (alpha = alpha, beta = beta, gamma = gamma, + error = error) + })) +} diff --git a/bootstrap b/bootstrap new file mode 100755 index 0000000..1e5ddb9 --- /dev/null +++ b/bootstrap @@ -0,0 +1,38 @@ +#!/usr/bin/env Rscript + +git_version <- tryCatch ({ + readLines (".tarball-version") +}, error = function (cond) { + "0.0.0.9000" +}) +parts <- strsplit (git_version, "-", fixed = TRUE)[[1]] +tag <- parts[1] +commit <- NA +if (length (parts) > 1) { + commit <- 9000 + as.numeric (parts[2]) +} +r_version <- tag +if (!is.na (commit)) { + r_version <- sprintf ("%s.%d", tag, commit) +} + +library ("devtools") +writeLines (c ( + "Package: strats", + "Type: Package", + "Title: Semi-supervised Transductive Residual Analysis of Time Series", + sprintf ("Version: %s", r_version), + "Author: Vivien Kraus ", + "Maintainer: Vivien Kraus ", + "Description: Implementation of for anomaly detection.", + "License: Proprietary", + "LazyData: true"), + "DESCRIPTION") +use_namespace (roxygen = TRUE) +document () +load_all () + +check_matrix_product () +check_gradient () + +build () diff --git a/inst/bin/strats b/inst/bin/strats new file mode 100755 index 0000000..13e5994 --- /dev/null +++ b/inst/bin/strats @@ -0,0 +1,42 @@ +#!/usr/bin/env Rscript + +files <- commandArgs (trailingOnly = TRUE) + +`%>%` <- magrittr::`%>%` + +for (f in files) { + n_train <- basename (f) + splits <- strsplit (n_train, ".", fixed = TRUE)[[1]] + if (length (splits) != 2) { + cat (sprintf ("Skipping file %s.\n", f), file = stderr ()) + next + } + n_train <- splits[1] + n_train <- as.numeric (rev (strsplit (n_train, "_", fixed = TRUE)[[1]])[1]) + if (is.na (n_train)) { + cat (sprintf ("Skipping file %s.\n", f), file = stderr ()) + next + } + data <- as.matrix (readr::read_csv (f, col_names = FALSE)) + n <- nrow (data) + if (ncol (data) == 1) { + data <- cbind (data, data ^ 2, c (data[n], data[1:(n - 1)])) + } + data <- scale (data) + dependency <- exp (-0.1 * 1:100) + T_normal <- data[seq_len (n_train),, drop = FALSE] + T <- data[n_train - 1 + seq_len (nrow (data) - n_train),, drop = FALSE] + if (nrow (T_normal) > 1000) { + i <- ceiling (nrow (T_normal) * ((1:1000) / 1000)) + T_normal <- T_normal[i,, drop = FALSE] + } + tuning <- (strats::strats_tune (T, dependency, T_normal) + %>% dplyr::arrange (error)) + alpha <- tuning$alpha[1] + beta <- tuning$beta[1] + gamma <- tuning$gamma[1] + model <- strats::strats (T, alpha, beta, gamma, dependency, T_normal) + cat (sprintf ("%s: %s (%g, %g, %g)\n", f, + model$anomalies$observation[1], + alpha, beta, gamma)) +} diff --git a/src/Makevars b/src/Makevars new file mode 100644 index 0000000..0d3513b --- /dev/null +++ b/src/Makevars @@ -0,0 +1 @@ +PKG_CFLAGS = $(SHLIB_OPENMP_CFLAGS) diff --git a/src/strats.c b/src/strats.c new file mode 100644 index 0000000..1c5ce53 --- /dev/null +++ b/src/strats.c @@ -0,0 +1,52 @@ +#include + +static inline void +product (size_t n, + size_t omega, + const double *restrict temporal_dependency, + const double *restrict vector, + double *restrict out) +{ + size_t i; + #pragma omp parallel for + for (i = 0; i < n; i++) + { + size_t j; + out[i] = 0; + /* Backward: */ + for (j = 0; j < omega; j++) + { + if (i > j) + { + const double x = vector[i - j - 1]; + const double coefficient = temporal_dependency[j]; + out[i] += coefficient * (vector[i] - x); + } + } + /* Forward: */ + for (j = 0; j < omega; j++) + { + if (i + j + 1 < n) + { + const double x = vector[i + j + 1]; + const double coefficient = temporal_dependency[j]; + out[i] += coefficient * (vector[i] - x); + } + } + } +} + +void +strats_compute_laplacian_matrix_product (int *n, + int *d, + int *omega, + double *temporal_dependency, + double *x, + double *out) +{ + int j; + for (j = 0; j < *d; j++) + { + product (*n, *omega, temporal_dependency, x + j * (*n), out + j * (*n)); + } +} diff --git a/tests/testthat/test-gradients.R b/tests/testthat/test-gradients.R new file mode 100644 index 0000000..d93a09f --- /dev/null +++ b/tests/testthat/test-gradients.R @@ -0,0 +1,3 @@ +test_that ("The gradient is correct", { + check_gradient () +}) diff --git a/tests/testthat/test-product.R b/tests/testthat/test-product.R new file mode 100644 index 0000000..4f01e4e --- /dev/null +++ b/tests/testthat/test-product.R @@ -0,0 +1,3 @@ +test_that ("The Laplacian matrix product is correct", { + check_matrix_product () +}) -- cgit v1.2.3