diff options
author | Vivien Kraus <vivien@planete-kraus.eu> | 2021-04-07 12:05:36 +0200 |
---|---|---|
committer | Vivien Kraus <vivien@planete-kraus.eu> | 2021-06-15 13:34:18 +0200 |
commit | bb1ae75c56d34a65662d7b285333c595c0ddae7f (patch) | |
tree | e8baf732ce062603f0982ebc093fbd5451c45e65 /images |
Nouvelle version du manuscrit avec HTML
Diffstat (limited to 'images')
32 files changed, 2149 insertions, 0 deletions
diff --git a/images/bothsemimulti.svg b/images/bothsemimulti.svg new file mode 100644 index 0000000..b03b092 --- /dev/null +++ b/images/bothsemimulti.svg @@ -0,0 +1,71 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<svg + xmlns="http://www.w3.org/2000/svg" + xmlns:xlink="http://www.w3.org/1999/xlink" + version="1.1" + viewBox="-10 -10 220 220" + height="120mm" + width="120mm"> + <defs> + <linearGradient id="modele-1" x1="0" x2="0" y1="0" y2="1"> + <stop offset="0%" stop-color="blue" /> + <stop offset="33%" stop-color="blue" /> + <stop offset="34%" stop-color="orange" /> + <stop offset="66%" stop-color="orange" /> + <stop offset="67%" stop-color="green" /> + <stop offset="100%" stop-color="green" /> + </linearGradient> + <linearGradient id="modele-2" x1="0" x2="0" y1="0" y2="1"> + <stop offset="0%" stop-color="blue" /> + <stop offset="33%" stop-color="blue" /> + <stop offset="34%" stop-color="orange" /> + <stop offset="66%" stop-color="orange" /> + <stop offset="67%" stop-color="blue" /> + <stop offset="100%" stop-color="blue" /> + </linearGradient> + <linearGradient id="modele-3" x1="0" x2="0" y1="0" y2="1"> + <stop offset="0%" stop-color="white" /> + <stop offset="33%" stop-color="white" /> + <stop offset="34%" stop-color="red" /> + <stop offset="66%" stop-color="red" /> + <stop offset="67%" stop-color="orange" /> + <stop offset="100%" stop-color="orange" /> + </linearGradient> + </defs> + <g stroke="black" stroke-width="2" fill="white" id="individus"> + <circle cx="25" cy="25" r="10" /> + <circle cx="75" cy="25" r="10" /> + <circle cx="25" cy="75" r="10" /> + <circle cx="75" cy="75" r="10" /> + <line x1="35" x2="65" y1="25" y2="25" /> + <line x1="35" x2="65" y1="75" y2="75" /> + </g> + <use xlink:href="#individus" transform="translate(100 0)" /> + <g stroke="red" stroke-width="2"> + <line x1="146.5" x2="153.5" y1="17" y2="33" /> + <line x1="146.5" x2="153.5" y1="67" y2="83" /> + </g> + <g font-size="8" stroke="black" text-anchor="middle"> + <text 
x="25" y="30">-1</text> + <text x="75" y="30">-0.8</text> + <text x="25" y="80">+1</text> + <text x="75" y="80">+0.8</text> + <text x="125" y="30">-1</text> + <text x="175" y="30">+1</text> + <text x="125" y="80">-0.8</text> + <text x="175" y="80">+0.8</text> + </g> + <g stroke="black" stroke-width="2"> + <line x1="50" x2="25" y1="125" y2="175" /> + <circle cx="50" cy="125" r="10" fill="url(#modele-1)" /> + <circle cx="25" cy="175" r="10" fill="url(#modele-2)"/> + <circle cx="75" cy="175" r="10" fill="url(#modele-3)"/> + </g> + <g stroke="black" stroke-width="2"> + <line x1="150" x2="125" y1="125" y2="175" /> + <line x1="134" x2="141" y1="142" y2="158" stroke="red" /> + <circle cx="150" cy="125" r="10" fill="url(#modele-1)" /> + <circle cx="125" cy="175" r="10" fill="url(#modele-3)"/> + <circle cx="175" cy="175" r="10" fill="url(#modele-2)"/> + </g> +</svg> diff --git a/images/dirty.svg b/images/dirty.svg new file mode 100644 index 0000000..4e8e76f --- /dev/null +++ b/images/dirty.svg @@ -0,0 +1,77 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<svg + xmlns="http://www.w3.org/2000/svg" + xmlns:xlink="http://www.w3.org/1999/xlink" + version="1.1" + viewBox="-10 -10 220 220" + height="100mm" + width="100mm"> + <g stroke="black" stroke-width="0.7"> + <rect x="0" y="0" width="100" height="200" fill="none" rx="5" /> + <g fill="yellow" fill-opacity="0.2" stroke-width="0.5"> + <g fill="none"> + <rect x="140" y="160" width="10" height="10" /> + </g> + <g fill="blue"> + <rect x="0" y="20" width="100" height="100" /> + <rect x="140" y="175" width="10" height="10" /> + </g> + <rect x="80" y="120" width="20" height="20" /> + <rect x="20" y="140" width="20" height="20" /> + <rect x="60" y="160" width="20" height="20" /> + <rect x="40" y="180" width="20" height="20" /> + <rect x="140" y="190" width="10" height="10" /> + </g> + </g> + <g stroke="black" stroke-width="0.3"> + <line x1="110" x2="110" y1="0" y2="20" /> + <line x1="118" x2="118" y1="20" y2="120" /> + 
<line x1="110" x2="110" y1="120" y2="200" /> + <g stroke-dasharray="4"> + <line x1="100" x2="116" y1="0" y2="0" /> + <line x1="100" x2="124" y1="20" y2="20" /> + <line x1="100" x2="124" y1="120" y2="120" /> + <line x1="100" x2="116" y1="200" y2="200" /> + </g> + </g> + <g stroke="black" stroke-width="0.1"> + <!-- The grid --> + <line x1="20" x2="20" y1="0" y2="200" /> + <line x1="40" x2="40" y1="0" y2="200" /> + <line x1="60" x2="60" y1="0" y2="200" /> + <line x1="80" x2="80" y1="0" y2="200" /> + <line x1="0" x2="100" y1="20" y2="20" /> + <line x1="0" x2="100" y1="40" y2="40" /> + <line x1="0" x2="100" y1="60" y2="60" /> + <line x1="0" x2="100" y1="80" y2="80" /> + <line x1="0" x2="100" y1="100" y2="100" /> + <line x1="0" x2="100" y1="120" y2="120" /> + <line x1="0" x2="100" y1="140" y2="140" /> + <line x1="0" x2="100" y1="160" y2="160" /> + <line x1="0" x2="100" y1="180" y2="180" /> + </g> + <g stroke="black" stroke-width="0.7"> + <line x1="130" x2="130" y1="150" y2="220" /> + <line x1="130" x2="220" y1="150" y2="150" /> + </g> + <g font-size="6px" font-family="serif"> + <text x="125" y="14"> + variables non pertinentes + </text> + <text x="125" y="70"> + variables communes + </text> + <text x="125" y="135"> + variables différenciantes + </text> + <text x="155" y="168"> + $P = 0$, $Q = 0$ + </text> + <text x="155" y="183"> + $P = 0$, $Q \neq 0$ + </text> + <text x="155" y="198"> + $P \neq 0$, $Q = 0$ + </text> + </g> +</svg> diff --git a/images/featureselectionlizeo.R b/images/featureselectionlizeo.R new file mode 100644 index 0000000..7df6b8c --- /dev/null +++ b/images/featureselectionlizeo.R @@ -0,0 +1,20 @@ +#!/usr/bin/env Rscript +load (sprintf ("%s/data/feature_selection_lizeo.Rdata", Sys.getenv ("ABS_TOP_SRCDIR"))) +Sys.setlocale ("LC_ALL", "fr_FR.UTF-8") +library ("magrittr") +plot <- (feature_selection_lizeo + %>% dplyr::filter (algorithm != "rsms_full") + %>% dplyr::mutate (algorithm = ifelse (algorithm == "rsms", "\\textbf{RSMS}", algorithm)) + %>% 
dplyr::mutate (algorithm = ifelse (algorithm == "mifs", "MIFS", algorithm)) + %>% dplyr::mutate (algorithm = ifelse (algorithm == "rfs", "RFS", algorithm)) + %>% dplyr::mutate (`% de variables sélectionnées` = n_features / max (n_features)) + %>% dplyr::select (`Algorithme` = algorithm, `% de variables sélectionnées`, `aRMSE` = armse) + %>% dplyr::group_by (Algorithme, `% de variables sélectionnées`) + %>% dplyr::summarize (aRMSE = min (aRMSE)) + %>% ggplot2::ggplot (ggplot2::aes (x = `% de variables sélectionnées`, y = aRMSE, color = Algorithme, linetype = Algorithme)) + + ggplot2::geom_line () + + ggplot2::scale_linetype_manual (values = c ("\\textbf{RSMS}" = "solid", "MIFS" = "dashed", "RFS" = "twodash")) + + ggplot2::scale_color_manual (values = c ("\\textbf{RSMS}" = "#E69F00", "MIFS" = "#56B4E9", "RFS" = "#009E73")) + + ggplot2::scale_x_continuous (trans = 'log10', labels = scales::percent)) +filename <- Sys.getenv ("OUTPUT") +ggplot2::ggsave (filename, plot, device = "svg", width = 6, height = 3) diff --git a/images/featureselectionlizeofull.R b/images/featureselectionlizeofull.R new file mode 100644 index 0000000..c861e28 --- /dev/null +++ b/images/featureselectionlizeofull.R @@ -0,0 +1,19 @@ +#!/usr/bin/env Rscript +load (sprintf ("%s/data/feature_selection_lizeo.Rdata", Sys.getenv ("ABS_TOP_SRCDIR"))) +Sys.setlocale ("LC_ALL", "fr_FR.UTF-8") +library ("magrittr") +plot <- (feature_selection_lizeo + %>% dplyr::filter (algorithm %in% c ("rsms_full", "rsms")) + %>% dplyr::mutate (algorithm = ifelse (algorithm == "rsms_full", "\\textbf{par époques}", algorithm)) + %>% dplyr::mutate (algorithm = ifelse (algorithm == "rsms", "par sous-ensemble", algorithm)) + %>% dplyr::mutate (`Nombre de variables` = n_features / max (n_features)) + %>% dplyr::select (`Variante` = algorithm, `Nombre de variables`, `aRMSE` = armse) + %>% dplyr::group_by (Variante, `Nombre de variables`) + %>% dplyr::summarize (aRMSE = min (aRMSE)) + %>% ggplot2::ggplot (ggplot2::aes (x = `Nombre 
de variables`, y = aRMSE, color = Variante, linetype = Variante)) + + ggplot2::geom_line () + + ggplot2::scale_linetype_manual (values = c ("\\textbf{par époques}" = "solid", "par sous-ensemble" = "dashed")) + + ggplot2::scale_color_manual (values = c ("\\textbf{par époques}" = "#0072B2", "par sous-ensemble" = "#E69F00")) + + ggplot2::scale_x_continuous (trans = 'log10', labels = scales::percent)) +filename <- Sys.getenv ("OUTPUT") +ggplot2::ggsave (filename, plot, device = "svg", width = 6, height = 3) diff --git a/images/fig-co-training.svg b/images/fig-co-training.svg new file mode 100644 index 0000000..9cffba1 --- /dev/null +++ b/images/fig-co-training.svg @@ -0,0 +1,95 @@ +<?xml version='1.0' encoding='UTF-8' ?> +<svg + xmlns="http://www.w3.org/2000/svg" + xmlns:xlink='http://www.w3.org/1999/xlink' + width="14cm" + viewBox="0 0 640 640"> + <g fill-opacity=".3"> + <rect x="0" y="0" width="640" height="120" fill="#E69F00" /> + <rect x="0" y="240" width="640" height="120" fill="#E69F00" /> + <rect x="0" y="120" width="640" height="120" fill="#56B4E9" /> + <rect x="0" y="360" width="640" height="120" fill="#56B4E9" /> + </g> + <g fill="white"> + <rect x="40" y="20" width="260" height="80" /> + <rect x="40" y="140" width="260" height="80" /> + <rect x="360" y="20" width="260" height="80" /> + <rect x="360" y="140" width="260" height="80" /> + <rect x="40" y="260" width="260" height="80" /> + <rect x="40" y="380" width="260" height="80" /> + <rect x="360" y="260" width="260" height="80" /> + <rect x="360" y="380" width="260" height="80" /> + </g> + <g fill="lightgray"> + <rect x="92" y="20" width="208" height="80" /> + <rect x="92" y="140" width="208" height="80" /> + + <rect x="412" y="20" width="208" height="80" /> + <rect x="412" y="140" width="208" height="80" /> + + <rect x="92" y="260" width="208" height="80" /> + <rect x="92" y="380" width="208" height="80" /> + + <rect x="412" y="260" width="52" height="80" /><rect x="516" y="260" width="104" height="80" /> 
+ <rect x="412" y="380" width="130" height="80" /><rect x="594" y="380" width="26" height="80" /> + </g> + <g fill="#E69F00"> + <rect x="542" y="20" width="52" height="80" /> + <rect x="222" y="380" width="52" height="80" /> + </g> + <g fill="#56B4E9"> + <rect x="464" y="140" width="52" height="80" /> + <rect x="144" y="260" width="52" height="80" /> + </g> + <g stroke="black"> + <rect x="40" y="20" width="260" height="80" fill="none" stroke="black" /> + <rect x="40" y="140" width="260" height="80" fill="none" stroke="black" /> + + <rect x="360" y="20" width="260" height="80" fill="none" stroke="black" /> + <rect x="360" y="140" width="260" height="80" fill="none" stroke="black" /> + + <rect x="40" y="260" width="260" height="80" fill="none" stroke="black" /> + <rect x="40" y="380" width="260" height="80" fill="none" stroke="black" /> + + <rect x="360" y="260" width="260" height="80" fill="none" stroke="black" /> + <rect x="360" y="380" width="260" height="80" fill="none" stroke="black" /> + + <line x1="0" x2="640" y1="240" y2="240" stroke="black" /> + <line x1="320" x2="320" y1="0" y2="480" stroke="black" /> + </g> + <g stroke="black" stroke-dasharray="5,3"> + <line x1="274" x2="274" y1="260" y2="340" /> + <line x1="222" x2="222" y1="260" y2="340" /> + <line x1="196" x2="196" y1="380" y2="460" /> + <line x1="144" x2="144" y1="380" y2="460" /> + </g> + <g stroke="black" fill="white"> + <polygon points="164,400 164,360 144,360 170,320 196,360 178,360 178,400" /> + <polygon points="240,320 240,360 222,360 248,400 274,360 256,360 256,320" /> + </g> + <g text-anchor="end" dominant-baseline="middle"> + <text x="30" y="60">$h_1$</text> + <text x="30" y="180">$h_2$</text> + + <text x="30" y="300">$h_1$</text> + <text x="30" y="420">$h_2$</text> + + <text x="350" y="60">$h_1$</text> + <text x="350" y="180">$h_2$</text> + + <text x="350" y="300">$h_1$</text> + <text x="350" y="420">$h_2$</text> + </g> + <g stroke="black"> + <rect x="7" y="487" width="42" height="42" 
fill="white" /> + <rect x="7" y="543" width="42" height="42" fill="lightgray" /> + <rect x="327" y="487" width="42" height="42" fill="#E69F00" /> + <rect x="327" y="543" width="42" height="42" fill="#56B4E9" /> + </g> + <g text-anchor="start" dominant-baseline="middle"> + <text x="56" y="508">Labellisé</text> + <text x="56" y="564">Non labellisé</text> + <text x="376" y="508">Valeur prédite par $h_1$</text> + <text x="376" y="564">Valeur prédite par $h_2$</text> + </g> +</svg> diff --git a/images/fig-multi-vues.svg b/images/fig-multi-vues.svg new file mode 100644 index 0000000..41af47a --- /dev/null +++ b/images/fig-multi-vues.svg @@ -0,0 +1,30 @@ +<?xml version='1.0' encoding='UTF-8' ?> +<svg + xmlns="http://www.w3.org/2000/svg" + xmlns:xlink='http://www.w3.org/1999/xlink' + width="10cm" + viewBox="0 0 640 480"> + <g fill-opacity="0.5" stroke="black"> + <rect x="80" y="80" width="160" height="64" fill="#E69F00" /> + <rect x="240" y="80" width="160" height="64" fill="#56B4E9" /> + <rect x="480" y="80" width="80" height="64" fill="#009E73" /> + </g> + <g fill-opacity="0.2" stroke="black"> + <rect x="80" y="144" width="160" height="256" fill="#E69F00" /> + <rect x="240" y="144" width="160" height="256" fill="#56B4E9" /> + <rect x="480" y="144" width="80" height="256" fill="#009E73" /> + </g> + <g text-anchor="middle"> + <text x="160" y="60">$X_1$ (texte)</text> + <text x="320" y="60">$X_2$ (liens)</text> + + <g dominant-baseline="middle"> + <text x="160" y="112">${X_l}_{1}$</text> + <text x="320" y="112">${X_l}_{1}$</text> + <text x="520" y="112">$Y$</text> + + <text x="580" y="112" text-anchor="start">$n$</text> + <text x="60" y="240" text-anchor="end">$N$</text> + </g> + </g> +</svg> diff --git a/images/hireau-s.png b/images/hireau-s.png Binary files differnew file mode 100644 index 0000000..9661c8b --- /dev/null +++ b/images/hireau-s.png diff --git a/images/hireaugramme.png b/images/hireaugramme.png Binary files differnew file mode 100644 index 0000000..6ffb06c 
--- /dev/null +++ b/images/hireaugramme.png diff --git a/images/insurance_mae.R b/images/insurance_mae.R new file mode 100755 index 0000000..9b0341b --- /dev/null +++ b/images/insurance_mae.R @@ -0,0 +1,5 @@ +#!/usr/bin/env Rscript +source (sprintf ("%s/images/laps3l_graph_code.R", Sys.getenv ("ABS_TOP_SRCDIR"))) +plot <- plot_tuning ("mae", "insurance") +filename <- Sys.getenv ("OUTPUT") +ggplot2::ggsave (filename, plot, device = "svg", width = 6, height = 3) diff --git a/images/insurance_rmse.R b/images/insurance_rmse.R new file mode 100755 index 0000000..9c37ee2 --- /dev/null +++ b/images/insurance_rmse.R @@ -0,0 +1,5 @@ +#!/usr/bin/env Rscript +source (sprintf ("%s/images/laps3l_graph_code.R", Sys.getenv ("ABS_TOP_SRCDIR"))) +plot <- plot_tuning ("rmse", "insurance") +filename <- Sys.getenv ("OUTPUT") +ggplot2::ggsave (filename, plot, device = "svg", width = 6, height = 3) diff --git a/images/l21m2.R b/images/l21m2.R new file mode 100755 index 0000000..8998a49 --- /dev/null +++ b/images/l21m2.R @@ -0,0 +1,20 @@ +#!/usr/bin/env Rscript +`%>%` <- magrittr::`%>%` +xpoints <- seq (-1, 1, length.out = 100) +ypoints <- seq (-1, 1, length.out = 100) +compute_l1 <- function (x, y) abs (x) + abs (y) +compute_l2 <- function (x, y) sqrt (x ^ 2 + y ^ 2) +data <- (tibble::tibble (expand.grid (x = xpoints, y = ypoints)) + %>% dplyr::mutate (`$\\left\\|W\\right\\|_{2, 1}$` = compute_l1 (x, y), + `$\\left\\|W\\right\\|_F$` = compute_l2 (x, y)) + %>% dplyr::mutate (`$\\left\\|W\\right\\|_{2, 1 - 2}$` = + `$\\left\\|W\\right\\|_{2, 1}$` + - `$\\left\\|W\\right\\|_F$`)) + +plotl21 <- (ggplot2::ggplot (data, ggplot2::aes (x = x, y = y, fill = `$\\left\\|W\\right\\|_{2, 1}$`)) + + ggplot2::geom_raster ()) +plotl21m2 <- (ggplot2::ggplot (data, ggplot2::aes (x = x, y = y, fill = `$\\left\\|W\\right\\|_{2, 1 - 2}$`)) + + ggplot2::geom_raster ()) +plot <- gridExtra::arrangeGrob (plotl21, plotl21m2, nrow = 2) +filename <- Sys.getenv ("OUTPUT") +ggplot2::ggsave (filename, plot, device = 
"svg", width = 5.5, height = 6.5) diff --git a/images/laps3l_graph_code.R b/images/laps3l_graph_code.R new file mode 100644 index 0000000..06b01b0 --- /dev/null +++ b/images/laps3l_graph_code.R @@ -0,0 +1,74 @@ +`%>%` <- magrittr::`%>%` + +load (sprintf ("%s/data/laps3l_tuning.Rdata", Sys.getenv ("ABS_TOP_SRCDIR"))) + +aggregate_tuning_raw <- function (datasets, algorithms) { + data <- tuning + aggregated <- (data + %>% dplyr::filter (dataset %in% datasets, algorithm %in% algorithms) + %>% dplyr::group_by (algorithm, labeled_data, dataset) + %>% dplyr::summarize (armse = mean (rmse), + srmse = sd (rmse), + amae = mean (mae), + smae = sd (mae), + arrse = mean (rrse), + srrse = sd (rrse), + arae = mean (rae), + srae = sd (rae)) + %>% dplyr::ungroup ()) + metrics <- (aggregated + %>% tidyr::pivot_longer (c (armse, srmse, amae, smae, arrse, srrse, arae, srae), + names_to = "metric", + values_to = "value") + %>% dplyr::filter (is.finite (value))) + annotations <- (metrics + %>% dplyr::group_by (labeled_data, dataset, metric) + %>% dplyr::arrange (value) + %>% dplyr::summarize (algorithm = algorithm, is_best = c ( + TRUE, + rep (FALSE, dplyr::n () - 1) + )) + %>% dplyr::ungroup ()) + (metrics + %>% dplyr::inner_join (annotations)) +} + +#' Plot the tuning results +#' @param metric the metric to show: "rmse", "mae", "rrse", "rae" +#' @export +plot_tuning <- function (metric = "rmse", dataset = "wine") { + the_metric <- metric + the_dataset <- dataset + (aggregate_tuning_raw (dataset, c ("laps3l", "sssl", "laprls")) + %>% dplyr::select (algorithm, labeled_data, dataset, metric, value) + %>% dplyr::filter (metric %in% c (sprintf ("a%s", the_metric), + sprintf ("s%s", the_metric)), + dataset == the_dataset) + %>% tidyr::pivot_wider (id_cols = c (algorithm, labeled_data, dataset), + names_from = metric, + values_from = value) + %>% dplyr::rename (mean = sprintf ("a%s", the_metric), + sd = sprintf ("s%s", the_metric)) + %>% dplyr::mutate (low = mean - sd, high = mean + sd) + 
%>% dplyr::mutate (`Données labellisées` = labeled_data, + Algorithme = + ifelse (algorithm == "sssl", + "SSSL", + ifelse (algorithm == "laprls", + "LapRLS", + "\\textbf{LapS3L}")), + Value = mean, + low = low, + high = high) + %>% ggplot2::ggplot (ggplot2::aes (x = `Données labellisées`, + y = Value, + ymin = low, + ymax = high, + linetype = Algorithme, + color = Algorithme, + fill = Algorithme)) + + ggplot2::geom_line () + + ggplot2::geom_ribbon (alpha = 0.2, size = 0) + + ggplot2::ylab (metric) + + ggplot2::ggtitle (sprintf ("jeu de données %s, métrique %s", dataset, metric))) +} diff --git a/images/lasso.svg b/images/lasso.svg new file mode 100644 index 0000000..35ae633 --- /dev/null +++ b/images/lasso.svg @@ -0,0 +1,55 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<svg + xmlns="http://www.w3.org/2000/svg" + xmlns:xlink="http://www.w3.org/1999/xlink" + version="1.1" + viewBox="-10 -10 240 170" + height="75mm" + width="100mm"> + <defs> + <clipPath id="figure-gauche-clip"> + <rect x="00" y="00" width="100" height="100" /> + </clipPath> + <clipPath id="figure-droite-clip"> + <rect x="120" y="0" width="100" height="100" /> + </clipPath> + <!-- Une figure, c'est un fond gris, deux axes en blanc dessus, et + la figure (qui est en fait un croisement de deux diagonales + avec une largeur très élevée) --> + <g id="figure-base"> + <rect x="0" y="0" width="100" height="100" fill="#D3D3D3" /> + <line x1="50" x2="50" y1="0" y2="100" stroke="white" stroke-width="0.5" /> + <line x1="0" x2="100" y1="50" y2="50" stroke="white" stroke-width="0.5"/> + <line class="domaine" + x1="0" x2="100" y1="0" y2="100" + stroke="blue" opacity=".2" /> + <line class="domaine" + x1="0" x2="100" y1="100" y2="0" + stroke="blue" opacity=".2" /> + </g> + <g id="figure-decalee"> + <use xlink:href="#figure-base" transform="translate (120 0)" /> + </g> + </defs> + <use clip-path="url(#figure-gauche-clip)" + xlink:href="#figure-base" + id="gauche" + stroke-width="20"/> + <use 
clip-path="url(#figure-droite-clip)" + xlink:href="#figure-decalee" + id="droite" + stroke-width="50"/> + <text x="50" y="115" text-anchor="middle" font-size="10px">$\alpha$ grand</text> + <text x="170" y="115" text-anchor="middle" font-size="10px">$\alpha$ petit</text> + + <rect x="0" y="125" width="20" height="15" fill="#D3D3D3" /> + <text x="30" y="135" font-size="10px"> + le modèle a une composante nulle + </text> + + <rect x="0" y="145" width="20" height="15" fill="#D3D3D3" /> + <rect x="0" y="145" width="20" height="15" fill="blue" opacity=".2" /> + <text x="30" y="155" font-size="10px"> + le modèle est dense + </text> +</svg> diff --git a/images/lizeo-donnees.png b/images/lizeo-donnees.png Binary files differnew file mode 100644 index 0000000..c520299 --- /dev/null +++ b/images/lizeo-donnees.png diff --git a/images/lizeoclassif.svg b/images/lizeoclassif.svg new file mode 100644 index 0000000..94ef436 --- /dev/null +++ b/images/lizeoclassif.svg @@ -0,0 +1,55 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<svg + xmlns="http://www.w3.org/2000/svg" + xmlns:xlink="http://www.w3.org/1999/xlink" + version="1.1" + viewBox="0 0 200 200" + width="80mm" + height="80mm"> + <text x="70" y="12" text-anchor="middle" font-size="8px"> + Variables textuelles + </text> + <g transform="rotate(-90) translate(-100, 12)"> + <text text-anchor="middle" font-size="8px"> + documents + </text> + </g> + <g transform="rotate (-90) translate(-15, 145)"> + <text text-anchor="left" font-size="4px"> + Prix + </text> + </g> + <g transform="rotate (-90) translate(-15, 155)"> + <text text-anchor="left" font-size="4px"> + Bruit + </text> + </g> + <text x="172" y="15" text-anchor="middle" font-size="6px"> + ... + </text> + <text x="190" y="195" text-anchor="end" font-size="6px"> + \textit{qualifiers} + </text> + <text x="70" y="130" text-anchor="middle" font-size="20px"> + ... 
+ </text> + <text x="175" y="130" text-anchor="middle" font-size="20px"> + $\{1, 0\}$ + </text> + + <rect x="20" y="20" width="100" height="160" rx="5" stroke="black" stroke-width=".5" fill="none" /> + <rect x="140" y="20" width="50" height="160" rx="5" stroke="black" stroke-width=".5" fill="none" /> + + <rect x="150" y="20" width="10" height="20" fill="gray" /> + <rect x="140" y="40" width="10" height="20" fill="gray" /> + <rect x="150" y="60" width="10" height="20" fill="gray" /> + + <line x1="20" x2="120" y1="40" y2="40" stroke="black" stroke-width=".1" /> + <line x1="20" x2="120" y1="60" y2="60" stroke="black" stroke-width=".1" /> + <line x1="20" x2="120" y1="80" y2="80" stroke="black" stroke-width=".1" /> + <line x1="140" x2="190" y1="40" y2="40" stroke="black" stroke-width=".1" /> + <line x1="140" x2="190" y1="60" y2="60" stroke="black" stroke-width=".1" /> + <line x1="140" x2="190" y1="80" y2="80" stroke="black" stroke-width=".1" /> + <line x1="150" x2="150" y1="20" y2="180" stroke="black" stroke-width=".1" /> + <line x1="160" x2="160" y1="20" y2="180" stroke="black" stroke-width=".1" /> +</svg> diff --git a/images/lizeoreg.svg b/images/lizeoreg.svg new file mode 100644 index 0000000..4561c1d --- /dev/null +++ b/images/lizeoreg.svg @@ -0,0 +1,55 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<svg + xmlns="http://www.w3.org/2000/svg" + xmlns:xlink="http://www.w3.org/1999/xlink" + version="1.1" + viewBox="0 0 200 200" + height="80mm" + width="80mm"> + <text x="70" y="12" text-anchor="middle" font-size="8px"> + Variables textuelles + </text> + <g transform="rotate(-90) translate(-100, 12)"> + <text text-anchor="middle" font-size="8px"> + documents + </text> + </g> + <g transform="rotate (-90) translate(-15, 145)"> + <text text-anchor="left" font-size="4px"> + Prix + </text> + </g> + <g transform="rotate (-90) translate(-15, 155)"> + <text text-anchor="left" font-size="4px"> + Bruit + </text> + </g> + <text x="172" y="15" text-anchor="middle" 
font-size="6px"> + ... + </text> + <text x="190" y="195" text-anchor="end" font-size="6px"> + \textit{qualifiers} + </text> + <text x="70" y="130" text-anchor="middle" font-size="20px"> + ... + </text> + <text x="175" y="130" text-anchor="middle" font-size="20px"> + $\mathbb{R}$ + </text> + + <rect x="150" y="20" width="10" height="20" fill="orange" /> + <rect x="140" y="40" width="10" height="20" fill="blue" opacity=".4" /> + <rect x="150" y="60" width="10" height="20" fill="orange" opacity=".3" /> + + <rect x="20" y="20" width="100" height="160" rx="5" stroke="black" stroke-width=".5" fill="none" /> + <rect x="140" y="20" width="50" height="160" rx="5" stroke="black" stroke-width=".5" fill="none" /> + + <line x1="20" x2="120" y1="40" y2="40" stroke="black" stroke-width=".1" /> + <line x1="20" x2="120" y1="60" y2="60" stroke="black" stroke-width=".1" /> + <line x1="20" x2="120" y1="80" y2="80" stroke="black" stroke-width=".1" /> + <line x1="140" x2="190" y1="40" y2="40" stroke="black" stroke-width=".1" /> + <line x1="140" x2="190" y1="60" y2="60" stroke="black" stroke-width=".1" /> + <line x1="140" x2="190" y1="80" y2="80" stroke="black" stroke-width=".1" /> + <line x1="150" x2="150" y1="20" y2="180" stroke="black" stroke-width=".1" /> + <line x1="160" x2="160" y1="20" y2="180" stroke="black" stroke-width=".1" /> +</svg> diff --git a/images/logo.png b/images/logo.png Binary files differnew file mode 100644 index 0000000..7da55d2 --- /dev/null +++ b/images/logo.png diff --git a/images/lsmr_global_tuning.R b/images/lsmr_global_tuning.R new file mode 100644 index 0000000..3753e09 --- /dev/null +++ b/images/lsmr_global_tuning.R @@ -0,0 +1,94 @@ +load (sprintf ("%s/data/lsmr_global_tuning.Rdata", Sys.getenv ("ABS_TOP_SRCDIR"))) + +#' Compute the Friedman + Nemenyi analysis with the tsutils package. +#' +#' @return the tsutils analysis. 
+#' @export +cd_analysis <- function () { + `%>%` <- magrittr::`%>%` + data <- (global_tuning_data + %>% dplyr::select (`LSMR local` = lsmr_local, dplyr::everything ()) + %>% dplyr::select (-dataset)) + colnames (data) <- toupper (colnames (data)) + colnames (data) <- gsub ("LOCAL", "local", colnames (data)) + tsutils::nemenyi (as.matrix (data), plottype = "vmcb") +} + +## Expects that higher is better. Returns -1 if the first is better, +## +1 if the second is better, and 0 otherwise. +paired_test_aux <- function (algo1, algo2) { + diff <- algo2 - algo1 + magnitude <- abs (diff) + sorted_magnitude <- sort (magnitude) + get_rank <- function (diff) { + m <- abs (diff) + ok <- which (sorted_magnitude == m) + mean (ok) + } + ranks <- sapply (diff, get_rank) + positives <- which (diff > 0) + negatives <- which (diff < 0) + ties <- which (diff == 0) + Rplus <- sum (ranks[positives]) + sum (ranks[ties]) / 2 + Rminus <- sum (ranks[negatives]) + sum (ranks[ties]) / 2 + ret <- 0 + if (Rplus < Rminus) { + ret <- -1 + } else { + ret <- +1 + } + T <- min (Rplus, Rminus) + N <- length (diff) + znum <- (T - (1 / 4) * N * (N + 1)) + ## ------------------------------ + zdenom <- sqrt ((1 / 24) * N * (N + 1) * (2 * N + 1)) + z <- znum / zdenom + alpha <- 0.1 + crit <- qnorm (1 - alpha / 2) + if (z >= -crit) { + ret <- 0 + } + ret +} + +paired_test <- function (algo1, algo2) { + data <- global_tuning_data + algo1 <- data[, algo1 + 1][[1]] + algo2 <- data[, algo2 + 1][[1]] + paired_test_aux (-algo1, -algo2) +} + +#' Compute the win / lose / tie matrix for all pairs of algorithms. +#' @return the matrix with row and column names set to the names of +#' the algorithms. 
+#' @export +win_lose_tie_paired_tests <- function () { + data <- global_tuning_data + M <- matrix (0, ncol (data) - 1, ncol (data) - 1) + colnames (M) <- toupper (gsub ("_", "\n", colnames (data)[2:ncol (data)], fixed = TRUE)) + colnames (M) <- gsub ("LOCAL", "local", colnames (M)) + row.names (M) <- colnames (M) + colnames (M) <- gsub ("LSMR\nlocal", "LSMR (l)", colnames (M), fixed = TRUE) + for (i in seq_len (ncol (data) - 1)) { + for (j in seq_len (ncol (data) - 1)) { + M[i, j] <- paired_test (i, j) + } + } + M +} + +#' Print the plot of the win / lose / tie matrix +#' @return Org-mode code +#' @export +print_win_lose_tie_plot <- function () { + M <- win_lose_tie_paired_tests () + data <- reshape2::melt (M, na.rm = TRUE) + (ggplot2::ggplot (data = data, + ggplot2::aes (Var2, Var1, fill = value)) + + ggplot2::geom_tile (color = "white") + + ggplot2::scale_fill_gradient2 (low = "blue", high = "red", mid = "#FFFFFF00", + midpoint = 0, limit = c (-1, 1), name = "significant test") + + ggplot2::theme (legend.position = "none") + + ggplot2::theme (axis.title = ggplot2::element_blank ()) + + ggplot2::theme (axis.text.x = ggplot2::element_text (angle=45, hjust=1))) +} diff --git a/images/lsmr_local_oes97.R b/images/lsmr_local_oes97.R new file mode 100644 index 0000000..aba51eb --- /dev/null +++ b/images/lsmr_local_oes97.R @@ -0,0 +1,5 @@ +#!/usr/bin/env Rscript +source (sprintf ("%s/images/lsmr_local_tuning.R", Sys.getenv ("ABS_TOP_SRCDIR"))) +plot <- print_local_graph ("oes97") +filename <- Sys.getenv ("OUTPUT") +ggplot2::ggsave (filename, plot, device = "svg", width = 6, height = 4) diff --git a/images/lsmr_local_osales.R b/images/lsmr_local_osales.R new file mode 100644 index 0000000..f2d61c0 --- /dev/null +++ b/images/lsmr_local_osales.R @@ -0,0 +1,5 @@ +#!/usr/bin/env Rscript +source (sprintf ("%s/images/lsmr_local_tuning.R", Sys.getenv ("ABS_TOP_SRCDIR"))) +plot <- print_local_graph ("osales") +filename <- Sys.getenv ("OUTPUT") +ggplot2::ggsave (filename, plot, 
device = "svg", width = 6, height = 4) diff --git a/images/lsmr_local_scpf.R b/images/lsmr_local_scpf.R new file mode 100644 index 0000000..a276c83 --- /dev/null +++ b/images/lsmr_local_scpf.R @@ -0,0 +1,5 @@ +#!/usr/bin/env Rscript +source (sprintf ("%s/images/lsmr_local_tuning.R", Sys.getenv ("ABS_TOP_SRCDIR"))) +plot <- print_local_graph ("scpf") +filename <- Sys.getenv ("OUTPUT") +ggplot2::ggsave (filename, plot, device = "svg", width = 6, height = 4) diff --git a/images/lsmr_local_sf2.R b/images/lsmr_local_sf2.R new file mode 100644 index 0000000..6406c72 --- /dev/null +++ b/images/lsmr_local_sf2.R @@ -0,0 +1,5 @@ +#!/usr/bin/env Rscript +source (sprintf ("%s/images/lsmr_local_tuning.R", Sys.getenv ("ABS_TOP_SRCDIR"))) +plot <- print_local_graph ("sf2") +filename <- Sys.getenv ("OUTPUT") +ggplot2::ggsave (filename, plot, device = "svg", width = 6, height = 4) diff --git a/images/lsmr_local_tuning.R b/images/lsmr_local_tuning.R new file mode 100644 index 0000000..8ad743a --- /dev/null +++ b/images/lsmr_local_tuning.R @@ -0,0 +1,329 @@ +load (sprintf ("%s/data/lsmr_local_tuning.Rdata", Sys.getenv ("ABS_TOP_SRCDIR"))) + +#' Construct a linear kernel +#' +#' @return a linear kernel +#' @export +linear_kernel <- function () { + ret <- list () + class (ret) <- "linear_kernel" + ret +} + +#' Construct a cosine kernel +#' +#' @return a cosine kernel +#' @export +cosine_kernel <- function () { + ret <- list () + class (ret) <- "cosine_kernel" + ret +} + +#' Construct a RBF kernel +#' +#' @param bandwidth the sigma parameter for the RBF... +#' @param gamma ... 
or alternatively the gamma parameter +#' @return an RBF kernel +#' @export +rbf_kernel <- function (bandwidth = NULL, gamma = NULL) { + stopifnot (!is.null (bandwidth) || !is.null (gamma)) + if (is.null (gamma)) { + gamma <- 1 / (2 * bandwidth ^ 2) + } + ret <- list (gamma = gamma) + class (ret) <- "rbf_kernel" + ret +} + +#' Construct a Laplacian matrix with binary relations +#' +#' @param kernel the kernel to compute base similarities +#' @param quantile used to compute the threshold. +#' @return a Laplacian matrix generator +#' @export +quantile_laplacian <- function (kernel = linear_kernel (), quantile = 0.95) { + ret <- list (kernel = kernel, q = quantile) + class (ret) <- "quantile_laplacian" + ret +} + +#' Apply a kernel over two data matrices +#' +#' @param x the kernel to apply +#' @param U the first data matrix +#' @param V the second data matrix (may be missing) +#' @return the kernel matrix +#' @export +cache <- function (x, U, V) { + UseMethod ("cache", x) +} + +#' @method cache linear_kernel +#' @export +cache.linear_kernel <- function (x, U, V = NULL) { + if (is.null (V)) { + V <- U + } + tcrossprod (U, V) +} + +#' @method cache cosine_kernel +#' @export +cache.cosine_kernel <- function (x, U, V = NULL) { + if (is.null (V)) { + V <- U + } + num <- tcrossprod (U, V) + nu <- sqrt (rowSums (U^2)) + nv <- sqrt (rowSums (V^2)) + denom <- tcrossprod (nu, nv) + ret <- num / denom + ret[denom == 0] <- 1 + ret +} + +pdist <- function (U, V = NULL) { + if (is.null (V)) { + V <- U + } + rsu <- as.matrix (rowSums (U^2), nrow (U), 1) + rsv <- as.matrix (rowSums (V^2), nrow (V), 1) + Du <- rsu[, array (1, nrow (V)), drop = FALSE] + Dv <- t (rsv[, array (1, nrow (U)), drop = FALSE]) + D <- Du + Dv - 2 * tcrossprod (U, V) + D[D < 0] <- 0 + D +} + +#' @method cache rbf_kernel +#' @export +cache.rbf_kernel <- function (x, U, V = NULL) { + gamma <- x$gamma + exp (- gamma * pdist (U, V)) +} + +#' @method cache quantile_laplacian +#' @export +cache.quantile_laplacian <- 
function (x, U, V = NULL) { + if (!is.null (V)) { + stop ("Cannot apply the Laplacian matrix on two different data matrices") + } + K <- cache (x$kernel, U) + q <- stats::quantile (K[upper.tri (K)], x$q) + M <- matrix (0, nrow (K), ncol (K)) + M[K < q] <- 0 + M[K >= q] <- 1 + D <- rowSums (M) + diag (D, nrow (K), ncol (K)) - M +} + +#' Construct a RBF kernel fit for a validation dataset +#' +#' @param x a validation data matrix +#' @param y the validation label matrix +#' @return a RBF kernel +#' @export +tune_rbf_kernel <- function (x, y) { + B <- cache (cosine_kernel (), t (t (y))) + B[B < 0] <- 0 + b <- t (t (c (B))) + D <- pdist (x) + candidates <- c (1e-4, 2e-4, 5e-4, + 1e-3, 2e-3, 5e-3, + 1e-2, 2e-2, 5e-2, + 1e-1, 2e-1, 5e-1, + 1e+0, 2e+0, 5e+0, + 1e+1, 2e+1, 5e+1, + 1e+2, 2e+2, 5e+2, + 1e+3, 2e+3, 5e+3, + 1e+4, 2e+4, 5e+4) + alignment <- sapply (candidates, function (gamma) { + K <- exp (-gamma * D) + k <- t (t (c (K))) + alignment <- cache.cosine_kernel (NULL, t (b), t (k)) + alignment[1, 1] + }) + rbf_kernel (gamma = candidates[which.max (alignment)]) +} + + +#' Load the local tuning results. +#' +#' @return A table with the following columns: 'dataset', 'kernel', +#' 'bandwidth', 's', 'semi', 'multi', 'armse_sssl', 'armse_semi', +#' 'armse_multi', 'armse_both'. +#' @export +get_local_tuning_data <- function () { + local_tuning +} + +#' Print the results for the local tuning. +#' +#' @return the data. 
+#' @export +print_tbl_comparison_local <- function () { + data <- get_local_tuning_data () + `%>%` <- magrittr::`%>%` + number <- function (x) { + sapply (x, function (x) { + if (x <= 1) { + sprintf ("*%.3f*", x) + } else { + sprintf ("%.3f", x) + } + }) + } + summaries <- (data + %>% dplyr::group_by (dataset) + %>% dplyr::summarize (median_sssl = median (armse_sssl), + mean = mean (armse_both), + median = median (armse_both), + q1 = quantile (armse_both, .25), + q3 = quantile (armse_both, .75), + min = min (armse_both), + max = max (armse_both)) + %>% dplyr::mutate (relative_mean = mean / median_sssl, + relative_median = median / median_sssl, + relative_q1 = q1 / median_sssl, + relative_q3 = q3 / median_sssl, + relative_min = min / median_sssl, + relative_max = max / median_sssl) + %>% dplyr::mutate (`*Données*` = dataset, + `Moyenne` = number (relative_mean), + `Médiane` = number (relative_median), + `Q1` = number (relative_q1), + `Q3` = number (relative_q3), + `Meilleur` = number (relative_min), + `Pire` = number (relative_max)) + %>% dplyr::select (`*Données*`, `Moyenne`, `Q1`, `Q3`, `Meilleur`, `Pire`) + %>% dplyr::arrange (`Meilleur`)) + summaries +} + +rescale_log <- function (value, min, max) { + log_min <- log (min) + log_max <- log (max) + log_value <- log_min + value * (log_max - log_min) + exp (log_value) +} + +laps3l_decode_hyper <- function (max_s) { + min_bandwidth <- 0.1 + max_bandwidth <- 300 + min_semi <- 1e-08 + max_semi <- 1 + min_multi <- 1e-04 + max_multi <- 10000 + min_s <- 1 + function (row) { + kernel <- NULL + row$kernel <- as.character (row$kernel) + if (row$kernel == "cosine") { + kernel <- cosine_kernel () + } + else if (row$kernel == "linear") { + kernel <- linear_kernel () + } + else { + stopifnot (row$kernel == "rbf") + bw <- rescale_log (row$bandwidth, min_bandwidth, max_bandwidth) + kernel <- rbf_kernel (bw) + } + list (kernel = kernel, + semi = rescale_log (row$semi, min_semi, max_semi), + multi = rescale_log (row$multi, 
min_multi, max_multi), + s = round (rescale_log (row$s, min_s, max_s))) + } +} + +#' Print a local graph +#' +#' @param graph which graph to plot +#' @return a ggplot object. +#' @export +print_local_graph <- function (graph = "atp1d") { + max_s <- NA + if (graph == "atp1d") { + max_s <- 262 + } else if (graph == "atp7d") { + max_s <- 234 + } else if (graph == "edm") { + max_s <- 121 + } else if (graph == "enb") { + max_s <- 601 + } else if (graph == "jura") { + max_s <- 281 + } else if (graph == "oes10") { + max_s <- 314 + } else if (graph == "oes97") { + max_s <- 257 + } else if (graph == "osales") { + max_s <- 495 + } else if (graph == "sarcossub") { + max_s <- 779 + } else if (graph == "scpf") { + max_s <- 889 + } else if (graph == "sf1") { + max_s <- 250 + } else if (graph == "sf2") { + max_s <- 832 + } else if (graph == "wq") { + max_s <- 827 + } else { + stop ("Unknown dataset") + } + d <- laps3l_decode_hyper (max_s) + decode_row <- function (data, i) { + row <- data[i,] + row$kernel <- "linear" + row$s <- 0 + items <- d (row) + data$semi[i] <- items$semi + data$multi[i] <- items$multi + data[i,] + } + decode <- function (data) { + do.call (rbind, lapply (seq_len (nrow (data)), function (i) decode_row (data, i))) + } + smooth <- function (data) { + X <- as.matrix (cbind (data$semi, data$multi)) + y <- t (t (data$relative_armse)) + D <- as.matrix (dist (X, diag = T, upper = T)) + M <- 0 * D + M[D < 0.1] <- 1 + sum <- rowSums (M) + M <- diag (1 / sum, nrow (X), nrow (X)) %*% M + data$relative_armse <- M %*% y + data + } + data <- get_local_tuning_data () + `%>%` <- magrittr::`%>%` + armse_sssl_median <- median ((data + %>% dplyr::filter (dataset == graph) + %>% dplyr::select (armse_sssl))$armse_sssl, na.rm = TRUE) + relative_data <- (data + %>% dplyr::filter (dataset == graph) + %>% dplyr::mutate (relative_armse = armse_both / armse_sssl_median) + %>% dplyr::select (semi, multi, relative_armse)) + averaged <- smooth (relative_data) + intp <- with (averaged, + 
akima::interp (x = semi, + y = multi, + z = relative_armse, + duplicate = "mean")) + values <- as.data.frame (as.matrix (intp$z)) + colnames (values) <- intp$y + intp <- (tidyr::gather (cbind (values, semi = intp$x), + multi, armse, seq_len (ncol (intp$z)), + na.rm = TRUE) + %>% dplyr::mutate (multi = as.numeric (multi))) + interpolated <- (intp %>% decode ()) + (ggplot2::ggplot (interpolated, ggplot2::aes (x = semi, y = multi, fill = armse)) + + ggplot2::geom_tile () + + ggplot2::scale_fill_gradient2 (midpoint = 1, name = "aRMSE\nrelative") + + ggplot2::xlab ("Régulariseur semi-supervisé $\\alpha$") + + ggplot2::ylab ("Régulariseur multi-label $\\beta$") + + ggplot2::scale_x_log10 () + + ggplot2::scale_y_log10 ()) +} diff --git a/images/lsmrrsmsapprentissage.svg b/images/lsmrrsmsapprentissage.svg new file mode 100644 index 0000000..766a2d9 --- /dev/null +++ b/images/lsmrrsmsapprentissage.svg @@ -0,0 +1,324 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<svg + xmlns="http://www.w3.org/2000/svg" + xmlns:xlink="http://www.w3.org/1999/xlink" + version="1.1" + viewBox="0 0 210 297" + height="217.8mm" + width="154mm"> + <defs> + <g id="laplacian-matrix-base"> + <g fill="Teal"> + <rect x="-10" y="-10" width="4" height="4" /> + <rect x="-6" y="-6" width="4" height="4" /> + <rect x="-2" y="-2" width="4" height="4" /> + <rect x="2" y="2" width="4" height="4" /> + <rect x="6" y="6" width="4" height="4" /> + </g> + <g stroke-width=".2" stroke="Teal"> + <line x1="-6" x2="-6" y1="-10" y2="10" /> + <line x1="-2" x2="-2" y1="-10" y2="10" /> + <line x1="2" x2="2" y1="-10" y2="10" /> + <line x1="6" x2="6" y1="-10" y2="10" /> + <line y1="-6" y2="-6" x1="-10" x2="10" /> + <line y1="-2" y2="-2" x1="-10" x2="10" /> + <line y1="2" y2="2" x1="-10" x2="10" /> + <line y1="6" y2="6" x1="-10" x2="10" /> + </g> + <rect x="-10" y="-10" width="20" height="20" stroke="Teal" stroke-width=".2" fill="none" /> + </g> + <g id="laplacian-matrix-1"> + <rect x="-10" y="-10" width="20" 
height="20" stroke="none" fill="white" /> + <g fill="PaleTurquoise"> + <rect x="-6" y="-10" width="4" height="4" /> + <rect x="2" y="-10" width="4" height="4" /> + <rect x="-10" y="-6" width="4" height="4" /> + <rect x="6" y="-6" width="4" height="4" /> + <rect x="2" y="-2" width="4" height="4" /> + <rect x="6" y="-2" width="4" height="4" /> + <rect x="-10" y="2" width="4" height="4" /> + <rect x="-2" y="2" width="4" height="4" /> + <rect x="-6" y="6" width="4" height="4" /> + <rect x="-2" y="6" width="4" height="4" /> + </g> + <use xlink:href="#laplacian-matrix-base" /> + </g> + <g id="laplacian-matrix-2"> + <rect x="-10" y="-10" width="20" height="20" stroke="none" fill="white" /> + <g fill="PaleTurquoise"> + <rect x="-2" y="-10" width="4" height="4" /> + <rect x="2" y="-6" width="4" height="4" /> + <rect x="-10" y="-2" width="4" height="4" /> + <rect x="-6" y="2" width="4" height="4" /> + <rect x="6" y="2" width="4" height="4" /> + <rect x="2" y="6" width="4" height="4" /> + </g> + <use xlink:href="#laplacian-matrix-base" /> + </g> + </defs> + <g id="page-rsms"> + <rect x="10" y="10" width="70" height="140" + fill="white" /> + <rect x="10" y="15" width="70" height="15" + fill="WhiteSmoke" /> + <line x1="10" y1="15" x2="80" y2="15" + stroke="SlateGray" stroke-width="1" /> + <text fill="DimGray" x="45" y="27" font-size="10" text-anchor="middle"> + 1) RSMS + </text> + <line x1="10" y1="30" x2="80" y2="30" + stroke="SlateGray" stroke-width=".2" /> + <rect x="10" y="10" width="70" height="140" + stroke="SlateGray" stroke-width="0.2" fill="none" /> + <g stroke="black" stroke-width=".2"> + <rect fill="WhiteSmoke" x="20" y="40" width="30" height="60" /> + <line x1="40" x2="40" y1="40" y2="100" /> + <line x1="20" x2="50" y1="60" y2="60" /> + <line x1="20" x2="50" y1="80" y2="80" /> + </g> + <g font-size="5" text-anchor="middle" fill="DimGray"> + <text x="30" y="39"> $X$ </text> + <text x="45" y="39"> $Y$ </text> + <text x="65" y="39"> $L$ </text> + </g> + <use 
xlink:href="#laplacian-matrix-2" transform="translate (65 50) scale(0.9)" /> + <use xlink:href="#laplacian-matrix-1" transform="translate (65 70) scale(0.9)" /> + <use xlink:href="#laplacian-matrix-2" transform="translate (65 90) scale(0.9)" /> + <g font-size="5" fill="DimGray"> + <text x="20" y="112"> + À chaque époque : + </text> + <text x="25" y="118"> + 1 itération pour + </text> + <text x="25" y="124"> + chaque minibatch + </text> + </g> + <line x1="22" x2="22" y1="114" y2="124" stroke="DimGray" stroke-width=".2" /> + <rect x="15" y="130" width="60" height="15" fill="LavenderBlush" /> + <line x1="15" x2="75" y1="130" y2="130" stroke="DimGray" stroke-width=".2" /> + <text x="70" y="140" font-size="5" fill="DimGray" text-anchor="end"> + Variables + </text> + <rect fill="none" stroke="black" stroke-width=".2" x="15" y="105" width="60" height="40" /> + <polygon points="20,135 28,135 28,131 35,137.5 28,144 28,140 20,140" fill="white" stroke="maroon" stroke-width=".2" /> + </g> + + <g id="page-bootstrap"> + <g stroke-width="0.2"> + <rect x="120" y="40" width="80" height="140" fill="DimGray" stroke="black" /> + <rect x="110" y="30" width="80" height="140" fill="white" stroke="SlateGray"/> + <rect x="100" y="20" width="80" height="140" fill="DimGray" stroke="black" /> + <rect x="90" y="10" width="80" height="140" fill="white" stroke="SlateGray"/> + </g> + <rect x="90" y="10" width="80" height="140" + fill="white" /> + <rect x="90" y="15" width="80" height="15" + fill="WhiteSmoke" /> + <line x1="90" y1="15" x2="170" y2="15" + stroke="SlateGray" stroke-width="1" /> + <text fill="DimGray" x="130" y="27" font-size="10" text-anchor="middle"> + 2) LSMR + </text> + <line x1="90" y1="30" x2="170" y2="30" + stroke="SlateGray" stroke-width=".2" /> + <g font-size="7" text-anchor="end"> + <text x="175" y="158" fill="WhiteSmoke"> + aRMSE trop élevée + </text> + <text x="185" y="168" fill="DimGray"> + Sélectionné + </text> + </g> + <rect x="90" y="130" width="80" height="20" 
fill="LavenderBlush" /> + <rect x="95" y="40" width="40" height="80" fill="WhiteSmoke" /> + + <use xlink:href="#laplacian-matrix-1" transform="translate (155 50) scale(0.9)" /> + <g font-size="5" text-anchor="middle" fill="DimGray"> + <text x="112.5" y="39"> $X$ </text> + <text x="130" y="39"> $Y$ </text> + <text x="155" y="39"> $L$ </text> + </g> + <text fill="DimGray" x="97" y="52" font-size="5"> + Apprentissage + </text> + <text fill="DimGray" x="97" y="72" font-size="5"> + Validation + </text> + <text fill="DimGray" x="97" y="92" font-size="5"> + Hors du sac + </text> + <text fill="DimGray" x="97" y="112" font-size="5"> + (Out Of Bag) + </text> + <g stroke="black" stroke-width=".2"> + <rect x="95" y="40" width="40" height="80" fill="none" /> + <line x1="95" y1="60" x2="135" y2="60" /> + <line x1="95" y1="80" x2="135" y2="80" /> + <line x1="125" y1="40" x2="125" y2="120" /> + </g> + <line x1="90" x2="170" y1="130" y2="130" stroke="SlateGray" stroke-width=".2" /> + <line x1="140" x2="140" y1="130" y2="150" stroke="SlateGray" stroke-width=".2" /> + <line x1="90" x2="140" y1="140" y2="140" stroke="SlateGray" stroke-width=".2" /> + <rect x="90" y="10" width="80" height="140" + stroke="SlateGray" stroke-width="0.2" fill="none" /> + + <rect x="140" y="70" width="30" height="50" fill="beige" /> + <polygon fill="ivory" points="170,70 200,100 200,150 170,120" /> + <polygon stroke="DarkKhaki" stroke-width="0.2" fill="none" + points="140,70 170,70 200,100 200,150 170,120 140,120" /> + <g font-size="5" fill="DimGray"> + <text x="145" y="80"> + Faire + </text> + <text x="145" y="88"> + varier : + </text> + <g text-anchor="end"> + <text x="170" y="96"> + - échan + </text> + <text x="170" y="104"> + - hyper + </text> + </g> + <text transform="translate (170, 96) rotate (45)"> + tillons + </text> + <text transform="translate(170, 104) rotate (45)"> + paramètres + </text> + </g> + <line x1="170" y1="70" x2="170" y2="120" stroke="white" stroke-width=".2" /> + <g font-size="5" 
text-anchor="middle"> + <text x="115" y="138" fill="DimGray"> + aRMSE + </text> + <text x="115" y="148" fill="DimGray"> + OOB + </text> + <text x="155" y="143" fill="DimGray"> + Modèle + </text> + </g> + <polyline points="70,138 85,138 85,37 100,37 98,35 100,37 98,39 100,37" + stroke-width="1" + stroke="CornflowerBlue" + fill="none"/> + </g> + + <g id="page-aggregation"> + <polygon points="10,160 80,160 110,190 110,290 10,290" + fill="white" /> + <polygon points="10,165 85,165 100,180 10,180" + fill="WhiteSmoke" /> + <line x1="10" y1="165" x2="85" y2="165" + stroke="SlateGray" stroke-width="1" /> + <text fill="DimGray" x="50" y="177" font-size="10" text-anchor="middle"> + 3) Stacking + </text> + <line x1="10" y1="180" x2="100" y2="180" + stroke="SlateGray" stroke-width=".2" /> + + <text fill="DimGray" x="20" y="188" font-size="5"> + Par qualifier (indiv. pertinents) : + </text> + + <rect x="15" y="190" width="30" height="40" + stroke="DarkOliveGreen" stroke-width=".2" fill="none" /> + <g> + <rect x="22" y="197" width="5" height="23" fill="LavenderBlush" stroke="maroon" stroke-width=".2" /> + <rect x="20" y="195" width="5" height="5" fill="white" stroke="SlateGray" stroke-width=".2" /> + <rect x="20" y="200" width="5" height="18" fill="LavenderBlush" stroke="maroon" stroke-width=".2" /> + <g font-size="5" text-anchor="middle" fill="DimGray"> + <text x="22.5" y="200">?</text> + <text x="22.5" y="206">O</text> + <text x="22.5" y="212">O</text> + <text x="22.5" y="218">B</text> + </g> + <rect x="35.5" y="196" width="5" height="23" fill="WhiteSmoke" stroke="DimGray" stroke-width=".2" /> + <g font-size="5" text-anchor="middle" fill="DimGray"> + <text x="23.5" y="228">$\hat Y$</text> + <text x="38" y="228">$Y$</text> + </g> + </g> + + <rect x="50" y="190" width="30" height="50" + stroke="DarkOliveGreen" stroke-width=".2" fill="none" /> + <g> + <rect x="57" y="197" width="5" height="33" fill="LavenderBlush" stroke="maroon" stroke-width=".2" /> + <rect x="55" y="195" 
width="5" height="5" fill="white" stroke="SlateGray" stroke-width=".2" /> + <rect x="55" y="200" width="5" height="28" fill="LavenderBlush" stroke="maroon" stroke-width=".2" /> + <rect x="70.5" y="196" width="5" height="33" fill="WhiteSmoke" stroke="DimGray" stroke-width=".2" /> + <g font-size="5" text-anchor="middle" fill="DimGray"> + <text x="58.5" y="238">$\hat Y$</text> + <text x="73" y="238">$Y$</text> + </g> + </g> + + <text fill="DimGray" x="95" y="220" text-anchor="middle" font-size="10"> + ... + </text> + + <rect x="10" y="245" width="100" height="45" fill="LavenderBlush" /> + + <g stroke="DarkOliveGreen" stroke-width=".2"> + <rect x="20" y="255" width="30" height="30" + fill="HoneyDew" /> + <line x1="25" x2="25" y1="255" y2="285" /> + <line x1="30" x2="30" y1="255" y2="285" /> + <text x="40" y="275" font-size="5" text-anchor="middle">...</text> + </g> + + <text font-size="5" fill="DimGray" x="80" y="275" text-anchor="middle"> + $W$ : modèle + </text> + + <g fill="none" stroke="DarkOliveGreen" stroke-width="1"> + <polyline points="22.5,230 22.5,255 20.5,253 22.5,255 24.5,253 22.5,255" /> + <polyline points="50,235 27.5,235 27.5,255 25.5,253 27.5,255 29.5,253 27.5,255" /> + </g> + + <polygon points="10,240 60,240 70,230 80,230 90,240 110,240 110,250 90,250 80,260 70,260 60,250 10,250" + stroke="DarkOliveGreen" stroke-width=".2" fill="honeydew" /> + <text fill="DarkOliveGreen" font-size="5" text-anchor="middle" x="75" y="240"> + Ridge + </text> + <text fill="DarkOliveGreen" font-size="5" x="60" y="247" text-anchor="middle"> + (avec imputation par la moyenne) + </text> + + <polygon points="10,160 80,160 110,190 110,290 10,290" + fill="none" stroke="SlateGray" stroke-width=".2" /> + </g> + + <g id="page-resultats"> + <rect x="120" y="190" width="80" height="100" fill="white" /> + <rect x="120" y="195" width="80" height="15" stroke="none" fill="WhiteSmoke" /> + + <text x="160" y="207" font-size="10" fill="DimGray" text-anchor="middle"> + Sortie + </text> + + 
<text x="125" y="230" font-size="5" fill="DimGray"> + RSMS : variables + </text> + + <text x="125" y="255" font-size="5" fill="DimGray"> + LSMR : modèles bootstrap + </text> + + <text x="125" y="280" font-size="5" fill="DimGray"> + Stacking : modèle Ridge + </text> + + <line x1="120" x2="200" y1="195" y2="195" stroke="SlateGray" stroke-width="1"/> + <line x1="120" x2="200" y1="210" y2="210" stroke="SlateGray" stroke-width=".2" /> + <line x1="120" x2="200" y1="215" y2="215" stroke="SlateGray" stroke-width=".2" /> + <line x1="120" x2="200" y1="240" y2="240" stroke="SlateGray" stroke-width=".2" /> + <line x1="120" x2="200" y1="265" y2="265" stroke="SlateGray" stroke-width=".2" /> + <rect x="120" y="190" width="80" height="100" + fill="none" stroke="SlateGray" stroke-width="0.2" /> + </g> +</svg> diff --git a/images/lsmrrsmsinference.svg b/images/lsmrrsmsinference.svg new file mode 100644 index 0000000..d4d96f3 --- /dev/null +++ b/images/lsmrrsmsinference.svg @@ -0,0 +1,475 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<svg + xmlns:dc="http://purl.org/dc/elements/1.1/" + xmlns:cc="http://creativecommons.org/ns#" + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:svg="http://www.w3.org/2000/svg" + xmlns="http://www.w3.org/2000/svg" + xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" + xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" + width="150mm" + height="50mm" + viewBox="71 133 832 239" + version="1.1" + id="svg5016" + sodipodi:docname="Diagramme1.svg" + inkscape:version="0.92.4 (5da689c313, 2019-01-14)"> + <metadata + id="metadata5022"> + <rdf:RDF> + <cc:Work + rdf:about=""> + <dc:format>image/svg+xml</dc:format> + <dc:type + rdf:resource="http://purl.org/dc/dcmitype/StillImage" /> + </cc:Work> + </rdf:RDF> + </metadata> + <defs + id="defs5020" /> + <sodipodi:namedview + pagecolor="#ffffff" + bordercolor="#666666" + borderopacity="1" + objecttolerance="10" + gridtolerance="10" + guidetolerance="10" + 
inkscape:pageopacity="0" + inkscape:pageshadow="2" + inkscape:window-width="1920" + inkscape:window-height="1080" + id="namedview5018" + showgrid="false" + inkscape:zoom="0.280633" + inkscape:cx="1016.6517" + inkscape:cy="-254.18323" + inkscape:window-x="0" + inkscape:window-y="0" + inkscape:window-maximized="1" + inkscape:current-layer="g4890" /> + <g + id="Arrière-plan" /> + <g + id="g4848" /> + <g + id="g4850" /> + <g + id="g4852" /> + <g + id="g4854" /> + <g + id="g5014"> + <g + id="g4862"> + <path + style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:2;stroke-opacity:1" + d="M 81.191136,293.837 H 148.263 l -12.54004,38 H 70.176878 Z" + id="polygon4856" + inkscape:connector-curvature="0" + sodipodi:nodetypes="ccccc" /> + <text + font-size="12.8" + style="font-style:normal;font-weight:normal;font-size:12.80000019px;font-family:sans-serif;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none" + x="106.05132" + y="320.49252" + id="text4860"> + <tspan + x="106.05132" + y="320.49252" + id="tspan4858">POST</tspan> + </text> + </g> + <g + id="g4872"> + <polygon + style="fill: #ffffff; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + fill-rule="evenodd" + points="748.242,285.837 902.214,285.837 882.56,339.837 728.588,339.837 " + id="polygon4864" /> + <text + font-size="12.8" + style="font-style:normal;font-weight:normal;font-size:12.80000019px;font-family:sans-serif;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none" + x="811.58362" + y="314.38007"> + <tspan + x="811.58362" + y="314.38007" + id="tspan4866">Tonalité</tspan> + </text> + <text + font-size="12.8" + style="font-style:normal;font-weight:normal;font-size:12.80000019px;font-family:sans-serif;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none" + x="811.58362" + y="330.38007"> + <tspan + x="811.58362" + y="330.38007" + id="tspan4868">multi-labels</tspan> + </text> + </g> + <g + id="g4890"> + <polygon + style="fill: #ffffff; fill-opacity: 1; 
stroke-opacity: 1; stroke-width: 2; stroke: #000000" + fill-rule="evenodd" + points="633.727,264.044 689.121,312.837 633.727,361.63 578.333,312.837 " + id="polygon4874" /> + <text + font-size="12.8" + style="fill: #000000; fill-opacity: 1; stroke: none;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" + x="633.727" + y="316.737" + id="text4878"> + <tspan + x="633.727" + y="316.737" + id="tspan4876">Stacking</tspan> + </text> + <path + style="fill: #ffffff; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + fill-rule="evenodd" + d="M 552.427 148.117 C 584.947,138.617 601.207,135.45 633.727,135.45 C 666.247,135.45 682.507,138.617 715.027,148.117 L 715.027,198.783 C 682.507,208.283 666.247,211.45 633.727,211.45 C 601.207,211.45 584.947,208.283 552.427,198.783 L 552.427,148.117z" + id="path4880" /> + <path + style="fill: none; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + d="M 552.427 148.117 C 584.947,157.617 601.207,160.783 633.727,160.783 C 666.247,160.783 682.507,157.617 715.027,148.117" + id="path4882" /> + <text + font-size="12.7998" + style="font-style:normal;font-weight:normal;font-size:12.79979992px;font-family:sans-serif;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none" + x="633.72699" + y="183.40976" + id="text4888"> + <tspan + x="633.72699" + y="183.40976" + id="tspan4884">Modèle</tspan> + </text> + <text + font-size="12.7998" + style="font-style:normal;font-weight:normal;font-size:12.79979992px;font-family:sans-serif;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none" + x="633.72699" + y="199.40976" + id="text4888"> + <tspan + x="633.72699" + y="199.40976" + id="tspan4886">stacking</tspan> + </text> + </g> + <g + id="g4926"> + <g + id="g4898"> + <polygon + style="fill: #ffffff; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + fill-rule="evenodd" + points="451.715,297.874 497.613,334.387 451.715,370.901 405.818,334.387 " + id="polygon4892" /> + <text + 
font-size="12.8" + style="fill: #000000; fill-opacity: 1; stroke: none;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" + x="451.715" + y="338.287" + id="text4896"> + <tspan + x="451.715" + y="338.287" + id="tspan4894">LSMR</tspan> + </text> + </g> + <g + id="g4906"> + <polygon + style="fill: #ffffff; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + fill-rule="evenodd" + points="451.715,276.324 497.613,312.837 451.715,349.351 405.818,312.837 " + id="polygon4900" /> + <text + font-size="12.8" + style="fill: #000000; fill-opacity: 1; stroke: none;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" + x="451.715" + y="316.737" + id="text4904"> + <tspan + x="451.715" + y="316.737" + id="tspan4902">LSMR</tspan> + </text> + </g> + <g + id="g4914"> + <polygon + style="fill: #ffffff; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + fill-rule="evenodd" + points="451.715,254.774 497.613,291.287 451.715,327.801 405.818,291.287 " + id="polygon4908" /> + <text + font-size="12.8" + style="font-style:normal;font-weight:normal;font-size:12.80000019px;font-family:sans-serif;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none" + x="451.715" + y="300.11615" + id="text4912"> + <tspan + x="451.715" + y="300.11615" + id="tspan4910">LSMR</tspan> + </text> + </g> + <path + style="fill: #ffffff; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + fill-rule="evenodd" + d="M 365.565 148.117 C 400.025,138.617 417.255,135.45 451.715,135.45 C 486.175,135.45 503.405,138.617 537.865,148.117 L 537.865,198.783 C 503.405,208.283 486.175,211.45 451.715,211.45 C 417.255,211.45 400.025,208.283 365.565,198.783 L 365.565,148.117z" + id="path4916" /> + <path + style="fill: none; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + d="M 365.565 148.117 C 400.025,157.617 417.255,160.783 451.715,160.783 C 486.175,160.783 503.405,157.617 537.865,148.117" + id="path4918" /> + 
<text + font-size="12.7998" + style="font-style:normal;font-weight:normal;font-size:12.79979992px;font-family:sans-serif;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none" + x="451.715" + y="181.76672"> + <tspan + x="451.715" + y="181.76672" + id="tspan4920">Modèles</tspan> + </text> + <text + font-size="12.7998" + style="font-style:normal;font-weight:normal;font-size:12.79979992px;font-family:sans-serif;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none" + x="451.715" + y="197.76672"> + <tspan + x="451.715" + y="197.76672" + id="tspan4922">bootstrap</tspan> + </text> + </g> + <g + id="g4946"> + <path + style="fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke:#000000;stroke-width:2;stroke-opacity:1" + d="m 183.17656,287.837 149.52625,0 17.37119,25 -17.37119,25 -149.52625,0 -20.42256,-25 z" + id="path4928" + inkscape:connector-curvature="0" + sodipodi:nodetypes="ccccccc" /> + <text + font-size="12.8" + style="font-style:normal;font-weight:normal;font-size:12.80000019px;font-family:sans-serif;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none" + x="256.414" + y="313.17868" + id="text4934"> + <tspan + x="256.414" + y="313.17868" + id="tspan4930">Reconnaissance</tspan> + </text> + <text + font-size="12.8" + style="font-style:normal;font-weight:normal;font-size:12.80000019px;font-family:sans-serif;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none" + x="256.414" + y="329.17868" + id="text4934"> + <tspan + x="256.414" + y="329.17868" + id="tspan4932">des variables</tspan> + </text> + <path + style="fill: #ffffff; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + fill-rule="evenodd" + d="M 168.914 148.117 C 203.914,138.617 221.414,135.45 256.414,135.45 C 291.414,135.45 308.914,138.617 343.914,148.117 L 343.914,198.783 C 308.914,208.283 291.414,211.45 256.414,211.45 C 221.414,211.45 203.914,208.283 168.914,198.783 L 168.914,148.117z" + id="path4936" /> + <path + style="fill: none; stroke-opacity: 1; stroke-width: 2; 
stroke: #000000" + d="M 168.914 148.117 C 203.914,157.617 221.414,160.783 256.414,160.783 C 291.414,160.783 308.914,157.617 343.914,148.117" + id="path4938" /> + <text + font-size="12.7998" + style="font-style:normal;font-weight:normal;font-size:12.79979992px;font-family:sans-serif;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none" + x="256.414" + y="181.47302" + id="text4944"> + <tspan + x="256.414" + y="181.47302" + id="tspan4940">Variables</tspan> + </text> + <text + font-size="12.7998" + style="font-style:normal;font-weight:normal;font-size:12.79979992px;font-family:sans-serif;text-anchor:middle;fill:#000000;fill-opacity:1;stroke:none" + x="256.414" + y="197.47302" + id="text4944"> + <tspan + x="256.414" + y="197.47302" + id="tspan4942">sélectionnées</tspan> + </text> + </g> + <g + id="g4952"> + <line + style="fill: none; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + x1="141.348" + y1="312.837" + x2="153.018" + y2="312.837" + id="line4948" /> + <polygon + style="fill: #000000; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + fill-rule="evenodd" + points="160.518,312.837 150.518,317.837 153.018,312.837 150.518,307.837 " + id="polygon4950" /> + </g> + <g + id="g4958"> + <line + style="fill: none; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + x1="350.073" + y1="312.837" + x2="396.082" + y2="312.837" + id="line4954" /> + <polygon + style="fill: #000000; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + fill-rule="evenodd" + points="403.582,312.837 393.582,317.837 396.082,312.837 393.582,307.837 " + id="polygon4956" /> + </g> + <g + id="g4964"> + <path + style="fill:none;stroke:#000000;stroke-width:2;stroke-opacity:1" + d="M 342.57075,302.26155 396.229,292.97198" + id="line4960" + inkscape:connector-curvature="0" + sodipodi:nodetypes="cc" /> + <polygon + style="fill: #000000; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + fill-rule="evenodd" + points="403.615,291.674 
394.632,298.33 396.229,292.972 392.901,288.48 " + id="polygon4962" /> + </g> + <g + id="g4970"> + <path + style="fill:none;stroke:#000000;stroke-width:2;stroke-opacity:1" + d="M 342.33603,323.41344 396.229,332.702" + id="line4966" + inkscape:connector-curvature="0" + sodipodi:nodetypes="cc" /> + <polygon + style="fill: #000000; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + fill-rule="evenodd" + points="403.615,334 392.901,337.194 396.229,332.702 394.632,327.345 " + id="polygon4968" /> + </g> + <g + id="g4976"> + <line + style="fill: none; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + x1="497.613" + y1="291.287" + x2="576.225" + y2="303.733" + id="line4972" /> + <polygon + style="fill: #000000; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + fill-rule="evenodd" + points="583.632,304.906 572.973,308.281 576.225,303.733 574.537,298.404 " + id="polygon4974" /> + </g> + <g + id="g4982"> + <line + style="fill: none; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + x1="497.613" + y1="312.837" + x2="567.596" + y2="312.837" + id="line4978" /> + <polygon + style="fill: #000000; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + fill-rule="evenodd" + points="575.096,312.837 565.096,317.837 567.596,312.837 565.096,307.837 " + id="polygon4980" /> + </g> + <g + id="g4988"> + <line + style="fill: none; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + x1="497.613" + y1="334.387" + x2="576.225" + y2="321.941" + id="line4984" /> + <polygon + style="fill: #000000; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + fill-rule="evenodd" + points="583.632,320.768 574.537,327.271 576.225,321.941 572.973,317.394 " + id="polygon4986" /> + </g> + <g + id="g4994"> + <line + style="fill: none; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + x1="690.121" + y1="312.837" + x2="728.034" + y2="312.837" + id="line4990" /> + <polygon + style="fill: #000000; fill-opacity: 1; 
stroke-opacity: 1; stroke-width: 2; stroke: #000000" + fill-rule="evenodd" + points="735.534,312.837 725.534,317.837 728.034,312.837 725.534,307.837 " + id="polygon4992" /> + </g> + <g + id="g5000"> + <line + style="fill: none; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + x1="256.414" + y1="211.45" + x2="256.414" + y2="278.101" + id="line4996" /> + <polygon + style="fill: #000000; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + fill-rule="evenodd" + points="256.414,285.601 251.414,275.601 256.414,278.101 261.414,275.601 " + id="polygon4998" /> + </g> + <g + id="g5006"> + <line + style="fill: none; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + x1="451.715" + y1="211.45" + x2="451.715" + y2="245.038" + id="line5002" /> + <polygon + style="fill: #000000; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + fill-rule="evenodd" + points="451.715,252.538 446.715,242.538 451.715,245.038 456.715,242.538 " + id="polygon5004" /> + </g> + <g + id="g5012"> + <line + style="fill: none; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + x1="633.727" + y1="211.45" + x2="633.727" + y2="254.308" + id="line5008" /> + <polygon + style="fill: #000000; fill-opacity: 1; stroke-opacity: 1; stroke-width: 2; stroke: #000000" + fill-rule="evenodd" + points="633.727,261.808 628.727,251.808 633.727,254.308 638.727,251.808 " + id="polygon5010" /> + </g> + </g> +</svg> diff --git a/images/nemenyi.R b/images/nemenyi.R new file mode 100644 index 0000000..f7e4729 --- /dev/null +++ b/images/nemenyi.R @@ -0,0 +1,7 @@ +#!/usr/bin/env Rscript +source (sprintf ("%s/images/lsmr_global_tuning.R", Sys.getenv ("ABS_TOP_SRCDIR"))) +print_win_lose_tie_plot () +filename <- Sys.getenv ("OUTPUT") +svg (filename, width = 5, height = 5) +cd_analysis () +dev.off () diff --git a/images/rsms_1.R b/images/rsms_1.R new file mode 100644 index 0000000..df07a1d --- /dev/null +++ b/images/rsms_1.R @@ -0,0 +1,69 @@ +load (sprintf ("%s/data/rsms_test.Rdata", 
Sys.getenv ("ABS_TOP_SRCDIR")))

Sys.setlocale ("LC_ALL", "fr_FR.UTF-8")

#' Print the graphs for the features.
#'
#' Reads the global `test` table (presumably provided by the
#' `rsms_test.Rdata` file loaded at the top of this script), keeps the runs
#' with `frac_labeled == 0.3` and `frac_labels == 1` for the four compared
#' algorithms, and summarises `error` per (dataset, algorithm,
#' frac_features).  One line plot of the mean error against the fraction of
#' features is drawn per dataset; the panels are laid out on a 4 x 3 grid in
#' which the last panel spans two cells.  Only the "wq" panel keeps its
#' legend.
#'
#' @return the value returned by `gridExtra::grid.arrange ()`.
#' @export
features_graph <- function () {
  # Bind the pipe locally rather than attaching magrittr to the search path.
  `%>%` <- magrittr::`%>%`

  kept_datasets <- c ("atp1d", "atp7d", "edm", "enb", "oes10", "oes97",
                      "osales", "scpf", "sf1", "sf2", "wq")
  # Raw algorithm identifier -> label shown in the legend.
  display_names <- c (formulas = "\\textbf{RSMS}",
                      mifs = "MIFS",
                      rfs = "RFS",
                      sfus = "SFUS")
  legend_order <- c ("\\textbf{RSMS}", "MIFS", "SFUS", "RFS")

  stats_tbl <- test %>%
    dplyr::filter (frac_labeled == 0.3,
                   frac_labels == 1,
                   algorithm %in% names (display_names),
                   dataset %in% kept_datasets,
                   !is.na (error)) %>%
    dplyr::mutate (algorithm =
                     unname (display_names[as.character (algorithm)])) %>%
    dplyr::select (dataset, algorithm, frac_features, error) %>%
    dplyr::group_by (dataset, algorithm, frac_features) %>%
    dplyr::summarize (n = dplyr::n (),
                      mean = mean (error),
                      sd = sd (error),
                      min = min (error),
                      max = max (error),
                      median = median (error),
                      q1 = quantile (error, 0.25),
                      q3 = quantile (error, 0.75)) %>%
    dplyr::ungroup ()

  arrange <- function (...) {
    gridExtra::grid.arrange (..., layout_matrix = rbind (
      c (1, 2, 3),
      c (4, 5, 6),
      c (7, 8, 9),
      c (10, 11, 11)
    ))
  }

  # Build the panel of a single dataset.
  panel <- function (dataset_name) {
    with_legend <- stats_tbl %>%
      dplyr::filter (dataset == dataset_name) %>%
      # One-standard-deviation band around the mean.  The upper bound used
      # to be `max + sd`; neither bound is mapped to an aesthetic below.
      dplyr::mutate (ymin = mean - sd, ymax = mean + sd) %>%
      dplyr::select (Algorithme = algorithm,
                     `Variables` = frac_features,
                     `aRMSE moyenne` = mean, ymin, ymax) %>%
      ggplot2::ggplot (ggplot2::aes (x = `Variables`,
                                     y = `aRMSE moyenne`,
                                     color = Algorithme,
                                     linetype = Algorithme)) +
      ggplot2::geom_line () +
      ggplot2::ggtitle (dataset_name) +
      ggplot2::scale_x_continuous (labels = scales::percent) +
      ggplot2::scale_color_manual (
        limits = legend_order,
        values = c ("black", "#e69f00", "#56b4e9", "#009e73")) +
      ggplot2::scale_linetype_manual (
        limits = legend_order,
        values = c ("solid", "dashed", "longdash", "dotdash"))
    if (dataset_name == "wq") {
      with_legend
    } else {
      with_legend + ggplot2::theme (legend.position = "none")
    }
  }

  do.call (arrange, lapply (unique (stats_tbl$dataset), panel))
}

plot <- features_graph ()
filename <- Sys.getenv ("OUTPUT")
ggplot2::ggsave (filename, plot, device = "svg", width = 6, height = 8)

# diff --git a/images/rsms_2.R b/images/rsms_2.R new file mode 100644 index 0000000..8be2cf0 --- /dev/null +++ b/images/rsms_2.R @@ -0,0 +1,58 @@
load (sprintf ("%s/data/rsms_test.Rdata", Sys.getenv ("ABS_TOP_SRCDIR")))

Sys.setlocale ("LC_ALL", "fr_FR.UTF-8")

#' Print the graphs for label selection
#' @return the graph.
#' @details Reads the global `test` table (presumably provided by the
#'   `rsms_test.Rdata` file loaded at the top of this script), keeps the
#'   "formulas" runs with `frac_labeled == 0.3` and `frac_features == 0.3`,
#'   and summarises `error` per (dataset, frac_labels).  One line plot of
#'   the mean error against the fraction of selected labels is drawn per
#'   dataset; the panels are laid out on a 4 x 3 grid in which the last
#'   panel spans two cells.
#' @export
labels_graph <- function () {
  # Bind the pipe locally rather than attaching magrittr to the search path.
  `%>%` <- magrittr::`%>%`

  kept_datasets <- c ("atp1d", "atp7d", "edm", "enb", "oes10", "oes97",
                      "osales", "scpf", "sf1", "sf2", "wq")

  stats_tbl <- test %>%
    dplyr::filter (frac_labeled == 0.3,
                   frac_features == 0.3,
                   algorithm == "formulas",
                   dataset %in% kept_datasets,
                   !is.na (error)) %>%
    dplyr::select (dataset, frac_labels, error) %>%
    dplyr::group_by (dataset, frac_labels) %>%
    dplyr::summarize (n = dplyr::n (),
                      mean = mean (error),
                      sd = sd (error),
                      min = min (error),
                      max = max (error),
                      median = median (error),
                      q1 = quantile (error, 0.25),
                      q3 = quantile (error, 0.75)) %>%
    dplyr::ungroup ()

  arrange <- function (...) {
    gridExtra::grid.arrange (..., layout_matrix = rbind (
      c (1, 2, 3),
      c (4, 5, 6),
      c (7, 8, 9),
      c (10, 11, 11)
    ))
  }

  # Build the panel of a single dataset.
  panel <- function (dataset_name) {
    with_legend <- stats_tbl %>%
      dplyr::filter (dataset == dataset_name) %>%
      # One-standard-deviation band around the mean.  The upper bound used
      # to be `max + sd`; neither bound is mapped to an aesthetic below.
      dplyr::mutate (ymin = mean - sd, ymax = mean + sd) %>%
      dplyr::select (`Labels sélectionnées` = frac_labels,
                     `aRMSE moyenne` = mean, ymin, ymax) %>%
      ggplot2::ggplot (ggplot2::aes (x = `Labels sélectionnées`,
                                     y = `aRMSE moyenne`)) +
      ggplot2::geom_line () +
      ggplot2::ggtitle (dataset_name) +
      ggplot2::scale_x_continuous (labels = scales::percent)
    # Only x and y are mapped, so this branch mirrors the sibling scripts
    # rather than hiding an actual legend.
    if (dataset_name == "wq") {
      with_legend
    } else {
      with_legend + ggplot2::theme (legend.position = "none")
    }
  }

  do.call (arrange, lapply (unique (stats_tbl$dataset), panel))
}

plot <- labels_graph ()
filename <- Sys.getenv ("OUTPUT")
ggplot2::ggsave (filename, plot, device = "svg", width = 6, height = 8)

# diff --git a/images/rsms_3.R b/images/rsms_3.R new file mode 100644 index 0000000..52a77e8 --- /dev/null +++ b/images/rsms_3.R @@ -0,0 +1,94 @@
load (sprintf ("%s/data/rsms_test.Rdata", Sys.getenv ("ABS_TOP_SRCDIR")))

Sys.setlocale ("LC_ALL", "fr_FR.UTF-8")

#' Print the graphs for the interest of label selection
#' @return the graph.
#' @details Reads the global `test` table (presumably provided by the
#'   `rsms_test.Rdata` file loaded at the top of this script) and keeps the
#'   "formulas" runs with `frac_labeled == 0.3`.  For each dataset two
#'   settings are compared: all labels (`frac_labels == 1`) and the
#'   restricted fraction listed in `lower_number` (0.8 or 0.6).  One line
#'   plot of the mean error against the fraction of features is drawn per
#'   dataset; the panels are laid out on a 4 x 3 grid in which the last
#'   panel spans two cells.  Only the "wq" panel keeps its legend.
#' @export
with_labels_graph <- function () {
  # Bind the pipe locally rather than attaching magrittr to the search path.
  `%>%` <- magrittr::`%>%`

  # Restricted label fraction compared against "all labels", per dataset.
  lower_number <- tibble::tibble (
    dataset = c ("atp1d", "atp7d", "edm", "enb", "oes10", "oes97",
                 "osales", "scpf", "sf1", "sf2", "wq"),
    frac_labels_restricted = c (0.8, 0.8, 0.6, 0.6, 0.8, 0.8,
                                0.8, 0.6, 0.6, 0.6, 0.8))
  legend_order <- c ("RSMS (restreint)", "RSMS (tous labels)")

  stats_tbl <- test %>%
    dplyr::filter (frac_labeled == 0.3,
                   algorithm == "formulas",
                   dataset %in% lower_number$dataset,
                   !is.na (error)) %>%
    # Explicit key: no reliance on the implicit natural join.
    dplyr::inner_join (lower_number, by = "dataset") %>%
    dplyr::mutate (full = (frac_labels == 1),
                   restricted = (frac_labels == frac_labels_restricted)) %>%
    dplyr::filter (full | restricted) %>%
    dplyr::mutate (algorithm = ifelse (full,
                                       "RSMS (tous labels)",
                                       "RSMS (restreint)")) %>%
    dplyr::select (dataset, frac_features, algorithm, error) %>%
    dplyr::group_by (dataset, frac_features, algorithm) %>%
    dplyr::summarize (n = dplyr::n (),
                      mean = mean (error),
                      sd = sd (error),
                      min = min (error),
                      max = max (error),
                      median = median (error),
                      q1 = quantile (error, 0.25),
                      q3 = quantile (error, 0.75)) %>%
    dplyr::ungroup ()

  arrange <- function (...) {
    gridExtra::grid.arrange (..., layout_matrix = rbind (
      c (1, 2, 3),
      c (4, 5, 6),
      c (7, 8, 9),
      c (10, 11, 11)
    ))
  }

  # Build the panel of a single dataset.
  panel <- function (dataset_name) {
    with_legend <- stats_tbl %>%
      dplyr::filter (dataset == dataset_name) %>%
      # One-standard-deviation band around the mean.  The upper bound used
      # to be `max + sd`; neither bound is mapped to an aesthetic below.
      dplyr::mutate (ymin = mean - sd, ymax = mean + sd) %>%
      dplyr::select (Algorithme = algorithm,
                     `Variables` = frac_features,
                     `aRMSE moyenne` = mean, ymin, ymax) %>%
      ggplot2::ggplot (ggplot2::aes (x = `Variables`,
                                     y = `aRMSE moyenne`,
                                     color = Algorithme,
                                     linetype = Algorithme)) +
      ggplot2::geom_line () +
      ggplot2::ggtitle (dataset_name) +
      ggplot2::scale_x_continuous (labels = scales::percent) +
      # Two limits, hence two values (the list used to carry four).
      ggplot2::scale_color_manual (limits = legend_order,
                                   values = c ("black", "#e69f00")) +
      ggplot2::scale_linetype_manual (limits = legend_order,
                                      values = c ("solid", "dashed"))
    if (dataset_name == "wq") {
      with_legend
    } else {
      with_legend + ggplot2::theme (legend.position = "none")
    }
  }

  do.call (arrange, lapply (unique (stats_tbl$dataset), panel))
}

plot <- with_labels_graph ()
filename <- Sys.getenv ("OUTPUT")
ggplot2::ggsave (filename, plot, device = "svg", width = 6, height = 8)

# diff --git a/images/rsms_4.R b/images/rsms_4.R new file mode 100644 index 0000000..94045d2 --- /dev/null +++ b/images/rsms_4.R @@ -0,0 +1,42 @@
load (sprintf ("%s/data/rsms_protocol_4.Rdata", Sys.getenv ("ABS_TOP_SRCDIR")))

Sys.setlocale ("LC_ALL", "fr_FR.UTF-8")

#' Print the convergence graphs
#'
#' Reads the global `protocol_4` table (presumably provided by the
#' `rsms_protocol_4.Rdata` file loaded above) and computes the mean and the
#' standard deviation of `loss` per (dataset, maxiter).  For each of the
#' eleven datasets the mean loss is drawn against `maxiter`, with a ribbon
#' one standard deviation wide on each side.  The panels are laid out on a
#' 4 x 6 cell grid: the first nine span two cells each, the last two span
#' three cells each.
#'
#' @return the value returned by `gridExtra::grid.arrange ()`.
#' @export
convergence_graph <- function () {
  # Bind the pipe locally rather than attaching magrittr to the search path.
  `%>%` <- magrittr::`%>%`

  curves <- protocol_4 %>%
    dplyr::group_by (dataset, maxiter) %>%
    dplyr::summarize (mloss = mean (loss),
                      sloss = sd (loss)) %>%
    # Drop the grouping left over by summarize () before deriving columns.
    dplyr::ungroup () %>%
    dplyr::mutate (`Nombre d'itérations` = maxiter,
                   `Coût` = mloss,
                   mini = mloss - sloss,
                   maxi = mloss + sloss)

  arrange <- function (...) {
    gridExtra::grid.arrange (..., layout_matrix = rbind (
      c (1, 1, 2, 2, 3, 3),
      c (4, 4, 5, 5, 6, 6),
      c (7, 7, 8, 8, 9, 9),
      c (10, 10, 10, 11, 11, 11)
    ))
  }

  # Build the panel of a single dataset.
  panel <- function (dataset_name) {
    curves %>%
      dplyr::filter (dataset == dataset_name) %>%
      ggplot2::ggplot (ggplot2::aes (x = `Nombre d'itérations`,
                                     y = `Coût`,
                                     ymin = mini,
                                     ymax = maxi)) +
      ggplot2::geom_line () +
      ggplot2::geom_ribbon (alpha = 0.2) +
      ggplot2::ggtitle (dataset_name)
  }

  do.call (arrange,
           lapply (c ("atp1d", "atp7d", "edm", "enb", "oes10", "oes97",
                      "osales", "scpf", "sf1", "sf2", "wq"),
                   panel))
}

plot <- convergence_graph ()
filename <- Sys.getenv ("OUTPUT")
ggplot2::ggsave (filename, plot, device = "svg", width = 6, height = 8)

# diff --git a/images/twomoons.R b/images/twomoons.R new file mode 100755 index 0000000..c825abd --- /dev/null +++ b/images/twomoons.R @@ -0,0 +1,50 @@
#!/usr/bin/env Rscript
# Two interleaved half circles of 101 points each.  In each half only the
# last point carries a class label ("+" or "-"); the other 100 are
# "unlabeled".
t <- seq (0, 1, length.out = 101)
upper_x <- cos (t * pi) + 0.5
upper_y <- sin (t * pi)
lower_x <- -upper_x
lower_y <- -upper_y

`%>%` <- magrittr::`%>%`

upper_moon <- tibble::tibble (x = upper_x, y = upper_y,
                              label = c (rep ("unlabeled", 100), "+"),
                              `semi-supervised solution` = "+")
lower_moon <- tibble::tibble (x = lower_x, y = lower_y,
                              label = c (rep ("unlabeled", 100), "-"),
                              `semi-supervised solution` = "-")

# NOTE(review): the Gaussian noise is drawn without a fixed seed, so the
# generated figure differs from one run to the next.
data <- upper_moon %>%
  rbind (lower_moon) %>%
  dplyr::mutate (noise_x = 0.1 * rnorm (dplyr::n ()),
                 noise_y = 0.1 * rnorm (dplyr::n ())) %>%
  dplyr::mutate (x = x + noise_x, y = y + noise_y) %>%
  # The naive classifier splits the plane along the vertical line x = 0.
  dplyr::mutate (`naive solution` = ifelse (x <= 0, "+", "-"))

# Apply the sizes, shapes, colours and theme shared by the panels.
scales <- function (plot) {
  plot +
    ggplot2::scale_size_manual (values = c ("unlabeled" = 1,
                                            "+" = 3,
                                            "-" = 3)) +
    ggplot2::scale_shape_manual (values = c ("+" = 16,
                                             "-" = 17,
                                             "unlabeled" = 3)) +
    ggplot2::scale_color_manual (values = c ("+" = "#E69F00",
                                             "-" = "#56B4E9",
                                             "unlabeled" = "#009E73")) +
    ggplot2::theme_void () +
    ggplot2::theme (legend.position = "none") +
    ggplot2::coord_fixed ()
}

# Top panel: the raw problem, with the labeled points drawn larger.
# `data` and `scales ()` are defined earlier in twomoons.R.
question <- scales (
  ggplot2::ggplot (data, ggplot2::aes (x = x, y = y,
                                       shape = label,
                                       color = label,
                                       size = label)) +
    ggplot2::geom_point ())

# Bottom left: every point classified by the side of x = 0 it falls on.
naive_answer <- scales (
  ggplot2::ggplot (data, ggplot2::aes (x = x, y = y,
                                       shape = `naive solution`,
                                       color = `naive solution`)) +
    ggplot2::geom_point () +
    ggplot2::geom_vline (xintercept = 0))

# Bottom right: every point classified by the half circle it was drawn from.
semi_supervised_answer <- scales (
  ggplot2::ggplot (data, ggplot2::aes (x = x, y = y,
                                       shape = `semi-supervised solution`,
                                       color = `semi-supervised solution`)) +
    ggplot2::geom_point ())

answers <- gridExtra::arrangeGrob (naive_answer, semi_supervised_answer,
                                   ncol = 2)
plot <- gridExtra::arrangeGrob (question, answers, nrow = 2)
filename <- Sys.getenv ("OUTPUT")
ggplot2::ggsave (filename, plot, device = "svg", width = 3.5, height = 3.5)

# diff --git a/images/wilcoxon.R b/images/wilcoxon.R new file mode 100644 index 0000000..2d197e2 --- /dev/null +++ b/images/wilcoxon.R @@ -0,0 +1,6 @@
#!/usr/bin/env Rscript
source (sprintf ("%s/images/lsmr_global_tuning.R", Sys.getenv ("ABS_TOP_SRCDIR")))
# The function used to be called twice in a row, the first result being
# discarded; a single call is kept.
# NOTE(review): assumes print_win_lose_tie_plot () gives the same result on
# a first call as on a repeated one -- confirm in lsmr_global_tuning.R.
plot <- print_win_lose_tie_plot ()
filename <- Sys.getenv ("OUTPUT")
ggplot2::ggsave (filename, plot, device = "svg", width = 5, height = 5)