make Rmd more portable

d70c8f97 · Tomas Martinovic · 992405cd · d70c8f97 · d70c8f97 · d70c8f97
Commit d70c8f97 authored 11 months ago by Tomas Martinovic
--- a/Dockerfile
+++ b/Dockerfile
+FROM rocker/r-base:4.3.3
+RUN apt-get update -y && apt-get install -y  make pandoc zlib1g-dev libcurl4-openssl-dev libssl-dev libicu-dev libpng-dev python3 git libffi-dev && rm -rf /var/lib/apt/lists/*
+RUN mkdir -p /usr/local/lib/R/etc/ /usr/lib/R/etc/
+RUN echo "options(renv.config.pak.enabled = FALSE, repos = c(CRAN = 'https://cran.rstudio.com/'), download.file.method = 'libcurl', Ncpus = 4)" | tee /usr/local/lib/R/etc/Rprofile.site | tee /usr/lib/R/etc/Rprofile.site
+RUN R -e 'install.packages("remotes")'
+RUN R -e 'remotes::install_version("renv", version = "1.0.7")'
+COPY renv.lock renv.lock
+RUN R -e 'renv::restore()'
+RUN R -e 'reticulate::install_python()'
+COPY renv/requirements.txt requirements.txt
+RUN R -e 'reticulate::virtualenv_create("hmch")'
+RUN R -e 'reticulate::virtualenv_install("hmch", c("-r", "requirements.txt"))'
+COPY get_features.Rmd get_features.Rmd
+COPY get_results.Rmd get_results.Rmd
+COPY functions functions
+CMD ["R", "--quiet", "-e", "rmarkdown::render('get_features.Rmd')"]
--- a/functions/functions_df_total_cv.R
+++ b/functions/functions_df_total_cv.R
@@ -16,13 +16,8 @@ get_train_df <-
    )
  }
 #train_df <- get_train_df(df_features = df_autocorrelation_features)
 # Get features from XGBOOST----
 get_features_tree_xgboost <-
  function(train_df = train_df,
@@ -55,7 +50,7 @@ get_features_tree_xgboost <-
           xgboost_iter = 100,
           set_seed = 4) {
    train_df$full_data_big_648obs <-
-      add_column(
+      tibble::add_column(
        train_df$full_data_big_648obs,
        activ_num = rep(1:8, 81),
        .before = "activity"
@@ -125,13 +120,13 @@ get_features_tree_xgboost <-
      dplyr::slice_head(n = xgboost_n_features)
    xgboost_important_features <-
-      xgboost_important_features |>  select_if(~ !any(is.na(.)))
+      xgboost_important_features |>  dplyr::select_if(~ !any(is.na(.)))
    # Select features
    all_xgboost_activ_more_features <-
      train_df$train_data_big_648obs |>
      as.data.frame() |>
-      dplyr::select(all_of(xgboost_important_features$Feature)) |>
+      dplyr::select(tidyselect::all_of(xgboost_important_features$Feature)) |>
-      mutate(
+      dplyr::mutate(
        student = train_df$full_data_big_648obs$student,
        activity = train_df$full_data_big_648obs$activity
      )
@@ -508,9 +503,7 @@ train_som_cv <- function(train_df,
        temp_list_kfold$som_accur_kfold_test[[4]]$mnLogLoss
    )
  temp_df_1 <-
    tibble::tibble(
      fold_1 = c(
@@ -683,8 +676,6 @@ get_all <-
      get_features_tree_xgboost(
        train_df = train_df,
        param_xgb_model = param_xgb_model,
        nfold = nfold,
        showsd = showsd,
        stratified = stratified,
@@ -1017,6 +1008,7 @@ train_test_models <-
    set_seed = 1245) {
    # Temp for SOM models
    temp_tests_feat <- list()
+    index <- 1
    # For loop for train
    for (xgboost_n_features in xgb_features_range) {
      all_som <- get_all(
@@ -1069,7 +1061,8 @@ train_test_models <-
        paste0(all_som$quality$`648obs`$`Features set`,
               paste0("_", xgboost_n_features, "f"))
-      temp_tests_feat[[xgboost_n_features]] <- all_som
+      temp_tests_feat[[index]] <- all_som
+      index <- index + 1
    }
@@ -1125,7 +1118,7 @@ get_table_superclass <-
    col_totals <- base::colSums(table_som_clasif[3:10])
    # Calculate percentage contributions and append to original values
-    table_som_clasif <- table_som_clasif %>%
+    table_som_clasif <- table_som_clasif |>
      dplyr::group_by(superclass) |>
      dplyr::mutate(across(2:9,
                           ~ base::paste0(.x, " (", base::round((.x / col_totals[dplyr::cur_column()] * 100), 2
@@ -1135,12 +1128,12 @@ get_table_superclass <-
    # Make final table
-    df_summary <- table_som_clasif %>%
+    df_summary <- table_som_clasif |>
-      dplyr::group_by(superclass) %>%
+      dplyr::group_by(superclass) |>
-      dplyr::summarise(dplyr::across(2:9, sum)) %>%
+      dplyr::summarise(dplyr::across(2:9, sum)) |>
-      dplyr::ungroup() %>%
+      dplyr::ungroup() |>
-      dplyr::mutate(total = base::rowSums(dplyr::across(2:9))) %>%
+      dplyr::mutate(total = base::rowSums(dplyr::across(2:9))) |>
-      dplyr::rowwise() %>%
+      dplyr::rowwise() |>
      dplyr::mutate(dplyr::across(2:9,
                                  ~ base::paste0(
                                    .x, " (", base::round((.x / col_totals[dplyr::cur_column()] * 100), round_value), "%)"
@@ -1149,7 +1142,7 @@ get_table_superclass <-
      dplyr::ungroup()
    # Format the percentages as requested
-    df_summary <- df_summary %>%
+    df_summary <- df_summary |>
      dplyr::mutate(dplyr::across(starts_with("perc"), ~ base::paste0(.x), .names = "formatted_{.col}"))
    names(df_summary) <-

--- a/get_features.Rmd
+++ b/get_features.Rmd
@@ -10,6 +10,15 @@ knitr::opts_chunk$set(warning = FALSE, message = FALSE)
 ```
 ```{r parameters}
+# Set basepath for the script execution, default is the script path
+base_path <- getwd()
+# Data folder
+if (Sys.getenv("DOCKER_RUN") == "true") {
+  zenodo_dir <- "/zenodo_dir"  
+} else {
+  zenodo_dir <- file.path(base_path, "data_zenodo")
+}
 # Chaos 01 parameters
 lmin_a <- 20
 lmin_g <- 20
@@ -25,9 +34,9 @@ renv::restore()
 ```{r install_python, include = FALSE}
 # Install python if not available
-if (!reticulate::py_available()) {
+#if (!reticulate::py_available()) {
-  reticulate::install_python()
+#  reticulate::install_python()
-}
+#}
 # # Create `hmch` virtual environment if it does not exists yet
 # if (!reticulate::virtualenv_exists("hmch")) {
 #   reticulate::virtualenv_create("hmch")
@@ -40,13 +49,10 @@ if (!reticulate::py_available()) {
 #   reticulate::py_install("tsfresh")
 # }
-# Set basepath for the script execution, default is the script path
-base_path <- getwd()
 # Install required Python packages from requirements.txt
-reticulate::virtualenv_install("my-env_25", c("-r", file.path(base_path, "renv", "requirements.txt")))
+#reticulate::virtualenv_install("hmch", c("-r", file.path(base_path, "renv", "requirements.txt")))
-# Use `my-env_23` virtual environment
+# Use `hmch` virtual environment
-reticulate::use_virtualenv("my-env_25")
+reticulate::use_virtualenv("hmch")
 library(reticulate)
 ```
@@ -71,7 +77,6 @@ import pandas
 ```{r prepare_input_data}
 # Download data 
-zenodo_dir <- file.path(base_path, "data_zenodo")
 #https://zenodo.org/records/10984138
 if (!file.exists(file.path(zenodo_dir, "physical_exercise.csv"))) {
  # Create folder data_zenodo if it does not exists
@@ -155,36 +160,36 @@ figure_1
 ```{r compute_features}
 # Get neighborhood size for Accelerometer and Gyroscope
 eps_df_ALL <- get_eps(df_ALL_648obs, scale = eps_scale)
-# # Get features for train models
+# # # Get features for train models
-## Get RQA features + TREND
+# ## Get RQA features + TREND
-df_ALL_rqa <- get_features_rqa_trend(df = df_ALL[1:10000, ], eps_chaos_01 = eps_df_ALL,
+df_ALL_rqa <- get_features_rqa_trend(df = df_ALL, eps_chaos_01 = eps_df_ALL,
                             lmin_a = lmin_a, lmin_g = lmin_g)
 #save(df_ALL_rqa, file = "SOM_features/rqa_trend/df_ALL_rqa.RData")
 # Autocorrelation
-df_ALL_autocorrelation_features <- get_features_tsfresh_specific(df = df_ALL[1:10000, ], dic_features = py$autocorrelation_dic)
+df_ALL_autocorrelation_features <- get_features_tsfresh_specific(df = df_ALL, dic_features = py$autocorrelation_dic)
 df_ALL_autocorrelation_features <- tidyr::drop_na(df_ALL_autocorrelation_features)
-#save(df_ALL_autocorrelation_features, file = "SOM_features/autocorr/df_ALL_autocorrelation_features.RData")
+save(df_ALL_autocorrelation_features, file = file.path(zenodo_dir, "df_ALL_autocorrelation_features.RData"))
 # Spectral
-df_ALL_spectral_features <- get_features_tsfresh_specific(df = df_ALL[1:10000, ], dic_features = py$spectral_dic)
+df_ALL_spectral_features <- get_features_tsfresh_specific(df = df_ALL, dic_features = py$spectral_dic)
 df_ALL_spectral_features <- tidyr::drop_na(df_ALL_spectral_features)
-#save(df_ALL_spectral_features, file = "SOM_features/spectral/df_ALL_spectral_features.RData")
+save(df_ALL_spectral_features, file = file.path(zenodo_dir, "df_ALL_spectral_features.RData"))
 # MIX of RQA + Autocorrelation + Spectral
-#df_ALL_mix_features <- do.call("rbind", list(df_ALL_rqa, df_ALL_autocorrelation_features[3:141], df_ALL_spectral_features[3:2607]))
+df_ALL_mix_features <- do.call("rbind", list(df_ALL_rqa, df_ALL_autocorrelation_features[3:141], df_ALL_spectral_features[3:2607]))
-#save(df_ALL_mix_features, file = "SOM_features/mix_rqa_trend_spectral_autocorr/df_ALL_mix_features.RData")
+save(df_ALL_mix_features, file = file.path(zenodo_dir, "df_ALL_mix_features.RData"))
 # Tsfresh all
-df_ALL_tsfresh_features <- get_features_tsfresh_specific(df = df_ALL[1:10000, ], dic_features = py$ComprehensiveFCParameters())
+df_ALL_tsfresh_features <- get_features_tsfresh_specific(df = df_ALL, dic_features = py$ComprehensiveFCParameters())
 # Drop NAs
 df_ALL_tsfresh_features <- df_ALL_tsfresh_features |>
    dplyr::select_if(~ !any(is.na(.)))
@@ -194,6 +199,6 @@ df_ALL_tsfresh_features <- tidyr::drop_na(df_ALL_tsfresh_features)
    dir.create(file.path(base_path, "outputs"))
  }
-#save(df_ALL_tsfresh_features,
+save(df_ALL_tsfresh_features,
-#     file = file.path(base_path, "outputs/df_ALL_tsfresh_features.RData"))
+     file = file.path(zenodo_dir,  "df_ALL_tsfresh_features.RData"))
 ```
--- a/get_results.Rmd
+++ b/get_results.Rmd
@@ -13,13 +13,43 @@ output:
 knitr::opts_chunk$set(warning = FALSE, message = FALSE) 
 ```
+```{r install_packages, include = FALSE}
+# Install `renv` package if not available
+if (!require("renv")) install.packages("renv")
+# Install dependencies if not installed already
+renv::restore()
+```
 ```{r parameters}
+# Set basepath for the script execution, default is the script path
+base_path <- getwd()
+# Input data path
+# Data folder
+if (Sys.getenv("DOCKER_RUN") == "true") {
+  zenodo_dir <- "/zenodo_dir"  
+} else {
+  zenodo_dir <- file.path(base_path, "data_zenodo")
+}
+# Create folder data_zenodo if it does not exists
+if (!dir.exists(zenodo_dir)) {
+  dir.create(zenodo_dir)
+}
+# Output data path
+output_dir <- file.path(base_path, "output")
+# Create folder output if it does not exists
+if (!dir.exists(output_dir)) {
+  dir.create(output_dir)
+}
 # Parameters for article "link to article"
 # SOM dimentions
 som_xdim <- c(5)
 som_ydim <- c(5)
-# Range of used features for SOM train e.g. 2:37, min number of features are 2
+# Range of used features for SOM train e.g. 2:35, min number of features are 2
-xgb_features_range <- c(27)
+xgb_features_range <- 24:27 # c(2:35)
+best_n_features <- "27" # This needs to be character as it is used to subset by name
 # XGBOOST importance type https://www.rdocumentation.org/packages/xgboost/versions/0.6.4.1/topics/xgb.importance
 xgboost_importance_type = c("Gain")
@@ -32,7 +62,6 @@ activ_name_eng <- rep(c("1:4x10m Shuttle Run",
                        "6:10x Gymnastic Hoops Passing",
                        "7:10x Bench Jumping",
                        "8:Crawling repeat"), 81)
-stud_names_648 <- as.character(df_rqa_features$student)
 # XGBOOST
 param_xgb_model =
@@ -67,41 +96,58 @@ plot_pixel_size <- 500
 legendPos <- "below"
 ```
-```{r install_packages, include = FALSE}
+```{r prepare_input_data}
-# Install `renv` package if not available
-if (!require("renv")) install.packages("renv")
+#https://zenodo.org/records/10996083
-# Install dependencies if not installed already
+if (!file.exists(file.path(zenodo_dir, "df_ALL_autocorrelation_features.csv")) |
-renv::restore()
+    !file.exists(file.path(zenodo_dir, "df_ALL_rqa.csv")) | 
-```
+    !file.exists(file.path(zenodo_dir, "df_ALL_spectral_features.csv")) | 
+    !file.exists(file.path(zenodo_dir, "df_ALL_tsfresh_features.csv"))
+    ) {
+  # This part is based on https://github.com/inbo/inborutils/blob/main/R/download_zenodo.R
+  record <-
+    curl::curl_fetch_memory("https://zenodo.org/api/records/10996083")$content |>
+    rawToChar() |>
+    jsonlite::fromJSON()
+  file_urls <- record$files$links$self
+  filenames <- basename(record$files$key)
+  destfiles <- file.path(zenodo_dir, filenames)
+  mapply(curl::curl_download,
+         file_urls,
+         destfiles,
+         MoreArgs = list(quiet = TRUE))
+}
+if (!file.exists(file.path(zenodo_dir, "physical_exercise.csv"))) {
+  # Create folder data_zenodo if it does not exists
+  if (!dir.exists(zenodo_dir)) {
+    dir.create(zenodo_dir)
+  }
+  # This part is based on https://github.com/inbo/inborutils/blob/main/R/download_zenodo.R
+  record <-
+    curl::curl_fetch_memory("https://zenodo.org/api/records/10984137")$content |>
+    rawToChar() |>
+    jsonlite::fromJSON()
+  file_urls <- record$files$links$self
+  filenames <- basename(record$files$key)
+  destfiles <- file.path(zenodo_dir, filenames)
+  mapply(curl::curl_download,
+         file_urls,
+         destfiles,
+         MoreArgs = list(quiet = TRUE))
+}
+# Load data (raw time series student movements)
+df_motion <- readr::read_csv(file.path(zenodo_dir, "physical_exercise.csv"))
-```{r, include = T}
-# Load libraries
-if (!require('tidyverse')) install.packages('tidyverse'); library('tidyverse')
-if (!require('knitr')) install.packages('knitr'); library('knitr')
-# SOM
-if (!require('kohonen')) install.packages('kohonen'); library('kohonen')
-if (!require('aweSOM')) install.packages('aweSOM'); library('aweSOM')
-# XGBoost feature reduction
-if (!require('xgboost')) install.packages('xgboost'); library('xgboost')
-if (!require('caret')) install.packages('caret'); library('caret')
-# DT
-if (!require('DT')) install.packages('DT'); library('DT')
 ```
+```{r prepare_functions_and_load_data, include = T}
-```{r, include = T}
 # Include functions 
-# Windows path
+source(file.path(base_path, "functions", "functions_df_total_cv.R"))
-#source("D:/path_to/movement-classification/functions/functions_df_total.R")
-# Linux path
-#source("path_to/movement-classification/functions/functions_df_total.R")
-#source("/home/andrii/Documents/it4i/movement-classification/functions/functions_df_total_cv.R")
-source("D:/it4i/movement-classification/functions/functions_df_total_cv.R")
 # Load data
 #The Data can be found at https://zenodo.org/records/10996083
@@ -112,15 +158,13 @@ source("D:/it4i/movement-classification/functions/functions_df_total_cv.R")
 #https://cran.r-project.org/web/packages/Chaos01/Chaos01.pdf
 # RQA TREND
 #https://cran.r-project.org/web/packages/nonlinearTseries/nonlinearTseries.pdf
-#df_rqa_features <- read_csv("/home/andrii/Documents/it4i/activity_df_pohybove_cinnosti_2024/data_zenodo/zenodo_uploaded/df_ALL_rqa.csv")
+df_rqa_features <- readr::read_csv(file.path(zenodo_dir, "df_ALL_rqa.csv"))
-df_rqa_features <- readr::read_csv("C:/Users/Andrii/Downloads/10996083/df_ALL_rqa.csv")
 # Autocorrelation
 #https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.autocorrelation
 #https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.agg_autocorrelation
 #df_autocorrelation_features <- read_csv("/home/andrii/Documents/it4i/activity_df_pohybove_cinnosti_2024/data_zenodo/zenodo_uploaded/df_ALL_autocorrelation_features.csv")
-df_autocorrelation_features <- readr::read_csv("C:/Users/Andrii/Downloads/10996083/df_ALL_autocorrelation_features.csv")
+df_autocorrelation_features <- readr::read_csv(file.path(zenodo_dir, "df_ALL_autocorrelation_features.csv"))
 # Spectral
 #https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.fft_aggregated
@@ -129,17 +173,14 @@ df_autocorrelation_features <- readr::read_csv("C:/Users/Andrii/Downloads/109960
 #https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.fourier_entropy
 #https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.spkt_welch_density
 #https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.ar_coefficient
-#df_spectral_features <- read_csv("/home/andrii/Documents/it4i/activity_df_pohybove_cinnosti_2024/data_zenodo/zenodo_uploaded/df_ALL_spectral_features.csv")
+df_spectral_features <- readr::read_csv(file.path(zenodo_dir, "df_ALL_spectral_features.csv"))
-df_spectral_features <- readr::read_csv("C:/Users/Andrii/Downloads/10996083/df_ALL_spectral_features.csv")
 # Mix RQA/Spectral/Autocorr
-#df_mix_features <- read_csv("/home/andrii/Documents/it4i/activity_df_pohybove_cinnosti_2024/data_zenodo/zenodo_uploaded/df_ALL_mix_features.csv")
+df_mix_features <- readr::read_csv(file.path(zenodo_dir, "df_ALL_mix_features.csv"))
-df_mix_features <- readr::read_csv("C:/Users/Andrii/Downloads/10996083/df_ALL_mix_features.csv")
 # Tsfresh all
 #https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html
-#df_tsfresh_features <- read_csv("/home/andrii/Documents/it4i/activity_df_pohybove_cinnosti_2024/data_zenodo/zenodo_uploaded/df_ALL_tsfresh_features.csv")
+df_tsfresh_features <- readr::read_csv(file.path(zenodo_dir, "df_ALL_tsfresh_features.csv"))
-df_tsfresh_features <- readr::read_csv("C:/Users/Andrii/Downloads/10996083/df_ALL_tsfresh_features.csv")
 # List of features sets for model training
 list_features_all <- list(
@@ -152,9 +193,11 @@ list_features_all <- list(
 ```
-```{r, echo = F, include = F}
+```{r train_models, echo = F, include = F}
+stud_names_648 <- as.character(df_rqa_features$student)
 # Train function
-som_gain_27f_5x5 <- train_test_models(
+som_models <- train_test_models(
  list_features_all = list_features_all,
  stud_names_648 = stud_names_648,
  activ_name_eng = activ_name_eng,
@@ -181,89 +224,78 @@ som_gain_27f_5x5 <- train_test_models(
  plot_pixel_size = plot_pixel_size,
  legendPos = legendPos
 )
-# Save you trained results
-#saveRDS(som_gain_27f_5x5, file = "/home/andrii/Documents/it4i/movement-classification/trained_models/som_gain_27f_5x5.RDS")
+names(som_models) <- xgb_features_range |> as.character()
-# Get the SOM quality metrics of you trained models
-df_som_acc <- get_acc_all_features(som_list_feat = som_gain_27f_5x5)
-# Save you results
-#saveRDS(df_som_acc, file = "/home/andrii/Documents/it4i/movement-classification/df_results/df_som_acc.RDS")
 ```
 ```{r fig1, fig.align='center', out.width="100%", fig.cap = "\\label{fig1}Example of a time series from an accelerometer recording.", echo = FALSE, include = T}
 # For Figure 1: Example of a time series from an accelerometer recording
 #measured during a 4 x 10 meter run by one of the participants
-# Get the RAW time series https://zenodo.org/records/10984138
-# Load data to R
+df_motion <- df_motion |>
-# df_motion <- read_csv("C:/Users/Andrii/Downloads/10984138/physical_exercise.csv")
+  dplyr::group_by(activity_id, student_id) |>
-# 
+  dplyr::mutate(ts_length = 1:dplyr::n()) |>
-# df_motion <- df_motion |>
+  tidyr::nest(data = !tidyselect::one_of(c("student_id", "activity_id"))) |>
-#   group_by(activity_id, student_id) |> 
+  dplyr::ungroup()
-#   mutate(ts_length = 1:n()) |>
-#   tidyr::nest(data = !one_of(c("student_id", "activity_id"))) |> 
+# Load one time series
-#   ungroup()
+data_test <- df_motion$data[[1]][c(1:3, 10)]
-# 
+data_long <- tidyr::pivot_longer(data_test,
-# # Load one time series
+                          cols = c("accelerometer_x", "accelerometer_y", "accelerometer_z"),
-# data_test <- df_motion$data[[1]][c(1:3, 10)]
+                          names_to = "group",
-# data_long <- tidyr::pivot_longer(data_test,
+                          values_to = "value",
-#                           cols = c("accelerometer_x", "accelerometer_y", "accelerometer_z"),
+                          names_prefix = "value")
-#                           names_to = "group",
-#                           values_to = "value",
+data_long$ts_length <- as.difftime(data_long$ts_length/100, units = 'secs')
-#                           names_prefix = "value")
-# 
-# data_long$ts_length <- as.difftime(data_long$ts_length/100, units = 'secs')
+figure_1 <- ggplot2::ggplot(data = data_long) +
-# 
+  ggplot2::geom_line(ggplot2::aes(y = value, x = ts_length, color = group)) +
-# 
+  ggplot2::theme_bw()+
-# figure_1 <- ggplot(data = data_long) +
+  ggplot2::theme(axis.text=ggplot2::element_text(size=20),
-#   geom_line(aes(y = value, x = ts_length, color = group)) +
+        axis.title=ggplot2::element_text(size=22,face="bold"),
-#   theme_bw()+
+        legend.title = ggplot2::element_text(size = 22,  face = "bold"),
-#   theme(axis.text=element_text(size=20),
+        legend.text = ggplot2::element_text(size = 20)) +
-#         axis.title=element_text(size=22,face="bold"),
+ggplot2::labs(y= expression(bold(paste("Acceleration (g = 9.8 m/s" ^ "2",")"))), x = " Time (seconds)", color = "Axis")
-#         legend.title = element_text(size = 22,  face = "bold"),
-#         legend.text = element_text(size = 20)) +
+figure_1
-# labs(y= expression(bold(paste("Acceleration (g = 9.8 m/s" ^ "2",")"))), x = " Time (seconds)", color = "Axis")
-# 
-# figure_1
 ```
 ```{r fig2, fig.align='center', out.width="100%", fig.cap = "\\label{fig2}Tsfresh feature Importance by “Gain” bar plot.", echo = FALSE, include = T}
 # Plot of the used features in article
 plot_data_importance <-
-  som_gain_27f_5x5[[27]]$train_data$tsfresh$df_important_feat_xgboost |>
+  som_models[[best_n_features]]$train_data$tsfresh$df_important_feat_xgboost |>
  dplyr::arrange(desc(Gain)) |>
-  dplyr::mutate(Feature = fct_reorder(Feature, Gain, .desc = T)) |>
+  dplyr::mutate(Feature = forcats::fct_reorder(Feature, Gain, .desc = T)) |>
  dplyr::slice_head(n = 35) |>
-  ggplot(aes(x = Feature, y = Gain)) +
+  ggplot2::ggplot(ggplot2::aes(x = Feature, y = Gain)) +
-  xlab("Feature name") +
+  ggplot2::xlab("Feature name") +
-  ylab("Value") +
+  ggplot2::ylab("Value") +
-  geom_bar(stat = "identity") +
+  ggplot2::geom_bar(stat = "identity") +
-  scale_x_discrete(labels = str_wrap(c(1:35),
+  ggplot2::scale_x_discrete(labels = stringr::str_wrap(c(1:35),
                                     width = 4)) +
  ggplot2::theme_bw() +
-  theme(
+  ggplot2::theme(
-    axis.text.x = element_text(
+    axis.text.x = ggplot2::element_text(
      #angle = 30,
      hjust = 0.5,
      vjust = 0.5,
      size = 10,
      face = "bold"
    ),
-    axis.title.x = element_text(size = 25, face = "bold"),
+    axis.title.x = ggplot2::element_text(size = 25, face = "bold"),
-    axis.title.y = element_text(size = 25, face = "bold")#,
+    axis.title.y = ggplot2::element_text(size = 25, face = "bold")#,
    #panel.background = element_blank()
  ) +
-  geom_vline(xintercept = 27)
+  ggplot2::geom_vline(xintercept = 27)
 plot_data_importance
 ```
 ```{r tab4, echo = FALSE, include = T}
 table_4_som_quality <-
-  do.call("rbind", list(som_gain_27f_5x5[[27]]$quality$`648obs`))
+  do.call("rbind", list(som_models[[best_n_features]]$quality$`648obs`))
 DT::datatable(
  table_4_som_quality,
@@ -275,10 +307,8 @@ DT::datatable(
 ```
 ```{r tab5, echo=FALSE, include = T}
-# Load pre trained models for 2:35 features
-som_gain_2_35f_5x5 <- base::readRDS("D:/it4i/movement-classification/trained_models/som_gain_2_35f_5x5.RDS")
-table_2_som_quality <- get_acc_all_features(som_gain_2_35f_5x5)
+table_2_som_quality <- get_acc_all_features(som_models)
 DT::datatable(
  table_2_som_quality$table_format,
@@ -286,35 +316,32 @@ DT::datatable(
  filter = list(position = 'top', clear = FALSE)
 ) |>
  DT::formatRound(columns = c(2:27), digits = 3)
 ```
 # Supplementary Materials
-```{r, echo=FALSE, include = T}
+```{r compute_clustering, echo=FALSE, include = T}
 # Used SOM in article
-som_27f <- som_gain_27f_5x5[[27]]$som$tsfresh$SOM_results$som_model
+som_best <- som_models[[best_n_features]]$som$tsfresh$SOM_results$som_model
 # Get superclusters number
-superclust_pam <- cluster::pam(som_27f$codes[[1]], superclust_num)
+superclust_pam <- cluster::pam(som_best$codes[[1]], superclust_num)
 superclasses_pam <- superclust_pam$clustering
-superclust_hclust <- stats::hclust(dist(som_27f$codes[[1]]), "complete")
+superclust_hclust <- stats::hclust(dist(som_best$codes[[1]]), "complete")
 superclasses_hclust <- stats::cutree(superclust_hclust, superclust_num)
 ```
 ```{r fig3, fig.width = 5, fig.height = 6, fig.align='center', out.width="100%", fig.cap = "\\label{fig3}", echo = FALSE, include = T}
-som_gain_27f_5x5[[27]]$som$tsfresh$SOM_results$aweSOM_plot
+som_models[[best_n_features]]$som$tsfresh$SOM_results$aweSOM_plot
 ```
 ```{r fig6, fig.width = 5, fig.height = 6, fig.align='center', out.width="100%", fig.cap = "\\label{fig6}Supercluster table", echo = FALSE, include = T}
 # Get table superclass
 df_table_6 <-
-  get_table_superclass(som_27f, superclass_number = superclust_num)
+  get_table_superclass(som_best, superclass_number = superclust_num)
 table_6 <- DT::datatable(
  df_table_6,
@@ -327,7 +354,7 @@ table_6
 ```{r fig7, fig.width = 5, fig.height = 6, fig.align='center', out.width="100%", fig.cap = "\\label{fig7}Silhouette score", echo = FALSE, include = T}
 #Silhouette score
-figure_7 <- aweSOM::aweSOMsilhouette(som_27f, superclasses_pam)
+figure_7 <- aweSOM::aweSOMsilhouette(som_best, superclasses_pam)
 ```
 ```{r fig8, fig.width = 5, fig.height = 6, fig.align='center', out.width="100%", fig.cap = "\\label{fig5}Dendogram", echo = FALSE, include = T}

--- a/renv/requirements.txt
+++ b/renv/requirements.txt
+numba==0.57.1
-numba==0.55.1
 tsfresh==0.20.0
+numpy==1.21.6
\ No newline at end of file