diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..91de78a8a0c7dbdaa29dca0bc37b46eaea9b31d6 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,20 @@ +FROM rocker/r-base:4.3.3 +RUN apt-get update -y && apt-get install -y make pandoc zlib1g-dev libcurl4-openssl-dev libssl-dev libicu-dev libpng-dev python3 git libffi-dev && rm -rf /var/lib/apt/lists/* +RUN mkdir -p /usr/local/lib/R/etc/ /usr/lib/R/etc/ +RUN echo "options(renv.config.pak.enabled = FALSE, repos = c(CRAN = 'https://cran.rstudio.com/'), download.file.method = 'libcurl', Ncpus = 4)" | tee /usr/local/lib/R/etc/Rprofile.site | tee /usr/lib/R/etc/Rprofile.site +RUN R -e 'install.packages("remotes")' +RUN R -e 'remotes::install_version("renv", version = "1.0.7")' +COPY renv.lock renv.lock +RUN R -e 'renv::restore()' + +RUN R -e 'reticulate::install_python()' + +COPY renv/requirements.txt requirements.txt +RUN R -e 'reticulate::virtualenv_create("hmch")' +RUN R -e 'reticulate::virtualenv_install("hmch", c("-r", "requirements.txt"))' + +COPY get_features.Rmd get_features.Rmd +COPY get_results.Rmd get_results.Rmd +COPY functions functions + +CMD ["R", "--quiet", "-e", "rmarkdown::render('get_features.Rmd')"] diff --git a/functions/functions_df_total_cv.R b/functions/functions_df_total_cv.R index ab60449fe40326fa5f4b8db032add8521b761d1e..09f42398a23819d90ff7e585b3d2f64fe0afc1b7 100644 --- a/functions/functions_df_total_cv.R +++ b/functions/functions_df_total_cv.R @@ -16,13 +16,8 @@ get_train_df <- ) } - - #train_df <- get_train_df(df_features = df_autocorrelation_features) - - - # Get features from XGBOOST---- get_features_tree_xgboost <- function(train_df = train_df, @@ -55,7 +50,7 @@ get_features_tree_xgboost <- xgboost_iter = 100, set_seed = 4) { train_df$full_data_big_648obs <- - add_column( + tibble::add_column( train_df$full_data_big_648obs, activ_num = rep(1:8, 81), .before = "activity" @@ -125,13 +120,13 @@ get_features_tree_xgboost <- dplyr::slice_head(n = xgboost_n_features) xgboost_important_features <- - xgboost_important_features |> select_if(~ !any(is.na(.))) + xgboost_important_features |> dplyr::select_if(~ !any(is.na(.))) # Select features all_xgboost_activ_more_features <- train_df$train_data_big_648obs |> as.data.frame() |> - dplyr::select(all_of(xgboost_important_features$Feature)) |> - mutate( + dplyr::select(tidyselect::all_of(xgboost_important_features$Feature)) |> + dplyr::mutate( student = train_df$full_data_big_648obs$student, activity = train_df$full_data_big_648obs$activity ) @@ -508,9 +503,7 @@ train_som_cv <- function(train_df, temp_list_kfold$som_accur_kfold_test[[4]]$mnLogLoss ) - - - + temp_df_1 <- tibble::tibble( fold_1 = c( @@ -683,8 +676,6 @@ get_all <- get_features_tree_xgboost( train_df = train_df, param_xgb_model = param_xgb_model, - - nfold = nfold, showsd = showsd, stratified = stratified, @@ -1017,6 +1008,7 @@ train_test_models <- set_seed = 1245) { # Temp for SOM models temp_tests_feat <- list() + index <- 1 # For loop for train for (xgboost_n_features in xgb_features_range) { all_som <- get_all( @@ -1069,7 +1061,8 @@ train_test_models <- paste0(all_som$quality$`648obs`$`Features set`, paste0("_", xgboost_n_features, "f")) - temp_tests_feat[[xgboost_n_features]] <- all_som + temp_tests_feat[[index]] <- all_som + index <- index + 1 } @@ -1125,7 +1118,7 @@ get_table_superclass <- col_totals <- base::colSums(table_som_clasif[3:10]) # Calculate percentage contributions and append to original values - table_som_clasif <- table_som_clasif %>% + table_som_clasif <- table_som_clasif |> dplyr::group_by(superclass) |> dplyr::mutate(across(2:9, ~ base::paste0(.x, " (", base::round((.x / col_totals[dplyr::cur_column()] * 100), 2 @@ -1135,12 +1128,12 @@ get_table_superclass <- # Make final table - df_summary <- table_som_clasif %>% - dplyr::group_by(superclass) %>% - dplyr::summarise(dplyr::across(2:9, sum)) %>% - dplyr::ungroup() %>% - dplyr::mutate(total = base::rowSums(dplyr::across(2:9))) %>% - dplyr::rowwise() %>% + df_summary <- table_som_clasif |> + dplyr::group_by(superclass) |> + dplyr::summarise(dplyr::across(2:9, sum)) |> + dplyr::ungroup() |> + dplyr::mutate(total = base::rowSums(dplyr::across(2:9))) |> + dplyr::rowwise() |> dplyr::mutate(dplyr::across(2:9, ~ base::paste0( .x, " (", base::round((.x / col_totals[dplyr::cur_column()] * 100), round_value), "%)" @@ -1149,7 +1142,7 @@ get_table_superclass <- dplyr::ungroup() # Format the percentages as requested - df_summary <- df_summary %>% + df_summary <- df_summary |> dplyr::mutate(dplyr::across(starts_with("perc"), ~ base::paste0(.x), .names = "formatted_{.col}")) names(df_summary) <- diff --git a/get_features.Rmd b/get_features.Rmd index 0b62b8700a83f918e141a207d83bc5bf01abcb5e..7f62da8da623288d2a75437cace28c5f565cc1b0 100644 --- a/get_features.Rmd +++ b/get_features.Rmd @@ -10,6 +10,15 @@ knitr::opts_chunk$set(warning = FALSE, message = FALSE) ``` ```{r parameters} +# Set basepath for the script execution, default is the script path +base_path <- getwd() +# Data folder +if (Sys.getenv("DOCKER_RUN") == "true") { + zenodo_dir <- "/zenodo_dir" +} else { + zenodo_dir <- file.path(base_path, "data_zenodo") +} + # Chaos 01 parameters lmin_a <- 20 lmin_g <- 20 @@ -25,9 +34,9 @@ renv::restore() ```{r install_python, include = FALSE} # Install python if not available -if (!reticulate::py_available()) { - reticulate::install_python() -} +#if (!reticulate::py_available()) { +# reticulate::install_python() +#} # # Create `hmch` virtual environment if it does not exists yet # if (!reticulate::virtualenv_exists("hmch")) { # reticulate::virtualenv_create("hmch") @@ -40,13 +49,10 @@ if (!reticulate::py_available()) { # reticulate::py_install("tsfresh") # } -# Set basepath for the script execution, default is the script path -base_path <- getwd() - # Install required Python packages from requirements.txt -reticulate::virtualenv_install("my-env_25", c("-r", file.path(base_path, "renv", "requirements.txt"))) -# Use `my-env_23` virtual environment -reticulate::use_virtualenv("my-env_25") +#reticulate::virtualenv_install("hmch", c("-r", file.path(base_path, "renv", "requirements.txt"))) +# Use `hmch` virtual environment +reticulate::use_virtualenv("hmch") library(reticulate) ``` @@ -71,7 +77,6 @@ import pandas ```{r prepare_input_data} # Download data -zenodo_dir <- file.path(base_path, "data_zenodo") #https://zenodo.org/records/10984138 if (!file.exists(file.path(zenodo_dir, "physical_exercise.csv"))) { # Create folder data_zenodo if it does not exists @@ -155,36 +160,36 @@ figure_1 ```{r compute_features} # Get neighborhood size for Accelerometer and Gyroscope eps_df_ALL <- get_eps(df_ALL_648obs, scale = eps_scale) -# # Get features for train models -## Get RQA features + TREND -df_ALL_rqa <- get_features_rqa_trend(df = df_ALL[1:10000, ], eps_chaos_01 = eps_df_ALL, +# # # Get features for train models +# ## Get RQA features + TREND +df_ALL_rqa <- get_features_rqa_trend(df = df_ALL, eps_chaos_01 = eps_df_ALL, lmin_a = lmin_a, lmin_g = lmin_g) #save(df_ALL_rqa, file = "SOM_features/rqa_trend/df_ALL_rqa.RData") # Autocorrelation -df_ALL_autocorrelation_features <- get_features_tsfresh_specific(df = df_ALL[1:10000, ], dic_features = py$autocorrelation_dic) +df_ALL_autocorrelation_features <- get_features_tsfresh_specific(df = df_ALL, dic_features = py$autocorrelation_dic) df_ALL_autocorrelation_features <- tidyr::drop_na(df_ALL_autocorrelation_features) -#save(df_ALL_autocorrelation_features, file = "SOM_features/autocorr/df_ALL_autocorrelation_features.RData") +save(df_ALL_autocorrelation_features, file = file.path(zenodo_dir, "df_ALL_autocorrelation_features.RData")) # Spectral -df_ALL_spectral_features <- get_features_tsfresh_specific(df = df_ALL[1:10000, ], dic_features = py$spectral_dic) +df_ALL_spectral_features <- get_features_tsfresh_specific(df = df_ALL, dic_features = py$spectral_dic) df_ALL_spectral_features <- tidyr::drop_na(df_ALL_spectral_features) -#save(df_ALL_spectral_features, file = "SOM_features/spectral/df_ALL_spectral_features.RData") +save(df_ALL_spectral_features, file = file.path(zenodo_dir, "df_ALL_spectral_features.RData")) # MIX of RQA + Autocorrelation + Spectral -#df_ALL_mix_features <- do.call("rbind", list(df_ALL_rqa, df_ALL_autocorrelation_features[3:141], df_ALL_spectral_features[3:2607])) +df_ALL_mix_features <- do.call("rbind", list(df_ALL_rqa, df_ALL_autocorrelation_features[3:141], df_ALL_spectral_features[3:2607])) -#save(df_ALL_mix_features, file = "SOM_features/mix_rqa_trend_spectral_autocorr/df_ALL_mix_features.RData") +save(df_ALL_mix_features, file = file.path(zenodo_dir, "df_ALL_mix_features.RData")) # Tsfresh all -df_ALL_tsfresh_features <- get_features_tsfresh_specific(df = df_ALL[1:10000, ], dic_features = py$ComprehensiveFCParameters()) +df_ALL_tsfresh_features <- get_features_tsfresh_specific(df = df_ALL, dic_features = py$ComprehensiveFCParameters()) # Drop NAs df_ALL_tsfresh_features <- df_ALL_tsfresh_features |> dplyr::select_if(~ !any(is.na(.))) @@ -194,6 +199,6 @@ df_ALL_tsfresh_features <- tidyr::drop_na(df_ALL_tsfresh_features) dir.create(file.path(base_path, "outputs")) } -#save(df_ALL_tsfresh_features, -# file = file.path(base_path, "outputs/df_ALL_tsfresh_features.RData")) +save(df_ALL_tsfresh_features, + file = file.path(zenodo_dir, "df_ALL_tsfresh_features.RData")) ``` diff --git a/get_results.Rmd b/get_results.Rmd index 2f4dab001ffae39956e240de1834ba6abee9ea5d..ed0ef9caa0422148e74151bf6df17ba7af6966a7 100644 --- a/get_results.Rmd +++ b/get_results.Rmd @@ -13,13 +13,43 @@ output: knitr::opts_chunk$set(warning = FALSE, message = FALSE) ``` +```{r install_packages, include = FALSE} +# Install `renv` package if not available +if (!require("renv")) install.packages("renv") +# Install dependencies if not installed already +renv::restore() +``` + ```{r parameters} +# Set basepath for the script execution, default is the script path +base_path <- getwd() +# Input data path +# Data folder +if (Sys.getenv("DOCKER_RUN") == "true") { + zenodo_dir <- "/zenodo_dir" +} else { + zenodo_dir <- file.path(base_path, "data_zenodo") +} + +# Create folder data_zenodo if it does not exists +if (!dir.exists(zenodo_dir)) { + dir.create(zenodo_dir) +} + +# Output data path +output_dir <- file.path(base_path, "output") +# Create folder output if it does not exists +if (!dir.exists(output_dir)) { + dir.create(output_dir) +} + # Parameters for article "link to article" # SOM dimentions som_xdim <- c(5) som_ydim <- c(5) -# Range of used features for SOM train e.g. 2:37, min number of features are 2 -xgb_features_range <- c(27) +# Range of used features for SOM train e.g. 2:35, min number of features are 2 +xgb_features_range <- 24:27 # c(2:35) +best_n_features <- "27" # This needs to be character as it is used to subset by name # XGBOOST importance type https://www.rdocumentation.org/packages/xgboost/versions/0.6.4.1/topics/xgb.importance xgboost_importance_type = c("Gain") @@ -32,7 +62,6 @@ activ_name_eng <- rep(c("1:4x10m Shuttle Run", "6:10x Gymnastic Hoops Passing", "7:10x Bench Jumping", "8:Crawling repeat"), 81) -stud_names_648 <- as.character(df_rqa_features$student) # XGBOOST param_xgb_model = @@ -67,41 +96,58 @@ plot_pixel_size <- 500 legendPos <- "below" ``` -```{r install_packages, include = FALSE} -# Install `renv` package if not available -if (!require("renv")) install.packages("renv") -# Install dependencies if not installed already -renv::restore() -``` - +```{r prepare_input_data} + +#https://zenodo.org/records/10996083 +if (!file.exists(file.path(zenodo_dir, "df_ALL_autocorrelation_features.csv")) | + !file.exists(file.path(zenodo_dir, "df_ALL_rqa.csv")) | + !file.exists(file.path(zenodo_dir, "df_ALL_spectral_features.csv")) | + !file.exists(file.path(zenodo_dir, "df_ALL_tsfresh_features.csv")) + ) { + + # This part is based on https://github.com/inbo/inborutils/blob/main/R/download_zenodo.R + record <- + curl::curl_fetch_memory("https://zenodo.org/api/records/10996083")$content |> + rawToChar() |> + jsonlite::fromJSON() + file_urls <- record$files$links$self + filenames <- basename(record$files$key) + destfiles <- file.path(zenodo_dir, filenames) + + mapply(curl::curl_download, + file_urls, + destfiles, + MoreArgs = list(quiet = TRUE)) +} + +if (!file.exists(file.path(zenodo_dir, "physical_exercise.csv"))) { + # Create folder data_zenodo if it does not exists + if (!dir.exists(zenodo_dir)) { + dir.create(zenodo_dir) + } + # This part is based on https://github.com/inbo/inborutils/blob/main/R/download_zenodo.R + record <- + curl::curl_fetch_memory("https://zenodo.org/api/records/10984137")$content |> + rawToChar() |> + jsonlite::fromJSON() + file_urls <- record$files$links$self + filenames <- basename(record$files$key) + destfiles <- file.path(zenodo_dir, filenames) + + mapply(curl::curl_download, + file_urls, + destfiles, + MoreArgs = list(quiet = TRUE)) +} + +# Load data (raw time series student movements) +df_motion <- readr::read_csv(file.path(zenodo_dir, "physical_exercise.csv")) - -```{r, include = T} -# Load libraries -if (!require('tidyverse')) install.packages('tidyverse'); library('tidyverse') -if (!require('knitr')) install.packages('knitr'); library('knitr') - -# SOM -if (!require('kohonen')) install.packages('kohonen'); library('kohonen') -if (!require('aweSOM')) install.packages('aweSOM'); library('aweSOM') - -# XGBoost feature reduction -if (!require('xgboost')) install.packages('xgboost'); library('xgboost') -if (!require('caret')) install.packages('caret'); library('caret') - -# DT -if (!require('DT')) install.packages('DT'); library('DT') ``` - -```{r, include = T} +```{r prepare_functions_and_load_data, include = T} # Include functions -# Windows path -#source("D:/path_to/movement-classification/functions/functions_df_total.R") -# Linux path -#source("path_to/movement-classification/functions/functions_df_total.R") -#source("/home/andrii/Documents/it4i/movement-classification/functions/functions_df_total_cv.R") -source("D:/it4i/movement-classification/functions/functions_df_total_cv.R") +source(file.path(base_path, "functions", "functions_df_total_cv.R")) # Load data #The Data can be found at https://zenodo.org/records/10996083 @@ -112,15 +158,13 @@ source("D:/it4i/movement-classification/functions/functions_df_total_cv.R") #https://cran.r-project.org/web/packages/Chaos01/Chaos01.pdf # RQA TREND #https://cran.r-project.org/web/packages/nonlinearTseries/nonlinearTseries.pdf -#df_rqa_features <- read_csv("/home/andrii/Documents/it4i/activity_df_pohybove_cinnosti_2024/data_zenodo/zenodo_uploaded/df_ALL_rqa.csv") -df_rqa_features <- readr::read_csv("C:/Users/Andrii/Downloads/10996083/df_ALL_rqa.csv") - +df_rqa_features <- readr::read_csv(file.path(zenodo_dir, "df_ALL_rqa.csv")) # Autocorrelation #https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.autocorrelation #https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.agg_autocorrelation #df_autocorrelation_features <- read_csv("/home/andrii/Documents/it4i/activity_df_pohybove_cinnosti_2024/data_zenodo/zenodo_uploaded/df_ALL_autocorrelation_features.csv") -df_autocorrelation_features <- readr::read_csv("C:/Users/Andrii/Downloads/10996083/df_ALL_autocorrelation_features.csv") +df_autocorrelation_features <- readr::read_csv(file.path(zenodo_dir, "df_ALL_autocorrelation_features.csv")) # Spectral #https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.fft_aggregated @@ -129,17 +173,14 @@ df_autocorrelation_features <- readr::read_csv("C:/Users/Andrii/Downloads/109960 #https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.fourier_entropy #https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.spkt_welch_density #https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.ar_coefficient -#df_spectral_features <- read_csv("/home/andrii/Documents/it4i/activity_df_pohybove_cinnosti_2024/data_zenodo/zenodo_uploaded/df_ALL_spectral_features.csv") -df_spectral_features <- readr::read_csv("C:/Users/Andrii/Downloads/10996083/df_ALL_spectral_features.csv") +df_spectral_features <- readr::read_csv(file.path(zenodo_dir, "df_ALL_spectral_features.csv")) # Mix RQA/Spectral/Autocorr -#df_mix_features <- read_csv("/home/andrii/Documents/it4i/activity_df_pohybove_cinnosti_2024/data_zenodo/zenodo_uploaded/df_ALL_mix_features.csv") -df_mix_features <- readr::read_csv("C:/Users/Andrii/Downloads/10996083/df_ALL_mix_features.csv") +df_mix_features <- readr::read_csv(file.path(zenodo_dir, "df_ALL_mix_features.csv")) # Tsfresh all #https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html -#df_tsfresh_features <- read_csv("/home/andrii/Documents/it4i/activity_df_pohybove_cinnosti_2024/data_zenodo/zenodo_uploaded/df_ALL_tsfresh_features.csv") -df_tsfresh_features <- readr::read_csv("C:/Users/Andrii/Downloads/10996083/df_ALL_tsfresh_features.csv") +df_tsfresh_features <- readr::read_csv(file.path(zenodo_dir, "df_ALL_tsfresh_features.csv")) # List of features sets for model training list_features_all <- list( @@ -152,9 +193,11 @@ list_features_all <- list( ``` -```{r, echo = F, include = F} +```{r train_models, echo = F, include = F} + +stud_names_648 <- as.character(df_rqa_features$student) # Train function -som_gain_27f_5x5 <- train_test_models( +som_models <- train_test_models( list_features_all = list_features_all, stud_names_648 = stud_names_648, activ_name_eng = activ_name_eng, @@ -181,89 +224,78 @@ som_gain_27f_5x5 <- train_test_models( plot_pixel_size = plot_pixel_size, legendPos = legendPos ) -# Save you trained results -#saveRDS(som_gain_27f_5x5, file = "/home/andrii/Documents/it4i/movement-classification/trained_models/som_gain_27f_5x5.RDS") - -# Get the SOM quality metrics of you trained models -df_som_acc <- get_acc_all_features(som_list_feat = som_gain_27f_5x5) -# Save you results -#saveRDS(df_som_acc, file = "/home/andrii/Documents/it4i/movement-classification/df_results/df_som_acc.RDS") +names(som_models) <- xgb_features_range |> as.character() ``` ```{r fig1, fig.align='center', out.width="100%", fig.cap = "\\label{fig1}Example of a time series from an accelerometer recording.", echo = FALSE, include = T} # For Figure 1: Example of a time series from an accelerometer recording #measured during a 4 x 10 meter run by one of the participants -# Get the RAW time series https://zenodo.org/records/10984138 -# Load data to R -# df_motion <- read_csv("C:/Users/Andrii/Downloads/10984138/physical_exercise.csv") -# -# df_motion <- df_motion |> -# group_by(activity_id, student_id) |> -# mutate(ts_length = 1:n()) |> -# tidyr::nest(data = !one_of(c("student_id", "activity_id"))) |> -# ungroup() -# -# # Load one time series -# data_test <- df_motion$data[[1]][c(1:3, 10)] -# data_long <- tidyr::pivot_longer(data_test, -# cols = c("accelerometer_x", "accelerometer_y", "accelerometer_z"), -# names_to = "group", -# values_to = "value", -# names_prefix = "value") -# -# data_long$ts_length <- as.difftime(data_long$ts_length/100, units = 'secs') -# -# -# figure_1 <- ggplot(data = data_long) + -# geom_line(aes(y = value, x = ts_length, color = group)) + -# theme_bw()+ -# theme(axis.text=element_text(size=20), -# axis.title=element_text(size=22,face="bold"), -# legend.title = element_text(size = 22, face = "bold"), -# legend.text = element_text(size = 20)) + -# labs(y= expression(bold(paste("Acceleration (g = 9.8 m/s" ^ "2",")"))), x = " Time (seconds)", color = "Axis") -# -# figure_1 + +df_motion <- df_motion |> + dplyr::group_by(activity_id, student_id) |> + dplyr::mutate(ts_length = 1:dplyr::n()) |> + tidyr::nest(data = !tidyselect::one_of(c("student_id", "activity_id"))) |> + dplyr::ungroup() + +# Load one time series +data_test <- df_motion$data[[1]][c(1:3, 10)] +data_long <- tidyr::pivot_longer(data_test, + cols = c("accelerometer_x", "accelerometer_y", "accelerometer_z"), + names_to = "group", + values_to = "value", + names_prefix = "value") + +data_long$ts_length <- as.difftime(data_long$ts_length/100, units = 'secs') + + +figure_1 <- ggplot2::ggplot(data = data_long) + + ggplot2::geom_line(ggplot2::aes(y = value, x = ts_length, color = group)) + + ggplot2::theme_bw()+ + ggplot2::theme(axis.text=ggplot2::element_text(size=20), + axis.title=ggplot2::element_text(size=22,face="bold"), + legend.title = ggplot2::element_text(size = 22, face = "bold"), + legend.text = ggplot2::element_text(size = 20)) + +ggplot2::labs(y= expression(bold(paste("Acceleration (g = 9.8 m/s" ^ "2",")"))), x = " Time (seconds)", color = "Axis") + +figure_1 ``` ```{r fig2, fig.align='center', out.width="100%", fig.cap = "\\label{fig2}Tsfresh feature Importance by “Gain” bar plot.", echo = FALSE, include = T} # Plot of the used features in article plot_data_importance <- - som_gain_27f_5x5[[27]]$train_data$tsfresh$df_important_feat_xgboost |> + som_models[[best_n_features]]$train_data$tsfresh$df_important_feat_xgboost |> dplyr::arrange(desc(Gain)) |> - dplyr::mutate(Feature = fct_reorder(Feature, Gain, .desc = T)) |> + dplyr::mutate(Feature = forcats::fct_reorder(Feature, Gain, .desc = T)) |> dplyr::slice_head(n = 35) |> - ggplot(aes(x = Feature, y = Gain)) + - xlab("Feature name") + - ylab("Value") + - geom_bar(stat = "identity") + - scale_x_discrete(labels = str_wrap(c(1:35), + ggplot2::ggplot(ggplot2::aes(x = Feature, y = Gain)) + + ggplot2::xlab("Feature name") + + ggplot2::ylab("Value") + + ggplot2::geom_bar(stat = "identity") + + ggplot2::scale_x_discrete(labels = stringr::str_wrap(c(1:35), width = 4)) + ggplot2::theme_bw() + - theme( - axis.text.x = element_text( + ggplot2::theme( + axis.text.x = ggplot2::element_text( #angle = 30, hjust = 0.5, vjust = 0.5, size = 10, face = "bold" ), - axis.title.x = element_text(size = 25, face = "bold"), - axis.title.y = element_text(size = 25, face = "bold")#, + axis.title.x = ggplot2::element_text(size = 25, face = "bold"), + axis.title.y = ggplot2::element_text(size = 25, face = "bold")#, #panel.background = element_blank() ) + - geom_vline(xintercept = 27) + ggplot2::geom_vline(xintercept = 27) plot_data_importance - - ``` ```{r tab4, echo = FALSE, include = T} table_4_som_quality <- - do.call("rbind", list(som_gain_27f_5x5[[27]]$quality$`648obs`)) + do.call("rbind", list(som_models[[best_n_features]]$quality$`648obs`)) DT::datatable( table_4_som_quality, @@ -275,10 +307,8 @@ DT::datatable( ``` ```{r tab5, echo=FALSE, include = T} -# Load pre trained models for 2:35 features -som_gain_2_35f_5x5 <- base::readRDS("D:/it4i/movement-classification/trained_models/som_gain_2_35f_5x5.RDS") -table_2_som_quality <- get_acc_all_features(som_gain_2_35f_5x5) +table_2_som_quality <- get_acc_all_features(som_models) DT::datatable( table_2_som_quality$table_format, @@ -286,35 +316,32 @@ DT::datatable( filter = list(position = 'top', clear = FALSE) ) |> DT::formatRound(columns = c(2:27), digits = 3) - - ``` # Supplementary Materials -```{r, echo=FALSE, include = T} +```{r compute_clustering, echo=FALSE, include = T} # Used SOM in article -som_27f <- som_gain_27f_5x5[[27]]$som$tsfresh$SOM_results$som_model - +som_best <- som_models[[best_n_features]]$som$tsfresh$SOM_results$som_model # Get superclusters number -superclust_pam <- cluster::pam(som_27f$codes[[1]], superclust_num) +superclust_pam <- cluster::pam(som_best$codes[[1]], superclust_num) superclasses_pam <- superclust_pam$clustering -superclust_hclust <- stats::hclust(dist(som_27f$codes[[1]]), "complete") +superclust_hclust <- stats::hclust(dist(som_best$codes[[1]]), "complete") superclasses_hclust <- stats::cutree(superclust_hclust, superclust_num) ``` ```{r fig3, fig.width = 5, fig.height = 6, fig.align='center', out.width="100%", fig.cap = "\\label{fig3}", echo = FALSE, include = T} -som_gain_27f_5x5[[27]]$som$tsfresh$SOM_results$aweSOM_plot +som_models[[best_n_features]]$som$tsfresh$SOM_results$aweSOM_plot ``` ```{r fig6, fig.width = 5, fig.height = 6, fig.align='center', out.width="100%", fig.cap = "\\label{fig6}Supercluster table", echo = FALSE, include = T} # Get table superclass df_table_6 <- - get_table_superclass(som_27f, superclass_number = superclust_num) + get_table_superclass(som_best, superclass_number = superclust_num) table_6 <- DT::datatable( df_table_6, @@ -327,7 +354,7 @@ table_6 ```{r fig7, fig.width = 5, fig.height = 6, fig.align='center', out.width="100%", fig.cap = "\\label{fig7}Silhouette score", echo = FALSE, include = T} #Silhouette score -figure_7 <- aweSOM::aweSOMsilhouette(som_27f, superclasses_pam) +figure_7 <- aweSOM::aweSOMsilhouette(som_best, superclasses_pam) ``` ```{r fig8, fig.width = 5, fig.height = 6, fig.align='center', out.width="100%", fig.cap = "\\label{fig5}Dendogram", echo = FALSE, include = T} diff --git a/renv/requirements.txt b/renv/requirements.txt index 3e9ca615e53f3b69b4e402a3a89b2d5348b2869d..acd3f2b2b94d72f2c7f9341e67f10300c6d84dcd 100644 --- a/renv/requirements.txt +++ b/renv/requirements.txt @@ -1,9 +1,3 @@ - -numba==0.55.1 - +numba==0.57.1 tsfresh==0.20.0 - - - - - +numpy==1.21.6 \ No newline at end of file