Skip to content
Snippets Groups Projects
Commit d70c8f97 authored by Tomas Martinovic's avatar Tomas Martinovic
Browse files

make Rmd more portable

parent 992405cd
No related branches found
No related tags found
No related merge requests found
Pipeline #39263 failed
# Base image: R 4.3.3 from the Rocker project
FROM rocker/r-base:4.3.3

# System tools and development libraries (sorted, one per line);
# the apt package lists are removed in the same layer.
RUN apt-get update -y \
    && apt-get install -y \
        git \
        libcurl4-openssl-dev \
        libffi-dev \
        libicu-dev \
        libpng-dev \
        libssl-dev \
        make \
        pandoc \
        python3 \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Write the same site-wide R options to both Rprofile.site locations
RUN mkdir -p /usr/local/lib/R/etc/ /usr/lib/R/etc/ \
    && echo "options(renv.config.pak.enabled = FALSE, repos = c(CRAN = 'https://cran.rstudio.com/'), download.file.method = 'libcurl', Ncpus = 4)" \
        | tee /usr/local/lib/R/etc/Rprofile.site /usr/lib/R/etc/Rprofile.site

# R packages: install a pinned renv through remotes, then restore from renv.lock
RUN R -e 'install.packages("remotes")' \
    && R -e 'remotes::install_version("renv", version = "1.0.7")'
COPY renv.lock ./
RUN R -e 'renv::restore()'

# Python: install an interpreter through reticulate, then create the `hmch`
# virtual environment and install the packages listed in requirements.txt
RUN R -e 'reticulate::install_python()'
COPY renv/requirements.txt ./
RUN R -e 'reticulate::virtualenv_create("hmch")' \
    && R -e 'reticulate::virtualenv_install("hmch", c("-r", "requirements.txt"))'

# Analysis sources
COPY get_features.Rmd get_results.Rmd ./
COPY functions ./functions/

# Default command: render the feature-extraction report
CMD ["R", "--quiet", "-e", "rmarkdown::render('get_features.Rmd')"]
...@@ -16,13 +16,8 @@ get_train_df <- ...@@ -16,13 +16,8 @@ get_train_df <-
) )
} }
#train_df <- get_train_df(df_features = df_autocorrelation_features) #train_df <- get_train_df(df_features = df_autocorrelation_features)
# Get features from XGBOOST---- # Get features from XGBOOST----
get_features_tree_xgboost <- get_features_tree_xgboost <-
function(train_df = train_df, function(train_df = train_df,
...@@ -55,7 +50,7 @@ get_features_tree_xgboost <- ...@@ -55,7 +50,7 @@ get_features_tree_xgboost <-
xgboost_iter = 100, xgboost_iter = 100,
set_seed = 4) { set_seed = 4) {
train_df$full_data_big_648obs <- train_df$full_data_big_648obs <-
add_column( tibble::add_column(
train_df$full_data_big_648obs, train_df$full_data_big_648obs,
activ_num = rep(1:8, 81), activ_num = rep(1:8, 81),
.before = "activity" .before = "activity"
...@@ -125,13 +120,13 @@ get_features_tree_xgboost <- ...@@ -125,13 +120,13 @@ get_features_tree_xgboost <-
dplyr::slice_head(n = xgboost_n_features) dplyr::slice_head(n = xgboost_n_features)
xgboost_important_features <- xgboost_important_features <-
xgboost_important_features |> select_if(~ !any(is.na(.))) xgboost_important_features |> dplyr::select_if(~ !any(is.na(.)))
# Select features # Select features
all_xgboost_activ_more_features <- all_xgboost_activ_more_features <-
train_df$train_data_big_648obs |> train_df$train_data_big_648obs |>
as.data.frame() |> as.data.frame() |>
dplyr::select(all_of(xgboost_important_features$Feature)) |> dplyr::select(tidyselect::all_of(xgboost_important_features$Feature)) |>
mutate( dplyr::mutate(
student = train_df$full_data_big_648obs$student, student = train_df$full_data_big_648obs$student,
activity = train_df$full_data_big_648obs$activity activity = train_df$full_data_big_648obs$activity
) )
...@@ -508,9 +503,7 @@ train_som_cv <- function(train_df, ...@@ -508,9 +503,7 @@ train_som_cv <- function(train_df,
temp_list_kfold$som_accur_kfold_test[[4]]$mnLogLoss temp_list_kfold$som_accur_kfold_test[[4]]$mnLogLoss
) )
temp_df_1 <- temp_df_1 <-
tibble::tibble( tibble::tibble(
fold_1 = c( fold_1 = c(
...@@ -683,8 +676,6 @@ get_all <- ...@@ -683,8 +676,6 @@ get_all <-
get_features_tree_xgboost( get_features_tree_xgboost(
train_df = train_df, train_df = train_df,
param_xgb_model = param_xgb_model, param_xgb_model = param_xgb_model,
nfold = nfold, nfold = nfold,
showsd = showsd, showsd = showsd,
stratified = stratified, stratified = stratified,
...@@ -1017,6 +1008,7 @@ train_test_models <- ...@@ -1017,6 +1008,7 @@ train_test_models <-
set_seed = 1245) { set_seed = 1245) {
# Temp for SOM models # Temp for SOM models
temp_tests_feat <- list() temp_tests_feat <- list()
index <- 1
# For loop for train # For loop for train
for (xgboost_n_features in xgb_features_range) { for (xgboost_n_features in xgb_features_range) {
all_som <- get_all( all_som <- get_all(
...@@ -1069,7 +1061,8 @@ train_test_models <- ...@@ -1069,7 +1061,8 @@ train_test_models <-
paste0(all_som$quality$`648obs`$`Features set`, paste0(all_som$quality$`648obs`$`Features set`,
paste0("_", xgboost_n_features, "f")) paste0("_", xgboost_n_features, "f"))
temp_tests_feat[[xgboost_n_features]] <- all_som temp_tests_feat[[index]] <- all_som
index <- index + 1
} }
...@@ -1125,7 +1118,7 @@ get_table_superclass <- ...@@ -1125,7 +1118,7 @@ get_table_superclass <-
col_totals <- base::colSums(table_som_clasif[3:10]) col_totals <- base::colSums(table_som_clasif[3:10])
# Calculate percentage contributions and append to original values # Calculate percentage contributions and append to original values
table_som_clasif <- table_som_clasif %>% table_som_clasif <- table_som_clasif |>
dplyr::group_by(superclass) |> dplyr::group_by(superclass) |>
dplyr::mutate(across(2:9, dplyr::mutate(across(2:9,
~ base::paste0(.x, " (", base::round((.x / col_totals[dplyr::cur_column()] * 100), 2 ~ base::paste0(.x, " (", base::round((.x / col_totals[dplyr::cur_column()] * 100), 2
...@@ -1135,12 +1128,12 @@ get_table_superclass <- ...@@ -1135,12 +1128,12 @@ get_table_superclass <-
# Make final table # Make final table
df_summary <- table_som_clasif %>% df_summary <- table_som_clasif |>
dplyr::group_by(superclass) %>% dplyr::group_by(superclass) |>
dplyr::summarise(dplyr::across(2:9, sum)) %>% dplyr::summarise(dplyr::across(2:9, sum)) |>
dplyr::ungroup() %>% dplyr::ungroup() |>
dplyr::mutate(total = base::rowSums(dplyr::across(2:9))) %>% dplyr::mutate(total = base::rowSums(dplyr::across(2:9))) |>
dplyr::rowwise() %>% dplyr::rowwise() |>
dplyr::mutate(dplyr::across(2:9, dplyr::mutate(dplyr::across(2:9,
~ base::paste0( ~ base::paste0(
.x, " (", base::round((.x / col_totals[dplyr::cur_column()] * 100), round_value), "%)" .x, " (", base::round((.x / col_totals[dplyr::cur_column()] * 100), round_value), "%)"
...@@ -1149,7 +1142,7 @@ get_table_superclass <- ...@@ -1149,7 +1142,7 @@ get_table_superclass <-
dplyr::ungroup() dplyr::ungroup()
# Format the percentages as requested # Format the percentages as requested
df_summary <- df_summary %>% df_summary <- df_summary |>
dplyr::mutate(dplyr::across(starts_with("perc"), ~ base::paste0(.x), .names = "formatted_{.col}")) dplyr::mutate(dplyr::across(starts_with("perc"), ~ base::paste0(.x), .names = "formatted_{.col}"))
names(df_summary) <- names(df_summary) <-
......
...@@ -10,6 +10,15 @@ knitr::opts_chunk$set(warning = FALSE, message = FALSE) ...@@ -10,6 +10,15 @@ knitr::opts_chunk$set(warning = FALSE, message = FALSE)
``` ```
```{r parameters} ```{r parameters}
# Set basepath for the script execution, default is the script path
base_path <- getwd()
# Data folder
if (Sys.getenv("DOCKER_RUN") == "true") {
zenodo_dir <- "/zenodo_dir"
} else {
zenodo_dir <- file.path(base_path, "data_zenodo")
}
# Chaos 01 parameters # Chaos 01 parameters
lmin_a <- 20 lmin_a <- 20
lmin_g <- 20 lmin_g <- 20
...@@ -25,9 +34,9 @@ renv::restore() ...@@ -25,9 +34,9 @@ renv::restore()
```{r install_python, include = FALSE} ```{r install_python, include = FALSE}
# Install python if not available # Install python if not available
if (!reticulate::py_available()) { #if (!reticulate::py_available()) {
reticulate::install_python() # reticulate::install_python()
} #}
# # Create `hmch` virtual environment if it does not exists yet # # Create `hmch` virtual environment if it does not exists yet
# if (!reticulate::virtualenv_exists("hmch")) { # if (!reticulate::virtualenv_exists("hmch")) {
# reticulate::virtualenv_create("hmch") # reticulate::virtualenv_create("hmch")
...@@ -40,13 +49,10 @@ if (!reticulate::py_available()) { ...@@ -40,13 +49,10 @@ if (!reticulate::py_available()) {
# reticulate::py_install("tsfresh") # reticulate::py_install("tsfresh")
# } # }
# Set basepath for the script execution, default is the script path
base_path <- getwd()
# Install required Python packages from requirements.txt # Install required Python packages from requirements.txt
reticulate::virtualenv_install("my-env_25", c("-r", file.path(base_path, "renv", "requirements.txt"))) #reticulate::virtualenv_install("hmch", c("-r", file.path(base_path, "renv", "requirements.txt")))
# Use `my-env_23` virtual environment # Use `hmch` virtual environment
reticulate::use_virtualenv("my-env_25") reticulate::use_virtualenv("hmch")
library(reticulate) library(reticulate)
``` ```
...@@ -71,7 +77,6 @@ import pandas ...@@ -71,7 +77,6 @@ import pandas
```{r prepare_input_data} ```{r prepare_input_data}
# Download data # Download data
zenodo_dir <- file.path(base_path, "data_zenodo")
#https://zenodo.org/records/10984138 #https://zenodo.org/records/10984138
if (!file.exists(file.path(zenodo_dir, "physical_exercise.csv"))) { if (!file.exists(file.path(zenodo_dir, "physical_exercise.csv"))) {
# Create folder data_zenodo if it does not exists # Create folder data_zenodo if it does not exists
...@@ -155,36 +160,36 @@ figure_1 ...@@ -155,36 +160,36 @@ figure_1
```{r compute_features} ```{r compute_features}
# Get neighborhood size for Accelerometer and Gyroscope # Get neighborhood size for Accelerometer and Gyroscope
eps_df_ALL <- get_eps(df_ALL_648obs, scale = eps_scale) eps_df_ALL <- get_eps(df_ALL_648obs, scale = eps_scale)
# # Get features for train models # # # Get features for train models
## Get RQA features + TREND # ## Get RQA features + TREND
df_ALL_rqa <- get_features_rqa_trend(df = df_ALL[1:10000, ], eps_chaos_01 = eps_df_ALL, df_ALL_rqa <- get_features_rqa_trend(df = df_ALL, eps_chaos_01 = eps_df_ALL,
lmin_a = lmin_a, lmin_g = lmin_g) lmin_a = lmin_a, lmin_g = lmin_g)
#save(df_ALL_rqa, file = "SOM_features/rqa_trend/df_ALL_rqa.RData") #save(df_ALL_rqa, file = "SOM_features/rqa_trend/df_ALL_rqa.RData")
# Autocorrelation # Autocorrelation
df_ALL_autocorrelation_features <- get_features_tsfresh_specific(df = df_ALL[1:10000, ], dic_features = py$autocorrelation_dic) df_ALL_autocorrelation_features <- get_features_tsfresh_specific(df = df_ALL, dic_features = py$autocorrelation_dic)
df_ALL_autocorrelation_features <- tidyr::drop_na(df_ALL_autocorrelation_features) df_ALL_autocorrelation_features <- tidyr::drop_na(df_ALL_autocorrelation_features)
#save(df_ALL_autocorrelation_features, file = "SOM_features/autocorr/df_ALL_autocorrelation_features.RData") save(df_ALL_autocorrelation_features, file = file.path(zenodo_dir, "df_ALL_autocorrelation_features.RData"))
# Spectral # Spectral
df_ALL_spectral_features <- get_features_tsfresh_specific(df = df_ALL[1:10000, ], dic_features = py$spectral_dic) df_ALL_spectral_features <- get_features_tsfresh_specific(df = df_ALL, dic_features = py$spectral_dic)
df_ALL_spectral_features <- tidyr::drop_na(df_ALL_spectral_features) df_ALL_spectral_features <- tidyr::drop_na(df_ALL_spectral_features)
#save(df_ALL_spectral_features, file = "SOM_features/spectral/df_ALL_spectral_features.RData") save(df_ALL_spectral_features, file = file.path(zenodo_dir, "df_ALL_spectral_features.RData"))
# MIX of RQA + Autocorrelation + Spectral # MIX of RQA + Autocorrelation + Spectral
#df_ALL_mix_features <- do.call("rbind", list(df_ALL_rqa, df_ALL_autocorrelation_features[3:141], df_ALL_spectral_features[3:2607])) df_ALL_mix_features <- do.call("rbind", list(df_ALL_rqa, df_ALL_autocorrelation_features[3:141], df_ALL_spectral_features[3:2607]))
#save(df_ALL_mix_features, file = "SOM_features/mix_rqa_trend_spectral_autocorr/df_ALL_mix_features.RData") save(df_ALL_mix_features, file = file.path(zenodo_dir, "df_ALL_mix_features.RData"))
# Tsfresh all # Tsfresh all
df_ALL_tsfresh_features <- get_features_tsfresh_specific(df = df_ALL[1:10000, ], dic_features = py$ComprehensiveFCParameters()) df_ALL_tsfresh_features <- get_features_tsfresh_specific(df = df_ALL, dic_features = py$ComprehensiveFCParameters())
# Drop NAs # Drop NAs
df_ALL_tsfresh_features <- df_ALL_tsfresh_features |> df_ALL_tsfresh_features <- df_ALL_tsfresh_features |>
dplyr::select_if(~ !any(is.na(.))) dplyr::select_if(~ !any(is.na(.)))
...@@ -194,6 +199,6 @@ df_ALL_tsfresh_features <- tidyr::drop_na(df_ALL_tsfresh_features) ...@@ -194,6 +199,6 @@ df_ALL_tsfresh_features <- tidyr::drop_na(df_ALL_tsfresh_features)
dir.create(file.path(base_path, "outputs")) dir.create(file.path(base_path, "outputs"))
} }
#save(df_ALL_tsfresh_features, save(df_ALL_tsfresh_features,
# file = file.path(base_path, "outputs/df_ALL_tsfresh_features.RData")) file = file.path(zenodo_dir, "df_ALL_tsfresh_features.RData"))
``` ```
...@@ -13,13 +13,43 @@ output: ...@@ -13,13 +13,43 @@ output:
knitr::opts_chunk$set(warning = FALSE, message = FALSE) knitr::opts_chunk$set(warning = FALSE, message = FALSE)
``` ```
```{r install_packages, include = FALSE}
# Install `renv` if it is not available. `requireNamespace()` only checks that
# the package can be loaded; unlike `require()` it does not attach it to the
# search path, which is enough because `renv` is called with the `renv::`
# prefix below.
if (!requireNamespace("renv", quietly = TRUE)) {
  install.packages("renv")
}
# Install dependencies if not installed already
renv::restore()
```
```{r parameters} ```{r parameters}
# Base path for the script execution; defaults to the current working directory
base_path <- getwd()

# Input data folder: a fixed location when the DOCKER_RUN environment variable
# is "true", otherwise `data_zenodo` under the base path
zenodo_dir <- if (Sys.getenv("DOCKER_RUN") == "true") {
  "/zenodo_dir"
} else {
  file.path(base_path, "data_zenodo")
}

# Output data folder
output_dir <- file.path(base_path, "output")

# Create the input data folder if it does not exist yet
if (!dir.exists(zenodo_dir)) {
  dir.create(zenodo_dir)
}

# Create the output folder if it does not exist yet
if (!dir.exists(output_dir)) {
  dir.create(output_dir)
}
# Parameters for article "link to article" # Parameters for article "link to article"
# SOM dimentions # SOM dimentions
som_xdim <- c(5) som_xdim <- c(5)
som_ydim <- c(5) som_ydim <- c(5)
# Range of used features for SOM train e.g. 2:37, min number of features are 2 # Range of used features for SOM train e.g. 2:35, min number of features are 2
xgb_features_range <- c(27) xgb_features_range <- 24:27 # c(2:35)
best_n_features <- "27" # This needs to be character as it is used to subset by name
# XGBOOST importance type https://www.rdocumentation.org/packages/xgboost/versions/0.6.4.1/topics/xgb.importance # XGBOOST importance type https://www.rdocumentation.org/packages/xgboost/versions/0.6.4.1/topics/xgb.importance
xgboost_importance_type = c("Gain") xgboost_importance_type = c("Gain")
...@@ -32,7 +62,6 @@ activ_name_eng <- rep(c("1:4x10m Shuttle Run", ...@@ -32,7 +62,6 @@ activ_name_eng <- rep(c("1:4x10m Shuttle Run",
"6:10x Gymnastic Hoops Passing", "6:10x Gymnastic Hoops Passing",
"7:10x Bench Jumping", "7:10x Bench Jumping",
"8:Crawling repeat"), 81) "8:Crawling repeat"), 81)
stud_names_648 <- as.character(df_rqa_features$student)
# XGBOOST # XGBOOST
param_xgb_model = param_xgb_model =
...@@ -67,41 +96,58 @@ plot_pixel_size <- 500 ...@@ -67,41 +96,58 @@ plot_pixel_size <- 500
legendPos <- "below" legendPos <- "below"
``` ```
```{r install_packages, include = FALSE} ```{r prepare_input_data}
# Install `renv` package if not available
if (!require("renv")) install.packages("renv") #https://zenodo.org/records/10996083
# Install dependencies if not installed already if (!file.exists(file.path(zenodo_dir, "df_ALL_autocorrelation_features.csv")) |
renv::restore() !file.exists(file.path(zenodo_dir, "df_ALL_rqa.csv")) |
``` !file.exists(file.path(zenodo_dir, "df_ALL_spectral_features.csv")) |
!file.exists(file.path(zenodo_dir, "df_ALL_tsfresh_features.csv"))
) {
# This part is based on https://github.com/inbo/inborutils/blob/main/R/download_zenodo.R
record <-
curl::curl_fetch_memory("https://zenodo.org/api/records/10996083")$content |>
rawToChar() |>
jsonlite::fromJSON()
file_urls <- record$files$links$self
filenames <- basename(record$files$key)
destfiles <- file.path(zenodo_dir, filenames)
mapply(curl::curl_download,
file_urls,
destfiles,
MoreArgs = list(quiet = TRUE))
}
# Download the files of a Zenodo record into `zenodo_dir`, but only when
# `physical_exercise.csv` is not already present there.
if (!file.exists(file.path(zenodo_dir, "physical_exercise.csv"))) {
# Create the data folder if it does not exist yet
if (!dir.exists(zenodo_dir)) {
dir.create(zenodo_dir)
}
# This part is based on https://github.com/inbo/inborutils/blob/main/R/download_zenodo.R
# Fetch the record metadata from the Zenodo API and parse the JSON response.
# NOTE(review): other comments cite this dataset as
# https://zenodo.org/records/10984138 - confirm that record id 10984137
# resolves to the intended version.
record <-
curl::curl_fetch_memory("https://zenodo.org/api/records/10984137")$content |>
rawToChar() |>
jsonlite::fromJSON()
# Download links and target paths, taken from the `files` entry of the record
file_urls <- record$files$links$self
filenames <- basename(record$files$key)
destfiles <- file.path(zenodo_dir, filenames)
# Download each URL to its matching destination path, without progress output
mapply(curl::curl_download,
file_urls,
destfiles,
MoreArgs = list(quiet = TRUE))
}
# Load data (raw time series student movements)
df_motion <- readr::read_csv(file.path(zenodo_dir, "physical_exercise.csv"))
```{r, include = T}
# Load libraries
if (!require('tidyverse')) install.packages('tidyverse'); library('tidyverse')
if (!require('knitr')) install.packages('knitr'); library('knitr')
# SOM
if (!require('kohonen')) install.packages('kohonen'); library('kohonen')
if (!require('aweSOM')) install.packages('aweSOM'); library('aweSOM')
# XGBoost feature reduction
if (!require('xgboost')) install.packages('xgboost'); library('xgboost')
if (!require('caret')) install.packages('caret'); library('caret')
# DT
if (!require('DT')) install.packages('DT'); library('DT')
``` ```
```{r prepare_functions_and_load_data, include = T}
```{r, include = T}
# Include functions # Include functions
# Windows path source(file.path(base_path, "functions", "functions_df_total_cv.R"))
#source("D:/path_to/movement-classification/functions/functions_df_total.R")
# Linux path
#source("path_to/movement-classification/functions/functions_df_total.R")
#source("/home/andrii/Documents/it4i/movement-classification/functions/functions_df_total_cv.R")
source("D:/it4i/movement-classification/functions/functions_df_total_cv.R")
# Load data # Load data
#The Data can be found at https://zenodo.org/records/10996083 #The Data can be found at https://zenodo.org/records/10996083
...@@ -112,15 +158,13 @@ source("D:/it4i/movement-classification/functions/functions_df_total_cv.R") ...@@ -112,15 +158,13 @@ source("D:/it4i/movement-classification/functions/functions_df_total_cv.R")
#https://cran.r-project.org/web/packages/Chaos01/Chaos01.pdf #https://cran.r-project.org/web/packages/Chaos01/Chaos01.pdf
# RQA TREND # RQA TREND
#https://cran.r-project.org/web/packages/nonlinearTseries/nonlinearTseries.pdf #https://cran.r-project.org/web/packages/nonlinearTseries/nonlinearTseries.pdf
#df_rqa_features <- read_csv("/home/andrii/Documents/it4i/activity_df_pohybove_cinnosti_2024/data_zenodo/zenodo_uploaded/df_ALL_rqa.csv") df_rqa_features <- readr::read_csv(file.path(zenodo_dir, "df_ALL_rqa.csv"))
df_rqa_features <- readr::read_csv("C:/Users/Andrii/Downloads/10996083/df_ALL_rqa.csv")
# Autocorrelation # Autocorrelation
#https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.autocorrelation #https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.autocorrelation
#https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.agg_autocorrelation #https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.agg_autocorrelation
#df_autocorrelation_features <- read_csv("/home/andrii/Documents/it4i/activity_df_pohybove_cinnosti_2024/data_zenodo/zenodo_uploaded/df_ALL_autocorrelation_features.csv") #df_autocorrelation_features <- read_csv("/home/andrii/Documents/it4i/activity_df_pohybove_cinnosti_2024/data_zenodo/zenodo_uploaded/df_ALL_autocorrelation_features.csv")
df_autocorrelation_features <- readr::read_csv("C:/Users/Andrii/Downloads/10996083/df_ALL_autocorrelation_features.csv") df_autocorrelation_features <- readr::read_csv(file.path(zenodo_dir, "df_ALL_autocorrelation_features.csv"))
# Spectral # Spectral
#https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.fft_aggregated #https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.fft_aggregated
...@@ -129,17 +173,14 @@ df_autocorrelation_features <- readr::read_csv("C:/Users/Andrii/Downloads/109960 ...@@ -129,17 +173,14 @@ df_autocorrelation_features <- readr::read_csv("C:/Users/Andrii/Downloads/109960
#https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.fourier_entropy #https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.fourier_entropy
#https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.spkt_welch_density #https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.spkt_welch_density
#https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.ar_coefficient #https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html#tsfresh.feature_extraction.feature_calculators.ar_coefficient
#df_spectral_features <- read_csv("/home/andrii/Documents/it4i/activity_df_pohybove_cinnosti_2024/data_zenodo/zenodo_uploaded/df_ALL_spectral_features.csv") df_spectral_features <- readr::read_csv(file.path(zenodo_dir, "df_ALL_spectral_features.csv"))
df_spectral_features <- readr::read_csv("C:/Users/Andrii/Downloads/10996083/df_ALL_spectral_features.csv")
# Mix RQA/Spectral/Autocorr # Mix RQA/Spectral/Autocorr
#df_mix_features <- read_csv("/home/andrii/Documents/it4i/activity_df_pohybove_cinnosti_2024/data_zenodo/zenodo_uploaded/df_ALL_mix_features.csv") df_mix_features <- readr::read_csv(file.path(zenodo_dir, "df_ALL_mix_features.csv"))
df_mix_features <- readr::read_csv("C:/Users/Andrii/Downloads/10996083/df_ALL_mix_features.csv")
# Tsfresh all # Tsfresh all
#https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html #https://tsfresh.readthedocs.io/en/latest/api/tsfresh.feature_extraction.html
#df_tsfresh_features <- read_csv("/home/andrii/Documents/it4i/activity_df_pohybove_cinnosti_2024/data_zenodo/zenodo_uploaded/df_ALL_tsfresh_features.csv") df_tsfresh_features <- readr::read_csv(file.path(zenodo_dir, "df_ALL_tsfresh_features.csv"))
df_tsfresh_features <- readr::read_csv("C:/Users/Andrii/Downloads/10996083/df_ALL_tsfresh_features.csv")
# List of features sets for model training # List of features sets for model training
list_features_all <- list( list_features_all <- list(
...@@ -152,9 +193,11 @@ list_features_all <- list( ...@@ -152,9 +193,11 @@ list_features_all <- list(
``` ```
```{r, echo = F, include = F} ```{r train_models, echo = F, include = F}
stud_names_648 <- as.character(df_rqa_features$student)
# Train function # Train function
som_gain_27f_5x5 <- train_test_models( som_models <- train_test_models(
list_features_all = list_features_all, list_features_all = list_features_all,
stud_names_648 = stud_names_648, stud_names_648 = stud_names_648,
activ_name_eng = activ_name_eng, activ_name_eng = activ_name_eng,
...@@ -181,89 +224,78 @@ som_gain_27f_5x5 <- train_test_models( ...@@ -181,89 +224,78 @@ som_gain_27f_5x5 <- train_test_models(
plot_pixel_size = plot_pixel_size, plot_pixel_size = plot_pixel_size,
legendPos = legendPos legendPos = legendPos
) )
# Save you trained results
#saveRDS(som_gain_27f_5x5, file = "/home/andrii/Documents/it4i/movement-classification/trained_models/som_gain_27f_5x5.RDS")
names(som_models) <- xgb_features_range |> as.character()
# Get the SOM quality metrics of you trained models
df_som_acc <- get_acc_all_features(som_list_feat = som_gain_27f_5x5)
# Save you results
#saveRDS(df_som_acc, file = "/home/andrii/Documents/it4i/movement-classification/df_results/df_som_acc.RDS")
``` ```
```{r fig1, fig.align='center', out.width="100%", fig.cap = "\\label{fig1}Example of a time series from an accelerometer recording.", echo = FALSE, include = T} ```{r fig1, fig.align='center', out.width="100%", fig.cap = "\\label{fig1}Example of a time series from an accelerometer recording.", echo = FALSE, include = T}
# For Figure 1: Example of a time series from an accelerometer recording # For Figure 1: Example of a time series from an accelerometer recording
#measured during a 4 x 10 meter run by one of the participants #measured during a 4 x 10 meter run by one of the participants
# Get the RAW time series https://zenodo.org/records/10984138
# Load data to R df_motion <- df_motion |>
# df_motion <- read_csv("C:/Users/Andrii/Downloads/10984138/physical_exercise.csv") dplyr::group_by(activity_id, student_id) |>
# dplyr::mutate(ts_length = 1:dplyr::n()) |>
# df_motion <- df_motion |> tidyr::nest(data = !tidyselect::one_of(c("student_id", "activity_id"))) |>
# group_by(activity_id, student_id) |> dplyr::ungroup()
# mutate(ts_length = 1:n()) |>
# tidyr::nest(data = !one_of(c("student_id", "activity_id"))) |> # Load one time series
# ungroup() data_test <- df_motion$data[[1]][c(1:3, 10)]
# data_long <- tidyr::pivot_longer(data_test,
# # Load one time series cols = c("accelerometer_x", "accelerometer_y", "accelerometer_z"),
# data_test <- df_motion$data[[1]][c(1:3, 10)] names_to = "group",
# data_long <- tidyr::pivot_longer(data_test, values_to = "value",
# cols = c("accelerometer_x", "accelerometer_y", "accelerometer_z"), names_prefix = "value")
# names_to = "group",
# values_to = "value", data_long$ts_length <- as.difftime(data_long$ts_length/100, units = 'secs')
# names_prefix = "value")
#
# data_long$ts_length <- as.difftime(data_long$ts_length/100, units = 'secs') figure_1 <- ggplot2::ggplot(data = data_long) +
# ggplot2::geom_line(ggplot2::aes(y = value, x = ts_length, color = group)) +
# ggplot2::theme_bw()+
# figure_1 <- ggplot(data = data_long) + ggplot2::theme(axis.text=ggplot2::element_text(size=20),
# geom_line(aes(y = value, x = ts_length, color = group)) + axis.title=ggplot2::element_text(size=22,face="bold"),
# theme_bw()+ legend.title = ggplot2::element_text(size = 22, face = "bold"),
# theme(axis.text=element_text(size=20), legend.text = ggplot2::element_text(size = 20)) +
# axis.title=element_text(size=22,face="bold"), ggplot2::labs(y= expression(bold(paste("Acceleration (g = 9.8 m/s" ^ "2",")"))), x = " Time (seconds)", color = "Axis")
# legend.title = element_text(size = 22, face = "bold"),
# legend.text = element_text(size = 20)) + figure_1
# labs(y= expression(bold(paste("Acceleration (g = 9.8 m/s" ^ "2",")"))), x = " Time (seconds)", color = "Axis")
#
# figure_1
``` ```
```{r fig2, fig.align='center', out.width="100%", fig.cap = "\\label{fig2}Tsfresh feature Importance by “Gain” bar plot.", echo = FALSE, include = T} ```{r fig2, fig.align='center', out.width="100%", fig.cap = "\\label{fig2}Tsfresh feature Importance by “Gain” bar plot.", echo = FALSE, include = T}
# Plot of the used features in article # Plot of the used features in article
plot_data_importance <- plot_data_importance <-
som_gain_27f_5x5[[27]]$train_data$tsfresh$df_important_feat_xgboost |> som_models[[best_n_features]]$train_data$tsfresh$df_important_feat_xgboost |>
dplyr::arrange(desc(Gain)) |> dplyr::arrange(desc(Gain)) |>
dplyr::mutate(Feature = fct_reorder(Feature, Gain, .desc = T)) |> dplyr::mutate(Feature = forcats::fct_reorder(Feature, Gain, .desc = T)) |>
dplyr::slice_head(n = 35) |> dplyr::slice_head(n = 35) |>
ggplot(aes(x = Feature, y = Gain)) + ggplot2::ggplot(ggplot2::aes(x = Feature, y = Gain)) +
xlab("Feature name") + ggplot2::xlab("Feature name") +
ylab("Value") + ggplot2::ylab("Value") +
geom_bar(stat = "identity") + ggplot2::geom_bar(stat = "identity") +
scale_x_discrete(labels = str_wrap(c(1:35), ggplot2::scale_x_discrete(labels = stringr::str_wrap(c(1:35),
width = 4)) + width = 4)) +
ggplot2::theme_bw() + ggplot2::theme_bw() +
theme( ggplot2::theme(
axis.text.x = element_text( axis.text.x = ggplot2::element_text(
#angle = 30, #angle = 30,
hjust = 0.5, hjust = 0.5,
vjust = 0.5, vjust = 0.5,
size = 10, size = 10,
face = "bold" face = "bold"
), ),
axis.title.x = element_text(size = 25, face = "bold"), axis.title.x = ggplot2::element_text(size = 25, face = "bold"),
axis.title.y = element_text(size = 25, face = "bold")#, axis.title.y = ggplot2::element_text(size = 25, face = "bold")#,
#panel.background = element_blank() #panel.background = element_blank()
) + ) +
geom_vline(xintercept = 27) ggplot2::geom_vline(xintercept = 27)
plot_data_importance plot_data_importance
``` ```
```{r tab4, echo = FALSE, include = T} ```{r tab4, echo = FALSE, include = T}
table_4_som_quality <- table_4_som_quality <-
do.call("rbind", list(som_gain_27f_5x5[[27]]$quality$`648obs`)) do.call("rbind", list(som_models[[best_n_features]]$quality$`648obs`))
DT::datatable( DT::datatable(
table_4_som_quality, table_4_som_quality,
...@@ -275,10 +307,8 @@ DT::datatable( ...@@ -275,10 +307,8 @@ DT::datatable(
``` ```
```{r tab5, echo=FALSE, include = T} ```{r tab5, echo=FALSE, include = T}
# Load pre trained models for 2:35 features
som_gain_2_35f_5x5 <- base::readRDS("D:/it4i/movement-classification/trained_models/som_gain_2_35f_5x5.RDS")
table_2_som_quality <- get_acc_all_features(som_gain_2_35f_5x5) table_2_som_quality <- get_acc_all_features(som_models)
DT::datatable( DT::datatable(
table_2_som_quality$table_format, table_2_som_quality$table_format,
...@@ -286,35 +316,32 @@ DT::datatable( ...@@ -286,35 +316,32 @@ DT::datatable(
filter = list(position = 'top', clear = FALSE) filter = list(position = 'top', clear = FALSE)
) |> ) |>
DT::formatRound(columns = c(2:27), digits = 3) DT::formatRound(columns = c(2:27), digits = 3)
``` ```
# Supplementary Materials # Supplementary Materials
```{r, echo=FALSE, include = T} ```{r compute_clustering, echo=FALSE, include = T}
# Used SOM in article # Used SOM in article
som_27f <- som_gain_27f_5x5[[27]]$som$tsfresh$SOM_results$som_model som_best <- som_models[[best_n_features]]$som$tsfresh$SOM_results$som_model
# Get superclusters number # Get superclusters number
superclust_pam <- cluster::pam(som_27f$codes[[1]], superclust_num) superclust_pam <- cluster::pam(som_best$codes[[1]], superclust_num)
superclasses_pam <- superclust_pam$clustering superclasses_pam <- superclust_pam$clustering
superclust_hclust <- stats::hclust(dist(som_27f$codes[[1]]), "complete") superclust_hclust <- stats::hclust(dist(som_best$codes[[1]]), "complete")
superclasses_hclust <- stats::cutree(superclust_hclust, superclust_num) superclasses_hclust <- stats::cutree(superclust_hclust, superclust_num)
``` ```
```{r fig3, fig.width = 5, fig.height = 6, fig.align='center', out.width="100%", fig.cap = "\\label{fig3}", echo = FALSE, include = T} ```{r fig3, fig.width = 5, fig.height = 6, fig.align='center', out.width="100%", fig.cap = "\\label{fig3}", echo = FALSE, include = T}
som_gain_27f_5x5[[27]]$som$tsfresh$SOM_results$aweSOM_plot som_models[[best_n_features]]$som$tsfresh$SOM_results$aweSOM_plot
``` ```
```{r fig6, fig.width = 5, fig.height = 6, fig.align='center', out.width="100%", fig.cap = "\\label{fig6}Supercluster table", echo = FALSE, include = T} ```{r fig6, fig.width = 5, fig.height = 6, fig.align='center', out.width="100%", fig.cap = "\\label{fig6}Supercluster table", echo = FALSE, include = T}
# Get table superclass # Get table superclass
df_table_6 <- df_table_6 <-
get_table_superclass(som_27f, superclass_number = superclust_num) get_table_superclass(som_best, superclass_number = superclust_num)
table_6 <- DT::datatable( table_6 <- DT::datatable(
df_table_6, df_table_6,
...@@ -327,7 +354,7 @@ table_6 ...@@ -327,7 +354,7 @@ table_6
```{r fig7, fig.width = 5, fig.height = 6, fig.align='center', out.width="100%", fig.cap = "\\label{fig7}Silhouette score", echo = FALSE, include = T} ```{r fig7, fig.width = 5, fig.height = 6, fig.align='center', out.width="100%", fig.cap = "\\label{fig7}Silhouette score", echo = FALSE, include = T}
#Silhouette score #Silhouette score
figure_7 <- aweSOM::aweSOMsilhouette(som_27f, superclasses_pam) figure_7 <- aweSOM::aweSOMsilhouette(som_best, superclasses_pam)
``` ```
```{r fig8, fig.width = 5, fig.height = 6, fig.align='center', out.width="100%", fig.cap = "\\label{fig5}Dendogram", echo = FALSE, include = T} ```{r fig8, fig.width = 5, fig.height = 6, fig.align='center', out.width="100%", fig.cap = "\\label{fig5}Dendogram", echo = FALSE, include = T}
......
numba==0.57.1
numba==0.55.1
tsfresh==0.20.0 tsfresh==0.20.0
numpy==1.21.6
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment