From 3bd67feb122a27b0f007b232500db8da39062088 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Martinovi=C4=8D?= <martinovic@vpns363.vsb.cz> Date: Mon, 12 Jul 2021 11:44:41 +0200 Subject: [PATCH] Update --- .Rbuildignore | 6 +- .gitignore | 8 +- DESCRIPTION | 30 +++---- LICENSE | 4 +- LICENSE.md | 42 ++++----- R/classification.R | 196 +++++++++++++++++++++--------------------- R/data.R | 92 ++++++++++---------- R/dbscan_clustering.R | 148 +++++++++++++++---------------- R/detect_outliers.R | 194 ++++++++++++++++++++--------------------- R/features.R | 3 +- R/transform.R | 66 +++++++------- README.md | 152 ++++++++++++++++---------------- 12 files changed, 471 insertions(+), 470 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 2eb964e..0ded5b3 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,3 +1,3 @@ -^LICENSE\.md$ -^.*\.Rproj$ -^\.Rproj\.user$ +^LICENSE\.md$ +^.*\.Rproj$ +^\.Rproj\.user$ diff --git a/.gitignore b/.gitignore index 5b6a065..d44df33 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -.Rproj.user -.Rhistory -.RData -.Ruserdata +.Rproj.user +.Rhistory +.RData +.Ruserdata diff --git a/DESCRIPTION b/DESCRIPTION index ea9951e..b928fd0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,15 +1,15 @@ -Package: WIPADD -Title: Wireless PD detection -Version: 1.0.1 -Authors@R: person("Tomas", "Martinovic", email = "tomas.martinovic@vsb.cz", role = c("aut", "cre")) -Description: This package is aimed at the detection of the partial discharges in the time series measured from the wireless antenna. It contains functions for detections of outliers, features computation and classification of time series. -Depends: R (>= 3.5.0) -License: MIT + file LICENSE -Encoding: UTF-8 -LazyData: true -Imports: xgboost, dplyr, tibble, purrr, moments, tidyr, stats, stringr -Suggests: -Author: Tomas Martinovic [aut, cre] -Maintainer: Tomas Martinovic <tomas.martinovic@vsb.cz> -URL: https://code.it4i.cz/ADAS/wireless-pd-detection -RoxygenNote: 7.1.1 +Package: WIPADD +Title: Wireless PD detection +Version: 1.0.1 +Authors@R: person("Tomas", "Martinovic", email = "tomas.martinovic@vsb.cz", role = c("aut", "cre")) +Description: This package is aimed at the detection of the partial discharges in the time series measured from the wireless antenna. It contains functions for detections of outliers, features computation and classification of time series. +Depends: R (>= 3.5.0) +License: MIT + file LICENSE +Encoding: UTF-8 +LazyData: true +Imports: xgboost, dplyr, tibble, purrr, moments, tidyr, stats, stringr +Suggests: +Author: Tomas Martinovic [aut, cre] +Maintainer: Tomas Martinovic <tomas.martinovic@vsb.cz> +URL: https://code.it4i.cz/ADAS/wireless-pd-detection +RoxygenNote: 7.1.1 diff --git a/LICENSE b/LICENSE index b6047c4..7ce070d 100644 --- a/LICENSE +++ b/LICENSE @@ -1,2 +1,2 @@ -YEAR: 2021 -COPYRIGHT HOLDER: Tomas Martinovic +YEAR: 2021 +COPYRIGHT HOLDER: Tomas Martinovic diff --git a/LICENSE.md b/LICENSE.md index 97b280a..7ac768b 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,21 +1,21 @@ -# MIT License - -Copyright (c) 2021 Tomas Martinovic - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. +# MIT License + +Copyright (c) 2021 Tomas Martinovic + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/R/classification.R b/R/classification.R index 151a7c8..b67021f 100644 --- a/R/classification.R +++ b/R/classification.R @@ -1,98 +1,98 @@ -train_xgb_classificator <- function(features, - label, - nround = 100, - param = NULL) { - #' Train classification model - #' - #' This algorithm takes features matrix and labels to train the xgb model. - #' @param features matrix, features matrix for training - #' @param label vector, vector of labels for training - #' @param nround integer, nround parameter for `xgb.train` function - #' @param param list, list of parameters for `xgb.train` function, if NULL it will use binary logistic regression - #' @importFrom xgboost xgb.DMatrix xgb.train - #' @export - #' @examples - #' model <- train_xgb_classificator(example_train_features_matrix, example_train_label) - #' @return xgb model - - if (is.null(param)) { - param <- list( - "objective" = "binary:logistic", - "scale_pos_weight" = 5, - "eval_metric" = "auc", - "eval_metric" = "ams@0.15", - "eta" = 0.1, - "max_depth" = 6 - ) - } - - # Create training and testing set - dtrain <- xgboost::xgb.DMatrix(data = features, - label = label) - - model <- xgboost::xgb.train(params = param, - data = dtrain, - nround = nround) - return(model) -} - -classify_features <- function(features, - model) { - #' Classify from feature matrix - #' - #' This algorithm takes features matrix and pretrain classification model and returns probability of partial discharge presence in the series. - #' @param features matrix, features matrix - #' @param model xgb pretrained model for classification - #' @importFrom xgboost xgb.DMatrix - #' @importFrom stats predict - #' @export - #' @examples - #' res <- classify_features(example_test_features_matrix, example_model) - #' @return numeric, a probability that there is a partial discharge in given time series - - dtest <- xgboost::xgb.DMatrix(data = features) - - pred <- predict(model, dtest) - return(pred) -} - -classify_series <- function(series, - model, - max_density = 20, - distance = 30, - window_size = 1001, - cl_width = 30000, - sign = TRUE, - signal_transform = FALSE) { - #' Classify time series - #' - #' This algorithm takes time series and pretrain classification model and returns probability of partial discharge presence in the series. This algorithm assumes the data comes from an 8 bit analog to digital converter. - #' @param series numeric vector, input time series - #' @param model xgb pretrained model for classification - #' @param max_density integer, the limit of neighbors of evaluated point to be evaluated as an outlier , default 20 - #' @param distance integer, define the neighborhood range in y-axis from the examined point, default 30 - #' @param window_size integer, size of the moving window which should be used for outlier detection, default 1001 - #' @param cl_width integer, a number determining maximum distance of the observations, default 30000 - #' @param sign boolean, should the positive and negative values be clustered separately?, default TRUE - #' @param signal_transform boolean, should the input time series be transformed to steepness series, default FALSE - #' @keywords time series, steepness, clustering, features, outlier detection - #' @export - #' @examples - #' res <- classify_series(example_series, example_model2) - #' @return numeric, a probability that there is a partial discharge in given time series - - temp_features <- get_group_features_series( - series, - max_density = max_density, - distance = distance, - window_size = window_size, - cl_width = cl_width, - sign = sign, - signal_transform = signal_transform - ) - - res <- classify_features(as.matrix(temp_features), - model) - - return(res) -} +train_xgb_classificator <- function(features, + label, + nround = 100, + param = NULL) { + #' Train classification model + #' + #' This algorithm takes features matrix and labels to train the xgb model. + #' @param features matrix, features matrix for training + #' @param label vector, vector of labels for training + #' @param nround integer, nround parameter for `xgb.train` function + #' @param param list, list of parameters for `xgb.train` function, if NULL it will use binary logistic regression + #' @importFrom xgboost xgb.DMatrix xgb.train + #' @export + #' @examples + #' model <- train_xgb_classificator(example_train_features_matrix, example_train_label) + #' @return xgb model + + if (is.null(param)) { + param <- list( + "objective" = "binary:logistic", + "scale_pos_weight" = 5, + "eval_metric" = "auc", + "eval_metric" = "ams@0.15", + "eta" = 0.1, + "max_depth" = 6 + ) + } + + # Create training and testing set + dtrain <- xgboost::xgb.DMatrix(data = features, + label = label) + + model <- xgboost::xgb.train(params = param, + data = dtrain, + nround = nround) + return(model) +} + +classify_features <- function(features, + model) { + #' Classify from feature matrix + #' + #' This algorithm takes features matrix and pretrain classification model and returns probability of partial discharge presence in the series. + #' @param features matrix, features matrix + #' @param model xgb pretrained model for classification + #' @importFrom xgboost xgb.DMatrix + #' @importFrom stats predict + #' @export + #' @examples + #' res <- classify_features(example_test_features_matrix, example_model) + #' @return numeric, a probability that there is a partial discharge in given time series + + dtest <- xgboost::xgb.DMatrix(data = features) + + pred <- predict(model, dtest) + return(pred) +} + +classify_series <- function(series, + model, + max_density = 20, + distance = 30, + window_size = 1001, + cl_width = 30000, + sign = TRUE, + signal_transform = FALSE) { + #' Classify time series + #' + #' This algorithm takes time series and pretrain classification model and returns probability of partial discharge presence in the series. This algorithm assumes the data comes from an 8 bit analog to digital converter. + #' @param series numeric vector, input time series + #' @param model xgb pretrained model for classification + #' @param max_density integer, the limit of neighbors of evaluated point to be evaluated as an outlier , default 20 + #' @param distance integer, define the neighborhood range in y-axis from the examined point, default 30 + #' @param window_size integer, size of the moving window which should be used for outlier detection, default 1001 + #' @param cl_width integer, a number determining maximum distance of the observations, default 30000 + #' @param sign boolean, should the positive and negative values be clustered separately?, default TRUE + #' @param signal_transform boolean, should the input time series be transformed to steepness series, default FALSE + #' @keywords time series, steepness, clustering, features, outlier detection + #' @export + #' @examples + #' res <- classify_series(example_series, example_model2) + #' @return numeric, a probability that there is a partial discharge in given time series + + temp_features <- get_group_features_series( + series, + max_density = max_density, + distance = distance, + window_size = window_size, + cl_width = cl_width, + sign = sign, + signal_transform = signal_transform + ) + + res <- classify_features(as.matrix(temp_features), + model) + + return(res) +} diff --git a/R/data.R b/R/data.R index aa95848..bcfe1a9 100644 --- a/R/data.R +++ b/R/data.R @@ -1,47 +1,47 @@ -#' Example time series from the wireless antenna. -#' -#' This is a simple numeric vector representing data from the wireless antenna for the examples. -#' -#' @format numeric vector with 800,000 observations -#' @source custom measurements -"example_series" - -#' Example feature matrix. -#' -#' This is a example matrix with features created from the measured data used for training in examples. -#' -#' @format matrix [646, 71] -#' @source custom measurements -"example_train_features_matrix" - -#' Example feature matrix. -#' -#' This is a example matrix with features created from the measured data used for prediction testing in examples. -#' -#' @format matrix [276, 71] -#' @source custom measurements -"example_test_features_matrix" - -#' Example feature label. -#' -#' This is a example labels vector used for training. -#' -#' @format boolean vector -#' @source custom measurements -"example_train_label" - -#' Example model. -#' -#' This is a example model computed with the example training features matrix and example train label used in examples. -#' -#' @format boolean vector -#' @source custom measurements -"example_model" - -#' Example model. -#' -#' This is a example model computed with different variables than `example_model` so it can be run in classify series example. -#' -#' @format boolean vector -#' @source custom measurements +#' Example time series from the wireless antenna. +#' +#' This is a simple numeric vector representing data from the wireless antenna for the examples. +#' +#' @format numeric vector with 800,000 observations +#' @source custom measurements +"example_series" + +#' Example feature matrix. +#' +#' This is a example matrix with features created from the measured data used for training in examples. +#' +#' @format matrix [646, 71] +#' @source custom measurements +"example_train_features_matrix" + +#' Example feature matrix. +#' +#' This is a example matrix with features created from the measured data used for prediction testing in examples. +#' +#' @format matrix [276, 71] +#' @source custom measurements +"example_test_features_matrix" + +#' Example feature label. +#' +#' This is a example labels vector used for training. +#' +#' @format boolean vector +#' @source custom measurements +"example_train_label" + +#' Example model. +#' +#' This is a example model computed with the example training features matrix and example train label used in examples. +#' +#' @format boolean vector +#' @source custom measurements +"example_model" + +#' Example model. +#' +#' This is a example model computed with different variables than `example_model` so it can be run in classify series example. +#' +#' @format boolean vector +#' @source custom measurements "example_model2" \ No newline at end of file diff --git a/R/dbscan_clustering.R b/R/dbscan_clustering.R index 42c3aaf..cac65dd 100644 --- a/R/dbscan_clustering.R +++ b/R/dbscan_clustering.R @@ -1,74 +1,74 @@ -dbscan_ts <- function(ind_series, - cl_width, - sign = FALSE, - values = NULL) { - #' Cluster observations in time series - #' - #' This algorithm cluster observations of a time series based on their indexes. - #' Observations are part of the group if their neighbors are distance less than `cl_width` or a path of neighbors exists which connects the two. - #' @param ind_series numeric vector, indexes of the observations to be clustered - #' @param cl_width integer, a number determining maximum distance of the observations - #' @param sign boolean, should the positive and negative values be clustered separately?, default FALSE - #' @param values numeric vector, values of the clustered series, necessary if sign is TRUE, default NULL - #' @keywords time series, clustering - #' @export - #' @examples - #' outliers <- detect_outliers(example_series) - #' - #' groups_no_sign <- dbscan_ts(outliers, cl_width = 10000) - #' - #' groups <- dbscan_ts(outliers, cl_width = 10000, sign = TRUE, example_series[outliers]) - #' @return numeric vector, group numbers - - if (sign & (any(values < 0) & any(values >= 0))) { - res <- rep(NA, length(ind_series)) - group <- 1 - - # Negative part - if (length(ind_series[values < 0]) == 1) { - res[values < 0] <- group - } else { - differences <- diff(ind_series[values < 0]) < cl_width - temp_res <- rep(NA, length(ind_series[values < 0])) - for (i in 1:length(differences)) { - temp_res[i] <- group - if (!differences[i]) { - group <- group + 1 - } - } - temp_res[i + 1] <- group - res[values < 0] <- temp_res - } - - # Positive part - group <- group + 1 - - if (length(ind_series[values >= 0]) == 1) { - res[values >= 0] <- group - } else { - differences <- diff(ind_series[values >= 0]) < cl_width - temp_res <- rep(NA, length(ind_series[values >= 0])) - for (i in 1:length(differences)) { - temp_res[i] <- group - if (!differences[i]) { - group <- group + 1 - } - } - temp_res[i + 1] <- group - res[values >= 0] <- temp_res - } - - } else { - differences <- diff(ind_series) < cl_width - res <- rep(NA, length(ind_series)) - group <- 1 - for (i in 1:length(differences)) { - res[i] <- group - if (!differences[i]) { - group <- group + 1 - } - } - res[i + 1] <- group - } - return(res) -} +dbscan_ts <- function(ind_series, + cl_width, + sign = FALSE, + values = NULL) { + #' Cluster observations in time series + #' + #' This algorithm cluster observations of a time series based on their indexes. + #' Observations are part of the group if their neighbors are distance less than `cl_width` or a path of neighbors exists which connects the two. + #' @param ind_series numeric vector, indexes of the observations to be clustered + #' @param cl_width integer, a number determining maximum distance of the observations + #' @param sign boolean, should the positive and negative values be clustered separately?, default FALSE + #' @param values numeric vector, values of the clustered series, necessary if sign is TRUE, default NULL + #' @keywords time series, clustering + #' @export + #' @examples + #' outliers <- detect_outliers(example_series) + #' + #' groups_no_sign <- dbscan_ts(outliers, cl_width = 10000) + #' + #' groups <- dbscan_ts(outliers, cl_width = 10000, sign = TRUE, example_series[outliers]) + #' @return numeric vector, group numbers + + if (sign & (any(values < 0) & any(values >= 0))) { + res <- rep(NA, length(ind_series)) + group <- 1 + + # Negative part + if (length(ind_series[values < 0]) == 1) { + res[values < 0] <- group + } else { + differences <- diff(ind_series[values < 0]) < cl_width + temp_res <- rep(NA, length(ind_series[values < 0])) + for (i in 1:length(differences)) { + temp_res[i] <- group + if (!differences[i]) { + group <- group + 1 + } + } + temp_res[i + 1] <- group + res[values < 0] <- temp_res + } + + # Positive part + group <- group + 1 + + if (length(ind_series[values >= 0]) == 1) { + res[values >= 0] <- group + } else { + differences <- diff(ind_series[values >= 0]) < cl_width + temp_res <- rep(NA, length(ind_series[values >= 0])) + for (i in 1:length(differences)) { + temp_res[i] <- group + if (!differences[i]) { + group <- group + 1 + } + } + temp_res[i + 1] <- group + res[values >= 0] <- temp_res + } + + } else { + differences <- diff(ind_series) < cl_width + res <- rep(NA, length(ind_series)) + group <- 1 + for (i in 1:length(differences)) { + res[i] <- group + if (!differences[i]) { + group <- group + 1 + } + } + res[i + 1] <- group + } + return(res) +} diff --git a/R/detect_outliers.R b/R/detect_outliers.R index 86ea407..ce1ddea 100644 --- a/R/detect_outliers.R +++ b/R/detect_outliers.R @@ -1,97 +1,97 @@ -detect_outliers <- function(series, - window_size = 501, - distance = 30, - hist_size = 256, - central = TRUE, - max_density = 20, - symmetric = TRUE) { - #' Detect outliers - #' - #' Detect outliers in the given time series. - #' This algorithm is focused on low computational complexity and is tailored for the time series which contains - #' a lot of noise and are stationary. Originally, the input time series come from a sensor with an 8bit analog digital converter, therefore the values are in range of [-127, 128]. - #' Thanks to many input parameters it can be tailored to much broader range of input time series. - #' @param series numeric vector, input time series - #' @param window_size integer, size of the moving window which should be used for outlier detection, default 501 - #' @param distance integer, define the neighborhood range in y-axis from the examined point, default 30 - #' @param hist_size integer, range of the values in the input time series, default 256 - #' @param central boolean, should the evaluated point be in center, or beginning of the moving window, default TRUE - #' @param max_density integer, the limit of neighbors of evaluated point to be evaluated as an outlier , default 20 - #' @param symmetric boolean, is the signal symmetric in negative and positive values extremes?, default TRUE - #' @keywords time series, outlier detection - #' @export - #' @examples - #' outliers <- detect_outliers(example_series) - #' @return list with outliers indeces. - - if (length(series) <= window_size) { - stop("series length must be longer than window_size") - } - - if (central) { - examined <- ceiling(window_size / 2) - } else { - examined <- window_size - } - # Start and end conditions not taken care of - ind_end <- length(series) - window_size - - if (!symmetric) { - shift <- max(round(abs(min(series))), distance + 1) - } else { - shift <- 129 - } - - l_outliers <- list() - count <- 1 - - # Create histogram - hist_size <- max((shift + distance + 1), hist_size) - histogram <- rep(0, hist_size) - - # Initial fill of histogram - for (i in 1:window_size) { - ind <- round(series[i]) + shift - histogram[ind] <- histogram[ind] + 1 - } - - # Examine how many points are in histogram starting from - # If there is less than "max_density" no. of points, the point is considered outlier. - if (series[examined] > 0) { - ind <- round(series[examined]) - distance + shift - res <- sum(histogram[ind:hist_size]) - } else { - ind <- shift + round(series[examined]) + distance - res <- sum(histogram[1:ind]) - } - - if (res <= max_density) { - l_outliers[[count]] <- examined - count <- count + 1 - } - - # Main window loop - for (i in (window_size + 1):ind_end) { - ind <- round(series[i]) + shift - histogram[ind] <- histogram[ind] + 1 - ind <- series[i - window_size] + shift - histogram[ind] <- histogram[ind] - 1 - - examined <- examined + 1 - # Examine if the point is an outlier - if (series[examined] > 0) { - ind <- round(series[examined]) - distance + shift - res <- sum(histogram[ind:hist_size]) - } else { - ind <- shift + round(series[examined]) + distance - res <- sum(histogram[1:ind]) - } - - if (res <= max_density) { - l_outliers[[count]] <- examined - count <- count + 1 - } - } - - return(unlist(l_outliers)) -} +detect_outliers <- function(series, + window_size = 501, + distance = 30, + hist_size = 256, + central = TRUE, + max_density = 20, + symmetric = TRUE) { + #' Detect outliers + #' + #' Detect outliers in the given time series. + #' This algorithm is focused on low computational complexity and is tailored for the time series which contains + #' a lot of noise and are stationary. Originally, the input time series come from a sensor with an 8bit analog digital converter, therefore the values are in range of [-127, 128]. + #' Thanks to many input parameters it can be tailored to much broader range of input time series. + #' @param series numeric vector, input time series + #' @param window_size integer, size of the moving window which should be used for outlier detection, default 501 + #' @param distance integer, define the neighborhood range in y-axis from the examined point, default 30 + #' @param hist_size integer, range of the values in the input time series, default 256 + #' @param central boolean, should the evaluated point be in center, or beginning of the moving window, default TRUE + #' @param max_density integer, the limit of neighbors of evaluated point to be evaluated as an outlier , default 20 + #' @param symmetric boolean, is the signal symmetric in negative and positive values extremes?, default TRUE + #' @keywords time series, outlier detection + #' @export + #' @examples + #' outliers <- detect_outliers(example_series) + #' @return list with outliers indeces. + + if (length(series) <= window_size) { + stop("series length must be longer than window_size") + } + + if (central) { + examined <- ceiling(window_size / 2) + } else { + examined <- window_size + } + # Start and end conditions not taken care of + ind_end <- length(series) - window_size + + if (!symmetric) { + shift <- max(round(abs(min(series))), distance + 1) + } else { + shift <- 129 + } + + l_outliers <- list() + count <- 1 + + # Create histogram + hist_size <- max((shift + distance + 1), hist_size) + histogram <- rep(0, hist_size) + + # Initial fill of histogram + for (i in 1:window_size) { + ind <- round(series[i]) + shift + histogram[ind] <- histogram[ind] + 1 + } + + # Examine how many points are in histogram starting from + # If there is less than "max_density" no. of points, the point is considered outlier. + if (series[examined] > 0) { + ind <- round(series[examined]) - distance + shift + res <- sum(histogram[ind:hist_size]) + } else { + ind <- shift + round(series[examined]) + distance + res <- sum(histogram[1:ind]) + } + + if (res <= max_density) { + l_outliers[[count]] <- examined + count <- count + 1 + } + + # Main window loop + for (i in (window_size + 1):ind_end) { + ind <- round(series[i]) + shift + histogram[ind] <- histogram[ind] + 1 + ind <- series[i - window_size] + shift + histogram[ind] <- histogram[ind] - 1 + + examined <- examined + 1 + # Examine if the point is an outlier + if (series[examined] > 0) { + ind <- round(series[examined]) - distance + shift + res <- sum(histogram[ind:hist_size]) + } else { + ind <- shift + round(series[examined]) + distance + res <- sum(histogram[1:ind]) + } + + if (res <= max_density) { + l_outliers[[count]] <- examined + count <- count + 1 + } + } + + return(unlist(l_outliers)) +} diff --git a/R/features.R b/R/features.R index 9eb8820..81dfc08 100644 --- a/R/features.R +++ b/R/features.R @@ -49,6 +49,7 @@ get_group_features_series <- function(series, cl_width = 30000, sign = TRUE, signal_transform = FALSE, + symmetric = TRUE, summarize = TRUE) { #' Get group features from a time series #' @@ -88,7 +89,7 @@ get_group_features_series <- function(series, distance = distance, window_size = window_size, hist_size = hist_size, - symmetric = signal_transform + symmetric = symmetric ) if (is.null(out)) { diff --git a/R/transform.R b/R/transform.R index ad4ebbe..fa47af4 100644 --- a/R/transform.R +++ b/R/transform.R @@ -1,34 +1,34 @@ -transform_to_steepness <- function(series) { - #' Transform series to steepness - #' - #' This algorithm takes input time series and compute steepness in the time series. - #' @param series numeric vector, input time series - #' @keywords time series, steepness - #' @export - #' @examples - #' steepness <- transform_to_steepness(example_series) - #' @return numeric vector, steepness series. - diffs <- c(NA, diff(series)) - series_length <- length(diffs) - - signs <- sign(diffs) - X <- rep(0, series_length) - value <- 0 - temp_sign <- 0 - temp_length <- 1 - - for (i in 2:series_length) { - sign_i <- signs[i] - if (sign_i == temp_sign) { - value <- value + diffs[i] - temp_length <- temp_length + 1 - } else { - X[i - 1] <- value / temp_length - value <- diffs[i] - temp_sign <- sign_i - temp_length <- 1 - } - } - - return(X) +transform_to_steepness <- function(series) { + #' Transform series to steepness + #' + #' This algorithm takes input time series and compute steepness in the time series. + #' @param series numeric vector, input time series + #' @keywords time series, steepness + #' @export + #' @examples + #' steepness <- transform_to_steepness(example_series) + #' @return numeric vector, steepness series. + diffs <- c(NA, diff(series)) + series_length <- length(diffs) + + signs <- sign(diffs) + X <- rep(0, series_length) + value <- 0 + temp_sign <- 0 + temp_length <- 1 + + for (i in 2:series_length) { + sign_i <- signs[i] + if (sign_i == temp_sign) { + value <- value + diffs[i] + temp_length <- temp_length + 1 + } else { + X[i - 1] <- value / temp_length + value <- diffs[i] + temp_sign <- sign_i + temp_length <- 1 + } + } + + return(X) } \ No newline at end of file diff --git a/README.md b/README.md index 4596223..2e1d67a 100644 --- a/README.md +++ b/README.md @@ -1,77 +1,77 @@ -# Wireless PD detection - -This repository contains [R](https://cran.r-project.org/) package for the partial discharge detection from the time series measured by the wireless PD detector. It contains functions to find the outliers, create features describing time series and classification learning and prediction - -You can install this package using `devtools` package: -``` r -devtools::install_git("https://code.it4i.cz/ADAS/wireless-pd-detection") -``` -or `remotes` package: - -``` r -remotes::install_git("https://code.it4i.cz/ADAS/wireless-pd-detection") -``` - -# Basic usage - -This package accommodates two basic functions: learning the classification model from the provided data and classification of the results based on the XGBoost model. - -The learning itself is composed of two steps. The first, is to compute the features of the time series and then use the computed features for the XGBoost model learning. - -To get the features from the time series You can use following code: - -``` r -library(WIPADD) - -# Get random series -series <- example_series - -tbl_features <- get_group_features_series(series) -``` - -To get features of many series at once it is possible to map over multiple series: -``` r -library(WIPADD) -library(purrr) - - -l_series <- list(example_series, - sample(example_series, - 8e5) - ) - -tbl_features <- map_dfr(l_series, - get_group_features_series) -) - -``` -When the features are prepared, the learning of the XGBoost model will is easily made by calling: - -```r -library(WIPADD) - -features <- example_train_features_matrix -labels <- example_train_label - -model <- train_xgb_classificator(features, - labels) -``` - -Finally, if You already have the model, then You can classify the series by: -``` r -library(WIPADD) - -# Get random series -series <- example_series - -classify_series(series, - example_model2) - -``` - - -For more info on the functions and their parameters see [manual](inst/WIPADD_1.0.1.pdf) - -# Acknowledgment - +# Wireless PD detection + +This repository contains [R](https://cran.r-project.org/) package for the partial discharge detection from the time series measured by the wireless PD detector. It contains functions to find the outliers, create features describing time series and classification learning and prediction + +You can install this package using `devtools` package: +``` r +devtools::install_git("https://code.it4i.cz/ADAS/wireless-pd-detection") +``` +or `remotes` package: + +``` r +remotes::install_git("https://code.it4i.cz/ADAS/wireless-pd-detection") +``` + +# Basic usage + +This package accommodates two basic functions: learning the classification model from the provided data and classification of the results based on the XGBoost model. + +The learning itself is composed of two steps. The first, is to compute the features of the time series and then use the computed features for the XGBoost model learning. + +To get the features from the time series You can use following code: + +``` r +library(WIPADD) + +# Get random series +series <- example_series + +tbl_features <- get_group_features_series(series) +``` + +To get features of many series at once it is possible to map over multiple series: +``` r +library(WIPADD) +library(purrr) + + +l_series <- list(example_series, + sample(example_series, + 8e5) + ) + +tbl_features <- map_dfr(l_series, + get_group_features_series) +) + +``` +When the features are prepared, the learning of the XGBoost model will is easily made by calling: + +```r +library(WIPADD) + +features <- example_train_features_matrix +labels <- example_train_label + +model <- train_xgb_classificator(features, + labels) +``` + +Finally, if You already have the model, then You can classify the series by: +``` r +library(WIPADD) + +# Get random series +series <- example_series + +classify_series(series, + example_model2) + +``` + + +For more info on the functions and their parameters see [manual](inst/WIPADD_1.0.1.pdf) + +# Acknowledgment + This work was supported by Technology Agency of the Czech Republic - project TN01000007. \ No newline at end of file -- GitLab