diff --git a/.Rbuildignore b/.Rbuildignore
index 11393e7..1739a09 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -13,4 +13,5 @@
^README-.*\.png$
^\.httr-oauth$
^make\.R$
-^CONDUCT\.md$
+^CODE_OF_CONDUCT\.md$
+^\.github/
diff --git a/DESCRIPTION b/DESCRIPTION
index 6feaaef..2f82f81 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,18 +1,21 @@
Package: tsmp
Type: Package
Title: Time Series with Matrix Profile
-Version: 0.2.13.9004
+Version: 0.2.14.9009
Authors@R: c(
person("Francisco", "Bischoff", email = "fbischoff@med.up.pt", role = c("aut", "cre"), comment = c(ORCID = "https://orcid.org/0000-0002-5301-8672")),
- person("Michael", "Yeh", email = "myeh003@ucr.edu", role = c("res", "ccp", "ctb"))
+ person("Michael", "Yeh", email = "myeh003@ucr.edu", role = c("res", "ccp", "ctb"), comment = c(ORCID = "https://orcid.org/0000-0002-9807-2963")),
+ person("Diego", "Silva", email = "diegofs@ufscar.br", role = c("res", "ccp", "ctb"), comment = c(ORCID = "https://orcid.org/0000-0002-5184-9413")),
+ person("Yan", "Zhu", email = "yzhu015@ucr.edu", role = c("res", "ccp", "ctb"))
)
Maintainer: Francisco Bischoff
Description: A toolkit implementing the Matrix Profile concept that was created by CS-UCR .
License: MIT + file LICENSE
URL: https://github.com/franzbischoff/tsmp
BugReports: https://github.com/franzbischoff/tsmp/issues
-Depends: R (>= 2.10), beepr, doSNOW, parallel, foreach
+Depends: R (>= 2.10), audio, doSNOW, parallel, foreach
Encoding: UTF-8
+Language: en-US
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 6.1.0
diff --git a/NAMESPACE b/NAMESPACE
index 1721b2c..0617192 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,6 +2,7 @@
export(fast.movavg)
export(fast.movsd)
+export(find.chains)
export(fluss)
export(fluss.cac)
export(fluss.extract)
@@ -14,10 +15,11 @@ export(mstomp.par)
export(sdts.f.score)
export(sdts.predict)
export(sdts.train)
+export(simple.fast)
export(stamp)
export(stamp.par)
export(unconstrain.search)
-import(beepr)
+import(audio)
import(doSNOW)
import(foreach)
import(parallel)
diff --git a/NEWS b/NEWS
index 7ed38e8..5d9c893 100644
--- a/NEWS
+++ b/NEWS
@@ -1,7 +1,12 @@
-tsmp 0.2.30
+tsmp 0.2.14
===========
+- Added SiMPle algorithm for sound data.
+- Added FLUSS algorithm.
+- Added \[find.chains()\] to look for chains primitives.
+- Changed dependency from beepr to audio (actually beepr depends on
+ audio, so less dependencies)
- Added a `NEWS.md` file to track changes to the package.
tsmp 0.2.x
diff --git a/NEWS.Rmd b/NEWS.Rmd
index 3bba9df..fd0d548 100644
--- a/NEWS.Rmd
+++ b/NEWS.Rmd
@@ -21,8 +21,12 @@ knitr::opts_chunk$set(
)
```
-# tsmp 0.2.30
+# tsmp 0.2.14
+* Added SiMPle algorithm for sound data.
+* Added FLUSS algorithm.
+* Added [find.chains()] to look for chains primitives.
+* Changed dependency from beepr to audio (actually beepr depends on audio, so less dependencies)
* Added a `NEWS.md` file to track changes to the package.
# tsmp 0.2.x
diff --git a/NEWS.md b/NEWS.md
index e5199ad..a162d02 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,12 +1,17 @@
NEWS
================
Francisco Bischoff
-\- 18 Aug 2018
+\- 20 Aug 2018
-# tsmp 0.2.30
+# tsmp 0.2.14
+ - Added SiMPle algorithm for sound data.
+ - Added FLUSS algorithm.
+ - Added \[find.chains()\] to look for chains primitives.
+ - Changed dependency from beepr to audio (actually beepr depends on
+ audio, so less dependencies)
- Added a `NEWS.md` file to track changes to the package.
# tsmp 0.2.x
diff --git a/R/find_chains.R b/R/find_chains.R
new file mode 100644
index 0000000..8daee93
--- /dev/null
+++ b/R/find_chains.R
@@ -0,0 +1,60 @@
+#' Find Time Series Chains
+#'
+#' Time Series Chains is a new primitive for time series data mining.
+#'
+#' @param matrices a result from STAMP or STOMP algorithms
+#'
+#' @return Returns `chains`, a `list` of the chains found with more than 2 patterns, and `best`
+#'   with the best one.
+#' @export
+#' @references 1. Zhu Y, Imamura M, Nikovski D, Keogh E. Introducing time series chains: a new
+#' primitive for time series data mining. Knowl Inf Syst. 2018 Jun 2;1–27.
+#' @references Website:
+#' @examples
+#' w <- 50
+#' data <- gait_data
+#' mp <- stamp(data, window.size = w, exclusion.zone = 1/4, verbose = 0)
+#' find.chains(mp)
+#'
+find.chains <- function(matrices) {
+  size <- length(matrices$rpi)
+  chain.length <- rep(1, size)
+  chain.set <- list()
+  # `k` is the next free slot in `chain.set`
+  k <- 1
+  # an index still flagged with 1 has not been absorbed by an earlier chain
+  for (i in seq_len(size)) {
+    if (chain.length[i] == 1) {
+      j <- i
+      chain <- j
+      # follow `rpi` while the `lpi` of the target points back to `j`
+      while (matrices$rpi[j] > 0 && matrices$lpi[matrices$rpi[j]] == j) {
+        j <- matrices$rpi[j]
+        chain.length[j] <- -1 # member of a chain, must not start its own
+        chain.length[i] <- chain.length[i] + 1
+        chain <- c(chain, j)
+      }
+      # only chains with more than 2 patterns are reported
+      if (length(chain) > 2) {
+        chain.set[[k]] <- chain
+        k <- k + 1
+      }
+    }
+  }
+  # length of the longest chain (the extra 0 covers an empty profile)
+  l <- max(chain.length, 0)
+  # among the longest chains keep the one with the lowest mean `rmp`
+  best.chain <- NULL
+  best.mean <- Inf
+  for (i in seq_along(chain.set)) { # seq_along: `chain.set` may be empty
+    if (length(chain.set[[i]]) == l) {
+      n <- mean(matrices$rmp[chain.set[[i]]])
+      if (n < best.mean) {
+        best.mean <- n
+        best.chain <- chain.set[[i]]
+      }
+    }
+  }
+  # `best` stays NULL when no chain with more than 2 patterns exists
+  return(list(chains = chain.set, best = best.chain))
+}
diff --git a/R/fluss.R b/R/fluss.R
index 0169241..bc4ef17 100644
--- a/R/fluss.R
+++ b/R/fluss.R
@@ -125,7 +125,7 @@ fluss.extract <- function(arc.counts, num.segments, window.size, exclusion.zone
#' Computes the arc count with edge correction (CAC).
#'
#' Original paper suggest using the classic statistical-process-control heuristic to set a threshold
-#' where a semantic change may occur in CAC. This may be useful in realtime implementation as we don't
+#' where a semantic change may occur in CAC. This may be useful in real-time implementation as we don't
#' know in advance the number of domain changes to look for. Please check original paper (1).
#'
#' @param profile.index the profile index for arc counting.
@@ -147,6 +147,7 @@ fluss.extract <- function(arc.counts, num.segments, window.size, exclusion.zone
#' w <- 210
#' mp <- mstomp(data, w, verbose = 0)
#' cac <- fluss.cac(mp$pi, w)
+#'
#' \dontrun{
#' data <- fluss_data$walkjogrun$data
#' w <- fluss_data$walkjogrun$window # 80
diff --git a/R/gait_data.R b/R/gait_data.R
new file mode 100644
index 0000000..085c369
--- /dev/null
+++ b/R/gait_data.R
@@ -0,0 +1,9 @@
+#' Original data used in the Time Series Chain demo
+#'
+#' @docType data
+#' @format A `matrix` with 904 rows and 1 column with the Y data from an accelerometer
+#' @source \url{https://sites.google.com/site/timeserieschain/}
+#'
+#' @references 1. Zhu Y, Imamura M, Nikovski D, Keogh E. Introducing time series chains: a new primitive for time series data mining. Knowl Inf Syst. 2018 Jun 2;1–27.
+#' @keywords datasets
+"gait_data"
diff --git a/R/m_guide_search.R b/R/m_guide_search.R
index 66afa67..c39ccf9 100644
--- a/R/m_guide_search.R
+++ b/R/m_guide_search.R
@@ -4,7 +4,7 @@
#'
#' Although this functions handles Multivariate Time Series, it can also be used to handle Univariate Time Series.
#'
-#' @param data a `matrix` of `numeric`, where each colums is a time series. Accepts `vector` (see details), `list` and `data.frame` too.
+#' @param data a `matrix` of `numeric`, where each column is a time series. Accepts `vector` (see details), `list` and `data.frame` too.
#' @param window.size an `int` with the size of the sliding window.
#' @param matrix.profile multidimensional matrix profile (matrix)
#' @param profile.index multidimensional profile index (from [mstomp()] or [mstomp.par()]).
diff --git a/R/m_unconstrain_search.R b/R/m_unconstrain_search.R
index b9ab749..dbaf0ed 100644
--- a/R/m_unconstrain_search.R
+++ b/R/m_unconstrain_search.R
@@ -4,7 +4,7 @@
#'
#' Although this functions handles Multivariate Time Series, it can also be used to handle Univariate Time Series.
#'
-#' @param data a `matrix` of `numeric`, where each colums is a time series. Accepts `vector` (see details), `list` and `data.frame` too.
+#' @param data a `matrix` of `numeric`, where each column is a time series. Accepts `vector` (see details), `list` and `data.frame` too.
#' @param window.size an `int` with the size of the sliding window.
#' @param matrix.profile multidimensional matrix profile (from [mstomp()] or [mstomp.par()]).
#' @param profile.index multidimensional profile index (from [mstomp()] or [mstomp.par()]).
diff --git a/R/misc.R b/R/misc.R
index eb55b28..370a4bc 100644
--- a/R/misc.R
+++ b/R/misc.R
@@ -70,9 +70,35 @@ fast.movavg <- function(data, n) {
std <- function(x) {
sdx <- stats::sd(x)
- if (sdx == 0)
+ if (sdx == 0) {
return(sdx)
+ }
return(sqrt((length(x) - 1) / length(x)) * sdx)
}
+#' Play sound with `audio`
+#'
+#' @param data sound data provided by this package
+#'
+#' @keywords internal
+#' @import audio
+beep <- function(data) {
+  drivers <- audio::audio.drivers()
+  # without any audio driver there is nothing to play
+  if (is.null(drivers) || nrow(drivers) == 0) {
+    return(invisible())
+  }
+  # report a playback problem as messages instead of failing the caller
+  report <- function(text) {
+    function(cond) {
+      message(text)
+      message(cond)
+      invisible()
+    }
+  }
+  on.error <- report("Failed to play audio alert")
+  on.warning <- report("Something went wrong playing audio alert")
+  tryCatch(audio::play(data), error = on.error, warning = on.warning)
+  invisible()
+}
diff --git a/R/mstomp.R b/R/mstomp.R
index adeba8e..810f5ec 100644
--- a/R/mstomp.R
+++ b/R/mstomp.R
@@ -8,7 +8,7 @@
#' Although this functions handles Multivariate Time Series, it can also be used to handle Univariate Time Series.
#' `verbose` changes how much information is printed by this function; `0` means nothing, `1` means text, `2` means text and sound.
#'
-#' @param data a `matrix` of `numeric`, where each colums is a time series. Accepts `vector` (see details), `list` and `data.frame` too.
+#' @param data a `matrix` of `numeric`, where each column is a time series. Accepts `vector` (see details), `list` and `data.frame` too.
#' @param window.size an `int` with the size of the sliding window.
#' @param must.dim an `int` or `vector` of which dimensions to forcibly include (default is `NULL`).
#' @param exc.dim an `int` or `vector` of which dimensions to exclude (default is `NULL`).
@@ -114,7 +114,7 @@ mstomp <- function(data, window.size, must.dim = NULL, exc.dim = NULL, exclusion
on.exit(close(pb))
}
if (verbose > 1) {
- on.exit(beepr::beep(), TRUE)
+ on.exit(beep(sounds[[1]]), TRUE)
}
## initialization
@@ -198,7 +198,7 @@ mstomp <- function(data, window.size, must.dim = NULL, exc.dim = NULL, exclusion
if (n.dim > 1) {
dist.pro.sort <- t(apply(distance.profile, 1, sort))
- } # sort by row, put all -Inf to the first columns
+ } # sort by row, put all -Inf to the first column
else {
dist.pro.sort <- distance.profile
}
diff --git a/R/mstomp_par.R b/R/mstomp_par.R
index e4cef6a..a481426 100644
--- a/R/mstomp_par.R
+++ b/R/mstomp_par.R
@@ -8,7 +8,7 @@
#' Although this functions handles Multivariate Time Series, it can also be used to handle Univariate Time Series.
#' `verbose` changes how much information is printed by this function; `0` means nothing, `1` means text, `2` means text and sound.
#'
-#' @param data a `matrix` of `numeric`, where each colums is a time series. Accepts `vector` (see details), `list` and `data.frame` too.
+#' @param data a `matrix` of `numeric`, where each column is a time series. Accepts `vector` (see details), `list` and `data.frame` too.
#' @param window.size an `int`. Size of the sliding window.
#' @param must.dim an `int` or `vector` of which dimensions to forcibly include (default is `NULL`).
#' @param exc.dim an `int` or `vector` of which dimensions to exclude (default is `NULL`).
@@ -29,9 +29,8 @@
#'
#' @examples
#' # using all dimensions
-#' Sys.sleep(1) # sometimes sleep is needed if you run parallel multiple times in a row
#' mp <- mstomp.par(toy_data$data[1:100,], 30, verbose = 0)
-#' @import beepr doSNOW foreach parallel
+#' @import doSNOW foreach parallel
mstomp.par <- function(data, window.size, must.dim = NULL, exc.dim = NULL, exclusion.zone = 1 / 2, verbose = 2, n.workers = 2) {
eps <- .Machine$double.eps^0.5
@@ -140,7 +139,7 @@ mstomp.par <- function(data, window.size, must.dim = NULL, exc.dim = NULL, exclu
on.exit(close(pb), TRUE)
}
if (verbose > 1) {
- on.exit(beepr::beep(), TRUE)
+ on.exit(beep(sounds[[1]]), TRUE)
}
## initialize variable
diff --git a/R/sdts_predict.R b/R/sdts_predict.R
index 757c633..7512acf 100644
--- a/R/sdts_predict.R
+++ b/R/sdts_predict.R
@@ -87,7 +87,7 @@ sdts.predict <- function(model, data, window.size) {
#' `beta` is used to balance F-score towards recall (`>1`) or precision (`<1`).
#'
#' @param gtruth a `vector` of `logical`. Ground truth annotation.
-#' @param pred a `vector` of `logical`. Predictied annotation from [sdts.predict()]
+#' @param pred a `vector` of `logical`. Predicted annotation from [sdts.predict()]
#' @param beta a `numeric`. See details. (default is `1`).
#'
#' @return Returns a `list` with `f.score`, `precision` and `recall`.
diff --git a/R/sdts_train.R b/R/sdts_train.R
index 6fd35ff..0988d90 100644
--- a/R/sdts_train.R
+++ b/R/sdts_train.R
@@ -148,7 +148,7 @@ sdts.train <- function(data, label, window.size, beta = 1, pat.max = Inf, parall
on.exit(close(pb))
}
if (verbose > 1) {
- on.exit(beepr::beep(), TRUE)
+ on.exit(beep(sounds[[1]]), TRUE)
}
for (i in 1:n.window.size) {
@@ -342,7 +342,7 @@ sdts.train <- function(data, label, window.size, beta = 1, pat.max = Inf, parall
#' @param beta a number that balance the F-Score. Beta > 1 towards recall, < towards precision
#' @param window.size an integer with the sliding window size
#'
-#' @return Returns the best threashold and its F-Score
+#' @return Returns the best threshold and its F-Score
#'
#' @keywords internal
#'
@@ -384,7 +384,7 @@ golden.section <- function(dist.pro, label, pos.st, pos.ed, beta, window.size) {
#' @param window.size an integer with the sliding window size
#' @param fit.idx an integer with the index of the current threshold
#'
-#' @return Returns the best threashold and its F-Score
+#' @return Returns the best threshold and its F-Score
#'
#' @keywords internal
diff --git a/R/simple.R b/R/simple.R
new file mode 100644
index 0000000..62ddc4e
--- /dev/null
+++ b/R/simple.R
@@ -0,0 +1,224 @@
+#' Compute the similarity join for Sound data.
+#'
+#' Compute the similarity join for Sound data.
+#'
+#' `verbose` changes how much information is printed by this function; `0` means nothing, `1` means text, `2` means text and sound.
+#'
+#' @param data a `matrix` of `numeric`, where each column is a time series. Accepts `list` and `data.frame` too.
+#' @param window.size an `int` with the size of the sliding window.
+#' @param exclusion.zone a `numeric`. Size of the exclusion zone, based on query size (default is `1/2`).
+#' @param verbose an `int`. See details. (Default is `2`).
+#'
+#' @return Returns a list with the Matrix Profile `mp` and Profile Index `pi`.
+#'
+#' @export
+#' @references 1. Silva D, Yeh C, Batista G, Keogh E. SiMPle: Assessing Music Similarity Using Subsequences Joins. Proc 17th ISMIR Conf. 2016;23–30.
+#' @references 2. Silva DF, Yeh C-CM, Zhu Y, Batista G, Keogh E. Fast Similarity Matrix Profile for Music Analysis and Exploration. IEEE Trans Multimed. 2018;14(8):1–1.
+#' @references Website:
+#' @references Website:
+#'
+#' @examples
+#' w <- 30
+#' data <- toy_data$data # 3 dimensions matrix
+#' result <- simple.fast(data, w, verbose = 0)
+#'
+simple.fast <- function(data, window.size, exclusion.zone = 1 / 2, verbose = 2) {
+  ## exclusion zone expressed in observations
+  exclusion.zone <- floor(window.size * exclusion.zone)
+
+  ## transform data list into matrix
+  if (is.list(data)) {
+    data.size <- length(data[[1]])
+    n.dim <- length(data)
+    # NOTE(review): a data.frame is also a list, so it is handled here too
+    for (i in seq_len(n.dim)) {
+      len <- length(data[[i]])
+      # pad series shorter than the first one with NA
+      if (len < data.size) {
+        data[[i]] <- c(data[[i]], rep(NA, data.size - len))
+      }
+    }
+    # transform data into matrix (each column is a TS)
+    data <- sapply(data, cbind)
+  } else if (is.matrix(data) || is.data.frame(data)) {
+    if (is.data.frame(data)) {
+      data <- as.matrix(data)
+    } # just to be uniform
+    if (ncol(data) > nrow(data)) {
+      data <- t(data)
+    }
+    data.size <- nrow(data)
+    n.dim <- ncol(data)
+  } else if (is.vector(data)) {
+    data.size <- length(data)
+    n.dim <- 1
+    # transform data into 1-col matrix
+    data <- as.matrix(data) # just to be uniform
+  } else {
+    stop("Unknown type of data. Must be: matrix, data.frame, vector or list")
+  }
+
+  ## check input
+  if (window.size > data.size / 2) {
+    stop("Error: Time series is too short relative to desired subsequence length")
+  }
+  if (window.size < 4) {
+    stop("Error: Subsequence length must be at least 4")
+  }
+
+  ## initialization
+  matrix.profile.size <- data.size - window.size + 1
+  matrix.profile <- rep(Inf, matrix.profile.size)
+  profile.index <- rep(0, matrix.profile.size)
+
+  if (verbose > 0) {
+    pb <- utils::txtProgressBar(min = 0, max = matrix.profile.size, style = 3, width = 80)
+    on.exit(close(pb))
+  }
+  if (verbose > 1) {
+    on.exit(beep(sounds[[1]]), TRUE)
+  }
+
+  ## compute necessary values
+  res <- mass.simple.pre(data, data.size, window.size = window.size)
+  data.fft <- res$data.fft
+  sumx2 <- res$sumx2
+
+  ## compute first distance profile; `drop = FALSE` keeps a matrix for one column
+  query.window <- data[1:window.size, , drop = FALSE]
+  res <- mass.simple(data.fft, query.window, data.size, window.size, sumx2)
+  distance.profile <- res$distance.profile
+  first.product <- last.product <- res$last.product
+  sumy2 <- res$sumy2
+  dropval <- query.window[1, ]
+  distance.profile[1:exclusion.zone] <- Inf
+
+  # best match for the first subsequence
+  ind <- which.min(distance.profile)
+  profile.index[1] <- ind
+  matrix.profile[1] <- distance.profile[ind]
+
+  ## compute the remainder of the matrix profile
+  for (i in 2:matrix.profile.size) {
+
+    # compute the distance profile
+    if (verbose > 0) {
+      utils::setTxtProgressBar(pb, i)
+    }
+    # subsequence starting at `i`, kept as a matrix for single-column data
+    query.window <- data[i:(i + window.size - 1), , drop = FALSE]
+    # slide the query sum of squares: remove the dropped row, add the new last row
+    sumy2 <- sumy2 - dropval^2 + query.window[window.size, ]^2
+    # update the products from the values of the previous iteration
+    for (j in seq_len(n.dim)) {
+      last.product[2:(data.size - window.size + 1), j] <- last.product[1:(data.size - window.size), j] -
+        data[1:(data.size - window.size), j] * dropval[j] +
+        data[(window.size + 1):data.size, j] * query.window[window.size, j]
+    }
+    # row 1 is taken from the products computed for the first query
+    last.product[1, ] <- first.product[i, ]
+    dropval <- query.window[1, ]
+    # accumulate sumx2 - 2 * product + sumy2 over all columns
+    distance.profile <- matrix(0, nrow(sumx2), 1)
+
+    for (j in seq_len(n.dim)) {
+      distance.profile <- distance.profile + sumx2[, j] - 2 * last.product[, j] + sumy2[j]
+    }
+    # ignore matches inside the exclusion zone around `i`
+    exc.st <- max(1, i - exclusion.zone)
+    exc.ed <- min(matrix.profile.size, i + exclusion.zone)
+    distance.profile[exc.st:exc.ed] <- Inf
+
+    ind <- which.min(distance.profile)
+    profile.index[i] <- ind
+    matrix.profile[i] <- distance.profile[ind]
+  }
+
+  return(list(mp = matrix.profile, pi = profile.index))
+}
+
+#' Precomputes several values used on MASS
+#'
+#' The difference of this function to [mass.pre()] is that this does not normalize data. Specific for this domain.
+#'
+#' @param data a `matrix` of `numeric`. Reference Time Series.
+#' @param data.size an `int`. Reference Time Series size.
+#' @param window.size an `int`. Sliding window size.
+#'
+#' @return Returns `data.fft` and `sumx2`.
+#' @keywords internal
+#'
+#' @references Abdullah Mueen, Yan Zhu, Michael Yeh, Kaveh Kamgar, Krishnamurthy Viswanathan, Chetan Kumar Gupta and Eamonn Keogh (2015), The Fastest Similarity Search Algorithm for Time Series Subsequences under Euclidean Distance.
+#' @references
+
+
+mass.simple.pre <- function(data, data.size, window.size) {
+  if (nrow(data) < ncol(data)) {
+    data <- t(data)
+  }
+  # from here on each column is handled as one series
+  n.dim <- ncol(data)
+  # append `data.size` rows of zeros before taking the FFT
+  data <- rbind(data, matrix(0, data.size, n.dim))
+  # column-wise FFT and running sum of squares
+  data.fft <- apply(data, 2, stats::fft)
+  cum_sumx2 <- apply(data^2, 2, cumsum)
+  # sum of squares per window; `drop = FALSE` keeps a matrix for one column
+  head.rows <- cum_sumx2[seq_len(data.size - window.size), , drop = FALSE]
+  sumx2 <- cum_sumx2[window.size:data.size, , drop = FALSE] - rbind(rep(0, n.dim), head.rows)
+  return(list(data.fft = data.fft, sumx2 = sumx2))
+}
+
+#' Calculates the distance profile using MASS algorithm
+#'
+#' Mueen's Algorithm for Similarity Search is The Fastest Similarity Search Algorithm for Time Series Subsequences under Euclidean Distance and Correlation Coefficient.
+#' The difference of this function to [mass()] is that this does not normalize data. Specific for this domain.
+#'
+#' @param data.fft precomputed data product.
+#' @param query.window a `matrix` of `numeric`. Query window.
+#' @param data.size an `int`. The length of the reference data.
+#' @param window.size an `int`. Sliding window size.
+#' @param sumx2 precomputed sum of squares
+#'
+#' @return Returns the `distance.profile` for the given query and the `last.product` for STOMP algorithm and `sumy2`.
+#' @keywords internal
+#'
+#' @references Abdullah Mueen, Yan Zhu, Michael Yeh, Kaveh Kamgar, Krishnamurthy Viswanathan, Chetan Kumar Gupta and Eamonn Keogh (2015), The Fastest Similarity Search Algorithm for Time Series Subsequences under Euclidean Distance
+#' @references
+#'
+
+
+mass.simple <- function(data.fft, query.window, data.size, window.size, sumx2) {
+  # orient `data.fft` so that it has at least as many rows as columns
+  if (nrow(data.fft) < ncol(data.fft)) {
+    data.fft <- t(data.fft)
+  }
+  n.dim <- ncol(data.fft)
+  # a plain vector has no columns; as.matrix() turns it into a single column
+  query.window <- as.matrix(query.window)
+  if (ncol(query.window) != n.dim) {
+    query.window <- t(query.window)
+  }
+
+  # pre-process query for fft: reverse it, then append rows of zeros
+  query.window <- apply(query.window, 2, rev)
+  query.window <- rbind(query.window, matrix(0, 2 * data.size - window.size, n.dim))
+  query.fft <- apply(query.window, 2, stats::fft)
+  # compute the product in the frequency domain and transform it back
+  Z <- data.fft * query.fft
+  z <- apply(Z, 2, function(x) {
+    stats::fft(x, inverse = TRUE) / length(x)
+  })
+  # sum of squares of the query, one value per column
+  sumy2 <- colSums(query.window^2)
+  # `drop = FALSE` keeps a matrix even when there is a single column
+  last.product <- Re(z[window.size:data.size, , drop = FALSE])
+
+  # accumulate sumx2 - 2 * product + sumy2 over all columns
+  distance.profile <- matrix(0, nrow(sumx2), 1)
+  for (i in seq_len(n.dim)) {
+    distance.profile <- distance.profile + sumx2[, i] - 2 * last.product[, i] + sumy2[i]
+  }
+
+  return(list(distance.profile = distance.profile, last.product = last.product, sumy2 = sumy2))
+}
diff --git a/R/stamp.R b/R/stamp.R
index 82e4571..542ddb6 100644
--- a/R/stamp.R
+++ b/R/stamp.R
@@ -10,7 +10,7 @@
#' @param ... a `matrix` or a `vector`. If a second time series is supplied it will be a join matrix profile.
#' @param window.size an `int`. Size of the sliding window.
#' @param exclusion.zone a `numeric`. Size of the exclusion zone, based on query size (default is `1/2`). See details.
-#' @param s.size a `numeric`. for anytime algorithm, represents the size (in observations) the random calculation will occour (default is `Inf`).
+#' @param s.size a `numeric`. for anytime algorithm, represents the size (in observations) the random calculation will occur (default is `Inf`).
#' @param verbose an `int`. See details. (Default is `2`).
#'
#' @return Returns the matrix profile `mp` and profile index `pi`.
@@ -98,7 +98,7 @@ stamp <- function(..., window.size, exclusion.zone = 1 / 2, s.size = Inf, verbos
on.exit(close(pb))
}
if (verbose > 1) {
- on.exit(beepr::beep(), TRUE)
+ on.exit(beep(sounds[[1]]), TRUE)
}
# anytime must return the result always
on.exit(return(list(
diff --git a/R/stamp_par.R b/R/stamp_par.R
index 9d94b66..40ba10d 100644
--- a/R/stamp_par.R
+++ b/R/stamp_par.R
@@ -10,7 +10,7 @@
#' @param ... a `matrix` or a `vector`. If a second time series is supplied it will be a join matrix profile.
#' @param window.size an `int`. Size of the sliding window.
#' @param exclusion.zone a `numeric`. Size of the exclusion zone, based on query size (default is `1/2`). See details.
-#' @param s.size a `numeric`. for anytime algorithm, represents the size (in observations) the random calculation will occour (default is `Inf`).
+#' @param s.size a `numeric`. for anytime algorithm, represents the size (in observations) the random calculation will occur (default is `Inf`).
#' @param n.workers an `int`. Number of workers for parallel. (Default is `2`).
#' @param verbose an `int`. See details. (Default is `2`).
#'
@@ -25,7 +25,6 @@
#' @references Website:
#'
#' @examples
-#' Sys.sleep(1) # sometimes sleep is needed if you run parallel multiple times in a row
#' mp <- stamp.par(toy_data$data[1:200,1], window.size = 30, verbose = 0)
#' \dontrun{
#' ref.data <- toy_data$data[,1]
@@ -36,7 +35,7 @@
#' mp <- stamp.par(ref.data, query.data, window.size = 30, s.size = round(nrows(query.data) * 0.1))
#' }
#'
-#' @import beepr doSNOW foreach parallel
+#' @import doSNOW foreach parallel
stamp.par <- function(..., window.size, exclusion.zone = 1 / 2, s.size = Inf, n.workers = 2, verbose = 2) {
args <- list(...)
data <- args[[1]]
@@ -107,7 +106,7 @@ stamp.par <- function(..., window.size, exclusion.zone = 1 / 2, s.size = Inf, n.
on.exit(close(pb), TRUE)
}
if (verbose > 1) {
- on.exit(beepr::beep(), TRUE)
+ on.exit(beep(sounds[[1]]), TRUE)
}
# anytime must return the result always
on.exit(return(list(
diff --git a/R/sysdata.rda b/R/sysdata.rda
new file mode 100644
index 0000000..e159947
Binary files /dev/null and b/R/sysdata.rda differ
diff --git a/R/toy_data.R b/R/toy_data.R
index 49007b5..2e383e3 100644
--- a/R/toy_data.R
+++ b/R/toy_data.R
@@ -1,11 +1,11 @@
#' Original data used in the mSTAMP demo
#'
-#' A synthetic dataset with embeded MOTIFs for multidimensional discovery
+#' A synthetic dataset with embedded MOTIFs for multidimensional discovery
#'
#' @docType data
#' @format A `list` with a `matrix` with 550 rows and 3 dimensions and an `int`:
#' \describe{
-#' \item{data}{data with embeded MOTIFs}
+#' \item{data}{data with embedded MOTIFs}
#' \item{sub.len}{size of sliding window}
#' }
#' @source \url{https://sites.google.com/view/mstamp/}
diff --git a/R/tsmp-package.R b/R/tsmp-package.R
index 5dde173..320e0ed 100644
--- a/R/tsmp-package.R
+++ b/R/tsmp-package.R
@@ -1,4 +1,4 @@
-#'
+#' @title Time Series with Matrix Profile
#' @docType package
#' @name tsmp
#' @references
diff --git a/README.Rmd b/README.Rmd
index f2bcec2..f7846e0 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -15,12 +15,12 @@ knitr::opts_chunk$set(
)
```
-# Time Series Matrix-Profile
+# Time Series with Matrix Profile
```{r include=FALSE}
library(git2r)
# current.branch <- repository_head()$name
-current.branch <- "develop"
+current.branch <- "master"
lifecycle <- "maturing" # maturing-blue # stable-brightgreen
lifecycle.color <- "blue"
```
@@ -76,14 +76,14 @@ devtools::install_github("franzbischoff/tsmp")
* Time Series Chains
* FLUSS Arc Plot and SiMPle Arc Plot
* Annotation vectors (e.g.: Stop-word MOTIF bias, Actionability bias)
-* SiMPle-Fast (Fast Similarity Matrix-Profile for Music Analysis and Exploration)
+* SiMPle-Fast (Fast Similarity Matrix Profile for Music Analysis and Exploration)
* MOTIFs under Uniform Scaling
* GPU-STOMP
* Real-time version of previous algorithms (STAMPI, FLOSS, etc)
* MASS Extensions (ADP, WQ, QwG)
* SCRIMP (waiting for publication)
-## Other projects with Matrix-Profile
+## Other projects with Matrix Profile
- Python: https://github.com/ZiyaoWei/pyMatrixProfile
## Code of Conduct
diff --git a/README.md b/README.md
index f8aa9a3..9188b2b 100644
--- a/README.md
+++ b/README.md
@@ -1,17 +1,17 @@
README
================
Francisco Bischoff
-\- 18 Aug 2018
+\- 20 Aug 2018
-# Time Series Matrix-Profile
+# Time Series with Matrix Profile
[![Packagist](https://img.shields.io/packagist/l/doctrine/orm.svg)](https://choosealicense.com/licenses/mit)
[![lifecycle](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://www.tidyverse.org/lifecycle/#maturing)
[![Build
-Status](https://travis-ci.com/franzbischoff/tsmp.svg?branch=develop)](https://travis-ci.com/franzbischoff/tsmp)
-[![codecov](https://codecov.io/gh/franzbischoff/tsmp/branch/develop/graph/badge.svg)](https://codecov.io/gh/franzbischoff/tsmp)
+Status](https://travis-ci.com/franzbischoff/tsmp.svg?branch=master)](https://travis-ci.com/franzbischoff/tsmp)
+[![codecov](https://codecov.io/gh/franzbischoff/tsmp/branch/master/graph/badge.svg)](https://codecov.io/gh/franzbischoff/tsmp)
[![CRAN
version](http://www.r-pkg.org/badges/version/tsmp)](https://cran.r-project.org/package=tsmp)
@@ -60,7 +60,7 @@ devtools::install_github("franzbischoff/tsmp")
- Time Series Chains
- FLUSS Arc Plot and SiMPle Arc Plot
- Annotation vectors (e.g.: Stop-word MOTIF bias, Actionability bias)
- - SiMPle-Fast (Fast Similarity Matrix-Profile for Music Analysis and
+ - SiMPle-Fast (Fast Similarity Matrix Profile for Music Analysis and
Exploration)
- MOTIFs under Uniform Scaling
- GPU-STOMP
@@ -68,12 +68,12 @@ devtools::install_github("franzbischoff/tsmp")
- MASS Extensions (ADP, WQ, QwG)
- SCRIMP (waiting for publication)
-## Other projects with Matrix-Profile
+## Other projects with Matrix Profile
- Python:
## Code of Conduct
Please note that this project is released with a [Contributor Code of
-Conduct](CODE_OF_CONDUCT.md). By participating in this project you agree to
-abide by its terms.
+Conduct](CODE_OF_CONDUCT.md). By participating in this project you agree
+to abide by its terms.
diff --git a/cran-comments.md b/cran-comments.md
index a1287a4..24d81b0 100644
--- a/cran-comments.md
+++ b/cran-comments.md
@@ -1,4 +1,5 @@
-This is a point release fixing failing tests on CRAN.
+* Changed dependency from beepr to audio (beepr itself depends on audio, so this means fewer dependencies).
+* Added persons to DESCRIPTION as their code/research was implemented in this package.
## Test environments
* local Windows 10 install, R 3.5.1
diff --git a/data/datalist b/data/datalist
index 818d7bb..5f4867d 100644
--- a/data/datalist
+++ b/data/datalist
@@ -1,3 +1,4 @@
test_data
toy_data
fluss_data
+gait_data
diff --git a/data/gait_data.rda b/data/gait_data.rda
new file mode 100644
index 0000000..235d48c
Binary files /dev/null and b/data/gait_data.rda differ
diff --git a/docs/CODE_OF_CONDUCT.html b/docs/CODE_OF_CONDUCT.html
new file mode 100644
index 0000000..37fa517
--- /dev/null
+++ b/docs/CODE_OF_CONDUCT.html
@@ -0,0 +1,178 @@
+
+
+
+
+
+
+
+
+Contributor Covenant Code of Conduct • tsmp
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
+
+
+
+Our Standards
+
Examples of behavior that contributes to creating a positive environment include:
+
+
Using welcoming and inclusive language
+
Being respectful of differing viewpoints and experiences
+
Gracefully accepting constructive criticism
+
Focusing on what is best for the community
+
Showing empathy towards other community members
+
+
Examples of unacceptable behavior by participants include:
+
+
The use of sexualized language or imagery and unwelcome sexual attention or advances
+
Trolling, insulting/derogatory comments, and personal or political attacks
+
Public or private harassment
+
Publishing others’ private information, such as a physical or electronic address, without explicit permission
+
Other conduct which could reasonably be considered inappropriate in a professional setting
+
+
+
+
+Our Responsibilities
+
Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
+
Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
+
+
+
+Scope
+
This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
+
+
+
+Enforcement
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
+
Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project’s leadership.
Original paper suggest using the classic statistical-process-control heuristic to set a threshold
-where a semantic change may occur in CAC. This may be useful in realtime implementation as we don't
+where a semantic change may occur in CAC. This may be useful in real-time implementation as we don't
know in advance the number of domain changes to look for. Please check original paper (1).
Mueen's Algorithm for Similarity Search is The Fastest Similarity Search Algorithm for Time Series Subsequences under Euclidean Distance and Correlation Coefficient.
+The difference of this function to mass() is that this does not normalize data. Specific for this domain.
Returns the distance.profile for the given query and the last.product for STOMP algorithm and sumy2.
+
+
References
+
+
Abdullah Mueen, Yan Zhu, Michael Yeh, Kaveh Kamgar, Krishnamurthy Viswanathan, Chetan Kumar Gupta and Eamonn Keogh (2015), The Fastest Similarity Search Algorithm for Time Series Subsequences under Euclidean Distance
The difference of this function to mass.pre() is that this does not normalize data. Specific for this domain.
+
+
+
+
mass.simple.pre(data, data.size, window.size)
+
+
Arguments
+
+
+
+
data
+
a matrix of numeric. Reference Time Series.
+
+
+
data.size
+
an int. Reference Time Series size.
+
+
+
window.size
+
an int. Sliding window size.
+
+
+
+
Value
+
+
Returns data.fft and sumx2.
+
+
References
+
+
Abdullah Mueen, Yan Zhu, Michael Yeh, Kaveh Kamgar, Krishnamurthy Viswanathan, Chetan Kumar Gupta and Eamonn Keogh (2015), The Fastest Similarity Search Algorithm for Time Series Subsequences under Euclidean Distance.
a matrix of numeric, where each colums is a time series. Accepts vector (see details), list and data.frame too.
+
a matrix of numeric, where each column is a time series. Accepts vector (see details), list and data.frame too.
window.size
@@ -181,7 +181,6 @@
See a
Examples
# using all dimensions
-Sys.sleep(1) # sometimes sleep is needed if you run parallel multiple times in a rowmp<-mstomp.par(toy_data$data[1:100,], 30, verbose=0)
Devtools uses the following options to configure behaviour:
-
-
devtools.path: path to use for dev_mode
-
devtools.name: your name, used when signing draft
-emails.
-
devtools.install.args: a string giving extra arguments passed
-to R CMD install by install.
-
devtools.desc.author: a string providing a default Authors@R
-string to be used in new DESCRIPTIONs. Should be a R code, and
-look like "Hadley Wickham <h.wickham@gmail.com> [aut, cre]". See
-as.person for more details.
-
devtools.desc.license: a default license string to use for
-new packages.
-
devtools.desc.suggests: a character vector listing packages to
-to add to suggests by defaults for new packages.
-
devtools.desc: a named list listing any other
-extra options to add to DESCRIPTION