Finish vRelease-1

matrix-profile-foundation · Aug 20, 2018 · fc3f874 · fc3f874
2 parents 111be14 + 65254e9
commit fc3f874
Show file tree

Hide file tree

Showing 94 changed files with 2,358 additions and 341 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -13,4 +13,5 @@
 ^README-.*\.png$
 ^\.httr-oauth$
 ^make\.R$
-^CONDUCT\.md$
+^CODE_OF_CONDUCT\.md$
+^\.github/
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,18 +1,21 @@
 Package: tsmp
 Type: Package
 Title: Time Series with Matrix Profile
-Version: 0.2.13.9004
+Version: 0.2.14.9009
 Authors@R: c(
     person("Francisco", "Bischoff", email = "fbischoff@med.up.pt", role = c("aut", "cre"), comment = c(ORCID = "https://orcid.org/0000-0002-5301-8672")),
-    person("Michael", "Yeh", email = "myeh003@ucr.edu", role = c("res", "ccp", "ctb"))
+    person("Michael", "Yeh", email = "myeh003@ucr.edu", role = c("res", "ccp", "ctb"), comment = c(ORCID = "https://orcid.org/0000-0002-9807-2963")),
+    person("Diego", "Silva", email = "diegofs@ufscar.br", role = c("res", "ccp", "ctb"), comment = c(ORCID = "https://orcid.org/0000-0002-5184-9413")),
+    person("Yan", "Zhu", email = "yzhu015@ucr.edu", role = c("res", "ccp", "ctb"))
   )
 Maintainer: Francisco Bischoff <fbischoff@med.up.pt>
 Description: A toolkit implementing the Matrix Profile concept that was created by CS-UCR <http://www.cs.ucr.edu/~eamonn/MatrixProfile.html>.
 License: MIT + file LICENSE
 URL: https://github.com/franzbischoff/tsmp
 BugReports: https://github.com/franzbischoff/tsmp/issues
-Depends: R (>= 2.10), beepr, doSNOW, parallel, foreach
+Depends: R (>= 2.10), audio, doSNOW, parallel, foreach
 Encoding: UTF-8
+Language: en-US
 LazyData: true
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 6.1.0

diff --git a/NAMESPACE b/NAMESPACE
@@ -2,6 +2,7 @@
 
 export(fast.movavg)
 export(fast.movsd)
+export(find.chains)
 export(fluss)
 export(fluss.cac)
 export(fluss.extract)
@@ -14,10 +15,11 @@ export(mstomp.par)
 export(sdts.f.score)
 export(sdts.predict)
 export(sdts.train)
+export(simple.fast)
 export(stamp)
 export(stamp.par)
 export(unconstrain.search)
-import(beepr)
+import(audio)
 import(doSNOW)
 import(foreach)
 import(parallel)
diff --git a/NEWS b/NEWS
@@ -1,7 +1,12 @@
 <!-- NEWS.md is generated from NEWS.Rmd. Please edit that file -->
-tsmp 0.2.30
+tsmp 0.2.14
 ===========
 
+-   Added SiMPle algorithm for sound data.
+-   Added FLUSS algorithm.
+-   Added `find.chains()` to look for time series chain primitives.
+-   Changed dependency from beepr to audio (beepr itself depends on
+    audio, so this means fewer dependencies)
 -   Added a `NEWS.md` file to track changes to the package.
 
 tsmp 0.2.x

diff --git a/NEWS.Rmd b/NEWS.Rmd
@@ -21,8 +21,12 @@ knitr::opts_chunk$set(
 )
 ```
 
-# tsmp 0.2.30
+# tsmp 0.2.14
 
+* Added SiMPle algorithm for sound data.
+* Added FLUSS algorithm.
+* Added `find.chains()` to look for time series chain primitives.
+* Changed dependency from beepr to audio (beepr itself depends on audio, so this means fewer dependencies)
 * Added a `NEWS.md` file to track changes to the package.
 
 # tsmp 0.2.x

diff --git a/NEWS.md b/NEWS.md
@@ -1,12 +1,17 @@
 NEWS
 ================
 Francisco Bischoff
-\- 18 Aug 2018
+\- 20 Aug 2018
 
 <!-- NEWS.md is generated from NEWS.Rmd. Please edit that file -->
 
-# tsmp 0.2.30
+# tsmp 0.2.14
 
+  - Added SiMPle algorithm for sound data.
+  - Added FLUSS algorithm.
+  - Added `find.chains()` to look for time series chain primitives.
+  - Changed dependency from beepr to audio (beepr itself depends on
+    audio, so this means fewer dependencies)
   - Added a `NEWS.md` file to track changes to the package.
 
 # tsmp 0.2.x

diff --git a/R/find_chains.R b/R/find_chains.R
@@ -0,0 +1,60 @@
+#' Find Time Series Chains
+#'
+#' Time Series Chains is a new primitive for time series data mining.
+#'
+#' @param matrices a result from STAMP or STOMP algorithms (the `rpi`, `lpi` and `rmp` elements are used)
+#'
+#' @return Returns a `list` with `chains`, a `list` of the chains found with more than 2 patterns,
+#'   and `best`, the best one.
+#' @export
+#' @references 1. Zhu Y, Imamura M, Nikovski D, Keogh E. Introducing time series chains: a new
+#'   primitive for time series data mining. Knowl Inf Syst. 2018 Jun 2;1–27.
+#' @references Website: <https://sites.google.com/site/timeserieschain/>
+#' @examples
+#' w <- 50
+#' data <- gait_data
+#' mp <- stamp(data, window.size = w, exclusion.zone = 1/4, verbose = 0)
+#' find.chains(mp)
+#'
+find.chains <- function(matrices) {
+  size <- length(matrices$rpi)
+  chain.length <- rep(1, size)
+  chain.set <- list()
+
+  k <- 1
+
+  for (i in 1:size) {
+    if (chain.length[i] == 1) {
+      j <- i
+      chain <- j
+
+      while (matrices$rpi[j] > 0 && matrices$lpi[matrices$rpi[j]] == j) {
+        j <- matrices$rpi[j]
+        chain.length[j] <- -1
+        chain.length[i] <- chain.length[i] + 1
+        chain <- c(chain, j)
+      }
+
+      if (length(chain) > 2) {
+        chain.set[[k]] <- chain
+        k <- k + 1
+      }
+    }
+  }
+
+  l <- max(chain.length)
+
+  best.chain <- NULL
+  mean <- Inf
+  for (i in 1:length(chain.set)) {
+    if (length(chain.set[[i]]) == l) {
+      n <- mean(matrices$rmp[chain.set[[i]]])
+      if (n < mean) {
+        mean <- n
+        best.chain <- chain.set[[i]]
+      }
+    }
+  }
+
+  return(list(chains = chain.set, best = best.chain))
+}
diff --git a/R/fluss.R b/R/fluss.R
@@ -125,7 +125,7 @@ fluss.extract <- function(arc.counts, num.segments, window.size, exclusion.zone
 #' Computes the arc count with edge correction (CAC).
 #'
 #' Original paper suggest using the classic statistical-process-control heuristic to set a threshold
-#' where a semantic change may occur in CAC. This may be useful in realtime implementation as we don't
+#' where a semantic change may occur in CAC. This may be useful in real-time implementation as we don't
 #' know in advance the number of domain changes to look for. Please check original paper (1).
 #'
 #' @param profile.index the profile index for arc counting.
@@ -147,6 +147,7 @@ fluss.extract <- function(arc.counts, num.segments, window.size, exclusion.zone
 #' w <- 210
 #' mp <- mstomp(data, w, verbose = 0)
 #' cac <- fluss.cac(mp$pi, w)
+#'
 #' \dontrun{
 #' data <- fluss_data$walkjogrun$data
 #' w <- fluss_data$walkjogrun$window # 80

diff --git a/R/gait_data.R b/R/gait_data.R
@@ -0,0 +1,9 @@
+#' Original data used in the Time Series Chain demo
+#'
+#' @docType data
+#' @format A `matrix` with 904 rows and 1 column with the Y data from an accelerometer
+#' @source \url{https://sites.google.com/site/timeserieschain/}
+#'
+#' @references 1. Zhu Y, Imamura M, Nikovski D, Keogh E. Introducing time series chains: a new primitive for time series data mining. Knowl Inf Syst. 2018 Jun 2;1–27.
+#' @keywords datasets
+"gait_data"
diff --git a/R/m_guide_search.R b/R/m_guide_search.R
@@ -4,7 +4,7 @@
 #'
 #' Although this functions handles Multivariate Time Series, it can also be used to handle Univariate Time Series.
 #'
-#' @param data a `matrix` of `numeric`, where each colums is a time series. Accepts `vector` (see details), `list` and `data.frame` too.
+#' @param data a `matrix` of `numeric`, where each column is a time series. Accepts `vector` (see details), `list` and `data.frame` too.
 #' @param window.size an `int` with the size of the sliding window.
 #' @param matrix.profile multidimensional matrix profile (matrix)
 #' @param profile.index multidimensional profile index (from [mstomp()] or [mstomp.par()]).

diff --git a/R/m_unconstrain_search.R b/R/m_unconstrain_search.R
@@ -4,7 +4,7 @@
 #'
 #' Although this functions handles Multivariate Time Series, it can also be used to handle Univariate Time Series.
 #'
-#' @param data a `matrix` of `numeric`, where each colums is a time series. Accepts `vector` (see details), `list` and `data.frame` too.
+#' @param data a `matrix` of `numeric`, where each column is a time series. Accepts `vector` (see details), `list` and `data.frame` too.
 #' @param window.size an `int` with the size of the sliding window.
 #' @param matrix.profile multidimensional matrix profile (from [mstomp()] or [mstomp.par()]).
 #' @param profile.index multidimensional profile index (from [mstomp()] or [mstomp.par()]).

diff --git a/R/misc.R b/R/misc.R
@@ -70,9 +70,35 @@ fast.movavg <- function(data, n) {
 std <- function(x) {
   sdx <- stats::sd(x)
 
-  if (sdx == 0)
+  if (sdx == 0) {
     return(sdx)
+  }
 
   return(sqrt((length(x) - 1) / length(x)) * sdx)
 }
 
+#' Play sound with `audio`
+#'
+#' @param data sound data provided by this package
+#'
+#' @keywords internal
+#' @import audio
+beep <- function(data) {
+  if (!(is.null(audio::audio.drivers()) || nrow(audio::audio.drivers()) == 0)) {
+    tryCatch({
+      audio::play(data)
+    },
+    error = function(cond) {
+      message("Failed to play audio alert")
+      message(cond)
+      invisible()
+    },
+    warning = function(cond) {
+      message("Something went wrong playing audio alert")
+      message(cond)
+      invisible()
+    }
+    )
+  }
+  invisible()
+}
diff --git a/R/mstomp.R b/R/mstomp.R
@@ -8,7 +8,7 @@
 #' Although this functions handles Multivariate Time Series, it can also be used to handle Univariate Time Series.
 #' `verbose` changes how much information is printed by this function; `0` means nothing, `1` means text, `2` means text and sound.
 #'
-#' @param data a `matrix` of `numeric`, where each colums is a time series. Accepts `vector` (see details), `list` and `data.frame` too.
+#' @param data a `matrix` of `numeric`, where each column is a time series. Accepts `vector` (see details), `list` and `data.frame` too.
 #' @param window.size an `int` with the size of the sliding window.
 #' @param must.dim an `int` or `vector` of which dimensions to forcibly include (default is `NULL`).
 #' @param exc.dim an `int` or `vector` of which dimensions to exclude (default is `NULL`).
@@ -114,7 +114,7 @@ mstomp <- function(data, window.size, must.dim = NULL, exc.dim = NULL, exclusion
     on.exit(close(pb))
   }
   if (verbose > 1) {
-    on.exit(beepr::beep(), TRUE)
+    on.exit(beep(sounds[[1]]), TRUE)
   }
 
   ## initialization
@@ -198,7 +198,7 @@ mstomp <- function(data, window.size, must.dim = NULL, exc.dim = NULL, exclusion
 
     if (n.dim > 1) {
       dist.pro.sort <- t(apply(distance.profile, 1, sort))
-    } # sort by row, put all -Inf to the first columns
+    } # sort by row, put all -Inf to the first column
     else {
       dist.pro.sort <- distance.profile
     }

diff --git a/R/mstomp_par.R b/R/mstomp_par.R
@@ -8,7 +8,7 @@
 #' Although this functions handles Multivariate Time Series, it can also be used to handle Univariate Time Series.
 #' `verbose` changes how much information is printed by this function; `0` means nothing, `1` means text, `2` means text and sound.
 #'
-#' @param data a `matrix` of `numeric`, where each colums is a time series. Accepts `vector` (see details), `list` and `data.frame` too.
+#' @param data a `matrix` of `numeric`, where each column is a time series. Accepts `vector` (see details), `list` and `data.frame` too.
 #' @param window.size an `int`. Size of the sliding window.
 #' @param must.dim an `int` or `vector` of which dimensions to forcibly include (default is `NULL`).
 #' @param exc.dim an `int` or `vector` of which dimensions to exclude (default is `NULL`).
@@ -29,9 +29,8 @@
 #'
 #' @examples
 #' # using all dimensions
-#' Sys.sleep(1) # sometimes sleep is needed if you run parallel multiple times in a row
 #' mp <- mstomp.par(toy_data$data[1:100,], 30, verbose = 0)
-#' @import beepr doSNOW foreach parallel
+#' @import doSNOW foreach parallel
 
 mstomp.par <- function(data, window.size, must.dim = NULL, exc.dim = NULL, exclusion.zone = 1 / 2, verbose = 2, n.workers = 2) {
   eps <- .Machine$double.eps^0.5
@@ -140,7 +139,7 @@ mstomp.par <- function(data, window.size, must.dim = NULL, exc.dim = NULL, exclu
     on.exit(close(pb), TRUE)
   }
   if (verbose > 1) {
-    on.exit(beepr::beep(), TRUE)
+    on.exit(beep(sounds[[1]]), TRUE)
   }
 
   ## initialize variable

diff --git a/R/sdts_predict.R b/R/sdts_predict.R
@@ -87,7 +87,7 @@ sdts.predict <- function(model, data, window.size) {
 #' `beta` is used to balance F-score towards recall (`>1`) or precision (`<1`).
 #'
 #' @param gtruth a `vector` of `logical`. Ground truth annotation.
-#' @param pred a `vector` of `logical`. Predictied annotation from [sdts.predict()]
+#' @param pred a `vector` of `logical`. Predicted annotation from [sdts.predict()]
 #' @param beta a `numeric`. See details. (default is `1`).
 #'
 #' @return Returns a `list` with `f.score`, `precision` and `recall`.

diff --git a/R/sdts_train.R b/R/sdts_train.R
@@ -148,7 +148,7 @@ sdts.train <- function(data, label, window.size, beta = 1, pat.max = Inf, parall
     on.exit(close(pb))
   }
   if (verbose > 1) {
-    on.exit(beepr::beep(), TRUE)
+    on.exit(beep(sounds[[1]]), TRUE)
   }
 
   for (i in 1:n.window.size) {
@@ -342,7 +342,7 @@ sdts.train <- function(data, label, window.size, beta = 1, pat.max = Inf, parall
 #' @param beta a number that balance the F-Score. Beta > 1 towards recall, < towards precision
 #' @param window.size an integer with the sliding window size
 #'
-#' @return Returns the best threashold and its F-Score
+#' @return Returns the best threshold and its F-Score
 #'
 #' @keywords internal
 #'
@@ -384,7 +384,7 @@ golden.section <- function(dist.pro, label, pos.st, pos.ed, beta, window.size) {
 #' @param window.size an integer with the sliding window size
 #' @param fit.idx an integer with the index of the current threshold
 #'
-#' @return Returns the best threashold and its F-Score
+#' @return Returns the best threshold and its F-Score
 #'
 #' @keywords internal