Conor Heffron 2024-07-16
- AAGTGCCTCAAGATA
- ACCGTCTCAGCAATA
library(devtools)
## Loading required package: usethis
## Warning: package 'usethis' was built under R version 4.3.2
library(stringr)
# use_r("global-max-sim-matrix")
# source('R/global-max-sim-matrix.R')
devtools::install_github("conorheffron/global-max-sim-matrix")
## Skipping install of 'global.max.sim.matrix' from a github remote, the SHA1 (0c1fae77) has not changed since last install.
## Use `force = TRUE` to force installation
load_all()
## ℹ Loading global.max.sim.matrix
seq1 <- 'AAGTGCCTCAAGATA'
seq1
## [1] "AAGTGCCTCAAGATA"
seq2 <- 'ACCGTCTCAGCAATA'
seq2
## [1] "ACCGTCTCAGCAATA"
m <- nchar(seq1)
n <- nchar(seq2)
print(paste('Sequence Length (seq1, seq2):', m, n))
## [1] "Sequence Length (seq1, seq2): 15 15"
# Construct default similarity matrix and fill with 0's
sim_matrix <- matrix(0, m+1, n+1)
sim_matrix
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [1,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [2,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [3,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [4,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [5,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [6,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [7,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [8,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [9,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [10,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [11,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [12,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [13,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [14,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [15,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [16,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [,14] [,15] [,16]
## [1,] 0 0 0
## [2,] 0 0 0
## [3,] 0 0 0
## [4,] 0 0 0
## [5,] 0 0 0
## [6,] 0 0 0
## [7,] 0 0 0
## [8,] 0 0 0
## [9,] 0 0 0
## [10,] 0 0 0
## [11,] 0 0 0
## [12,] 0 0 0
## [13,] 0 0 0
## [14,] 0 0 0
## [15,] 0 0 0
## [16,] 0 0 0
sim_matrix <- init_matrix_col1(sim_matrix, m)
sim_matrix
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [1,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [2,] -2 0 0 0 0 0 0 0 0 0 0 0 0
## [3,] -4 0 0 0 0 0 0 0 0 0 0 0 0
## [4,] -6 0 0 0 0 0 0 0 0 0 0 0 0
## [5,] -8 0 0 0 0 0 0 0 0 0 0 0 0
## [6,] -10 0 0 0 0 0 0 0 0 0 0 0 0
## [7,] -12 0 0 0 0 0 0 0 0 0 0 0 0
## [8,] -14 0 0 0 0 0 0 0 0 0 0 0 0
## [9,] -16 0 0 0 0 0 0 0 0 0 0 0 0
## [10,] -18 0 0 0 0 0 0 0 0 0 0 0 0
## [11,] -20 0 0 0 0 0 0 0 0 0 0 0 0
## [12,] -22 0 0 0 0 0 0 0 0 0 0 0 0
## [13,] -24 0 0 0 0 0 0 0 0 0 0 0 0
## [14,] -26 0 0 0 0 0 0 0 0 0 0 0 0
## [15,] -28 0 0 0 0 0 0 0 0 0 0 0 0
## [16,] -30 0 0 0 0 0 0 0 0 0 0 0 0
## [,14] [,15] [,16]
## [1,] 0 0 0
## [2,] 0 0 0
## [3,] 0 0 0
## [4,] 0 0 0
## [5,] 0 0 0
## [6,] 0 0 0
## [7,] 0 0 0
## [8,] 0 0 0
## [9,] 0 0 0
## [10,] 0 0 0
## [11,] 0 0 0
## [12,] 0 0 0
## [13,] 0 0 0
## [14,] 0 0 0
## [15,] 0 0 0
## [16,] 0 0 0
sim_matrix <- init_matrix_row1(sim_matrix, n)
sim_matrix
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [1,] 0 -2 -4 -6 -8 -10 -12 -14 -16 -18 -20 -22 -24
## [2,] -2 0 0 0 0 0 0 0 0 0 0 0 0
## [3,] -4 0 0 0 0 0 0 0 0 0 0 0 0
## [4,] -6 0 0 0 0 0 0 0 0 0 0 0 0
## [5,] -8 0 0 0 0 0 0 0 0 0 0 0 0
## [6,] -10 0 0 0 0 0 0 0 0 0 0 0 0
## [7,] -12 0 0 0 0 0 0 0 0 0 0 0 0
## [8,] -14 0 0 0 0 0 0 0 0 0 0 0 0
## [9,] -16 0 0 0 0 0 0 0 0 0 0 0 0
## [10,] -18 0 0 0 0 0 0 0 0 0 0 0 0
## [11,] -20 0 0 0 0 0 0 0 0 0 0 0 0
## [12,] -22 0 0 0 0 0 0 0 0 0 0 0 0
## [13,] -24 0 0 0 0 0 0 0 0 0 0 0 0
## [14,] -26 0 0 0 0 0 0 0 0 0 0 0 0
## [15,] -28 0 0 0 0 0 0 0 0 0 0 0 0
## [16,] -30 0 0 0 0 0 0 0 0 0 0 0 0
## [,14] [,15] [,16]
## [1,] -26 -28 -30
## [2,] 0 0 0
## [3,] 0 0 0
## [4,] 0 0 0
## [5,] 0 0 0
## [6,] 0 0 0
## [7,] 0 0 0
## [8,] 0 0 0
## [9,] 0 0 0
## [10,] 0 0 0
## [11,] 0 0 0
## [12,] 0 0 0
## [13,] 0 0 0
## [14,] 0 0 0
## [15,] 0 0 0
## [16,] 0 0 0
# define score look-up map
d <- c("match" = 1, "mismatch" = -1, "gap" = -2)
d
## match mismatch gap
## 1 -1 -2
sim_matrix <- populate_global_max_sim_matrix(sim_matrix, seq1, seq2, d, m, n)
sim_matrix
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [1,] 0 -2 -4 -6 -8 -10 -12 -14 -16 -18 -20 -22 -24
## [2,] -2 1 -1 -3 -5 -7 -9 -11 -13 -15 -17 -19 -21
## [3,] -4 -1 0 -2 -4 -6 -8 -10 -12 -12 -14 -16 -18
## [4,] -6 -3 -2 -1 -1 -3 -5 -7 -9 -11 -11 -13 -15
## [5,] -8 -5 -4 -3 -2 0 -2 -4 -6 -8 -10 -12 -14
## [6,] -10 -7 -6 -5 -2 -2 -1 -3 -5 -7 -7 -9 -11
## [7,] -12 -9 -6 -5 -4 -3 -1 -2 -2 -4 -6 -6 -8
## [8,] -14 -11 -8 -5 -6 -5 -2 -2 -1 -3 -5 -5 -7
## [9,] -16 -13 -10 -7 -6 -5 -4 -1 -3 -2 -4 -6 -6
## [10,] -18 -15 -12 -9 -8 -7 -4 -3 0 -2 -3 -3 -5
## [11,] -20 -17 -14 -11 -10 -9 -6 -5 -2 1 -1 -3 -2
## [12,] -22 -19 -16 -13 -12 -11 -8 -7 -4 -1 0 -2 -2
## [13,] -24 -21 -18 -15 -12 -13 -10 -9 -6 -3 0 -1 -3
## [14,] -26 -23 -20 -17 -14 -13 -12 -11 -8 -5 -2 -1 0
## [15,] -28 -25 -22 -19 -16 -13 -14 -11 -10 -7 -4 -3 -2
## [16,] -30 -27 -24 -21 -18 -15 -14 -13 -12 -9 -6 -5 -2
## [,14] [,15] [,16]
## [1,] -26 -28 -30
## [2,] -23 -25 -27
## [3,] -20 -22 -24
## [4,] -17 -19 -21
## [5,] -16 -16 -18
## [6,] -13 -15 -17
## [7,] -10 -12 -14
## [8,] -9 -11 -13
## [9,] -8 -8 -10
## [10,] -7 -9 -9
## [11,] -4 -6 -8
## [12,] -1 -3 -5
## [13,] -3 -2 -4
## [14,] -2 -4 -1
## [15,] -1 -1 -3
## [16,] -1 -2 0
# colnames(sim_matrix) <- c('-', 'A', 'A', 'G', 'T', 'G', 'C', 'C', 'T', 'C', 'A', 'A', 'G', 'A', 'T', 'A')
colnames(sim_matrix) <- unlist(stringr::str_split(paste('-', seq1, sep=''), boundary("character")))
# rownames(sim_matrix) <- c('-', 'A', 'C', 'C', 'G', 'T', 'C', 'T', 'C', 'A', 'G', 'C', 'A', 'A', 'T', 'A')
rownames(sim_matrix) <- unlist(stringr::str_split(paste('-', seq2, sep=''), boundary("character")))
sim_matrix
## - A A G T G C C T C A A G A T A
## - 0 -2 -4 -6 -8 -10 -12 -14 -16 -18 -20 -22 -24 -26 -28 -30
## A -2 1 -1 -3 -5 -7 -9 -11 -13 -15 -17 -19 -21 -23 -25 -27
## C -4 -1 0 -2 -4 -6 -8 -10 -12 -12 -14 -16 -18 -20 -22 -24
## C -6 -3 -2 -1 -1 -3 -5 -7 -9 -11 -11 -13 -15 -17 -19 -21
## G -8 -5 -4 -3 -2 0 -2 -4 -6 -8 -10 -12 -14 -16 -16 -18
## T -10 -7 -6 -5 -2 -2 -1 -3 -5 -7 -7 -9 -11 -13 -15 -17
## C -12 -9 -6 -5 -4 -3 -1 -2 -2 -4 -6 -6 -8 -10 -12 -14
## T -14 -11 -8 -5 -6 -5 -2 -2 -1 -3 -5 -5 -7 -9 -11 -13
## C -16 -13 -10 -7 -6 -5 -4 -1 -3 -2 -4 -6 -6 -8 -8 -10
## A -18 -15 -12 -9 -8 -7 -4 -3 0 -2 -3 -3 -5 -7 -9 -9
## G -20 -17 -14 -11 -10 -9 -6 -5 -2 1 -1 -3 -2 -4 -6 -8
## C -22 -19 -16 -13 -12 -11 -8 -7 -4 -1 0 -2 -2 -1 -3 -5
## A -24 -21 -18 -15 -12 -13 -10 -9 -6 -3 0 -1 -3 -3 -2 -4
## A -26 -23 -20 -17 -14 -13 -12 -11 -8 -5 -2 -1 0 -2 -4 -1
## T -28 -25 -22 -19 -16 -13 -14 -11 -10 -7 -4 -3 -2 -1 -1 -3
## A -30 -27 -24 -21 -18 -15 -14 -13 -12 -9 -6 -5 -2 -1 -2 0
devtools::test()
## ℹ Testing global.max.sim.matrix
## ✔ | F W S OK | Context
## ⠏ | 0 | global-max-sim-matrix [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [1,] 0 -2 -4 -6 -8 -10 -12 -14 -16 -18 -20 -22 -24
## [2,] -2 1 -1 -3 -5 -7 -9 -11 -13 -15 -17 -19 -21
## [3,] -4 -1 0 -2 -4 -6 -8 -10 -12 -12 -14 -16 -18
## [4,] -6 -3 -2 -1 -1 -3 -5 -7 -9 -11 -11 -13 -15
## [5,] -8 -5 -4 -3 -2 0 -2 -4 -6 -8 -10 -12 -14
## [6,] -10 -7 -6 -5 -2 -2 -1 -3 -5 -7 -7 -9 -11
## [7,] -12 -9 -6 -5 -4 -3 -1 -2 -2 -4 -6 -6 -8
## [8,] -14 -11 -8 -5 -6 -5 -2 -2 -1 -3 -5 -5 -7
## [9,] -16 -13 -10 -7 -6 -5 -4 -1 -3 -2 -4 -6 -6
## [10,] -18 -15 -12 -9 -8 -7 -4 -3 0 -2 -3 -3 -5
## [11,] -20 -17 -14 -11 -10 -9 -6 -5 -2 1 -1 -3 -2
## [12,] -22 -19 -16 -13 -12 -11 -8 -7 -4 -1 0 -2 -2
## [13,] -24 -21 -18 -15 -12 -13 -10 -9 -6 -3 0 -1 -3
## [14,] -26 -23 -20 -17 -14 -13 -12 -11 -8 -5 -2 -1 0
## [15,] -28 -25 -22 -19 -16 -13 -14 -11 -10 -7 -4 -3 -2
## [16,] -30 -27 -24 -21 -18 -15 -14 -13 -12 -9 -6 -5 -2
## [,14] [,15] [,16]
## [1,] -26 -28 -30
## [2,] -23 -25 -27
## [3,] -20 -22 -24
## [4,] -17 -19 -21
## [5,] -16 -16 -18
## [6,] -13 -15 -17
## [7,] -10 -12 -14
## [8,] -9 -11 -13
## [9,] -8 -8 -10
## [10,] -7 -9 -9
## [11,] -4 -6 -8
## [12,] -1 -3 -5
## [13,] -3 -2 -4
## [14,] -2 -4 -1
## [15,] -1 -1 -3
## [16,] -1 -2 0
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [1,] 0 -2 -4 -6 -8 -10 -12 -14 -16 -18 -20 -22 -24
## [2,] -2 1 -1 -3 -5 -7 -9 -11 -13 -15 -17 -19 -21
## [3,] -4 -1 0 -2 -4 -6 -8 -10 -12 -12 -14 -16 -18
## [4,] -6 -3 -2 -1 -1 -3 -5 -7 -9 -11 -11 -13 -15
## [5,] -8 -5 -4 -3 -2 0 -2 -4 -6 -8 -10 -12 -14
## [6,] -10 -7 -6 -5 -2 -2 -1 -3 -5 -7 -7 -9 -11
## [7,] -12 -9 -6 -5 -4 -3 -1 -2 -2 -4 -6 -6 -8
## [8,] -14 -11 -8 -5 -6 -5 -2 -2 -1 -3 -5 -5 -7
## [9,] -16 -13 -10 -7 -6 -5 -4 -1 -3 -2 -4 -6 -6
## [10,] -18 -15 -12 -9 -8 -7 -4 -3 0 -2 -3 -3 -5
## [11,] -20 -17 -14 -11 -10 -9 -6 -5 -2 1 -1 -3 -2
## [12,] -22 -19 -16 -13 -12 -11 -8 -7 -4 -1 0 -2 -2
## [13,] -24 -21 -18 -15 -12 -13 -10 -9 -6 -3 0 -1 -3
## [14,] -26 -23 -20 -17 -14 -13 -12 -11 -8 -5 -2 -1 0
## [15,] -28 -25 -22 -19 -16 -13 -14 -11 -10 -7 -4 -3 -2
## [16,] -30 -27 -24 -21 -18 -15 -14 -13 -12 -9 -6 -5 -2
## [,14] [,15] [,16]
## [1,] -26 -28 -30
## [2,] -23 -25 -27
## [3,] -20 -22 -24
## [4,] -17 -19 -21
## [5,] -16 -16 -18
## [6,] -13 -15 -17
## [7,] -10 -12 -14
## [8,] -9 -11 -13
## [9,] -8 -8 -10
## [10,] -7 -9 -9
## [11,] -4 -6 -8
## [12,] -1 -3 -5
## [13,] -3 -2 -4
## [14,] -2 -4 -1
## [15,] -1 -1 -3
## [16,] -1 -2 0
## ✔ | 4 | global-max-sim-matrix
##
## ══ Results ═════════════════════════════════════════════════════════════════════
## [ FAIL 0 | WARN 0 | SKIP 0 | PASS 4 ]