From d1dd370e05a97f5f84e2ae7fb6541a4a17c6d1d1 Mon Sep 17 00:00:00 2001 From: Justin Angevaare Date: Tue, 10 Sep 2019 19:52:31 -0400 Subject: [PATCH] DNA RNA update README update README let's keep the type names short new new >1.0 --- Project.toml | 2 +- README.md | 34 ++++++++++++++++++++++++- src/DNA.jl | 44 +++++++++++++++++++++++++++++++++ src/GeneticBitArrays.jl | 55 +++-------------------------------------- src/RNA.jl | 44 +++++++++++++++++++++++++++++++++ 5 files changed, 126 insertions(+), 53 deletions(-) create mode 100644 src/DNA.jl create mode 100644 src/RNA.jl diff --git a/Project.toml b/Project.toml index a490246..8854dfe 100644 --- a/Project.toml +++ b/Project.toml @@ -4,7 +4,7 @@ authors = ["Justin Angevaare "] version = "0.1.0" [compat] -julia = "≥ 0.7.0" +julia = "≥ 1.0.0" [extras] Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/README.md b/README.md index 03bb901..a786c82 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,34 @@ # GeneticBitArrays.jl -BitArray representations of Genetic sequences +Minimal representations of genetic sequences. + +## Use +Construct a sequence from a `String`, a `Vector{Char}`, or an appropriate `BitArray`. + +*e.g.* +``` +julia> DNASeq("AAAAGCCT") + +8nt DNA sequence +AAAAGCCT +``` + +## Representation +A 4 x n `BitArray` is used to represent sequences internally, where n is the sequence length. + + +``` +julia> x = DNASeq("AAAAGCCT") + +julia> x.data + +4×8 BitArray{2}: + 1 1 1 1 0 0 0 0 + 0 0 0 0 0 1 1 0 + 0 0 0 0 1 0 0 0 + 0 0 0 0 0 0 0 1 +``` + +## Notes +* Ambiguities are not currently supported. +* No error checking for I/O is provided. +* For a full-featured package for genetic sequences see [BioSequences.jl](https://github.com/BioJulia/BioSequences.jl).
diff --git a/src/DNA.jl b/src/DNA.jl
new file mode 100644
index 0000000..48e11a1
--- /dev/null
+++ b/src/DNA.jl
@@ -0,0 +1,68 @@
+"""
+    onehotDNA(x::Char)
+
+Return a 4-element `BitVector` flagging which of `'A'`, `'C'`, `'G'`, `'T'` equals `x`.
+Any other character yields an all-false vector; no error is raised.
+"""
+function onehotDNA(x::Char)
+    return ['A', 'C', 'G', 'T'] .== Ref(x)
+end
+
+
+"""
+    onehotDNA(x::BitArray{1})
+
+Decode a one-hot column back to its nucleotide, using the row order A, C, G, T.
+A column with no bit set throws a `BoundsError`.
+"""
+function onehotDNA(x::BitArray{1})
+    return ['A', 'C', 'G', 'T'][x][1]
+end
+
+
+"""
+    DNASeq(data::BitArray{2})
+    DNASeq(seq::Vector{Char})
+    DNASeq(x::String)
+
+DNA sequence stored as a 4 x n one-hot `BitArray`, one column per nucleotide.
+Character input is encoded without validation.
+"""
+struct DNASeq
+    data::BitArray{2}
+end
+
+
+function DNASeq(seq::Vector{Char})
+    x = BitArray(undef, (4, length(seq)))
+    for (i, nt) in enumerate(seq)
+        x[:, i] = onehotDNA(nt)
+    end
+    return DNASeq(x)
+end
+
+
+function DNASeq(x::String)
+    return DNASeq(Vector{Char}(x))
+end
+
+
+# Sequence length in nucleotides, i.e. the number of columns of `data`.
+function length(x::DNASeq)
+    return size(x.data, 2)
+end
+
+
+# Print a header line, then the sequence. Sequences longer than 26 nt are
+# abbreviated to their first 13 and last 13 nt, joined by "...".
+function show(io::IO, x::DNASeq)
+    len = length(x)
+    println(io, "$(len)nt DNA sequence")
+    nt(i) = onehotDNA(x.data[:, i])
+    if len <= 26
+        print(io, join(nt(i) for i in 1:len))
+    else
+        # The tail starts at len-12 so that exactly 13 nt follow the ellipsis.
+        print(io, join(nt(i) for i in 1:13), "...", join(nt(i) for i in len-12:len))
+    end
+end
diff --git a/src/GeneticBitArrays.jl b/src/GeneticBitArrays.jl index 79748aa..7cc72b9 100644 --- a/src/GeneticBitArrays.jl +++ b/src/GeneticBitArrays.jl @@ -1,58 +1,11 @@ module GeneticBitArrays -import Base.getindex, - Base.setindex, - Base.show, +import Base.show, Base.length -function onehotDNA(x::Char) - return ['A', 'C', 'G', 'T'] .== Ref(x) -end +include("DNA.jl") +include("RNA.jl") - - -function onehotDNA(x::BitArray{1}) - return ['A', 'C', 'G', 'T'][x] -end - - -struct DNABitArray - data::BitArray{2} - - function DNABitArray(seq::Vector{Char}) - x = BitArray(undef, (4, length(seq))) - for i in 1:length(seq) - x[onehotDNA(i), i] += 1 - end - return new(x) - end -end - - -function DNABitArray(x::String) - return DNABitArray(Vector{Char}(x)) -end - - -function length(x::DNABitArray) - return size(x.data, 1) -end - - -function getindex(x::DNABitArray, i) - return DNABitArray(x.data[i, :]) -end - - -function show(io::IO, x::DNABitArray) - if length(x) <= 26 - print(io, prod([onehotDNA(x[i]) for i=1:length(object)])) - else - print(io, 
prod([onehotDNA(x[i]) for i=1:13]) * - "..." * - prod([onehotDNA(x[i]) for i=length(x)-13:length(x)])) - end -end - -export DNABitArray +export DNASeq, RNASeq end # module
diff --git a/src/RNA.jl b/src/RNA.jl
new file mode 100644
index 0000000..4981cab
--- /dev/null
+++ b/src/RNA.jl
@@ -0,0 +1,68 @@
+"""
+    onehotRNA(x::Char)
+
+Return a 4-element `BitVector` flagging which of `'A'`, `'C'`, `'G'`, `'U'` equals `x`.
+Any other character yields an all-false vector; no error is raised.
+"""
+function onehotRNA(x::Char)
+    return ['A', 'C', 'G', 'U'] .== Ref(x)
+end
+
+
+"""
+    onehotRNA(x::BitArray{1})
+
+Decode a one-hot column back to its nucleotide, using the row order A, C, G, U.
+A column with no bit set throws a `BoundsError`.
+"""
+function onehotRNA(x::BitArray{1})
+    return ['A', 'C', 'G', 'U'][x][1]
+end
+
+
+"""
+    RNASeq(data::BitArray{2})
+    RNASeq(seq::Vector{Char})
+    RNASeq(x::String)
+
+RNA sequence stored as a 4 x n one-hot `BitArray`, one column per nucleotide.
+Character input is encoded without validation.
+"""
+struct RNASeq
+    data::BitArray{2}
+end
+
+
+function RNASeq(seq::Vector{Char})
+    x = BitArray(undef, (4, length(seq)))
+    for (i, nt) in enumerate(seq)
+        x[:, i] = onehotRNA(nt)
+    end
+    return RNASeq(x)
+end
+
+
+function RNASeq(x::String)
+    return RNASeq(Vector{Char}(x))
+end
+
+
+# Sequence length in nucleotides, i.e. the number of columns of `data`.
+function length(x::RNASeq)
+    return size(x.data, 2)
+end
+
+
+# Print a header line, then the sequence. Sequences longer than 26 nt are
+# abbreviated to their first 13 and last 13 nt, joined by "...".
+function show(io::IO, x::RNASeq)
+    len = length(x)
+    println(io, "$(len)nt RNA sequence")
+    nt(i) = onehotRNA(x.data[:, i])
+    if len <= 26
+        print(io, join(nt(i) for i in 1:len))
+    else
+        # The tail starts at len-12 so that exactly 13 nt follow the ellipsis.
+        print(io, join(nt(i) for i in 1:13), "...", join(nt(i) for i in len-12:len))
+    end
+end