diff --git a/src/Common.jl b/src/Common.jl
new file mode 100644
index 0000000..024e1a3
--- /dev/null
+++ b/src/Common.jl
@@ -0,0 +1,76 @@
+"""
+    length(x::GeneticSeq)
+
+Return the number of nucleotides in `x` (the number of columns of `x.data`).
+"""
+function length(x::GeneticSeq)
+    return size(x.data, 2)
+end
+
+
+"""
+    show(io::IO, x::GeneticSeq)
+
+Print a header line with the length and molecule type of `x`, followed by the
+sequence letters. Sequences longer than 26 nucleotides are abbreviated to their
+first and last 13 letters separated by `...`.
+"""
+function show(io::IO, x::T) where {T <: GeneticSeq}
+    flank = 13
+    len = length(x)
+    # Decode one column of the one-hot matrix back into its letter.
+    letter(i) = onehot(T, x.data[:, i])
+    println(io, "$(len)nt $(seqlabel(T)) sequence")
+    if len <= 2 * flank
+        print(io, join(letter(i) for i in 1:len))
+    else
+        # `len - flank + 1` keeps the tail exactly `flank` letters long.
+        print(io, join(letter(i) for i in 1:flank),
+              "...",
+              join(letter(i) for i in (len - flank + 1):len))
+    end
+end
+
+
+"""
+    getindex(x::GeneticSeq, i::Integer)
+
+Return the `i`-th nucleotide of `x` as a length-1 sequence of the same type.
+"""
+function getindex(x::T, i::Integer) where {T <: GeneticSeq}
+    return T(x.data[:, i])
+end
+
+
+"""
+    setindex!(x::GeneticSeq, a, i)
+
+Overwrite position `i` of `x` in place, where `a` is either a one-hot
+`BitArray{1}` column or a nucleotide `Char`. Return `x`.
+"""
+function setindex!(x::GeneticSeq, a::BitArray{1}, i)
+    x.data[:, i] = a
+    return x
+end
+
+
+function setindex!(x::T, a::Char, i) where {T <: GeneticSeq}
+    x.data[:, i] = onehot(T, a)
+    return x
+end
+
+
+"""
+    ==(x::T, y::T) where {T <: GeneticSeq}
+
+Two sequences of the same type are equal when their one-hot matrices are equal.
+"""
+function ==(x::T, y::T) where {T <: GeneticSeq}
+    return x.data == y.data
+end
+
+
+# Keep `hash` consistent with `==` so equal sequences collide in `Dict`/`Set`.
+function hash(x::T, h::UInt) where {T <: GeneticSeq}
+    return hash(x.data, hash(T, h))
+end
diff --git a/src/DNA.jl b/src/DNA.jl
index 48e11a1..574537a 100644
--- a/src/DNA.jl
+++ b/src/DNA.jl
@@ -1,21 +1,36 @@
-function onehotDNA(x::Char)
-    return ['A', 'C', 'G', 'T'] .== Ref(x)
+"""
+    DNASeq
+
+DNA sequence held as a one-hot `BitArray{2}`; the `Vector{Char}` constructor
+builds it with 4 rows (A, C, G, T) and one column per nucleotide.
+"""
+struct DNASeq <: GeneticSeq
+    data::BitArray{2}
 end
 
 
-function onehotDNA(x::BitArray{1})
-    return ['A', 'C', 'G', 'T'][x][1]
+# Label used by the shared `show` method in Common.jl.
+function seqlabel(::Type{DNASeq})
+    return "DNA"
+end
+
+
+# Encode a nucleotide letter as a one-hot column in A, C, G, T order.
+function onehot(::Type{DNASeq}, x::Char)
+    return ['A', 'C', 'G', 'T'] .== Ref(x)
 end
 
-struct DNASeq
-    data::BitArray{2}
+
+# Decode a one-hot column back into its nucleotide letter.
+function onehot(::Type{DNASeq}, x::BitArray{1})
+    return ['A', 'C', 'G', 'T'][x][1]
 end
 
 
 function DNASeq(seq::Vector{Char})
     x = BitArray(undef, (4, length(seq)))
     for i in 1:length(seq)
-        x[:, i] = onehotDNA(seq[i])
+        x[:, i] = onehot(DNASeq, seq[i])
     end
     return DNASeq(x)
 end
@@ -26,19 +41,7 @@ function DNASeq(x::String)
 end
 
 
-function length(x::DNASeq)
-    return size(x.data, 2)
-end
-
-
-function show(io::IO, x::DNASeq)
-    len = length(x)
-    println(io, "$(len)nt DNA sequence")
-    if len <= 26
-        print(io, prod([onehotDNA(x.data[:, i]) for i=1:len]))
-    else
-        print(io, prod([onehotDNA(x.data[:, i]) for i=1:13]) *
-              "..." *
-              prod([onehotDNA(x.data[:, i]) for i=len-13:len]))
-    end
+# Wrap a single one-hot column as a length-1 sequence (used by `getindex`).
+function DNASeq(x::BitArray{1})
+    return DNASeq(reshape(x, (4, 1)))
 end
diff --git a/src/GeneticBitArrays.jl b/src/GeneticBitArrays.jl
index 7cc72b9..ed0486f 100644
--- a/src/GeneticBitArrays.jl
+++ b/src/GeneticBitArrays.jl
@@ -1,10 +1,18 @@
 module GeneticBitArrays
 
 import Base.show,
-       Base.length
+       Base.length,
+       Base.getindex,
+       Base.setindex!,
+       Base.hash,
+       Base.==
+
+# Supertype of all sequence types; the shared methods in Common.jl dispatch on it.
+abstract type GeneticSeq end
 
 include("DNA.jl")
 include("RNA.jl")
+include("Common.jl")
 
 export DNASeq,
        RNASeq
diff --git a/src/RNA.jl b/src/RNA.jl
index 4981cab..a61cff1 100644
--- a/src/RNA.jl
+++ b/src/RNA.jl
@@ -1,21 +1,36 @@
-function onehotRNA(x::Char)
-    return ['A', 'C', 'G', 'U'] .== Ref(x)
+"""
+    RNASeq
+
+RNA sequence held as a one-hot `BitArray{2}`; the `Vector{Char}` constructor
+builds it with 4 rows (A, C, G, U) and one column per nucleotide.
+"""
+struct RNASeq <: GeneticSeq
+    data::BitArray{2}
 end
 
 
-function onehotRNA(x::BitArray{1})
-    return ['A', 'C', 'G', 'U'][x][1]
+# Label used by the shared `show` method in Common.jl.
+function seqlabel(::Type{RNASeq})
+    return "RNA"
+end
+
+
+# Encode a nucleotide letter as a one-hot column in A, C, G, U order.
+function onehot(::Type{RNASeq}, x::Char)
+    return ['A', 'C', 'G', 'U'] .== Ref(x)
 end
 
-struct RNASeq
-    data::BitArray{2}
+
+# Decode a one-hot column back into its nucleotide letter.
+function onehot(::Type{RNASeq}, x::BitArray{1})
+    return ['A', 'C', 'G', 'U'][x][1]
 end
 
 
 function RNASeq(seq::Vector{Char})
     x = BitArray(undef, (4, length(seq)))
     for i in 1:length(seq)
-        x[:, i] = onehotRNA(seq[i])
+        x[:, i] = onehot(RNASeq, seq[i])
     end
     return RNASeq(x)
 end
@@ -26,19 +41,7 @@ function RNASeq(x::String)
 end
 
 
-function length(x::RNASeq)
-    return size(x.data, 2)
-end
-
-
-function show(io::IO, x::RNASeq)
-    len = length(x)
-    println(io, "$(len)nt RNA sequence")
-    if len <= 26
-        print(io, prod([onehotRNA(x.data[:, i]) for i=1:len]))
-    else
-        print(io, prod([onehotRNA(x.data[:, i]) for i=1:13]) *
-              "..." *
-              prod([onehotRNA(x.data[:, i]) for i=len-13:len]))
-    end
+# Wrap a single one-hot column as a length-1 sequence (used by `getindex`).
+function RNASeq(x::BitArray{1})
+    return RNASeq(reshape(x, (4, 1)))
 end
diff --git a/test/runtests.jl b/test/runtests.jl
new file mode 100644
index 0000000..d476c07
--- /dev/null
+++ b/test/runtests.jl
@@ -0,0 +1,25 @@
+using Test,
+      GeneticBitArrays
+
+@testset "GeneticBitArrays" begin
+    @test sum(DNASeq("AAAA").data, dims=2)[:] == [4;0;0;0]
+    @test sum(DNASeq("CCCC").data, dims=2)[:] == [0;4;0;0]
+    @test sum(DNASeq("GGGG").data, dims=2)[:] == [0;0;4;0]
+    @test sum(DNASeq("TTTT").data, dims=2)[:] == [0;0;0;4]
+    @test DNASeq("TTTT").data == RNASeq("UUUU").data
+    @test length(DNASeq("ACGT")[2]) == length(DNASeq("C"))
+    @test DNASeq("ACGT")[2] == DNASeq("C")
+
+    # `show` labels each molecule type and keeps 13 letters on each side.
+    @test sprint(show, RNASeq("ACGU")) == "4nt RNA sequence\nACGU"
+    @test sprint(show, DNASeq("A"^10 * "C"^10 * "G"^10)) ==
+          "30nt DNA sequence\n" * "A"^10 * "CCC...CCC" * "G"^10
+
+    # Indexed assignment goes through `setindex!` and mutates in place.
+    s = DNASeq("ACGT")
+    s[1] = 'T'
+    @test s == DNASeq("TCGT")
+
+    # Equal sequences must hash equally.
+    @test hash(DNASeq("ACGT")) == hash(DNASeq("ACGT"))
+end