Skip to content

Commit

Permalink
DNA RNA
Browse files Browse the repository at this point in the history
update README

update README

let's keep the type names short

new

new

>1.0
  • Loading branch information
jangevaare committed Sep 11, 2019
1 parent 4945cc6 commit d1dd370
Show file tree
Hide file tree
Showing 5 changed files with 126 additions and 53 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ authors = ["Justin Angevaare <justinangevaare@gmail.com>"]
version = "0.1.0"

[compat]
julia = "0.7.0"
julia = "1.0.0"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Expand Down
34 changes: 33 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,34 @@
# GeneticBitArrays.jl
BitArray representations of Genetic sequences
Minimal representations of genetic sequences.

## Use
Construct a sequence from a `String`, a `Vector{Char}`, or an appropriate `BitArray`.

*e.g.*
```
julia> DNASeq("AAAAGCCT")
8nt DNA sequence
AAAAGCCT
```

## Representation
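A 4 x n `BitArray` is used to represent sequences internally, where n is the sequence length. Each column one-hot encodes a single nucleotide, with rows corresponding to A, C, G, and T (U for RNA), in that order.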


```
julia> x = DNASeq("AAAAGCCT")
julia> x.data
4×8 BitArray{2}:
1 1 1 1 0 0 0 0
0 0 0 0 0 1 1 0
0 0 0 0 1 0 0 0
0 0 0 0 0 0 0 1
```

## Notes
* Ambiguities are not currently supported.
* No input validation is performed: characters outside the supported alphabet are not rejected when a sequence is constructed.
* For a full-featured package for genetic sequences see [BioSequences.jl](https://github.com/BioJulia/BioSequences.jl).
44 changes: 44 additions & 0 deletions src/DNA.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""
    onehotDNA(x::Char)

Encode the nucleotide `x` as a 4-element one-hot `BitArray{1}` over the
alphabet `A`, `C`, `G`, `T` (in that order).

The comparison is exact, so any character that is not one of those four
uppercase letters yields an all-`false` vector.
"""
function onehotDNA(x::Char)
    alphabet = ['A', 'C', 'G', 'T']
    # `Ref(x)` makes the character behave as a scalar under broadcasting.
    mask = alphabet .== Ref(x)
    return mask
end


"""
    onehotDNA(x::BitArray{1})

Decode a one-hot column back into its nucleotide `Char`.

`x` is used as a logical mask over `A`, `C`, `G`, `T`; the first selected
letter is returned. An all-`false` mask selects nothing and therefore throws
a `BoundsError`.
"""
function onehotDNA(x::BitArray{1})
    alphabet = ['A', 'C', 'G', 'T']
    selected = alphabet[x]
    return selected[1]
end

"""
    DNASeq

DNA sequence stored as a two-dimensional `BitArray` in the field `data`.

The constructors in this file build `data` with 4 rows and one column per
nucleotide, each column being the one-hot encoding produced by `onehotDNA`.
The shape is not checked when a `BitArray` is passed in directly.
"""
struct DNASeq
    data::BitArray{2}
end


"""
    DNASeq(seq::Vector{Char})

Build a `DNASeq` from a vector of nucleotide characters.

A 4 x `length(seq)` `BitArray` is allocated and column `i` is filled with
`onehotDNA(seq[i])`. No validation of the characters is performed.
"""
function DNASeq(seq::Vector{Char})
    n = length(seq)
    encoded = BitArray(undef, (4, n))
    for (col, base) in enumerate(seq)
        encoded[:, col] = onehotDNA(base)
    end
    return DNASeq(encoded)
end


"""
    DNASeq(x::String)

Build a `DNASeq` from a string by splitting it into its characters and
forwarding to the `Vector{Char}` constructor.
"""
DNASeq(x::String) = DNASeq(collect(x))


"""
    length(x::DNASeq)

Return the number of nucleotides in `x`, i.e. the number of columns of
`x.data`.
"""
Base.length(x::DNASeq) = size(x.data, 2)


"""
    show(io::IO, x::DNASeq)

Write a textual representation of `x` to `io`.

The first line is a header of the form `"<len>nt DNA sequence"`. The second
line is the decoded sequence. Sequences longer than 26 nt are abbreviated to
their first 13 and last 13 nucleotides separated by `"..."`.
"""
function Base.show(io::IO, x::DNASeq)
    len = length(x)
    println(io, "$(len)nt DNA sequence")
    # Decode the given column range of the one-hot matrix back into text.
    decode(cols) = join(onehotDNA(x.data[:, i]) for i in cols)
    if len <= 26
        print(io, decode(1:len))
    else
        # `len-12:len` spans exactly 13 columns. The previous `len-13:len`
        # spanned 14, so a 27 nt sequence was printed in full plus "...".
        print(io, decode(1:13), "...", decode(len-12:len))
    end
    return nothing
end
55 changes: 4 additions & 51 deletions src/GeneticBitArrays.jl
Original file line number Diff line number Diff line change
@@ -1,58 +1,11 @@
"""
    GeneticBitArrays

Minimal representations of genetic sequences backed by `BitArray`s.

The DNA and RNA implementations live in `DNA.jl` and `RNA.jl`, which are
included below; the module exports the `DNASeq` and `RNASeq` types.
"""
module GeneticBitArrays

# Imported so the included files can add `show` and `length` methods for the
# sequence types. The pre-refactor `DNABitArray` code that used to live here
# was superseded by the included files and is intentionally not kept.
import Base: show, length

include("DNA.jl")
include("RNA.jl")

export DNASeq, RNASeq

end # module
44 changes: 44 additions & 0 deletions src/RNA.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""
    onehotRNA(x::Char)

Encode the nucleotide `x` as a 4-element one-hot `BitArray{1}` over the
alphabet `A`, `C`, `G`, `U` (in that order).

The comparison is exact, so any character that is not one of those four
uppercase letters yields an all-`false` vector.
"""
function onehotRNA(x::Char)
    alphabet = ['A', 'C', 'G', 'U']
    # `Ref(x)` makes the character behave as a scalar under broadcasting.
    mask = alphabet .== Ref(x)
    return mask
end


"""
    onehotRNA(x::BitArray{1})

Decode a one-hot column back into its nucleotide `Char`.

`x` is used as a logical mask over `A`, `C`, `G`, `U`; the first selected
letter is returned. An all-`false` mask selects nothing and therefore throws
a `BoundsError`.
"""
function onehotRNA(x::BitArray{1})
    alphabet = ['A', 'C', 'G', 'U']
    selected = alphabet[x]
    return selected[1]
end

"""
    RNASeq

RNA sequence stored as a two-dimensional `BitArray` in the field `data`.

The constructors in this file build `data` with 4 rows and one column per
nucleotide, each column being the one-hot encoding produced by `onehotRNA`.
The shape is not checked when a `BitArray` is passed in directly.
"""
struct RNASeq
    data::BitArray{2}
end


"""
    RNASeq(seq::Vector{Char})

Build an `RNASeq` from a vector of nucleotide characters.

A 4 x `length(seq)` `BitArray` is allocated and column `i` is filled with
`onehotRNA(seq[i])`. No validation of the characters is performed.
"""
function RNASeq(seq::Vector{Char})
    n = length(seq)
    encoded = BitArray(undef, (4, n))
    for (col, base) in enumerate(seq)
        encoded[:, col] = onehotRNA(base)
    end
    return RNASeq(encoded)
end


"""
    RNASeq(x::String)

Build an `RNASeq` from a string by splitting it into its characters and
forwarding to the `Vector{Char}` constructor.
"""
RNASeq(x::String) = RNASeq(collect(x))


"""
    length(x::RNASeq)

Return the number of nucleotides in `x`, i.e. the number of columns of
`x.data`.
"""
Base.length(x::RNASeq) = size(x.data, 2)


"""
    show(io::IO, x::RNASeq)

Write a textual representation of `x` to `io`.

The first line is a header of the form `"<len>nt RNA sequence"`. The second
line is the decoded sequence. Sequences longer than 26 nt are abbreviated to
their first 13 and last 13 nucleotides separated by `"..."`.
"""
function Base.show(io::IO, x::RNASeq)
    len = length(x)
    println(io, "$(len)nt RNA sequence")
    # Decode the given column range of the one-hot matrix back into text.
    decode(cols) = join(onehotRNA(x.data[:, i]) for i in cols)
    if len <= 26
        print(io, decode(1:len))
    else
        # `len-12:len` spans exactly 13 columns. The previous `len-13:len`
        # spanned 14, so a 27 nt sequence was printed in full plus "...".
        print(io, decode(1:13), "...", decode(len-12:len))
    end
    return nothing
end

0 comments on commit d1dd370

Please sign in to comment.