-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 1692746
Showing
136 changed files
with
12,878 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# History files | ||
.Rhistory | ||
.Rapp.history | ||
|
||
# Session Data files | ||
.RData | ||
.RDataTmp | ||
|
||
# User-specific files | ||
.Ruserdata | ||
|
||
# Example code in package build process | ||
*-Ex.R | ||
|
||
# Output files from R CMD build | ||
/*.tar.gz | ||
|
||
# Output files from R CMD check | ||
/*.Rcheck/ | ||
|
||
# RStudio files | ||
.Rproj.user/ | ||
*.Rproj | ||
# produced vignettes | ||
vignettes/*.html | ||
vignettes/*.pdf | ||
|
||
# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 | ||
.httr-oauth | ||
|
||
# knitr and R markdown default cache directories | ||
*_cache/ | ||
/cache/ | ||
|
||
# Temporary files created by R markdown | ||
*.utf8.md | ||
*.knit.md | ||
|
||
# R Environment Variables | ||
.Renviron | ||
|
||
# pkgdown site | ||
docs/ | ||
|
||
# translation temp files | ||
po/*~ | ||
|
||
# RStudio Connect folder | ||
rsconnect/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Basic configuration file for ONT Guppy basecaller software. | ||
|
||
# Data trimming. | ||
trim_strategy = dna | ||
trim_threshold = 2.5 | ||
trim_min_events = 3 | ||
|
||
# Basecalling. | ||
model_file = ./BrdU_Basecalling/model_adversial_bug.json | ||
chunk_size = 2000 | ||
gpu_runners_per_device = 4 | ||
chunks_per_runner = 512 | ||
chunks_per_caller = 10000 | ||
overlap = 50 | ||
qscore_offset = 0.4364 | ||
qscore_scale = 0.8409 | ||
builtin_scripts = 1 | ||
|
||
# Calibration strand detection | ||
calib_reference = lambda_3.6kb.fasta | ||
calib_min_sequence_length = 3000 | ||
calib_max_sequence_length = 3800 | ||
calib_min_coverage = 0.6 | ||
|
||
# Output. | ||
records_per_fastq = 4000 | ||
min_qscore = 7.0 | ||
|
||
# Telemetry | ||
#ping_url = https://ping.oxfordnanoportal.com/basecall | ||
#ping_segment_duration = 60 |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# NanoTiming | ||
### Laurent Lacroix (laurent.lacroix@inserm.fr) | ||
*** | ||
### BrdU Base calling | ||
|
||
Raw data are available from ENA repository under accession number PRJEB76824. | ||
A fast5 sample from the WT_rep3 is available from the Zenodo repository under the DOI [10.5281/zenodo.12668295](https://doi.org/10.5281/zenodo.12668295). | ||
|
||
In order to test this procedure, the WT_rep3.tar.gz file should be downloaded and expanded into a WT_rep3/fast5 folder. | ||
|
||
This procedure is for ONT R9.4.1 data type acquired in a fast5 format. | ||
|
||
*basecalling_sample.sh* contains a example of the base calling procedure going from RawData to the megalodon mod_mappings.bam file. | ||
|
||
The mod_mappings.bam data should then be processed using the script in the *NanoTiming_Data* folder. | ||
|
||
For this project, we have built a reference genome from ONT and Ilumina reads the the BT1 yeast strain. This strain was reported in Theulot et all 2022. The procedure used to build this reference genome is explained in the [manuscript](https://doi.org/10.1101/2024.07.05.602252) and the corresponding annotation is detailed in the *Reference_Genome* folder. | ||
|
||
*** |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# Sample of the basecalling procedure used with our BrdU trained model | ||
# This requires the ONT megalodon and guppy software | ||
# We used guppy v4.4.1 GPU and megalogon v2.2.9 with minimap2 v2.24. | ||
# the configuration file should point to the modified base model | ||
# see BrdU_Configutation4megalodon.cfg | ||
# (the model file is provided as a compressed archive (model_adversial_bug.json.zip)) | ||
# a conda env was created using the yml file provided within miniconda3 | ||
conda env create -f meg2.2.9env.yml | ||
# then within this env, we used megalodon with the following parameters | ||
conda activate mega_2.2.9 | ||
WORKDIR=NanoTiming | ||
EXP=WT_rep3 | ||
REF="$WORKDIR"/Reference_Genome/BT1_multiUra.fa | ||
|
||
GUPPYPATH="~/src/ont-guppy_4.4.1" | ||
CFG=BrdU_Configuration4megalodon.cfg | ||
# the config file is placed in the "$GUPPYPATH"/data folder | ||
INPUT="$WORKDIR"/data/"$EXP"/fast5 | ||
OUTPUT="$WORKDIR"/data/"$EXP"/mega | ||
|
||
nohup megalodon "$INPUT" --guppy-server-path "$GUPPYPATH"/bin/guppy_basecall_server --outputs mod_mappings --reference "$REF" --output-dir "$OUTPUT" --guppy-config "$CFG" --disable-mod-calibration --overwrite --device cuda:0 --processes 20 --guppy-timeout 90 --mod-min-prob 0 & | ||
# please notice than for experiment with many long reads, we had to increase the guppy-timeout to 600 | ||
# also notice that this computer was equiped with a NVidia RTX2080Ti GPU | ||
|
||
# this generate a megalodon's style mod-mappings.bam file in the "$EXP"/mega folder |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
name: megalodon_2.2.9 | ||
channels: | ||
- conda-forge | ||
- defaults | ||
dependencies: | ||
- _libgcc_mutex=0.1=conda_forge | ||
- _openmp_mutex=4.5=2_gnu | ||
- ca-certificates=2022.12.7=ha878542_0 | ||
- certifi=2016.9.26=py36_0 | ||
- ld_impl_linux-64=2.40=h41732ed_0 | ||
- libffi=3.4.2=h7f98852_5 | ||
- libgcc-ng=12.2.0=h65d4601_19 | ||
- libgomp=12.2.0=h65d4601_19 | ||
- libnsl=2.0.0=h7f98852_0 | ||
- libsqlite=3.40.0=h753d276_0 | ||
- libstdcxx-ng=12.2.0=h46fd767_19 | ||
- libzlib=1.2.13=h166bdaf_4 | ||
- ncurses=6.3=h27087fc_1 | ||
- openssl=1.1.1t=h0b41bf4_0 | ||
- pip=20.0.2=py36_1 | ||
- python=3.6.15=hb7a2778_0_cpython | ||
- python_abi=3.6=2_cp36m | ||
- readline=8.2=h8228510_1 | ||
- setuptools=49.6.0=py36h5fab9bb_3 | ||
- sqlite=3.40.0=h4ff8645_0 | ||
- tk=8.6.12=h27826a3_0 | ||
- wheel=0.34.2=py36_0 | ||
- xz=5.2.6=h166bdaf_0 | ||
- pip: | ||
- cached-property==1.5.2 | ||
- cycler==0.11.0 | ||
- cython==0.29.34 | ||
- h5py==3.1.0 | ||
- importlib-resources==5.4.0 | ||
- kiwisolver==1.3.1 | ||
- mappy==2.24 | ||
- matplotlib==3.3.4 | ||
- megalodon==2.2.9 | ||
- numpy==1.19.5 | ||
- ont-fast5-api==4.1.1 | ||
- ont-pyguppy-client-lib==4.4.1 | ||
- packaging==21.3 | ||
- pandas==1.1.5 | ||
- pillow==8.4.0 | ||
- progressbar33==2.4 | ||
- pyparsing==3.0.9 | ||
- pysam==0.21.0 | ||
- python-dateutil==2.8.2 | ||
- pytz==2023.3 | ||
- scipy==1.5.4 | ||
- seaborn==0.11.2 | ||
- six==1.16.0 | ||
- sklearn==0.0.post4 | ||
- tqdm==4.64.1 | ||
- zipp==3.6.0 | ||
prefix: /users/rce/hyrien/miniconda3/envs/megalodon_2.2.9 |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
### Figure 1b | ||
### plot BRdU content in individual reads | ||
|
||
suppressMessages(library(tidyverse)) | ||
library(patchwork) | ||
library(ggprism) | ||
theme_set(theme_bw()) | ||
`%+%` <- paste0 | ||
|
||
setwd("/Users/ll/work/RStudioProjects/NanoTiming") | ||
path2fig <- "Figures_Article/Figures_pdf/" | ||
path2data <- "Figures_Article/Figures_data/" | ||
|
||
nanot_data <- bind_rows( | ||
readRDS(path2data %+% "nanoT_WT_rep1_alldata.rds"), | ||
readRDS(path2data %+% "nanoT_WT_10_alldata.rds"), | ||
readRDS(path2data %+% "nanoT_WT_20_alldata.rds"), | ||
readRDS(path2data %+% "nanoT_WT_100_alldata.rds"), | ||
readRDS(path2data %+% "nanoT_WT_1000_alldata.rds") | ||
) | ||
|
||
# Figure1b BrdU variation in reads | ||
# filtering non substituted reads | ||
|
||
set.seed(37) | ||
|
||
toplot <- nanot_data %>% | ||
mutate(dose=as_factor(dose))%>% | ||
mutate(length=end-start+1)%>% | ||
filter(length>75000)%>% | ||
unnest(cols = c(signalr))%>% | ||
group_by(read_id)%>% | ||
mutate(median_b_read=median(signal,na.rm=T))%>% | ||
mutate(mean_b_read=mean(signal,na.rm=T))%>% | ||
filter(median_b_read> 0.02)%>% | ||
ungroup()%>% | ||
nest(cols = c(positions,signal))%>% | ||
#subset of 50 reads per dose | ||
group_by(dose)%>% | ||
sample_n(50)%>% | ||
arrange(median_b_read)%>% | ||
mutate(track_i=row_number())%>% | ||
unnest(cols = c(cols))%>% | ||
mutate(x=positions-start) %>% | ||
ungroup() | ||
|
||
f1b <- ggplot(toplot)+ | ||
geom_line(aes(x=x,y=track_i,group=paste(read_id),color=signal),linewidth=1)+ | ||
facet_wrap(~dose,ncol=5)+ | ||
scale_color_viridis_c( option = "C",limits=c(0,1))+ | ||
labs(x="Position along the read (kb)", y="", color="BrdU content")+ | ||
scale_x_continuous(guide = "prism_minor", | ||
labels=scales::unit_format(big.mark ="",suffix="",scale=1e-3,sep=""), | ||
expand=expansion(mult=c(0,0.01)))+ | ||
scale_y_continuous(expand=expansion(mult=c(0.01,0.015)))+ | ||
theme_bw()+ | ||
theme(axis.text.y=element_blank(),axis.ticks.y=element_blank())+ | ||
ggtitle("Figure 1b") | ||
ggsave(plot=f1b, file=path2fig %+% "Fig1b.pdf",w=5.5,h=3) | ||
|
||
|
||
f1b <- ggplot(toplot)+ | ||
# geom_line(aes(x=x,y=track_i,group=paste(read_id),color=signal),linewidth=1)+ | ||
ggrastr::rasterise(geom_line(aes(x=x,y=track_i,group=paste(read_id),color=signal),linewidth=0.8),dpi=1200)+ | ||
facet_wrap(~dose,ncol=5)+ | ||
scale_color_viridis_c( option = "C",limits=c(0,1))+ | ||
labs(x="Position along the read (kb)", y="", color="BrdU content")+ | ||
scale_x_continuous(guide = "prism_minor", | ||
labels=scales::unit_format(big.mark ="",suffix="",scale=1e-3,sep=""), | ||
expand=expansion(mult=c(0,0.01)))+ | ||
scale_y_continuous(expand=expansion(mult=c(0.01,0.015)))+ | ||
theme_bw()+ | ||
theme(axis.text.y=element_blank(),axis.ticks.y=element_blank()) | ||
|
||
#ggsave(plot=f1b, file=path2fig %+% "Fig1b.pdf",w=5.5,h=3) | ||
ggsave(plot=f1b, file=path2fig %+% "Fig1b_rast.pdf",w=5.5,h=3) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
#### Figure 1c | ||
#### comparing nanoT data to sortseq vs BrdU | ||
|
||
suppressMessages(library(tidyverse)) | ||
library(ggpubr) | ||
theme_set(theme_bw()) | ||
mypal <- c(paletteer::paletteer_d("ggthemes::Classic_20"),"grey40","black") | ||
`%+%` <- paste0 | ||
|
||
setwd("/Users/ll/work/RStudioProjects/NanoTiming") | ||
path2fig <- "Figures_Article/Figures_pdf/" | ||
path2data <- "Figures_Article/Figures_data/" | ||
|
||
### Import Sortseq data | ||
|
||
SortSeq_WT <- readRDS(path2data %+% "sortseq_WT.rds") | ||
|
||
#### Import nanoT data | ||
|
||
nanoT05 <- readRDS(path2data %+% "nanoT_WT_rep1.rds") | ||
nanoT10 <- readRDS(path2data %+% "nanoT_WT_10.rds") | ||
nanoT20 <- readRDS(path2data %+% "nanoT_WT_20.rds") | ||
nanoT100 <- readRDS(path2data %+% "nanoT_WT_100.rds") | ||
nanoT1000 <- readRDS(path2data %+% "nanoT_WT_1000.rds") | ||
|
||
### Plot | ||
toplot_nano <- bind_rows(nanoT05,nanoT10,nanoT20,nanoT100,nanoT1000) %>% rename(raw=mean_br_bin) | ||
|
||
toplot <- full_join(toplot_nano,SortSeq_WT) %>% mutate(BrdU=factor(BrdU,levels=c("5 µM","10 µM","20 µM","100 µM","1000 µM"))) | ||
|
||
f1c <- ggplot(toplot,aes(x=timing,y=raw,col=BrdU))+ | ||
geom_point(size=0.2,alpha=0.2,shape=16)+ | ||
scale_color_manual(values=mypal,guide = guide_legend(reverse = TRUE) )+ | ||
geom_smooth(method="lm",se=F)+ | ||
stat_cor(label.x=2.1,label.y = c(0,0.05,0.1,0.15,0.2),method="spearman",cor.coef.name="rho",show.legend=F,aes(label=after_stat(r.label)))+ | ||
ylab("Mean BrdU content")+ | ||
xlab("Relative copy number (from sort-seq)")+ | ||
ggtitle("Figure 1c") | ||
|
||
ggsave(plot=f1c,file=path2fig %+% "Fig1c.pdf",h=3,w=4.5) | ||
|
||
|
||
f1c <- ggplot(toplot,aes(x=timing,y=raw,col=BrdU))+ | ||
# geom_point(size=0.2,alpha=0.2,shape=16)+ | ||
ggrastr::rasterise(geom_point(size=0.2,alpha=0.1,shape=16),dpi=600)+ | ||
scale_color_manual(values=mypal,guide = guide_legend(reverse = TRUE) )+ | ||
geom_smooth(method="lm",se=F)+ | ||
stat_cor(label.x=2.1,label.y = c(0,0.05,0.1,0.15,0.2),method="spearman",cor.coef.name="rho",show.legend=F,aes(label=after_stat(r.label)))+ | ||
ylab("Mean BrdU content")+ | ||
xlab("Relative copy number (from sort-seq)") | ||
|
||
#ggsave(plot=f1c,file=path2fig %+% "Fig1c.pdf",h=3,w=4.5) | ||
ggsave(plot=f1c,file=path2fig %+% "Fig1c_rast.pdf",h=3,w=4.5) | ||
|
Oops, something went wrong.