-
Notifications
You must be signed in to change notification settings - Fork 2
/
03-step_by_step_scRNA_seq_pipeline.Rmd
1477 lines (1266 loc) · 66.1 KB
/
03-step_by_step_scRNA_seq_pipeline.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Step-by-step scRNA-seq Pipeline
## Before you begin
- Load the R packages.
```{R, eval=FALSE}
# sc libraries
library(Seurat)
library(phateR)
library(DoubletFinder)
library(monocle)
library(slingshot)
library(GSVA)
library(limma)
library(plyr)
library(dplyr)
library(org.Mm.eg.db)
library(org.Hs.eg.db)
library(CellChat)
library(velocyto.R)
library(SeuratWrappers)
library(stringr)
library(scran)
library(ggpubr)
library(viridis)
library(pheatmap)
library(parallel)
library(reticulate)
library(SCENIC)
library(feather)
library(AUCell)
library(RcisTarget)
library(Matrix)
library(foreach)
library(doParallel)
library(clusterProfiler)
library(GPTCelltype)
library(openai)
library(HematoMap)
# st libraries
library(RColorBrewer)
library(Rfast2)
library(SeuratDisk)
library(abcCellmap)
library(biomaRt)
library(copykat)
library(gelnet)
library(ggplot2)
library(parallelDist)
library(patchwork)
library(markdown)
# getpot
library(getopt)
library(tools)
# HemaScopeR
library(HemaScopeR)
```
- Set the paths for the output results, and the Python installation.
```{R, eval=FALSE}
output.dir = './output'
pythonPath = '/home/anaconda3/envs/HemaScopeR/bin/python'
```
- Create folders for saving the results of HemaScopeR analysis.
```{R, eval=FALSE}
wdir <- getwd()
if(is.null(pythonPath)==FALSE){ reticulate::use_python(pythonPath) }else{print('Please set the path of Python.')}
if (!file.exists(paste0(output.dir, '/HemaScopeR_results'))) {
dir.create(paste0(output.dir, '/HemaScopeR_results'),recursive =T)
}
output.dir <- paste0(output.dir,'/HemaScopeR_results')
if (!file.exists(paste0(output.dir, '/RDSfiles/'))) {
dir.create(paste0(output.dir, '/RDSfiles/'))
}
#set the path for loading previous results, if necessary
previous_results_path <- paste0(output.dir, '/RDSfiles/')
# if (Whether_load_previous_results) {
# print('Loading the previous results...')
# Load_previous_results(previous_results_path = previous_results_path)
# }
```
## Step 1. Load the input data
- Create a folder for step1
```{R, eval=FALSE}
print('Step1. Input data.')
if (!file.exists(paste0(output.dir, '/Step1.Input_data/'))) {
dir.create(paste0(output.dir, '/Step1.Input_data/'))
}
```
- Set the parameters for loading the data sets.
```{R, eval=FALSE}
input.data.dirs = c('./SRR7881399/outs/filtered_feature_bc_matrix')#,
#'./SRR7881400/outs/filtered_feature_bc_matrix',
#'./SRR7881401/outs/filtered_feature_bc_matrix',
#'./SRR7881402/outs/filtered_feature_bc_matrix',
#'./SRR7881403/outs/filtered_feature_bc_matrix'
project.names = c('SRR7881399')#,
#'SRR7881400',
#'SRR7881401',
#'SRR7881402',
#'SRR7881403'
gene.column = 2
min.cells = 10
min.feature = 200
mt.pattern = '^MT-' # set '^mt-' for mouse data
Step1_Input_Data.type = 'cellranger-count'
loom.files.path ="./SRR7881399/loom"
```
- Load the data sets
```{R, eval=FALSE}
file.copy(from = input.data.dirs, to = paste0(output.dir,'/Step1.Input_data/'), recursive = TRUE)
if(Step1_Input_Data.type == 'cellranger-count'){
if(length(input.data.dirs) > 1){
input.data.list <- c()
for (i in 1:length(input.data.dirs)) {
sc_data.temp <- Read10X(data.dir = input.data.dirs[i],
gene.column = gene.column)
sc_object.temp <- CreateSeuratObject(counts = sc_data.temp,
project = project.names[i],
min.cells = min.cells,
min.feature = min.feature)
sc_object.temp[["percent.mt"]] <- PercentageFeatureSet(sc_object.temp, pattern = mt.pattern)
input.data.list <- c(input.data.list, sc_object.temp)}
}else{
sc_data <- Read10X(data.dir = input.data.dirs,
gene.column = gene.column)
sc_object <- CreateSeuratObject(counts = sc_data,
project = project.names,
min.cells = min.cells,
min.feature = min.feature)
sc_object[["percent.mt"]] <- PercentageFeatureSet(sc_object, pattern = mt.pattern)
}
}else if(Step1_Input_Data.type == 'Seurat'){
if(length(input.data.dirs) > 1){
input.data.list <- c()
for (i in 1:length(input.data.dirs)) {
sc_object.temp <- readRDS(input.data.dirs[i])
sc_object.temp[["percent.mt"]] <- PercentageFeatureSet(sc_object.temp, pattern = mt.pattern)
input.data.list <- c(input.data.list, sc_object.temp)
}
}else{
sc_object <- readRDS(input.data.dirs)
sc_object[["percent.mt"]] <- PercentageFeatureSet(sc_object, pattern = mt.pattern)
}
}else if(Step1_Input_Data.type == 'Matrix'){
if(length(input.data.dirs) > 1){
input.data.list <- c()
for (i in 1:length(input.data.dirs)) {
sc_data.temp <- readRDS(input.data.dirs[i])
sc_object.temp <- CreateSeuratObject(counts = sc_data.temp,
project = project.names[i],
min.cells = min.cells,
min.feature = min.feature)
sc_object.temp[["percent.mt"]] <- PercentageFeatureSet(sc_object.temp, pattern = mt.pattern)
input.data.list <- c(input.data.list, sc_object.temp)}
}else{
sc_data <- readRDS(input.data.dirs)
sc_object <- CreateSeuratObject(counts = sc_data,
project = project.names,
min.cells = min.cells,
min.feature = min.feature)
sc_object[["percent.mt"]] <- PercentageFeatureSet(sc_object, pattern = mt.pattern)
}
}else{
stop('Please input data generated by the cellranger-count software, or a Seurat object, or a gene expression matrix. HemaScopeR does not support other formats of input data.')
}
```
- Save the variables after executing each step, if necessary.
```{R, eval=FALSE}
# Get the names of all variables in the current environment
variable_names <- ls()
# Loop through the variable names and save them as RDS files
for (var_name in variable_names) {
var <- get(var_name) # Get the variable by its name
saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name
}
```
## Step 2. Quality Control
In this step, the following quality control steps will be performed:
1. Normalize data using the LogNormalize method.
2. Find variable features using the vst method.
3. Scale data using the identified variable features and specified variables to regress out.
4. Perform principal component analysis (PCA) on the scaled data.
5. Find K nearest neighbors based on PCA dimensions.
6. Perform clustering analysis based on the found neighbors.
7. Optionally, remove doublets using doubletFinder.
8. Optionally, integrate multiple datasets by removing batch effects.
### Function arguments:
* `nFeature_RNA.limit`: The cutoff of the minimum number of detected genes in each cell.
* `percent.mt.limit`: The cutoff of the maximum percentage of mitochondria genes in each cell.
* `scale.factor`: The scale factor for the 'data' slot in the seurat object.
* `nfeatures`: The number of selected highly variable features for down stream analysis.
* `ndims`: The number of principle components in PCA.
* `vars.to.regress`: Variables to regress out (previously latent.vars in RegressOut). For example, nUMI, or percent.mito. (ScaleData in Seurat)
* `PCs`: Which dimensions to use as input features.(RunTSNE and RunUMAP in Seurat)
* `resolution`: Value of the resolution parameter, use a value above (below) 1.0 if you want to obtain a larger (smaller) number of communities. (FindClusters in Seurat)
* `n.neighbors`: Defines k for the k-nearest neighbor algorithm. (FindNeighbors in Seurat)
* `percentage`: Assuming 'percentage' doublet formation rate - tailor for your dataset. The default value is 0.05.
* `doublerFinderwraper.PCs` Which dimensions to use as input features for doubletFinder.
* `doublerFinderwraper.pN`: The percentage of real-artifical data for doubletFinder.
* `doublerFinderwraper.pK`: The pK parameter controls the doublet cell detection by determining the number of nearest neighbors and influencing the calculation of pANN scores and the final cell classification results. Adjusting the pK value allows optimization of the doublet cell detection process based on specific data and analysis requirements.
### codes for running step2
- Create a folder for saving the results of quality control.
```{R, eval=FALSE}
print('Step2. Quality control.')
if (!file.exists(paste0(output.dir, '/Step2.Quality_control/'))) {
dir.create(paste0(output.dir, '/Step2.Quality_control/'))
}
```
- Set the parameters for quality control.
```{R, eval=FALSE}
# quality control
nFeature_RNA.limit = 200
percent.mt.limit = 20
# preprocessing
nfeatures = 3000
scale.factor = 10000
ndims = 50
vars.to.regress = NULL
PCs = 1:35
resolution = 0.4
n.neighbors = 50
# removing doublets
Step2_Quality_Control.RemoveDoublets = TRUE
doublet.percentage = 0.04
doublerFinderwraper.PCs = 1:20
doublerFinderwraper.pN = 0.25
doublerFinderwraper.pK = 0.1
# removing batch effect
Step2_Quality_Control.RemoveBatches = TRUE
```
- Run the quality control process.
```{R, eval=FALSE}
if(length(input.data.dirs) > 1){
# preprocess and quality control for multiple scRNA-Seq data sets
sc_object <- QC_multiple_scRNASeq(seuratObjects = input.data.list,
datasetID = project.names,
output.dir = paste0(output.dir,'/Step2.Quality_control/'),
Step2_Quality_Control.RemoveBatches = Step2_Quality_Control.RemoveBatches,
Step2_Quality_Control.RemoveDoublets = Step2_Quality_Control.RemoveDoublets,
nFeature_RNA.limit = nFeature_RNA.limit,
percent.mt.limit = percent.mt.limit,
scale.factor = scale.factor,
nfeatures = nfeatures,
ndims = ndims,
vars.to.regress = vars.to.regress,
PCs = PCs,
resolution = resolution,
n.neighbors = n.neighbors,
percentage = doublet.percentage,
doublerFinderwraper.PCs = doublerFinderwraper.PCs,
doublerFinderwraper.pN = doublerFinderwraper.pN,
doublerFinderwraper.pK = doublerFinderwraper.pK
)
}else{
# preprocess and quality control for single scRNA-Seq data set
sc_object <- QC_single_scRNASeq(sc_object = sc_object,
datasetID = project.names,
output.dir = paste0(output.dir,'/Step2.Quality_control/'),
Step2_Quality_Control.RemoveDoublets = Step2_Quality_Control.RemoveDoublets,
nFeature_RNA.limit = nFeature_RNA.limit,
percent.mt.limit = percent.mt.limit,
scale.factor = scale.factor,
nfeatures = nfeatures,
vars.to.regress = vars.to.regress,
ndims = ndims,
PCs = PCs,
resolution = resolution,
n.neighbors = n.neighbors,
percentage = doublet.percentage,
doublerFinderwraper.PCs = doublerFinderwraper.PCs,
doublerFinderwraper.pN = doublerFinderwraper.pN,
doublerFinderwraper.pK = doublerFinderwraper.pK)
}
```
- Save the variables.
```{R, eval=FALSE}
# Get the names of all variables in the current environment
variable_names <- ls()
# Loop through the variable names and save them as RDS files
for (var_name in variable_names) {
var <- get(var_name) # Get the variable by its name
saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name
}
```
### Outputs
```{r echo=FALSE,fig.cap="Violin plots showing the nFeature, nCount and percent.mt for each sample", out.width = "800px" , fig.align = 'center'}
knitr::include_graphics("./sc_img/step2/sc_object_nFeature_nCount_percentMt.png")
```
```{r echo=FALSE,fig.cap="Figures showing the correlation between nFeature and nCount, as well as between nCount and percent.mt", out.width = "600px" , fig.align = 'center'}
knitr::include_graphics("./sc_img/step2/sc_object_FeatureScatters.png")
```
```{r echo=FALSE,fig.cap="Figures showing the variable features used for downstream analysis", out.width = "400px" , fig.align = 'center'}
knitr::include_graphics("./sc_img/step2/sc_object_VariableFeaturePlot.png")
```
```{r echo=FALSE,fig.cap="ElbowPlot showing suitable number of PCs used for further analysis", out.width = "400px" , fig.align = 'center'}
knitr::include_graphics("./sc_img/step2/sc_object_ElbowPlot.png")
```
```{r echo=FALSE,fig.cap="UMAP plots before (left) and after (right) batch removal", out.width = "600px" , fig.align = 'center', out.width='50%', fig.show='hold', eval=FALSE}
#knitr::include_graphics("./sc_img/step2/datasets.before.batch.removal_umap_datasetID.png")
#knitr::include_graphics("./sc_img/step2/datasets.after.batch.removal_umap_datasetID.png")
```
```{r echo=FALSE,fig.cap="UMAP plot showing doublets found by DoubletFinder", out.width = "400px" , fig.align = 'center'}
knitr::include_graphics("./sc_img/step2/sc_object_findDoublets.png")
```
## Step 3. Clustering
- Create a folder for saving the results of Louvain clustering.
```{R, eval=FALSE}
print('Step3. Clustering.')
if (!file.exists(paste0(output.dir, '/Step3.Clustering/'))) {
dir.create(paste0(output.dir, '/Step3.Clustering/'))
}
```
- Set the parameters for clustering.
```{R, eval=FALSE}
PCs = 1:35
resolution = 0.4
n.neighbors = 50
```
- Run Louvian clustering.
```{R, eval=FALSE}
if( (length(input.data.dirs) > 1) & Step2_Quality_Control.RemoveBatches ){graph.name <- 'integrated_snn'}else{graph.name <- 'RNA_snn'}
sc_object <- FindNeighbors(sc_object, dims = PCs, k.param = n.neighbors, force.recalc = TRUE)
sc_object <- FindClusters(sc_object, resolution = resolution, graph.name = graph.name)
sc_object@meta.data$seurat_clusters <- as.character(as.numeric(sc_object@meta.data$seurat_clusters))
# plot clustering
pdf(paste0(paste0(output.dir,'/Step3.Clustering/'), '/sc_object ','tsne_cluster.pdf'), width = 6, height = 6)
print(DimPlot(sc_object, reduction = "tsne", group.by = "seurat_clusters", label = FALSE, pt.size = 0.1, raster = FALSE))
dev.off()
pdf(paste0(paste0(output.dir,'/Step3.Clustering/'), '/sc_object ','umap_cluster.pdf'), width = 6, height = 6)
print(DimPlot(sc_object, reduction = "umap", group.by = "seurat_clusters", label = FALSE, pt.size = 0.1, raster = FALSE))
dev.off()
png(paste0(paste0(output.dir,'/Step3.Clustering/'), '/sc_object ','tsne_cluster.png'), width = 600, height = 600)
print(DimPlot(sc_object, reduction = "tsne", group.by = "seurat_clusters", label = FALSE, pt.size = 0.1, raster = FALSE))
dev.off()
png(paste0(paste0(output.dir,'/Step3.Clustering/'), '/sc_object ','umap_cluster.png'), width = 600, height = 600)
print(DimPlot(sc_object, reduction = "umap", group.by = "seurat_clusters", label = FALSE, pt.size = 0.1, raster = FALSE))
dev.off()
```
- Save the variables.
```{R, eval=FALSE}
# Get the names of all variables in the current environment
variable_names <- ls()
# Loop through the variable names and save them as RDS files
for (var_name in variable_names) {
var <- get(var_name) # Get the variable by its name
saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name
}
```
```{r echo=FALSE,fig.cap="UMAP plot showing clustering results", out.width = "400px" , fig.align = 'center'}
knitr::include_graphics("./sc_img/step3/sc_object umap_cluster.png")
```
## Step 4. Identify Cell Types
In this step, users can predict the cell types of hematopoietic cells by implementing two approaches (Scmap and Seurat) through abcCellmap packages. Cells are labeled by 43 different RNA clusters according to unsupervised clustering of single-cell transcriptional profiles, and also labeled by 32 immunophenotypic cell types. In addition, users can use Copykat to measure copy number variation (CNV) and determine the ploidy of each cell.
### codes for running abcCellmap
- Create a folder for saving the results of cell type identification.
```{R, eval=FALSE}
print('Step4. Identify cell types automatically.')
if (!file.exists(paste0(output.dir, '/Step4.Identify_Cell_Types/'))) {
dir.create(paste0(output.dir, '/Step4.Identify_Cell_Types/'))
}
```
- Set the path for the database.
```{R, eval=FALSE}
databasePath = "~/HemaScopeR/database/"
```
- Set the parameters for cell type identification.
```{R, eval=FALSE}
Step4_Use_Which_Labels = 'clustering'
Step4_Cluster_Labels = NULL
Step4_Changed_Labels = NULL
Org = 'hsa'
ncores = 10
```
- Run the cell type identification process.
```{R, eval=FALSE}
sc_object <- run_cell_annotation(object = sc_object,
assay = 'RNA',
species = Org,
output.dir = paste0(output.dir,'/Step4.Identify_Cell_Types/'))
if(Org == 'hsa'){
load(paste0(databasePath,"/HematoMap.reference.rdata")) #the data can be downloaded via the link https://cloud.tsinghua.edu.cn/d/759fd04333274d3f9946
if(length(intersect(rownames(HematoMap.reference), rownames(sc_object))) < 1000){
HematoMap.reference <- RenameGenesSeurat(obj = HematoMap.reference,
newnames = toupper(rownames(HematoMap.reference)),
gene.use = rownames(HematoMap.reference),
de.assay = "RNA",
lassays = "RNA")
}
if(sc_object@active.assay == 'integrated'){
DefaultAssay(sc_object) <- 'RNA'
sc_object <- mapDataToRef(ref_object = HematoMap.reference,
ref_labels = HematoMap.reference@meta.data$CellType,
query_object = sc_object,
PCs = PCs,
output.dir = paste0(output.dir, '/Step4.Identify_Cell_Types/'))
DefaultAssay(sc_object) <- 'integrated'
}else{
sc_object <- mapDataToRef(ref_object = HematoMap.reference,
ref_labels = HematoMap.reference@meta.data$CellType,
query_object = sc_object,
PCs = PCs,
output.dir = paste0(output.dir, '/Step4.Identify_Cell_Types/'))
}
}
```
- Set the cell labels.
```{R, eval=FALSE}
# set the cell labels
if(Step4_Use_Which_Labels == 'clustering'){
sc_object@meta.data$selectLabels <- sc_object@meta.data$seurat_clusters
Idents(sc_object) <- sc_object@meta.data$selectLabels
}else if(Step4_Use_Which_Labels == 'abcCellmap.1'){
sc_object@meta.data$selectLabels <- sc_object@meta.data$Seurat.RNACluster
Idents(sc_object) <- sc_object@meta.data$selectLabels
}else if(Step4_Use_Which_Labels == 'abcCellmap.2'){
sc_object@meta.data$selectLabels <- sc_object@meta.data$scmap.RNACluster
Idents(sc_object) <- sc_object@meta.data$selectLabels
}else if(Step4_Use_Which_Labels == 'abcCellmap.3'){
sc_object@meta.data$selectLabels <- sc_object@meta.data$Seurat.Immunophenotype
Idents(sc_object) <- sc_object@meta.data$selectLabels
}else if(Step4_Use_Which_Labels == 'abcCellmap.4'){
sc_object@meta.data$selectLabels <- sc_object@meta.data$scmap.Immunophenotype
Idents(sc_object) <- sc_object@meta.data$selectLabels
}else if(Step4_Use_Which_Labels == 'HematoMap'){
if(Org == 'hsa'){
sc_object@meta.data$selectLabels <- sc_object@meta.data$predicted.id
Idents(sc_object) <- sc_object@meta.data$selectLabels
}else{print("'HematoMap' is only applicable to human data ('Org' = 'hsa').")}
}else if(Step4_Use_Which_Labels == 'changeLabels'){
if (!is.null(Step4_Cluster_Labels) && !is.null(Step4_Changed_Labels) && length(Step4_Cluster_Labels) == length(Step4_Changed_Labels)){
sc_object@meta.data$selectLabels <- plyr::mapvalues(sc_object@meta.data$seurat_clusters,
from = as.character(Step4_Cluster_Labels),
to = as.character(Step4_Changed_Labels),
warn_missing = FALSE)
Idents(sc_object) <- sc_object@meta.data$selectLabels
}else{
print("Please input the 'Step4_Cluster_Labels' parameter as Seurat clustering labels, and the 'Step4_Changed_Labels' parameter as new labels. Please note that these two parameters should be of equal length.")
}
}else{
print('Please set the "Step4_Use_Which_Labels" parameter as "clustering", "abcCellmap.1", "abcCellmap.2", "HematoMap" or "changeLabels".')
}
```
- Save the variables.
```{R, eval=FALSE}
# Get the names of all variables in the current environment
variable_names <- ls()
# Loop through the variable names and save them as RDS files
for (var_name in variable_names) {
var <- get(var_name) # Get the variable by its name
saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name
}
```
```{r echo=FALSE,fig.cap="UMAP plots showing cell type annotation results", out.width = "400px" , fig.align = 'center'}
knitr::include_graphics("./sc_img/step4/predicted.id.png")
```
```{r echo=FALSE,fig.cap="Immunophenotype and RNACluster label predicted by scmap", out.width = "600px" , fig.align = 'center', out.width='50%', fig.show='hold'}
knitr::include_graphics("./sc_img/step4/predicted.scmap.Immunophenotype.png")
knitr::include_graphics("./sc_img/step4/predicted.scmap.RNACluster.png")
```
```{r echo=FALSE,fig.cap="Immunophenotype and RNACluster label predicted by Seurat", out.width = "600px" , fig.align = 'center', out.width='50%', fig.show='hold'}
knitr::include_graphics("./sc_img/step4/predicted.Seurat.Immunophenotype.png")
knitr::include_graphics("./sc_img/step4/predicted.Seurat.RNACluster.png")
```
### codes for running the CNV analysis
```{R, eval=FALSE}
sc_CNV(sc_object=sc_object,
save_path=paste0(output.dir,'/Step4.Identify_Cell_Types/'),
assay = 'RNA',
LOW.DR = 0.05, #refer to the Copykat documentation for detailed explanations of the parameters
UP.DR = 0.1,
win.size = 25,
distance = "euclidean",
genome = NULL,
n.cores = ncores, #note: this step will take a long time, using more ncores could shorten the running time
species = Org)
```
```{r echo=FALSE,fig.cap="copykat heatmap", out.width = "800px" , fig.align = 'center'}
knitr::include_graphics("./sc_img/step4/copykat_heatmap.jpeg")
```
```{r echo=FALSE,fig.cap="UMAP plot showing CNV state predicted by copykat", out.width = "400px" , fig.align = 'center'}
knitr::include_graphics("./sc_img/step4/CNV_state.png")
```
## Step 5. Visualization
In this step, users are allowed to gain the statistical results about the numbers and proportions of cell groups, and also use three dimensional reduction methods (TSNE, UMAP, phateR) to visualize the results.
### codes for peforming three dimensional reduction methods
- Create a folder for saving the visualization results.
```{R, eval=FALSE}
print('Step5. Visualization.')
if (!file.exists(paste0(output.dir, '/Step5.Visualization/'))) {
dir.create(paste0(output.dir, '/Step5.Visualization/'))
}
```
- Perform visualization using UMAP and TSNE.
```{R, eval=FALSE}
# plot cell types
pdf(paste0(paste0(output.dir,'/Step5.Visualization/'), '/sc_object ','tsne cell types.pdf'), width = 6, height = 6)
print(DimPlot(sc_object, reduction = "tsne", group.by = "ident", label = FALSE, pt.size = 0.1, raster = FALSE))
dev.off()
pdf(paste0(paste0(output.dir,'/Step5.Visualization/'), '/sc_object ','umap cell types.pdf'), width = 6, height = 6)
print(DimPlot(sc_object, reduction = "umap", group.by = "ident", label = FALSE, pt.size = 0.1, raster = FALSE))
dev.off()
png(paste0(paste0(output.dir,'/Step5.Visualization/'), '/sc_object ','tsne cell types.png'), width = 600, height = 600)
print(DimPlot(sc_object, reduction = "tsne", group.by = "ident", label = FALSE, pt.size = 0.1, raster = FALSE))
dev.off()
png(paste0(paste0(output.dir,'/Step5.Visualization/'), '/sc_object ','umap cell types.png'), width = 600, height = 600)
print(DimPlot(sc_object, reduction = "umap", group.by = "ident", label = FALSE, pt.size = 0.1, raster = FALSE))
dev.off()
```
```{r echo=FALSE,fig.cap="UMAP and TSNE visualization", out.width = "600px" , fig.align = 'center', out.width='50%', fig.show='hold'}
knitr::include_graphics("./sc_img/step5/sc_object tsne cell types.png")
knitr::include_graphics("./sc_img/step5/sc_object umap cell types.png")
```
- Set the parameters for phateR.
```{R, eval=FALSE}
phate.knn = 50 #The number of nearest neighbors to consider in the phateR algorithm. Default 50.
phate.npca = 20 #The number of principal components to use in the phateR algorithm. Default 20.
phate.t = 10 #The t-value for the phateR algorithm, which controls the level of exploration. Default 10.
phate.ndim = 2 #The number of dimensions for the output embedding in the phateR algorithm. Default 2.
```
- Run phateR for dimensional reduction and visualization.
```{R, eval=FALSE}
# run phateR
if( (length(input.data.dirs) > 1) & Step2_Quality_Control.RemoveBatches ){
DefaultAssay(sc_object) <- 'integrated'
}else{
DefaultAssay(sc_object) <- 'RNA'}
if(!is.null(pythonPath)){
run_phateR(sc_object = sc_object,
output.dir = paste0(output.dir,'/Step5.Visualization/'),
pythonPath = pythonPath,
phate.knn = phate.knn,
phate.npca = phate.npca,
phate.t = phate.t,
phate.ndim = phate.ndim)
}
```
```{r echo=FALSE,fig.cap="phateR result", out.width = "400px" , fig.align = 'center'}
knitr::include_graphics("./sc_img/step5/phateR.png")
```
- Run HematoMap to visualize cluster tree plots.
```{R, eval=FALSE}
run_HematoMap(sc_object = sc_object,
output.dir = paste0(output.dir,'/Step5.Visualization/'))
```
```{r echo=FALSE,fig.cap="HematoMap result", out.width = "400px" , fig.align = 'center'}
knitr::include_graphics("./sc_img/step5/HematoMap.png")
```
### codes for calculating the proportions
- The statistical results for the numbers and proportions of cell groups.
```{R, eval=FALSE}
# statistical results
cells_labels <- as.data.frame(cbind(rownames(sc_object@meta.data), as.character(sc_object@meta.data$selectLabels)))
colnames(cells_labels) <- c('cell_id', 'cluster_id')
cluster_counts <- cells_labels %>%
group_by(cluster_id) %>%
summarise(count = n())
total_cells <- nrow(cells_labels)
cluster_counts <- cluster_counts %>%
mutate(proportion = count / total_cells)
cluster_counts <- as.data.frame(cluster_counts)
cluster_counts$percentages <- scales::percent(cluster_counts$proportion, accuracy = 0.1)
cluster_counts <- cluster_counts[,-which(colnames(cluster_counts)=='proportion')]
cluster_counts$cluster_id_count_percentages <- paste(cluster_counts$cluster_id, " (", cluster_counts$count, ' cells; ', cluster_counts$percentages, ")", sep='')
cluster_counts <- cluster_counts[order(cluster_counts$count, decreasing = TRUE),]
cluster_counts <- rbind(cluster_counts, c('Total', sum(cluster_counts$count), '100%', 'all cells'))
sc_object@meta.data$cluster_id_count_percentages <- mapvalues(sc_object@meta.data$selectLabels,
from=cluster_counts$cluster_id,
to=cluster_counts$cluster_id_count_percentages,
warn_missing=FALSE)
colnames(sc_object@meta.data)[which(colnames(sc_object@meta.data) == 'cluster_id_count_percentages')] <- paste('Total ', nrow(sc_object@meta.data), ' cells', sep='')
cluster_counts <- cluster_counts[,-which(colnames(cluster_counts)=='cluster_id_count_percentages')]
colnames(cluster_counts) <- c('Cell types', 'Cell counts', 'Percentages')
# names(colorvector) <- mapvalues(names(colorvector),
# from=cluster_counts$cluster_id,
# to=cluster_counts$cluster_id_count_percentages,
# warn_missing=FALSE)
write.csv(cluster_counts, file=paste(paste0(output.dir, '/Step5.Visualization/'), '/cell types_cell counts_percentages.csv', sep=''), quote=FALSE, row.names=FALSE)
```
- The UMAP visualization.
```{R, eval=FALSE}
pdf(paste(paste0(output.dir, '/Step5.Visualization'), '/cell types_cell counts_percentages_umap.pdf', sep=''), width = 14, height = 6)
print(DimPlot(sc_object,
reduction = "umap",
group.by = paste('Total ', nrow(sc_object@meta.data), ' cells', sep=''),
label = FALSE,
pt.size = 0.1,
raster = FALSE))
dev.off()
```
- Save the variables.
```{R, eval=FALSE}
# Get the names of all variables in the current environment
variable_names <- ls()
# Loop through the variable names and save them as RDS files
for (var_name in variable_names) {
var <- get(var_name) # Get the variable by its name
saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name
}
```
```{r echo=FALSE,fig.cap="UMAP plot showing cell type and corresponding proportion", out.width = "800px" , fig.align = 'center'}
knitr::include_graphics("./sc_img/step5/cell types_cell counts_percentages_umap.jpg")
```
## Step 6. Find DEGs
In this step, users can find DEGs (differentially expressed genes) across different cell type group using FindAllMarkers, use GPTCelltype to predict cell label, perform GO and KEGG enrichment analysis, and perform subnetwork analysis for each cell type group.
### codes for finding DEGs
- Set the parameters for identifying differentially expressed genes.
```{R, eval=FALSE}
min.pct = 0.25
logfc.threshold = 0.25
```
- Create a folder for the DEGs analysis.
```{R, eval=FALSE}
print('Step6. Find DEGs.')
if (!file.exists(paste0(output.dir, '/Step6.Find_DEGs/'))) {
dir.create(paste0(output.dir, '/Step6.Find_DEGs/'))
}
```
- Identify DEGs using Wilcoxon Rank-Sum Test.
```{R, eval=FALSE}
sc_object.markers <- FindAllMarkers(sc_object, only.pos = TRUE, min.pct = min.pct, logfc.threshold = logfc.threshold)
write.csv(sc_object.markers,
file = paste0(paste0(output.dir, '/Step6.Find_DEGs/'),'sc_object.markerGenes.csv'),
quote=FALSE)
# visualization
sc_object.markers.top5 <- sc_object.markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC)
pdf(paste0(paste0(output.dir, '/Step6.Find_DEGs/'), 'sc_object_markerGenesTop5.pdf'),
width = 0.5*length(unique(sc_object.markers.top5$gene)),
height = 0.5*length(unique(Idents(sc_object))))
print(DotPlot(sc_object,
features = unique(sc_object.markers.top5$gene),
cols=c("lightgrey",'red'))+theme(axis.text.x =element_text(angle = 45, vjust = 1, hjust = 1)))
dev.off()
png(paste0(paste0(output.dir, '/Step6.Find_DEGs/'), 'sc_object_markerGenesTop5.png'),
width = 20*length(unique(sc_object.markers.top5$gene)),
height = 30*length(unique(Idents(sc_object))))
print(DotPlot(sc_object,
features = unique(sc_object.markers.top5$gene),
cols=c("lightgrey",'red'))+theme(axis.text.x =element_text(angle = 45, vjust = 1, hjust = 1)))
dev.off()
```
```{r echo=FALSE,fig.cap="Dotplot showing marker genes of each cell type group", out.width = "600px" , fig.align = 'center'}
knitr::include_graphics("./sc_img/step6/sc_object_markerGenesTop5.png")
```
### codes for using GPTCelltype
- Set the parameters for GPTCelltype.
```{R, eval=FALSE}
your_openai_API_key = ''
tissuename = 'human bone marrow'
gptmodel = 'gpt-3.5'
```
- Use GPTCelltype to assist cell type annotation.
```{R, eval=FALSE}
GPT_annotation( marker.genes = sc_object.markers,
your_openai_API_key = your_openai_API_key,
tissuename = tissuename,
gptmodel = gptmodel,
output.dir = paste0(output.dir, '/Step6.Find_DEGs/'))
```
### Perform GO and KEGG enrichment.
```{R, eval=FALSE}
# GO enrichment
if(Org=='mmu'){
OrgDb <- 'org.Mm.eg.db'
}else if(Org=='hsa'){
OrgDb <- 'org.Hs.eg.db'
}else{
stop("Org should be 'mmu' or 'hsa'.")
}
HemaScopeREnrichment(DEGs=sc_object.markers,
OrgDb=OrgDb,
output.dir=paste0(output.dir, '/Step6.Find_DEGs/'))
```
```{r echo=FALSE,fig.cap="Barplot showing GO(BP)and KEGG enrichment results of each cell type group", out.width = "400px", fig.align = 'center', out.width='50%', fig.show='hold'}
knitr::include_graphics("./sc_img/step6/X0_BP_GOenrich.jpg")
knitr::include_graphics("./sc_img/step6/X0_KEGGenrich.jpg")
```
### Perform subnetwork analysis
- Create a folder for saving the results of gene network analysis.
```{R, eval=FALSE}
if (!file.exists(paste0(output.dir, '/Step6.Find_DEGs/OpenXGR/'))) {
dir.create(paste0(output.dir, '/Step6.Find_DEGs/OpenXGR/'))
}
```
- Perform gene network analysis.
```{R, eval=FALSE}
OpenXGR_SAG(sc_object.markers = sc_object.markers,
output.dir = paste0(output.dir, '/Step6.Find_DEGs/OpenXGR/'),
subnet.size = 10)
```
- Save the variables.
```{R, eval=FALSE}
# Get the names of all variables in the current environment
variable_names <- ls()
# Loop through the variable names and save them as RDS files
for (var_name in variable_names) {
var <- get(var_name) # Get the variable by its name
saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name
}
```
```{r echo=FALSE,fig.cap="Figure showing subnetwork of each cell type group identified by OpenXGR", out.width = "400px" , fig.align = 'center'}
knitr::include_graphics("./sc_img/step6/SAG-network_cluster_1.jpg")
```
## Step 7. Assign Cell Cycles
This step assigns cell cycle phases by analyzing cell cycle-related genes and generates plots of the cell cycle analysis results.
### Function arguments:
* `sc_object`: A Seurat object containing single-cell RNA sequencing data.
* `counts_matrix`: The 'counts' slot in the Seurat object.
* `data_matrix`: The 'data' slot in the Seurat object.
* `cellcycleCutoff`: The cutoff value for distinguishing between cycling and quiescent cells. Cells with a G1G2Score below this cutoff are considered quiescent.
* `cellTypeOrders`: The order of cell types for visualization. If not provided, the function will use the unique cell types in the input Seurat object.
* `databasePath`: The path to the database required for the analysis.
* `Org`: A character vector specifying the species of cell cycle genes, can be 'mmu' (mouse) or 'hsa' (human).
### codes for step7
- Create a folder for saving the results of cell cycle analysis.
```{R, eval=FALSE}
print('Step7. Assign cell cycles.')
if (!file.exists(paste0(output.dir, '/Step7.Assign_cell_cycles/'))) {
dir.create(paste0(output.dir, '/Step7.Assign_cell_cycles/'))
}
```
- Set the parameters for the cell cycle analysis.
```{R, eval=FALSE}
cellcycleCutoff = NULL
```
- Run the cell cycle analysis.
```{R, eval=FALSE}
datasets.before.batch.removal <- readRDS(paste0(paste0(output.dir, '/RDSfiles/'),'datasets.before.batch.removal.rds'))
sc_object <- cellCycle(sc_object=sc_object,
counts_matrix = GetAssayData(object = datasets.before.batch.removal, slot = "counts")%>%as.matrix(),
data_matrix = GetAssayData(object = datasets.before.batch.removal, slot = "data")%>%as.matrix(),
cellcycleCutoff = cellcycleCutoff,
cellTypeOrders = unique(sc_object@meta.data$selectLabels),
output.dir=paste0(output.dir, '/Step7.Assign_cell_cycles/'),
databasePath = databasePath,
Org = Org)
```
- Save the variables.
```{R, eval=FALSE}
# Get the names of all variables in the current environment
variable_names <- ls()
# Loop through the variable names and save them as RDS files
for (var_name in variable_names) {
var <- get(var_name) # Get the variable by its name
saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name
}
```
### Outputs
```{r echo=FALSE,fig.cap="Barplot showing the proportion of different cell cycle within each cell type group", out.width = "400px" , fig.align = 'center'}
knitr::include_graphics("./sc_img/step7/CellCycle.jpg")
```
```{r echo=FALSE,fig.cap="Density plot showing the distribution of cell cycle scores", out.width = "400px" , fig.align = 'center'}
knitr::include_graphics("./sc_img/step7/G1G2Score.jpg")
```
## Step 8. Calculate Heterogeneity
This step quantifies cell heterogeneity by computing Spearman correlation coefficients between cells within the same cell type groups.
### Function arguments:
* `expression_matrix`: A numeric matrix representing the expression data, where rows are genes and columns are cells. The matrix should be appropriately preprocessed and filtered before using this function.
* `cell_types_groups`: A data frame specifying cell type annotations for each cell, including cell type labels and group information.
* `cellTypeOrders`: The order of cell types for visualization. If not provided, the function will use the unique cell types in the input cell_types_groups.
### codes for step8
- Create a folder for saving the results of heterogeneity calculation.
```{R, eval=FALSE}
print('Step8. Calculate heterogeneity.')
if (!file.exists(paste0(output.dir, '/Step8.Calculate_heterogeneity/'))) {
dir.create(paste0(output.dir, '/Step8.Calculate_heterogeneity/'))
}
```
- Run heterogeneity calculation process.
```{R, eval=FALSE}
expression_matrix <- GetAssayData(object = datasets.before.batch.removal, slot = "data")%>%as.matrix()
expression_matrix <- expression_matrix[,rownames(sc_object@meta.data)]
cell_types_groups <- as.data.frame(cbind(sc_object@meta.data$selectLabels,
sc_object@meta.data$datasetID))
colnames(cell_types_groups) <- c('clusters', 'datasetID')
if(is.null(ViolinPlot.cellTypeOrders)){
cellTypes_orders <- unique(sc_object@meta.data$selectLabels)
}else{
cellTypes_orders <- ViolinPlot.cellTypeOrders
}
heterogeneity(expression_matrix = expression_matrix,
cell_types_groups = cell_types_groups,
cellTypeOrders = cellTypes_orders,
output.dir = paste0(output.dir, '/Step8.Calculate_heterogeneity/'))
```
- Save the variables.
```{R, eval=FALSE}
# Get the names of all variables in the current environment
variable_names <- ls()
# Loop through the variable names and save them as RDS files
for (var_name in variable_names) {
var <- get(var_name) # Get the variable by its name
saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name
}
```
```{r echo=FALSE,fig.cap="Box plot showing the Spearman correlation coefficients between cells within the same cell type groups(here we take data including more samples as an example)", out.width = "400px" , fig.align = 'center'}
knitr::include_graphics("./sc_img/step8/Heterogeneity_spearmanCorrelation.png")
```
## Step 9. Violin Plot for Marker Genes
This step generates violin plots for marker genes across different cell types.
### Function arguments:
* `dataMatrix`: A data frame or matrix representing the expression data, where rows are cells and columns are genes.
* `features`: A character vector specifying the marker genes to plot in the violin plots.
* `CellTypes`: A factor vector containing cell type annotations for each cell.
* `cellTypeOrders`: A character vector specifying the order of cell types for plotting. Defaults to unique values in `CellTypes`.
* `cellTypeColors`: A character vector specifying the colors to use for cell type groups. Defaults to a color palette.
### codes for step9
- Create a folder for saving the violin plots of marker genes.
```{R, eval=FALSE}
print('Step9. Violin plot for marker genes.')
if (!file.exists(paste0(output.dir, '/Step9.Violin_plot_for_marker_genes/'))) {
dir.create(paste0(output.dir, '/Step9.Violin_plot_for_marker_genes/'))
}
```
- Run violin plot visualization.
```{R, eval=FALSE}
if( (length(input.data.dirs) > 1) & Step2_Quality_Control.RemoveBatches ){
DefaultAssay(sc_object) <- 'integrated'
}else{
DefaultAssay(sc_object) <- 'RNA'}
dataMatrix <- GetAssayData(object = sc_object, slot = "scale.data")
if(is.null(marker.genes)&(Org == 'mmu')){
# mpp genes are from 'The bone marrow microenvironment at single cell resolution'
# the other genes are from 'single cell characterization of haematopoietic progenitors and their trajectories in homeostasis and perturbed haematopoiesis'
# the aliases of these genes were changed in gecodeM16:Gpr64 -> Adgrg2, Sdpr -> Cavin2, Hbb-b1 -> Hbb-bs, Sfpi1 -> Spi1
HSC_lineage_signatures <- c('Slamf1', 'Itga2b', 'Kit', 'Ly6a', 'Bmi1', 'Gata2', 'Hlf', 'Meis1', 'Mpl', 'Mcl1', 'Gfi1', 'Gfi1b', 'Hoxb5')
Mpp_genes <- c('Mki67', 'Mpo', 'Elane', 'Ctsg', 'Calr')
Erythroid_lineage_signatures <- c('Klf1', 'Gata1', 'Mpl', 'Epor', 'Vwf', 'Zfpm1', 'Fhl1', 'Adgrg2', 'Cavin2','Gypa', 'Tfrc', 'Hbb-bs', 'Hbb-y')
Lymphoid_lineage_signatures <- c('Tcf3', 'Ikzf1', 'Notch1', 'Flt3', 'Dntt', 'Btg2', 'Tcf7', 'Rag1', 'Ptprc', 'Ly6a', 'Blnk')
Myeloid_lineage_signatures <- c('Gfi1', 'Spi1', 'Mpo', 'Csf2rb', 'Csf1r', 'Gfi1b', 'Hk3', 'Csf2ra', 'Csf3r', 'Sp1', 'Fcgr3')
marker.genes <- c(HSC_lineage_signatures, Mpp_genes, Erythroid_lineage_signatures, Lymphoid_lineage_signatures, Myeloid_lineage_signatures)
}else if(is.null(marker.genes)&(Org == 'hsa')){
HSPCs_lineage_signatures <- c('CD34','KIT','AVP','FLT3','MME','CD7','CD38','CSF1R','FCGR1A','MPO','ELANE','IL3RA')
Myeloids_lineage_signatures <- c('LYZ','CD36','MPO','FCGR1A','CD4','CD14','CD300E','ITGAX','FCGR3A','FLT3','AXL',
'SIGLEC6','CLEC4C','IRF4','LILRA4','IL3RA','IRF8','IRF7','XCR1','CD1C','THBD',
'MRC1','CD34','KIT','ITGA2B','PF4','CD9','ENG','KLF','TFRC')
B_cells_lineage_signatures <- c('CD79A','IGLL1','RAG1','RAG2','VPREB1','MME','IL7R','DNTT','MKI67','PCNA','TCL1A','MS4A1','IGHD','CD27','IGHG3')
T_NK_cells_lineage_signatures <- c('CD3D','CD3E','CD8A','CCR7','IL7R','SELL','KLRG1','CD27','GNLY',
'NKG7','PDCD1','TNFRSF9','LAG3','CD160','CD4','CD40LG','IL2RA',
'FOXP3','DUSP4','IL2RB','KLRF1','FCGR3A','NCAM1','XCL1','MKI67','PCNA','KLRF')
marker.genes <- c(HSPCs_lineage_signatures, Myeloids_lineage_signatures, B_cells_lineage_signatures, T_NK_cells_lineage_signatures)
}
if(is.null(ViolinPlot.cellTypeOrders)){
ViolinPlot.cellTypeOrders <- unique(sc_object@meta.data$selectLabels)
}
if(is.null(ViolinPlot.cellTypeColors)){
ViolinPlot.cellTypeColors <- viridis::viridis(length(unique(sc_object@meta.data$selectLabels)))
}
combinedViolinPlot(dataMatrix = dataMatrix,
features = marker.genes,
CellTypes = sc_object@meta.data$selectLabels,
cellTypeOrders = ViolinPlot.cellTypeOrders,
cellTypeColors = ViolinPlot.cellTypeColors,
Org = Org,
output.dir = paste0(output.dir, '/Step9.Violin_plot_for_marker_genes/'),
databasePath = databasePath)
```
- Save the variables.
```{R, eval=FALSE}
# Get the names of all variables in the current environment
variable_names <- ls()
# Loop through the variable names and save them as RDS files
for (var_name in variable_names) {
var <- get(var_name) # Get the variable by its name
saveRDS(var, file = paste0(output.dir, '/RDSfiles/', var_name, ".rds")) # Save as RDS with the variable's name
}
```
```{r echo=FALSE,fig.cap="Violin plot showing the expression of marker genes between cell type groups", out.width = "400px" , fig.align = 'center'}