#workflow for Seurat and Velocity
#open Rstudio on VNC desktop
rstudio
library(Seurat)
#load in 10x data
sample1=Read10X(data.dir = 'path1')
sample2=Read10X(data.dir = 'path2')
#convert 10xdata into Seurat object
sample1.seurat=CreateSeuratObject(sample1, project = 'ID1')
sample2.seurat=CreateSeuratObject(sample2, project = 'ID2')
#calculate percent mito content
sample1.seurat[['percent.mt']]=PercentageFeatureSet(male5mo.seurat, pattern = 'mt-')
sample2.seurat[['percent.mt']]=PercentageFeatureSet(female5mo.seurat, pattern = 'mt-')
#show basic QC parameters for samples
VlnPlot(sample1.seurat, c('nCount_RNA','nFeature_RNA','percent.mt'), pt.size=0)
VlnPlot(sample2.seurat, c('nCount_RNA','nFeature_RNA','percent.mt'), pt.size=0)
#can filter based on top/bottoms of violins to remove outliers e.g.
sample1.seurat=subset(sample1.seurat, subset = nFeature_RNA > 200 & nCount_RNA <20000 & percent.mt <5)
sample2.seurat=subset(sample2.seurat, subset = nFeature_RNA > 500 & nCount_RNA <20000 & percent.mt <5)
#remake vlnplots to compare vs before
VlnPlot(male5mo.seurat, c('nCount_RNA','nFeature_RNA','percent.mt'), pt.size=0)
VlnPlot(female5mo.seurat, c('nCount_RNA','nFeature_RNA','percent.mt'), pt.size=0)
VlnPlot(male22mo.seurat, c('nCount_RNA','nFeature_RNA','percent.mt'), pt.size=0)
VlnPlot(female22mo.seurat, c('nCount_RNA','nFeature_RNA','percent.mt'), pt.size=0)
#normalise data
sample1.seurat <- NormalizeData(sample1.seurat, verbose = FALSE)
sample2.seurat <- NormalizeData(sample2.seurat, verbose = FALSE)
sample1.seurat <- FindVariableFeatures(sample1.seurat)
sample2mo.seurat <- FindVariableFeatures(sample2.seurat)
integ.anchors <- FindIntegrationAnchors(object.list = list(sample1.seurat, sample2.seurat))
combo.seurat <- IntegrateData(anchorset = integ.anchors)
# Run the standard workflow for visualization and clustering
combo.seurat <- ScaleData(combo.seurat, verbose = FALSE, vars.to.regress=c('nCount_RNA', 'nFeature_RNA', 'percent.mt'))
combo.seurat <- RunPCA(combo.seurat, verbose = FALSE)
# UMAP and Clustering
combo.seurat <- RunUMAP(combo.seurat, dims = 1:10)
combo.seurat <- FindNeighbors(combo.seurat)
combo.seurat <- FindClusters(combo.seurat)
combo.seurat$cluster_by_genotype = paste(Idents(combo.seurat), sep='_', combo.seurat$orig.ident)
DefaultAssay(combo.seurat)='RNA'
combo.seurat=CellCycleScoring(combo.seurat, s.features = stringr::str_to_title(cc.genes.updated.2019$s.genes), g2m.features = stringr::str_to_title(cc.genes.updated.2019$g2m.genes))
#command to find top enriched genes in each cluster vs all other cells is FindAllMarkers()
FindAllMarkers(combo.seurat, assay='RNA', only.pos=T)
#output is pasted in terminal, either save it to an object e.g. x=FindAllMarkers() or write it out into a text file
write.csv(FindAllMarkers(combo.seurat, assay='RNA', only.pos=T), file='combo_allmarkers.csv')
#command to compare two specific clusters is FindMarkers()
write.csv(FindMarkers(combo.seurat, group.by = 'cluster_by_genotype', ident.1 = '16_UOmet', ident.2 = '16_UOcont'), file='combo_cluster16_UOmet_v_UOcont_DEG.csv')
#group.by can be column name in combo.seurat$
#ident.1 and ident.2 must be set, if only ident.1 set then it compares against all other cells
#calculate Otsu threshold for gene, autothresholdr must use integers so multiple out all decimal places then divide back down
OtsuThresh = function(seurat_object, gene_name) {
library(autothresholdr)
array=as.integer(seurat_object@assays$RNA@data[gene_name,]*10000000)
auto_thresh(array, method='Otsu')[1]/10000000
}