Wednesday, August 30, 2023

workflow for Seurat and Velocity

 

#workflow for Seurat and Velocity


#open Rstudio on VNC desktop

rstudio

library(Seurat)


#load in 10x data

sample1=Read10X(data.dir = 'path1')

sample2=Read10X(data.dir = 'path2')


#convert 10xdata into Seurat object

sample1.seurat=CreateSeuratObject(sample1, project = 'ID1')

sample2.seurat=CreateSeuratObject(sample2, project = 'ID2')


#calculate percent mito content

sample1.seurat[['percent.mt']]=PercentageFeatureSet(male5mo.seurat, pattern = 'mt-')

sample2.seurat[['percent.mt']]=PercentageFeatureSet(female5mo.seurat, pattern = 'mt-')


#show basic QC parameters for samples

VlnPlot(sample1.seurat, c('nCount_RNA','nFeature_RNA','percent.mt'), pt.size=0)

VlnPlot(sample2.seurat, c('nCount_RNA','nFeature_RNA','percent.mt'), pt.size=0)


#can filter based on top/bottoms of violins to remove outliers e.g.

sample1.seurat=subset(sample1.seurat, subset = nFeature_RNA > 200 & nCount_RNA <20000 & percent.mt <5)

sample2.seurat=subset(sample2.seurat, subset = nFeature_RNA > 500 & nCount_RNA <20000 & percent.mt <5)


#remake vlnplots to compare vs before

VlnPlot(male5mo.seurat, c('nCount_RNA','nFeature_RNA','percent.mt'), pt.size=0)

VlnPlot(female5mo.seurat, c('nCount_RNA','nFeature_RNA','percent.mt'), pt.size=0)

VlnPlot(male22mo.seurat, c('nCount_RNA','nFeature_RNA','percent.mt'), pt.size=0)

VlnPlot(female22mo.seurat, c('nCount_RNA','nFeature_RNA','percent.mt'), pt.size=0)


#normalise data

sample1.seurat <- NormalizeData(sample1.seurat, verbose = FALSE)

sample2.seurat <- NormalizeData(sample2.seurat, verbose = FALSE)


sample1.seurat <- FindVariableFeatures(sample1.seurat)

sample2mo.seurat <- FindVariableFeatures(sample2.seurat)



integ.anchors <- FindIntegrationAnchors(object.list = list(sample1.seurat, sample2.seurat))


combo.seurat <- IntegrateData(anchorset = integ.anchors)



# Run the standard workflow for visualization and clustering

combo.seurat <- ScaleData(combo.seurat, verbose = FALSE, vars.to.regress=c('nCount_RNA', 'nFeature_RNA', 'percent.mt'))

combo.seurat <- RunPCA(combo.seurat, verbose = FALSE)

# UMAP and Clustering

combo.seurat <- RunUMAP(combo.seurat, dims = 1:10)

combo.seurat <- FindNeighbors(combo.seurat)

combo.seurat <- FindClusters(combo.seurat)


combo.seurat$cluster_by_genotype = paste(Idents(combo.seurat), sep='_', combo.seurat$orig.ident)



DefaultAssay(combo.seurat)='RNA'

combo.seurat=CellCycleScoring(combo.seurat, s.features = stringr::str_to_title(cc.genes.updated.2019$s.genes), g2m.features = stringr::str_to_title(cc.genes.updated.2019$g2m.genes))


#command to find top enriched genes in each cluster vs all other cells is FindAllMarkers()

FindAllMarkers(combo.seurat, assay='RNA', only.pos=T)

#output is pasted in terminal, either save it to an object e.g. x=FindAllMarkers() or write it out into a text file

write.csv(FindAllMarkers(combo.seurat, assay='RNA', only.pos=T), file='combo_allmarkers.csv')


#command to compare two specific clusters is FindMarkers()

write.csv(FindMarkers(combo.seurat, group.by = 'cluster_by_genotype', ident.1 = '16_UOmet', ident.2 = '16_UOcont'), file='combo_cluster16_UOmet_v_UOcont_DEG.csv')

#group.by can be column name in combo.seurat$ 

#ident.1 and ident.2 must be set, if only ident.1 set then it compares against all other cells 


#calculate Otsu threshold for gene, autothresholdr must use integers so multiple out all decimal places then divide back down

OtsuThresh = function(seurat_object, gene_name) {

     library(autothresholdr)

     array=as.integer(seurat_object@assays$RNA@data[gene_name,]*10000000)

     auto_thresh(array, method='Otsu')[1]/10000000

 }

No comments:

Post a Comment