## this script is based on ## https://satijalab.org/seurat/articles/pbmc3k_tutorial.html ## ############################################################## ## load data ## ############################################################## ## ## Mayo ## file = "C:/Users/Public/Desktop/VM/01_Statistics_lab/GSM2818521_larva_counts_matrix.txt" ## ## Illinois ## file = "C:/Users/IGB/Desktop/VM/01_Statistics_lab/GSM2818521_larva_counts_matrix.txt" ## my computer file = "/home/user/data/stat530_2022/scrna-seq/GSM2818521_larva_counts_matrix.txt" larval = read.table(file, header = TRUE) dim(larval) library(Seurat) ## set random seed for reproducibility set.seed(1) obj = CreateSeuratObject(counts = larval, min.cells = 30, min.features = 500) ## ############################################################## ## preprocessing ## ############################################################## ## -------------------------------------------------------------- ## quality control ## -------------------------------------------------------------- obj[["percent.mt"]] = PercentageFeatureSet(obj, pattern = "^MT-") VlnPlot(obj, features = c("nCount_RNA", "nFeature_RNA", "percent.mt")) obj = subset(obj, percent.mt <= 6) ## -------------------------------------------------------------- ## normalization ## -------------------------------------------------------------- obj = NormalizeData(obj) obj = FindVariableFeatures(obj) obj = ScaleData(obj, vars.to.regress = "percent.mt") ## -------------------------------------------------------------- ## view preprocessed data ## -------------------------------------------------------------- ## this next command only works in RStudio View(GetAssayData(obj, slot = "scale.data")) ## ############################################################## ## analysis ## ############################################################## ## -------------------------------------------------------------- ## define clusters ## -------------------------------------------------------------- ## dimension reduction obj = RunPCA(obj) ## clustering obj = FindNeighbors(obj) obj = FindClusters(obj, resolution = 0.5) ## dimension reduction obj = RunUMAP(obj, dims = 1:20) ## visualization DimPlot(obj) ## -------------------------------------------------------------- ## find differentially expressed genes ## -------------------------------------------------------------- markers = FindAllMarkers(obj) ## view top markers for cluster 0 head(markers[markers$cluster == 0,]) ## view top markers for cluster 5 head(markers[markers$cluster == 5,]) ## visualize markers FeaturePlot(obj, features = c("G0S2", "LRRTM1")) ## ## print top gene marker for each cluster ## library(data.table) ## data.table(markers)[,head(.SD, 1), by = cluster][, -2, with = FALSE]