## ##############################################################
## Single-cell RNA-seq walkthrough with Seurat:
## load a counts matrix, filter cells, normalize / scale / PCA,
## cluster, embed with UMAP, and list cluster marker genes.
## ##############################################################

## load the package first so a missing installation fails before
## the (potentially slow) read of the counts matrix
library("Seurat")

## set random seed for reproducibility
set.seed(1)

## ##############################################################
## load data
## ##############################################################

## ## Mayo
## file <- "C:/Users/Public/Desktop/datafiles/01_Statistics_lab/GSM2818521_larva_counts_matrix.txt"
## ## Illinois
## file <- "C:/Users/IGB/Desktop/VM/01_Statistics_lab/GSM2818521_larva_counts_matrix.txt"
## my computer
file <- "~/data/GSM2818521_larva_counts_matrix.txt"

## stop with a clear message if the path above is wrong for this machine
if (!file.exists(file)) {
  stop("Counts matrix not found: ", file, call. = FALSE)
}

## the first line of the file is read as column names (header = TRUE)
pandey <- read.table(file, header = TRUE)
dim(pandey)

s_obj <- CreateSeuratObject(pandey)

## ##############################################################
## quality control
## ##############################################################

## store, per cell, the percentage of counts coming from features whose
## name matches "^MT-" in the metadata column `percent.mito`
s_obj <- PercentageFeatureSet(s_obj, pattern = "^MT-", col.name = "percent.mito")
VlnPlot(s_obj, features = c("nCount_RNA", "nFeature_RNA", "percent.mito"))

## keep cells with at most 5% mito counts and at most 20,000 total counts
s_obj <- subset(s_obj, percent.mito <= 5 & nCount_RNA <= 2e4)

## ##############################################################
## preprocessing
## ##############################################################

s_obj <- NormalizeData(s_obj)
s_obj <- FindVariableFeatures(s_obj)
## regress the total count per cell out of the scaled data
s_obj <- ScaleData(s_obj, vars.to.regress = c("nCount_RNA"))
s_obj <- RunPCA(s_obj)

## ##############################################################
## analysis and visualization
## ##############################################################

## NOTE(review): FindNeighbors() is called without `dims`, so it uses the
## package default, while RunUMAP() below uses dims = 1:20 -- confirm
## whether the two are meant to use the same principal components.
s_obj <- FindNeighbors(s_obj)
s_obj <- FindClusters(s_obj, resolution = 0.5)
s_obj <- RunUMAP(s_obj, dims = 1:20)
DimPlot(s_obj)

## marker genes per cluster; keep only rows with adjusted p-value <= 0.05.
## The is.na() guard prevents an NA p-value from producing an all-NA row.
markers <- FindAllMarkers(s_obj, logfc.threshold = 1.5)
markers <- markers[!is.na(markers$p_val_adj) & markers$p_val_adj <= 0.05, ]
head(markers)

FeaturePlot(s_obj, features = c("G0S2", "TP53I11B", "FXYD1")) +
  patchwork::plot_layout(ncol = 3)

## print the distinct marker gene names, one per line
## (presumably for pasting into the tool linked below)
gene_names <- unique(markers$gene)
length(gene_names)
cat(gene_names, sep = "\n")
## https://david.ncifcrf.gov/