...
Load required libraries
Code Block library(Seurat) library(ggplot2) library(dplyr) library(data.table) #if doing sctransform with newer version: library()
Load Data and Create Seurat Object
Load data and create Seurat object
Code Block pbmc_data <- Read10X(data.dir = "filtered_feature_bc_matrix") pbmc <- CreateSeuratObject(pbmc_data)
Check what the counts matrix looks like
Look at counts matrix
Code Block pbmc_data[c("CD3D", "TCL1A", "MS4A1"), 1:30] ##output ## CD3D 4 . 10 . . 1 2 3 1 . . 2 7 1 . . 1 3 . 2 3 . . . . . 3 4 1 5 ## TCL1A . . . . . . . . 1 . . . . . . . . . . . . 1 . . . . . . . . ## MS4A1 . 6 . . . . . . 1 1 1 . . . . . . . . . 36 1 2 . . 2 . . . .Check what the seurat object looks like
Look at seurat object
Code Block pbmc An object of class seurat in project SeuratProject 33538 genes across 5155 samples. #right now, it just has counts, but as we process the data, many other slots will get added.
Normalize Data Using SCTransform and Regress out Genes Related to Cell Cycle
Normalize data
Code Block s.genes <- cc.genes$s.genes g2m.genes <- cc.genes$g2m.genes pbmc <- CellCycleScoring(pbmc, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE) pbmc$CC.Difference <- pbmc$S.Score - pbmc$G2M.Score pbmc <- SCTransform(pbmc, vars.to.regress = "CC.Difference")
Identify Highly Variable Features
Identify Highly Variable Features and Generate Plots
Code Block pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000) top10 <- head(VariableFeatures(pbmc), 10) pdf("variableFeaturesPlot.pdf") plot1 <- VariableFeaturePlot(pbmc) plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) CombinePlots(plots = list(plot1, plot2)) dev.off()Scale the Data
Scale data
Code Block all.genes <- rownames(pbmc) pbmc <- ScaleData(pbmc, features = all.genes)
Select Dimensions using PCA (using variable features)
Perform PCA for Dimensionality Reduction
Code Block #Perform PCA for Dimensionality Reduction pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc)) #Print and Examine PCA Results in Multiple Formats pdf("findDimensions.pdf") print(pbmc[["pca"]], dims = 1:5, nfeatures = 5) VizDimLoadings(pbmc, dims = 1:2, reduction = "pca") DimPlot(pbmc, reduction = "pca") #Generate Principal Component Heatmaps through PC_30 DimHeatmap(pbmc, dims = 1, cells = 500, balanced = TRUE) DimHeatmap(pbmc, dims = 1:15, cells = 500, balanced = TRUE) DimHeatmap(pbmc, dims = 16:30, cells = 500, balanced = TRUE) #Create Elbow Plot of Principal Components through PC_30 ElbowPlot(pbmc, ndims = 30, reduction = "pca") dev.off()Cluster the cells (using the selected number of dimensions)
Cluster the Cells for the selected numeber of Principal Components
Code Block #Cluster the Cells for the selected numeber of Principal Components (we selected 14 for 5k Data) pbmc <- FindNeighbors(pbmc, dims = 1:14) pbmc <- FindClusters(pbmc, resolution = 0.5) #View the Cluster IDs of the First 5 Cells head(Idents(pbmc), 5)
Visualize the clusters using tSNE or UMAP
Visualize using tSNE
Code Block pdf("tsne.pdf") pbmcTSNE <pbmc<- RunTSNE(pbmc, dims = 1:14) DimPlot(pbmcTSNEpbmc, reduction = "tsne") dev.off()Find markers for each cluster
Find Markers for Every Cluster and Print Top 2 Per Cluster
Code Block pbmc.markers <- FindAllMarkers(pbmcTSNE, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25) pbmc.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC) write.csv(pbmc.markers, file = "markersAll.csv") #Generate Heatmap of Top 10 Markers per Cluster top10 <- pbmc.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC) DoHeatmap(pbmcTSNE, features = top10$gene, size = 3) + theme(axis.text.y = element_text(size = 5)) + NoLegend()
Identify cells expressing genes of interestÂ
Identify Clusters for Marker Genes of Interest
Code Block markerGenes <- c("CD27", "CCR7", "CD8A", "CD8B", "IL7R", "GZMK", "CD79A", "CD37", "CD160", "NKG7", "GNL$ seuratClusters <- function(output, genes) { for (gene in genes) { print(output[output$gene == gene,]) } } seuratClusters(pbmc.markers, markerGenes) #Visualize Feature Expression on TSNE Plot of Top Gene per Cluster FeaturePlot(pbmcTSNE, features = c("LTB","S100A8","CCL5","NOSIP")) FeaturePlot(pbmcTSNE, features = c("LINC02446","IGKC","GNLY","MT-CO3")) FeaturePlot(pbmcTSNE, features = c("CST3","NKG7","KLRB1","LST1")) FeaturePlot(pbmcTSNE, features = c("PPBP","AC084033.3")) #Visualize Expression Probability Distribution Across Clusters for Top Gene per Cluster VlnPlot(pbmcTSNE, features = c("LTB","S100A8","CCL5","NOSIP")) VlnPlot(pbmcTSNE, features = c("LINC02446","IGKC","GNLY","MT-CO3")) VlnPlot(pbmcTSNE, features = c("CST3","NKG7","KLRB1","LST1")) VlnPlot(pbmcTSNE, features = c("PPBP","AC084033.3"))Label clusters as cell types based on expression of known marker genes.
...
Download this zip file and unzip it on your computer to view the files (Most computers will automatically unzip files).
What do you do if you have multiple samples
If you have multiple samples (multiple cellranger output directories), read each one in and create individual seurat objects. Then, merged them into one object. You may want to use the project parameter to indicate condition information. You can also add metadata to do this.
From Seurat documentionWhen merging, to easily tell which original object any particular cell came from, you can set the add.cell.ids parameter with an c(x, y) vector, which will prepend the given identifier to the beginning of each cell name. The original project ID will remain stored in object meta data under orig.ident.
| Code Block |
|---|
data10<-Read10X(data.dir = "../../10/outs/filtered_feature_bc_matrix")
pbmc10<-CreateSeuratObject(counts = data10, project="dep")
data11<-Read10X(data.dir = "../../11/outs/filtered_feature_bc_matrix")
pbmc11<-CreateSeuratObject(counts = data11, project="control")
data14<-Read10X(data.dir = "../../14/outs/filtered_feature_bc_matrix")
pbmc14<-CreateSeuratObject(counts = data14, project="dep")
data16<-Read10X(data.dir = "../../16/outs/filtered_feature_bc_matrix")
pbmc16<-CreateSeuratObject(counts = data16, project="control")
pbmc.combined<-merge(pbmc10, y=c(pbmc11,pbmc14,pbmc16),add.cell.ids = c("10","11","14","16), project = "pbmc", merge.data = TRUE) |
Single Cell Proportion Test
...