...
Unfortunately, the packages required to run Seurat on TACC are either currently not loading properly (could be because they are out-of-date or not installed). So, we are not going to run this script today.
...
Load Data and Create Seurat Object
Code Block title Load data and create Seurat object pbmc_data <- Read10X(data.dir = "filtered_feature_bc_matrix") pbmc <- CreateSeuratObject(counts = pbmc_data)- Check what the counts matrix looks like
Code Block title Look at counts matrix pbmc.data[c("CD3D", "TCL1A", "MS4A1"), 1:30] ##output ## CD3D 4 . 10 . . 1 2 3 1 . . 2 7 1 . . 1 3 . 2 3 . . . . . 3 4 1 5 ## TCL1A . . . . . . . . 1 . . . . . . . . . . . . 1 . . . . . . . . ## MS4A1 . 6 . . . . . . 1 1 1 . . . . . . . . . 36 1 2 . . 2 . . . .
Normalize Data Using SCTransform and Regress out Genes Related to Cell Cycle
Code Block title Normalize data s.genes <- cc.genes$s.genes g2m.genes <- cc.genes$g2m.genes pbmc <- CellCycleScoring(pbmc, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE) pbmc$CC.Difference <- pbmc$S.Score - pbmc$G2M.Score pbmc <- SCTransform(pbmc, vars.to.regress = "CC.Difference")
Identify Highly Variable Features
Code Block title Identify Highly Variable Features and Generate Plots pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000) top10 <- head(VariableFeatures(pbmc), 10) pdf("variableFeaturesPlot.pdf") plot1 <- VariableFeaturePlot(pbmc) plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE) CombinePlots(plots = list(plot1, plot2)) dev.off()Scale the Data
Code Block title Scale data all.genes <- rownames(pbmc) pbmc <- ScaleData(pbmc, features = all.genes)
Select Dimensions using PCA (using variable features)
Code Block title Perform PCA for Dimensionality Reduction #Perform PCA for Dimensionality Reduction pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc)) #Print and Examine PCA Results in Multiple Formats pdf("findDimensions.pdf") print(pbmc[["pca"]], dims = 1:5, nfeatures = 5) VizDimLoadings(pbmc, dims = 1:2, reduction = "pca") DimPlot(pbmc, reduction = "pca") #Generate Principal Component Heatmaps through PC_30 DimHeatmap(pbmc, dims = 1, cells = 500, balanced = TRUE) DimHeatmap(pbmc, dims = 1:15, cells = 500, balanced = TRUE) DimHeatmap(pbmc, dims = 16:30, cells = 500, balanced = TRUE) #Create Elbow Plot of Principal Components through PC_30 ElbowPlot(pbmc, ndims = 30, reduction = "pca") dev.off()Cluster the cells (using the selected number of dimensions)
Code Block title Cluster the Cells for the selected numeber of Principal Components #Cluster the Cells for the selected numeber of Principal Components (we selected 14 for 5k Data) pbmc <- FindNeighbors(pbmc, dims = 1:14) pbmc <- FindClusters(pbmc, resolution = 0.5) #View the Cluster IDs of the First 5 Cells head(Idents(pbmc), 5)
Visualize the clusters using tSNE or UMAP
Code Block title Visualize using tSNE pdf("tsne.pdf") pbmcTSNE <- RunTSNE(pbmc, dims = 1:14) DimPlot(pbmcTSNE, reduction = "tsne") dev.off()
Find markers for each cluster
Code Block title Find Markers for Every Cluster and Print Top 2 Per Cluster pbmc.markers <- FindAllMarkers(pbmcTSNE, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25) pbmc.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC) write.csv(pbmc.markers, file = "markersAll.csv") #Generate Heatmap of Top 10 Markers per Cluster top10 <- pbmc.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC) DoHeatmap(pbmcTSNE, features = top10$gene, size = 3) + theme(axis.text.y = element_text(size = 5)) + NoLegend()
Identify cells expressing genes of interest
Code Block title Identify Clusters for Marker Genes of Interest markerGenes <- c("CD27", "CCR7", "CD8A", "CD8B", "IL7R", "GZMK", "CD79A", "CD37", "CD160", "NKG7", "GNL$ seuratClusters <- function(output, genes) { for (gene in genes) { print(output[output$gene == gene,]) } } seuratClusters(pbmc.markers, markerGenes) #Visualize Feature Expression on TSNE Plot of Top Gene per Cluster FeaturePlot(pbmcTSNE, features = c("LTB","S100A8","CCL5","NOSIP")) FeaturePlot(pbmcTSNE, features = c("LINC02446","IGKC","GNLY","MT-CO3")) FeaturePlot(pbmcTSNE, features = c("CST3","NKG7","KLRB1","LST1")) FeaturePlot(pbmcTSNE, features = c("PPBP","AC084033.3")) #Visualize Expression Probability Distribution Across Clusters for Top Gene per Cluster VlnPlot(pbmcTSNE, features = c("LTB","S100A8","CCL5","NOSIP")) VlnPlot(pbmcTSNE, features = c("LINC02446","IGKC","GNLY","MT-CO3")) VlnPlot(pbmcTSNE, features = c("CST3","NKG7","KLRB1","LST1")) VlnPlot(pbmcTSNE, features = c("PPBP","AC084033.3"))- Label clusters as cell types based on expression of known marker genes.
| Code Block | ||
|---|---|---|
| ||
new.cluster.ids <- c("CD4+ Memory Cells", "CD16+ and CD14+ Monocytes", "Regulatory T Cells", "CD8+ T Ce$
names(new.cluster.ids) <- levels(pbmcTSNE)
pbmcTSNE <- RenameIdents(pbmcTSNE, new.cluster.ids)
DimPlot(pbmcTSNE, reduction = "tsne", label = TRUE) |
Let's look at the results: