Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

  • Load required libraries

    Code Block
    library(Seurat)
    library(ggplot2)
    library(dplyr)
    library(data.table)
    
    #if doing sctransform with newer version:
    library()
  • Load Data and Create Seurat Object

    Load data and create Seurat object

    Code Block
    pbmc_data <- Read10X(data.dir = "filtered_feature_bc_matrix")
    pbmc <- CreateSeuratObject(pbmc_data)
  • Check what the counts matrix looks like

    Look at counts matrix

    Code Block
    pbmc_data[c("CD3D", "TCL1A", "MS4A1"), 1:30]
    
    ##output                                                                   
    ## CD3D  4 . 10 . . 1 2 3 1 . . 2 7 1 . . 1 3 . 2  3 . . . . . 3 4 1 5
    ## TCL1A . .  . . . . . . 1 . . . . . . . . . . .  . 1 . . . . . . . .
    ## MS4A1 . 6  . . . . . . 1 1 1 . . . . . . . . . 36 1 2 . . 2 . . . .
  • Check what the seurat object looks like

    Look at seurat object

    Code Block
    pbmc
    
    An object of class seurat in project SeuratProject 
     33538 genes across 5155 samples.
    
    #right now, it just has counts, but as we process the data, many other slots will get added.
  • Normalize Data Using SCTransform and Regress out Genes Related to Cell Cycle

    Normalize data

    Code Block
    s.genes <- cc.genes$s.genes
    g2m.genes <- cc.genes$g2m.genes
    pbmc <- CellCycleScoring(pbmc, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
    pbmc$CC.Difference <- pbmc$S.Score - pbmc$G2M.Score
    pbmc <- SCTransform(pbmc, vars.to.regress = "CC.Difference")
  • Identify Highly Variable Features

    Identify Highly Variable Features and Generate Plots

    Code Block
    pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
    top10 <- head(VariableFeatures(pbmc), 10)
    pdf("variableFeaturesPlot.pdf")
    plot1 <- VariableFeaturePlot(pbmc)
    plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
    CombinePlots(plots = list(plot1, plot2))
    dev.off()
  • Scale the Data

    Scale data

    Code Block
    all.genes <- rownames(pbmc)
    pbmc <- ScaleData(pbmc, features = all.genes)
  • Select Dimensions using PCA (using variable features)

    Perform PCA for Dimensionality Reduction

    Code Block
    #Perform PCA for Dimensionality Reduction
    pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
    
    #Print and Examine PCA Results in Multiple Formats
    pdf("findDimensions.pdf")
    print(pbmc[["pca"]], dims = 1:5, nfeatures = 5)
    VizDimLoadings(pbmc, dims = 1:2, reduction = "pca")
    DimPlot(pbmc, reduction = "pca")
    
    #Generate Principal Component Heatmaps through PC_30
    DimHeatmap(pbmc, dims = 1, cells = 500, balanced = TRUE)
    DimHeatmap(pbmc, dims = 1:15, cells = 500, balanced = TRUE)
    DimHeatmap(pbmc, dims = 16:30, cells = 500, balanced = TRUE)
    
    #Create Elbow Plot of Principal Components through PC_30
    ElbowPlot(pbmc, ndims = 30, reduction = "pca")
    dev.off()
  • Cluster the cells (using the selected number of dimensions)

    Cluster the Cells for the selected numeber of Principal Components

    Code Block
    #Cluster the Cells for the selected numeber of Principal Components (we selected 14 for 5k Data)
    pbmc <- FindNeighbors(pbmc, dims = 1:14)
    pbmc <- FindClusters(pbmc, resolution = 0.5)
    
    #View the Cluster IDs of the First 5 Cells
    head(Idents(pbmc), 5)
    
  • Visualize the clusters using tSNE or UMAP

    Visualize using tSNE

    Code Block
    pdf("tsne.pdf")
    pbmcTSNE <pbmc<- RunTSNE(pbmc, dims = 1:14)
    DimPlot(pbmcTSNEpbmc, reduction = "tsne")
    dev.off()
  • Find markers for each cluster

    Find Markers for Every Cluster and Print Top 2 Per Cluster

    Code Block
    pbmc.markers <- FindAllMarkers(pbmcTSNE, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
    pbmc.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)
    write.csv(pbmc.markers, file = "markersAll.csv")
    
    #Generate Heatmap of Top 10 Markers per Cluster
    top10 <- pbmc.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
    DoHeatmap(pbmcTSNE, features = top10$gene, size = 3) + theme(axis.text.y = element_text(size = 5)) + NoLegend()
  • Identify cells expressing genes of interest 

    Identify Clusters for Marker Genes of Interest

    Code Block
    markerGenes <- c("CD27", "CCR7", "CD8A", "CD8B", "IL7R", "GZMK", "CD79A", "CD37", "CD160", "NKG7", "GNL$
    seuratClusters <- function(output, genes) {
            for (gene in genes) {
            print(output[output$gene == gene,]) }
            }
    seuratClusters(pbmc.markers, markerGenes)
    
    #Visualize Feature Expression on TSNE Plot of Top Gene per Cluster
    FeaturePlot(pbmcTSNE, features = c("LTB","S100A8","CCL5","NOSIP"))
    FeaturePlot(pbmcTSNE, features = c("LINC02446","IGKC","GNLY","MT-CO3"))
    FeaturePlot(pbmcTSNE, features = c("CST3","NKG7","KLRB1","LST1"))
    FeaturePlot(pbmcTSNE, features = c("PPBP","AC084033.3"))
    
    #Visualize Expression Probability Distribution Across Clusters for Top Gene per Cluster
    VlnPlot(pbmcTSNE, features = c("LTB","S100A8","CCL5","NOSIP"))
    VlnPlot(pbmcTSNE, features = c("LINC02446","IGKC","GNLY","MT-CO3"))
    VlnPlot(pbmcTSNE, features = c("CST3","NKG7","KLRB1","LST1"))
    VlnPlot(pbmcTSNE, features = c("PPBP","AC084033.3")) 
  • Label clusters as cell types based on expression of known marker genes.

...

Download this zip file and unzip it on your computer to view the files (Most computers will automatically unzip files).

results.zip

What do you do if you have multiple samples

If you have multiple samples (multiple cellranger output directories), read each one in and create individual seurat objects. Then, merged them into one object. You may want to use the project parameter to indicate condition information. You can also add metadata to do this.

From Seurat documentionWhen merging, to easily tell which original object any particular cell came from, you can set the add.cell.ids parameter with an c(x, y) vector, which will prepend the given identifier to the beginning of each cell name. The original project ID will remain stored in object meta data under orig.ident.

Code Block
data10<-Read10X(data.dir = "../../10/outs/filtered_feature_bc_matrix")
pbmc10<-CreateSeuratObject(counts = data10, project="dep")
data11<-Read10X(data.dir = "../../11/outs/filtered_feature_bc_matrix")
pbmc11<-CreateSeuratObject(counts = data11, project="control")
data14<-Read10X(data.dir = "../../14/outs/filtered_feature_bc_matrix")
pbmc14<-CreateSeuratObject(counts = data14, project="dep")
data16<-Read10X(data.dir = "../../16/outs/filtered_feature_bc_matrix")
pbmc16<-CreateSeuratObject(counts = data16, project="control")

pbmc.combined<-merge(pbmc10, y=c(pbmc11,pbmc14,pbmc16),add.cell.ids = c("10","11","14","16), project = "pbmc",  merge.data = TRUE)

Single Cell Proportion Test

In order to identify differences in proportions of cell types between conditions, scProportionTest can be run. It reads in a seurat object and runs a permutation test to calculate a p-value for each cluster and a magnitude difference between conditions. It also generates a plot showing the pvalues and difference. It can be used to identify enriched or depleted cell types between conditions.

Code Block
library("scProportionTest")

#read in seurat object, saved as a RData file
load("seurat.RData")

#read in seurat object (pbmc), assuming that pbmc has multiple samples with different condition identities
prop_test <- sc_utils(pbmc)
Code Block
#run permutation test to compare between cells with orig.ident as 'Con' and ident 'Treatment'
#run it for every cluster
prop_test <- permutation_test(
	prop_test, cluster_identity = "custom_clusters",
	sample_1 = "Con", sample_2 = "Treatment",
	sample_identity = "orig.ident"
)
Code Block
#Generate plot
permutation_plot(prop_test)

View file
namepermutation_plot.l2.pdf