Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

  • Load required libraries

    Code Block
    library(Seurat)
    library(ggplot2)
    library(dplyr)
    library(data.table)
    
    #if doing sctransform with newer version:
    library()
  • Load Data and Create Seurat Object

    Load data and create Seurat object

    Code Block
    pbmc_data <- Read10X(data.dir = "filtered_feature_bc_matrix")
    pbmc <- CreateSeuratObject(pbmc_data)
  • Check what the counts matrix looks like

    Look at counts matrix

    Code Block
    pbmc_data[c("CD3D", "TCL1A", "MS4A1"), 1:30]
    
    ##output                                                                   
    ## CD3D  4 . 10 . . 1 2 3 1 . . 2 7 1 . . 1 3 . 2  3 . . . . . 3 4 1 5
    ## TCL1A . .  . . . . . . 1 . . . . . . . . . . .  . 1 . . . . . . . .
    ## MS4A1 . 6  . . . . . . 1 1 1 . . . . . . . . . 36 1 2 . . 2 . . . .
  • Check what the seurat object looks like

    Look at seurat object

    Code Block
    pbmc
    
    An object of class seurat in project SeuratProject 
     33538 genes across 5155 samples.
    
    #right now, it just has counts, but as we process the data, many other slots will get added.
  • Normalize Data Using SCTransform and Regress out Genes Related to Cell Cycle

    Normalize data

    Code Block
    s.genes <- cc.genes$s.genes
    g2m.genes <- cc.genes$g2m.genes
    pbmc <- CellCycleScoring(pbmc, s.features = s.genes, g2m.features = g2m.genes, set.ident = TRUE)
    pbmc$CC.Difference <- pbmc$S.Score - pbmc$G2M.Score
    pbmc <- SCTransform(pbmc, vars.to.regress = "CC.Difference")
  • Identify Highly Variable Features

    Identify Highly Variable Features and Generate Plots

    Code Block
    pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
    top10 <- head(VariableFeatures(pbmc), 10)
    pdf("variableFeaturesPlot.pdf")
    plot1 <- VariableFeaturePlot(pbmc)
    plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
    CombinePlots(plots = list(plot1, plot2))
    dev.off()
  • Scale the Data

    Scale data

    Code Block
    all.genes <- rownames(pbmc)
    pbmc <- ScaleData(pbmc, features = all.genes)
  • Select Dimensions using PCA (using variable features)

    Perform PCA for Dimensionality Reduction

    Code Block
    #Perform PCA for Dimensionality Reduction
    pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
    
    #Print and Examine PCA Results in Multiple Formats
    pdf("findDimensions.pdf")
    print(pbmc[["pca"]], dims = 1:5, nfeatures = 5)
    VizDimLoadings(pbmc, dims = 1:2, reduction = "pca")
    DimPlot(pbmc, reduction = "pca")
    
    #Generate Principal Component Heatmaps through PC_30
    DimHeatmap(pbmc, dims = 1, cells = 500, balanced = TRUE)
    DimHeatmap(pbmc, dims = 1:15, cells = 500, balanced = TRUE)
    DimHeatmap(pbmc, dims = 16:30, cells = 500, balanced = TRUE)
    
    #Create Elbow Plot of Principal Components through PC_30
    ElbowPlot(pbmc, ndims = 30, reduction = "pca")
    dev.off()
  • Cluster the cells (using the selected number of dimensions)

    Cluster the Cells for the selected numeber of Principal Components

    Code Block
    #Cluster the Cells for the selected numeber of Principal Components (we selected 14 for 5k Data)
    pbmc <- FindNeighbors(pbmc, dims = 1:14)
    pbmc <- FindClusters(pbmc, resolution = 0.5)
    
    #View the Cluster IDs of the First 5 Cells
    head(Idents(pbmc), 5)
    
  • Visualize the clusters using tSNE or UMAP

    Visualize using tSNE

    Code Block
    pdf("tsne.pdf")
    pbmcTSNE <pbmc<- RunTSNE(pbmc, dims = 1:14)
    DimPlot(pbmcTSNEpbmc, reduction = "tsne")
    dev.off()
  • Find markers for each cluster

    Find Markers for Every Cluster and Print Top 2 Per Cluster

    Code Block
    pbmc.markers <- FindAllMarkers(pbmcTSNE, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
    pbmc.markers %>% group_by(cluster) %>% top_n(n = 2, wt = avg_logFC)
    write.csv(pbmc.markers, file = "markersAll.csv")
    
    #Generate Heatmap of Top 10 Markers per Cluster
    top10 <- pbmc.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
    DoHeatmap(pbmcTSNE, features = top10$gene, size = 3) + theme(axis.text.y = element_text(size = 5)) + NoLegend()
  • Identify cells expressing genes of interest 

    Identify Clusters for Marker Genes of Interest

    Code Block
    markerGenes <- c("CD27", "CCR7", "CD8A", "CD8B", "IL7R", "GZMK", "CD79A", "CD37", "CD160", "NKG7", "GNL$
    seuratClusters <- function(output, genes) {
            for (gene in genes) {
            print(output[output$gene == gene,]) }
            }
    seuratClusters(pbmc.markers, markerGenes)
    
    #Visualize Feature Expression on TSNE Plot of Top Gene per Cluster
    FeaturePlot(pbmcTSNE, features = c("LTB","S100A8","CCL5","NOSIP"))
    FeaturePlot(pbmcTSNE, features = c("LINC02446","IGKC","GNLY","MT-CO3"))
    FeaturePlot(pbmcTSNE, features = c("CST3","NKG7","KLRB1","LST1"))
    FeaturePlot(pbmcTSNE, features = c("PPBP","AC084033.3"))
    
    #Visualize Expression Probability Distribution Across Clusters for Top Gene per Cluster
    VlnPlot(pbmcTSNE, features = c("LTB","S100A8","CCL5","NOSIP"))
    VlnPlot(pbmcTSNE, features = c("LINC02446","IGKC","GNLY","MT-CO3"))
    VlnPlot(pbmcTSNE, features = c("CST3","NKG7","KLRB1","LST1"))
    VlnPlot(pbmcTSNE, features = c("PPBP","AC084033.3")) 
  • Label clusters as cell types based on expression of known marker genes.

...