library(tidyverse)
#> ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
#> ✔ ggplot2 3.4.1 ✔ purrr 1.0.1
#> ✔ tibble 3.1.8 ✔ dplyr 1.1.0
#> ✔ tidyr 1.3.0 ✔ stringr 1.5.0
#> ✔ readr 2.1.4 ✔ forcats 1.0.0
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag() masks stats::lag()
library(SPADE)
Load the example data for simulated spatial gene expression, single cell RNA-seq data, marker gene lists for cell types, spatial variable genes, and spatial locations.
load('simulatedData.RData')
load('scRNAseq.RData')
load('MarkerGenes.RData')
=read.csv('spa_loc.csv',header = T,row.names = 1) location
Build cell type reference from scRNAseq
=scRefer(input_data=refData,
screfct_var="celltype",
sample_var="orig.ident")
#> Loading required package: BiocGenerics
#>
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:dplyr':
#>
#> combine, intersect, setdiff, union
#> The following objects are masked from 'package:stats':
#>
#> IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#>
#> anyDuplicated, append, as.data.frame, basename, cbind, colnames,
#> dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
#> grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
#> order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
#> rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
#> union, unique, unsplit, which.max, which.min
#> Welcome to Bioconductor
#>
#> Vignettes contain introductory material; view with
#> 'browseVignettes()'. To cite Bioconductor, see
#> 'citation("Biobase")', and for packages 'citation("pkgname")'.
Identify cell types for each domain
=unlist(sign_list)
marker_list=simData[[1]]
pseudo.spatial=max(location$domain)
nlay=CTperDom(loc=location,stcount=pseudo.spatial,scref=scref,sign_list=marker_list,lasso=T)
CTperLayer#> Loading required package: lattice
#>
#> Attaching package: 'caret'
#> The following object is masked from 'package:purrr':
#>
#> lift
#> Loading required package: Matrix
#>
#> Attaching package: 'Matrix'
#> The following objects are masked from 'package:tidyr':
#>
#> expand, pack, unpack
#> Loaded glmnet 4.1-6
#> [1] TRUE
#> [1] TRUE
#> [1] TRUE
#> [1] TRUE
based on identified cell type, decompose each domain
=SPADE(stcount=pseudo.spatial,scref=scref,
CTestsign_list=sign_list, # sc DEGs, it is in list format
loc=location,
ctData=CTperLayer, # the estimated cell type per domain
offset=10) # user input tuning parameter to adjust the threshold, if is NULL, then an average value will be used
merge domain information
=simData[[2]]
truep=matrix(0,ncol = ncol(truep),nrow=nrow(truep))
estCTrownames(estCT)=c(rownames(CTest[[1]]),rownames(CTest[[2]]),rownames(CTest[[3]]),rownames(CTest[[4]]))
colnames(estCT)=colnames(truep)
rownames(CTest[[1]]),colnames(CTest[[1]])] = CTest[[1]]
estCT[rownames(CTest[[2]]),colnames(CTest[[2]])] = CTest[[2]]
estCT[rownames(CTest[[3]]),colnames(CTest[[3]])] = CTest[[3]]
estCT[rownames(CTest[[4]]),colnames(CTest[[4]])] = CTest[[4]]
estCT[=estCT[rownames(truep),colnames(truep)] estCT
compare results with true proportion
<- round(sqrt(mean(as.matrix((truep - estCT)^2), na.rm = T)), digits = 5)
RMSD <- round(mean(as.matrix(abs(truep - estCT)), na.rm = T), digits = 5)
mAD <- round(cor(c(as.matrix(truep)), c(as.matrix(estCT))), digits = 5)
pearson print(c('RMSD'=RMSD,'mAD'=mAD,'pearson'=pearson))
#> RMSD mAD pearson
#> 0.03931 0.01680 0.97866
visialize the results
=c("PG"="green4","Oligo"='lawngreen',"ET"="gold","mitral"="purple","tufted"="brown2","Astro"="magenta",'Micro'="lightskyblue","GC"="orange",'Immature'="lightcyan3",'OPC'='royalblue1')
colors
dot_plot(truep,estCT,10,colors=colors)
library(scatterpie)
=cbind(location,estCT)
fuldatscatter_pie(fuldat=fuldat,dat=estCT,fillcolors=colors)
featureplot(estCT,fuldat,c('GC','mitral'))