## ---- options, include = FALSE------------------------------------------------
library(knitr)
# Global knitr defaults applied to every chunk: caching is switched off and
# each chunk's text output is held back until the chunk has finished running.
opts_chunk$set(cache = FALSE, results = "hold")

## ----load-data, eval=TRUE-----------------------------------------------------
library(MERINGUE)
library(Matrix)

# Load the `mouseCoronal` example dataset (presumably the one bundled with
# MERINGUE, attached just above) and pull out the two elements used below.
data("mouseCoronal")
# Spot coordinates on the tissue; rows are looked up by spot ID further down.
tissueSpotRotation <- mouseCoronal$tissueSpotRotation
# Expression matrix: indexed as [gene, ] and its column names are matched
# against spot IDs later in this script.
filteredGenes <- mouseCoronal$filteredGenes

## ----pca, eval=TRUE-----------------------------------------------------------
# Dimensionality reduction by PCA on log10(x + 1)-transformed expression.
# NOTE(review): no CPM normalization is performed here; presumably
# `filteredGenes` is already depth-normalized -- confirm against the dataset.
# The matrix is transposed so that spots are the observations (rows).
pcs.info <- prcomp(t(log10(as.matrix(filteredGenes) + 1)), center = TRUE)
# Check `screeplot` to assess number of PCs to use
screeplot(pcs.info, npcs = 20)
# Choose number of PCs to keep based on the screeplot ("elbow rule")
numberPcs <- 5
# drop = FALSE keeps `pcs` a matrix whatever value numberPcs is set to
pcs <- pcs.info$x[, seq_len(numberPcs), drop = FALSE]
# 2D embedding of the spots by tSNE, computed on the retained PCs.
# NOTE(review): tSNE is stochastic and no seed is set in this script, so the
# embedding may differ between runs; consider set.seed() for reproducibility.
emb <- Rtsne::Rtsne(
  pcs,
  is_distance = FALSE,
  perplexity = 30,
  num_threads = 1,
  verbose = FALSE
)$Y
# Carry the spot IDs over so the embedding can be matched to other objects
rownames(emb) <- rownames(pcs)
# Community (cluster) label for each spot, computed on the same PCs
spotClusters <- getClusters(pcs, k = 100)

## ----visualize-clusters, fig.width=8, fig.height=6----------------------------
# Two panels side by side with a margin of 2 lines on every side
par(mfrow = c(1, 2), mar = rep(2, 4))

# Left panel: tSNE embedding, spots colored by their community label
plotEmbedding(
  emb,
  groups = spotClusters,
  show.legend = TRUE,
  xlab = "tSNE X",
  ylab = "tSNE Y",
  verbose = FALSE
)

# Right panel: spots at their position on the tissue, same coloring
plotEmbedding(
  tissueSpotRotation,
  groups = spotClusters,
  cex = 1,
  xlab = "spatial X",
  ylab = "spatial Y",
  verbose = FALSE
)

## ----differentially-expressed-genes, eval=TRUE--------------------------------
# Identify significantly differentially upregulated genes
# in each identified cluster by Wilcox test
diffGenes <- getDifferentialGenes(as.matrix(filteredGenes), spotClusters)
## focus on cluster 4
## NOTE(review): this takes the 4th list element by position, while the
## cluster's spots are later selected by the label '4'; the two agree only if
## the result list is ordered like the cluster labels -- confirm.
diffExpCluster4Genes <- diffGenes[[4]]
# keep only genes flagged `highest`, i.e. genes whose highest expression is
# in this cluster, so each gene serves as a marker for a single cluster
highestExpClust4 <-
  diffExpCluster4Genes[which(diffExpCluster4Genes$highest == TRUE), ]
## order by adjusted p-value and view the top hits
topDiffExpGenesClust4 <- highestExpClust4[order(highestExpClust4$p.adj), ]
# head() shows at most 10 rows and, unlike `[1:10, ]`, does not pad the table
# with all-NA rows when fewer than 10 genes are available
head(topDiffExpGenesClust4, 10)


## ----visualize-top-diff-exp-genes, fig.width=5, fig.height=5, eval=TRUE-------
# Two panels side by side with a margin of 2 lines on every side
par(mfrow = c(1, 2), mar = rep(2, 4))

# Gene to display
g <- "Cpne6"
# Center and scale the gene's expression across spots; scale() returns a
# one-column matrix, so take that column back out as a plain vector
gexp <- scale(filteredGenes[g, ])[, 1]

# Left panel: tSNE embedding colored by scaled expression
plotEmbedding(emb, col = gexp)
# Right panel: tissue positions colored by scaled expression
plotEmbedding(
  tissueSpotRotation,
  col = gexp,
  cex = 1,
  xlab = "spatial X",
  ylab = "spatial Y"
)

## ----cluster-weight-matrix, fig.width=4, fig.height=4, eval=TRUE--------------
# IDs of the spots whose community label is '4'
cluster4SpotIDs <- names(spotClusters)[which(spotClusters == "4")]
# Tissue coordinates of just those spots, looked up by spot ID
cluster4SpotCoords <- tissueSpotRotation[cluster4SpotIDs, ]

# Spatial neighbor (weight) matrix restricted to the cluster's spots
cluster4WeightMatrix <- getSpatialNeighbors(
  cluster4SpotCoords,
  filterDist = 25,
  verbose = TRUE
)
# Draw the resulting neighbor network on top of the spot coordinates
plotNetwork(cluster4SpotCoords, cluster4WeightMatrix)

## ----cluster-morans-I, eval=TRUE----------------------------------------------
# get gene expression of filteredGenes for just the cluster spots
# NOTE(review): columns keep the order they have in `filteredGenes`; this is
# assumed to match the spot order of `cluster4WeightMatrix` -- confirm.
cluster4GeneExp <-
  filteredGenes[, colnames(filteredGenes) %in% cluster4SpotIDs]

# calculate Moran's I for filteredGenes only using the spots in cluster
# aka autocorrelation of filtered genes within the cluster
cluster4GeneExpMoransI <- getSpatialPatterns(
  cluster4GeneExp,
  cluster4WeightMatrix,
  verbose = TRUE
)

# find significantly spatially variable genes
cluster4SpatialGenes <- filterSpatialPatterns(
  cluster4GeneExp,
  cluster4GeneExpMoransI,
  cluster4WeightMatrix,
  details = TRUE,
  verbose = TRUE,
  minPercentCells = 0.10
)

# plot a few genes (up to four, one per panel of a 2 x 2 grid)
par(mfrow = c(2, 2))
# head() avoids NA gene names when fewer than four genes pass the filter;
# the call is made only for its plots, so its return value is not printed
invisible(lapply(head(rownames(cluster4SpatialGenes), 4), function(g) {
  plotEmbedding(
    emb = cluster4SpotCoords,
    col = winsorize(scale(cluster4GeneExp[g, ])[, 1]),
    main = g
  )
}))

## ----cluster-spatial-patterns, fig.width=5, fig.height=6, eval=TRUE-----------
# Spatial cross-correlation matrix between the significantly spatially
# variable genes, using the cluster's weight matrix
cluster4CorrMtx <- spatialCrossCorMatrix(
  cluster4GeneExp[rownames(cluster4SpatialGenes), ],
  cluster4WeightMatrix
)

# 2 x 2 grid for the plots produced while grouping the patterns
par(mfrow = c(2, 2))
# Positions of all tissue spots are passed to make the visualization more
# representative.
# NOTE(review): `pos` covers every tissue spot while `mat` only holds the
# cluster's spots -- confirm the callee aligns the two by spot ID.
method <- "ward.D"
cluster4SpatialPatterns <- groupSigSpatialPatterns(
  pos = tissueSpotRotation,
  mat = cluster4GeneExp[rownames(cluster4SpatialGenes), ],
  scc = cluster4CorrMtx,
  hclustMethod = method,
  deepSplit = 1,
  binSize = 50,
  power = 1
)

# Sanity check of the cross-correlation matrix on a single full-size panel
par(mfrow = c(1, 1))
# One color per pattern level, expanded to one entry per grouped gene
patternColors <- rainbow(
  length(levels(cluster4SpatialPatterns$groups)),
  v = 0.5
)[cluster4SpatialPatterns$groups]
names(patternColors) <- names(cluster4SpatialPatterns$groups)
# Heatmap of the matrix, with rows and columns arranged by the clustering
# dendrogram and side bars showing each gene's pattern color
heatmap(
  cluster4CorrMtx[
    cluster4SpatialPatterns$hc$labels,
    cluster4SpatialPatterns$hc$labels
  ],
  scale = "none",
  Colv = as.dendrogram(cluster4SpatialPatterns$hc),
  Rowv = as.dendrogram(cluster4SpatialPatterns$hc),
  labCol = NA,
  labRow = NA,
  RowSideColors = patternColors[cluster4SpatialPatterns$hc$labels],
  ColSideColors = patternColors[cluster4SpatialPatterns$hc$labels],
  col = colorRampPalette(c("white", "lightgrey", "black"))(100)
)


