
library(Seurat)
library(harmony)
library(ggplot2)
library(cowplot)
library(dplyr)
library(Seurat)
library(harmony)
library(Signac)
library(GenomicRanges)
library(ggplot2)
library(cowplot)
library(stringr)
library(cellrangerRkit)
library(patchwork)
library(ggrepel)
library(Matrix)
library(monocle)
library(SeuratWrappers)
library(igraph)
library(tidyverse) 
library(tidycensus)
library(leidenbase)

# Load the monocle package from a local source directory.
devtools::load_all("C:/Merfish_code/DATA/monocle")

# NOTE(review): the absolute path and setwd() tie this script to one machine;
# kept because every file name below is relative to this directory.
workingDir <- "C:/Merfish_code/DATA"
setwd(workingDir)
getwd()

# Figure4_UMAP_related_code
# Read the count table (first CSV column becomes the row names) and build the
# Seurat object, dropping features seen in < 3 cells and cells with < 10
# features.
MERFSDH.counts <- read.csv(
  file = "All_AN_for_harmony_Nuc.csv",
  header = TRUE,
  row.names = 1
)
MERFSDH <- CreateSeuratObject(
  counts = MERFSDH.counts,
  project = "Harmony",
  min.cells = 3,
  min.features = 10
)

# Append the per-cell annotation columns to the Seurat metadata.
# cbind() pairs rows purely by position, so fail loudly if the annotation
# table does not have exactly one row per retained cell.
# NOTE(review): row ORDER is still assumed to match the cell order -- confirm.
celltypes_col <- read.csv(file = "celltypeInfo.csv", header = TRUE)
stopifnot(nrow(celltypes_col) == nrow(MERFSDH@meta.data))
MERFSDH@meta.data <- cbind(MERFSDH@meta.data, celltypes_col)

# Variable features (20 requested) and a labelled variable-feature plot.
MERFSDH <- FindVariableFeatures(
  MERFSDH,
  selection.method = "vst",
  nfeatures = 20
)
top10 <- head(VariableFeatures(MERFSDH), 10)
p1 <- VariableFeaturePlot(MERFSDH)
p2 <- LabelPoints(plot = p1, points = top10, repel = TRUE)
p1 + p2

# Scale every gene, then run PCA on the variable features only.
all.genes <- rownames(MERFSDH)
MERFSDH <- ScaleData(MERFSDH, features = all.genes)

MERFSDH <- RunPCA(MERFSDH, features = VariableFeatures(object = MERFSDH))
ElbowPlot(MERFSDH, ndims = 19, reduction = "pca")

# Graph-based clustering and UMAP on the first 10 PCs (before Harmony).
MERFSDH <- FindNeighbors(MERFSDH, dims = 1:10)
MERFSDH <- FindClusters(MERFSDH, resolution = 0.5)
MERFSDH <- RunUMAP(MERFSDH, dims = 1:10)

# PC_1 distribution and PCA scatter, both grouped by orig.ident.
p1 <- VlnPlot(
  object = MERFSDH,
  features = "PC_1",
  group.by = "orig.ident",
  pt.size = .1
) + NoLegend()
p2 <- DimPlot(
  object = MERFSDH,
  reduction = "pca",
  pt.size = .1,
  group.by = "orig.ident"
)

plot_grid(p1, p2)

# Run Harmony on a copy of the object, integrating over orig.ident.
MERFSDH_harmony <- RunHarmony(
  MERFSDH,
  "orig.ident",
  plot_convergence = TRUE,
  project.dim = FALSE
)
harmony_embeddings <- Embeddings(MERFSDH_harmony, "harmony")

# UMAP, neighbour graph and clustering, all on the first 10 Harmony dimensions.
MERFSDH_harmony <- RunUMAP(
  MERFSDH_harmony,
  reduction = "harmony",
  dims = 1:10,
  verbose = FALSE
)
MERFSDH_harmony <- FindNeighbors(
  MERFSDH_harmony,
  reduction = "harmony",
  k.param = 10,
  dims = 1:10
)
MERFSDH_harmony <- FindClusters(MERFSDH_harmony)

# UMAP with identities set to orig.ident.
MERFSDH_harmony <- SetIdent(object = MERFSDH_harmony, value = "orig.ident")
DimPlot(object = MERFSDH_harmony, reduction = "umap")


# figure4A: UMAP labelled by Seurat cluster, legend hidden.
MERFSDH_harmony <- SetIdent(
  object = MERFSDH_harmony,
  value = "seurat_clusters"
)
DimPlot(object = MERFSDH_harmony, label = TRUE) + NoLegend()


# Figure4B: UMAP grouped by the 'cell_type' metadata column, coloured with a
# Group -> Color palette read from CSV.

color_data <- read.csv(file = "color_group.csv", header = TRUE) # csv for color!!!
color_data$Color <- as.character(color_data$Color)
# Named character vector: names are the groups, values are the colours.
col <- setNames(color_data$Color, as.character(color_data$Group))
DimPlot(
  object = MERFSDH_harmony,
  group.by = "cell_type",
  dims = c(1, 2),
  seed = 1,
  label.size = 4,
  label.color = "black",
  cols.highlight = "#DE2D26",
  sizes.highlight = 1,
  na.value = "grey50",
  combine = TRUE,
  raster.dpi = c(512, 512)
) +
  scale_color_manual(values = col)



# figure4F: same plot grouped by the 'hi_lo_3_4' metadata column, with its own
# palette file.
color_data <- read.csv(file = "color_hi.csv", header = TRUE)
color_data$Color <- as.character(color_data$Color)
col <- setNames(color_data$Color, as.character(color_data$Group))
DimPlot(
  object = MERFSDH_harmony,
  group.by = "hi_lo_3_4",
  dims = c(1, 2),
  seed = 1,
  label.size = 4,
  label.color = "black",
  cols.highlight = "#DE2D26",
  sizes.highlight = 1,
  na.value = "grey50",
  combine = TRUE,
  raster.dpi = c(512, 512)
) +
  scale_color_manual(values = col)

# figure4G: expression of four genes on the Harmony object's embedding.
FeaturePlot(
  object = MERFSDH_harmony,
  features = c("MYH8", "LEUTX", "CCNA1", "ZSCAN4")
)

# figure(UMAP_Batch_before_Harmony FigS8A): uncorrected object, by orig.ident.
DimPlot(
  object = MERFSDH,
  group.by = "orig.ident",
  dims = c(1, 2),
  label.color = "black",
  cols.highlight = "#DE2D26",
  sizes.highlight = 1
)


# figure(UMAP_Batch_after_Harmony FigS8A): Harmony object, by orig.ident.
DimPlot(
  object = MERFSDH_harmony,
  group.by = "orig.ident",
  dims = c(1, 2),
  label.color = "black",
  cols.highlight = "#DE2D26",
  sizes.highlight = 1
)

# PCA before Harmony FigS8A
DimPlot(
  object = MERFSDH,
  reduction = "pca",
  group.by = "orig.ident",
  pt.size = .1
)

# ElbowPlot before Harmony FigS8A
ElbowPlot(object = MERFSDH, ndims = 19, reduction = "pca")

# Fig4_monocle_related_code
# Builds a monocle CellDataSet from the batch-corrected counts, clusters the
# cells, picks ordering genes by differential expression between clusters and
# learns a DDRTree trajectory (Fig4H).

workingDir <- "C:/Merfish_code/DATA/fig1H"

setwd(workingDir)
getwd()


MERFSDH1.counts <- read.csv(
  file = "fig1H_batch_corrected_data_including_1000MNC.csv",
  header = TRUE,
  row.names = 1
)
MERFSDH1 <- CreateSeuratObject(
  counts = MERFSDH1.counts,
  project = "MERFSDH1test",
  min.cells = 3,
  min.features = 10
)

# cbind() pairs annotation rows with cells purely by position, so require one
# annotation row per retained cell instead of relying on recycling/errors.
# NOTE(review): row ORDER is still assumed to match the cell order -- confirm.
cell_type_harmony_hi_lo_hi_lo_col <- read.csv(
  file = "fig1H_celltype_including_1000MNC.csv",
  header = TRUE
)
stopifnot(
  nrow(cell_type_harmony_hi_lo_hi_lo_col) == nrow(MERFSDH1@meta.data)
)
MERFSDH1@meta.data <- cbind(
  MERFSDH1@meta.data,
  cell_type_harmony_hi_lo_hi_lo_col
)

# Expression matrix comes from the raw table while the phenotype data comes
# from the (filtered) Seurat metadata.
# NOTE(review): if the min.features filter ever drops a cell these two will no
# longer describe the same cells -- verify.
data <- as(as.matrix(MERFSDH1.counts), "sparseMatrix")
pd <- new("AnnotatedDataFrame", data = MERFSDH1@meta.data)
# NOTE: this data frame shares its name with the fData() accessor used below;
# R still resolves fData(...) calls to the function.
fData <- data.frame(
  gene_short_name = row.names(data),
  row.names = row.names(data)
)
fd <- new("AnnotatedDataFrame", data = fData)

monocle_cds <- newCellDataSet(
  data,
  phenoData = pd,
  featureData = fd,
  lowerDetectionLimit = 0.5,
  expressionFamily = negbinomial.size()
)

my_cds <- monocle_cds
slotNames(my_cds)
my_cds <- estimateSizeFactors(my_cds)
my_cds <- estimateDispersions(my_cds)

# Per-gene / per-cell detection statistics and quick QC views.
my_cds <- detectGenes(my_cds, min_expr = 0.1)
head(fData(my_cds))

summary(fData(my_cds)$num_cells_expressed)
head(pData(my_cds))
summary(pData(my_cds)$num_genes_expressed)
# Standardise the number of detected genes per cell and plot it with +/- 2 SD
# guide lines.
x <- pData(my_cds)$num_genes_expressed
x_1 <- (x - mean(x)) / sd(x)
summary(x_1)
df <- data.frame(x = x_1)
ggplot(df, aes(x)) +
  geom_histogram(bins = 50) +
  geom_vline(xintercept = c(-2, 2), linetype = "dotted", color = "red")
# Column sums of the expression matrix, stored per cell as "UMI".
pData(my_cds)$UMI <- Matrix::colSums(exprs(my_cds))
head(pData(my_cds))
ggplot(pData(my_cds), aes(num_genes_expressed, UMI)) + geom_point()
disp_table <- dispersionTable(my_cds)
head(disp_table)
table(disp_table$mean_expression >= 0.1)

# Unsupervised clustering on genes with mean expression >= 0.1.
unsup_clustering_genes <- subset(disp_table, mean_expression >= 0.1)
my_cds <- setOrderingFilter(my_cds, unsup_clustering_genes$gene_id)
plot_ordering_genes(my_cds)
plot_pc_variance_explained(my_cds, return_all = FALSE)
my_cds <- reduceDimension(
  my_cds,
  max_components = 2,
  num_dim = 9,
  reduction_method = "tSNE",
  verbose = TRUE
)
my_cds <- clusterCells(my_cds, num_clusters = 15)
head(pData(my_cds))
#write.csv(pData(my_cds),"240226_cds_clusterCell_1.csv", row.names = T)
my_cluster_dim_5 <- pData(my_cds)$Cluster
plot_cell_clusters(my_cds)
clustering_DEG_genes <- differentialGeneTest(
  my_cds,
  fullModelFormulaStr = "~Cluster",
  cores = 8
)
clustering_DEG_genes %>% arrange(qval) %>% head()
#write.csv(clustering_DEG_genes,"240226_clustering_DEG_genes.csv", row.names = T)

# Order cells using the 18 genes with the smallest q-values.
my_ordering_genes <-
  row.names(clustering_DEG_genes)[order(clustering_DEG_genes$qval)][1:18]
my_cds <- setOrderingFilter(my_cds, ordering_genes = my_ordering_genes)
# The original passed `method = 'DDRTree'`, which is not a formal argument of
# reduceDimension(); name the real argument so the request is explicit.
my_cds <- reduceDimension(my_cds, reduction_method = "DDRTree")
my_cds <- orderCells(my_cds, reverse = TRUE)
ddrtreePt <- plot_cell_trajectory(
  my_cds,
  color_by = "Pseudotime",
  theta = 160,
  show_branch_points = FALSE
)
ddrtreePt <- ddrtreePt + viridis::scale_color_viridis(option = "D") +
  theme(
    legend.text = element_text(size = 18),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_text(size = 18),
    legend.key.width = unit(0.3, "in")
  )
#Fig4H
ddrtreePt
# Pseudotime / branch heatmaps for the 50 genes with the smallest q-values in
# a precomputed pseudotime differential-expression table.
my_pseudotime_de <- read.csv(
  file = "2221121_my_pseudotime_de.csv",
  header = TRUE
)
# dplyr::select is qualified because MASS (attached later in this script, and
# possibly already attached on a re-run) also exports select().
gene_to_cluster <- my_pseudotime_de %>%
  arrange(qval) %>%
  head(50) %>%
  dplyr::select(gene_short_name)
gene_to_cluster <- gene_to_cluster$gene_short_name

# Heatmap of the selected genes along pseudotime, in 3 clusters.
# (The original drew this identical heatmap twice; once is enough because
# my_cds is not modified in between.)
my_pseudotime_cluster <- plot_pseudotime_heatmap(
  my_cds[gene_to_cluster, ],
  num_clusters = 3,
  cores = 8,
  show_rownames = TRUE,
  return_heatmap = TRUE
)

# Branch-specific CellDataSet around branch point 1.
# (The original also ran this exact call once with the result discarded.)
# NOTE(review): despite its name, BEAM_res holds the output of
# buildBranchCellDataSet(), not of BEAM() -- confirm intent.
BEAM_res <- buildBranchCellDataSet(
  my_cds,
  progenitor_method = c("sequential_split"),
  branch_states = NULL,
  branch_point = 1,
  branch_labels = NULL,
  stretch = TRUE
)

#Fig4L
plot_genes_branched_heatmap(
  my_cds[gene_to_cluster, ],
  branch_point = 1,
  num_clusters = 3,
  cores = 2,
  use_gene_short_name = TRUE,
  show_rownames = TRUE
)

# Fig4I: trajectory coloured by and faceted on 'cluster_hi_4_3' (facet order
# reversed), three facet rows.
ddrtreePt <- plot_cell_trajectory(
  my_cds,
  color_by = "cluster_hi_4_3",
  theta = 160,
  show_branch_points = FALSE
) +
  facet_wrap(~fct_rev(cluster_hi_4_3), nrow = 3) +
  theme(
    legend.text = element_text(size = 12),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_text(size = 18),
    legend.key.width = unit(0.1, "in")
  )

ddrtreePt




# FigS8D: same layout for 'hi_lo_cluster1_rank'.
ddrtreePt <- plot_cell_trajectory(
  my_cds,
  color_by = "hi_lo_cluster1_rank",
  theta = 160,
  show_branch_points = FALSE
) +
  facet_wrap(~fct_rev(hi_lo_cluster1_rank), nrow = 3) +
  theme(
    legend.text = element_text(size = 12),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_text(size = 18),
    legend.key.width = unit(0.1, "in")
  )

ddrtreePt


# FigS8B and D: same layout for 'cluster1_rank', two facet rows.
ddrtreePt <- plot_cell_trajectory(
  my_cds,
  color_by = "cluster1_rank",
  theta = 160,
  show_branch_points = FALSE
) +
  facet_wrap(~fct_rev(cluster1_rank), nrow = 2) +
  theme(
    legend.text = element_text(size = 12),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_text(size = 18),
    legend.key.width = unit(0.1, "in")
  )

ddrtreePt


# Fig5_WGCNA_related_code
# Control network: soft-threshold diagnostics, TOM, module detection,
# intramodular connectivity, dendrogram/heatmap PDFs and a VisANT export.
# geneTree and dynamicColors from this section are kept as ref_geneTree /
# ref_color and reused by the FSHD1 and DEL5 sections.

library(dynamicTreeCut)
library(flashClust)
library(WGCNA)
allowWGCNAThreads()
# NOTE(review): this creates a plain R variable, not an environment variable,
# so it presumably has no effect on WGCNA threading -- confirm intent.
ALLOW_WGCNA_THREADS <- 12
options(stringsAsFactors = FALSE)
enableWGCNAThreads()
library(MASS)
library(class)
library(cluster)
library(impute)
library(Hmisc)

softPower <- 4      # power used for TOM and adjacency
minModuleSize <- 8  # passed to cutreeDynamic as minClusterSize
cutHeight <- 0.94   # passed to cutreeDynamic as cutHeight
n <- 97             # index of the last gene name kept in SubGeneNames

workingDir <- "C:/Merfish_code/DATA"
setwd(workingDir)
getwd()

Data <- read.csv("Control_corrected_1616_97_BNX100.csv", header = TRUE)

gene.names <- Data$gene
SubGeneNames <- gene.names[21:n] # only keep no DUX4 genes

# Transpose so that genes become columns; column 1 of Data is dropped first.
datExpr0 <- as.data.frame(t(Data[, -c(1)]))
names(datExpr0) <- Data$gene
# The original called fix() here, which opens a modal data editor, blocks
# non-interactive runs and writes any accidental edit back into the data.
# Report the dimensions instead.
print(dim(datExpr0))

# Drop the first 20 gene columns.
datExpr <- as.data.frame(datExpr0[, -c(1:20)])
print(dim(datExpr)) # no DUX4 target genes

# Soft-threshold diagnostics.
powers <- c(c(1:10), seq(from = 12, to = 20, by = 2))
sft <- pickSoftThreshold(
  datExpr,
  dataIsExpr = TRUE,
  powerVector = powers,
  corFnc = cor,
  corOptions = list(use = "p"),
  networkType = "unsigned"
)

sizeGrWindow(9, 5)
par(mfrow = c(1, 2))
cex1 <- 0.9

plot(
  sft$fitIndices[, 1],
  -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
  xlab = "Soft Threshold (power)",
  ylab = "Scale Free Topology Model Fit, signed R^2",
  type = "n",
  main = paste("corrected all Control Scale independence")
)
text(
  sft$fitIndices[, 1],
  -sign(sft$fitIndices[, 3]) * sft$fitIndices[, 2],
  labels = powers,
  cex = cex1,
  col = "red"
)

abline(h = 0.8, col = "red")
plot(
  sft$fitIndices[, 1],
  sft$fitIndices[, 5],
  xlab = "Soft Threshold (power)",
  ylab = "Mean Connectivity",
  type = "n",
  main = paste("Mean connectivity")
)
text(
  sft$fitIndices[, 1],
  sft$fitIndices[, 5],
  labels = powers,
  cex = cex1,
  col = "red"
)

# Topological overlap, average-linkage clustering on 1 - TOM, dynamic tree cut.
TOM <- TOMsimilarityFromExpr(
  datExpr,
  networkType = "unsigned",
  TOMType = "unsigned",
  power = softPower
)
colnames(TOM) <- rownames(TOM) <- SubGeneNames
dissTOM <- 1 - TOM
geneTree <- flashClust(as.dist(dissTOM), method = "average")

dynamicMods <- cutreeDynamic(
  dendro = geneTree,
  method = "tree",
  cutHeight = cutHeight,
  minClusterSize = minModuleSize
)

colorSeq <- c("turquoise", "blue", "brown")
dynamicColors <- labels2colors(dynamicMods, colorSeq = colorSeq)
table(dynamicColors)
#Data_for_figS9C_Control
write.csv(
  dynamicColors,
  "All_Control_power4_color_min8_cut0.94.csv",
  row.names = TRUE
)

genecolors <- dynamicColors
ADJControl <- adjacency(datExpr, power = softPower)
ConnectivityMeasuresControl <- intramodularConnectivity(
  ADJControl,
  colors = genecolors
)
#kWithin Data for Fig5C
# NOTE(review): the output file name starts with a space; kept byte-for-byte
# in case downstream steps depend on it -- confirm whether it is intended.
write.csv(
  ConnectivityMeasuresControl,
  " ConnectivityMeasuresControl_power4_color_min8_cut0.94.csv",
  row.names = TRUE
)

pdf(file = "Control_geneTree.pdf", width = 12, height = 4)
plotDendroAndColors(
  geneTree,
  genecolors,
  "ALL_CONTROL Modules",
  main = "ALL_CONTROL MTgene dendrogram and module colors",
  dendroLabels = FALSE,
  addGuide = TRUE
)
dev.off()

# Raise the dissimilarity to the 7th power for plotting and blank the diagonal.
plotTOM <- dissTOM^7
diag(plotTOM) <- NA

#Fig4A_Control
pdf(file = "Control_heatmap.pdf", width = 9, height = 9)
sizeGrWindow(9, 9)
TOMplot(plotTOM, geneTree, genecolors, main = "CONTROL_MT")
dev.off()

# Control tree and colours, reused as the reference by the later sections.
ref_geneTree <- geneTree
ref_color <- dynamicColors

pdf(file = "Control_heatmap_ref.pdf", width = 9, height = 9)
sizeGrWindow(9, 9)
TOMplot(plotTOM, ref_geneTree, ref_color, main = "ALL_CONTROL_ref_MT")
dev.off()

vis <- exportNetworkToVisANT(
  TOM,
  file = "Control_TOM.txt",
  weighted = TRUE,
  threshold = -1,
  probeToGene = data.frame(Data$gene, Data$gene)
) # looks OK
#Control_TOM_data for Fig5D
write.csv(vis, file = "Control_TOM.csv", row.names = TRUE)




# FSHD1 network: same pipeline as the Control section, plus plots and
# connectivity that reuse the Control reference tree/colours
# (ref_geneTree, ref_color).
Data <- read.csv("FSHD1_corrected_1616_97_BNX100.csv", header = TRUE)

gene.names <- Data$gene
SubGeneNames <- gene.names[21:n]
# Transpose so that genes become columns; column 1 of Data is dropped first.
datExpr0 <- as.data.frame(t(Data[, -c(1)]))
names(datExpr0) <- Data$gene
# fix() (a modal, write-back data editor) replaced by a non-blocking report.
print(dim(datExpr0))
# Drop the first 20 gene columns.
datExpr <- as.data.frame(datExpr0[, -c(1:20)])
print(dim(datExpr)) # no DUX4 target genes


TOM <- TOMsimilarityFromExpr(
  datExpr,
  networkType = "unsigned",
  TOMType = "unsigned",
  power = softPower
)
colnames(TOM) <- rownames(TOM) <- SubGeneNames
dissTOM <- 1 - TOM
geneTree <- flashClust(as.dist(dissTOM), method = "average")

pdf(file = "all_FSHD1_geneTree&name.pdf", width = 12, height = 4)
plot(
  geneTree,
  xlab = "",
  sub = "",
  cex = 0.7,
  main = "FSHD1 Cluster Dendrogram"
)
dev.off()

dynamicMods <- cutreeDynamic(
  dendro = geneTree,
  method = "tree",
  cutHeight = cutHeight,
  minClusterSize = minModuleSize
)
colorSeq <- c("saddlebrown", "skyblue", "red", "yellow")
dynamicColors <- labels2colors(dynamicMods, colorSeq = colorSeq)
table(dynamicColors)
#Data_for_figS9C_FSHD
write.csv(
  dynamicColors,
  "All_FSHD1_power4_color_min8_cut0.94.csv",
  row.names = TRUE
)
genecolors <- dynamicColors
# figS9D_FSHD: FSHD1 tree with FSHD1 module colours.
pdf(file = "FSHD1_geneTree.pdf", width = 12, height = 4)
plotDendroAndColors(
  geneTree,
  genecolors,
  "ALL_FSHD1 Modules",
  main = "ALL_FSHD1 MTgene dendrogram and module colors",
  dendroLabels = FALSE,
  addGuide = TRUE
)
dev.off()
# figS9A and D: FSHD1 tree with the reference colours.
pdf(file = "FSHD1_geneTree_ref_color.pdf", width = 12, height = 4)
plotDendroAndColors(
  geneTree,
  ref_color,
  "ALL_FSHD1 Modules",
  main = "ALL_FSHD1 MTgene dendrogram and module colors",
  dendroLabels = FALSE,
  addGuide = TRUE
)
dev.off()
# Reference tree with FSHD1 module colours.
pdf(file = "FSHD1_geneTree_ref_tree.pdf", width = 12, height = 4)
plotDendroAndColors(
  ref_geneTree,
  genecolors,
  "ALL_FSHD1 Modules",
  main = "ALL_FSHD1 MTgene dendrogram and module colors",
  dendroLabels = FALSE,
  addGuide = TRUE
)
dev.off()
plotTOM <- dissTOM^7
diag(plotTOM) <- NA
# figS9A.
pdf(file = "FSHD1_heatmap.pdf", width = 9, height = 9)
sizeGrWindow(9, 9)
TOMplot(plotTOM, geneTree, genecolors, main = "FSHD1_MT")
dev.off()

# Intramodular connectivity of the FSHD1 adjacency under the reference colours.
ADJFSHD1 <- adjacency(datExpr, power = softPower)
ConnectivityMeasuresFSHD1 <- intramodularConnectivity(
  ADJFSHD1,
  colors = ref_color
)
#kWithin Data for Fig5C
# NOTE(review): the output file name starts with a space; kept byte-for-byte.
write.csv(
  ConnectivityMeasuresFSHD1,
  " ConnectivityMeasuresFSHD1_power4_color_min8_cut0.94.csv",
  row.names = TRUE
)

#Fig4A_FSHD
pdf(file = "FSHD1_heatmap_ref.pdf", width = 9, height = 9)
sizeGrWindow(9, 9)
TOMplot(plotTOM, ref_geneTree, ref_color, main = "FSHD1_MT")
dev.off()

#FigS9A_FSHD
pdf(file = "FSHD1_heatmap_ref_cor.pdf", width = 9, height = 9)
sizeGrWindow(9, 9)
TOMplot(plotTOM, geneTree, ref_color, main = "FSHD1_MT")
dev.off()

vis <- exportNetworkToVisANT(
  TOM,
  file = "FSHD1_TOM.txt",
  weighted = TRUE,
  threshold = -1,
  probeToGene = data.frame(Data$gene, Data$gene)
) # looks OK
#FSHD_TOM_data for Fig5D
write.csv(vis, file = "FSHD1_TOM.csv", row.names = TRUE)

 

# DEL5 network: same pipeline as the Control section, plus plots and
# connectivity that reuse the Control reference tree/colours
# (ref_geneTree, ref_color).
Data <- read.csv("DEL5_corrected_1616_97_BNX100.csv", header = TRUE)

gene.names <- Data$gene
SubGeneNames <- gene.names[21:n] # only keep no DUX4 genes
# Transpose so that genes become columns; column 1 of Data is dropped first.
datExpr0 <- as.data.frame(t(Data[, -c(1)]))
names(datExpr0) <- Data$gene
# fix() (a modal, write-back data editor) replaced by a non-blocking report.
print(dim(datExpr0))

# Drop the first 20 gene columns.
datExpr <- as.data.frame(datExpr0[, -c(1:20)])
print(dim(datExpr)) # no DUX4 target genes


TOM <- TOMsimilarityFromExpr(
  datExpr,
  networkType = "unsigned",
  TOMType = "unsigned",
  power = softPower
)
colnames(TOM) <- rownames(TOM) <- SubGeneNames
dissTOM <- 1 - TOM
geneTree <- flashClust(as.dist(dissTOM), method = "average")

pdf(file = "all_DEL5_geneTree&name.pdf", width = 12, height = 4)
plot(
  geneTree,
  xlab = "",
  sub = "",
  cex = 0.7,
  main = "DEL5 Cluster Dendrogram"
)
dev.off()

dynamicMods <- cutreeDynamic(
  dendro = geneTree,
  method = "tree",
  cutHeight = cutHeight,
  minClusterSize = minModuleSize
)
colorSeq <- c("darkorange", "midnightblue", "purple", "greenyellow")
dynamicColors <- labels2colors(dynamicMods, colorSeq = colorSeq)
table(dynamicColors)

#Data_for_figS9C_DEL5
write.csv(
  dynamicColors,
  "All_DEL5_power4_color_min8_cut0.94.csv",
  row.names = TRUE
)
genecolors <- dynamicColors

# figS9D_DEL5: DEL5 tree with DEL5 module colours.
pdf(file = "DEL5_geneTree.pdf", width = 12, height = 4)
plotDendroAndColors(
  geneTree,
  genecolors,
  "ALL_DEL5 Modules",
  main = "ALL_DEL5 MTgene dendrogram and module colors",
  dendroLabels = FALSE,
  addGuide = TRUE
)
dev.off()

# figS9A and D: DEL5 tree with the reference colours.
pdf(file = "DEL5_geneTree_ref_color.pdf", width = 12, height = 4)
plotDendroAndColors(
  geneTree,
  ref_color,
  "ALL_DEL5 Modules",
  main = "ALL_DEL5 MTgene dendrogram and module colors",
  dendroLabels = FALSE,
  addGuide = TRUE
)
dev.off()

# Reference tree with DEL5 module colours.
pdf(file = "DEL5_geneTree_ref_tree.pdf", width = 12, height = 4)
plotDendroAndColors(
  ref_geneTree,
  genecolors,
  "ALL_DEL5 Modules",
  main = "ALL_DEL5 MTgene dendrogram and module colors",
  dendroLabels = FALSE,
  addGuide = TRUE
)
dev.off()

plotTOM <- dissTOM^7
diag(plotTOM) <- NA
# figS9A.
pdf(file = "DEL5_heatmap.pdf", width = 9, height = 9)
sizeGrWindow(9, 9)
TOMplot(plotTOM, geneTree, genecolors, main = "DEL5_MT")
dev.off()

# Intramodular connectivity of the DEL5 adjacency under the reference colours.
ADJDEL5 <- adjacency(datExpr, power = softPower)
ConnectivityMeasuresDEL5 <- intramodularConnectivity(
  ADJDEL5,
  colors = ref_color
)
#kWithin Data for Fig5C
# NOTE(review): the output file name starts with a space; kept byte-for-byte.
write.csv(
  ConnectivityMeasuresDEL5,
  " ConnectivityMeasuresDEL5_power4_color_min8_cut0.94.csv",
  row.names = TRUE
)

#Fig4A_DEL5
pdf(file = "DEL5_heatmap_ref.pdf", width = 9, height = 9)
sizeGrWindow(9, 9)
TOMplot(plotTOM, ref_geneTree, ref_color, main = "DEL5_MT")
dev.off()

#FigS9A_DEL5
pdf(file = "DEL5_heatmap_ref_cor.pdf", width = 9, height = 9)
sizeGrWindow(9, 9)
TOMplot(plotTOM, geneTree, ref_color, main = "DEL5_MT")
dev.off()

vis <- exportNetworkToVisANT(
  TOM,
  file = "DEL5_TOM.txt",
  weighted = TRUE,
  threshold = -1,
  probeToGene = data.frame(Data$gene, Data$gene)
) # looks OK
#DEL5_TOM_data for Fig5D
write.csv(vis, file = "DEL5_TOM.csv", row.names = TRUE)

#correlation_data_related_code
# Control: signed 4th power of the gene-gene correlation matrix (first 20 gene
# columns excluded), exported in VisANT format.

library(dynamicTreeCut)
library(flashClust)
library(WGCNA)
allowWGCNAThreads()
# NOTE(review): plain R variable, not an environment variable; presumably has
# no effect on WGCNA threading -- confirm intent.
ALLOW_WGCNA_THREADS <- 12
options(stringsAsFactors = FALSE)
enableWGCNAThreads()
library(MASS)
library(class)
library(cluster)
library(impute)
library(Hmisc)
workingDir <- "C:/Merfish_code/DATA"
setwd(workingDir)
getwd()
Data <- read.csv("Control_corrected_1616_97_BNX100.csv", header = TRUE)
gene.names <- Data$gene
# Transpose so that genes become columns; column 1 of Data is dropped first.
datExpr0 <- as.data.frame(t(Data[, -c(1)]))
names(datExpr0) <- Data$gene
# fix() (a modal, write-back data editor) replaced by a non-blocking report.
print(dim(datExpr0)) # including DUX4 genes

datExpr <- as.data.frame(datExpr0[, -c(1:20)])
print(dim(datExpr)) # no DUX4 genes
cor_Data <- cor(datExpr)
# Sign matrix: +1 where r > 0, -1 otherwise (r == 0 maps to -1, exactly as the
# original element-wise loop did). Vectorised so it no longer depends on a
# hard-coded 77 x 77 size; NA correlations now propagate as NA instead of
# aborting the loop.
ar2 <- ifelse(cor_Data > 0, 1, -1)
Control_cor_power4 <- cor_Data^4 * ar2

vis <- exportNetworkToVisANT(
  Control_cor_power4,
  file = "Control_cor_power4.txt",
  weighted = TRUE,
  threshold = -1,
  probeToGene = data.frame(Data$gene, Data$gene)
)
#Control_correlation^4_data, for FigS9C
write.csv(vis, file = "Control_cor_power4.csv", row.names = TRUE)


# FSHD1: correlation matrix over all gene columns, exported both raw and as
# its signed 4th power.
Data <- read.csv("FSHD1_corrected_1616_97_BNX100.csv", header = TRUE)
gene.names <- Data$gene
# Transpose so that genes become columns; column 1 of Data is dropped first.
datExpr <- as.data.frame(t(Data[, -c(1)]))
names(datExpr) <- Data$gene
# fix() (a modal, write-back data editor) replaced by a non-blocking report.
print(dim(datExpr))
cor_Data <- cor(datExpr)
# Sign matrix: +1 where r > 0, -1 otherwise (r == 0 maps to -1, as in the
# original loop). Vectorised so it no longer depends on a hard-coded 97 x 97
# size; NA correlations propagate as NA instead of aborting.
ar2 <- ifelse(cor_Data > 0, 1, -1)
FSHD1_cor_power4 <- cor_Data^4 * ar2

#FSHD1_correlation_data, for Fig6A,  FigS9D and FigS10A
vis <- exportNetworkToVisANT(
  cor_Data,
  file = "FSHD1_cor.txt",
  weighted = TRUE,
  threshold = -1,
  probeToGene = data.frame(Data$gene, Data$gene)
)
write.csv(vis, file = "FSHD1_cor.csv", row.names = TRUE)

#FSHD1_correlation^4_data, for Fig6B and C, and FigS9C
vis <- exportNetworkToVisANT(
  FSHD1_cor_power4,
  file = "FSHD1_cor_power4.txt",
  weighted = TRUE,
  threshold = -1,
  probeToGene = data.frame(Data$gene, Data$gene)
)
write.csv(vis, file = "FSHD1_cor_power4.csv", row.names = TRUE)

# DEL5: correlation matrix over all gene columns, exported both raw and as its
# signed 4th power.
Data <- read.csv("DEL5_corrected_1616_97_BNX100.csv", header = TRUE)
gene.names <- Data$gene
# Transpose so that genes become columns; column 1 of Data is dropped first.
datExpr <- as.data.frame(t(Data[, -c(1)]))
names(datExpr) <- Data$gene
# fix() (a modal, write-back data editor) replaced by a non-blocking report.
print(dim(datExpr))

cor_Data <- cor(datExpr)
# Sign matrix: +1 where r > 0, -1 otherwise (r == 0 maps to -1, as in the
# original loop). Vectorised so it no longer depends on a hard-coded 97 x 97
# size; NA correlations propagate as NA instead of aborting.
ar2 <- ifelse(cor_Data > 0, 1, -1)

DEL5_cor_power4 <- cor_Data^4 * ar2
#DEL5_correlation_data, for Fig6A,  FigS9D and FigS10A
vis <- exportNetworkToVisANT(
  cor_Data,
  file = "DEL5_cor.txt",
  weighted = TRUE,
  threshold = -1,
  probeToGene = data.frame(Data$gene, Data$gene)
)
write.csv(vis, file = "DEL5_cor.csv", row.names = TRUE)

#DEL5_correlation^4_data, for Fig6B and C, and FigS9C
vis <- exportNetworkToVisANT(
  DEL5_cor_power4,
  file = "DEL5_cor_power4.txt",
  weighted = TRUE,
  threshold = -1,
  probeToGene = data.frame(Data$gene, Data$gene)
)
write.csv(vis, file = "DEL5_cor_power4.csv", row.names = TRUE)

# Fig5C: centred dot plot of kWithin against x1, each dot filled with the
# colour named in its own 'color' column.
library(ggplot2)
workingDir <- "C:/Merfish_code/DATA"
setwd(workingDir)
getwd()
data <- read.csv("ConnectivityMeasures_ALL.csv")
# Identity palette: every colour name maps to itself.
col <- setNames(as.character(data$color), as.character(data$color))

p <- ggplot(data, aes(x = x1, y = kWithin, fill = color)) +
  geom_dotplot(
    binaxis = "y",
    stackdir = "center",
    binwidth = 0.2,
    color = NA
  ) +
  scale_fill_manual(values = col)
p

# Fig5D: scatter of y against x, coloured by the 'color' column.
library(ggplot2)
workingDir <- "C:/Merfish_code/DATA"
setwd(workingDir)
getwd()
data <- read.csv("example_for_connection_comparison.csv")

# NOTE(review): size and alpha are constants placed inside aes(), so ggplot
# treats them as mapped aesthetics rather than literal values; left unchanged
# to keep the figure identical.
p <- ggplot(data, aes(x = x, y = y)) +
  scale_y_continuous(minor_breaks = seq(0, 25, 1)) +
  scale_x_continuous(limits = c(0, 0.6)) +
  geom_point(aes(color = color, size = 1, alpha = 0.99))
p + scale_colour_manual(values = c("black", "green", "red"))


#FigS3N and Fig1H
# Flags the genes furthest from the Y ~ X regression line, then draws the
# log-log correlation plot from a second, annotated table.

# The original two lines here read "liblibrary(ggrepel)" / "rary(tidyverse)",
# a corrupted pair of library() calls that stopped the script.
library(ggrepel)
library(tidyverse)
library(cowplot)
library(smplot2)
library(scales)

workingDir <- "C:/Merfish_code/DATA"
setwd(workingDir)
getwd()

dat <- read.csv("RNA_seq_vs_Merfish_test.csv")
dat <- data.frame(dat)
# NOTE(review): assumes the file has exactly two columns -- confirm.
names(dat) <- c("X", "Y")
mod <- lm(Y ~ X, data = dat)
res <- resid(mod)
# Residuals below the 5% or above the 95% quantile are flagged.
res.qt <- quantile(res, probs = c(0.05, 0.95))
unwant <- res < res.qt[1] | res > res.qt[2]
#find the top 10% genes, which are ranked by their distance from the regression line.
write.csv(unwant, file = "unwant_log.csv")


data <- read.csv("RNA_seq_vs_Merfish_with_unwant.csv")

#FigS3N, Fig1H (no labeled name)
ggplot(
  data = data,
  mapping = aes(x = bulkRNAseqTPM, y = MERFISHmyotube)
) +
  geom_label_repel(aes(label = name, color = color)) +
  geom_point(shape = 16, color = sm_color("blue"), size = 3) +
  sm_corr_theme() +
  sm_statCorr(color = sm_color("blue")) +
  scale_x_log10(
    breaks = trans_breaks("log10", function(x) 10^x),
    labels = trans_format("log10", math_format(10^.x))
  ) +
  scale_y_log10(
    breaks = trans_breaks("log10", function(x) 10^x),
    labels = trans_format("log10", math_format(10^.x))
  )


# FigS9B
# Module preservation: modules are detected in the reference set and their
# preservation is tested in the Control data.
# NOTE(review): the reference object is named Data.CTRL / datExpr.CTRL but is
# read from the DEL5 file, and several plot labels say "Control Modules" --
# names and labels are kept as-is; confirm they are intended.

library(dynamicTreeCut)
library(flashClust)
library(WGCNA)
allowWGCNAThreads()
# NOTE(review): plain R variable, not an environment variable; presumably has
# no effect on WGCNA threading -- confirm intent.
ALLOW_WGCNA_THREADS <- 12
options(stringsAsFactors = FALSE)
enableWGCNAThreads()
library(MASS)
library(class)
library(cluster)
library(impute)
library(Hmisc)



softPower <- 4
minModuleSize <- 8
cutHeight <- 0.94
n <- 97

workingDir <- "C:/Merfish_code/DATA"
setwd(workingDir)
getwd()

# Reference set.
Data.CTRL <- read.csv("DEL5_corrected_1616_97_BNX100.csv", header = TRUE)
dim(Data.CTRL)

gene.names <- Data.CTRL$gene
datExpr0 <- as.data.frame(t(Data.CTRL[, -c(1)]))
names(datExpr0) <- Data.CTRL$gene
# fix() (a modal, write-back data editor) replaced by a non-blocking report.
print(dim(datExpr0))
datExpr.CTRL <- as.data.frame(datExpr0[, -c(1:20)])

# Test set.
Data <- read.csv("Control_corrected_1616_97_BNX100.csv", header = TRUE)
gene.names <- Data$gene
datExpr0 <- as.data.frame(t(Data[, -c(1)]))
names(datExpr0) <- Data$gene
print(dim(datExpr0))

datExpr0 <- as.data.frame(datExpr0[, -c(1:20)])
print(dim(datExpr0))

# Module detection on the reference set.
adj <- adjacency(datExpr.CTRL, type = "unsigned", power = softPower)
TOM <- TOMsimilarityFromExpr(
  datExpr.CTRL,
  networkType = "unsigned",
  TOMType = "unsigned",
  power = softPower
)
colnames(TOM) <- rownames(TOM)
dissTOM <- 1 - TOM
geneTree <- flashClust(as.dist(dissTOM), method = "average")
plot(geneTree, xlab = "", sub = "", cex = 0.7)


dynamicMods <- cutreeDynamic(
  dendro = geneTree,
  method = "tree",
  cutHeight = cutHeight,
  minClusterSize = minModuleSize
)
table(dynamicMods)

colorSeq <- c("darkorange", "midnightblue", "purple", "greenyellow")
dynamicColors <- labels2colors(dynamicMods, colorSeq = colorSeq)
table(dynamicColors)
colorsCONTROL <- dynamicColors
# Reorder the test set's gene columns to match the reference set.
CONTROL2other <- match(colnames(datExpr.CTRL), colnames(datExpr0))
table(is.finite(CONTROL2other))
datExpr0 <- datExpr0[, CONTROL2other]
all.equal(colnames(datExpr.CTRL), colnames(datExpr0))
TOMCTRL <- TOMsimilarityFromExpr(
  datExpr.CTRL,
  power = softPower,
  TOMType = "unsigned"
)
dissTOMCTRL <- 1 - TOMCTRL

# "a" in the original was an abbreviation of "average".
treeCTRL <- flashClust(as.dist(dissTOMCTRL), method = "average")
sizeGrWindow(12, 4)
plotDendroAndColors(
  treeCTRL,
  colorsCONTROL,
  "Control Modules",
  main = "DEL5 gene dendrogram and module colors",
  dendroLabels = FALSE,
  addGuide = TRUE
)

dissTOMother <- 1 - TOMsimilarityFromExpr(
  datExpr0,
  power = softPower,
  TOMType = "unsigned"
)
treeOther <- flashClust(as.dist(dissTOMother), method = "average")
sizeGrWindow(12, 4)
layout(matrix(c(1:4), 4, 1), heights = rep(c(0.8, 0.2), 2))
plotDendroAndColors(
  treeOther,
  colorsCONTROL,
  "Control  Modules",
  main = " Control gene dendrogram and module colors",
  dendroLabels = FALSE,
  addGuide = TRUE
)

# Both sets are labelled with the reference module colours.
setLabels <- c("CTRL", "Other")
multiExpr <- list(
  CTRL = list(data = datExpr.CTRL),
  Other = list(data = datExpr0)
)
multiColor <- list(CTRL = colorsCONTROL, Other = colorsCONTROL)
nSets <- 2

system.time({
  mp <- modulePreservation(
    multiExpr,
    multiColor,
    referenceNetworks = c(1:2),
    nPermutations = 100,
    randomSeed = 1,
    verbose = 3
  )
})

# Set 1 as reference, set 2 as test; the first column of each table is dropped.
ref <- 1
test <- 2
statsObs <- cbind(
  mp$quality$observed[[ref]][[test]][, -1],
  mp$preservation$observed[[ref]][[test]][, -1]
)
statsZ <- cbind(
  mp$quality$Z[[ref]][[test]][, -1],
  mp$preservation$Z[[ref]][[test]][, -1]
)

print(cbind(
  statsObs[, c("medianRank.pres", "medianRank.qual")],
  signif(statsZ[, c("Zsummary.pres", "Zsummary.qual")], 2)
))
# Zsummary.pres values are used for FigS9B_Control

 

 

# FigS9B, FSHD pass: preservation of the reference modules (datExpr.CTRL, built
# in the preceding section) in the FSHD1 data.
Data <- read.csv("FSHD1_corrected_1616_97_BNX100.csv", header = TRUE)

gene.names <- Data$gene
datExpr0 <- as.data.frame(t(Data[, -c(1)]))
names(datExpr0) <- Data$gene
# fix() (a modal, write-back data editor) replaced by a non-blocking report.
print(dim(datExpr0))
datExpr0 <- as.data.frame(datExpr0[, -c(1:20)])
print(dim(datExpr0)) # no DUX4 target genes

# Module detection on the reference set (recomputed from datExpr.CTRL).
adj <- adjacency(datExpr.CTRL, type = "unsigned", power = softPower)
TOM <- TOMsimilarityFromExpr(
  datExpr.CTRL,
  networkType = "unsigned",
  TOMType = "unsigned",
  power = softPower
)
colnames(TOM) <- rownames(TOM)
dissTOM <- 1 - TOM
geneTree <- flashClust(as.dist(dissTOM), method = "average")

plot(geneTree, xlab = "", sub = "", cex = 0.7)

dynamicMods <- cutreeDynamic(
  dendro = geneTree,
  method = "tree",
  cutHeight = cutHeight,
  minClusterSize = minModuleSize
)
table(dynamicMods)

colorSeq <- c("darkorange", "midnightblue", "purple", "greenyellow")
dynamicColors <- labels2colors(dynamicMods, colorSeq = colorSeq)
table(dynamicColors)

colorsCONTROL <- dynamicColors
# Reorder the test set's gene columns to match the reference set.
CONTROL2other <- match(colnames(datExpr.CTRL), colnames(datExpr0))
table(is.finite(CONTROL2other))
datExpr0 <- datExpr0[, CONTROL2other]
all.equal(colnames(datExpr.CTRL), colnames(datExpr0))
TOMCTRL <- TOMsimilarityFromExpr(
  datExpr.CTRL,
  power = softPower,
  TOMType = "unsigned"
)
dissTOMCTRL <- 1 - TOMCTRL

# "a" in the original was an abbreviation of "average".
treeCTRL <- flashClust(as.dist(dissTOMCTRL), method = "average")
sizeGrWindow(12, 4)
plotDendroAndColors(
  treeCTRL,
  colorsCONTROL,
  "Control Modules",
  main = "DEL5 gene dendrogram and module colors",
  dendroLabels = FALSE,
  addGuide = TRUE
)

dissTOMother <- 1 - TOMsimilarityFromExpr(
  datExpr0,
  power = softPower,
  TOMType = "unsigned"
)
treeOther <- flashClust(as.dist(dissTOMother), method = "average")

sizeGrWindow(12, 4)
layout(matrix(c(1:4), 4, 1), heights = rep(c(0.8, 0.2), 2))
plotDendroAndColors(
  treeOther,
  colorsCONTROL,
  "Control  Modules",
  main = "FSHD gene dendrogram and module colors",
  dendroLabels = FALSE,
  addGuide = TRUE
)

# Both sets are labelled with the reference module colours.
setLabels <- c("CTRL", "Other")
multiExpr <- list(
  CTRL = list(data = datExpr.CTRL),
  Other = list(data = datExpr0)
)
multiColor <- list(CTRL = colorsCONTROL, Other = colorsCONTROL)
nSets <- 2

system.time({
  mp <- modulePreservation(
    multiExpr,
    multiColor,
    referenceNetworks = c(1:2),
    nPermutations = 100,
    randomSeed = 1,
    verbose = 3
  )
})

# Set 1 as reference, set 2 as test; the first column of each table is dropped.
ref <- 1
test <- 2
statsObs <- cbind(
  mp$quality$observed[[ref]][[test]][, -1],
  mp$preservation$observed[[ref]][[test]][, -1]
)
statsZ <- cbind(
  mp$quality$Z[[ref]][[test]][, -1],
  mp$preservation$Z[[ref]][[test]][, -1]
)

print(cbind(
  statsObs[, c("medianRank.pres", "medianRank.qual")],
  signif(statsZ[, c("Zsummary.pres", "Zsummary.qual")], 2)
))
# Zsummary.pres values are used for FigS9B_FSHD

# FigS9D_code
# Labelled scatter plots of the final_cor_* columns against x, one for FSHD
# and one for DEL5, on a white background.

library(ggrepel)
library(ggplot2)
workingDir <- "C:/Merfish_code/DATA"
setwd(workingDir)
getwd()

data <- read.csv("cor_DUX4_non_DUX4.csv")
# NOTE(review): size is a constant placed inside aes(), so ggplot treats it as
# a mapped aesthetic rather than a literal size; left unchanged to keep the
# figures identical.
#FigS9D_FSHD
p <- ggplot(data, aes(x = x, y = final_cor_FSHD_04)) +
  geom_label_repel(aes(label = FSHD_name)) +
  scale_x_continuous(minor_breaks = seq(0, 77, 1)) +
  scale_y_continuous(limits = c(-1, 1)) +
  geom_point(aes(color = FSHD_color, size = 0.5)) +
  scale_colour_manual(values = c("black", "blue", "grey"))
p + theme(
  plot.background = element_rect(fill = "white"),
  panel.background = element_rect(fill = "white"),
  axis.line = element_line(colour = "grey")
)

#FigS9D_DEL5
p <- ggplot(data, aes(x = x, y = final_cor_DEL5_04)) +
  geom_label_repel(aes(label = DEL5_name)) +
  scale_x_continuous(minor_breaks = seq(0, 77, 1)) +
  scale_y_continuous(limits = c(-1, 1)) +
  geom_point(aes(color = DEL5_color, size = 0.5)) +
  scale_colour_manual(values = c("black", "blue", "grey"))
p + theme(
  plot.background = element_rect(fill = "white"),
  panel.background = element_rect(fill = "white"),
  axis.line = element_line(colour = "grey")
)

#FigS10A_code
# Gene-gene correlation matrices for Control, FSHD1 and DEL5 (all gene
# columns), followed by pairwise two-sided Fisher z-tests comparing the
# correlations between conditions. Results are exported as VisANT edge lists
# and as CSV matrices.
library(dynamicTreeCut)
library(flashClust)
library(WGCNA)
allowWGCNAThreads()
# NOTE(review): plain R variable, not an environment variable; presumably has
# no effect on WGCNA threading -- confirm intent.
ALLOW_WGCNA_THREADS <- 12
options(stringsAsFactors = FALSE)
enableWGCNAThreads()
library(MASS) # standard, no need to install
library(class) # standard, no need to install
library(cluster)
library(impute) # install it for imputing missing value
library(Hmisc) # install it for the C-index calculations
library(ggplot2)
library(dplyr)

n <- 97
workingDir <- "C:/Merfish_code/DATA"
setwd(workingDir)
getwd()

# Control
Data <- read.csv("Control_corrected_1616_97_BNX100.csv", header = TRUE)
gene.names <- Data$gene
SubGeneNames <- gene.names[1:n] # first n gene names
# Transpose so that genes become columns; column 1 of Data is dropped first.
datExpr0 <- as.data.frame(t(Data[, -c(1)]))
names(datExpr0) <- Data$gene
# fix() (a modal, write-back data editor) replaced by a non-blocking report.
print(dim(datExpr0))

#datExpr = as.data.frame(datExpr0)
datExpr_C <- as.data.frame(datExpr0)

# FSHD1
Data <- read.csv("FSHD1_corrected_1616_97_BNX100.csv", header = TRUE)

gene.names <- Data$gene
SubGeneNames <- gene.names[1:n] # first n gene names

datExpr0 <- as.data.frame(t(Data[, -c(1)]))
names(datExpr0) <- Data$gene
print(dim(datExpr0))

datExpr_F <- as.data.frame(datExpr0)


# DEL5
Data <- read.csv("DEL5_corrected_1616_97_BNX100.csv", header = TRUE)

gene.names <- Data$gene
SubGeneNames <- gene.names[1:n] # first n gene names

datExpr0 <- as.data.frame(t(Data[, -c(1)]))
names(datExpr0) <- Data$gene
print(dim(datExpr0))

datExpr_D <- as.data.frame(datExpr0)



cor_C <- cor(datExpr_C)
cor_F <- cor(datExpr_F)
cor_D <- cor(datExpr_D)

# The element-wise comparison below needs identically shaped matrices.
stopifnot(
  identical(dim(cor_C), dim(cor_F)),
  identical(dim(cor_C), dim(cor_D))
)

# Sample sizes used in the z-test standard errors.
# NOTE(review): hard-coded; presumably the number of rows of datExpr_C,
# datExpr_F and datExpr_D -- confirm.
nc <- 404
nf <- 370
nd <- 213

# Fisher z-transform of every correlation. Vectorised over the whole matrix
# instead of a loop hard-coded to 97 x 97; the arithmetic per cell is
# unchanged (the diagonal, where r = 1, still ends up NaN).
zc <- atanh(cor_C)
zf <- atanh(cor_F)
zd <- atanh(cor_D)

zobs_cf <- (zc - zf) / sqrt(1 / (nc - 3) + 1 / (nf - 3))
zobs_cd <- (zc - zd) / sqrt(1 / (nc - 3) + 1 / (nd - 3))
zobs_df <- (zd - zf) / sqrt(1 / (nd - 3) + 1 / (nf - 3))

# Two-sided p-values. Each result starts as a copy of cor_C so that it keeps
# cor_C's dimnames, exactly as in the original.
p_C_F <- cor_C
p_C_D <- cor_C
p_D_F <- cor_C
p_C_F[] <- 2 * pnorm(-abs(zobs_cf))
p_C_D[] <- 2 * pnorm(-abs(zobs_cd))
p_D_F[] <- 2 * pnorm(-abs(zobs_df))

# NOTE(review): several output file names below contain leading or embedded
# spaces; they are kept byte-for-byte -- confirm whether they are intended.
vis <- exportNetworkToVisANT(
  cor_C,
  file = "cor_C.txt",
  weighted = TRUE,
  threshold = -1,
  probeToGene = data.frame(Data$gene, Data$gene)
)
write.csv(vis, file = "vis_ cor_Control.csv", row.names = TRUE)

vis <- exportNetworkToVisANT(
  cor_D,
  file = "cor_D.txt",
  weighted = TRUE,
  threshold = -1,
  probeToGene = data.frame(Data$gene, Data$gene)
)
write.csv(vis, file = " vis_ cor_DEL5.csv", row.names = TRUE)

vis <- exportNetworkToVisANT(
  cor_F,
  file = "cor_F.txt",
  weighted = TRUE,
  threshold = -1,
  probeToGene = data.frame(Data$gene, Data$gene)
)
write.csv(vis, file = " vis_cor_FSHD.csv", row.names = TRUE)
# (The original also wrote p_D_F.csv here; the identical write further down
# is kept, so the file content is unchanged.)


vis <- exportNetworkToVisANT(
  p_C_F,
  file = "p_C_F.txt",
  weighted = TRUE,
  threshold = -1,
  probeToGene = data.frame(Data$gene, Data$gene)
)
write.csv(vis, file = "vis_p_C_F.csv", row.names = TRUE)
write.csv(p_C_F, file = "p_C_F.csv", row.names = TRUE)

vis <- exportNetworkToVisANT(
  p_C_D,
  file = "p_C_D.txt",
  weighted = TRUE,
  threshold = -1,
  probeToGene = data.frame(Data$gene, Data$gene)
)
write.csv(vis, file = " vis_p_C_D.csv", row.names = TRUE)
write.csv(p_C_D, file = "p_C_D.csv", row.names = TRUE)


vis <- exportNetworkToVisANT(
  p_D_F,
  file = "p_D_F.txt",
  weighted = TRUE,
  threshold = -1,
  probeToGene = data.frame(Data$gene, Data$gene)
)
write.csv(vis, file = " vis_p_D_F.csv", row.names = TRUE)
write.csv(p_D_F, file = "p_D_F.csv", row.names = TRUE)


# Then make vis_ DUX4_cor_and_pvalue.csv using DUX4 gene correlation data between FSHD and DEL5
# Scatter of cor_DEL5 against cor_FSHD; each point is filled with the colour
# named in its own p_D_F_grade value.

data <- read.csv("vis_ DUX4_cor_and_pvalue.csv")


# Identity palette: every p_D_F_grade value maps to itself.
col <- setNames(
  as.character(data$p_D_F_grade),
  as.character(data$p_D_F_grade)
)
ggplot(data = data, mapping = aes(x = cor_FSHD, y = cor_DEL5)) +
  geom_point(
    aes(fill = p_D_F_grade),
    shape = 21,
    color = "white",
    size = 3
  ) +
  sm_corr_theme() +
  xlim(-0.25, 1) +
  ylim(-0.25, 1) +
  sm_statCorr(color = sm_color("blue")) +
  scale_fill_manual(values = col)








