# Load required packages

library(WGCNA)
library(dplyr)
library(tidyr)
library(ggplot2)
library(gplots)
library(doParallel)

# Register the parallel backend.
# Leave one core free, but never request fewer than one worker:
# detectCores() can return 1 (giving 0 workers) or NA on some platforms,
# and makeCluster() fails in both cases.
n_cores <- detectCores()
n_workers <- if (is.na(n_cores)) 1L else max(1L, n_cores - 1L)
cl <- makeCluster(n_workers)
registerDoParallel(cl)

# Set the output directory (also used as the input directory for AllTPM.txt)
output_path <- "/home/user/2021/YangXi/Duck/WGCNA/"

# Read the gene expression data.
# Rows = genes carrying a tissue prefix (for example: Liver_ACAT1 ...);
# columns = individuals.
data <- read.table(file.path(output_path, "AllTPM.txt"),
                   header = TRUE, row.names = 1, sep = "\t")

# Filter genes with mean expression >= 0.1 TPM.
# which() drops genes whose mean is NA; logical indexing with NA would
# otherwise insert all-NA rows into the filtered table.
keep_genes <- which(rowMeans(data) >= 0.1)
filtered_data <- data[keep_genes, , drop = FALSE]

# Apply log2 transformation (pseudo-count of 1 keeps zero TPM finite)
transformed_data <- log2(filtered_data + 1)

# Transpose the data so that samples are rows and genes are columns
data_transposed <- as.data.frame(t(transformed_data))

# Extract organ names from the gene (column) names; the organ name is the
# text before the first underscore
organ_names <- unique(sub("_.*", "", colnames(data_transposed)))

# Compute a matrix of p-values from pairwise correlation tests.
#
# Arguments:
#   mat  A numeric matrix (or object coercible with as.matrix()) whose
#        columns are the variables to be compared.
#
# Returns:
#   A list with one element, `p`: a symmetric n x n matrix (n = number of
#   columns of `mat`) holding the cor.test() p-value for every column pair.
#   The diagonal is set to 0. Row and column names are taken from the
#   column names of `mat`.
cor.mtest <- function(mat) {
  mat <- as.matrix(mat)
  n <- ncol(mat)
  p.mat <- matrix(NA, n, n)
  diag(p.mat) <- 0

  # With fewer than two columns there are no pairs to test. The guard is
  # required because 1:(n - 1) would count backwards and index out of bounds.
  if (n >= 2) {
    for (i in seq_len(n - 1)) {
      for (j in seq(i + 1, n)) {
        p_value <- cor.test(mat[, i], mat[, j])$p.value
        # The test is symmetric, so fill both triangles at once
        p.mat[i, j] <- p_value
        p.mat[j, i] <- p_value
      }
    }
  }

  colnames(p.mat) <- colnames(mat)
  rownames(p.mat) <- colnames(mat)
  list(p = p.mat)
}

# Initialize list to store results for each organ
results <- list()

# Organ prefix of every gene column (text before the first underscore).
# Computed once so each organ is selected by exact prefix equality rather
# than by a substring/regex match, which could pull in genes of other organs.
gene_organs <- sub("_.*", "", colnames(data_transposed))

# Process each organ separately with WGCNA
for (organ in organ_names) {
  cat("Processing:", organ, "\n")

  # Subset the genes (columns) of the current organ; drop = FALSE keeps a
  # data frame even when only one gene matches
  organ_data <- data_transposed[, gene_organs == organ, drop = FALSE]

  # Skip analysis if the organ has fewer than 5 genes. Columns are genes
  # here; the samples (rows) are shared by all organs.
  if (ncol(organ_data) < 5) {
    cat("Not enough genes for WGCNA in", organ, "\n")
    next
  }

  # Define a range of powers for network construction
  powers <- c(1:10, seq(from = 12, to = 50, by = 2))
  sft <- pickSoftThreshold(organ_data, powerVector = powers, verbose = 5)
  fit <- sft$fitIndices
  # Signed scale-free fit: R^2 (column 2) with the sign of the slope
  # (column 3) flipped
  signed_r2 <- -sign(fit[, 3]) * fit[, 2]

  # Save the scale-free topology fit indices
  write.table(fit, file.path(output_path, paste0(organ, "_soft_threshold.txt")),
              sep = "\t", quote = FALSE, col.names = NA)

  # Create a PDF with scale-free topology and mean connectivity plots
  pdf(file.path(output_path, paste0(organ, "_soft_threshold_plot.pdf")), width = 14, height = 10)
  par(mfrow = c(1, 2))
  plot(fit[, 1], signed_r2,
       xlab = "Soft Threshold (power)", ylab = "Scale Free R^2", type = "n",
       main = paste("Scale Free Topology Fit for", organ))
  text(fit[, 1], signed_r2, labels = powers, cex = 0.9, col = "red")

  plot(fit[, 1], fit[, 5],
       xlab = "Soft Threshold (power)", ylab = "Mean Connectivity", type = "n",
       main = paste("Mean Connectivity for", organ))
  text(fit[, 1], fit[, 5], labels = powers, cex = 0.9, col = "blue")
  dev.off()

  # pickSoftThreshold() reports NA when no tested power reaches its R^2
  # cut-off; adjacency() cannot run with an NA power. Fall back to the
  # tested power with the best signed fit, or skip the organ if no fit
  # could be computed at all.
  soft_power <- sft$powerEstimate
  if (is.na(soft_power)) {
    best_fit <- which.max(signed_r2)
    if (length(best_fit) == 0) {
      cat("No usable soft-threshold power for", organ, "- skipping\n")
      next
    }
    soft_power <- fit[best_fit, 1]
    cat("No power reached the scale-free cut-off for", organ,
        "- using best-fitting power", soft_power, "\n")
  }

  # Compute adjacency and TOM matrices (variable names avoid shadowing the
  # WGCNA functions of the same name)
  adjacency_matrix <- adjacency(organ_data, power = soft_power)
  TOM <- TOMsimilarity(adjacency_matrix)
  dissTOM <- 1 - TOM

  # Perform hierarchical clustering
  geneTree <- hclust(as.dist(dissTOM), method = "average")

  # Dynamic tree cutting to detect modules
  dynamicMods <- cutreeDynamic(dendro = geneTree, distM = dissTOM, deepSplit = 2, minClusterSize = 30)
  dynamicColors <- labels2colors(dynamicMods)

  # Plot the dendrogram with module colors
  pdf(file.path(output_path, paste0(organ, "_dendrogram.pdf")), width = 14, height = 10)
  plotDendroAndColors(geneTree, dynamicColors, "Module Colors",
                      dendroLabels = FALSE, hang = 0.03,
                      main = paste("Dendrogram and Module Colors for", organ))
  dev.off()

  # Calculate module eigengenes of the unmerged modules
  module_eigengenes <- moduleEigengenes(organ_data, colors = dynamicColors)
  eigengenes <- module_eigengenes$eigengenes

  # Save the module eigengenes to a file
  write.table(eigengenes, file.path(output_path, paste0(organ, "_Eigengene.txt")),
              sep = "\t", quote = FALSE, col.names = NA)

  # Merge similar modules
  mergedModules <- mergeCloseModules(organ_data, dynamicColors, cutHeight = 0.25, verbose = 3)
  mergedColors <- mergedModules$colors
  mergedEigengenes <- mergedModules$newMEs

  # Remember which color each eigengene column stands for BEFORE renaming,
  # so genes can later be matched to their module by color, not by position
  eigengene_colors <- sub("^ME", "", colnames(mergedEigengenes))
  colnames(mergedEigengenes) <- paste0(organ, "_ME", seq_len(ncol(mergedEigengenes)))
  module_name_by_color <- setNames(colnames(mergedEigengenes), eigengene_colors)

  write.table(mergedEigengenes, file.path(output_path, paste0(organ, "_Merged_Eigengene.txt")),
              sep = "\t", quote = FALSE, col.names = NA)

  # Group genes by module color. split() orders the groups by sorted color
  # name, which need not match the eigengene column order, hence the lookup
  # by color below.
  module_genes <- split(colnames(organ_data), mergedColors)
  module_names <- unname(module_name_by_color[names(module_genes)])
  # Colors without an eigengene column get a color-based label instead of NA
  unmatched <- is.na(module_names)
  module_names[unmatched] <- paste0(organ, "_", names(module_genes)[unmatched])

  # One row per module; genes are stored as a comma-separated string so the
  # table is written as plain text rather than a deparsed R vector
  module_genes_summary <- data.frame(
    Module = module_names,
    Genes = unname(vapply(module_genes, paste, character(1), collapse = ",")),
    stringsAsFactors = FALSE
  )

  # Save the module genes summary
  summary_output_file <- file.path(output_path, paste0(organ, "_Module_Genes_Summary.txt"))
  write.table(module_genes_summary, summary_output_file, sep = "\t", quote = FALSE,
              col.names = TRUE, row.names = FALSE)

  # Create a heatmap of module eigengenes. heatmap.2() needs at least a
  # 2 x 2 matrix, so the plot is skipped when there is a single module.
  if (nrow(mergedEigengenes) >= 2 && ncol(mergedEigengenes) >= 2) {
    pdf(file.path(output_path, paste0(organ, "_Module_Heatmap.pdf")), width = 14, height = 10)
    heatmap.2(as.matrix(mergedEigengenes), trace = "none", col = bluered(100),
              main = paste("Module Heatmap for", organ), xlab = "Modules", ylab = "Samples",
              margins = c(10, 10))
    dev.off()
  } else {
    cat("Skipping module heatmap for", organ, "(fewer than 2 samples or modules)\n")
  }

  # Store results for the current organ
  results[[organ]] <- list(Eigengenes = mergedEigengenes, Colors = mergedColors, Data = organ_data)
}

# Stop early with a clear message when no organ produced any result;
# release the parallel workers first so they are not left running
if (length(results) == 0) {
  stopCluster(cl)
  stop("No organ produced module eigengenes; nothing to correlate.", call. = FALSE)
}

# Combine module eigengenes from all organs.
# unname() stops cbind() from prefixing multi-column inputs with the list
# name (e.g. "Liver.Liver_ME1") while leaving single-column inputs bare.
all_eigengenes <- do.call(cbind, unname(lapply(results, function(x) x$Eigengenes)))

# Compute the correlation matrix of all module eigengenes
cor_matrix <- cor(all_eigengenes)

# Save the correlation matrix to a file
write.table(cor_matrix, file.path(output_path, "all_organs_correlation_matrix.txt"),
            sep = "\t", quote = FALSE, col.names = NA)

# Compute the p-value matrix for the correlations
p_matrix <- cor.mtest(all_eigengenes)$p

# Save the p-value matrix
write.table(p_matrix, file.path(output_path, "all_organs_p_matrix.txt"),
            sep = "\t", quote = FALSE, col.names = NA)

# Adjust the p-values using FDR correction.
# Each pair is tested once, so the correction runs over the unique pairs
# (upper triangle) only and is mirrored into the lower triangle. Adjusting
# column by column would count the diagonal zeros as tests and give two
# different adjusted values for the same pair.
fdr_matrix <- p_matrix
upper_pairs <- upper.tri(p_matrix)
lower_pairs <- lower.tri(p_matrix)
fdr_matrix[upper_pairs] <- p.adjust(p_matrix[upper_pairs], method = "fdr")
fdr_matrix[lower_pairs] <- t(fdr_matrix)[lower_pairs]

# Save the FDR matrix
write.table(fdr_matrix, file.path(output_path, "all_organs_fdr_matrix.txt"),
            sep = "\t", quote = FALSE, col.names = NA)

# Plot a heatmap of the correlation matrix. heatmap.2() needs at least a
# 2 x 2 matrix, so the plot is skipped when only one module exists overall.
if (ncol(cor_matrix) >= 2) {
  pdf(file.path(output_path, "all_organs_correlation_heatmap.pdf"), width = 12, height = 10)
  heatmap.2(cor_matrix,
            dendrogram = "both",
            trace = "none",
            col = bluered(100),
            main = "Correlation Heatmap of Module Eigengenes Across All Organs",
            xlab = "Modules",
            ylab = "Modules",
            margins = c(10, 10))
  dev.off()
} else {
  cat("Skipping correlation heatmap: fewer than 2 module eigengenes.\n")
}

# Stop the parallel cluster
stopCluster(cl)

cat("Analysis complete. Results saved to the specified output path.\n")
