## Plot average Spearman's correlation of eight NDD gene sets in six major cortical cell types by downsampling.

library(ggplot2)

# Gene-set names, tab-separated on disk; only the first eight entries are kept.
gene_set <- scan(file = "../gene_set/gene_set.txt", what = "character", sep = "\t")[1:8]

# Cell-type labels, tab-separated on disk.
cell_type <- scan(file = "../expression/sample_id_9.txt", what = "character", sep = "\t")

# Column headers of the per-gene-set correlation tables.
column_name <- c("Cor", "Type")

# For every gene set: collect the correlations of each cell type, write them
# as a two-column table ("cor_<gene set>.txt") and draw a violin plot
# ("cor_<gene set>.pdf").
#
# Inputs read from the working directory:
#   cor_<cell type>.txt  one table per downsampled cell type; row i holds the
#                        values of gene set i.
#   cor_Microglia.txt    one value per gene set.
#
# NOTE(review): the layout below hard-codes that the first five entries of
# `cell_type` are the downsampled cell types and that slot six belongs to
# Microglia -- confirm against the cell-type file.
downsampled_idx <- 1:5
microglia_idx <- 6
downsampled_types <- cell_type[downsampled_idx]

# These inputs do not depend on the gene set, so they are read once up front
# instead of once per iteration.
microglia_cor <- scan("cor_Microglia.txt", sep = "\t")

downsampled_cor <- lapply(
    downsampled_types,
    function(type) t(read.table(paste0("cor_", type, ".txt"), sep = "\t"))
)

# Unnamed manual values are assigned in the order of the levels of `Type`.
type_colours <- c("cyan3", "darkgreen", "royalblue3", "orange", 2, "hotpink")

for (i in seq_along(gene_set))
{
    table_file <- paste0("cor_", gene_set[i], ".txt")
    pdf_file <- paste0("cor_", gene_set[i], ".pdf")

    # Values of gene set i: one vector per downsampled cell type, plus the
    # single Microglia value.
    per_type <- lapply(downsampled_cor, function(m) m[, i])
    microglia_value <- microglia_cor[i]

    # Mean per cell type, then the mean of those means (dashed reference line).
    type_means <- numeric(length = length(cell_type))
    type_means[downsampled_idx] <- vapply(per_type, mean, numeric(1))
    type_means[microglia_idx] <- microglia_value
    overall_mean <- mean(type_means)

    # Long-format table: one row per value, labelled with its cell type.
    plot_data <- data.frame(
        Cor = c(unlist(per_type, use.names = FALSE), microglia_value),
        Type = c(rep(downsampled_types, lengths(per_type)), "Microglia")
    )

    # Header row followed by all rows, written in a single call.
    write.table(plot_data, file = table_file, quote = FALSE, sep = "\t", row.names = FALSE, col.names = column_name)

    data_plot <- ggplot(plot_data, aes(x = factor(Type), y = Cor, colour = Type, fill = Type)) +
        geom_violin(trim = FALSE) +
        theme_classic() +
        theme(legend.position = "none") +
        scale_colour_manual(values = type_colours) +
        scale_fill_manual(values = type_colours) +
        # Microglia has a single value, so it is drawn as a short horizontal
        # bar at the sixth x position. annotate() draws it exactly once,
        # whereas a geom_segment() with constant aesthetics would be repeated
        # for every row of the plot data.
        annotate("segment", x = 5.6, xend = 6.4, y = microglia_value, yend = microglia_value, colour = "orange") +
        # Black dot at the mean of each cell type.
        stat_summary(fun = mean, geom = "point", colour = 1, size = 3) +
        scale_x_discrete(limits = cell_type) +
        scale_y_continuous(breaks = c(0:3/10), limits = c(0, 0.3)) +
        labs(title = gene_set[i], x = "Downsampling (n=68)", y = "Average Spearmans correlation") +
        theme(axis.text.x = element_text(size = 20, hjust = 1, vjust = 1, angle = 45), axis.text.y = element_text(size = 25)) +
        theme(plot.title = element_text(face = "bold", size = 25), axis.title.x = element_text(size = 25, hjust = 0.5), axis.title.y = element_text(size = 25, vjust = 0.4)) +
        # Dashed line across all cell types at the mean of the per-type means.
        annotate("segment", x = 0.5, xend = 6.5, y = overall_mean, yend = overall_mean, colour = 1, linetype = 2)

    # Pass the plot explicitly instead of relying on last_plot().
    ggsave(filename = pdf_file, plot = data_plot, width = 8, height = 8)
}
