# age from .divsum
library(dplyr)
library(readr)
library(magrittr)
library(ggplot2)
library(purrr)
library(tidyr)
library(ggsci)
library(ggpubr)

# Read both tables and plot them together.
# Each input file holds the table section of a .divsum file.

# Read hsym  ---------------------------------------------------------
table <- read_table2("hsym_sep2019.divsum")
genome_size <- 406693435 # 11sep updated
species_name <- "Hydractinia symbiolongicarpus" # Hydractinia echinata, Hydractinia symbiolongicarpus
species_shortname <- "hsym"

# remove the last empty column
table <- table[,1:dim(table)[2]-1]
# remove non-informative row (all 0)
non_zero_row_index <- mutate(table,rs=rowSums(select(table,c(-1))))[,"rs"][["rs"]] == "0"
table <- table[!non_zero_row_index,]
# wide to long form
#data_long <- gather(olddata_wide, condition, measurement, control:cond2, factor_key=TRUE)
table_long <- tidyr::gather(table,te_class,raw_cnt,!!colnames(table)[2]:!!colnames(table)[dim(table)[2]])
table_long <- mutate(table_long,perc_genome = raw_cnt/genome_size*100)
hsym_table_long <- table_long
hsym_table_long$species <- species_shortname


# Read hech ---------------------------------------------------------------
table <- read_table2("hech_sep2019.divsum")
genome_size <- 565065865 # 11sep updated
species_name <- "Hydractinia echinata" # Hydractinia echinata, Hydractinia symbiolongicarpus
species_shortname <- "hech"
# remove the last empty column
table <- table[,1:dim(table)[2]-1]
# remove non-informative row (all 0)
non_zero_row_index <- mutate(table,rs=rowSums(select(table,c(-1))))[,"rs"][["rs"]] == "0"
table <- table[!non_zero_row_index,]
# wide to long form
#data_long <- gather(olddata_wide, condition, measurement, control:cond2, factor_key=TRUE)
table_long <- tidyr::gather(table,te_class,raw_cnt,!!colnames(table)[2]:!!colnames(table)[dim(table)[2]])
table_long <- mutate(table_long,perc_genome = raw_cnt/genome_size*100)
hech_table_long <- table_long
hech_table_long$species <- species_shortname

full_table_long <- rbind(hsym_table_long,hech_table_long)

# general class plot ------------------------------------------------------------
# Stacked bars of genome percentage per divergence bin, filled by general TE
# class, with one facet column per species.

# Split "class/superfamily" labels into two columns; separate() drops the
# original `te_class` column.
full_table_long <- tidyr::separate(
  full_table_long, te_class, c("general_class", "superfamily"),
  sep = "/"
)
class_to_plot <- c("DNA", "LTR", "LINE", "SINE", "SINE?", "RC")
facet_label <- c("H. echinata", "H. symbiolongicarpus")
names(facet_label) <- c("hech", "hsym")

# Axis breaks every 5 units up to the largest Div value rounded up to the next
# multiple of 10. Use the combined table so both species are covered, not only
# the table that happened to be read last.
div_breaks <- seq(0, 10 * ceiling(max(full_table_long$Div) / 10), by = 5)

g <- ggplot(
  filter(full_table_long, general_class %in% class_to_plot),
  aes(x = Div, y = perc_genome, fill = general_class)
)
general_class_plot <- g +
  geom_bar(stat = "identity") +
  labs(
    title = "General TE class",
    x = "Kimura substitution level (CpG adjusted)",
    y = "Percentage of genome (%)"
  ) +
  scale_x_continuous(breaks = div_breaks) +
  facet_grid(cols = vars(species), labeller = labeller(species = facet_label)) +
  theme_pubr() +
  scale_fill_npg() +
  theme(strip.text.x = element_text(face = "italic"))
# Top-level expression: auto-printed so the figure is still displayed.
general_class_plot

# Pass the plot explicitly instead of relying on last_plot().
ggsave(
  "combined_age_estimation_kimura.svg",
  plot = general_class_plot, width = 320, height = 164, units = "mm"
)


# LTR plot ----------------------------------------------------------------
# One facet row per LTR superfamily and one facet column per species.

# Rebuild `te_class` first: it was removed when the labels were split into
# general_class/superfamily, so looking it up before this line yields nothing
# and the plot would be empty.
full_table_long <- mutate(
  full_table_long,
  te_class = paste0(general_class, "/", superfamily)
)
ltr_superfamily <- unique(grep("LTR", full_table_long$te_class, value = TRUE))
facet_label <- c("H. echinata", "H. symbiolongicarpus")
names(facet_label) <- c("hech", "hsym")

# Axis breaks every 5 units up to the largest Div value (both species) rounded
# up to the next multiple of 10.
div_breaks <- seq(0, 10 * ceiling(max(full_table_long$Div) / 10), by = 5)

g <- ggplot(
  filter(full_table_long, te_class %in% ltr_superfamily),
  aes(x = Div, y = perc_genome, fill = superfamily)
)
ltr_plot <- g +
  geom_bar(stat = "identity", width = 1) +
  labs(
    title = "LTR superfamilies",
    x = "Kimura substitution level (CpG adjusted)",
    y = "Percentage of genome (%)"
  ) +
  scale_x_continuous(breaks = div_breaks) +
  theme_pubr() +
  scale_fill_npg() +
  facet_grid(
    te_class ~ species,
    labeller = labeller(species = facet_label),
    scales = "free_y"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
    axis.text.y = element_text(size = 8),
    strip.text.x = element_text(face = "italic")
  )
# Top-level expression: auto-printed so the figure is still displayed.
ltr_plot

# Pass the plot explicitly instead of relying on last_plot().
ggsave(
  "combined_age_estimation_kimura_LTR.svg",
  plot = ltr_plot, width = 239, height = 216, units = "mm"
)


# LTR overlap plot --------------------------------------------------------
# Both species drawn on top of each other (semi-transparent bars), one facet
# row per LTR superfamily.

# Make sure `te_class` exists before it is searched, so this section does not
# depend on an earlier section having rebuilt it.
full_table_long <- mutate(
  full_table_long,
  te_class = paste0(general_class, "/", superfamily)
)
ltr_superfamily <- unique(grep("LTR", full_table_long$te_class, value = TRUE))

# Axis breaks every 5 units up to the largest Div value (both species) rounded
# up to the next multiple of 10.
div_breaks <- seq(0, 10 * ceiling(max(full_table_long$Div) / 10), by = 5)

g <- ggplot(
  filter(full_table_long, te_class %in% ltr_superfamily),
  aes(x = Div, y = perc_genome, fill = species, color = species)
)
ltr_overlap_plot <- g +
  # position = "identity" overlays the species instead of stacking them.
  geom_bar(stat = "identity", position = "identity", width = 1, alpha = 0.2) +
  labs(
    title = "LTR superfamilies",
    x = "Kimura substitution level (CpG adjusted)",
    y = "Percentage of genome (%)"
  ) +
  scale_x_continuous(breaks = div_breaks) +
  theme_pubr() +
  # Fill and outline share the NPG palette so each species has one colour.
  scale_fill_npg() +
  scale_color_npg() +
  # Species is not a facet variable here, so no species labeller is needed.
  facet_grid(rows = vars(te_class), scales = "free_y") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
    axis.text.y = element_text(size = 8),
    strip.text.x = element_text(face = "italic")
  )
# Top-level expression: auto-printed so the figure is still displayed.
ltr_overlap_plot

# Pass the plot explicitly instead of relying on last_plot().
ggsave(
  "combined_age_estimation_kimura_LTR_overlap.svg",
  plot = ltr_overlap_plot, width = 205, height = 236, units = "mm"
)


# DNA overlap plot ----------------------------------------------------------------
# Both species drawn on top of each other (semi-transparent bars), one facet
# row per DNA superfamily.

# Make sure `te_class` exists before it is searched, so this section does not
# depend on an earlier section having rebuilt it.
full_table_long <- mutate(
  full_table_long,
  te_class = paste0(general_class, "/", superfamily)
)
DNA_superfamily <- unique(grep("DNA", full_table_long$te_class, value = TRUE))

# Axis breaks every 5 units up to the largest Div value (both species) rounded
# up to the next multiple of 10.
div_breaks <- seq(0, 10 * ceiling(max(full_table_long$Div) / 10), by = 5)

g <- ggplot(
  filter(full_table_long, te_class %in% DNA_superfamily),
  aes(x = Div, y = perc_genome, fill = species, color = species)
)
dna_overlap_plot <- g +
  # position = "identity" overlays the species instead of stacking them.
  geom_bar(stat = "identity", position = "identity", width = 1, alpha = 0.2) +
  labs(
    title = "DNA superfamilies",
    x = "Kimura substitution level (CpG adjusted)",
    y = "Percentage of genome (%)"
  ) +
  scale_x_continuous(breaks = div_breaks) +
  theme_pubr() +
  # Fill and outline share the NPG palette so each species has one colour.
  scale_fill_npg() +
  scale_color_npg() +
  # Species is not a facet variable here, so no species labeller is needed.
  facet_grid(rows = vars(te_class), scales = "free_y") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
    axis.text.y = element_text(size = 4),
    strip.text.x = element_text(face = "italic"),
    # Horizontal strip labels keep long superfamily names readable.
    strip.text.y = element_text(angle = 0, size = 8)
  )
# Top-level expression: auto-printed so the figure is still displayed.
dna_overlap_plot

# Pass the plot explicitly instead of relying on last_plot().
ggsave(
  "combined_age_estimation_kimura_DNA.svg",
  plot = dna_overlap_plot, width = 185, height = 236, units = "mm"
)


