## Figures for the paper: read distribution across UTR5 / CDS / UTR3
library(ggplot2)
library(tidyverse)
library(reshape2)
theme_set(theme_bw())
library(gridExtra)

# Read the count table and index its rows by gene ID, so that the later
# filtering steps can address genes through row names.
raw_counts <- read.csv(file = "data/HTSeq_count_stats_all_libraries.csv")
row.names(raw_counts) <- raw_counts[["Gene_id"]]

# Mean percentage of counts per region, averaged over replicates.
#
# `genes` holds the counts of one gene laid out replicate after replicate,
# with `n_regions` consecutive values per replicate (nine values for three
# replicates x three regions in this script). Within every replicate the
# counts are turned into percentages of that replicate's total; the
# percentages are then averaged across replicates, region by region.
#
# Arguments:
#   genes      Numeric vector (or one-row data frame) of counts.
#   n_regions  Number of consecutive values that make up one replicate.
#
# Returns a numeric vector of length `n_regions`. When `genes` is named, the
# result carries the names of the first replicate's elements. A replicate
# whose counts sum to zero contributes NaN, so the result is NaN as well.
#
# Stops with an error when `genes` is empty or its length is not a multiple
# of `n_regions`, instead of silently returning NA or ignoring values.
calc_per_section <- function(genes, n_regions = 3) {
  counts <- unlist(genes)
  n_reps <- length(counts) / n_regions
  if (length(counts) == 0 || n_reps != floor(n_reps)) {
    stop(
      "`genes` must hold a whole number of replicates of ", n_regions,
      " values each, got ", length(counts), " value(s).",
      call. = FALSE
    )
  }

  # One percentage vector per replicate.
  starts <- seq(1, length(counts), by = n_regions)
  percents <- lapply(starts, function(first) {
    replicate_counts <- counts[first:(first + n_regions - 1)]
    replicate_counts / sum(replicate_counts) * 100
  })

  # Element-wise sum over replicates, then the mean.
  Reduce(`+`, percents) / n_reps
}

# Shared helpers -----------------------------------------------------------

# Column labels given to the three values returned by calc_per_section().
# NOTE(review): this presumes the count columns of every replicate appear in
# the order CDS, UTR3, UTR5 in the input table -- confirm against the CSV.
region_cols <- c("CDS", "UTR3", "UTR5")

# Left-to-right order of the panels in every figure.
panel_regions <- c("UTR5", "CDS", "UTR3")

# Select the per-region count columns of one condition.
#
#   counts     Count table with a `Gene_id` column and gene IDs as row names.
#   condition  Column-name stem, e.g. "Resting_3h_Triptolide"; columns ending
#              in "<condition>_UTR5", "_UTR3" or "_CDS" are selected.
#
# Only rows whose Gene_id matches "ENSMUSG" are kept. Stops when no column
# matches, so a mistyped condition fails here instead of further downstream.
select_region_counts <- function(counts, condition) {
  selected <- counts %>%
    filter(str_detect(Gene_id, "ENSMUSG.*")) %>%
    select(matches(paste0(condition, "_(UTR5|UTR3|CDS)$")))
  if (ncol(selected) == 0) {
    stop(
      "No count columns found for condition '", condition, "'.",
      call. = FALSE
    )
  }
  selected
}

# Convert a count table into long format: one row per gene and region, with
# the replicate-averaged percentage stored in the column named `value_col`.
region_percent_long <- function(counts, value_col) {
  pct <- t(apply(counts, 1, calc_per_section))
  colnames(pct) <- region_cols
  pct <- as.data.frame(pct)
  pct$Gene_id <- rownames(pct)
  pivot_longer(
    pct,
    cols = !Gene_id, names_to = "region", values_to = value_col
  )
}

# Build the merged percentage table for a set of conditions.
#
#   counts      Full count table.
#   conditions  Named character vector: names become the value columns of the
#               result, values are the column-name stems to select. The FIRST
#               entry is the reference: only genes whose reference counts sum
#               to more than `min_count` are kept, for every condition.
#   min_count   Exclusive lower bound on the summed reference counts.
#
# Returns a data frame with columns Gene_id, region and one column per
# condition.
region_percent_table <- function(counts, conditions, min_count = 30) {
  region_counts <- lapply(
    conditions,
    function(condition) select_region_counts(counts, condition)
  )

  reference <- region_counts[[1]]
  keep <- rownames(reference)[rowSums(reference) > min_count]

  long_tables <- Map(
    function(tbl, value_col) {
      kept <- tbl[rownames(tbl) %in% keep, , drop = FALSE]
      region_percent_long(kept, value_col)
    },
    region_counts,
    names(conditions)
  )

  Reduce(
    function(left, right) merge(left, right, by = c("Gene_id", "region")),
    long_tables
  )
}

# Draw one figure: a row of scatter plots (one per region) of `y_col` against
# `x_col`, each annotated with the Pearson correlation of the two columns.
#
#   percent_table  Output of region_percent_table().
#   x_col, y_col   Names of the value columns to compare.
#   title          Text shown above the row of panels.
plot_region_scatter <- function(percent_table, x_col, y_col, title) {
  panels <- lapply(panel_regions, function(region_name) {
    panel_data <- percent_table[percent_table$region == region_name, ]
    r <- cor(
      panel_data[[x_col]], panel_data[[y_col]],
      use = "complete.obs"
    )
    label <- paste("R = ", round(r, digits = 3))

    ggplot(panel_data, aes(x = .data[[x_col]], y = .data[[y_col]])) +
      geom_point(alpha = 0.2) +
      scale_x_continuous(limits = c(0, 100)) +
      scale_y_continuous(limits = c(0, 100)) +
      annotate("text", x = 15, y = 80, label = label) +
      theme_bw() +
      # Explicit axis labels so they show the column names, not the pronoun.
      labs(x = x_col, y = y_col, title = region_name)
  })

  invisible(
    gridExtra::grid.arrange(grobs = panels, ncol = length(panels), top = title)
  )
}

# at 3h -------------------------------------------------------------------

# Genes are filtered on the 3h Triptolide counts.
percent_3h <- region_percent_table(
  raw_counts,
  c(
    Trip = "Resting_3h_Triptolide",
    TripCHX = "Resting_3h_Triptolide_CHX",
    TripHAR = "Resting_3h_Triptolide_HARR"
  )
)

plot_region_scatter(percent_3h, "Trip", "TripCHX", "3hTrip vs 3hTripCHX")
plot_region_scatter(percent_3h, "Trip", "TripHAR", "3hTrip vs 3hTripHAR")

# at 0h -------------------------------------------------------------------

# Genes are filtered on the 0h Triptolide counts; CHX and HARR stay at 3h.
percent_0h <- region_percent_table(
  raw_counts,
  c(
    Trip = "Resting_0h_Triptolide",
    TripCHX = "Resting_3h_Triptolide_CHX",
    TripHAR = "Resting_3h_Triptolide_HARR"
  )
)

plot_region_scatter(percent_0h, "Trip", "TripCHX", "0hTrip vs 3hTripCHX")
plot_region_scatter(percent_0h, "Trip", "TripHAR", "0hTrip vs 3hTripHAR")

# 0h Trip vs 3h Trip ------------------------------------------------------

# Genes are filtered on the 0h Triptolide counts, selected here explicitly
# rather than reused from the previous section.
percent_0h_3h <- region_percent_table(
  raw_counts,
  c(
    Trip0h = "Resting_0h_Triptolide",
    Trip3h = "Resting_3h_Triptolide"
  )
)

plot_region_scatter(percent_0h_3h, "Trip0h", "Trip3h", "0hTrip vs 3hTrip")


