library(dplyr)
library(tidyverse)
library(ggplot2)

# Load the APOBEC expression matrix and reshape it to long format: one row per
# (gene_id, cell_id) pair, with the value stored under `log2(TPM/10+1)` (the
# scale that the back-transformation further down inverts).
#
# Every column except the first is treated as a cell, so the reshape no longer
# depends on a hard-coded column count.
# NOTE(review): assumes all columns after the first are cells — confirm
# against the input file.
#
# Rows come out grouped by gene (not by cell, as with gather()); the final
# table is sorted explicitly later, so this does not affect the results.
hnscc_gene_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/07_revision/scRNA_seq/head_and_neck_cancer/APOBEC.HNSC.txt"
) %>%
  pivot_longer(
    cols = -1,
    names_to = "cell_id",
    values_to = "log2(TPM/10+1)"
  )
# Build the per-cell annotation table (cell_id, tissue, cell_type).
#
# The annotation file is laid out with one annotation per row (named in the
# `data` column) and one cell per column, so it is melted to long form and
# then spread on `data` to obtain one row per cell.
hnscc_meta_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/07_revision/scRNA_seq/head_and_neck_cancer/cell_type.txt"
) %>%
  gather(key = "cell_id", value = "value", 2:5903) %>%
  spread(key = data, value = value) %>%
  mutate(
    # "LN" marks cells flagged as lymph node; "P" is everything else
    # (presumably primary tumour — confirm).
    tissue = ifelse(`Lymph node` == "0", "P", "LN"),
    # Cells not flagged as non-cancer are labelled "cancer".
    cell_type = ifelse(
      `classified as non-cancer cells` == 1,
      "normal",
      "cancer"
    )
  ) %>%
  select(cell_id, tissue, cell_type)

# Attach the per-cell annotation to the APOBEC3A / APOBEC3B expression rows
# and recover linear TPM values.
#
# The gene filter runs before the join so that only the two genes of interest
# are joined, and the join key is named explicitly instead of being guessed
# from the shared column names.
hnscc_merge_df <- hnscc_gene_df %>%
  filter(gene_id %in% c("APOBEC3A", "APOBEC3B")) %>%
  left_join(hnscc_meta_df, by = "cell_id") %>%
  # Invert log2(TPM / 10 + 1) to get TPM back.
  mutate(TPM = 10 * (2^`log2(TPM/10+1)` - 1))

# Keep cancer cells only, ordered by gene and then by decreasing TPM.
hnscc_merge_fin_df <- hnscc_merge_df %>%
  filter(cell_type == "cancer") %>%
  arrange(gene_id, desc(TPM))

# Print the number of cancer cells. Counting distinct cell IDs avoids assuming
# that every cell contributes exactly two gene rows.
n_distinct(hnscc_merge_fin_df$cell_id)

# Export the filtered table as tab-separated text without quoting or row names.
write.table(
  hnscc_merge_fin_df,
  file = "/home/users/ayh/Projects/27_A3B/07_revision/scRNA_seq/head_and_neck_cancer/hnscc_merge_fin_df.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)


# Draw the empirical CDF of TPM for a single gene.
#
# Arguments:
#   df             Long-format table with `gene_id` and `TPM` columns.
#   target_gene    Value of `gene_id` to plot.
#   vline_x        x positions of the dashed vertical reference lines.
#   vline_colours  Colours of those lines, matched to `vline_x` by position.
#   x_limits       x-axis limits; points outside this range are not drawn.
#
# Returns a ggplot object.
plot_tpm_ecdf <- function(df, target_gene, vline_x, vline_colours,
                          x_limits = c(0, 6100)) {
  stopifnot(length(vline_x) == length(vline_colours))

  ecdf_df <- df %>%
    filter(gene_id == target_gene) %>%
    # The ECDF is evaluated over all cells first ...
    mutate(ecdf = stats::ecdf(TPM)(TPM)) %>%
    # ... and only then reduced to one point per distinct TPM value.
    distinct(TPM, .keep_all = TRUE)

  # One dashed line per reference position, each with its own colour.
  vline_layers <- unname(Map(
    function(x, colour) {
      geom_vline(xintercept = x, colour = colour, linetype = "dashed")
    },
    vline_x,
    vline_colours
  ))

  ggplot(ecdf_df, aes(x = TPM, y = ecdf)) +
    geom_point() +
    facet_wrap(~gene_id) +
    scale_x_continuous(limits = x_limits) +
    scale_y_continuous(limits = c(0, 1)) +
    vline_layers +
    theme_bw()
}

# hnscc_merge_fin_df already contains cancer cells only, so no further
# cell_type filter is applied here.
# NOTE(review): the reference-line positions are hard-coded TPM values whose
# origin is not shown in this script — confirm before reuse.
p1 <- plot_tpm_ecdf(
  hnscc_merge_fin_df,
  target_gene = "APOBEC3A",
  vline_x = c(440.6, 825.3),
  vline_colours = c("#000075", "#800000")
)
p1

p2 <- plot_tpm_ecdf(
  hnscc_merge_fin_df,
  target_gene = "APOBEC3B",
  vline_x = c(3371.937, 6001.793),
  vline_colours = c("blue", "red")
)
p2
# Place the two ECDF panels side by side. The visible part of this script
# never attaches cowplot, so plot_grid() is called through its namespace.
combined_plot <- cowplot::plot_grid(
  p1, p2,
  labels = NULL,
  ncol = 2,
  align = "hv"
)

# Interactive inspection: print the final table and the unique APOBEC3B rows.
hnscc_merge_fin_df

hnscc_merge_fin_df %>%
  filter(gene_id == "APOBEC3B") %>%
  filter(cell_type == "cancer") %>%
  unique()
# Add a title on top of the combined panels.
#
# The cell count in the title is derived from the data instead of being typed
# in by hand, so the label cannot drift from the table being plotted.
# NOTE(review): assumes "n" means the number of distinct cancer cells in
# hnscc_merge_fin_df — confirm.
n_cancer_cells <- n_distinct(hnscc_merge_fin_df$cell_id)

# cowplot functions are namespace-qualified because the visible part of this
# script does not attach cowplot. The second panel is `combined_plot`, the
# object built from p1 and p2 (`combined_plot2` is never defined).
hnsc_final_plot <- cowplot::plot_grid(
  cowplot::ggdraw() +
    cowplot::draw_label(
      paste0("Head and neck cancer, n=", n_cancer_cells),
      fontface = "bold",
      size = 16
    ),
  combined_plot,
  ncol = 1,
  rel_heights = c(0.1, 1)  # title height relative to the plot height
)

# Write the titled figure to PDF (width 10, height 8, in ggsave's default
# units).
ggsave(
  filename = "/home/users/ayh/Projects/27_A3B/07_revision/scRNA_seq/head_and_neck_cancer/HNSC.pdf",
  plot = hnsc_final_plot,
  width = 10,
  height = 8
)
