library(dplyr)
library(tidyverse)

# ---- Load inputs ------------------------------------------------------------
# APOBEC expression table. Later statements in this script read its
# `cell_id`, `gene_id` and `TPM` columns.
eso_APOBEC_tpm_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/07_revision/scRNA_seq/esophageal_cancer//inferCNVpy/APOBEC.eso.txt"
)
eso_APOBEC_tpm_df

# Per-cell observation table. Later statements read `X`, `cell_id_7`,
# `cell_id_7_for_infercnv` and `cnv_leiden` from it.
# `as_tibble()` replaces the deprecated `as.tibble()`.
eso_obs_df <- read.csv("/home/users/jueenome01/scRNA_seq.eso.obs") %>%
  as_tibble()

# Metadata used for the inferCNV run (provides at least `tissue`).
eso_met_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/07_revision/scRNA_seq/esophageal_cancer//inferCNVpy/metadata.txt"
)

# Cell-level metadata: readr names the unnamed first column "...1"; it is
# renamed to `cell_id` and only the cell -> donor mapping is kept.
# dplyr::rename is used so the script does not depend on plyr, which it never
# loads.
eso_met_df2 <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/07_revision/scRNA_seq/esophageal_cancer/cell_metadata.tsv"
) %>%
  dplyr::rename(cell_id = "...1") %>%
  select(cell_id, donor)

# Build a tissue -> donor lookup from the cells present in both metadata
# tables. The join is a natural join on whatever columns the two tables share
# (dplyr reports them in a message).
# NOTE(review): presumably the only shared key is `cell_id` -- confirm and
# consider passing `by =` explicitly.
eso_met_merge_tmp_df <- left_join(eso_met_df, eso_met_df2) %>%
  select(tissue, donor) %>%
  filter(!is.na(donor)) %>%
  distinct()

# Attach the donor label to every metadata row through the lookup above.
# NOTE(review): if one tissue maps to several donors this join duplicates
# rows -- verify the lookup is one-to-one.
eso_met_df <- left_join(eso_met_df, eso_met_merge_tmp_df)
# ---- Exploratory inspection (printed only, nothing is assigned) -------------
# Annotation and CNV cluster of every cell that has a TPM above 500 in the
# APOBEC table.
eso_obs_df %>%
  filter(X %in% (eso_APOBEC_tpm_df %>% filter(TPM > 500) %>% pull(cell_id))) %>%
  select(X, cell_id_7, cnv_leiden)

# Print the merged metadata. The original statement referenced `met_df`, which
# is never defined in this script and aborts a non-interactive run;
# `eso_met_df` is the metadata table built above.
eso_met_df

# Cells whose inferCNV label contains "C10" and that fall in CNV cluster 2.
eso_obs_df %>%
  filter(grepl("C10", cell_id_7_for_infercnv)) %>%
  filter(cnv_leiden == "2")
# CNV clusters observed among the "C6" cells. These clusters are excluded
# below when picking CNV-positive cells.
cnv_norm_group <- eso_obs_df %>%
  filter(cell_id_7 %in% c("C6")) %>%
  pull(cnv_leiden) %>%
  unique()

# Inspection: metadata rows whose tissue label contains "ormal".
eso_met_df %>%
  filter(grepl("ormal", tissue))

# Inspection: CNV cluster of two specific runs.
eso_obs_df %>%
  filter(X %in% c("SRR6133132", "SRR6133133")) %>%
  select(cnv_leiden)

# IDs of the cells carried forward as cancer cells.
cancer_cell_id <- eso_obs_df %>%
  filter(cell_id_7 %in% c("C0", "C1", "C2", "C4", "C5", "C7")) %>% ## epithelial cells
  filter(!(cnv_leiden %in% cnv_norm_group)) %>% ## cnv positive epithelial cells
  pull(X)


# ---- Inspection: cells with very high APOBEC expression ---------------------
eso_APOBEC_tpm_df

# IDs of cells with TPM > 1000 in at least one row of the APOBEC table.
# Computed once and reused for both sides of the join below.
high_tpm_cell_ids <- eso_APOBEC_tpm_df %>%
  filter(TPM > 1000) %>%
  pull(cell_id)

# Annotation of those cells next to their expression values in wide format
# (one column per gene). `pivot_wider()` replaces the superseded `spread()`;
# `names_sort = TRUE` keeps the sorted column order that `spread()` produced.
# dplyr::rename replaces plyr::rename so plyr is not required.
eso_obs_df %>%
  filter(X %in% high_tpm_cell_ids) %>%
  select(X, cell_id_7, cell_id_7_for_infercnv, cnv_leiden) %>%
  dplyr::rename(cell_id = X) %>%
  left_join(
    eso_APOBEC_tpm_df %>%
      filter(cell_id %in% high_tpm_cell_ids) %>%
      pivot_wider(
        names_from = gene_id,
        values_from = TPM,
        names_sort = TRUE
      )
  )


## LTS63 is only squamous cell carcinoma
# Attach metadata (tissue, donor) to every expression row. The join uses the
# columns shared by both tables (natural join).
eso_merge_df <- eso_APOBEC_tpm_df %>%
  left_join(eso_met_df)

# Inspection: expression rows of the selected cancer cells, highest TPM first.
eso_merge_df %>%
  filter(cell_id %in% cancer_cell_id) %>%
  arrange(desc(TPM))



library(ggplot2)

library(cowplot)

# ---- ESCC panels: empirical CDF of TPM in squamous-donor cancer cells -------
# APOBEC3A panel. The ECDF is evaluated on all selected cells first; rows are
# then reduced to one point per distinct TPM value so each step is drawn once.
# `limits` is spelled out in full (the original `lim =` relied on partial
# argument matching). Points outside the axis limits are dropped by ggplot2.
# The dashed vertical lines mark fixed reference TPM values.
# NOTE(review): the origin of these reference values is not shown in this
# script -- document where they come from.
p1 <- eso_merge_df %>%
  filter(
    gene_id == "APOBEC3A",
    cell_id %in% cancer_cell_id,
    grepl("Squamous", donor)
  ) %>%
  mutate(ecdf = ecdf(TPM)(TPM)) %>%
  distinct(TPM, .keep_all = TRUE) %>% # keep one point per TPM
  #  filter(ecdf > 0 & ecdf < 1)%>%          # remove points where ecdf == 0 or 1
  ggplot(aes(x = TPM, y = ecdf)) +
  geom_point() +
  facet_wrap(~gene_id) +
  scale_x_continuous(limits = c(0, 6100)) +
  scale_y_continuous(limits = c(0, 1)) +
  geom_vline(xintercept = 440.6, colour = "#000075", linetype = "dashed") +
  geom_vline(xintercept = 825.3, colour = "#800000", linetype = "dashed") +
  theme_bw()
p1

# APOBEC3B panel: same construction with its own pair of reference lines.
p2 <- eso_merge_df %>%
  filter(
    cell_id %in% cancer_cell_id,
    gene_id == "APOBEC3B",
    grepl("Squamous", donor)
  ) %>%
  mutate(ecdf = ecdf(TPM)(TPM)) %>%
  distinct(TPM, .keep_all = TRUE) %>% # keep one point per TPM
  #  filter(ecdf > 0 & ecdf < 1)%>%          # remove points where ecdf == 0 or 1
  ggplot(aes(x = TPM, y = ecdf)) +
  geom_point() +
  facet_wrap(~gene_id) +
  scale_x_continuous(limits = c(0, 6100)) +
  scale_y_continuous(limits = c(0, 1)) +
  geom_vline(xintercept = 3371.937, colour = "blue", linetype = "dashed") +
  geom_vline(xintercept = 6001.793, colour = "red", linetype = "dashed") +
  theme_bw()
p2

# Place both panels side by side with aligned axes.
combined_plot <- plot_grid(p1, p2, labels = NULL, ncol = 2, align = "hv")


# Inspection: the merged table, then the distinct squamous-donor cancer cells.
# NOTE(review): presumably the row count of this listing is the n quoted in
# the title below -- confirm when the data change.
eso_merge_df
eso_merge_df %>%
  filter(cell_id %in% cancer_cell_id, grepl("Squamous", donor)) %>%
  distinct(cell_id)

# Stack a bold title strip above the two-panel figure.
eso_final_plot <- plot_grid(
  ggdraw() +
    draw_label(
      "Esophageal squamous cell carcinoma, n=144",
      fontface = "bold",
      size = 16
    ),
  combined_plot,
  ncol = 1,
  rel_heights = c(0.1, 1) # title strip height relative to the panels
)
eso_final_plot

# Write the figure to disk as a 10 x 8 PDF.
ggsave(
  filename = "/home/users/ayh/Projects/27_A3B/07_revision/scRNA_seq/esophageal_cancer/ESCC.pdf",
  plot = eso_final_plot,
  width = 10,
  height = 8
)

# ---- ESAD panels: empirical CDF of TPM in non-squamous-donor cancer cells ---
# APOBEC3A panel. Same construction as the squamous panels, but donors whose
# label contains "Squamous" are excluded. The ECDF is evaluated on all selected
# cells, then one point per distinct TPM value is kept for plotting.
# `limits` is spelled out in full (the original `lim =` relied on partial
# argument matching). Points outside the axis limits are dropped by ggplot2.
# NOTE(review): the origin of the dashed reference TPM values is not shown in
# this script -- document where they come from.
p1_ESAD <- eso_merge_df %>%
  filter(
    gene_id == "APOBEC3A",
    cell_id %in% cancer_cell_id,
    !grepl("Squamous", donor)
  ) %>%
  mutate(ecdf = ecdf(TPM)(TPM)) %>%
  distinct(TPM, .keep_all = TRUE) %>% # keep one point per TPM
  #  filter(ecdf > 0 & ecdf < 1)%>%          # remove points where ecdf == 0 or 1
  ggplot(aes(x = TPM, y = ecdf)) +
  geom_point() +
  facet_wrap(~gene_id) +
  scale_x_continuous(limits = c(0, 6100)) +
  scale_y_continuous(limits = c(0, 1)) +
  geom_vline(xintercept = 440.6, colour = "#000075", linetype = "dashed") +
  geom_vline(xintercept = 825.3, colour = "#800000", linetype = "dashed") +
  theme_bw()
p1_ESAD

# APOBEC3B panel with its own pair of reference lines.
p2_ESAD <- eso_merge_df %>%
  filter(
    cell_id %in% cancer_cell_id,
    gene_id == "APOBEC3B",
    !grepl("Squamous", donor)
  ) %>%
  mutate(ecdf = ecdf(TPM)(TPM)) %>%
  distinct(TPM, .keep_all = TRUE) %>% # keep one point per TPM
  #  filter(ecdf > 0 & ecdf < 1)%>%          # remove points where ecdf == 0 or 1
  ggplot(aes(x = TPM, y = ecdf)) +
  geom_point() +
  facet_wrap(~gene_id) +
  scale_x_continuous(limits = c(0, 6100)) +
  scale_y_continuous(limits = c(0, 1)) +
  geom_vline(xintercept = 3371.937, colour = "blue", linetype = "dashed") +
  geom_vline(xintercept = 6001.793, colour = "red", linetype = "dashed") +
  theme_bw()
p2_ESAD

# Place both panels side by side with aligned axes.
combined_plot2 <- plot_grid(
  p1_ESAD, p2_ESAD,
  labels = NULL, ncol = 2, align = "hv"
)




# Inspection: the merged table, then the distinct cancer cells from donors
# whose label does not contain "Squamous".
# NOTE(review): presumably the row count of this listing is the n quoted in
# the title below -- confirm when the data change.
eso_merge_df
eso_merge_df %>%
  filter(cell_id %in% cancer_cell_id, !grepl("Squamous", donor)) %>%
  distinct(cell_id)

# Stack a bold title strip above the two-panel figure. The title previously
# read "Esophageal squamous adenocarcinoma", a copy-paste slip from the
# squamous figure: this figure is built from the non-squamous donors and is
# saved as ESAD.pdf.
eso_final_plot_ESAD <- plot_grid(
  ggdraw() +
    draw_label(
      "Esophageal adenocarcinoma, n=156",
      fontface = "bold",
      size = 16
    ),
  combined_plot2,
  ncol = 1,
  rel_heights = c(0.1, 1) # title strip height relative to the panels
)
eso_final_plot_ESAD

# Write the figure to disk as a 10 x 8 PDF.
ggsave(
  filename = "/home/users/ayh/Projects/27_A3B/07_revision/scRNA_seq/esophageal_cancer/ESAD.pdf",
  plot = eso_final_plot_ESAD,
  width = 10,
  height = 8
)

# ---- Overview: ECDF of TPM for every gene, all cells ------------------------
# The two bare `final_plot` statements that used to precede this plot were
# removed: no object of that name is created in this script, so they aborted
# any non-interactive run.
#
# The ECDF is computed within each gene so that every facet shows that gene's
# own distribution; pooling all genes and de-duplicating on TPM alone mixed
# the distributions and dropped points shared between genes.
# NOTE(review): the title "LADC, n=2662" looks carried over from a different
# dataset (this script analyses esophageal data) -- confirm the label and n.
eso_merge_df %>%
  group_by(gene_id) %>%
  mutate(ecdf = ecdf(TPM)(TPM)) %>%
  ungroup() %>%
  distinct(gene_id, TPM, .keep_all = TRUE) %>% # one point per gene and TPM
  filter(ecdf > 0 & ecdf < 1) %>% # remove points where ecdf == 0 or 1
  ggplot(aes(x = TPM, y = ecdf)) +
  geom_point() +
  facet_wrap(~gene_id) +
  scale_x_continuous(limits = c(0, 6100)) +
  geom_vline(xintercept = 440.6, colour = "#000075", linetype = "dashed") +
  geom_vline(xintercept = 825.3, colour = "#800000", linetype = "dashed") +
  geom_vline(xintercept = 3371.937, colour = "blue", linetype = "dashed") +
  geom_vline(xintercept = 6001.793, colour = "red", linetype = "dashed") +
  ggtitle("LADC, n=2662") +
  theme_bw()
