library(dplyr)
library(tidyverse)

# ---- Load inputs ----
# APOBEC expression table; downstream code uses the columns `cell_id`,
# `gene_id` and `TPM` from it.
lung_APOBEC_tpm_df <- read_tsv(
  "/home/users/ayh/Projects/27_A3B/07_revision/scRNA_seq/lung_cancer/total_sample/APOBEC.lung_cancer.txt"
)
lung_APOBEC_tpm_df

# Per-cell annotation table; downstream code uses the columns `X`,
# `cell_id_1` and `cnv_leiden` from it.
# `as_tibble()` replaces the deprecated `as.tibble()`.
lung_obs_df <- read.csv("/home/users/jueenome01/scRNA_seq.lung_cancer.obs") %>%
  as_tibble()

# Sample metadata: keep only the run accession, tissue and isolate, and expose
# the run accession as `cell_id` so it can be matched to the expression table.
# `dplyr::rename()` is used instead of `plyr::rename()` so the script does not
# depend on a package it never loads.
lung_met_df <- read.csv(
  "/home/users/ayh/Projects/27_A3B/07_revision/scRNA_seq/lung_cancer/metadata.csv"
) %>%
  select(Run, tissue, isolate) %>%
  dplyr::rename(cell_id = Run) %>%
  as_tibble()




# ---- Select CNV-positive epithelial (cancer) cells ----
# Inspect the metadata table. The original line referenced `met_df`, which is
# never defined in this script; the object created above is `lung_met_df`.
lung_met_df

# CNV clusters (`cnv_leiden`) observed among cells annotated C3 or C5.
# NOTE(review): the variable name suggests these clusters are treated as the
# CNV-normal reference -- confirm against the clustering annotation.
cnv_norm_group <- lung_obs_df %>%
  filter(cell_id_1 %in% c("C3", "C5")) %>%
  pull(cnv_leiden) %>%
  unique()

# Identifiers (column `X`) of epithelial cells (clusters C2, C6, C8, C9, C10)
# whose CNV cluster is not one of the reference clusters above, i.e. the
# CNV-positive epithelial cells.
cancer_cell_id <- lung_obs_df %>%
  filter(
    cell_id_1 %in% c("C2", "C6", "C8", "C9", "C10"),
    !cnv_leiden %in% cnv_norm_group
  ) %>%
  pull(X)


lung_APOBEC_tpm_df

# Attach tissue/isolate metadata to every expression row.
# NOTE(review): no `by` is given, so the join uses every column name the two
# tables share (presumably just `cell_id`) -- confirm and make it explicit.
lung_merge_df <- left_join(lung_APOBEC_tpm_df, lung_met_df)

# Isolate LT_S63 is squamous cell carcinoma (per the original note), so it is
# excluded to leave adenocarcinoma only; then keep cancer cells only.
# Note that `isolate != "LT_S63"` also drops rows whose isolate is NA
# (cells without a metadata match).
lung_adeno_df <- lung_merge_df %>%
  filter(isolate != "LT_S63") %>%
  filter(cell_id %in% cancer_cell_id)


library(ggplot2)

# TPM distribution of the selected cells, one histogram panel per gene.
ggplot(lung_adeno_df, aes(x = TPM)) +
  geom_histogram() +
  facet_wrap(vars(gene_id))

# Number of distinct cells contributing to the adenocarcinoma table.
length(unique(lung_adeno_df$cell_id))

library(cowplot)

# Empirical CDF of TPM for a single gene.
#
# df          : expression table with columns `gene_id` and `TPM`.
# target_gene : value of `gene_id` to keep.
# Returns the rows of that gene with an added `ecdf` column (fraction of the
# gene's rows with TPM <= the row's TPM), reduced to one row per distinct TPM.
gene_ecdf_table <- function(df, target_gene) {
  df %>%
    filter(gene_id == .env$target_gene) %>%
    mutate(ecdf = ecdf(TPM)(TPM)) %>%
    distinct(TPM, .keep_all = TRUE)  # keep one point per TPM
}

# ECDF scatter plot for a single gene with two dashed vertical reference lines.
#
# low_cut / high_cut       : x positions of the two reference lines.
# low_colour / high_colour : colours of those lines.
# x_limits                 : x-axis limits; points outside them are dropped by
#                            ggplot2 (with a warning), as in the original code.
plot_gene_ecdf <- function(df, target_gene,
                           low_cut, high_cut,
                           low_colour, high_colour,
                           x_limits = c(0, 6100)) {
  gene_ecdf_table(df, target_gene) %>%
    ggplot(aes(x = TPM, y = ecdf)) +
    geom_point() +
    facet_wrap(~gene_id) +
    # `limits` spelled out; the original `lim=` relied on partial matching.
    scale_x_continuous(limits = x_limits) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_vline(xintercept = low_cut, colour = low_colour, linetype = "dashed") +
    geom_vline(xintercept = high_cut, colour = high_colour, linetype = "dashed") +
    theme_bw()
}

# `lung_adeno_df` is already restricted to the cancer cells when it is built,
# so the repeated `cell_id %in% cancer_cell_id` filters were redundant.

# APOBEC3A ECDF table, highest TPM first (printed for inspection).
gene_ecdf_table(lung_adeno_df, "APOBEC3A") %>%
  arrange(desc(TPM))

# NOTE(review): the vertical-line positions are hard-coded TPM values whose
# origin is not shown in this script -- confirm their source.
p1 <- plot_gene_ecdf(
  lung_adeno_df, "APOBEC3A",
  low_cut = 440.6, high_cut = 825.3,
  low_colour = "#000075", high_colour = "#800000"
)
p1

p2 <- plot_gene_ecdf(
  lung_adeno_df, "APOBEC3B",
  low_cut = 3371.937, high_cut = 6001.793,
  low_colour = "blue", high_colour = "red"
)

# APOBEC3A and APOBEC3B panels side by side, aligned on both axes.
combined_plot <- plot_grid(p1, p2, labels = NULL, ncol = 2, align = "hv")
# Sanity check: number of APOBEC3B rows among the cancer cells (printed).
lung_adeno_df %>%
  filter(cell_id %in% cancer_cell_id) %>%
  filter(gene_id == "APOBEC3B") %>%
  nrow()

# Number of distinct cells in the plotted table. The title is derived from the
# data so it cannot drift from a hard-coded count (previously "n=2662").
n_ladc_cells <- length(unique(lung_adeno_df$cell_id))

# Add a title on top of the two-panel figure.
lung_final_plot <- plot_grid(
  ggdraw() +
    draw_label(
      paste0("Lung adenocarcinoma, n=", n_ladc_cells),
      fontface = "bold",
      size = 16
    ),
  combined_plot,
  ncol = 1,
  rel_heights = c(0.1, 1)  # adjust title height relative to plot
)

ggsave(
  "/home/users/ayh/Projects/27_A3B/07_revision/scRNA_seq/lung_cancer/LADC.pdf",
  lung_final_plot,
  height = 8, width = 10
)

# Print the final plot. The original referenced `final_plot`, which is never
# defined in this script; the object built above is `lung_final_plot`.
lung_final_plot
# Faceted ECDF of TPM for every gene in the table.
# The ECDF is computed within each gene; the original computed it over all
# genes pooled and de-duplicated on TPM alone, so each facet showed pooled
# quantiles and points sharing a TPM with another gene could disappear.
lung_adeno_df %>%
  group_by(gene_id) %>%
  mutate(ecdf = ecdf(TPM)(TPM)) %>%
  ungroup() %>%
  distinct(gene_id, TPM, .keep_all = TRUE) %>%  # one point per gene and TPM
  filter(ecdf > 0 & ecdf < 1) %>%  # remove points where ecdf == 0 or 1
  ggplot(aes(x = TPM, y = ecdf)) +
  geom_point() +
  facet_wrap(~gene_id) +
  # `limits` spelled out; the original `lim=` relied on partial matching.
  scale_x_continuous(limits = c(0, 6100)) +
  # All four reference lines are drawn in every facet, as in the original.
  geom_vline(xintercept = 440.6, colour = "#000075", linetype = "dashed") +
  geom_vline(xintercept = 825.3, colour = "#800000", linetype = "dashed") +
  geom_vline(xintercept = 3371.937, colour = "blue", linetype = "dashed") +
  geom_vline(xintercept = 6001.793, colour = "red", linetype = "dashed") +
  # Cell count derived from the data instead of the hard-coded "n=2662".
  ggtitle(paste0("LADC, n=", length(unique(lung_adeno_df$cell_id)))) +
  theme_bw()
