library(ggplot2)
library(dplyr)
library(tidyverse)
# Load the combined TPM matrix. The reshape further down treats its row names
# as gene IDs and its column names as cell IDs.
tpm_combined <- read.table("/home/users/ayh/Projects/27_A3B/07_revision/scRNA_seq/breast_cancer_TNBC/total_sample/tpm_combined.mat")
# Alternative input kept for reference (count_merged.mat instead of tpm_combined.mat):
#tpm_combined<-read.table("/home/users/ayh/Projects/27_A3B/07_revision/scRNA_seq/breast_cancer_TNBC/total_sample/count_merged.mat")

# Interactive sanity checks: object type and number of columns (cells).
is.data.frame(tpm_combined)
length(colnames(tpm_combined))

# Empty scaffolding objects: a data frame with one row per gene and an empty
# sample/batch table. Neither is filled in or used in the visible script.
expression_matrix <- data.frame(row.names = rownames(tpm_combined))
metadata <- data.frame(
  sample_id = character(),
  batch = character(),
  stringsAsFactors = FALSE
)

# Per-cell metadata table.
met_df <- read_tsv("/home/users/ayh/Projects/27_A3B/07_revision/scRNA_seq/breast_cancer_TNBC/inferCNVpy/metadata.txt")

# Reshape wide (genes x cells) to long: one row per gene/cell pair.
# The column range is derived from the data (everything except gene_id), so
# this works for any number of cells and keeps the gene IDs. An earlier
# gather() over a fixed 1:1532 column range was removed: it discarded the
# gene row names and its result was overwritten here anyway.
t_tpm_combined <- tpm_combined %>%
  rownames_to_column("gene_id") %>%
  pivot_longer(cols = -gene_id, names_to = "cell_id", values_to = "TPM")

# Write the full long table.
# NOTE(review): the output is named "t_count_combined.txt" although the active
# input above is the TPM matrix - confirm the file name is intended.
write.table(
  t_tpm_combined,
  "/home/users/ayh/Projects/27_A3B/07_revision/scRNA_seq/breast_cancer_TNBC/inferCNVpy/t_count_combined.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)

# Write the APOBEC3A / APOBEC3B rows only, highest TPM first.
apobec_long <- t_tpm_combined %>%
  filter(gene_id %in% c("APOBEC3A", "APOBEC3B")) %>%
  arrange(desc(TPM))
write.table(
  apobec_long,
  "/home/users/ayh/Projects/27_A3B/07_revision/scRNA_seq/breast_cancer_TNBC/inferCNVpy/APOBEC.TNBC.txt",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)


# Cell IDs of interest. They are matched against the X column of the obs table
# and against the RSEM result file names below; their order also defines the
# row order of obs_filter.
id <- c(
  "SRR7667704",
  "SRR7666735",
  "SRR7666386",
  "SRR7667506",
  "SRR7666379",
  "SRR7666350",
  "SRR7666904",
  "SRR7666724",
  "SRR7666377",
  "SRR7667461",
  "SRR7667099",
  "SRR7666926"
)

# Per-cell annotation table; the cell ID is in column X.
obs <- read.csv("/home/users/jueenome01/scRNA_seq.TNBC.obs")
# Interactive look at the table ordered by CNV score.
obs %>% arrange(cnv_score)

# Keep only the selected cells and order the rows like `id`.
# The sort must use X (the factor whose levels follow `id`) BEFORE the column
# is renamed. Sorting by `id` at this point would pick up the global character
# vector instead of a column, giving a meaningless positional sort or a
# length-mismatch error.
obs_filter <- obs %>% filter(X %in% id)
obs_filter$X <- factor(obs_filter$X, levels = id)
obs_filter <- obs_filter %>%
  arrange(X) %>%
  dplyr::rename(id = X)

# RSEM gene-level result files whose path contains one of the selected IDs.
tpm_files <- list.files(
  "/home/users/ayh/Projects/27_A3B/07_revision/scRNA_seq/breast_cancer_TNBC/rsem",
  "genes.results",
  full.names = TRUE
)
matched_tpm_files <- tpm_files[grepl(paste(id, collapse = "|"), tpm_files)]

# For each file keep the rows whose gene_id contains "APOBEC3A" but not "AP1"
# (which would drop an ID such as APOBEC3AP1), tagged with the cell ID taken
# from the file name.
tpm_tmp <- lapply(matched_tpm_files, function(path) {
  # Strip the literal ".genes.results" suffix; fixed = TRUE keeps the dots
  # from acting as regex wildcards.
  cell <- gsub(".genes.results", "", basename(path), fixed = TRUE)
  read_tsv(path) %>%
    mutate(id = cell) %>%
    filter(grepl("APOBEC3A", gene_id)) %>%
    filter(!grepl("AP1", gene_id)) %>%
    select(id, gene_id, TPM)
})
tpm_df <- bind_rows(tpm_tmp)

# Show annotation next to the RSEM TPM for the selected cells.
left_join(obs_filter, tpm_df, by = "id") %>%
  select(id, cell_id_9, cnv_leiden, cnv_score, gene_id, TPM)


###infercnvpy

library(dplyr)
library(tidyverse)

# Long APOBEC3A/APOBEC3B TPM table (columns used below: gene_id, cell_id, TPM).
TNBC_APOBEC_tpm_df <- read_tsv("/home/users/ayh/Projects/27_A3B/07_revision/scRNA_seq/breast_cancer_TNBC/inferCNVpy/APOBEC.TNBC.txt")
TNBC_APOBEC_tpm_df

# Per-cell annotation (cell ID in column X) and per-cell metadata.
TNBC_obs_df <- read.csv("/home/users/jueenome01/scRNA_seq.TNBC.obs") %>%
  as_tibble()
TNBC_met_df <- read_tsv("/home/users/ayh/Projects/27_A3B/07_revision/scRNA_seq/breast_cancer_TNBC/inferCNVpy/metadata.txt")

# Interactive: annotation of cells with any APOBEC row above 500 TPM.
tpm_over_500_cell_id <- TNBC_APOBEC_tpm_df %>%
  filter(TPM > 500) %>%
  pull(cell_id)
TNBC_obs_df %>%
  filter(X %in% tpm_over_500_cell_id) %>%
  select(X, cell_id_9, cnv_leiden)

# Interactive: metadata table. This prints the copy loaded in this section so
# the section also runs on its own.
TNBC_met_df
# Interactive: cells whose cell_id_9_for_infercnv contains "C10" in CNV
# cluster "2".
TNBC_obs_df %>%
  filter(grepl("C10", cell_id_9_for_infercnv)) %>%
  filter(cnv_leiden == "2")

# CNV clusters that contain at least one C10 cell; these clusters are
# excluded when picking cancer cells below.
cnv_norm_group <- TNBC_obs_df %>%
  filter(cell_id_9 %in% c("C10")) %>%
  pull(cnv_leiden) %>%
  unique()

# Cancer cells: cells in the listed clusters that fall outside the clusters
# shared with C10.
cancer_cell_id <- TNBC_obs_df %>%
  filter(cell_id_9 %in% c("C0", "C4", "C3", "C12", "C7", "C1", "C9", "C5", "C6")) %>% ## epithelial cells
  filter(!cnv_leiden %in% cnv_norm_group) %>% ## cnv positive epithelial cells
  pull(X)

# Interactive: annotation plus one TPM column per gene for cells with any
# APOBEC row above 1000 TPM.
TNBC_APOBEC_tpm_df
tpm_over_1000_cell_id <- TNBC_APOBEC_tpm_df %>%
  filter(TPM > 1000) %>%
  pull(cell_id)
TNBC_obs_df %>%
  filter(X %in% tpm_over_1000_cell_id) %>%
  select(cell_id = X, cell_id_9, cell_id_9_for_infercnv, cnv_leiden) %>%
  left_join(
    TNBC_APOBEC_tpm_df %>%
      filter(cell_id %in% tpm_over_1000_cell_id) %>%
      # names_sort = TRUE keeps the gene columns in sorted order.
      pivot_wider(names_from = gene_id, values_from = TPM, names_sort = TRUE),
    by = "cell_id"
  )


## LTS63 is only squamous cell carcinoma
## NOTE(review): the remark above does not refer to anything loaded in this
## section - confirm whether it still applies here.
# Attach metadata to every APOBEC row (joined on all shared column names).
TNBC_merge_df <- left_join(TNBC_APOBEC_tpm_df, TNBC_met_df)
# Interactive: cancer-cell rows, highest TPM first.
TNBC_merge_df %>%
  filter(cell_id %in% cancer_cell_id) %>%
  arrange(desc(TPM))



library(ggplot2)

library(cowplot)
# Interactive: rows for one specific cell.
TNBC_merge_df %>% filter(cell_id == "SRR7666926")

# Build an empirical-CDF scatter plot of TPM for one gene over selected cells.
#
# df       long table with gene_id, cell_id and TPM columns
# gene     gene_id value to keep
# cells    cell_id values to keep
# cutoffs  two x positions for dashed vertical reference lines
# colours  colours of those two lines, in the same order
#
# Returns a ggplot object. Points outside the fixed axis limits
# (x in [0, 6100], y in [0, 1]) are dropped by the scales.
plot_gene_ecdf <- function(df, gene, cells, cutoffs, colours) {
  df %>%
    # .env$ makes sure the function arguments are used even if df happens to
    # have columns with the same names.
    filter(gene_id == .env$gene, cell_id %in% .env$cells) %>%
    mutate(ecdf = ecdf(TPM)(TPM)) %>%
    distinct(TPM, .keep_all = TRUE) %>% # keep one point per TPM
    ggplot(aes(x = TPM, y = ecdf)) +
    geom_point() +
    facet_wrap(~gene_id) +
    scale_x_continuous(limits = c(0, 6100)) +
    scale_y_continuous(limits = c(0, 1)) +
    geom_vline(xintercept = cutoffs[1], colour = colours[1], linetype = "dashed") +
    geom_vline(xintercept = cutoffs[2], colour = colours[2], linetype = "dashed") +
    theme_bw()
}

# APOBEC3A in cancer cells. The vertical lines mark fixed reference TPM
# values whose origin is not shown in this script.
p1 <- plot_gene_ecdf(
  TNBC_merge_df,
  gene = "APOBEC3A",
  cells = cancer_cell_id,
  cutoffs = c(440.6, 825.3),
  colours = c("#000075", "#800000")
)
p1

# APOBEC3B in cancer cells, with its own pair of reference values.
p2 <- plot_gene_ecdf(
  TNBC_merge_df,
  gene = "APOBEC3B",
  cells = cancer_cell_id,
  cutoffs = c(3371.937, 6001.793),
  colours = c("blue", "red")
)
p2
# Place the two ECDF panels side by side.
combined_plot <- plot_grid(p1, p2, labels = NULL, ncol = 2, align = "hv")

# Interactive: merged table and the distinct cancer-cell IDs in it (the row
# count of the second output is the n to compare with the title below).
TNBC_merge_df
TNBC_merge_df %>%
  filter(cell_id %in% cancer_cell_id) %>%
  select(cell_id) %>%
  unique()

# Add a title on top
title_panel <- ggdraw() +
  draw_label(
    "Triple negative breast cancer, n=541",
    fontface = "bold",
    size = 16
  )
TNBC_final_plot <- plot_grid(
  title_panel,
  combined_plot,
  ncol = 1,
  rel_heights = c(0.1, 1) # adjust title height relative to plot
)

# Save the figure as PDF.
ggsave(
  "/home/users/ayh/Projects/27_A3B/07_revision/scRNA_seq/breast_cancer_TNBC//TNBC.pdf",
  TNBC_final_plot,
  height = 8,
  width = 10
)

# Print the final plot. The object is TNBC_final_plot; the name `final_plot`
# is never defined in this script and printing it stopped the script.
TNBC_final_plot
# ECDF of TPM for every cell in the merged table (no cancer-cell filter),
# one facet per gene.
# The ECDF and the de-duplication are done per gene so that each facet shows
# that gene's own distribution. Pooling both genes would rank each point
# against the other gene's values and drop points whose TPM also occurs in
# the other gene.
TNBC_merge_df %>%
  group_by(gene_id) %>%
  mutate(ecdf = ecdf(TPM)(TPM)) %>%
  ungroup() %>%
  distinct(gene_id, TPM, .keep_all = TRUE) %>% # keep one point per gene and TPM
  filter(ecdf > 0 & ecdf < 1) %>% # remove points where ecdf == 0 or 1
  ggplot(aes(x = TPM, y = ecdf)) +
  geom_point() +
  facet_wrap(~gene_id) +
  scale_x_continuous(limits = c(0, 6100)) +
  # All four fixed reference TPM values are drawn in both facets.
  geom_vline(xintercept = 440.6, colour = "#000075", linetype = "dashed") +
  geom_vline(xintercept = 825.3, colour = "#800000", linetype = "dashed") +
  geom_vline(xintercept = 3371.937, colour = "blue", linetype = "dashed") +
  geom_vline(xintercept = 6001.793, colour = "red", linetype = "dashed") +
  # The title is built from the plotted table; the previous fixed title
  # ("LADC, n=2662") did not describe this data set.
  ggtitle(paste0(
    "Triple negative breast cancer, n=",
    n_distinct(TNBC_merge_df$cell_id)
  )) +
  theme_bw()
