library(dplyr)
library(tidyverse)
library(ggplot2)
# Print large numbers in fixed notation instead of scientific notation.
options(scipen = 999)

# List the contents of the merged v4 results directory (interactive check).
list.files("/home/users/ayh/Projects/27_A3B/01_public_data/pcawg/discovery_simulation/add_simulation/unique_vaf/fin/03_merge_results/v4")


# Table read from id.v4.txt; its `std` and `condition` columns are coerced to
# numeric below.
id_df <- read_tsv("/home/users/ayh/Projects/27_A3B/01_public_data/pcawg/discovery_simulation/add_simulation/unique_vaf/fin/id.v4.txt")

#mutate(condition=gsub("_.*","",gsub("^.*-[0-9]*_","",id)))
id_df$std <- as.numeric(id_df$std)
id_df$condition <- as.numeric(id_df$condition)

# One row per distinct (id, imd_cutoff, simul, std, condition) combination.
# `dplyr::select` is written out in full, as elsewhere in this script: once
# library(drc) has been attached (it brings in MASS), a bare `select` resolves
# to MASS::select and this section fails when re-run in the same session.
simul_merge_f_df <- read_tsv("/home/users/ayh/Projects/27_A3B/01_public_data/pcawg/discovery_simulation/add_simulation/unique_vaf/fin/03_merge_results/v4/simul_merge_f_df.tsv") %>%
  dplyr::select(id, imd_cutoff, simul, std, condition) %>%
  unique()

# Collect every file in the v4 results directory whose name matches
# ".*simul.*txt" and stack the tables into one data frame.
sum_files_to_read <- list.files("/home/users/ayh/Projects/27_A3B/01_public_data/pcawg/discovery_simulation/add_simulation/unique_vaf/fin/03_merge_results/v4",
                                ".*simul.*txt",
                                full.names = TRUE)
sum_files_tmp <- lapply(sum_files_to_read, read_tsv)
# bind_rows() matches columns by name, so one file with a different column
# order or an extra column no longer breaks the stacking step.
simul_merge_f_sum_df <- bind_rows(sum_files_tmp)
#simul_merge_f_sum_df<-read_tsv("/home/users/ayh/Projects/27_A3B/01_public_data/pcawg/discovery_simulation/add_simulation/unique_vaf/omikli/100/simul_merge_f_sum_df.omikli.txt")

# Natural joins: no `by` is given, so dplyr joins on every column name the two
# tables share. NOTE(review): consider naming the keys explicitly.
simul_merge_f_sum_df <- left_join(simul_merge_f_sum_df, simul_merge_f_df)
simul_merge_f_sum_df <- left_join(id_df, simul_merge_f_sum_df)

# Every NA in the joined table (any column) is replaced by 0.
simul_merge_f_sum_df[is.na(simul_merge_f_sum_df)] <- 0
simul_merge_f_sum_df
simul_merge_f_sum_df <- simul_merge_f_sum_df %>% unique()
simul_merge_f_sum_df$condition <- simul_merge_f_sum_df$condition %>% as.double()
simul_merge_f_sum_df$condition
#simul_merge_f_sum_df$std<-factor(simul_merge_f_sum_df$std,levels=c(25,50,75,100,250,500))
simul_merge_f_sum_df$std <- as.numeric(simul_merge_f_sum_df$std)

# `class` is a factor copy of `std` with a fixed level order, used for
# colouring and faceting; `std` itself stays numeric for the count / std ratio.
simul_merge_f_sum_df <- simul_merge_f_sum_df %>% mutate(class = std)
simul_merge_f_sum_df$class <- factor(simul_merge_f_sum_df$class,
                                     levels = c(25, 50, 75, 100, 250, 500))
# count / std against imd_cutoff, one panel per simul x class combination,
# with an asymptotic-regression (SSasymp) curve fitted by nls in each panel.
# The theme and axis-label layers are chained onto the plot; previously the
# chain stopped after geom_smooth(), so they were evaluated as stand-alone
# expressions and never applied.
simul_merge_f_sum_df %>%
  ggplot(aes(y = count / std, x = imd_cutoff, col = class)) +
  geom_point() +
  facet_wrap(~ simul + class, ncol = 4) +
  geom_smooth(method = "nls", formula = y ~ SSasymp(x, Asym, R0, lrc),
              color = "red", se = FALSE, fullrange = TRUE) +
  #facet_wrap(~cluster)#+
  theme_bw() +
  theme(axis.text = element_text(size = 30),
        axis.title = element_text(size = 30),
        strip.text = element_text(size = 20)) +
  ylab("APOBEC mediated cluster")
# Distinct (condition, imd_cutoff, AMS_n, cluster) rows, largest imd_cutoff
# first. `dplyr::select` is explicit (as on the other select calls in this
# script) because MASS::select masks it once library(drc) has been attached.
simul_merge_f_sum_df %>%
  ungroup() %>%
  dplyr::select(condition, imd_cutoff, AMS_n, cluster) %>%
  arrange(-imd_cutoff) %>%
  unique()

# AMS_n against imd_cutoff, one panel per `cluster` value.
simul_merge_f_sum_df %>%
  ggplot(aes(y = AMS_n, x = imd_cutoff)) +
  geom_point() +
  facet_wrap(~cluster) +
  theme_bw() +
  theme(axis.text = element_text(size = 30),
        axis.title = element_text(size = 30),
        strip.text = element_text(size = 20)) +
  ylab("APOBEC mediated cluster")

# Same plot, one panel per `simul` value.
simul_merge_f_sum_df %>%
  ggplot(aes(y = AMS_n, x = imd_cutoff)) +
  geom_point() +
  facet_wrap(~simul) +
  theme_bw() +
  theme(axis.text = element_text(size = 30),
        axis.title = element_text(size = 30),
        strip.text = element_text(size = 20)) +
  ylab("APOBEC mediated cluster")

# Label each row with "<simul>_<std>".
simul_merge_f_sum_df <- simul_merge_f_sum_df %>%
  mutate(type = paste(simul, std, sep = "_"))
# (A bare `lapply()` call with no arguments stood here; it can only raise
# "argument is missing" and has been removed.)

# NOTE: attaching drc also attaches MASS, whose `select` masks dplyr::select
# from this point on.
library(drc)
library(ggpmisc)
library(aomisc)

# Placeholder call: `Y` and `X` are not defined anywhere in this script, so
# the two lines are kept for reference only.
# model <- drm(Y ~ X, fct = MM.2())
# summary(model)

# Three-parameter asymptotic regression (AR.3) of count / 100 on imd_cutoff
# over the whole table.
dosefit <- drm(count / 100 ~ imd_cutoff, data = simul_merge_f_sum_df,
               fct = AR.3())
summary(dosefit)
coef(dosefit)
#Coefficients:
#c:(Intercept) d:(Intercept) e:(Intercept)
#0.03374132    0.92143512  510.69440554
#  c:(Intercept)  d:(Intercept)  e:(Intercept)
#-1.566        101.077        789.893
#f(x) = c + (d-c)(1-exp(-x/e))
#first coefficient set:  f(x)=0.034+0.89(1-exp(-x/510.69))
#f(x) = c + (d-c)(1-\exp(-x/e))
# Second coefficient set evaluated at x = 7 (d - c = 101.077 + 1.566 = 102.643).
-1.566 + 102.643 * (1 - exp(-7 / 789.893))

# Power-curve alternative; the summary now reports this fit instead of
# repeating the summary of `dosefit`.
dosefit2 <- drm(count / 100 ~ imd_cutoff, data = simul_merge_f_sum_df,
                fct = DRC.powerCurve())
summary(dosefit2)
# Distinct "<simul>_<std>" labels present in id_df (printed for inspection).
id_df%>%mutate(simul_class=paste(simul,std,sep="_"))%>%dplyr::select(simul_class)%>%unique()
# Attach the same "<simul>_<std>" label to the merged summary table.
simul_merge_f_sum_df<-simul_merge_f_sum_df%>%mutate(simul_class=paste(simul,std,sep="_"))

# For each listed simul_class, fit count/std ~ SSasymp(imd_cutoff, ...) on that
# subset and return the fitted coefficients (Asym, R0, lrc). The result is an
# unnamed list in the order of the class vector.
lapply(c("kataegis_100","kataegis_25","kataegis_50","kataegis_75","omikli_100","omikli_250","omikli_500","omikli_50"),function(x){
  df<-simul_merge_f_sum_df%>%filter(simul_class==x)
  dosefit<-nls(count/std ~ SSasymp(imd_cutoff, Asym, R0, lrc), data = df)
  coef(dosefit)
})
# Kataegis-only subset, kept in the global `df`.
df<-simul_merge_f_sum_df%>%filter(simul=="kataegis")
# NOTE(review): `Asym=2` inside the SSasymp() call looks like an attempt to pin
# the asymptote; it is unclear whether nls accepts this form -- confirm.
nls(count/std ~ SSasymp(imd_cutoff, Asym=2,R0,lrc), data = df)
# Same asymptotic fit, pooled over all std values, once per simulation type.
# The coefficients noted in the comments further down come from fits of this form.
lapply(c("kataegis","omikli"),function(x){
  #x="kataegis"
  df<-simul_merge_f_sum_df%>%filter(simul==x)
  #SSmicmen(x, Vm, K)
  #df[is.na(df$count/df$std),]
  #df[is.infinite(df$count/df$std),]
  dosefit<-nls(count/std ~ SSasymp(imd_cutoff, Asym,R0,lrc), data = df)
  coef(dosefit)
})

# Notes dated 231123: coefficients recorded from SSasymp fits, written out in
# the SSasymp form  y = Asym + (R0 - Asym) * exp(-exp(lrc) * x).
#Asym+(R0-Asym)*exp(-exp(lrc)*input)
# NOTE(review): `input` is not assigned anywhere in the visible script, so the
# two executable `y=` lines below fail unless it exists in the session.
#[[1]]
#Asym          R0         lrc
#1.00897648 -0.01319299 -7.08041332
y=1.00897648+(-0.01319299-1.00897648)*exp(-exp(-7.08041332)*input)
# Rounded form of the line above:
#y=1.01-1.02*exp(-exp(-7.08)*x)

#[[2]]
#Asym          R0         lrc
#0.93550132  0.01868966 -6.19856470
y=0.93550132+(0.01868966-0.93550132)*exp(-exp(-6.19856470)*input)
# Rounded form of the line above:
#y=0.94-0.92*exp(-exp(-6.20)*x)


# Rounded curves recorded per simulation type:
#Asym+(R0-Asym)*exp(-exp(lrc)*input)
#kataegis
#y=1.01-1.02*exp(-exp(-7.17)*x)
#omikli
#y=0.93-0.92*exp(-exp(-6.21)*x)


# Another recorded set of coefficients; the full-precision formulas now carry
# the `*x` factor that the rounded forms beneath them already had.
#[[1]]
#Asym          R0         lrc
#1.01343010 -0.01281911 -7.17099565
#y=1.01343010+(-1.02624921)*exp(-exp(-7.17099565)*x)
#y=1.01-1.03*exp(-exp(-7.17)*x)

#[[2]]
#Asym          R0         lrc
#0.92514662  0.01837694 -6.25403913
#y=0.92514662+(-0.90676968)*exp(-exp(-6.25403913)*x)
#y=0.93-0.91*exp(-exp(-6.25)*x)

# omikli rows only; filtered once instead of once per use.
omikli_df <- simul_merge_f_sum_df %>% filter(simul == "omikli")

# Last few distinct count / std ratios. The ratio is parenthesised because
# %>% binds tighter than `/`: without the parentheses the expression divided
# `count` by tail(unique(std)) instead of summarising the ratio.
(omikli_df$count / omikli_df$std) %>% unique() %>% tail()
#simul_merge_f_sum_df[is.na(simul_merge_f_sum_df)]
#simul_merge_f_sum_df%>%filter(count/std>1)

# Tabulate whether any ratio is infinite.
is.infinite(omikli_df$count / omikli_df$std) %>%
  table()

# DRC.YL fit on the omikli ratios, and an MM.2 fit with its first parameter
# fixed at 1 on count / 100 over the whole table.
dosefit3 <- drm(count / std ~ imd_cutoff, data = omikli_df, fct = DRC.YL())
dosefit4 <- drm(count / 100 ~ imd_cutoff, data = simul_merge_f_sum_df,
                fct = MM.2(fixed = c(1, NA)))

coef(dosefit3)
#c:(Intercept) d:(Intercept) e:(Intercept)
#0.02270827    1.01434249  367.68401016
# NOTE(review): dev.off() errors when no graphics device is open.
dev.off()
#c + \frac{d-c}{1+(e/x)}
# NOTE(review): `x` is only assigned further down the script; on a clean
# top-to-bottom run this line fails -- confirm whether it is meant as a note.
y <- 0.023 + (1.01 - 0.023) / (1 + 367.68 / x)

coef(dosefit4)
# Base-graphics scatter of AMS_n against imd_cutoff for the kataegis rows.
plot(y=(simul_merge_f_sum_df%>%filter(simul=="kataegis"))$AMS_n,
     x=(simul_merge_f_sum_df%>%filter(simul=="kataegis"))$imd_cutoff,
     xlim=c(0,10000),
     ylim=c(0,100))
#par(new=TRUE)
# Each call below draws one previously fitted drm model on its own plot, with
# a linear x axis (log="") and fixed axis limits. All four share the title
# "Asymptotic regression" regardless of the model that is plotted.
plot(dosefit, log="", main = "Asymptotic regression",col="red",
     xlim=c(0,10000),
     ylim=c(0,1.1))
plot(dosefit2, log="", main = "Asymptotic regression",col="red",
     xlim=c(0,10000),
     ylim=c(0,1))
plot(dosefit3, log="", main = "Asymptotic regression",col="red",
     xlim=c(0,10000),
     ylim=c(0,1))

plot(dosefit4, log="", main = "Asymptotic regression",col="red",
     xlim=c(0,10000),
     ylim=c(0,1))

# Asymptotic regression (SSasymp) of count / 100 on imd_cutoff, fitted over
# the whole merged table; the coefficients are printed right after.
fm <- nls(
  count / 100 ~ SSasymp(imd_cutoff, Asym, R0, lrc),
  data = simul_merge_f_sum_df
)
coef(fm)
# Coefficient sets recorded from earlier runs, each followed by the curve
# written out with rounded values:
#Asym         R0        lrc
#101.076791  -1.565501  -6.671877
#y=101.08+(-1.57-101.08)*exp(-exp(-6.67)*x)
#101.08+(-1.57-101.08)*exp(-exp(-6.67)*7)
#Asym          R0         lrc
#1.01076791 -0.01565501 -6.67187730
#y=1.01+(-0.016-1.01)*exp(-exp(-6.67)*x)
#Asym          R0         lrc
#0.94648344  0.04069107 -7.50024933
#y=0.95-0.91*exp(-exp(-7.50)*x)

# Predictor and response pulled out as plain vectors.
x <- simul_merge_f_sum_df[["imd_cutoff"]]
y <- simul_merge_f_sum_df[["count"]] / 100
# Small self-contained example data for trying out asymptotic fits in which
# some of the three parameters are held fixed.
y <- c(22.180, 21.941, 21.786, 21.724, 20.964, 19.128, 18.371, 17.508, 16.931)
x <- c(1, 3, 5, 9, 18, 36, 72, 144, 288)

# Asymptotic curve Asym + (R0 - Asym) * exp(-exp(lrc) * x) with Asym and lrc
# fixed at their defaults; only R0 is estimated.
f <- function(x, Asym = 1, lrc = -7, R0)
  Asym + (R0 - Asym) * exp(-exp(lrc) * x)
# R0 is passed by name: positionally it would be bound to `Asym` (the second
# formal), leaving R0 itself missing.
nls(
  y ~ f(x, R0 = R0),
  data = data.frame(x, y),
  start = list(R0 = 0))


# Same curve with R0 fixed at 25; Asym and lrc are estimated.
f <- function(x, Asym, lrc, R0 = 25)
  Asym + (R0 - Asym) * exp(-exp(lrc) * x)
nls(
  y ~ f(x, Asym, lrc),
  data = data.frame(x, y),
  start = list(Asym = 15, lrc = -3))


# Asymptotic curve with Asym fixed at 1 and R0 fixed at 0; only lrc is
# estimated.
f <- function(x, Asym = 1, lrc, R0 = 0)
  Asym + (R0 - Asym) * exp(-exp(lrc) * x)
f
# Two fixes compared with the earlier form of this call:
#  * `lrc` is passed by name (positionally it was bound to `Asym`);
#  * the data frame columns are named `x` and `y`, so the formula is evaluated
#    on the simulation data rather than on whatever global `x`/`y` exist.
nls(
  y ~ f(x, lrc = lrc),
  data = data.frame(x = simul_merge_f_sum_df$imd_cutoff,
                    y = simul_merge_f_sum_df$count / 100),
  start = list(lrc = -7))

# Exponential fit AMS_n ~ a * exp(b * imd_cutoff) on rows with cluster == "kataegis".
Fit <- nls(AMS_n ~ a*exp(b*imd_cutoff), start = list(a=1, b=1),data=simul_merge_f_sum_df%>%filter(cluster=="kataegis"))
# NOTE(review): this summarises `fm`, not the `Fit` model created on the line
# above -- confirm which was intended.
summary(fm)
# Adds a line to the current base plot. NOTE(review): the x values come from
# the kataegis-cluster subset while predict(dosefit) comes from a model fitted
# elsewhere in the script; the two may differ in length -- confirm.
lines((simul_merge_f_sum_df%>%filter(cluster=="kataegis"))$imd_cutoff,predict(dosefit))
# `std2`: std as a factor with a fixed level order, used as the colour
# aesthetic in the plots that follow.
simul_merge_f_sum_df<-simul_merge_f_sum_df%>%mutate(std2=as.character(std))
simul_merge_f_sum_df$std2<-factor(simul_merge_f_sum_df$std2,levels=c(25,50,75,100,250,500))
# Build the count / std vs imd_cutoff figure for one simulation type.
#
# df          data frame with columns simul, count, std, imd_cutoff and std2.
# simul_name  value of `simul` to keep (e.g. "omikli" or "kataegis").
#
# Returns a ggplot object: points coloured by std2, plus an SSasymp curve
# fitted by nls and drawn in red.
plot_found_ratio <- function(df, simul_name) {
  df %>%
    # `!!` forces the function argument, so a column that happened to be
    # called `simul_name` could not shadow it.
    filter(simul == !!simul_name) %>%
    ggplot(aes(y = count / std, x = imd_cutoff, col = std2)) +
    geom_point() +
    facet_wrap(~simul) +
    #  stat_smooth()+
    #nls(rate~SSmicmen(conc, Vm, K)
    geom_smooth(method = "nls", formula = y ~ SSasymp(x, Asym, R0, lrc),
                color = "red", se = FALSE, fullrange = TRUE) +
    theme_bw() +
    #  ggtitle("omikli(n=100)")+
    theme(axis.text = element_text(size = 30),
          axis.title = element_text(size = 30)) +
    ylab("found_ratio")
}

# omikli figure: display, then save as a 10 x 10 PDF.
Plot_o <- plot_found_ratio(simul_merge_f_sum_df, "omikli")
Plot_o
ggsave("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/FigS/omi_simul.pdf", Plot_o,
       width = 10, height = 10)

# kataegis figure: display, then save as a 10 x 10 PDF.
Plot_k <- plot_found_ratio(simul_merge_f_sum_df, "kataegis")
Plot_k
ggsave("/home/users/ayh/Projects/27_A3B/06_Figure_code/edit_figure/FigS/kataegis_simul.pdf", Plot_k,
       width = 10, height = 10)
  

# NOTE(review): `a` and `b` are also the parameter names used in the SSmicmen
# call below; these globals are not passed as `start`, so they look like
# intended starting values that are never used -- confirm.
a <- 15; b <- 0.5

# Per-simulation subsets, filtered once and reused by every fit below.
omikli_df <- simul_merge_f_sum_df %>% filter(simul == "omikli")
kataegis_df <- simul_merge_f_sum_df %>% filter(simul == "kataegis")

# Michaelis-Menten self-starting fit of the omikli ratios.
nls(omikli_df$count / omikli_df$std ~ SSmicmen(omikli_df$imd_cutoff, a, b))

# Two-parameter asymptotic (AR.2) and three-parameter Michaelis-Menten (MM.3)
# fits of count / std on imd_cutoff, per simulation type. The `.k` models use
# the kataegis subset; `model.asymp.k` was previously fitted on omikli data,
# which made it a duplicate of `model.asymp`.
model.asymp <- drm(count / std ~ imd_cutoff, data = omikli_df, fct = AR.2())
model.asymp.k <- drm(count / std ~ imd_cutoff, data = kataegis_df, fct = AR.2())
model.mm <- drm(count / std ~ imd_cutoff, data = omikli_df, fct = MM.3())
model.mm.k <- drm(count / std ~ imd_cutoff, data = kataegis_df, fct = MM.3())

summary(model.asymp)
summary(model.mm)


summary(model.asymp.k)
summary(model.mm.k)

# NLS.YL fits; for kataegis, zero counts are replaced by 1 before fitting.
model.yl <- nls(count / std ~ NLS.YL(imd_cutoff, a, i), data = omikli_df)
model.yl.k <- nls(count / std ~ NLS.YL(imd_cutoff, a, i),
                  data = kataegis_df %>% mutate(count = ifelse(count == 0, 1, count)))
summary(model.yl.k)

# omikli ratios as a plain vector. The definition now precedes its first use
# (the subsetting line used to come before the assignment).
t_c <- omikli_df$count / omikli_df$std
t_c[t_c >= 0]
t_c
kataegis_df
coef(model.asymp)
coef(model.mm)
summary(model.yl)
coef(model.yl)

summary(model.yl)
# Same ratio as a one-column data frame (column `count`), to look for NA rows.
t_c <- (omikli_df %>% dplyr::select(count)) / (omikli_df %>% dplyr::select(std))
t_c %>% filter(is.na(count))
# NOTE(review): `model.drm` is not defined anywhere in the visible script, so
# the call is left commented out.
# summary(model.drm)
summary(dosefit)
# count / std against imd_cutoff, coloured by std (as character) and faceted by
# simul, with an nls smoother using a Michaelis-Menten formula
# Vmax * x / (Km + x).
# NOTE(review): `model.nls` (the source of the start values) is not defined in
# the visible script, and `formula` is supplied inside `method.args` rather
# than as geom_smooth's own `formula` argument -- confirm this plot still runs.
simul_merge_f_sum_df%>%mutate(std2=as.character(std))%>%
  ggplot(aes(y=count/std,x=imd_cutoff,col=std2,))+
  geom_point()+
  facet_wrap(~simul)+
  #  stat_smooth()+
  #nls(rate~SSmicmen(conc, Vm, K)
  geom_smooth(method = nls,col = "skyblue",
              method.args = list(formula = y ~ Vmax * x / (Km + x),
                                 start = list(Km = coef(model.nls)[1], Vmax = coef(model.nls)[2])), se = F) +
  theme_bw()+
  #  ggtitle("omikli(n=100)")+
  theme(axis.text=element_text(size=30),
        axis.title=element_text(size=30))+
  ylab("found_ratio")

# Same plot with the curve parameterised as i * x / (1 + i * x / a).
# NOTE(review): start values again come from the undefined `model.nls`.
simul_merge_f_sum_df%>%mutate(std2=as.character(std))%>%
  ggplot(aes(y=count/std,x=imd_cutoff,col=std2,))+
  geom_point()+
  facet_wrap(~simul)+
  #  stat_smooth()+
  #nls(rate~SSmicmen(conc, Vm, K)
  geom_smooth(method = nls,col = "skyblue",
              method.args = list(formula = y ~ i * x / (1+(i*x/a)),
                                 start = list(a= coef(model.nls)[1], i = coef(model.nls)[2])), se = F) +
  theme_bw()+
  #  ggtitle("omikli(n=100)")+
  theme(axis.text=element_text(size=30),
        axis.title=element_text(size=30))+
  ylab("found_ratio")



# NOTE(review): `Plot` is not assigned anywhere in the visible script (only
# Plot_o, Plot_k and Plot2 are); every use of it below relies on an object
# left over in the session -- confirm.
Plot
# AMS_n / 100 against imd_cutoff for rows with cluster == "kataegis", with
# stat_smooth()'s default smoother.
Plot2<-simul_merge_f_sum_df%>%filter(cluster=="kataegis")%>%
  ggplot(aes(y=AMS_n/100,x=imd_cutoff))+
  geom_point()+
  stat_smooth()
Plot2
# Overlay an SSasymp curve fitted by nls (red) on each plot.
Plot+geom_smooth(method="nls", formula=y~SSasymp(x, Asym, R0, lrc), color="red", se=F, fullrange=T)
Plot2+geom_smooth(method="nls", formula=y~SSasymp(x, Asym, R0, lrc), color="red", se=F, fullrange=T)
# Global vectors holding the kataegis-cluster columns. The nls call below
# receives the same subset through `data`, where columns of those names are
# looked up first.
imd_cutoff<-(simul_merge_f_sum_df%>%filter(cluster=="kataegis"))$imd_cutoff
AMS_n<-(simul_merge_f_sum_df%>%filter(cluster=="kataegis"))$AMS_n
# Exponential-decay fit with imd_cutoff as the response and AMS_n as the
# predictor. NOTE(review): this direction is the reverse of the other fits in
# this script -- confirm it is intended.
nls_fit <- nls(formula = imd_cutoff ~ a*exp(-b *AMS_n),
               start = c(a=10, b=10), data = simul_merge_f_sum_df%>%filter(cluster=="kataegis"))
coef(nls_fit)

# Second element of the data list computed by ggplot_build() for `Plot`
# (presumably the data of its second layer -- confirm).
build <- ggplot_build(Plot)$data[[2]]
#  facet_wrap(~cluster)+
# NOTE(review): the layers below had no ggplot object in front of them and
# their trailing `+` ran straight into the `lm_eqn <-` assignment, which made
# the whole expression an invalid assignment target. They are kept here as a
# comment until the plot they belong to is restored.
# theme_bw() +
#   theme(axis.text = element_text(size = 30),
#         axis.title = element_text(size = 30),
#         strip.text = element_text(size = 20)) +
#   ylab("APOBEC mediated cluster")

#' Build a plotmath label for a log-linear fit
#'
#' Fits `y ~ log(x)` by least squares and returns the fitted equation and R^2
#' as a string that can be parsed as a plotmath expression (for example via
#' `geom_text(..., parse = TRUE)`).
#'
#' @param df Data frame with numeric columns `x` and `y`.
#' @return Character vector of length one of the form
#'   `italic(y) == a + b %.% log(italic(x)) * "," ~ ~italic(r)^2 ~ "=" ~ r2`,
#'   with `a`, `b` (2 significant digits) and `r2` (3 significant digits)
#'   taken from the fit. Earlier the intercept and slope shown were hard-coded
#'   numbers, so the label did not follow the data.
lm_eqn <- function(df) {
  m <- lm(y ~ log(x), df)
  eq <- substitute(
    italic(y) == a + b %.% log(italic(x)) * "," ~~ italic(r)^2 ~ "=" ~ r2,
    list(a = format(unname(coef(m)[1]), digits = 2),
         b = format(unname(coef(m)[2]), digits = 2),
         r2 = format(summary(m)$r.squared, digits = 3))
  )
  as.character(as.expression(eq))
}

# Log-linear fit of AMS_n on log(imd_cutoff) for rows with
# cluster == "kataegis"; coefficients are printed.
fit <- lm(AMS_n ~ log(imd_cutoff),
          data = simul_merge_f_sum_df %>% filter(cluster == "kataegis"))

coef(fit)

# NOTE(review): `sum_fit` and `simul_merge_df` are not defined in the visible
# part of this script; both are presumably created elsewhere -- confirm.
sum_fit[[1]]

# Number of distinct clusters per id / condition, one column per cluster type
# (rows with cluster == "other" are dropped first).
# `dplyr::select` is explicit because library(drc) attaches MASS, whose
# `select` masks dplyr's at this point in the script. pivot_wider() replaces
# the superseded spread(); names_sort = TRUE keeps the new columns in sorted
# order, as spread() produced them.
simul_merge_sum_df <- simul_merge_df %>%
  filter(cluster != "other") %>%
  dplyr::select(id, `#CHROM`, cluster, cluster_id, condition) %>%
  unique() %>%
  group_by(id, condition, cluster) %>%
  dplyr::summarise(cluster_n = n()) %>%
  pivot_wider(names_from = cluster, values_from = cluster_n, names_sort = TRUE)



simul_merge_sum_df