

# Adjusts observed LOY percentages in `dat` for Y-expression level.
#
# A reference LOY table is downloaded and cleaned with filter_function(), an
# exponential-decay curve of LOY_percent on sum_Y_exp is fitted to it, and the
# curve is then evaluated on `dat`. The positive part of the residual
# (observed minus predicted) is stored as the adjusted value.
#
# Args:
#   dat:       table with at least `sum_Y_exp` and `LOY_percent` columns.
#   outliers:  reference rows with LOY_percent above this value are excluded
#              from the fit.
#   plot:      if TRUE, draw the drc fit and a ggplot of `dat` with the fitted
#              curve overlaid.
#   min_cells: forwarded to filter_function() as the minimum cell count.
#
# Returns:
#   An unnamed list: [[1]] `dat` with added columns `predicted`, `residuals`
#   and `adj_LOY_percent`; [[2]] the fitted drc model.
LOY_adjust <- function(dat, outliers = 100, plot = FALSE, min_cells = 100) {

  library(aomisc)

  # Reference table used only to fit the curve; `dat` itself is not fitted.
  l <- as.data.table(
    readr::read_csv(
      "https://raw.githubusercontent.com/michaelcvermeulen/microglia-loss-of-y/main/data/LOY_tables/all_cell_types/LOY_prop_table_3000_1000_.txt"
    )
  )

  # score = -10 together with every cell type present keeps the reference
  # table as complete as the min_cells threshold allows.
  d <- filter_function(min_cells = min_cells, score = -10, l = l,
                       type = unique(l$cell_type))

  model <- drc::drm(LOY_percent ~ sum_Y_exp, fct = aomisc::DRC.expoDecay(),
                    data = d[d$LOY_percent <= outliers, ])

  dat$predicted <- predict(model, newdata = dat)
  dat$residuals <- dat$LOY_percent - dat$predicted
  # Negative residuals (observed below the curve) are clamped to zero.
  dat$adj_LOY_percent <- pmax(dat$residuals, 0)

  if (isTRUE(as.logical(plot))) {

    # `plot` the argument is a logical; R still resolves the call below to the
    # plot() function because it looks for a function-mode object.
    plot(model, log = "")

    a <- ggplot(dat, aes(sum_Y_exp, LOY_percent)) +
      geom_point(color = "grey50") +
      geom_line(aes(y = predicted))

    # ggplot objects are only drawn when printed; inside a function a bare
    # `a` is silently discarded, so print explicitly.
    print(a)
  }

  return(list(dat, model))
}


# Pulls the overall F-test p-value out of a fitted lm model.
#
# Args:
#   modelobject: an object inheriting from class "lm".
#
# Returns:
#   A bare numeric scalar (no names or other attributes): the upper-tail
#   probability of the model's F statistic.
#
# Errors if `modelobject` is not an "lm", or if its summary carries no F
# statistic.
lmp <- function(modelobject) {
  # inherits() copes with multi-element class vectors; comparing class() with
  # `!=` yields a length > 1 condition, which `if` rejects on current R.
  if (!inherits(modelobject, "lm")) stop("Not an object of class 'lm' ")
  f <- summary(modelobject)$fstatistic
  if (is.null(f)) {
    stop("Model summary has no F statistic; cannot compute a p value")
  }
  # f holds (value, numerator df, denominator df).
  p <- pf(f[1], f[2], f[3], lower.tail = FALSE)
  attributes(p) <- NULL
  return(p)
}


# Filters and cleans a LOY table.
#
# Steps, in order:
#   1. keep rows with TOTAL_CELLS strictly greater than `min_cells`;
#   2. keep rows whose cell_type is in `type`;
#   3. drop rows whose `file` matches "MD5893" or whose `tissue` matches "Lung";
#   4. recode neuro_degen_diagnosis to the short labels MCI / AD / PD / Control;
#   5. add z-scored sum_Y_exp and expr_Y columns plus a weighted combination;
#   6. keep rows whose scaled sum_Y_exp is at least `score`;
#   7. add LOY_percent = LOY_prop * 100.
#
# Args:
#   min_cells: rows must have more than this many TOTAL_CELLS.
#   score:     lower bound on sum_Y_exp_scaled.
#   l:         input table (data.frame or data.table).
#   type:      cell types to keep.
#
# Returns:
#   The filtered table with the added columns sum_Y_exp_scaled, expr_Y_scaled,
#   scaled_Y_metric and LOY_percent.
filter_function <- function(min_cells = 100, score = -1.25, l,
                            type = c("Oligodendrocyte", "Microglia", "Astrocyte", "Neuron", "OPC", "Pericyte")) {

  # Rows whose comparison is NA are dropped explicitly; a plain data.frame
  # would otherwise return an all-NA row for every NA in the index.
  enough_cells <- !is.na(l$TOTAL_CELLS) & l$TOTAL_CELLS > min_cells
  dat <- l[enough_cells, ]
  dat <- dat[dat$cell_type %in% type, ]
  dat <- dat[!grepl("MD5893", dat$file), ]
  dat <- dat[!grepl("Lung", dat$tissue), ]

  # Recode on the column vector rather than via `dat[rows, ]$col <- value`:
  # the latter fails on a data.frame when no row matches.
  diagnosis <- dat$neuro_degen_diagnosis
  diagnosis[which(dat$sample == "Microglia_MO_MCI3")] <- "MCI"
  diagnosis[diagnosis %in% c("Alzheimer's disease")] <- "AD"
  diagnosis[which(diagnosis == "Diffuse Lewy Body disease, limbic or transitional type")] <- "PD"
  diagnosis[diagnosis %in% c("Non-symptomatic", "Normal")] <- "Control"
  dat$neuro_degen_diagnosis <- diagnosis

  # scale() returns a one-column matrix; keep plain numeric columns.
  dat$sum_Y_exp_scaled <- as.numeric(scale(dat$sum_Y_exp))
  dat$expr_Y_scaled <- as.numeric(scale(dat$expr_Y))

  # Weighted mean of the two z-scores (weights 1.5 and 1, divided by 2).
  dat$scaled_Y_metric <- (dat$sum_Y_exp_scaled * 1.5 + dat$expr_Y_scaled * 1) / 2

  above_score <- !is.na(dat$sum_Y_exp_scaled) & dat$sum_Y_exp_scaled >= score
  dat <- dat[above_score, ]

  dat$LOY_percent <- dat$LOY_prop * 100

  return(dat)

}



