### R functions for Reproducing Analysis
### Matthew Lalli 2020
## N.B. I did not write all the functions here and not all functions included here are used in my analysis

# Rescale every column of a count table to the same total and, optionally,
# collapse the columns into a single log10(cpm + 1) profile.
#
# Args:
#   counts_data:   rows x columns table of counts (matrix-like). The original
#                  author suggests GetAssayData(obj, slot = "counts") as input.
#   summed_counts: TRUE (default) returns log10(cpm(row sums) + 1) of the
#                  rescaled table; FALSE returns the rescaled table itself.
#
# Returns:
#   Either the rescaled table (every column summing to the largest original
#   column total) or the log10(cpm + 1) of its row sums.
#
# cpm() is not defined in this file; presumably edgeR::cpm -- it must be
# available on the search path when summed_counts is TRUE.
drop_seq_average_TPM <- function(counts_data, summed_counts = TRUE)
{
  col_totals <- colSums(as.data.frame(counts_data))

  # Divide column by column. A plain `counts_data / col_totals` recycles the
  # totals down the rows (column-major order), so most entries would be
  # divided by the total of a different column.
  fractional_counts <- sweep(counts_data, 2, col_totals, "/")

  # Scale every column up to the size of the largest original column.
  transformed_counts <- fractional_counts * max(col_totals)

  if (!summed_counts) {
    return(transformed_counts)
  }

  agg <- rowSums(as.data.frame(transformed_counts))
  log10(cpm(agg) + 1)
}


# Attach to every cell the fraction of its RNA-assay column total that comes
# from rows whose name starts with "MT-" (case-insensitive).
#
# Args:
#   y: object exposing y@assays$RNA and `$<-` (presumably a Seurat object).
#
# Returns:
#   `y` with a new `percent.mito` entry. The value is a fraction (0-1), not a
#   percentage.
mito_qc <- function(y)
{
  rna <- y@assays$RNA
  mt_rows <- grep("^MT-", row.names(rna), value = TRUE, ignore.case = TRUE)
  mt_total <- Matrix::colSums(rna[mt_rows, ])
  all_total <- Matrix::colSums(rna)
  y$percent.mito <- mt_total / all_total
  y
}

# Row-wise sample variance of a numeric matrix (function taken from DESeq).
#
# Args:
#   x:     numeric matrix.
#   na.rm: passed to rowMeans() and rowSums(); TRUE by default.
#
# Returns:
#   One variance per row, using (number of non-NA values - 1) as denominator.
#   Rows with at most one non-NA value yield NA.
rowVars <- function (x, na.rm = TRUE)
{
  n_obs <- rowSums(!is.na(x))
  n_obs[n_obs <= 1] <- NA               # variance undefined for < 2 values
  centered <- x - rowMeans(x, na.rm = na.rm)
  rowSums(centered * centered, na.rm = na.rm) / (n_obs - 1)
}

# Annotate every cell with the guide RNA recorded for its barcode.
#
# Args:
#   y:        object exposing y@assays$RNA (columns = cells) and y@meta.data
#             (presumably a Seurat object).
#   bc_table: table with column V1 (barcode text) and V2 (guide name).
#
# Returns:
#   `y` with three new meta.data columns:
#     sgRNA              factor; first guide found for the cell, or "none"
#     gene_level         text before the first "_" of the guide name, with
#                        values matching 'none|no|NA' replaced by "0"
#     gene_level_numeric integer codes of gene_level treated as a factor
#
# Each cell barcode is used as a regular expression against V1, so a barcode
# also matches V1 entries that merely contain it.
#
# NOTE(review): the pattern 'none|no|NA' is a substring match, so any gene
# symbol containing "no" or "NA" is also set to "0" -- confirm this is wanted.
add_sgRNA <- function(y, bc_table)
{
  cell_ids <- colnames(y@assays$RNA)
  sgRNA <- rep("none", length(cell_ids))

  # seq_along() keeps the loop empty when there are no cells; the lookup is
  # done once per cell instead of twice.
  for (i in seq_along(cell_ids))
  {
    hits <- bc_table$V2[grep(cell_ids[i], bc_table$V1)]
    if (length(hits) > 0)
      sgRNA[i] <- paste(hits, collapse = ",")
  }

  # Only count the first guide per cell.
  sgRNA <- vapply(strsplit(as.character(sgRNA), ","), "[[", character(1), 1)
  y@meta.data$sgRNA <- as.factor(sgRNA)

  gene_level <- vapply(strsplit(as.character(sgRNA), "_"), "[[", character(1), 1)
  gene_level[grep('none|no|NA', gene_level)] <- 0
  y@meta.data$gene_level <- gene_level
  y@meta.data$gene_level_numeric <- as.numeric(as.factor(gene_level))
  return(y)
}

# Copy the pseudotime State of each cell onto the matching cells of another
# object.
#
# Args:
#   mon_pseudotime: object providing sampleNames() and a `State` entry
#                   (presumably a pseudotime-ordered monocle CellDataSet).
#   some_seurat:    object accepted by subset(cells = ...) whose column names
#                   are cell ids (presumably a Seurat object).
#
# Returns:
#   `some_seurat` restricted to the cells named by sampleNames(mon_pseudotime),
#   with a numeric `pt_state` entry.
#
# State goes through cbind() next to a character vector, so a factor State is
# stored as its integer codes. Cells without exactly one match end up as NA
# after as.numeric().
add_pseudotime_state_to_Seurat <- function(mon_pseudotime, some_seurat)
{
  mon_ids <- sampleNames(mon_pseudotime)
  some_seurat <- subset(some_seurat, cells = mon_ids)
  pt_table <- as.data.frame(cbind(V1 = mon_ids, V2 = mon_pseudotime$State))

  cell_ids <- colnames(some_seurat)
  pt_state <- rep("none", length(cell_ids))

  # seq_along() is safe for zero cells; the lookup runs once per cell. Each
  # cell id is used as a regular expression against the monocle ids.
  for (i in seq_along(cell_ids))
  {
    hits <- pt_table$V2[grep(cell_ids[i], pt_table$V1)]
    if (length(hits) > 0)
      pt_state[i] <- paste(hits, collapse = ",")
  }

  some_seurat$pt_state <- as.numeric(pt_state)
  return(some_seurat)
}



# Cell filtering followed by cell-cycle scoring, normalisation, scaling and
# variable-feature selection.
#
# Args:
#   y: object accepted by subset(), CellCycleScoring(), NormalizeData(),
#      ScaleData() and FindVariableFeatures() (presumably a Seurat object that
#      already carries nFeature_RNA and percent.mito).
#
# Returns:
#   The filtered, processed object.
#
# Uses `s.genes` and `g2m.genes`, which are not defined in this function and
# must exist in an enclosing environment.
#
# NOTE(review): the regression variables are spelled "nUMI", "S.score" and
# "G2M.score". Seurat documents CellCycleScoring() as writing "S.Score" and
# "G2M.Score", and Seurat 3 calls the UMI total "nCount_RNA" -- confirm these
# columns exist on `y`, otherwise they may not be regressed out.
QC_Filter_Seurat <- function(y)
{
  # Keep cells with 500 < genes detected < 7500 and mito fraction below 0.08.
  y <- subset(
    x = y,
    subset = nFeature_RNA > 500 & nFeature_RNA < 7500 & percent.mito < 0.08
  )

  # Ribosomal genes (^RPL|^RPS) are deliberately not removed here.

  y <- CellCycleScoring(
    object = y,
    s.features = s.genes,
    g2m.features = g2m.genes,
    set.ident = FALSE
  )
  y <- NormalizeData(
    object = y,
    normalization.method = "LogNormalize",
    scale.factor = 1e4
  )
  y <- ScaleData(
    y,
    vars.to.regress = c("batch", "nUMI", "percent.mito", "S.score", "G2M.score")
  )

  # "vst" is the selection method the original author notes Seurat 3 prefers.
  FindVariableFeatures(object = y, selection.method = "vst")
}

# Run PCA, neighbour graph, clustering, t-SNE and UMAP on an object.
#
# Args:
#   y: object accepted by RunPCA(), FindNeighbors(), FindClusters(),
#      RunTSNE() and RunUMAP() (presumably a scaled Seurat object).
#
# Returns:
#   The object after all five steps.
#
# Dimensions used: 40 PCs computed, 1:20 for neighbours, 1:11 for t-SNE and
# 1:6 for UMAP; clustering resolution 0.25.
#
# NOTE(review): reduction.type, dims.use, save.SNN and force.recalc look like
# Seurat 2 arguments of FindClusters() -- confirm whether the installed
# version still honours them.
PCA_TSNE <- function(y)
{
  y <- RunPCA(object = y, npcs = 40, ndims.print = 1:10)
  y <- FindNeighbors(object = y, dims = 1:20)
  y <- FindClusters(
    object = y,
    reduction.type = "pca",
    dims.use = 1:20,
    resolution = 0.25,
    save.SNN = TRUE,
    force.recalc = TRUE
  )
  y <- RunTSNE(y, dims = 1:11)
  RunUMAP(object = y, reduction = "pca", dims = 1:6)
}

# Sum the expression of every gene over all cells carrying one identity.
#
# Args:
#   SomeSeurat: object accepted by subset(idents = ...) and GetAssayData()
#               (presumably a Seurat object).
#   GOI:        identity class to aggregate; also used as the column name.
#   slot:       assay slot handed to GetAssayData(). Defaults to "data".
#
# Returns:
#   One-column data frame (genes x 1) named after `GOI`.
#
# NOTE(review): this function used to call GetAssayData(..., slots=counts).
# `slots` is not the name of the slot argument, so the request fell into `...`
# and the slot argument kept its default; Seurat documents that default as
# "data". The default here keeps that effective behaviour. Pass
# slot = "counts" if raw counts are what the analysis needs.
AggregateAllGOI <- function(SomeSeurat, GOI, slot = "data")
{
  myCells <- subset(SomeSeurat, idents = GOI)
  agg <- as.data.frame(rowSums(as.data.frame(GetAssayData(myCells, slot = slot))))
  colnames(agg) <- GOI
  return(agg)
}

# Alternative aggregation: row sums taken directly on the subsetted object.
# The original author notes it gives the same output as GetAssayData counts
# or the @assays$RNA slot.
#
# Args:
#   SomeSeurat: object accepted by subset(idents = ...) and rowSums().
#   GOI:        identity class to aggregate; also used as the column name.
#
# Returns:
#   One-column data frame named after `GOI`.
AggregateAllGOI_alt <- function(SomeSeurat, GOI)
{
  totals <- as.data.frame(rowSums(subset(SomeSeurat, idents = GOI)))
  colnames(totals) <- GOI
  totals
}


# Keep all "Nontargeting" cells plus, for each gene of interest, the cells of
# that identity whose expression of the gene is below the control mean.
#
# Args:
#   SomeSeurat:        object accepted by WhichCells(), GetAssayData() and
#                      `[` (presumably a Seurat object whose identities are
#                      gene-level guide labels).
#   genes_of_interest: gene names that are both rows of the assay and
#                      identity classes.
#   slot:              assay slot handed to GetAssayData(). Defaults to
#                      "data".
#
# Returns:
#   `SomeSeurat` restricted to the kept cells.
#
# NOTE(review): this function used to call GetAssayData(..., slots=scale.data).
# `slots` is not the name of the slot argument, so the slot kept its default;
# Seurat documents that default as "data". The default here keeps that
# effective behaviour. Pass slot = "scale.data" if scaled values were meant.
BestKnockDownCells <- function(SomeSeurat, genes_of_interest, slot = "data")
{
  # Both lookups are the same for every gene, so do them once.
  expr <- GetAssayData(SomeSeurat, slot = slot)
  control_cells <- WhichCells(SomeSeurat, idents = "Nontargeting")

  kept_per_gene <- lapply(as.character(genes_of_interest), function(GOI)
  {
    control_mean <- mean(expr[GOI, control_cells])
    goi_cells <- WhichCells(SomeSeurat, idents = GOI)
    # Index the cell names directly: extracting a single cell returns an
    # unnamed value, so relying on names() would silently drop it.
    goi_cells[which(expr[GOI, goi_cells] < control_mean)]
  })

  keepThese <- c(control_cells, unlist(kept_per_gene))
  return(SomeSeurat[, keepThese])
}

# Build pseudo-bulk replicates: repeatedly draw `number_cells` cells of one
# identity at random (without replacement) and sum each gene over the draw.
#
# Args:
#   SomeSeurat:     object accepted by subset(idents = ...) and
#                   GetAssayData() (presumably a Seurat object).
#   GOI:            identity class to sample from.
#   number_cells:   cells per pseudo-replicate.
#   number_samples: number of pseudo-replicates.
#   slot:           assay slot handed to GetAssayData(). Defaults to "data".
#
# Returns:
#   Data frame, genes x number_samples, every column named `GOI`.
#
# NOTE(review): this function used to call GetAssayData(..., slots=counts).
# `slots` is not the name of the slot argument, so the slot kept its default;
# Seurat documents that default as "data". getDE() in this file is written
# around that behaviour, so it is kept as the default. Pass slot = "counts"
# for raw counts.
#
# NOTE(review): set.seed(NULL) re-seeds the global RNG after every draw, so a
# set.seed() by the caller does not make the replicates reproducible. Kept
# because it looks deliberate -- confirm.
AggregateCounts <- function(SomeSeurat, GOI, number_cells, number_samples, slot = "data")
{
  myCells <- subset(SomeSeurat, idents = GOI)
  expr <- GetAssayData(myCells, slot = slot)
  total_cells <- ncol(expr)

  if (number_samples < 1)
    stop("AggregateCounts: number_samples must be at least 1", call. = FALSE)
  if (number_cells > total_cells)
    stop("AggregateCounts: ", number_cells, " cells requested per replicate but only ",
         total_cells, " cells have identity '", GOI, "'", call. = FALSE)

  pseudo_bulk <- lapply(seq_len(number_samples), function(i)
  {
    random_subset <- sample(x = seq_len(total_cells), size = number_cells, replace = FALSE)
    set.seed(NULL)
    rowSums(as.data.frame(expr[, random_subset]))
  })

  # One column per replicate. The former `byrow=T` was passed to
  # as.data.frame(), where it had no effect, so it is dropped.
  my_agg <- as.data.frame(do.call(cbind, pseudo_bulk))
  row.names(my_agg) <- rownames(expr)
  colnames(my_agg) <- rep(GOI, number_samples)
  return(my_agg)
}

# Draw two random down-sampled sets of cells from one identity and return
# their per-gene sums, e.g. to compare pseudo-bulk profiles of different size.
#
# Args:
#   SomeSeurat: object accepted by WhichCells() and GetAssayData()
#               (presumably a Seurat object).
#   GOI:        identity class to sample from.
#   num1, num2: `downsample` values for the first and second set.
#   slot:       assay slot handed to GetAssayData(). Defaults to "data".
#
# Returns:
#   list(aggregate_1, aggregate_2), each a one-column data frame of row sums.
#   The column name is the default one produced by as.data.frame().
#
# NOTE(review): this function used to call GetAssayData(..., slots=counts).
# `slots` is not the name of the slot argument, so the slot kept its default;
# Seurat documents that default as "data". The default here keeps that
# effective behaviour. Pass slot = "counts" for raw counts.
#
# The cpm()-normalised copies that were computed but never returned have been
# removed.
compare2sets <- function(SomeSeurat, GOI, num1, num2, slot = "data")
{
  # The second seed is drawn after the first WhichCells() call, exactly as
  # before, so the random stream is consumed in the same order.
  seed1 <- round(runif(1, 0, 10000))
  random_1 <- WhichCells(object = SomeSeurat, idents = GOI, downsample = num1, seed = seed1)
  seed2 <- round(runif(1, 0, 10000))
  random_2 <- WhichCells(object = SomeSeurat, idents = GOI, downsample = num2, seed = seed2)

  expr <- GetAssayData(SomeSeurat, slot = slot)
  aggregate_1 <- as.data.frame(rowSums(as.data.frame(expr[, random_1])))
  aggregate_2 <- as.data.frame(rowSums(as.data.frame(expr[, random_2])))

  return(list(aggregate_1, aggregate_2))
}

# Differential expression between cells carrying a guide and non-targeting
# controls, using pseudo-bulk replicates and the edgeR GLM workflow
# (DGEList, calcNormFactors, estimateGLMCommonDisp, glmFit, glmLRT).
#
# Args:
#   SomeSeurat:   object accepted by AggregateCounts().
#   number_cells: cells per pseudo-replicate.
#   number_reps:  pseudo-replicates per group.
#   GOI:          identity class of the targeted cells; controls are the
#                 "Nontargeting" identity.
#
# Returns:
#   The topTags() table restricted to FDR < 0.005 and |logFC| > 0.5.
#
# NOTE(review): the aggregated values are squared before entering DGEList();
# the original comment says this is for the case where log-transformed raw
# counts were used. Squaring is not the inverse of a log transform -- confirm
# this is intended.
getDE <- function(SomeSeurat, number_cells, number_reps, GOI)
{
  pseudo_replicates <- cbind(AggregateCounts(SomeSeurat, GOI, number_cells, number_reps),
                             AggregateCounts(SomeSeurat, "Nontargeting", number_cells, number_reps))

  # Build the sample names in one step; a `2:number_reps` loop counts
  # backwards (2, 1) when number_reps is 1 and yields too many names.
  rep_ids <- seq_len(number_reps)
  colnames(pseudo_replicates) <- c(paste0("GOI_", rep_ids), paste0("CTRL_", rep_ids))
  groups <- as.factor(c(rep("GOI", number_reps), rep("CTRL", number_reps)))

  GuideDGE <- DGEList(pseudo_replicates^2, group = groups) # if log transformed raw counts were used
  #GuideDGE <- DGEList(pseudo_replicates, group=groups)

  # Keep genes with cpm > 20 in at least number_reps samples (half of them).
  x <- GuideDGE
  selr1 <- rowSums(cpm(x$counts) > 20) >= number_reps
  x <- x[selr1, ]

  # Drop mitochondrial genes. Guard the negative index: with no match,
  # x[-integer(0), ] would remove every gene instead of none.
  mito.genes <- grep(pattern = "^MT-", x = rownames(x), ignore.case = TRUE)
  if (length(mito.genes) > 0)
    x <- x[-mito.genes, ]

  x <- calcNormFactors(x)
  design <- model.matrix(~x$samples$group)
  xglm <- estimateGLMCommonDisp(x, design, verbose = TRUE)

  fit <- glmFit(xglm, design)
  lrt <- glmLRT(fit, coef = 2)  # second design column: GOI vs CTRL

  deGenes <- topTags(lrt, n = Inf)$table
  deSig <- subset(deGenes, deGenes$FDR < 0.005)
  deSig <- subset(deSig, abs(deSig$logFC) > 0.5)

  return(deSig)
}

# Panel function that writes the squared correlation of x and y in the middle
# of the current panel.
# Usage: pairs(counts_matrix, upper.panel = panel.cor)
#
# Args:
#   x, y: numeric vectors of equal length.
#
# Returns:
#   The value of text(); called for its drawing side effect.
panel.cor <- function(x, y)
{
  r <- cor(x, y)
  txt <- format(r^2, digits = 2)

  # Centre the label on the plotting region. max(x)/2, max(y)/2 is only
  # inside the panel when the data happen to start near zero.
  usr <- par("usr")
  x_mid <- mean(usr[1:2])
  y_mid <- mean(usr[3:4])
  # On log axes "usr" holds log10 limits, while text() expects data values.
  if (par("xlog")) x_mid <- 10^x_mid
  if (par("ylog")) y_mid <- 10^y_mid

  text(x_mid, y_mid, paste("r2=", txt, sep = ""), cex = 2)
}


# The author's original Drop-seq filtration: compute the mitochondrial
# fraction, filter cells, normalise, scale and pick variable features.
#
# Args:
#   y: object exposing y@assays$RNA and accepted by AddMetaData(), VlnPlot(),
#      subset(), NormalizeData(), ScaleData() and FindVariableFeatures()
#      (presumably a Seurat object).
#
# Returns:
#   The filtered, processed object.
#
# NOTE(review): the VlnPlot() result is neither printed nor returned, so
# presumably nothing is drawn when this runs inside the function -- confirm.
# NOTE(review): mean.function, dispersion.function, x.low.cutoff,
# x.high.cutoff and y.cutoff look like Seurat 2 arguments -- confirm the
# installed FindVariableFeatures() still uses them.
filterDropData <- function(y)
{
  rna <- y@assays$RNA
  mt_rows <- grep(pattern = "^MT-", x = rownames(x = rna), value = TRUE, ignore.case = TRUE)
  mt_fraction <- Matrix::colSums(rna[mt_rows, ]) / Matrix::colSums(rna)
  y <- AddMetaData(object = y, metadata = mt_fraction, col.name = "percent.mito")
  VlnPlot(object = y, features = c("nCount_RNA", "nFeature_RNA", "percent.mito"), nCol = 3)

  # Keep cells with 200 < genes detected < 3000 and mito fraction below 0.15.
  y <- subset(
    x = y,
    subset = nFeature_RNA > 200 & nFeature_RNA < 3000 & percent.mito < 0.15
  )

  y <- NormalizeData(object = y, normalization.method = "LogNormalize", scale.factor = 1e4)
  y <- ScaleData(y, vars.to.regress = c("nCount_RNA", "percent.mito"))
  FindVariableFeatures(
    object = y,
    mean.function = ExpMean,
    dispersion.function = LogVMR,
    x.low.cutoff = 0.0125,
    x.high.cutoff = 3,
    y.cutoff = 0.5
  )
}

## monocle functions 
# Run detectGenes() (min_expr = 0.1), store each cell's column total as
# Total_mRNAs, and show the distribution of genes detected per cell so that
# filtering bounds can be chosen.
#
# Args:
#   some_mon: object accepted by detectGenes(), pData() and exprs()
#             (presumably a monocle CellDataSet).
#
# Returns:
#   `some_mon` after detectGenes(), with pData(some_mon)$Total_mRNAs added.
#
# Side effects: prints a summary, draws a histogram and prints a hint.
# The list of genes expressed in >= 50 cells used to be computed here and
# then discarded; that dead computation has been removed.
detect_expression <- function(some_mon)
{
  some_mon <- detectGenes(some_mon, min_expr = 0.1)
  pData(some_mon)$Total_mRNAs <- Matrix::colSums(exprs(some_mon))
  print(summary(some_mon$num_genes_expressed))
  hist(some_mon$num_genes_expressed)
  print("Use these values to select number of genes to filter")
  return(some_mon)
}

# Keep genes seen in at least 50 cells and cells whose number of detected
# genes lies strictly between the two bounds, then estimate size factors and
# dispersions.
#
# Args:
#   some_mon:    object accepted by fData(), pData(), estimateSizeFactors()
#                and estimateDispersions() (presumably a monocle CellDataSet
#                already carrying num_cells_expressed / num_genes_expressed).
#   lower_bound: exclusive lower limit on num_genes_expressed.
#   upper_bound: exclusive upper limit on num_genes_expressed.
#
# Returns:
#   The filtered object with size factors and dispersions estimated.
filter_expression_normalize <- function(some_mon, lower_bound, upper_bound)
{
  gene_info <- fData(some_mon)
  kept_genes <- row.names(subset(gene_info, num_cells_expressed >= 50))
  some_mon <- some_mon[kept_genes]

  genes_per_cell <- pData(some_mon)$num_genes_expressed
  in_range <- genes_per_cell > lower_bound & genes_per_cell < upper_bound
  some_mon <- some_mon[, in_range]

  estimateDispersions(estimateSizeFactors(some_mon))
}

# Annotate every cell of a monocle object with the guide RNA(s) recorded for
# its barcode.
#
# Args:
#   some_mon: object supporting colnames<-(), colnames() and pData<-()
#             (presumably a monocle CellDataSet).
#   bc_table: table with column V1 (barcode text) and V2 (guide name).
#
# Returns:
#   `some_mon` with column names truncated to 18 characters and two new
#   pData columns:
#     sgRNA      factor; all guides matched for the cell joined by ",",
#                or "none"
#     gene_level text before the first "_" of that label, with values
#                matching 'none|no' replaced by "0"
#
# Each barcode is used as a regular expression against V1. Unlike add_sgRNA(),
# cells with several guides keep the whole comma-joined label. The numeric
# recoding that used to be computed here was never stored and is removed.
add_sgRNA_monocle <- function(some_mon, bc_table)
{
  colnames(some_mon) <- substr(colnames(some_mon), 1, 18)
  cell_ids <- colnames(some_mon)
  sgRNA <- rep("none", length(cell_ids))

  # seq_along() is safe for zero cells; the lookup runs once per cell.
  for (i in seq_along(cell_ids))
  {
    hits <- bc_table$V2[grep(cell_ids[i], bc_table$V1)]
    if (length(hits) > 0)
      sgRNA[i] <- paste(hits, collapse = ",")
  }

  pData(some_mon)$sgRNA <- as.factor(sgRNA)
  gene_level <- vapply(strsplit(as.character(sgRNA), "_"), "[[", character(1), 1)
  gene_level[grep('none|no', gene_level)] <- 0
  pData(some_mon)$gene_level <- gene_level
  return(some_mon)
}

# Two-component tSNE reduction followed by clustering.
#
# Args:
#   SFARI_mon: object accepted by reduceDimension() and clusterCells()
#              (presumably a monocle CellDataSet).
#
# Returns:
#   The object after both steps.
#
# To inspect the result, e.g.:
#   plot_cell_clusters(obj, color_by = "gene_level", cell_size = 1)
reduce_and_cluster <- function(SFARI_mon)
{
  reduced <- reduceDimension(
    SFARI_mon,
    max_components = 2,
    reduction_method = 'tSNE',
    verbose = TRUE
  )
  clusterCells(reduced)
}

# Choose ordering genes from the dispersion table, then reduce dimension and
# order the cells.
#
# Args:
#   SFARI_mon: object accepted by dispersionTable(), setOrderingFilter(),
#              reduceDimension() and orderCells() (presumably a monocle
#              CellDataSet with dispersions estimated).
#
# Returns:
#   The ordered object.
#
# Ordering genes are those with mean_expression >= 0.05 and an empirical
# dispersion of at least the fitted dispersion.
pseudotime_ordering <- function(SFARI_mon)
{
  dispersions <- dispersionTable(SFARI_mon)
  informative <- subset(
    dispersions,
    mean_expression >= 0.05 & dispersion_empirical >= 1 * dispersion_fit
  )
  ordered <- setOrderingFilter(SFARI_mon, informative$gene_id)
  plot_ordering_genes(ordered)
  ordered <- reduceDimension(ordered, max_components = 2)
  orderCells(ordered)
}

# Plot the cell trajectory with three groups of cells coloured by guide
# status: 0 = other, 1 = "Nontargeting", 2 = gene(s) of interest.
#
# Args:
#   pseudotime_ordered_monocle: object accepted by pData() and
#                               plot_cell_trajectory() (presumably an ordered
#                               monocle CellDataSet with a gene_level column).
#   gene_of_interest:           gene_level value(s) to highlight.
#   rotate:                     passed to plot_cell_trajectory() as `theta`.
#
# Returns:
#   The assembled plot object.
#
# NOTE(review): colours are assigned by position to the levels present; if a
# group has no cells the remaining groups presumably shift colour -- confirm.
highlight_guides_in_pseudotime <- function(pseudotime_ordered_monocle, gene_of_interest, rotate)
{
  ptom <- pseudotime_ordered_monocle
  gene_level <- pData(ptom)$gene_level

  # Nontargeting is assigned last, so it wins if it is also in gene_of_interest.
  GuideStatus <- rep(0, length(colnames(ptom)))
  GuideStatus[which(gene_level %in% gene_of_interest)] <- 2
  GuideStatus[which(gene_level %in% c("Nontargeting"))] <- 1
  pData(ptom)$Guide <- as.factor(GuideStatus)

  a <- plot_cell_trajectory(ptom, color_by = "Guide", cell_size = 2, theta = rotate)
  # Hide the default points. Use the full element name `layers`: assigning
  # through `a$layer` only worked via partial matching and added a stray
  # `layer` element to the plot object.
  a$layers[[2]]$aes_params$size <- -10
  # Sort by Guide so higher statuses come later in the data.
  a$data <- a$data[order(a$data$Guide), ]

  a +
    geom_point(aes(color = Guide, size = Guide)) +
    scale_size_manual(values = c(2, 2, 2)) +
    scale_color_manual(values = c("grey", "firebrick", "mediumblue")) +
    scale_alpha_manual(values = c(0.4, 1, 1)) +
    theme_grey(base_size = 20)
}

# Test whether cells carrying guides for a gene are distributed across
# pseudotime States differently from all cells.
#
# Args:
#   pseudotime_ordered_monocle: object with pData()$gene_level and a State
#                               entry (presumably an ordered CellDataSet).
#   gene_of_interest:           gene_level value(s) selecting the guide cells.
#
# Prints, in order: a chi-squared test of the guide cells' State counts
# against the proportions seen in all cells, the observed/expected ratio per
# State, and a t-test comparing as.numeric(State) of guide cells and all
# cells. Returns the value of the last print() call.
#
# The names `ptom`, `cells_with_guide` and `observed_counts` appear in the
# "data:" line of the printed tests, so they are kept as they are.
sgRNA_pt_enrich_test <- function(pseudotime_ordered_monocle, gene_of_interest)
{
  ptom <- pseudotime_ordered_monocle
  cells_with_guide <- ptom[, which(pData(ptom)$gene_level %in% gene_of_interest)]

  state_counts_all <- table(ptom$State)
  observed_counts <- table(cells_with_guide$State)

  # Proportion of all cells in each State, and the count expected from it.
  expected_proportion <- state_counts_all / sum(state_counts_all)
  expected_cell_num <- sum(observed_counts) / sum(state_counts_all) * state_counts_all

  print(chisq.test(as.numeric(observed_counts), p = as.numeric(expected_proportion)))
  print(observed_counts / expected_cell_num)
  print(t.test(as.numeric(cells_with_guide$State), as.numeric(ptom$State)))
}

# Same enrichment test as the gene-level version, but cells are selected by
# individual guide (pData()$sgRNA) instead of gene_level.
#
# Args:
#   pseudotime_ordered_monocle: object with pData()$sgRNA and a State entry
#                               (presumably an ordered CellDataSet).
#   guide_of_interest:          sgRNA value(s) selecting the guide cells.
#
# Prints, in order: a chi-squared test of the guide cells' State counts
# against the proportions seen in all cells, the observed/expected ratio per
# State, and a t-test comparing as.numeric(State) of guide cells and all
# cells. Returns the value of the last print() call.
#
# The names `ptom`, `cells_with_guide` and `observed_counts` appear in the
# "data:" line of the printed tests, so they are kept as they are.
indiv_sgRNA_pt_enrich_test <- function(pseudotime_ordered_monocle, guide_of_interest)
{
  ptom <- pseudotime_ordered_monocle
  cells_with_guide <- ptom[, which(pData(ptom)$sgRNA %in% guide_of_interest)]

  state_counts_all <- table(ptom$State)
  observed_counts <- table(cells_with_guide$State)

  # Proportion of all cells in each State, and the count expected from it.
  expected_proportion <- state_counts_all / sum(state_counts_all)
  expected_cell_num <- sum(observed_counts) / sum(state_counts_all) * state_counts_all

  print(chisq.test(as.numeric(observed_counts), p = as.numeric(expected_proportion)))
  print(observed_counts / expected_cell_num)
  print(t.test(as.numeric(cells_with_guide$State), as.numeric(ptom$State)))
}


# Test whether cells of one identity are spread over the values of a
# metadata variable differently from all cells.
#
# Args:
#   SomeSeurat:       object accepted by subset(idents = ...) and FetchData()
#                     (presumably a Seurat object).
#   cluster:          variable name handed to FetchData().
#   gene_of_interest: identity class selecting the cells to test.
#
# Prints the chi-squared test and the observed/expected ratio per level;
# returns the value of the last print() call.
#
# The name `observed_counts` appears in the "data:" line of the printed test,
# so it is kept as it is.
ClusterEnrichment <- function(SomeSeurat, cluster, gene_of_interest)
{
  cells_with_guide <- subset(SomeSeurat, idents = gene_of_interest)

  counts_all <- table(FetchData(SomeSeurat, cluster))
  observed_counts <- table(FetchData(cells_with_guide, cluster))

  # Proportion of all cells at each level, and the count expected from it.
  expected_proportion <- counts_all / sum(counts_all)
  expected_cell_num <- sum(observed_counts) / sum(counts_all) * counts_all

  print(chisq.test(as.numeric(observed_counts), p = as.numeric(expected_proportion)))
  print(observed_counts / expected_cell_num)
}


# Total merged-exon width per gene symbol, from the hg19 knownGene
# annotation.
#
# Args:
#   symbols: gene symbols; those that are not keys of org.Hs.egSYMBOL2EG are
#            silently skipped.
#
# Returns:
#   The sapply() result over the mapped gene ids: the summed width of the
#   reduced exons per id, with NULL for ids that have no exons in the
#   annotation (so the result may be a list).
#
# Attaches TxDb.Hsapiens.UCSC.hg19.knownGene and org.Hs.eg.db. library() is
# used instead of require() so that a missing package stops here with a clear
# message rather than failing later at the first annotation call.
hg19GeneLengths <- function(symbols)
{
  library(TxDb.Hsapiens.UCSC.hg19.knownGene)
  library(org.Hs.eg.db)
  exons.db <- exonsBy(TxDb.Hsapiens.UCSC.hg19.knownGene, by = 'gene')
  known <- symbols[symbols %in% keys(org.Hs.egSYMBOL2EG)]
  egs <- unlist(mget(known, org.Hs.egSYMBOL2EG))
  sapply(egs, function(eg)
  {
    exons <- exons.db[[eg]]
    if (length(exons)) {
      # Merge overlapping exons so shared bases are counted once.
      exons <- reduce(exons)
      sum(width(exons))
    }
  })
}


# One-versus-rest differential test for a pseudotime State.
#
# Args:
#   pt_ordered_monocle: object with a State entry, accepted by
#                       differentialGeneTest() (presumably an ordered
#                       monocle CellDataSet).
#   state_of_interest:  State value marked 1; all other cells are 0.
#
# Returns:
#   Rows of the differentialGeneTest() result with qval < 0.0001, sorted by
#   increasing qval.
getPseudoTimeDE <- function(pt_ordered_monocle, state_of_interest)
{
  in_state <- which(pt_ordered_monocle$State == state_of_interest)
  pt_ordered_monocle$pt_binary <- replace(rep(0, length(pt_ordered_monocle$State)), in_state, 1)

  tested <- differentialGeneTest(pt_ordered_monocle, fullModelFormulaStr = '~pt_binary', cores = 2)
  significant <- subset(tested, tested$qval < 0.0001)
  significant[order(significant$qval), ]
}

# Markers, computed with FindMarkers(), for the cells that sit in the given
# pseudotime State(s).
#
# Args:
#   pt_ordered_monocle: object with a State entry and a @phenoData slot whose
#                       row names are cell ids.
#   some_seurat:        the Seurat object of the cells used in Monocle.
#   state_of_interest:  State value(s) selecting the cells.
#   positive:           passed to FindMarkers() as only.pos.
#
# Returns:
#   The FindMarkers() result (logfc.threshold = 0.1).
#
# If the Seurat cell names lack the "-<number>" suffix present in the monocle
# names, strip it first, e.g. sapply(strsplit(cells, "-"), "[[", 1).
getSeuratPseudoTimeDE <- function(pt_ordered_monocle, some_seurat, state_of_interest, positive=T)
{
  in_state <- which(pt_ordered_monocle$State %in% state_of_interest)
  state_cells <- rownames(pt_ordered_monocle@phenoData)[in_state]
  FindMarkers(
    some_seurat,
    WhichCells(some_seurat, state_cells),
    only.pos = positive,
    logfc.threshold = 0.1
  )
}

# Names of the cells that sit in the given pseudotime State(s).
#
# Args:
#   pt_ordered_monocle: object with a State entry and a @phenoData slot whose
#                       row names are cell ids.
#   some_seurat:        the Seurat object of the cells used in Monocle; not
#                       used by this function.
#   state_of_interest:  State value(s) selecting the cells.
#
# Returns:
#   Character vector of cell names, as stored in the monocle object.
#
# If the Seurat cell names lack the "-<number>" suffix, strip it from the
# result, e.g. sapply(strsplit(cells, "-"), "[[", 1).
getSeuratPseudoTimeCellList <- function(pt_ordered_monocle, some_seurat, state_of_interest)
{
  in_state <- which(pt_ordered_monocle$State %in% state_of_interest)
  rownames(pt_ordered_monocle@phenoData)[in_state]
}


# Show knock-down efficiency for one gene: a violin plot of its expression in
# targeted versus "Nontargeting" cells next to a small summary table.
#
# Args:
#   some_seurat:      object with a gene_level entry, accepted by Idents<-(),
#                     GetAssayData(), WhichCells() and VlnPlot() (presumably
#                     a Seurat object).
#   gene_of_interest: gene name that is both an assay row and a gene_level
#                     value.
#
# Returns:
#   The value of grid.arrange(); called for its drawing side effect.
#
# Table rows: mean expression and fraction of cells with non-zero expression.
# Table columns: controls, targeted cells, targeted/control ratio.
#
# NOTE(review): this function used to call GetAssayData(..., slots=scale.data).
# `slots` is not the name of the slot argument, so the slot kept its default;
# Seurat documents that default as "data". The slot is now named explicitly
# so the values read are unchanged.
visualizeGuideEfficiency <- function(some_seurat, gene_of_interest)
{
  Idents(some_seurat) <- some_seurat$gene_level
  GOI <- gene_of_interest

  expr <- GetAssayData(some_seurat, slot = "data")
  ctrl_expr <- expr[GOI, WhichCells(some_seurat, idents = "Nontargeting")]
  targ_expr <- expr[GOI, WhichCells(some_seurat, idents = GOI)]

  ctrl_mean <- mean(ctrl_expr)
  targeted_mean <- mean(targ_expr)

  # Fraction of cells expressing the gene. The former table(x == 0)[1] picked
  # the count of zero cells whenever no cell expressed the gene, reporting
  # 100% instead of 0%.
  pct_untarg <- mean(ctrl_expr != 0)
  pct_targ <- mean(targ_expr != 0)

  kd_eff <- matrix(c(ctrl_mean, targeted_mean, targeted_mean/ctrl_mean,
                     pct_untarg, pct_targ, pct_targ/pct_untarg),
                   ncol = 3, byrow = TRUE)
  colnames(kd_eff) <- c("Nontargeting",paste("Targeting\n",GOI),"Effect Size\n (Targeted/Nontargeted)")
  rownames(kd_eff) <- c("Mean     \nExpression ", "Percent of    \n Cells Expressing")

  a <- VlnPlot(some_seurat, features = c(GOI), idents = c(GOI, "Nontargeting"),
               log = TRUE, adjust = 1, pt.size = 0.1, cols = c("lightgrey", "purple4"))
  a$labels$x <- "Guide"
  b <- tableGrob(round(kd_eff, 3))

  # To save instead of display, wrap the next call in pdf(...) / dev.off().
  grid.arrange(a, b, nrow = 1)
}


# Guide-level knock-down summary for one gene: a violin plot of its
# expression per guide and per non-targeting control, next to a table.
#
# Args:
#   some_seurat:      object accepted by GetAssayData(), WhichCells() and
#                     VlnPlot(), whose identities are individual guides named
#                     "<gene>_G1..3" and "Nontargeting_1..3" (presumably a
#                     Seurat object).
#   gene_of_interest: gene name; also the prefix of its three guide names.
#
# Returns:
#   The value of grid.arrange(); called for its drawing side effect.
#
# Table rows: mean expression in controls, mean expression in targeted cells,
# fraction of control cells with non-zero expression, fraction of targeted
# cells with non-zero expression. Columns 1-3 hold the three guides or
# controls and column 4 their mean.
#
# NOTE(review): this function used to call GetAssayData(..., slots=scale.data).
# `slots` is not the name of the slot argument, so the slot kept its default;
# Seurat documents that default as "data". The slot is now named explicitly
# so the values read are unchanged.
visualizeGuideEfficiency_GuideLevel <- function(some_seurat, gene_of_interest)
{
  GOI <- gene_of_interest
  guides <- paste(GOI, c("_G1", "_G2", "_G3"), sep = "")
  controls <- c("Nontargeting_1", "Nontargeting_2", "Nontargeting_3")

  expr <- GetAssayData(some_seurat, slot = "data")

  # Expression of GOI in the cells of one identity.
  goi_expr <- function(ident) expr[GOI, WhichCells(some_seurat, idents = ident)]
  # Mean expression per identity.
  mean_by_ident <- function(idents)
    vapply(idents, function(id) mean(goi_expr(id)), numeric(1), USE.NAMES = FALSE)
  # Fraction of cells expressing the gene. The former table(x == 0)[1] picked
  # the count of zero cells whenever no cell expressed the gene, reporting
  # 100% instead of 0%.
  pct_by_ident <- function(idents)
    vapply(idents, function(id) mean(goi_expr(id) != 0), numeric(1), USE.NAMES = FALSE)
  # Append the mean of the three values as a fourth entry.
  with_mean <- function(v) c(v, mean(v))

  kd_eff <- matrix(c(with_mean(mean_by_ident(controls)),
                     with_mean(mean_by_ident(guides)),
                     with_mean(pct_by_ident(controls)),
                     with_mean(pct_by_ident(guides))),
                   ncol = 4, byrow = TRUE)

  colnames(kd_eff) <- c('Guide 1','Guide 2','Guide 3','Mean')
  rownames(kd_eff) <- c('Expression_Nontargeting','Expression Targeting','Percent Cells Expressing Nontargeting','Percent Cells Expressing Targeting')

  a <- VlnPlot(some_seurat, features = c(GOI), idents = c(guides, controls),
               log = TRUE, adjust = 1, pt.size = 0.1,
               cols = c(rep("lightgrey", 3), rep("purple4", 3)))
  a$labels$x <- "Guide"

  b <- tableGrob(round(kd_eff, 3))
  grid.arrange(a, b, nrow = 1)
}



# Faceted line plot of the mean log(cpm) of fitted values per time label,
# one panel per gene.
#
# Args:
#   geneSet:         gene names; names that are not row names of the fit are
#                    dropped.
#   time_course_lrt: object with row names and a `fitted.values` matrix
#                    (presumably an edgeR fit of the bulk time course).
#
# Returns:
#   The plot object.
#
# The ten fitted-value columns are labelled, in order, 8, 8, 6, 6, 4, 4, 2,
# 2, 0, 0; this layout is hard-coded.
plotGeneSetBulk <- function(geneSet, time_course_lrt)
{
  present <- geneSet[which(geneSet %in% row.names(time_course_lrt))]
  fitted <- time_course_lrt$fitted.values[as.character(present), ]

  # as.vector() unrolls column by column, i.e. all genes for column 1, then
  # all genes for column 2, and so on -- hence `each` for sample labels.
  long_form <- data.frame(
    values = log(cpm(as.vector(fitted))),
    sample = rep(c('8','8','6','6','4','4','2','2','0','0'), each = length(present)),
    gene = rep(present, 10)
  )

  ggplot(long_form, aes(x = sample, y = values)) +
    stat_summary(fun.y = mean, geom = "line", group = 1, lwd = 2) +
    stat_summary(fun.y = mean, geom = "point", size = 3) +
    facet_wrap(~gene) +
    theme_grey(base_size = 20) +
    theme(axis.title.x = element_blank(), axis.title.y = element_blank()) +
    guides(fill = FALSE)
}


# Single-panel line plot of the mean log(cpm) of fitted values over time,
# one coloured line per gene, with error bars of sd / sqrt(2).
#
# Args:
#   geneSet:         gene names; names that are not row names of the fit are
#                    dropped.
#   time_course_lrt: object with row names and a `fitted.values` matrix
#                    (presumably an edgeR fit of the bulk time course).
#
# Returns:
#   The plot object.
#
# The ten fitted-value columns are labelled, in order, 8, 8, 6, 6, 4, 4, 2,
# 2, 0, 0; this layout is hard-coded.
plotGeneSetDplyr <- function(geneSet, time_course_lrt)
{
  present <- geneSet[which(geneSet %in% row.names(time_course_lrt))]
  fitted <- time_course_lrt$fitted.values[as.character(present), ]

  long_form <- data.frame(
    values = log(cpm(as.vector(fitted))),
    sample = rep(c('8','8','6','6','4','4','2','2','0','0'), each = length(present)),
    gene = rep(present, 10)
  )
  long_form$sample <- as.numeric(as.vector(long_form$sample))

  per_point <- long_form %>%
    group_by(sample, gene) %>%
    summarise(value = mean(values), sd = sd(values))

  ggplot(per_point, aes(x = sample, y = value, color = gene)) +
    geom_line(lwd = 2) +
    geom_errorbar(aes(ymin = value - sd/sqrt(2), ymax = value + sd/sqrt(2)), width = 0.1, lwd = 1) +
    geom_point(size = 3) +
    theme_grey(base_size = 30) +
    theme(axis.title.x = element_blank(), axis.title.y = element_blank()) +
    guides(fill = FALSE) +
    scale_color_viridis(option = "magma", discrete = TRUE, end = 0.9)
}

# Faceted variant of the time-course plot: mean log(cpm) of fitted values
# over time, one panel per gene, with error bars of one sd.
#
# Args:
#   geneSet:         gene names; names that are not row names of the fit are
#                    dropped.
#   time_course_lrt: object with row names and a `fitted.values` matrix
#                    (presumably an edgeR fit of the bulk time course).
#
# Returns:
#   The plot object.
#
# The ten fitted-value columns are labelled, in order, 8, 8, 6, 6, 4, 4, 2,
# 2, 0, 0; this layout is hard-coded.
plotGeneSetDplyrFacet <- function(geneSet, time_course_lrt)
{
  present <- geneSet[which(geneSet %in% row.names(time_course_lrt))]
  fitted <- time_course_lrt$fitted.values[as.character(present), ]

  long_form <- data.frame(
    values = log(cpm(as.vector(fitted))),
    sample = rep(c('8','8','6','6','4','4','2','2','0','0'), each = length(present)),
    gene = rep(present, 10)
  )
  long_form$sample <- as.numeric(as.vector(long_form$sample))

  per_point <- long_form %>%
    group_by(sample, gene) %>%
    summarise(value = mean(values), sd = sd(values))

  ggplot(per_point, aes(x = sample, y = value, color = gene)) +
    geom_line(lwd = 2) +
    geom_errorbar(aes(ymin = value - sd, ymax = value + sd), width = 0.1, lwd = 1) +
    geom_point(size = 3) +
    theme_grey(base_size = 30) +
    theme(axis.title.x = element_blank(), axis.title.y = element_blank()) +
    guides(fill = FALSE) +
    facet_wrap(~gene) +
    scale_color_viridis(option = "magma", discrete = TRUE, end = 0.9)
}

# Most variable features among the cells of one identity.
#
# Args:
#   some_mon:   object accepted by subset(idents = ...),
#               FindVariableFeatures() and HVFInfo() (despite the name,
#               presumably a Seurat object).
#   GOI:        identity class selecting the cells.
#   min_expr:   only features whose `mean` in the HVFInfo() table exceeds
#               this are considered.
#   n_features: maximum number of features to return; defaults to 200.
#
# Returns:
#   Rows of the HVFInfo() table, sorted by decreasing variance.standardized.
#   At most n_features rows are returned. Fewer are returned when fewer pass
#   `min_expr`; the former `[1:200]` padded the result with all-NA rows in
#   that case.
get200VF <- function(some_mon, GOI, min_expr, n_features = 200)
{
  test_vf <- FindVariableFeatures(subset(some_mon, idents = GOI))
  hvf <- HVFInfo(test_vf)
  vg <- subset(hvf, hvf$mean > min_expr)
  ranked <- order(vg$variance.standardized, decreasing = TRUE)
  top200VF <- vg[head(ranked, n_features), ]
  return(top200VF)
}

# Two-dimensional kernel density estimate evaluated at each (x, y) point.
#
# Args:
#   x, y: numeric vectors of equal length.
#   ...:  forwarded to MASS::kde2d() (e.g. n, h).
#
# Returns:
#   Numeric vector with one density value per point, read from the kde2d()
#   grid cell the point falls into.
get_density <- function(x, y, ...) {
  kde <- MASS::kde2d(x, y, ...)
  # Locate each point's grid cell along both axes, then read z by (row, col).
  grid_cell <- cbind(findInterval(x, kde$x), findInterval(y, kde$y))
  kde$z[grid_cell]
}