library(Seurat)
library(tidyverse)

# Run configuration: patient label plus the two sample IDs, which double as
# the names of the input directories read below.
patient <- "patient1"
sample1 <- "208020X5"  # loaded below as the pre-treatment sample
sample2 <- "208020X6"  # loaded below as the post-treatment sample

## Pre-treatment sample (sample1): load counts and build the Seurat object ##
pre_dir <- paste0(sample1, "/genes_seurat/")
pre_counts <- ReadMtx(
  mtx = paste0(pre_dir, "matrix.mtx"),
  features = paste0(pre_dir, "genes.tsv"),
  cells = paste0(pre_dir, "barcodes.tsv"),
  feature.column = 2
)

# Drop genes seen in fewer than 3 cells and cells with fewer than 200 genes.
pre <- CreateSeuratObject(
  counts = pre_counts,
  project = "BTK_pre",
  min.cells = 3,
  min.features = 200
)

# QC: plot total counts against detected genes, then keep only cells with
# 200 < nFeature_RNA < 2500.
FeatureScatter(pre, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
pre <- subset(pre, subset = nFeature_RNA > 200 & nFeature_RNA < 2500)

# Label every cell with its treatment status, then normalize and select the
# 2000 variable features used for integration.
pre$status <- "pre"
pre <- NormalizeData(pre, verbose = FALSE)
pre <- FindVariableFeatures(pre, selection.method = "vst", nfeatures = 2000)

## Post-treatment sample (sample2): load counts and build the Seurat object ##
post_dir <- paste0(sample2, "/genes_seurat/")
matrix <- ReadMtx(
  mtx = paste0(post_dir, "matrix.mtx"),
  features = paste0(post_dir, "genes.tsv"),
  cells = paste0(post_dir, "barcodes.tsv"),
  feature.column = 2
)

# Drop genes seen in fewer than 3 cells and cells with fewer than 200 genes.
post <- CreateSeuratObject(
  counts = matrix,
  project = "BTK_post",
  min.cells = 3,
  min.features = 200
)

# QC: plot total counts against detected genes, then keep only cells with
# 200 < nFeature_RNA < 2500.
FeatureScatter(post, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
post <- subset(post, subset = nFeature_RNA > 200 & nFeature_RNA < 2500)

## Remove T cells from the post-treatment sample
# Hand-listed barcodes; `invert = TRUE` keeps every cell NOT in this list.
Tcells <- c(
  "CGAGACGCTGATCACT-1", "CTAGTCGCTTGTTGGT-1", "CACGGACACCATCAGT-1",
  "AACTCTTTGAAATCGA-1", "CTCTCCCACTATCCGA-1", "CGACAGCTGCGTTGGC-1",
  "AACAGCTTGCGATAGG-1", "TCGTTAACTGAAACGC-1", "GCCTACGGAGTAGAAT-1"
)
post <- subset(post, cells = Tcells, invert = TRUE)

# Label every cell with its treatment status, then normalize and select the
# 2000 variable features used for integration.
post$status <- "post"
post <- NormalizeData(post, verbose = FALSE)
post <- FindVariableFeatures(post, selection.method = "vst", nfeatures = 2000)

# Integrate the two samples using the first 20 dimensions.
# The list order (pre, then post) is kept as-is; the "_1"/"_2" barcode
# suffixes used later in this script presumably depend on it - verify before
# reordering.
integration_dims <- 1:20
immune.anchors <- FindIntegrationAnchors(
  object.list = list(pre, post),
  dims = integration_dims
)
immune.combined <- IntegrateData(
  anchorset = immune.anchors,
  dims = integration_dims
)

# Everything below runs on the "integrated" assay unless stated otherwise.
DefaultAssay(immune.combined) <- "integrated"

# Standard workflow for visualization and clustering: scale, then PCA (30 PCs).
immune.combined <- ScaleData(immune.combined, verbose = FALSE)
immune.combined <- RunPCA(immune.combined, npcs = 30, verbose = FALSE)

# Variance explained per PC, to judge how many PCs to carry forward.
ElbowPlot(immune.combined)

# UMAP embedding and graph-based clustering on the first 10 PCs.
cluster_dims <- 1:10
immune.combined <- RunUMAP(
  immune.combined,
  reduction = "pca",
  dims = cluster_dims
)
immune.combined <- FindNeighbors(
  immune.combined,
  reduction = "pca",
  dims = cluster_dims
)
immune.combined <- FindClusters(immune.combined, resolution = 0.5)

### Visualization ###
# Two UMAP views of the integrated object: one with cluster labels drawn on
# the plot (legend hidden), one colored by treatment status.
umap_plot_integrated <- DimPlot(immune.combined, label = TRUE) + NoLegend()
umap_status <- DimPlot(immune.combined, reduction = "umap", group.by = "status")

# Both PDFs go to umaps/<patient>/; the directory is not created here.
umap_prefix <- paste0("umaps/", patient, "/", patient)
ggsave(
  paste0(umap_prefix, ".merged.pdf"),
  umap_plot_integrated,
  width = 8, height = 6, useDingbats = FALSE
)
ggsave(
  paste0(umap_prefix, ".merged_status.pdf"),
  umap_status,
  width = 8, height = 6, useDingbats = FALSE
)

# Compute marker genes for every cluster. The active identities at this point
# are the clusters assigned by FindClusters(), and the test runs on the default
# assay, so the selected genes are present in the data DoHeatmap() draws from.
# NOTE(review): Seurat's integration guidance generally suggests running DE on
# the RNA assay rather than the integrated one - confirm which is intended.
markers <- FindAllMarkers(immune.combined, only.pos = TRUE, verbose = FALSE)

# Get top markers per cluster (ties at the cut-off may yield more than 5).
top_markers <- markers %>%
  group_by(cluster) %>%
  slice_max(n = 5, order_by = avg_log2FC) %>%  # Top 5 genes per cluster
  ungroup()

# Create a heatmap of these markers. A gene can be a top marker for several
# clusters, hence unique().
htmap <- DoHeatmap(immune.combined,
                   features = unique(top_markers$gene),
                   group.by = "seurat_clusters",
                   label = FALSE) +
  scale_fill_gradientn(colors = c("navy", "white", "firebrick3")) +
  theme(
    axis.text.y = element_text(size = 13, face = "italic"),
    legend.text = element_text(size = 14),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.line = element_blank()
  )

# Written to the working directory (not under umaps/).
ggsave(paste0(patient, ".post.cluster.heatmap.merged.png"), htmap,
       width = 11, height = 8, device = "png")

# Barcode subclone assignments
# One tab-separated table per sample; the rest of the script uses its
# `Barcode` and `ASIG` columns.
genotype_dir <- paste0("Genotype/", patient, "/")

# Suffixes are appended so barcodes can be matched against cell names of the
# integrated object - presumably "-1_1" for the first (pre) and "-1_2" for the
# second (post) object passed to integration; verify against colnames().
pre_sc_assignments <- read.delim(paste0(genotype_dir, sample1, ".assigned"))
pre_sc_assignments$Barcode <- paste0(pre_sc_assignments$Barcode, "-1_1")

post_sc_assignments <- read.delim(paste0(genotype_dir, sample2, ".assigned"))
post_sc_assignments$Barcode <- paste0(post_sc_assignments$Barcode, "-1_2")

# Export the UMAP embedding to CSV, then read it back as a plain data frame.
# After the round trip, column 1 (`X`) holds the cell barcodes and columns 2-3
# hold the two UMAP axes; barcodes are also copied into the row names so cells
# can be looked up by name.
umap_csv <- paste0("umap_coordinate_files/", patient, ".umapcoordinate.csv")
write.csv(immune.combined@reductions$umap@cell.embeddings, umap_csv)
umap_coordinate <- read.csv(umap_csv)
rownames(umap_coordinate) <- umap_coordinate$X

# Collect the barcodes carrying one ASIG label, pre-treatment cells first and
# post-treatment cells after them.
barcodes_with_label <- function(asig_label) {
  c(
    filter(pre_sc_assignments, ASIG == .env$asig_label)$Barcode,
    filter(post_sc_assignments, ASIG == .env$asig_label)$Barcode
  )
}

# One pooled barcode vector per assignment label.
sc0 <- barcodes_with_label("C0")
sc1 <- barcodes_with_label("C1")
sc2 <- barcodes_with_label("C2")
sc3 <- barcodes_with_label("C3")
sc4 <- barcodes_with_label("C4")
sc5 <- barcodes_with_label("C5")
unassign <- barcodes_with_label("UNASSIGN")
normal <- barcodes_with_label("normal")

# UMAP of all cells (gray) with each subclone overlaid in its own color.
# Coordinates are read by column position for every layer (column 2 = first
# UMAP axis, column 3 = second): the axis column names are not the same in
# every Seurat version (UMAP_1/UMAP_2 vs. umap_1/umap_2), so name-based access
# such as `$UMAP_1` can silently return NULL.
pdf(paste0("umaps/", patient, "/", patient, ".merged.by_subclone.pdf"))
umap_x <- umap_coordinate[[2]]
umap_y <- umap_coordinate[[3]]

# Add xlim = c(-10, 7), ylim = c(-8, 9) only if you want to focus on a region.
plot(umap_x, umap_y, pch = 16, cex = 0.2, col = "gray",
     xlab = "UMAP1", ylab = "UMAP2")

# Layers are drawn in this order, so later entries sit on top where points
# overlap. Cells in `unassign` get no layer and stay gray.
subclone_layers <- list(
  list(cells = sc0, col = "blue"),
  list(cells = sc3, col = "green"),
  list(cells = sc2, col = "red"),
  list(cells = sc1, col = "black"),
  list(cells = sc4, col = "orange"),
  list(cells = sc5, col = "cyan2"),
  list(cells = normal, col = "brown")
)
for (layer in subclone_layers) {
  in_layer <- rownames(umap_coordinate) %in% layer$cells
  points(umap_x[in_layer], umap_y[in_layer], pch = 16, cex = 0.5,
         col = layer$col)
}
dev.off()

# Compare Subclone 1 pre and post.
# "Subclone 1" in this script is the set of cells with ASIG label "C0".
presc1 <- filter(pre_sc_assignments, ASIG == "C0")$Barcode
postsc1 <- filter(post_sc_assignments, ASIG == "C0")$Barcode

pdf(paste0("umaps/", patient, "/", patient, ".merged.subclone1.pdf"))
# Coordinates are read by column position (2 and 3) for every layer, because
# the axis column names differ between Seurat versions and `$UMAP_1` may be
# NULL.
umap_x <- umap_coordinate[[2]]
umap_y <- umap_coordinate[[3]]

# Background: all cells in gray.
# Add xlim = c(-10, 7), ylim = c(-8, 9) only if you want to focus on a region.
plot(umap_x, umap_y, pch = 16, cex = 0.2, col = "gray",
     xlab = "UMAP1", ylab = "UMAP2")

# Overlay: pre-treatment cells in red, post-treatment cells in blue (on top).
in_pre <- rownames(umap_coordinate) %in% presc1
in_post <- rownames(umap_coordinate) %in% postsc1
points(umap_x[in_pre], umap_y[in_pre], pch = 16, cex = 0.6, col = "red3")
points(umap_x[in_post], umap_y[in_post], pch = 16, cex = 0.6, col = "blue")
dev.off()

# Compare Subclone 2 pre and post (Only present in pre).
# "Subclone 2" in this script is the set of cells with ASIG label "C3".
presc2 <- filter(pre_sc_assignments, ASIG == "C3")$Barcode
postsc2 <- filter(post_sc_assignments, ASIG == "C3")$Barcode

pdf(paste0("umaps/", patient, "/", patient, ".merged.subclone2.pdf"))
# Coordinates are read by column position (2 and 3) for every layer, because
# the axis column names differ between Seurat versions and `$UMAP_1` may be
# NULL.
umap_x <- umap_coordinate[[2]]
umap_y <- umap_coordinate[[3]]

# Background: all cells in gray.
# Add xlim = c(-10, 7), ylim = c(-8, 9) only if you want to focus on a region.
plot(umap_x, umap_y, pch = 16, cex = 0.2, col = "gray",
     xlab = "UMAP1", ylab = "UMAP2")

# Overlay: pre-treatment cells in red, post-treatment cells in blue (on top).
in_pre <- rownames(umap_coordinate) %in% presc2
in_post <- rownames(umap_coordinate) %in% postsc2
points(umap_x[in_pre], umap_y[in_pre], pch = 16, cex = 0.6, col = "red3")
points(umap_x[in_post], umap_y[in_post], pch = 16, cex = 0.6, col = "blue")
dev.off()

# Compare Subclone 3 pre and post.
# "Subclone 3" in this script is the set of cells with ASIG label "C2".
presc3 <- filter(pre_sc_assignments, ASIG == "C2")$Barcode
postsc3 <- filter(post_sc_assignments, ASIG == "C2")$Barcode

pdf(paste0("umaps/", patient, "/", patient, ".merged.subclone3.pdf"))
# Coordinates are read by column position (2 and 3) for every layer, because
# the axis column names differ between Seurat versions and `$UMAP_1` may be
# NULL.
umap_x <- umap_coordinate[[2]]
umap_y <- umap_coordinate[[3]]

# Background: all cells in gray.
# Add xlim = c(-10, 7), ylim = c(-8, 9) only if you want to focus on a region.
plot(umap_x, umap_y, pch = 16, cex = 0.2, col = "gray",
     xlab = "UMAP1", ylab = "UMAP2")

# Overlay: pre-treatment cells in red, post-treatment cells in blue (on top).
in_pre <- rownames(umap_coordinate) %in% presc3
in_post <- rownames(umap_coordinate) %in% postsc3
points(umap_x[in_pre], umap_y[in_pre], pch = 16, cex = 0.6, col = "red3")
points(umap_x[in_post], umap_y[in_post], pch = 16, cex = 0.6, col = "blue")
dev.off()

# Compare Subclone 4 pre and post (Only present in pre).
# "Subclone 4" in this script is the set of cells with ASIG label "C1".
presc4 <- filter(pre_sc_assignments, ASIG == "C1")$Barcode
postsc4 <- filter(post_sc_assignments, ASIG == "C1")$Barcode

pdf(paste0("umaps/", patient, "/", patient, ".merged.subclone4.pdf"))
# Coordinates are read by column position (2 and 3) for every layer, because
# the axis column names differ between Seurat versions and `$UMAP_1` may be
# NULL.
umap_x <- umap_coordinate[[2]]
umap_y <- umap_coordinate[[3]]

# Background: all cells in gray.
# Add xlim = c(-10, 7), ylim = c(-8, 9) only if you want to focus on a region.
plot(umap_x, umap_y, pch = 16, cex = 0.2, col = "gray",
     xlab = "UMAP1", ylab = "UMAP2")

# Overlay: pre-treatment cells in red, post-treatment cells in blue (on top).
in_pre <- rownames(umap_coordinate) %in% presc4
in_post <- rownames(umap_coordinate) %in% postsc4
points(umap_x[in_pre], umap_y[in_pre], pch = 16, cex = 0.6, col = "red3")
points(umap_x[in_post], umap_y[in_post], pch = 16, cex = 0.6, col = "blue")
dev.off()

# Compare Subclone 5 pre and post.
# "Subclone 5" in this script is the set of cells with ASIG label "C4".
presc5 <- filter(pre_sc_assignments, ASIG == "C4")$Barcode
postsc5 <- filter(post_sc_assignments, ASIG == "C4")$Barcode

pdf(paste0("umaps/", patient, "/", patient, ".merged.subclone5.pdf"))
# Coordinates are read by column position (2 and 3) for every layer, because
# the axis column names differ between Seurat versions and `$UMAP_1` may be
# NULL.
umap_x <- umap_coordinate[[2]]
umap_y <- umap_coordinate[[3]]

# Background: all cells in gray.
# Add xlim = c(-10, 7), ylim = c(-8, 9) only if you want to focus on a region.
plot(umap_x, umap_y, pch = 16, cex = 0.2, col = "gray",
     xlab = "UMAP1", ylab = "UMAP2")

# Overlay: pre-treatment cells in red, post-treatment cells in blue (on top).
in_pre <- rownames(umap_coordinate) %in% presc5
in_post <- rownames(umap_coordinate) %in% postsc5
points(umap_x[in_pre], umap_y[in_pre], pch = 16, cex = 0.6, col = "red3")
points(umap_x[in_post], umap_y[in_post], pch = 16, cex = 0.6, col = "blue")
dev.off()

# Compare Subclone 6 pre and post (only present in post).
# "Subclone 6" in this script is the set of cells with ASIG label "C5".
presc6 <- filter(pre_sc_assignments, ASIG == "C5")$Barcode
postsc6 <- filter(post_sc_assignments, ASIG == "C5")$Barcode

pdf(paste0("umaps/", patient, "/", patient, ".merged.subclone6.pdf"))
# Coordinates are read by column position (2 and 3) for every layer, because
# the axis column names differ between Seurat versions and `$UMAP_1` may be
# NULL.
umap_x <- umap_coordinate[[2]]
umap_y <- umap_coordinate[[3]]

# Background: all cells in gray.
# Add xlim = c(-10, 7), ylim = c(-8, 9) only if you want to focus on a region.
plot(umap_x, umap_y, pch = 16, cex = 0.2, col = "gray",
     xlab = "UMAP1", ylab = "UMAP2")

# Overlay: pre-treatment cells in red, post-treatment cells in cyan (on top).
# Unlike the other subclone plots, the post layer here uses "cyan", not "blue".
in_pre <- rownames(umap_coordinate) %in% presc6
in_post <- rownames(umap_coordinate) %in% postsc6
points(umap_x[in_pre], umap_y[in_pre], pch = 16, cex = 0.6, col = "red3")
points(umap_x[in_post], umap_y[in_post], pch = 16, cex = 0.6, col = "cyan")
dev.off()


# Switch the active identities from clusters to treatment status so that
# "post" and "pre" can be used as groups in every FindMarkers() call below.
# NOTE(review): FindMarkers() runs on the default assay, which this script sets
# to "integrated"; Seurat's integration guidance generally suggests the RNA
# assay for DE - confirm which is intended.
Idents(immune.combined) <- "status"

# Perform differential expression analysis between all pre and post cells.
# ident.1 = "post", so positive fold changes mean higher expression in post.
de_results <- FindMarkers(immune.combined, ident.1 = "post", ident.2 = "pre")
de_results$gene <- rownames(de_results)

# Gene lists of interest, taken from the GENE_NAME column of each file.
cll_drivers <- read_tsv("meta/CLL_Drivers_2023.tsv")$GENE_NAME
cll_bcr_drivers <- read_tsv("meta/CLL_and_BCR_Genes.tsv")$GENE_NAME

# DE results restricted to each gene list. Only the BCR/driver subset is
# written out; `driver_diff_prepost` is kept in memory.
driver_diff_prepost <- subset(de_results, gene %in% cll_drivers)
driver_bcr_diff <- subset(de_results, gene %in% cll_bcr_drivers)

write.csv(driver_bcr_diff, paste0(patient, ".genes.merged.driver_bcr_diff.csv"))
# Full pre/post DE table (`de_results`; the name `diff_genes` used here before
# was never defined).
write.csv(de_results, paste0(patient, ".genes.merged.diff.csv"))


## Subclone-level DGE analysis

# Subclone 1: post vs. pre restricted to this subclone's cells
# (presc1/postsc1 are the barcodes with ASIG label "C0").
sc1_barcodes <- c(presc1, postsc1)
immune.sc1 <- subset(immune.combined, cells = sc1_barcodes)

# Active identities are the "status" values, so "post"/"pre" pick the groups.
sc1_de_results <- FindMarkers(immune.sc1, ident.1 = "post", ident.2 = "pre")
sc1_de_results[["gene"]] <- rownames(sc1_de_results)

# Rows for the driver gene lists; kept in memory, not written to disk here.
sc1_is_driver <- sc1_de_results$gene %in% cll_drivers
sc1_is_bcr_driver <- sc1_de_results$gene %in% cll_bcr_drivers
sc1_driver_diff_prepost <- sc1_de_results[sc1_is_driver, , drop = FALSE]
sc1_driver_bcr_diff <- sc1_de_results[sc1_is_bcr_driver, , drop = FALSE]

write.csv(sc1_de_results, paste0(patient, ".genes.merged.diff.sc1.csv"))


# Subclone 3: post vs. pre restricted to this subclone's cells
# (presc3/postsc3 are the barcodes with ASIG label "C2").
sc3_barcodes <- c(presc3, postsc3)
immune.sc3 <- subset(immune.combined, cells = sc3_barcodes)

# Active identities are the "status" values, so "post"/"pre" pick the groups.
sc3_de_results <- FindMarkers(immune.sc3, ident.1 = "post", ident.2 = "pre")
sc3_de_results[["gene"]] <- rownames(sc3_de_results)

# Rows for the driver gene lists; kept in memory, not written to disk here.
sc3_is_driver <- sc3_de_results$gene %in% cll_drivers
sc3_is_bcr_driver <- sc3_de_results$gene %in% cll_bcr_drivers
sc3_driver_diff_prepost <- sc3_de_results[sc3_is_driver, , drop = FALSE]
sc3_driver_bcr_diff <- sc3_de_results[sc3_is_bcr_driver, , drop = FALSE]

write.csv(sc3_de_results, paste0(patient, ".genes.merged.diff.sc3.csv"))


# Subclone 5: post vs. pre restricted to this subclone's cells
# (presc5/postsc5 are the barcodes with ASIG label "C4").
sc5_barcodes <- c(presc5, postsc5)
immune.sc5 <- subset(immune.combined, cells = sc5_barcodes)

# Active identities are the "status" values, so "post"/"pre" pick the groups.
sc5_de_results <- FindMarkers(immune.sc5, ident.1 = "post", ident.2 = "pre")
sc5_de_results[["gene"]] <- rownames(sc5_de_results)

# Rows for the driver gene lists; kept in memory, not written to disk here.
sc5_is_driver <- sc5_de_results$gene %in% cll_drivers
sc5_is_bcr_driver <- sc5_de_results$gene %in% cll_bcr_drivers
sc5_driver_diff_prepost <- sc5_de_results[sc5_is_driver, , drop = FALSE]
sc5_driver_bcr_diff <- sc5_de_results[sc5_is_bcr_driver, , drop = FALSE]

write.csv(sc5_de_results, paste0(patient, ".genes.merged.diff.sc5.csv"))
