library(Seurat)
library(tidyverse)

# Run configuration: the patient label is used when building input/output
# paths; sample1 is read as the pre-treatment sample and sample2 as the
# post-treatment sample further down.
patient <- "patient1"
sample1 <- "208020X5"
sample2 <- "208020X6"

## Read in pre-treatment sample and make seurat object ##
# The count matrix, feature table and barcode list all live under
# <sample1>/isoforms_seurat/. Feature names are taken from column 2 of
# genes_renamed.tsv.
pre_counts <- ReadMtx(
  mtx = paste0(sample1, "/isoforms_seurat/matrix.mtx"),
  features = paste0(sample1, "/isoforms_seurat/genes_renamed.tsv"),
  cells = paste0(sample1, "/isoforms_seurat/barcodes.tsv"),
  feature.column = 2
)
seurat_df_pre <- CreateSeuratObject(
  counts = pre_counts,
  project = "BTK_pre",
  min.cells = 3,
  min.features = 100
)

# QC scatter: total counts per cell against number of detected features.
FeatureScatter(
  seurat_df_pre,
  feature1 = "nCount_RNA",
  feature2 = "nFeature_RNA"
)

# Keep cells with more than 100 and fewer than 2500 detected features.
seurat_df_pre <- subset(
  seurat_df_pre,
  subset = nFeature_RNA > 100 & nFeature_RNA < 2500
)

## Read in post-treatment sample and make seurat object ##
# Same layout as the pre-treatment sample, rooted at <sample2>/isoforms_seurat/.
post_counts <- ReadMtx(
  mtx = paste0(sample2, "/isoforms_seurat/matrix.mtx"),
  features = paste0(sample2, "/isoforms_seurat/genes_renamed.tsv"),
  cells = paste0(sample2, "/isoforms_seurat/barcodes.tsv"),
  feature.column = 2
)
post <- CreateSeuratObject(
  counts = post_counts,
  project = "BTK_post",
  min.cells = 3,
  min.features = 100
)

# QC scatter: total counts per cell against number of detected features.
FeatureScatter(post, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")

# Keep cells with more than 100 and fewer than 3000 detected features
# (note the upper bound differs from the pre-treatment sample's 2500).
post <- subset(post, subset = nFeature_RNA > 100 & nFeature_RNA < 3000)

## Remove T Cells from post-treatment sample
# Hard-coded barcodes of the cells to exclude.
t_cell_barcodes <- c(
  "CGAGACGCTGATCACT-1", "CTAGTCGCTTGTTGGT-1", "CACGGACACCATCAGT-1",
  "AACTCTTTGAAATCGA-1", "CTCTCCCACTATCCGA-1", "CGACAGCTGCGTTGGC-1",
  "AACAGCTTGCGATAGG-1", "TCGTTAACTGAAACGC-1", "GCCTACGGAGTAGAAT-1"
)

# invert = TRUE keeps every cell that is NOT in the barcode list.
seurat_df_post <- subset(post, cells = t_cell_barcodes, invert = TRUE)

# Run SCTransform on both samples.
# Each sample is normalized on its own, with identical settings
# (glmGamPoi method, v2 flavor) so the two runs are comparable.
seurat_df_pre_sct <- SCTransform(
  seurat_df_pre,
  method = "glmGamPoi",
  vst.flavor = "v2"
)
seurat_df_post_sct <- SCTransform(
  seurat_df_post,
  method = "glmGamPoi",
  vst.flavor = "v2"
)

#################################################################################
# Process pre-treatment sample
# PCA, then UMAP / neighbor graph on the first 20 PCs, then clustering.
seurat_df_pre_sct <- seurat_df_pre_sct %>%
  RunPCA(verbose = FALSE) %>%
  RunUMAP(dims = 1:20, verbose = FALSE) %>%
  FindNeighbors(dims = 1:20, verbose = FALSE) %>%
  FindClusters(verbose = FALSE)

# Cluster-labelled UMAP; the bare name shows the plot in interactive sessions.
umap_plot_pre <- DimPlot(seurat_df_pre_sct, label = TRUE) + NoLegend()
umap_plot_pre

# Make sure the per-patient output directory exists before saving; ggsave()
# does not create it by default.
dir.create(file.path("umaps", patient), recursive = TRUE, showWarnings = FALSE)
ggsave(
  paste0("umaps/", patient, "/", patient, ".isoform.pre.sct.umap.pdf"),
  umap_plot_pre,
  width = 8,
  height = 6,
  useDingbats = FALSE
)

## Read in subclone assignments, and write cell coordinates.
pre_sc_assignments <- read.csv(
  paste0("Genotype/", patient, "/", sample1, ".assigned"),
  sep = "\t"
)
# Append "-1" to each barcode; presumably this makes them match the Seurat
# cell names (which carry a "-1" suffix) -- confirm against the .assigned file.
pre_sc_assignments$Barcode <- paste0(pre_sc_assignments$Barcode, "-1")

# Neither write.csv() nor pdf() creates missing directories, so do it up front.
dir.create("umap_coordinate_files", recursive = TRUE, showWarnings = FALSE)
dir.create(file.path("umaps", patient), recursive = TRUE, showWarnings = FALSE)

# Save the UMAP embedding, then read it back as a plain data frame:
# column 1 (X) holds the barcodes, columns 2 and 3 the two UMAP axes.
pre_coordinate_file <- paste0(
  "umap_coordinate_files/", patient, ".pre.isoform.umapcoordinate.csv"
)
write.csv(seurat_df_pre_sct@reductions$umap@cell.embeddings, pre_coordinate_file)
umap_pre_coordinate <- read.csv(pre_coordinate_file)
rownames(umap_pre_coordinate) <- umap_pre_coordinate$X

# Barcodes grouped by their subclone assignment (ASIG column).
pre_barcodes_by_clone <- split(
  pre_sc_assignments$Barcode,
  pre_sc_assignments$ASIG
)

# Subclone -> colour, listed in drawing order (later entries are drawn on top).
# Cells labelled UNASSIGN get no overlay and stay in the gray background.
pre_clone_colors <- c(
  C0 = "blue", C3 = "green", C2 = "red", C1 = "black",
  C4 = "orange", C5 = "cyan2", normal = "brown"
)

# Make umap plot coloring cells by subclone.
pdf(paste0("umaps/", patient, "/", patient, ".pre.isoform.sct.by_subclone.pdf"))
invisible(tryCatch(
  {
    # All cells in gray as background. Coordinates are taken by position
    # (columns 2 and 3), like the overlays below, so the plot does not depend
    # on how the embedding columns happen to be named (e.g. UMAP_1 vs umap_1).
    # Add xlim = c(-10, 7), ylim = c(-8, 9) only if you want to focus on a region.
    plot(
      umap_pre_coordinate[, 2], umap_pre_coordinate[, 3],
      pch = 16, cex = 0.2, col = "gray", xlab = "UMAP1", ylab = "UMAP2"
    )
    for (clone in names(pre_clone_colors)) {
      # A subclone absent from the assignments yields NULL here, which selects
      # no rows and draws nothing.
      in_clone <- rownames(umap_pre_coordinate) %in% pre_barcodes_by_clone[[clone]]
      points(
        umap_pre_coordinate[in_clone, 2], umap_pre_coordinate[in_clone, 3],
        pch = 16, cex = 0.5, col = pre_clone_colors[[clone]]
      )
    }
  },
  # Always close the PDF device, even if plotting fails.
  finally = dev.off()
))

#################################################################################
# Process post-treatment sample
# PCA, then UMAP / neighbor graph on the first 20 PCs, then clustering.
seurat_df_post_sct <- seurat_df_post_sct %>%
  RunPCA(verbose = FALSE) %>%
  RunUMAP(dims = 1:20, verbose = FALSE) %>%
  FindNeighbors(dims = 1:20, verbose = FALSE) %>%
  FindClusters(verbose = FALSE)

# Cluster-labelled UMAP; the bare name shows the plot in interactive sessions.
umap_plot_post <- DimPlot(seurat_df_post_sct, label = TRUE) + NoLegend()
umap_plot_post

# Make sure the per-patient output directory exists before saving; ggsave()
# does not create it by default.
dir.create(file.path("umaps", patient), recursive = TRUE, showWarnings = FALSE)
ggsave(
  paste0("umaps/", patient, "/", patient, ".isoform.post.sct.umap.pdf"),
  umap_plot_post,
  width = 8,
  height = 6,
  useDingbats = FALSE
)

# Find marker genes for each cluster
# Only positive markers, detected in at least 25% of cells (min.pct) and
# passing a 0.25 log fold-change threshold.
markers <- FindAllMarkers(
  seurat_df_post_sct,
  only.pos = TRUE,
  min.pct = 0.25,
  logfc.threshold = 0.25
)

# Get top markers per cluster: the 5 rows with the highest avg_log2FC.
# with_ties = FALSE caps each cluster at 5 rows (the default keeps all tied
# rows), and ungroup() stops the grouping from leaking into later code.
top_markers <- markers %>%
  group_by(cluster) %>%
  slice_max(order_by = avg_log2FC, n = 5, with_ties = FALSE) %>%
  ungroup()

# Create a heatmap of these markers
# unique() because the same feature can be a top marker of several clusters.
htmap <- DoHeatmap(
  seurat_df_post_sct,
  features = unique(top_markers$gene),
  group.by = "seurat_clusters",
  label = FALSE
) +
  scale_fill_gradientn(colors = c("navy", "white", "firebrick3")) +
  theme(
    axis.text.y = element_text(size = 12, face = "italic"),
    legend.text = element_text(size = 14),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.line = element_blank()
  )

# Written to the current working directory.
ggsave(
  paste0(patient, ".post.cluster.heatmap.isoform.png"),
  htmap,
  width = 12,
  height = 8,
  device = "png"
)


# Pull in subclone assignment info
post_sc_assignments <- read.csv(
  paste0("Genotype/", patient, "/", sample2, ".assigned"),
  sep = "\t"
)
# Append "-1" to each barcode; presumably this makes them match the Seurat
# cell names (which carry a "-1" suffix) -- confirm against the .assigned file.
post_sc_assignments$Barcode <- paste0(post_sc_assignments$Barcode, "-1")

# Neither write.csv() nor pdf() creates missing directories, so do it up front.
dir.create("umap_coordinate_files", recursive = TRUE, showWarnings = FALSE)
dir.create(file.path("umaps", patient), recursive = TRUE, showWarnings = FALSE)

# Save the UMAP embedding, then read it back as a plain data frame:
# column 1 (X) holds the barcodes, columns 2 and 3 the two UMAP axes.
post_coordinate_file <- paste0(
  "umap_coordinate_files/", patient, ".post.isoform.umapcoordinate.csv"
)
write.csv(seurat_df_post_sct@reductions$umap@cell.embeddings, post_coordinate_file)
umap_post_coordinate <- read.csv(post_coordinate_file)
rownames(umap_post_coordinate) <- umap_post_coordinate$X

# Barcodes grouped by their subclone assignment (ASIG column).
post_barcodes_by_clone <- split(
  post_sc_assignments$Barcode,
  post_sc_assignments$ASIG
)

# Subclone -> colour, listed in drawing order (later entries are drawn on top).
# Cells labelled UNASSIGN get no overlay and stay in the gray background.
post_clone_colors <- c(
  C0 = "blue", C3 = "green", C2 = "red", C1 = "black",
  C4 = "orange", C5 = "cyan2", normal = "brown"
)

# Make umap plot coloring cells by subclone.
pdf(paste0("umaps/", patient, "/", patient, ".post.isoform.sct.by_subclone.pdf"))
invisible(tryCatch(
  {
    # All cells in light gray as background. Coordinates are taken by position
    # (columns 2 and 3), like the overlays below, so the plot does not depend
    # on how the embedding columns happen to be named (e.g. UMAP_1 vs umap_1).
    # Add xlim = c(-10, 7), ylim = c(-8, 9) only if you want to focus on a region.
    plot(
      umap_post_coordinate[, 2], umap_post_coordinate[, 3],
      pch = 16, cex = 0.4, col = "gray90", xlab = "UMAP1", ylab = "UMAP2"
    )
    for (clone in names(post_clone_colors)) {
      # A subclone absent from the assignments yields NULL here, which selects
      # no rows and draws nothing.
      in_clone <- rownames(umap_post_coordinate) %in% post_barcodes_by_clone[[clone]]
      # Normal cells are drawn slightly smaller than the subclone cells.
      point_size <- if (clone == "normal") 0.5 else 0.7
      points(
        umap_post_coordinate[in_clone, 2], umap_post_coordinate[in_clone, 3],
        pch = 16, cex = point_size, col = post_clone_colors[[clone]]
      )
    }
  },
  # Always close the PDF device, even if plotting fails.
  finally = dev.off()
))


# Perform differential expression analysis
# Clusters 1 and 10 together (ident.1) against all remaining cells
# (ident.2 = NULL).
post_diff_genes <- FindMarkers(
  seurat_df_post_sct,
  ident.1 = c(1, 10),
  ident.2 = NULL
)
post_diff_genes$gene <- rownames(post_diff_genes)
# gene_base is the part of the feature name before the first ":" (presumably
# the gene symbol of a "gene:isoform" name -- confirm against
# genes_renamed.tsv). vapply() guarantees a character vector even when
# FindMarkers returns no rows.
post_diff_genes$gene_base <- vapply(
  strsplit(rownames(post_diff_genes), ":"),
  `[`,
  character(1),
  1
)

# Gene lists used to pick out driver genes from the results.
cll_drivers <- read_tsv("meta/CLL_Drivers_2023.tsv")$GENE_NAME
cll_bcr_drivers <- read_tsv("meta/CLL_and_BCR_Genes.tsv")$GENE_NAME

# NOTE(review): post_driver_diff is computed but not written out in the
# visible part of this script; only the BCR variant is saved below.
post_driver_diff <- subset(post_diff_genes, gene_base %in% cll_drivers)
post_driver_bcr_diff <- subset(post_diff_genes, gene_base %in% cll_bcr_drivers)

# Both tables are written to the current working directory.
write.csv(post_driver_bcr_diff, paste0(patient, ".isoforms.post.driver_bcr_diff.csv"))
write.csv(post_diff_genes, paste0(patient, ".isoforms.post.diff.csv"))

# Find the difference between the two clusters within the original population
# (cluster 1 against cluster 10).
# NOTE(review): btk_diff is not used again in the visible part of this script.
btk_diff <- FindMarkers(seurat_df_post_sct, ident.1 = c(1), ident.2 = c(10))

# Rename clusters
# Cells in clusters 1 and 10 are labelled "Original Clone"; cells in every
# other cluster are labelled "BTK Subclone".
original_clone_clusters <- c("1", "10")
seurat_df_post_sct$subclone <- ifelse(
  Idents(seurat_df_post_sct) %in% original_clone_clusters,
  "Original Clone",
  "BTK Subclone"
)
# Set the subclone identities
# From here on the active identities are the two labels, not cluster numbers.
Idents(seurat_df_post_sct) <- seurat_df_post_sct$subclone

## Heatmap

# Filter significant genes (adjusted p-value < 0.05); rows with a missing
# adjusted p-value are dropped.
sig_genes <- post_diff_genes[which(post_diff_genes$p_val_adj < 0.05), ]

# Split genes by the sign of the log fold change. post_diff_genes was computed
# with clusters 1 and 10 as ident.1, so a positive value means higher in those
# clusters and a negative value means higher in the remaining cells.
up_genes <- sig_genes[which(sig_genes$avg_log2FC > 0), ]
down_genes <- sig_genes[which(sig_genes$avg_log2FC < 0), ]

# Order each set by effect size, strongest first.
up_genes <- up_genes[order(-up_genes$avg_log2FC), ]       # Highest logFC first
down_genes <- down_genes[order(down_genes$avg_log2FC), ]  # Most negative logFC first

# Select top N genes from each group
N <- 10  # Number of genes to display per direction
top_up_genes <- head(up_genes$gene, N)
top_down_genes <- head(down_genes$gene, N)

# Combine the gene list: negative-logFC genes first, then positive-logFC genes
ordered_genes <- c(top_down_genes, top_up_genes)

# Generate the heatmap, grouped by the active identities (no group.by given).
htmap <- DoHeatmap(
  seurat_df_post_sct,
  features = ordered_genes,
  group.colors = c("cyan2", "dodgerblue3"),
  label = FALSE
) +
  scale_fill_gradientn(colors = c("navy", "white", "firebrick3")) +
  theme(
    axis.text.y = element_text(size = 15, face = "italic"),
    legend.text = element_text(size = 15),
    panel.border = element_blank(),      # Remove the black border
    panel.background = element_blank(),  # Remove background color if any
    axis.line = element_blank()          # Remove axis lines if necessary
  )

# Written to the current working directory.
ggsave(
  paste0(patient, ".post.heatmap.isoform.png"),
  htmap,
  width = 14,
  height = 6,
  device = "png"
)

