# Work from the project directory so the relative file names below resolve.
setwd("/Users/islizovs/Desktop/TELSeqII_re_analysis")

# Read the sample metadata table and preview its first rows.
meta <- read.csv(file = "ARGMOBRich_TELSII_Metadata.csv", header = TRUE)
head(meta)

library(dplyr)
library(ggplot2)

# Keep every row whose Seq_Type is something other than "NovaSeq"
# (subset() also drops rows where that comparison is NA), then preview the
# last rows of the result.
filter_metadata <- subset(meta, Seq_Type != "NovaSeq")
tail(filter_metadata)


# Turn the four grouping columns into factors with an explicit level order.
# Values not listed in `levels` become NA. Each expression reads only its own
# source column, so one transform() call is equivalent to four assignments.
filter_metadata <- transform(
  filter_metadata,
  SampleType_Lab = factor(
    SampleType_Lab,
    levels = c("BF", "FMT", "MOCK", "PPS")
  ),
  Seq_Type = factor(Seq_Type, levels = c("TELSeq", "PacBio")),
  TELS_Type = factor(TELS_Type, levels = c("HS2", "XT", "Non-enriched")),
  Probe_Type = factor(
    Probe_Type,
    levels = c("RES", "MOB", "Combo", "Non-enriched")
  )
)

# Disabled relabelling of TELS_Type, kept for reference.
# NOTE(review): it recodes "HSV2", while the levels above use "HS2" - confirm
# before re-enabling.
#filter_metadata <- filter_metadata %>%
# mutate(TELS_Type = recode(
#  TELS_Type,
# "HSV2" = "TELSeq--HSV2",
#"XT" = "TELSeq--XT"
#))

# Print the re-levelled table.
filter_metadata

# Coerce the sequencing-statistics columns to double precision.
# as.integer() was used here before, which (a) truncates fractional values
# such as percentages and mean lengths toward zero and (b) returns NA for
# anything above .Machine$integer.max (2147483647), a limit that total base
# counts can exceed. as.numeric() keeps the values intact; integer-valued
# data plots exactly as before.
filter_metadata$CCSRd_MeanL <- as.numeric(filter_metadata$CCSRd_MeanL)
filter_metadata$CCSRd_Bases <- as.numeric(filter_metadata$CCSRd_Bases)
filter_metadata$Dup_Pct <- as.numeric(filter_metadata$Dup_Pct)
filter_metadata$Dedup_Reads <- as.numeric(filter_metadata$Dedup_Reads)
filter_metadata$ARG_OnTarget_Pct <- as.numeric(filter_metadata$ARG_OnTarget_Pct)
filter_metadata$MGE_OnTarget_Pct <- as.numeric(filter_metadata$MGE_OnTarget_Pct)



# Boxplot of log10 gDNA mass per sample type, split by TELS_Type.
# Rows with a missing Mass_ng are removed before plotting.
BP1_MASS_SampleType_TELS_Type <- ggplot(
  data = filter_metadata[!is.na(filter_metadata$Mass_ng), ],
  mapping = aes(x = SampleType_Lab, y = log10(Mass_ng), fill = TELS_Type)
) +
  # Boxes first, with solid black outlier markers.
  geom_boxplot(
    lwd = 0.75,
    outlier.shape = 16,
    outlier.size = 2,
    outlier.color = "black"
  ) +
  # Individual observations, jittered and dodged to sit over their own box.
  geom_point(
    mapping = aes(color = TELS_Type),
    position = position_jitterdodge(jitter.width = 0.1, dodge.width = 0.75),
    alpha = 0.5
  ) +
  scale_fill_brewer(palette = "Set1") +
  scale_color_brewer(palette = "Set1") +
  theme_bw() +
  theme(legend.position = "bottom") +
  labs(
    x = "Sample type",
    y = "Log10 gDNA Mass (ng)",
    fill = "TELS Type",
    color = "TELS Type"
  )

BP1_MASS_SampleType_TELS_Type

# Conc_PicoG per sample type, filled by probe set, one facet column per
# TELS_Type level.
BP1_Conc_PicoG_SampleType_TELS_Type_Probe_Type <- ggplot(
  data = filter_metadata,
  mapping = aes(x = SampleType_Lab, y = Conc_PicoG, fill = Probe_Type)
) +
  geom_boxplot(
    alpha = 0.5,
    lwd = 0.5,
    outlier.shape = 16,
    outlier.size = 2,
    outlier.color = "black"
  ) +
  facet_grid(. ~ TELS_Type) +
  scale_fill_brewer(palette = "Set1") +
  scale_color_brewer(palette = "Set1") +
  theme_bw() +
  theme(legend.position = "bottom") +
  labs(x = "Sample type", y = "Picogreen gDNA (ng/ul)", fill = "Probe set")
BP1_Conc_PicoG_SampleType_TELS_Type_Probe_Type


# Nano_260_280 per sample type, filled by probe set, one facet column per
# TELS_Type level.
BP2_Nanodrop_SampleType_TELS_Type_Probe_Type <- ggplot(
  data = filter_metadata,
  mapping = aes(x = SampleType_Lab, y = Nano_260_280, fill = Probe_Type)
) +
  geom_boxplot(
    alpha = 0.5,
    lwd = 0.5,
    outlier.shape = 16,
    outlier.size = 2,
    outlier.color = "black"
  ) +
  facet_grid(. ~ TELS_Type) +
  scale_fill_brewer(palette = "Set1") +
  scale_color_brewer(palette = "Set1") +
  theme_bw() +
  theme(legend.position = "bottom") +
  labs(
    x = "Sample type",
    y = "Nanodrop quality (260/280 nm ratio)",
    fill = "Probe set"
  )
BP2_Nanodrop_SampleType_TELS_Type_Probe_Type

# CCSRd_Bases, rescaled to millions, per sample type; filled by probe set and
# faceted by TELS_Type.
BP3_TotalBases_SampleType_TELS_Type_Probe_Type <- ggplot(
  data = filter_metadata,
  mapping = aes(x = SampleType_Lab, y = CCSRd_Bases / 1e6, fill = Probe_Type)
) +
  geom_boxplot(
    alpha = 0.5,
    lwd = 0.5,
    outlier.shape = 16,
    outlier.size = 2,
    outlier.color = "black"
  ) +
  facet_grid(. ~ TELS_Type) +
  scale_fill_brewer(palette = "Set1") +
  scale_color_brewer(palette = "Set1") +
  theme_bw() +
  theme(legend.position = "bottom") +
  labs(
    x = "Sample type",
    y = "Generated throughput (x 1,000,000 bp)",
    fill = "Probe set"
  )
BP3_TotalBases_SampleType_TELS_Type_Probe_Type


# CCSRd_MeanL per sample type, filled by probe set, one facet column per
# TELS_Type level.
BP4_ReadL_SampleType_TELS_Type_Probe_Type <- ggplot(
  data = filter_metadata,
  mapping = aes(x = SampleType_Lab, y = CCSRd_MeanL, fill = Probe_Type)
) +
  geom_boxplot(
    alpha = 0.5,
    lwd = 0.5,
    outlier.shape = 16,
    outlier.size = 2,
    outlier.color = "black"
  ) +
  facet_grid(. ~ TELS_Type) +
  scale_fill_brewer(palette = "Set1") +
  scale_color_brewer(palette = "Set1") +
  theme_bw() +
  theme(legend.position = "bottom") +
  labs(x = "Sample type", y = "Read length (bp)", fill = "Probe set")
BP4_ReadL_SampleType_TELS_Type_Probe_Type

# Install gridExtra only when it is missing; an unconditional
# install.packages() call re-downloads the package on every run of the script.
if (!requireNamespace("gridExtra", quietly = TRUE)) {
  install.packages("gridExtra")
}
library(gridExtra)

# 2-column panel of the four sequencing-statistics boxplots.
# grid.arrange() draws the panel and returns the assembled layout object.
Seq_stats_plot <- grid.arrange(
  BP1_Conc_PicoG_SampleType_TELS_Type_Probe_Type,
  BP2_Nanodrop_SampleType_TELS_Type_Probe_Type,
  BP3_TotalBases_SampleType_TELS_Type_Probe_Type,
  BP4_ReadL_SampleType_TELS_Type_Probe_Type,
  ncol = 2
)

# Redraw the stored panel.
plot(Seq_stats_plot)


# Dup_Pct per sample type, filled by probe set, one facet column per
# TELS_Type level.
BP5_DupPct_SampleType_TELS_Type_Probe_Type <- ggplot(
  data = filter_metadata,
  mapping = aes(x = SampleType_Lab, y = Dup_Pct, fill = Probe_Type)
) +
  geom_boxplot(
    alpha = 0.5,
    lwd = 0.5,
    outlier.shape = 16,
    outlier.size = 2,
    outlier.color = "black"
  ) +
  facet_grid(. ~ TELS_Type) +
  scale_fill_brewer(palette = "Set1") +
  scale_color_brewer(palette = "Set1") +
  theme_bw() +
  theme(legend.position = "bottom") +
  labs(x = "Sample type", y = "Duplication rate (%)", fill = "Probe set")
BP5_DupPct_SampleType_TELS_Type_Probe_Type

# Dedup_Reads per sample type, filled by probe set, one facet column per
# TELS_Type level.
BP6_DedupReads_SampleType_TELS_Type_Probe_Type <- ggplot(
  data = filter_metadata,
  mapping = aes(x = SampleType_Lab, y = Dedup_Reads, fill = Probe_Type)
) +
  geom_boxplot(
    alpha = 0.5,
    lwd = 0.5,
    outlier.shape = 16,
    outlier.size = 2,
    outlier.color = "black"
  ) +
  facet_grid(. ~ TELS_Type) +
  scale_fill_brewer(palette = "Set1") +
  scale_color_brewer(palette = "Set1") +
  theme_bw() +
  theme(legend.position = "bottom") +
  labs(x = "Sample type", y = "Deduplicated reads (n)", fill = "Probe set")
BP6_DedupReads_SampleType_TELS_Type_Probe_Type


# ARG_OnTarget_Pct per sample type, filled by probe set, one facet column per
# TELS_Type level.
BP7_ARGTarget_SampleType_TELS_Type_Probe_Type <- ggplot(
  data = filter_metadata,
  mapping = aes(x = SampleType_Lab, y = ARG_OnTarget_Pct, fill = Probe_Type)
) +
  geom_boxplot(
    alpha = 0.5,
    lwd = 0.5,
    outlier.shape = 16,
    outlier.size = 2,
    outlier.color = "black"
  ) +
  facet_grid(. ~ TELS_Type) +
  scale_fill_brewer(palette = "Set1") +
  scale_color_brewer(palette = "Set1") +
  theme_bw() +
  theme(legend.position = "bottom") +
  labs(x = "Sample type", y = "ARG 'on-target' rate (%)", fill = "Probe set")
BP7_ARGTarget_SampleType_TELS_Type_Probe_Type


# MGE_OnTarget_Pct per sample type, filled by probe set, one facet column per
# TELS_Type level.
BP8_MGETarget_SampleType_TELS_Type_Probe_Type <- ggplot(
  data = filter_metadata,
  mapping = aes(x = SampleType_Lab, y = MGE_OnTarget_Pct, fill = Probe_Type)
) +
  geom_boxplot(
    alpha = 0.5,
    lwd = 0.5,
    outlier.shape = 16,
    outlier.size = 2,
    outlier.color = "black"
  ) +
  facet_grid(. ~ TELS_Type) +
  scale_fill_brewer(palette = "Set1") +
  scale_color_brewer(palette = "Set1") +
  theme_bw() +
  theme(legend.position = "bottom") +
  labs(x = "Sample type", y = "MGE 'on-target' rate (%)", fill = "Probe set")
BP8_MGETarget_SampleType_TELS_Type_Probe_Type


# 2-column panel of the duplication, deduplicated-read and on-target
# boxplots; the plots are handed over as a list through `grobs`.
Target_stats_plot <- grid.arrange(
  grobs = list(
    BP5_DupPct_SampleType_TELS_Type_Probe_Type,
    BP6_DedupReads_SampleType_TELS_Type_Probe_Type,
    BP7_ARGTarget_SampleType_TELS_Type_Probe_Type,
    BP8_MGETarget_SampleType_TELS_Type_Probe_Type
  ),
  ncol = 2
)

# Redraw the stored panel.
plot(Target_stats_plot)


# Install the figure-assembly/export packages only when they are missing, so
# that re-running the script does not reinstall (or, for Cairo, recompile
# from source) every time.
if (!requireNamespace("cowplot", quietly = TRUE)) {
  install.packages("cowplot")
}
library(cowplot)
if (!requireNamespace("svglite", quietly = TRUE)) {
  install.packages("svglite")
}
library(svglite)
if (!requireNamespace("Cairo", quietly = TRUE)) {
  install.packages("Cairo", type = "source")
}

# Tag each four-plot panel with its sub-figure letter.
labeled_plot1 <- plot_grid(Seq_stats_plot, labels = "a", label_size = 14)
labeled_plot2 <- plot_grid(Target_stats_plot, labels = "b", label_size = 14)

# Stack panel "a" above panel "b" in a single column.
combined_plot <- grid.arrange(
  labeled_plot1, labeled_plot2,
  ncol = 1
)



# Export the combined figure as SVG through ggsave().
ggsave(filename = "/Users/islizovs/Desktop/TELSeqII_re_analysis/Images/Final_seq_summary.svg", 
       plot = combined_plot, 
       device = "svg")

# Second export through an explicitly opened svglite device.
# combined_plot is the layout object returned by grid.arrange(); calling
# print() on it only writes a text description to the console and leaves the
# device empty, so the figure has to be rendered with grid.draw().
svglite("/Users/islizovs/Desktop/TELSeqII_re_analysis/Images/Rplot.svg")
grid::grid.draw(combined_plot)
dev.off()

########################################################
########################################################
####ANALYSIS OF ARG LOG ABUNDANCE OF VIOLIN PLOTS#######
########################################################



# Modelling packages. car and gamlss are installed only when missing instead
# of being reinstalled on every run.
library(lme4)
if (!requireNamespace("car", quietly = TRUE)) {
  install.packages("car")
}
library(car)
if (!requireNamespace("gamlss", quietly = TRUE)) {
  install.packages("gamlss")
}
library(gamlss)
library(dplyr)

# Load the abundance table and print it.
df.abundance <- read.csv("violin_json.csv")
df.abundance

str(df.abundance)
# Force `abundance` to numeric (via character, so factor codes are never used).
df.abundance$abundance <- as.numeric(as.character(df.abundance$abundance))
# Convert the covariates to factors (if not already factors)
df.abundance$sample_type <- as.factor(df.abundance$sample_type)
df.abundance$tels_type <- as.factor(df.abundance$tels_type)
df.abundance$probe_type <- as.factor(df.abundance$probe_type)

# Back-transform: `abundance` is treated as a log10 value here.
df.abundance$nonlogabundance <- 10^df.abundance$abundance

df.abundance

# Rows whose gene_type is "ARG".
df.ARG_ABUNDANCE <- subset(df.abundance, gene_type == "ARG")
tail(df.ARG_ABUNDANCE)

# Of those, the rows whose sample_type is " BF" (the value carries a leading
# space).
df.ARG_ABUNDANCE_BF <- filter(df.ARG_ABUNDANCE, sample_type == " BF")
df.ARG_ABUNDANCE_BF
# Cell counts for every probe_type x tels_type combination.
table(df.ARG_ABUNDANCE_BF$probe_type, df.ARG_ABUNDANCE_BF$tels_type)

# Disabled relevel calls (they target the PPS frame), kept for reference.
#df.ARG_ABUNDANCE_PPS$tels_type <- relevel(df.ARG_ABUNDANCE_PPS$tels_type, ref = " PacBio")
#df.ARG_ABUNDANCE_PPS$probe_type <- relevel(df.ARG_ABUNDANCE_PPS$probe_type, ref = " NonEnr")

# Fix the level order; the first level listed is the reference level.
df.ARG_ABUNDANCE_BF$tels_type <- factor(
  df.ARG_ABUNDANCE_BF$tels_type,
  levels = c(" PacBio", " TELSeq V2", " TELSeq XT")
)
df.ARG_ABUNDANCE_BF$probe_type <- factor(
  df.ARG_ABUNDANCE_BF$probe_type,
  levels = c(" NonEnr", " RES", " MOB", " Combo")
)

# Median and range (max - min) of the back-transformed abundance for each
# tels_type level.
result <- summarise(
  group_by(df.ARG_ABUNDANCE_BF, tels_type),
  median_nonlogabundance = median(nonlogabundance),
  range_nonlogabundance = max(nonlogabundance) - min(nonlogabundance)
)

print(result)

# GAMLSS fit of the back-transformed ARG abundance in BF samples on the
# tels_type:probe_type interaction cells, using the ZAGA family.
#
# The iteration cap is passed as `n.cyc`. gamlss.control() has no `niter`
# argument, so the previous `niter = 10000` was absorbed by `...` and the
# default cap stayed in force.
# NOTE(review): with method = mixed() the iteration counts are also governed
# by mixed()'s own arguments - confirm the intended limits.
# NOTE(review): mu.link = "logit" restricts the fitted mean to (0, 1);
# confirm this is intended rather than the family's usual log link.
ARG_ABUNDmodel <- gamlss(
  nonlogabundance ~ tels_type:probe_type,
  family = ZAGA(mu.link = "logit"),
  data = na.omit(df.ARG_ABUNDANCE_BF),
  method = mixed(),
  control = gamlss.control(n.cyc = 10000, trace = TRUE)
)

# Compute the coefficient table once and reuse it under both names used in
# this script (it was previously computed, and printed, twice).
model_summary <- summary(ARG_ABUNDmodel)
modelsummary <- model_summary
str(model_summary)
print(model_summary)

# Extract the p-value column of the coefficient table.
p_values <- model_summary[, "Pr(>|t|)"]

# Adjust for multiple testing; "fdr" is p.adjust()'s alias for the
# Benjamini-Hochberg method. Every row of the table enters the adjustment.
p_adjusted <- p.adjust(p_values, method = "fdr")

# Print adjusted p-values
print(p_adjusted)


plot(ARG_ABUNDmodel)

# Install nleqslv and brms only when missing; brms in particular is slow to
# install, so it should not be reinstalled on every run.
if (!requireNamespace("nleqslv", quietly = TRUE)) {
  install.packages("nleqslv")
}
library(nleqslv)
if (!requireNamespace("brms", quietly = TRUE)) {
  install.packages("brms")
}
library(brms)

# Model formula referenced by the (currently disabled) brm() calls below.
formula <- nonlogabundance ~ tels_type:probe_type

# Specify the model using brm()
#model <- brm(formula = formula,
#             family = Gamma(link = "log"),  # Specify the distribution family
#             data = df.ARG_ABUNDANCE_BF, iter = 4000,  # Increase the number of iterations
#             control = list(max_treedepth = 15),  # Increase max_treedepth
#             chains = 6,  # Use more chains if you have the computational resources
#             cores = 6)  # Use more cores to speed up computation
#fit <- brm(formula, family = Gamma(link = "log"), data = df.ARG_ABUNDANCE_BF)

#summary(model)  # Summary of the model
#plot(model)     # Diagnostic plots (e.g., trace plots, posterior distributions)

#posterior_predictions <- predict(model)

#df.ARG_ABUNDANCE_BF$tels_type <- relevel(df.ARG_ABUNDANCE_BF$tels_type, ref = " PacBio")
#df.ARG_ABUNDANCE_BF$probe_type <- relevel(df.ARG_ABUNDANCE_BF$probe_type, ref = " NonEnr")

#hyp <- c(
#  "tels_type TELSeq V2:probe_type Combo > 0",
#  "tels_type TELSeq XT:probe_type Combo > 0",
#  "tels_type TELSeq V2:probe_type MOB > 0",
#  "tels_type TELSeq XT:probe_type MOB > 0",
#  "tels_type TELSeq V2:probe_type RES > 0",
#  "tels_type TELSeq XT:probe_type RES > 0"
#)

# Test the hypotheses
#hypothesis_test <- hypothesis(model, hyp)
#hypothesis_test

#Estimate Std. Error t value Pr(>|t|)    
#(Intercept)                          -19.4657     0.3488 -55.803  < 2e-16 ***
#  tels_type TELSeq V2                    2.2928     0.8181   2.803 0.005272 ** 
#  tels_type TELSeq XT                    5.4008     0.3817  14.151  < 2e-16 ***
#  probe_type Combo                      -0.5282     0.1963  -2.691 0.007367 ** 
#  probe_type MOB                         1.1332     0.3323   3.410 0.000704 ***
#  tels_type TELSeq V2:probe_type Combo   4.0982     0.8162   5.021 7.24e-07 ***
#  tels_type TELSeq V2:probe_type MOB     3.7420     0.9226   4.056 5.83e-05 ***

#(Intercept)                          -14.5931     0.1206 -121.034  < 2e-16 ***
#  tels_type PacBio:probe_type NonEnr    -4.8726     0.3691  -13.202  < 2e-16 ***
#  tels_type TELSeq V2:probe_type RES    -2.5798     0.7497   -3.441  0.00063 ***
#  tels_type TELSeq XT:probe_type RES     0.5282     0.1963    2.691  0.00737 ** 
#  tels_type TELSeq V2:probe_type MOB     2.2954     0.4558    5.035 6.74e-07 ***
#  tels_type TELSeq XT:probe_type MOB     1.6614     0.3178    5.228 2.56e-07 ***
#  tels_type TELSeq V2:probe_type Combo   0.9902     0.3077    3.218  0.00138 **   

# Reload the abundance table from disk and print it.
df.abundance <- read.csv("violin_json.csv")
df.abundance

str(df.abundance)
# Force `abundance` to numeric, going through character first.
df.abundance[["abundance"]] <- as.numeric(
  as.character(df.abundance[["abundance"]])
)
# Covariates as factors.
df.abundance[["sample_type"]] <- as.factor(df.abundance[["sample_type"]])
df.abundance[["tels_type"]] <- as.factor(df.abundance[["tels_type"]])
df.abundance[["probe_type"]] <- as.factor(df.abundance[["probe_type"]])

# Back-transform: `abundance` is treated as a log10 value here.
df.abundance[["nonlogabundance"]] <- 10^df.abundance[["abundance"]]

df.abundance

# Rows whose gene_type is "ARG".
df.ARG_ABUNDANCE <- subset(df.abundance, gene_type == "ARG")
tail(df.ARG_ABUNDANCE)

# Of those, the rows whose sample_type is " FMT" (leading space included).
df.ARG_ABUNDANCE_FMT <- filter(df.ARG_ABUNDANCE, sample_type == " FMT")

# Median and range (max - min) of the back-transformed abundance for each
# tels_type level.
result <- summarise(
  group_by(df.ARG_ABUNDANCE_FMT, tels_type),
  median_nonlogabundance = median(nonlogabundance),
  range_nonlogabundance = max(nonlogabundance) - min(nonlogabundance)
)

print(result)

# GAMLSS fits (ZAGA family) of ARG abundance in FMT samples.
# The iteration cap is passed as `n.cyc`: gamlss.control() has no `niter`
# argument, so the previous `niter = 1000` was absorbed by `...` and the
# default cap stayed in force. Estimates recorded under that default should
# be re-checked if a fit now runs for more cycles.

# Additive model: main effects of tels_type and probe_type.
ARG_ABUNDmodel_FMT <- gamlss(
  nonlogabundance ~ tels_type + probe_type,
  family = ZAGA(mu.link = "logit"),
  data = df.ARG_ABUNDANCE_FMT,
  control = gamlss.control(n.cyc = 1000, trace = TRUE)
)
modelsummary <- summary(ARG_ABUNDmodel_FMT)

# Interaction-cell model; this assignment replaces the additive fit stored
# under the same name.
ARG_ABUNDmodel_FMT <- gamlss(
  nonlogabundance ~ tels_type:probe_type,
  family = ZAGA(mu.link = "logit"),
  data = df.ARG_ABUNDANCE_FMT,
  control = gamlss.control(n.cyc = 1000, trace = TRUE)
)
modelsummary <- summary(ARG_ABUNDmodel_FMT)

#Estimate Std. Error t value Pr(>|t|)    
#(Intercept)         -20.9149     0.2281 -91.683   <2e-16 ***
#  tels_type TELSeq V2   6.0914     0.3343  18.222   <2e-16 ***
#  tels_type TELSeq XT   5.6623     0.2808  20.163   <2e-16 ***
#  probe_type MOB        0.2478     0.2446   1.013   0.3114    
#  probe_type RES       -0.3941     0.1810  -2.178   0.0297 *  

#(Intercept)                          -15.6423     0.0781 -200.279  < 2e-16 ***
#  tels_type TELSeq V2:probe_type Combo   0.4763     0.2666    1.787 0.074290 .  
#  tels_type TELSeq XT:probe_type Combo   0.5344     0.2024    2.640 0.008432 ** 
#  tels_type TELSeq V2:probe_type MOB     1.7964     0.4617    3.891 0.000108 ***
#  tels_type TELSeq XT:probe_type MOB     0.3810     0.2229    1.709 0.087775 .  
#  tels_type PacBio:probe_type NonEnr    -5.2726     0.2405  -21.921  < 2e-16 *** 
#  tels_type TELSeq V2:probe_type RES    -0.2276     0.8177   -0.278 0.780795    

# Reload the abundance table from disk and print it.
df.abundance <- read.csv("violin_json.csv")
df.abundance

str(df.abundance)
# Force `abundance` to numeric, going through character first.
df.abundance[["abundance"]] <- as.numeric(
  as.character(df.abundance[["abundance"]])
)
# Covariates as factors.
df.abundance[["sample_type"]] <- as.factor(df.abundance[["sample_type"]])
df.abundance[["tels_type"]] <- as.factor(df.abundance[["tels_type"]])
df.abundance[["probe_type"]] <- as.factor(df.abundance[["probe_type"]])

# Back-transform: `abundance` is treated as a log10 value here.
df.abundance[["nonlogabundance"]] <- 10^df.abundance[["abundance"]]

df.abundance

# Rows whose gene_type is "ARG".
df.ARG_ABUNDANCE <- subset(df.abundance, gene_type == "ARG")
tail(df.ARG_ABUNDANCE)

# Of those, the rows whose sample_type is " PPS" (leading space included).
df.ARG_ABUNDANCE_PPS <- filter(df.ARG_ABUNDANCE, sample_type == " PPS")
str(df.ARG_ABUNDANCE_PPS)

# Median and range (max - min) of the back-transformed abundance for each
# tels_type level.
result <- summarise(
  group_by(df.ARG_ABUNDANCE_PPS, tels_type),
  median_nonlogabundance = median(nonlogabundance),
  range_nonlogabundance = max(nonlogabundance) - min(nonlogabundance)
)

print(result)

# Make " PacBio" and " NonEnr" the reference levels of the two covariates.
df.ARG_ABUNDANCE_PPS$tels_type <- relevel(
  df.ARG_ABUNDANCE_PPS$tels_type,
  ref = " PacBio"
)
df.ARG_ABUNDANCE_PPS$probe_type <- relevel(
  df.ARG_ABUNDANCE_PPS$probe_type,
  ref = " NonEnr"
)


# GAMLSS fits (ZAGA family) of ARG abundance in PPS samples.
# The iteration cap is passed as `n.cyc`: gamlss.control() has no `niter`
# argument, so the previous `niter = 1000` was absorbed by `...` and the
# default cap stayed in force. Estimates recorded under that default should
# be re-checked if a fit now runs for more cycles.

# Additive model: main effects of tels_type and probe_type.
ARG_ABUNDmodel_PPS <- gamlss(
  nonlogabundance ~ tels_type + probe_type,
  family = ZAGA(mu.link = "logit"),
  data = df.ARG_ABUNDANCE_PPS,
  control = gamlss.control(n.cyc = 1000, trace = TRUE)
)
modelsummary <- summary(ARG_ABUNDmodel_PPS)

# Interaction-cell model; this assignment replaces the additive fit stored
# under the same name.
ARG_ABUNDmodel_PPS <- gamlss(
  nonlogabundance ~ tels_type:probe_type,
  family = ZAGA(mu.link = "logit"),
  data = df.ARG_ABUNDANCE_PPS,
  control = gamlss.control(n.cyc = 1000, trace = TRUE)
)
modelsummary <- summary(ARG_ABUNDmodel_PPS)

#                     Estimate Std. Error t value Pr(>|t|)    
#(Intercept)         -22.0655     0.2627 -84.004  < 2e-16 ***
#  tels_type TELSeq V2   3.4931     0.6333   5.516 5.48e-08 ***
#  tels_type TELSeq XT   2.6230     0.2748   9.545  < 2e-16 ***
#  probe_type Combo      2.4419     0.1689  14.460  < 2e-16 ***
#  probe_type MOB        1.7409     0.5696   3.056  0.00236 ** 


#Estimate Std. Error  t value Pr(>|t|)    
#(Intercept)                          -19.44252    0.07794 -249.447   <2e-16 ***
#  tels_type PacBio:probe_type NonEnr    -2.62299    0.26538   -9.884   <2e-16 ***
#  tels_type TELSeq V2:probe_type Combo   0.82618    0.32031    2.579   0.0102 *  
#  tels_type TELSeq XT:probe_type Combo   2.66265    0.17891   14.883   <2e-16 ***
#  tels_type TELSeq V2:probe_type MOB     2.75606    0.14989   18.388   <2e-16 ***
#  tels_type TELSeq XT:probe_type MOB     0.17887    0.29801    0.600   0.5486   

# Plot the PPS model object as it currently stands, i.e. whichever fit was
# assigned to ARG_ABUNDmodel_PPS last.
plot(ARG_ABUNDmodel_PPS)


########################################################
########################################################
####ANALYSIS OF MGE LOG ABUNDANCE OF VIOLIN PLOTS#######
########################################################

# Reload the abundance table. The target used to be spelled `f.abundance`,
# which left the reload in an unused variable while the code below kept
# working on the older `df.abundance`.
df.abundance <- read.csv("violin_json.csv")
df.abundance

str(df.abundance)
df.abundance$abundance <- as.numeric(as.character(df.abundance$abundance))
# Convert the covariates to factors (if not already factors)
df.abundance$sample_type <- as.factor(df.abundance$sample_type)
df.abundance$tels_type <- as.factor(df.abundance$tels_type)
df.abundance$probe_type <- as.factor(df.abundance$probe_type)

# Back-transform: `abundance` is treated as a log10 value here.
df.abundance$nonlogabundance <- 10^df.abundance$abundance

df.abundance

# Subsetting df.abundance to include only rows where gene_type is 'MGE'
df.MGE_ABUNDANCE <- subset(df.abundance, gene_type == "MGE")
tail(df.MGE_ABUNDANCE)

# Of those, the rows whose sample_type is " BF" (leading space included).
df.MGE_ABUNDANCE_BF <- df.MGE_ABUNDANCE %>%
  filter(sample_type == " BF")
str(df.MGE_ABUNDANCE_BF)
# Fix the level order; the first level listed is the reference level.
df.MGE_ABUNDANCE_BF$tels_type <- factor(df.MGE_ABUNDANCE_BF$tels_type, levels = c(" PacBio", " TELSeq V2", " TELSeq XT"))
df.MGE_ABUNDANCE_BF$probe_type <- factor(df.MGE_ABUNDANCE_BF$probe_type, levels = c(" NonEnr", " RES", " MOB"," Combo"))
df.MGE_ABUNDANCE_BF

# GAMLSS fits (ZAGA family) of MGE abundance in BF samples.
# The iteration cap is passed as `n.cyc`: gamlss.control() has no `niter`
# argument, so the previous `niter = 1000` was absorbed by `...` and the
# default cap stayed in force. Estimates recorded under that default should
# be re-checked if a fit now runs for more cycles.

# Additive model, fitted on the rows without any missing value.
MGE_ABUNDmodel_BF <- gamlss(
  nonlogabundance ~ tels_type + probe_type,
  family = ZAGA(mu.link = "logit"),
  data = na.omit(df.MGE_ABUNDANCE_BF),
  control = gamlss.control(n.cyc = 1000, trace = TRUE)
)
modelsummary <- summary(MGE_ABUNDmodel_BF)

# Interaction-cell model; this assignment replaces the additive fit stored
# under the same name.
# NOTE(review): unlike the fit above, this one receives the frame without
# na.omit() - confirm whether that difference is intended.
MGE_ABUNDmodel_BF <- gamlss(
  nonlogabundance ~ tels_type:probe_type,
  family = ZAGA(mu.link = "logit"),
  data = df.MGE_ABUNDANCE_BF,
  control = gamlss.control(n.cyc = 1000, trace = TRUE)
)
modelsummary <- summary(MGE_ABUNDmodel_BF)

#Estimate Std. Error  t value Pr(>|t|)    
#(Intercept)         -18.2108     0.1670 -109.048  < 2e-16 ***
#  tels_type TELSeq V2   2.9879     0.2122   14.083  < 2e-16 ***
#  tels_type TELSeq XT   0.5949     0.1724    3.450 0.000573 ***
#  probe_type RES        2.7297     0.1320   20.681  < 2e-16 ***
#  probe_type MOB       -0.2877     0.1249   -2.304 0.021340 * 

#Estimate Std. Error  t value Pr(>|t|)    
#(Intercept)                          -17.65219    0.04173 -422.965  < 2e-16 ***
#  tels_type PacBio:probe_type NonEnr    -0.55864    0.16975   -3.291  0.00102 ** 
#  tels_type TELSeq V2:probe_type RES     1.59348    0.29286    5.441 5.98e-08 ***
#  tels_type TELSeq XT:probe_type RES     2.95943    0.14213   20.822  < 2e-16 ***
#  tels_type TELSeq V2:probe_type MOB     0.86587    0.28804    3.006  0.00268 ** 
#  tels_type TELSeq XT:probe_type MOB    -0.11767    0.13371   -0.880  0.37898    
#  tels_type TELSeq V2:probe_type Combo   2.87770    0.17156   16.774  < 2e-16 ***


# Reload the abundance table. The target used to be spelled `f.abundance`,
# which left the reload in an unused variable while the code below kept
# working on the older `df.abundance`.
df.abundance <- read.csv("violin_json.csv")
df.abundance

str(df.abundance)
df.abundance$abundance <- as.numeric(as.character(df.abundance$abundance))
# Convert the covariates to factors (if not already factors)
df.abundance$sample_type <- as.factor(df.abundance$sample_type)
df.abundance$tels_type <- as.factor(df.abundance$tels_type)
df.abundance$probe_type <- as.factor(df.abundance$probe_type)

# Back-transform: `abundance` is treated as a log10 value here.
df.abundance$nonlogabundance <- 10^df.abundance$abundance

df.abundance

# Subsetting df.abundance to include only rows where gene_type is 'MGE'
df.MGE_ABUNDANCE <- subset(df.abundance, gene_type == "MGE")
tail(df.MGE_ABUNDANCE)

# Of those, the rows whose sample_type is " FMT" (leading space included).
df.MGE_ABUNDANCE_FMT <- df.MGE_ABUNDANCE %>%
  filter(sample_type == " FMT")
# Inspect the FMT subset just created (this used to inspect the BF frame, a
# copy-paste slip).
str(df.MGE_ABUNDANCE_FMT)
# Fix the level order; the first level listed is the reference level.
df.MGE_ABUNDANCE_FMT$tels_type <- factor(df.MGE_ABUNDANCE_FMT$tels_type, levels = c(" PacBio", " TELSeq V2", " TELSeq XT"))
df.MGE_ABUNDANCE_FMT$probe_type <- factor(df.MGE_ABUNDANCE_FMT$probe_type, levels = c(" NonEnr", " RES", " MOB"," Combo"))
df.MGE_ABUNDANCE_FMT

# GAMLSS fits (ZAGA family) of MGE abundance in FMT samples.
# The iteration cap is passed as `n.cyc`: gamlss.control() has no `niter`
# argument, so the previous `niter = 1000` was absorbed by `...` and the
# default cap stayed in force. Estimates recorded under that default should
# be re-checked if a fit now runs for more cycles.

# Additive model: main effects of tels_type and probe_type.
MGE_ABUNDmodel_FMT <- gamlss(
  nonlogabundance ~ tels_type + probe_type,
  family = ZAGA(mu.link = "logit"),
  data = df.MGE_ABUNDANCE_FMT,
  control = gamlss.control(n.cyc = 1000, trace = TRUE)
)
modelsummary <- summary(MGE_ABUNDmodel_FMT)

# Interaction-cell model; this assignment replaces the additive fit stored
# under the same name.
MGE_ABUNDmodel_FMT <- gamlss(
  nonlogabundance ~ tels_type:probe_type,
  family = ZAGA(mu.link = "logit"),
  data = df.MGE_ABUNDANCE_FMT,
  control = gamlss.control(n.cyc = 1000, trace = TRUE)
)
modelsummary <- summary(MGE_ABUNDmodel_FMT)

#Estimate Std. Error  t value Pr(>|t|)    
#(Intercept)         -19.90111    0.07730 -257.449  < 2e-16 ***
#  tels_type TELSeq V2   4.41014    0.13148   33.542  < 2e-16 ***
#  tels_type TELSeq XT   3.29291    0.11572   28.455  < 2e-16 ***
#  probe_type RES        0.48950    0.11446    4.277 1.98e-05 ***
#  probe_type MOB       -1.58246    0.09556  -16.560  < 2e-16 ***

#Estimate Std. Error  t value Pr(>|t|)    
#(Intercept)                          -16.3573     0.1085 -150.773  < 2e-16 ***
#  tels_type PacBio:probe_type NonEnr    -3.5438     0.1328  -26.692  < 2e-16 ***
#  tels_type TELSeq V2:probe_type RES    -0.3367     0.2752   -1.224  0.22121    
#  tels_type TELSeq XT:probe_type RES     0.3149     0.1341    2.349  0.01893 *  
#  tels_type TELSeq V2:probe_type MOB    -0.2078     0.1723   -1.206  0.22805    
#  tels_type TELSeq XT:probe_type MOB    -1.9447     0.1209  -16.083  < 2e-16 ***
#  tels_type TELSeq V2:probe_type Combo   0.4430     0.1613    2.746  0.00608 ** 

# Reload the abundance table. The target used to be spelled `f.abundance`,
# which left the reload in an unused variable while the code below kept
# working on the older `df.abundance`.
df.abundance <- read.csv("violin_json.csv")
df.abundance

str(df.abundance)
df.abundance$abundance <- as.numeric(as.character(df.abundance$abundance))
# Convert the covariates to factors (if not already factors)
df.abundance$sample_type <- as.factor(df.abundance$sample_type)
df.abundance$tels_type <- as.factor(df.abundance$tels_type)
df.abundance$probe_type <- as.factor(df.abundance$probe_type)

# Back-transform: `abundance` is treated as a log10 value here.
df.abundance$nonlogabundance <- 10^df.abundance$abundance

df.abundance

# Subsetting df.abundance to include only rows where gene_type is 'MGE'
df.MGE_ABUNDANCE <- subset(df.abundance, gene_type == "MGE")
tail(df.MGE_ABUNDANCE)

# Of those, the rows whose sample_type is " PPS" (leading space included).
df.MGE_ABUNDANCE_PPS <- df.MGE_ABUNDANCE %>%
  filter(sample_type == " PPS")
str(df.MGE_ABUNDANCE_PPS)
# Fix the level order; the first level listed is the reference level.
df.MGE_ABUNDANCE_PPS$tels_type <- factor(df.MGE_ABUNDANCE_PPS$tels_type, levels = c(" PacBio", " TELSeq V2", " TELSeq XT"))
df.MGE_ABUNDANCE_PPS$probe_type <- factor(df.MGE_ABUNDANCE_PPS$probe_type, levels = c(" NonEnr", " RES", " MOB"," Combo"))


# GAMLSS fits (ZAGA family) of MGE abundance in PPS samples.
# The iteration cap is passed as `n.cyc`: gamlss.control() has no `niter`
# argument, so the previous `niter = 1000` was absorbed by `...` and the
# default cap stayed in force. Estimates recorded under that default should
# be re-checked if a fit now runs for more cycles.

# Additive model: main effects of tels_type and probe_type.
MGE_ABUNDmodel_PPS <- gamlss(
  nonlogabundance ~ tels_type + probe_type,
  family = ZAGA(mu.link = "logit"),
  data = df.MGE_ABUNDANCE_PPS,
  control = gamlss.control(n.cyc = 1000, trace = TRUE)
)
modelsummary <- summary(MGE_ABUNDmodel_PPS)

# Interaction-cell model; this assignment replaces the additive fit stored
# under the same name.
MGE_ABUNDmodel_PPS <- gamlss(
  nonlogabundance ~ tels_type:probe_type,
  family = ZAGA(mu.link = "logit"),
  data = df.MGE_ABUNDANCE_PPS,
  control = gamlss.control(n.cyc = 1000, trace = TRUE)
)
modelsummary <- summary(MGE_ABUNDmodel_PPS)

#Estimate Std. Error t value Pr(>|t|)    
#(Intercept)         -21.83384    0.04646 -469.96   <2e-16 ***
#  tels_type TELSeq V2   6.88447    0.10444   65.92   <2e-16 ***
#  tels_type TELSeq XT   5.66384    0.07522   75.30   <2e-16 ***
#  probe_type RES        1.05312    0.09482   11.11   <2e-16 ***
#  probe_type MOB       -2.40789    0.07692  -31.30   <2e-16 ***

#Estimate Std. Error  t value Pr(>|t|)    
#(Intercept)                          -16.01765    0.06614 -242.180  < 2e-16 ***
#  tels_type PacBio:probe_type NonEnr    -5.81619    0.08069  -72.079  < 2e-16 ***
#  tels_type TELSeq V2:probe_type RES     2.41685    1.74989    1.381    0.167    
#  tels_type TELSeq XT:probe_type RES     0.90020    0.09701    9.279  < 2e-16 ***
#  tels_type TELSeq V2:probe_type MOB    -0.74439    0.16143   -4.611 4.12e-06 ***
#  tels_type TELSeq XT:probe_type MOB    -2.68498    0.08661  -31.002  < 2e-16 ***
#  tels_type TELSeq V2:probe_type Combo   0.60262    0.11946    5.045 4.74e-07 ***

# Empirical cumulative distribution of CCSRd_MeanL, one curve per probe set,
# in a SampleType_Lab (rows) x TELS_Type (columns) facet grid.
# The legend title is attached to `color`, the aesthetic that is actually
# mapped; it was previously given as `fill`, which this plot never maps, so
# the title was silently not applied.
ggplot(filter_metadata, aes(x = CCSRd_MeanL, color = Probe_Type)) +
  stat_ecdf(size = 1.5) +
  facet_grid(SampleType_Lab ~ TELS_Type) +
  labs(
    x = "Read Length (bp)",
    y = "Cumulative proportion",
    color = "Experimental Condition"
  ) +
  theme_bw() +
  scale_fill_brewer(palette = "Set1") +
  scale_color_brewer(palette = "Set1") +
  theme(legend.position = "bottom")

########################################################
########################################################
####ORDINATION ANALYSIS#################################
########################################################
library(readxl)
library(dplyr)
library(gt)
library(webshot2)
library(ggpubr)
library(cowplot)


##RESISTOME PLOTS

# PCA ordination plot of the human data set at rank "group", coloured by
# TELS_Type, with the first three taxa vectors drawn and 95% t-distribution
# ellipses per TELS_Type.
# NOTE(review): HUMAN.imputed.group.ps, tax_transform(), ord_calc() and
# ord_plot() are not defined or loaded in the visible part of this script -
# presumably they come from an earlier session/package load; confirm.
HUMAN_ARGgroupPLOTviz <- HUMAN.imputed.group.ps %>%
  tax_transform("hell", rank = "group") %>%
  # PCA is requested explicitly (it is also the default when neither a
  # distance matrix nor constraints are supplied).
  ord_calc(method = "PCA") %>%
  ord_plot(
    color = "TELS_Type",
    plot_taxa = 1:3,
    size = 2,
    tax_vec_length = 1.5,
    center = TRUE,
    tax_lab_length = 1.5,
    tax_lab_style = tax_lab_style(
      type = "text",
      max_angle = 90,
      size = 3,
      fontface = "bold.italic",
      check_overlap = FALSE
    ),
    auto_caption = NA
  ) +
  # Disabled palette override, kept for reference:
  #scale_colour_brewer(palette = "Dark2") +
  ggside::scale_xsidey_discrete(labels = NULL) +
  ggside::scale_ysidex_discrete(labels = NULL) +
  stat_ellipse(
    aes(colour = TELS_Type),
    geom = "path",
    type = "t",
    level = 0.95,
    alpha = 0.5,
    lwd = 0.75
  ) +
  scale_linetype_manual(
    values = c(
      "TELSeq--XT" = "solid",
      "PacBio" = "dashed",
      "TELSeq--HSV2" = "dotted"
    )
  ) +
  coord_fixed(xlim = c(-1.5, 2.0), ylim = c(-2.5, 1.5)) +
  theme(
    axis.line = element_line(),
    text = element_text(family = "Arial", size = 10),
    panel.background = element_blank(),
    axis.text = element_text(size = 10, colour = "black"),
    axis.title.y = element_text(size = 10, vjust = 1.75),
    axis.title.x = element_text(size = 10, vjust = -1.5),
    legend.text = element_text(size = 15),
    legend.title = element_text(size = 20),
    plot.margin = margin(t = 10, r = 10, b = 10, l = 10)
  ) +
  # Legend labels are assigned by position, in the order of the colour
  # scale's levels.
  scale_color_discrete(labels = c("XT-HS2", "XT", "Non-enriched")) +
  labs(color = "Enrichment Platform")


HUMAN_ARGgroupPLOTviz

# PCA ordination plot of the bovine data set at rank "group", coloured by
# TELS_Type (Dark2 palette), with the first three taxa vectors drawn and 95%
# t-distribution ellipses per TELS_Type.
#
# Fix: the chain used to end in `#+`, which left
# `scale_color_discrete(...) + labs(...)` as a free-standing expression. That
# expression adds a scale to a labels object with no plot on the left-hand
# side, which ggplot2 rejects, and the legend labels/title never reached this
# plot. The labels now live on the existing Dark2 colour scale (so the
# palette is kept) and labs() is attached to the chain.
# NOTE(review): the legend labels are positional - verify they match the
# level order of TELS_Type in this data set.
BOVINE_ARGgroupPLOTviz <- BOVINE.imputed.group.ps %>%
  tax_transform("hell", rank = "group") %>%
  # PCA is requested explicitly (it is also the default when neither a
  # distance matrix nor constraints are supplied).
  ord_calc(method = "PCA") %>%
  ord_plot(
    color = "TELS_Type",
    plot_taxa = 1:3,
    size = 2,
    tax_vec_length = 3.0,
    center = TRUE,
    tax_lab_length = 3.2,
    tax_lab_style = tax_lab_style(
      type = "text",
      max_angle = 90,
      size = 3,
      fontface = "bold.italic",
      check_overlap = FALSE
    ),
    auto_caption = NA
  ) +
  scale_colour_brewer(
    palette = "Dark2",
    aesthetics = c("colour"),
    labels = c("XT-HS2", "XT", "Non-enriched")
  ) +
  ggside::scale_xsidey_discrete(labels = NULL) +
  ggside::scale_ysidex_discrete(labels = NULL) +
  stat_ellipse(
    aes(colour = TELS_Type),
    geom = "path",
    type = "t",
    level = 0.95,
    alpha = 0.5,
    lwd = 0.75
  ) +
  # Disabled manual fill palette, kept for reference:
  #scale_fill_manual(values = c("Bovine.TELSeq" = "#0E8D40", "Bovine.PacBio" = "#73C06D",
  #                             "Human.TELSeq" = "#483E8B", "Human.PacBio" = "#8C7BB7",
  #                             "Mock.TELSeq" = "#A96F25", "Mock.PacBio" = "#F49E1C",
  #                             "Soil.TELSeq" = "#15B0B8", "Soil.PacBio" = "#5CC8DA")) +
  scale_linetype_manual(
    values = c(
      "TELSeq--XT" = "solid",
      "PacBio" = "dashed",
      "TELSeq--HSV2" = "dotted"
    )
  ) +
  coord_fixed(xlim = c(-1.5, 2), ylim = c(-2.5, 1.5)) +
  theme(
    axis.line = element_line(),
    text = element_text(family = "Arial", size = 10),
    panel.background = element_blank(),
    axis.text = element_text(size = 10, colour = "black"),
    axis.title.y = element_text(size = 10, vjust = 1.75),
    axis.title.x = element_text(size = 10, vjust = -1.5),
    legend.text = element_text(size = 15),
    legend.title = element_text(size = 20),
    plot.margin = margin(t = 10, r = 10, b = 10, l = 10)
  ) +
  labs(color = "Enrichment Platform")

# PCA ordination of the soil ARG-group profiles, coloured by TELS_Type.
# Steps: Hellinger transform at rank "group" -> PCA -> ord_plot drawing the
# first three taxon loading vectors, then 95% t-distribution ellipses per
# TELS_Type and the shared publication theme.
SOIL_ARGgroupPLOTviz <- SOIL.imputed.group.ps %>%
  tax_transform("hell", rank = "group") %>%
  # With no distance matrix or constraints supplied, PCA is the ordination used.
  ord_calc(method = "PCA") %>%
  ord_plot(
    color = "TELS_Type",
    plot_taxa = 1:3,
    size = 2,
    tax_vec_length = 1.5,
    center = TRUE,
    tax_lab_length = 1.5,
    tax_lab_style = tax_lab_style(
      type = "text", max_angle = 90, size = 3,
      fontface = "bold.italic", check_overlap = FALSE
    ),
    auto_caption = NA
  ) +
  scale_colour_brewer(palette = "Dark2", aesthetics = c("colour")) +
  # Side-panel axes carry no labels.
  ggside::scale_xsidey_discrete(labels = NULL) +
  ggside::scale_ysidex_discrete(labels = NULL) +
  stat_ellipse(
    aes(colour = TELS_Type),
    geom = "path", type = "t", level = 0.95, alpha = 0.5, lwd = 0.75
  ) +
  # NOTE(review): no fill or linetype aesthetic is mapped in this chain, so the
  # two manual scales below may have no visible effect -- confirm.
  scale_fill_manual(
    values = c(
      "Bovine.TELSeq" = "#0E8D40", "Bovine.PacBio" = "#73C06D",
      "Human.TELSeq" = "#483E8B", "Human.PacBio" = "#8C7BB7",
      "Mock.TELSeq" = "#A96F25", "Mock.PacBio" = "#F49E1C",
      "Soil.TELSeq" = "#15B0B8", "Soil.PacBio" = "#5CC8DA"
    )
  ) +
  scale_linetype_manual(
    values = c("TELSeq--XT" = "solid", "PacBio" = "dashed", "TELSeq--HSV2" = "dotted")
  ) +
  coord_fixed(xlim = c(-1.5, 2.0), ylim = c(-2.5, 1.5)) +
  theme(
    axis.line = element_line(),
    text = element_text(family = "Arial", size = 10),
    panel.background = element_blank(),
    axis.text = element_text(size = 10, colour = "black"),
    axis.title.y = element_text(size = 10, vjust = 1.75),
    axis.title.x = element_text(size = 10, vjust = -1.5),
    legend.text = element_text(size = 15),
    legend.title = element_text(size = 20),
    plot.margin = margin(t = 10, r = 10, b = 10, l = 10)
  ) +
  # NOTE(review): second colour scale in this chain; ggplot2 presumably keeps
  # only the last one added, overriding the Dark2 palette -- confirm intent.
  # Labels are positional; assumes level order HS2, XT, Non-enriched -- verify.
  scale_color_discrete(labels = c("XT-HS2", "XT", "Non-enriched")) +
  labs(color = "Enrichment Platform")

# Combined ARG figure: the three ARG ordinations side by side in one row,
# sharing a single legend placed underneath.
arg_a <- HUMAN_ARGgroupPLOTviz
arg_b <- BOVINE_ARGgroupPLOTviz
arg_c <- SOIL_ARGgroupPLOTviz

ggarrange(
  arg_a, arg_b, arg_c,
  labels = c("1A) FMT", "1B) BF", "1C) PPS"),
  nrow = 1,
  align = "h",
  common.legend = TRUE,
  legend = "bottom"
)

##MOBILOME PLOTS

# PCA ordination of the human mobilome profiles, coloured by TELS_Type.
# Steps: Hellinger transform -> PCA -> ord_plot, then 95% t-distribution
# ellipses per TELS_Type and the shared publication theme.
HUMAN_mobilome <- HUMAN.imputed.mobilome.ps %>%
  tax_transform("hell") %>%
  # With no distance matrix or constraints supplied, PCA is the ordination used.
  ord_calc(method = "PCA") %>%
  ord_plot(color = "TELS_Type", auto_caption = NA) +
  scale_colour_brewer(palette = "Dark2", aesthetics = c("colour")) +
  # Side-panel axes carry no labels.
  ggside::scale_xsidey_discrete(labels = NULL) +
  ggside::scale_ysidex_discrete(labels = NULL) +
  stat_ellipse(
    aes(colour = TELS_Type),
    geom = "path", type = "t", level = 0.95, alpha = 0.5, lwd = 0.75
  ) +
  # NOTE(review): no fill or linetype aesthetic is mapped in this chain, so the
  # two manual scales below may have no visible effect -- confirm.
  scale_fill_manual(
    values = c(
      "Bovine.TELSeq" = "#0E8D40", "Bovine.PacBio" = "#73C06D",
      "Human.TELSeq" = "#483E8B", "Human.PacBio" = "#8C7BB7",
      "Mock.TELSeq" = "#A96F25", "Mock.PacBio" = "#F49E1C",
      "Soil.TELSeq" = "#15B0B8", "Soil.PacBio" = "#5CC8DA"
    )
  ) +
  scale_linetype_manual(
    values = c("TELSeq--XT" = "solid", "PacBio" = "dashed", "TELSeq--HSV2" = "dotted")
  ) +
  coord_fixed(xlim = c(-1.5, 2), ylim = c(-1.5, 1.5)) +
  theme(
    axis.line = element_line(),
    text = element_text(family = "Arial", size = 10),
    panel.background = element_blank(),
    axis.text = element_text(size = 10, colour = "black"),
    axis.title.y = element_text(size = 10, vjust = 1.75),
    axis.title.x = element_text(size = 10, vjust = -1.5),
    legend.text = element_text(size = 15),
    legend.title = element_text(size = 20)
  ) +
  # NOTE(review): second colour scale in this chain; ggplot2 presumably keeps
  # only the last one added, overriding the Dark2 palette -- confirm intent.
  # Labels are positional; assumes level order HS2, XT, Non-enriched -- verify.
  scale_color_discrete(labels = c("XT-HS2", "XT", "Non-enriched")) +
  labs(color = "Enrichment Platform")

# Display the plot.
HUMAN_mobilome

# PCA ordination of the bovine mobilome profiles, coloured by TELS_Type.
# Steps: Hellinger transform -> PCA -> ord_plot, then 95% t-distribution
# ellipses per TELS_Type and the shared publication theme.
# NOTE(review): the input object is spelled "BOVINE.imputedmobilome.ps" while
# the human and soil inputs use ".imputed.mobilome.ps" -- confirm this is the
# name the object was actually created under.
BOVINE_mobilome <- BOVINE.imputedmobilome.ps %>%
  tax_transform("hell") %>%
  # With no distance matrix or constraints supplied, PCA is the ordination used.
  ord_calc(method = "PCA") %>%
  ord_plot(color = "TELS_Type", auto_caption = NA) +
  scale_colour_brewer(palette = "Dark2", aesthetics = c("colour")) +
  # Side-panel axes carry no labels.
  ggside::scale_xsidey_discrete(labels = NULL) +
  ggside::scale_ysidex_discrete(labels = NULL) +
  stat_ellipse(
    aes(colour = TELS_Type),
    geom = "path", type = "t", level = 0.95, alpha = 0.5, lwd = 0.75
  ) +
  # NOTE(review): no fill or linetype aesthetic is mapped in this chain, so the
  # two manual scales below may have no visible effect -- confirm.
  scale_fill_manual(
    values = c(
      "Bovine.TELSeq" = "#0E8D40", "Bovine.PacBio" = "#73C06D",
      "Human.TELSeq" = "#483E8B", "Human.PacBio" = "#8C7BB7",
      "Mock.TELSeq" = "#A96F25", "Mock.PacBio" = "#F49E1C",
      "Soil.TELSeq" = "#15B0B8", "Soil.PacBio" = "#5CC8DA"
    )
  ) +
  scale_linetype_manual(
    values = c("TELSeq--XT" = "solid", "PacBio" = "dashed", "TELSeq--HSV2" = "dotted")
  ) +
  coord_fixed(xlim = c(-1.5, 2), ylim = c(-1.5, 1.5)) +
  theme(
    axis.line = element_line(),
    text = element_text(family = "Arial", size = 10),
    panel.background = element_blank(),
    axis.text = element_text(size = 10, colour = "black"),
    axis.title.y = element_text(size = 10, vjust = 1.75),
    axis.title.x = element_text(size = 10, vjust = -1.5),
    legend.text = element_text(size = 15),
    legend.title = element_text(size = 20)
  ) +
  # NOTE(review): second colour scale in this chain; ggplot2 presumably keeps
  # only the last one added, overriding the Dark2 palette -- confirm intent.
  # Labels are positional; assumes level order HS2, XT, Non-enriched -- verify.
  scale_color_discrete(labels = c("XT-HS2", "XT", "Non-enriched")) +
  labs(color = "Enrichment Platform")

# Display the plot.
BOVINE_mobilome


# PCA ordination of the soil mobilome profiles, coloured by TELS_Type.
# Steps: Hellinger transform -> PCA -> ord_plot, then 95% t-distribution
# ellipses per TELS_Type and the shared publication theme.
SOIL_mobilome <- SOIL.imputed.mobilome.ps %>%
  tax_transform("hell") %>%
  # With no distance matrix or constraints supplied, PCA is the ordination used.
  ord_calc(method = "PCA") %>%
  ord_plot(color = "TELS_Type", auto_caption = NA) +
  scale_colour_brewer(palette = "Dark2", aesthetics = c("colour")) +
  # Side-panel axes carry no labels.
  ggside::scale_xsidey_discrete(labels = NULL) +
  ggside::scale_ysidex_discrete(labels = NULL) +
  stat_ellipse(
    aes(colour = TELS_Type),
    geom = "path", type = "t", level = 0.95, alpha = 0.5, lwd = 0.75
  ) +
  # NOTE(review): no fill or linetype aesthetic is mapped in this chain, so the
  # two manual scales below may have no visible effect -- confirm.
  scale_fill_manual(
    values = c(
      "Bovine.TELSeq" = "#0E8D40", "Bovine.PacBio" = "#73C06D",
      "Human.TELSeq" = "#483E8B", "Human.PacBio" = "#8C7BB7",
      "Mock.TELSeq" = "#A96F25", "Mock.PacBio" = "#F49E1C",
      "Soil.TELSeq" = "#15B0B8", "Soil.PacBio" = "#5CC8DA"
    )
  ) +
  scale_linetype_manual(
    values = c("TELSeq--XT" = "solid", "PacBio" = "dashed", "TELSeq--HSV2" = "dotted")
  ) +
  coord_fixed(xlim = c(-1.5, 2.0), ylim = c(-1.5, 1.5)) +
  theme(
    axis.line = element_line(),
    text = element_text(family = "Arial", size = 10),
    panel.background = element_blank(),
    axis.text = element_text(size = 10, colour = "black"),
    axis.title.y = element_text(size = 10, vjust = 1.75),
    axis.title.x = element_text(size = 10, vjust = -1.5),
    legend.text = element_text(size = 15),
    legend.title = element_text(size = 20)
  ) +
  # NOTE(review): second colour scale in this chain; ggplot2 presumably keeps
  # only the last one added, overriding the Dark2 palette -- confirm intent.
  # Labels are positional; assumes level order HS2, XT, Non-enriched -- verify.
  scale_color_discrete(labels = c("XT-HS2", "XT", "Non-enriched")) +
  labs(color = "Enrichment Platform")

# Display the plot.
SOIL_mobilome


# Combined ordination figure: ARG panels on the top row, MGE panels on the
# bottom row (2 x 3 grid), sharing one legend underneath.
mge_a <- HUMAN_mobilome
mge_b <- BOVINE_mobilome
mge_c <- SOIL_mobilome

ordination.plot <- ggarrange(
  arg_a, arg_b, arg_c,
  mge_a, mge_b, mge_c,
  labels = c(
    "A) ARGs -- FMT", "B) ARGs -- BF", "C) ARGs -- PPS",
    "D) MGEs -- FMT", "E) MGEs -- BF", "F) MGEs -- PPS"
  ),
  nrow = 2,
  ncol = 3,
  align = "hv",
  hjust = -0.25,
  vjust = 1,
  common.legend = TRUE,
  legend = "bottom"
)

# Write the figure to the working directory as a 270 mm x 225 mm PNG at 200 dpi.
ggsave(
  filename = "ordination_plot.png",
  plot = ordination.plot,
  width = 270,
  height = 225,
  units = "mm",
  dpi = 200
)


########################################################
########################################################
####MOCK COMMUNITY ANALYSIS#############################
########################################################


# Mock-community heatmap of alignment counts per ARG group (rows) and
# library (columns), drawn without clustering or scaling.
library(gplots)

# Count matrix: first column supplies the row names, first line the header.
df <- read.table(file = "count_matrix.txt", header = TRUE, row.names = 1)
dim(df)

# Drop the two annotation columns so only the count columns remain.
df <- subset(df, select = -c(Genome, GT.ARG))

# Column labels: three libraries for each of the six enrichment groups,
# followed by six non-enriched (NE) libraries -- 24 labels in total.
list <- c(
  rep("XT-HS2 RES", 3), rep("XT-HS2 COMBO", 3), rep("XT-HS2 MOB", 3),
  rep("XT RES", 3), rep("XT COMBO", 3), rep("XT MOB", 3),
  rep("NE", 6)
)

# Guard the hard-coded labels: a different number of count columns would
# otherwise produce wrong or missing column names on the heatmap.
if (length(list) != ncol(df)) {
  stop(
    "count_matrix.txt has ", ncol(df), " count columns but ", length(list),
    " column labels are defined",
    call. = FALSE
  )
}
colnames(df) <- list

# Zero counts become NA so they are drawn in the na.color below rather than
# at the low end of the palette.
df[df == 0] <- NA

matrix <- data.matrix(df)
#colors = c(seq(0,0,by=0),seq(1,100,by=10),seq(100,10000,by=100))

# 500-step gradient from gold through orange to maroon.
my_palette <- colorRampPalette(c("gold", "orange", "maroon"))(n = 500)

# Rows and columns keep their input order (no reordering, no dendrograms);
# white separators are drawn after every third column up to column 18.
heatmap.2(
  matrix,
  margins = c(10, 12),
  Rowv = FALSE,
  Colv = FALSE,
  dendrogram = "none",
  scale = "none",
  col = my_palette,
  na.color = "Gray",
  symbreaks = FALSE,
  colsep = c(3, 6, 9, 12, 15, 18),
  sepcolor = "White",
  trace = "none",
  density.info = "none",
  key.title = NULL,
  key.xlab = "Number of Alignments to ARG Group",
  keysize = 1.0
)


