library(tidyverse)
library(ggplot2)

# Variant mean depth
# Read the per-site depth table; the file's own header row is skipped and
# replaced by the column names given here.
var_depth <- read_delim(
  "gar.ldepth.mean",
  delim = "\t",
  col_names = c("chr", "pos", "mean_depth", "var_depth"),
  skip = 1
)

# Density of per-site mean depth, with the x-axis limited to 0-100.
depth_plot <- ggplot(var_depth, aes(mean_depth)) +
  geom_density(fill = "dodgerblue1", colour = "black", alpha = 0.3) +
  theme_light() +
  xlim(0, 100)

pdf("mean_depth.pdf")
# Explicit print(): ggplot objects are only drawn by top-level auto-printing,
# so without it the PDF comes out empty when this script is run via source().
print(depth_plot)
dev.off()

# print() for the same reason: summary() output is not shown under source().
print(summary(var_depth$mean_depth))
#Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#0.00   23.04   28.48   29.24   33.26 6054.36
# minimum depth: 20
# maximum depth: mean depth * 2 = 29 * 2 = 58

## Variant missingness
# Read the per-site missingness table; the file's own header row is skipped
# and replaced by the column names given here.
var_miss <- read_delim(
  "gar.lmiss",
  delim = "\t",
  col_names = c("chr", "pos", "nchr", "nfiltered", "nmiss", "fmiss"),
  skip = 1
)

# Density of the per-site missing fraction (fmiss column).
miss_plot <- ggplot(var_miss, aes(fmiss)) +
  geom_density(fill = "dodgerblue1", colour = "black", alpha = 0.3) +
  theme_light()

pdf("fmiss.pdf")
# Explicit print(): ggplot objects are only drawn by top-level auto-printing,
# so without it the PDF comes out empty when this script is run via source().
print(miss_plot)
dev.off()

# print() for the same reason: summary() output is not shown under source().
print(summary(var_miss$fmiss))
# Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
# 0.0000  0.0000  0.0000  0.0260  0.0000  0.9855 

# Minor allele frequency
# minor-allele frequency (MAF) thresholds
# Read the per-site allele frequency table; the file's own header row is
# skipped and replaced by the column names given here.
var_freq <- read_delim(
  "gar.frq",
  delim = "\t",
  col_names = c("chr", "pos", "nalleles", "nchr", "a1", "a2"),
  skip = 1
)

# find minor allele frequency
# The MAF is the smaller of the two allele-frequency columns for each row.
# pmin() is the vectorized row-wise minimum; it replaces apply() over the
# data frame, which coerces to a matrix and loops over rows in R.
# NOTE(review): assumes a1 and a2 are both parsed as numeric -- confirm.
var_freq$maf <- pmin(var_freq$a1, var_freq$a2)

# Density of the minor allele frequency.
maf_plot <- ggplot(var_freq, aes(maf)) +
  geom_density(fill = "dodgerblue1", colour = "black", alpha = 0.3) +
  theme_light()

pdf("maf.pdf")
# Explicit print(): ggplot objects are only drawn by top-level auto-printing,
# so without it the PDF comes out empty when this script is run via source().
print(maf_plot)
dev.off()

# print() for the same reason: summary() output is not shown under source().
print(summary(var_freq$maf))
# Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
# 0.0000  0.1957  0.3478  0.2848  0.3478  0.5000 
# Mean depth per individual
# Read the per-individual depth table; the file's own header row is skipped
# and replaced by the column names given here.
ind_depth <- read_delim(
  "gar.idepth",
  delim = "\t",
  col_names = c("ind", "nsites", "depth"),
  skip = 1
)

# Histogram of depth per individual (ggplot2's default binning).
ind_depth_plot <- ggplot(ind_depth, aes(depth)) +
  geom_histogram(fill = "dodgerblue1", colour = "black", alpha = 0.3) +
  theme_light()

pdf("indMeandepth.pdf")
# Explicit print(): ggplot objects are only drawn by top-level auto-printing,
# so without it the PDF comes out empty when this script is run via source().
print(ind_depth_plot)
dev.off()

# print() for the same reason: summary() output is not shown under source().
print(summary(ind_depth$depth))
# Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
# 21.06   24.37   27.53   29.25   32.53   51.08 

# Proportion of missing data per individual
# Read the per-individual missingness table; the file's own header row is
# skipped and replaced by the column names given here.
ind_miss <- read_delim(
  "gar.imiss",
  delim = "\t",
  col_names = c("ind", "ndata", "nfiltered", "nmiss", "fmiss"),
  skip = 1
)

# Histogram of the per-individual missing fraction (ggplot2's default binning).
ind_miss_plot <- ggplot(ind_miss, aes(fmiss)) +
  geom_histogram(fill = "dodgerblue1", colour = "black", alpha = 0.3) +
  theme_light()

pdf("indMissing.pdf")
# Explicit print(): ggplot objects are only drawn by top-level auto-printing,
# so without it the PDF comes out empty when this script is run via source().
print(ind_miss_plot)
dev.off()

# print() for the same reason: summary() output is not shown under source().
print(summary(ind_miss$fmiss))
# Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
# 0.002623 0.010171 0.026523 0.025995 0.038102 0.065640 

# Variant quality
# Read the per-site quality table; the file's own header row is skipped and
# replaced by the column names given here.
var_qual <- read_delim(
  "./gar.lqual",
  delim = "\t",
  col_names = c("chr", "pos", "qual"),
  skip = 1
)

# Density of site quality, with the x-axis limited to 0-1000.
qual_plot <- ggplot(var_qual, aes(qual)) +
  geom_density(fill = "dodgerblue1", colour = "black", alpha = 0.3) +
  theme_light() +
  xlim(0, 1000)

pdf("qual.pdf")
# Explicit print(): ggplot objects are only drawn by top-level auto-printing,
# so without it the PDF comes out empty when this script is run via source().
print(qual_plot)
dev.off()

# print() for the same reason: summary() output is not shown under source().
print(summary(var_qual$qual))
# Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
# 10    11962    36216    35864    55070 17722400 

# Heterozygosity and inbreeding coefficient per individual
# Read the per-individual heterozygosity table; the file's own header row is
# skipped and replaced by the column names given here.
# NOTE(review): if this file is vcftools --het output, columns 2-3 hold
# observed/expected homozygous counts, so "ho"/"he" may be misleading names
# -- confirm against the file header.
ind_het <- read_delim(
  "./gar.het",
  delim = "\t",
  col_names = c("ind", "ho", "he", "nsites", "f"),
  skip = 1
)

# Histogram of the per-individual f column (ggplot2's default binning).
het_plot <- ggplot(ind_het, aes(f)) +
  geom_histogram(fill = "dodgerblue1", colour = "black", alpha = 0.3) +
  theme_light()

pdf("het.pdf")
# Explicit print(): ggplot objects are only drawn by top-level auto-printing,
# so without it the PDF comes out empty when this script is run via source().
print(het_plot)
dev.off()

# print() for the same reason: summary() output is not shown under source().
print(summary(ind_het$f))
# Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
# 0.4419  0.6328  0.8824  0.8013  0.9611  0.9671 