#!/usr/bin/Rscript
# Put all the data blobs together into one table and plot the results.

# ---- parameters ----
# Usage: Rscript <this script> <r2_thresh>
#   r2_thresh : r^2 cutoff; all r2 values below this will be thrown out.
cli_args <- commandArgs(trailingOnly = TRUE)
if (length(cli_args) < 1) {
  stop('usage: Rscript <script> <r2_thresh>   (r2_thresh is a number in [0, 1])',
       call. = FALSE)
}

# suppressWarnings: a non-numeric argument is reported by the check below
# instead of by a bare coercion warning.
r2_thresh <- suppressWarnings(as.numeric(cli_args[1]))
if (is.na(r2_thresh) || r2_thresh < 0 || r2_thresh > 1) {
  stop("r2_thresh must be a number in [0, 1], got '", cli_args[1], "'",
       call. = FALSE)
}

# Thresholding is done on r rather than r^2 to avoid a bunch of unnecessary
# squaring of the whole column.
r_thresh <- sqrt(r2_thresh)

# Omit values with dist > this from the lowess computations.
omit_outside <- 30000

# Load the combined table of LD estimates (`ld_ests`). If the cache file from a
# previous run exists it is loaded as-is; otherwise every file in the working
# directory whose name matches 'mcld_processed_chr' is read and the tables are
# stacked, then saved to the cache for next time.
ld_cache_file <- 'all_ld_ests_020217.Rdat'

if (file.exists(ld_cache_file)) {
  # restores the object `ld_ests` written by save() below
  load(ld_cache_file)
} else {
  ld_input_files <- dir(pattern = 'mcld_processed_chr')

  # Fail here rather than saving an empty cache that every later run would
  # silently pick up.
  if (length(ld_input_files) == 0) {
    stop("no input files matching 'mcld_processed_chr' found in ", getwd(),
         call. = FALSE)
  }

  # Read all the tables first and bind once; rbind-ing inside a loop re-copies
  # the accumulated table for every file.
  ld_ests <- do.call(rbind, lapply(ld_input_files, read.table, header = TRUE))

  # save it all somewhere convenient.
  save(ld_ests, file = ld_cache_file)
}


# apply threshold
# Convert via character: if `r` was read in as a factor (e.g. because the column
# contained some non-numeric text), as.numeric() alone would return the factor's
# integer level codes instead of the values. Unparseable entries become NA.
ld_ests$r <- as.numeric(as.character(ld_ests$r))

# Keep rows with r^2 >= r2_thresh and dist <= omit_outside.
#  - abs(r) >= r_thresh is the same test as r^2 >= r2_thresh, so strongly
#    negative r values are kept as well.
#  - which() drops rows where either comparison is NA; indexing with a logical
#    vector containing NA would instead insert all-NA rows.
keep_rows <- which(abs(ld_ests$r) >= r_thresh & ld_ests$dist <= omit_outside)
ld_ests <- ld_ests[keep_rows, ]

# From here on the `r` column holds r^2.
ld_ests$r <- ld_ests$r^2

# lowess to summarize data: smooth r^2 (column `r`) against distance separately
# for pairs with 0 STRs (SNP-SNP) and pairs with 1 STR (STR-SNP).
smooth_ld_by_str <- function(n_str) {
  in_group <- ld_ests$str == n_str
  lowess(ld_ests$dist[in_group], ld_ests$r[in_group], f = 0.1)
}

snp_lowed <- smooth_ld_by_str(0)
str_lowed <- smooth_ld_by_str(1)

# Report how many retained LD estimates fall in each STR-count class.
str_pair_counts <- table(ld_ests$str)
cat('tabulating number of LD estimates with the following number of STRs in pair (0,1,2):\n')
print(str_pair_counts)

# plot em: smoothed r^2 against distance, SNP-SNP as a solid black line and
# STR-SNP as a dashed red line. With no device opened explicitly, the plot goes
# to R's default graphics device.
plot(snp_lowed,
     type = 'l', lwd = 2,
     xlim = c(0, 20000), ylim = c(0.05, 0.4),
     xlab = 'Distance (bp)', ylab = expression('R'^2))
lines(str_lowed, lty = 2, lwd = 2, col = 'red')

# Legend keys use the same colour / line type / width as the curves so the
# solid and dashed lines can be told apart; 'topright' keeps the box inside
# the plot region regardless of the axis limits.
legend('topright',
       legend = c('SNP-SNP LD', 'STR-SNP LD'),
       col = c('black', 'red'), lty = c(1, 2), lwd = 2)
