# library() stops with an error if plyr is not installed; require() would only
# return FALSE and let the script fail later at join_all().
library(plyr)

# Purpose: find genomic windows whose male-minus-female (M - F) methylation
# difference is an extreme outlier in every tissue listed in `tissues`, with
# the same sign of difference in all of them. The result is `outliers.df`.

# NOTE(review): placeholder path; changing the working directory is a global
# side effect that is kept here because the input file names are relative.
setwd("DIR/TO/METH_DATA")
tissues <- c("brain", "retina", "lung")

outliers <- list()
for (tissue in tissues) {
  print(tissue)
  # Pooled average fractional methylation levels (each row is a 10kb window).
  # NOTE(review): the male file uses the suffix "_window" while the female file
  # uses "_windows" -- confirm both names match the files on disk.
  M.df <- read.table(paste0(tissue, "_male_window.meth"))    # males
  F.df <- read.table(paste0(tissue, "_female_windows.meth")) # females
  colnames(M.df) <- c("Chrom", "Start", "End", "methy")
  colnames(F.df) <- c("Chrom", "Start", "End", "methy")

  # The difference below is computed row by row, so both tables must list
  # exactly the same windows in the same order. Fail loudly otherwise instead
  # of silently subtracting unrelated windows.
  aligned <- nrow(M.df) == nrow(F.df) &&
    all(as.character(M.df$Chrom) == as.character(F.df$Chrom)) &&
    all(M.df$Start == F.df$Start) &&
    all(M.df$End == F.df$End)
  if (!isTRUE(aligned)) {
    stop("Male and female windows are not aligned for tissue: ", tissue,
         call. = FALSE)
  }

  MFdiff.df <- data.frame(
    Chrom = M.df$Chrom,
    Start = M.df$Start,
    End = M.df$End,
    Diff = M.df$methy - F.df$methy
  )

  # Extreme-outlier fences: Q1 - 3 * IQR and Q3 + 3 * IQR. The quartiles are
  # computed once; Q3 - Q1 is the same value IQR() returns by default.
  quartiles <- quantile(MFdiff.df$Diff, probs = c(0.25, 0.75),
                        na.rm = TRUE, names = FALSE)
  fence <- 3 * (quartiles[2] - quartiles[1])
  lo <- quartiles[1] - fence # Extreme outliers (lower bound)
  up <- quartiles[2] + fence # Extreme outliers (upper bound)

  # subset() drops rows where the condition is NA, i.e. windows without a Diff.
  outliers[[tissue]] <- subset(MFdiff.df, Diff < lo | Diff > up)
}

# Left-join the per-tissue outlier tables on the window coordinates, in the
# order given by `tissues`, then keep only windows that are outliers in every
# tissue (rows missing from a later table get NA and are dropped).
outliers.df <- join_all(unname(outliers[tissues]),
                        by = c("Chrom", "Start", "End"))
outliers.df <- outliers.df[complete.cases(outliers.df), ]

# The joined table carries one "Diff" column per tissue, in `tissues` order.
diff.cols <- paste0("Diff.", tissues)
colnames(outliers.df) <- c("Chrom", "Start", "End", diff.cols)

# Keep windows where the sex difference has the same sign in all tissues
# (all strictly positive or all strictly negative).
diff.mat <- as.matrix(outliers.df[diff.cols])
all.positive <- rowSums(diff.mat > 0) == length(diff.cols)
all.negative <- rowSums(diff.mat < 0) == length(diff.cols)
outliers.df <- outliers.df[all.positive | all.negative, ]

### Windows with extreme sex differences in 5mC DNA methylation (M - F; values are fractional methylation differences, not percent), same sign in brain, retina and lung
# Chrom    Start      End Diff.brain Diff.retina Diff.lung
# 6388 NC_006127.4 27261000 27271000     0.5744      0.6746    0.4495
# 6389 NC_006127.4 27262000 27272000     0.6401      0.6637    0.5415
# 6390 NC_006127.4 27263000 27273000     0.6401      0.6637    0.5366
# 6391 NC_006127.4 27264000 27274000     0.6401      0.6637    0.5104
# 6392 NC_006127.4 27265000 27275000     0.6401      0.6637    0.5104
# 6393 NC_006127.4 27266000 27276000     0.6401      0.6479    0.4828
# 6394 NC_006127.4 27267000 27277000     0.6401      0.6479    0.4678
# 6395 NC_006127.4 27268000 27278000     0.6401      0.6479    0.4678
# 6396 NC_006127.4 27299000 27309000     0.4844      0.6157    0.5968
# 6397 NC_006127.4 27300000 27310000     0.4844      0.6157    0.5514
# 6398 NC_006127.4 27301000 27311000     0.4844      0.6157    0.5514
# 6399 NC_006127.4 27302000 27312000     0.4942      0.5055    0.5174
# 6400 NC_006127.4 27303000 27313000     0.4942      0.5055    0.5131
# 6401 NC_006127.4 27304000 27314000     0.5374      0.5385    0.5142
# 6402 NC_006127.4 27305000 27315000     0.5155      0.5405    0.4631
# 6403 NC_006127.4 27306000 27316000     0.4602      0.5294    0.4628
# 6404 NC_006127.4 27307000 27317000     0.3852      0.4722    0.3881
# 6405 NC_006127.4 27308000 27318000     0.3219      0.2897    0.3090
# 6406 NC_006127.4 27309000 27319000     0.2836      0.2851    0.2292
# 6407 NC_006127.4 27310000 27320000     0.2871      0.2845    0.2380
# 6548 NC_006127.4 73157000 73167000     0.3007      0.3829    0.4506
# 6549 NC_006127.4 73158000 73168000     0.3337      0.4363    0.4797
# 6550 NC_006127.4 73159000 73169000     0.4485      0.5452    0.5703
# 6551 NC_006127.4 73160000 73170000     0.4537      0.5845    0.5983
# 6552 NC_006127.4 73161000 73171000     0.5388      0.6383    0.6292
# 6553 NC_006127.4 73162000 73172000     0.5776      0.6836    0.6647
# 6554 NC_006127.4 73163000 73173000     0.4964      0.6478    0.6399
# 6555 NC_006127.4 73164000 73174000     0.4153      0.6360    0.5841
# 6556 NC_006127.4 73165000 73175000     0.3569      0.6246    0.5466
# 6557 NC_006127.4 73166000 73176000     0.3383      0.6102    0.4261
# 6558 NC_006127.4 73167000 73177000     0.2987      0.5628    0.3645
# 6559 NC_006127.4 73168000 73178000     0.3013      0.5253    0.3226
