#1. Loading the data
# Read the candidate-site table with read.table's default whitespace
# separator. The first line of the file supplies the column names, and text
# columns are kept as character vectors rather than factors.
df <- read.table(
	"results_dnm_candidate_sites.txt",
	header = TRUE,  # full argument name; `head` only worked via partial matching
	stringsAsFactors = FALSE
)


#2. Apply basic filters & modifications
# A site is kept only when all three conditions hold:
#   - AC is below 30
#   - SVTYPE is not "BND"
#   - the GQ values of the three samples sum to more than 120
# The mask is applied through which() so that rows where a condition evaluates
# to NA are dropped; plain logical subsetting would insert all-NA rows for
# them, and those rows cannot be classified later on.
keep <- df$AC < 30 &
	df$SVTYPE != "BND" &
	df$GQ_1 + df$GQ_2 + df$GQ_3 > 120
df <- df[which(keep), ]

# Coerce the DHBFC_* and DHFFC_* columns of all three samples to numeric.
# Entries that cannot be parsed as numbers become NA (with a coercion warning).
for (depth_col in paste0(rep(c("DHBFC_", "DHFFC_"), each = 3), 1:3)) {
	df[[depth_col]] <- as.numeric(df[[depth_col]])
}

# Zero-row data frame with the same columns as df. Nothing else in the visible
# script reads it; kept so the set of objects created here is unchanged.
frame <- df[0, ]



#3. Apply the filters for detecting dnSV depending on the type of SV

# Return the values of the three per-sample columns `<prefix>1..3` of a
# one-row data frame as a plain vector. Fails if a column is missing.
trio_values <- function(variant, prefix) {
	unlist(variant[1, paste0(prefix, 1:3)], use.names = FALSE)
}

# Classify one variant (a one-row data frame).
#
# The first sample whose GT contains "0/1" is treated as the proband; the
# other two samples are then held to the "no evidence" limits.
#
# Criteria, all of which must hold for "High_evidence":
#   proband : AO / (RO + AO) > min fraction and AO > min count, where
#             DEL: 0.05 / 2, DUP: 0.1 / 2, INV: 0.2 / 4
#   others  : AO / (RO + AO) < 0.1 and AO < 4 (each of the two samples)
#   DEL only: proband DHFFC < 0.8 and mean DHFFC of the others > 0.8
#   DUP only: proband DHBFC > 1.1 and mean DHBFC of the others < 1.2
#
# Returns "High_evidence" or "False", or NA_character_ when the variant is
# not a DEL/DUP/INV or no sample carries a "0/1" genotype (such rows are
# left unclassified).
classify_dnm_row <- function(variant) {
	sv_type <- as.character(variant$SVTYPE)
	if (!isTRUE(sv_type %in% c("DEL", "DUP", "INV"))) {
		return(NA_character_)
	}

	is_het <- grepl("0/1", trio_values(variant, "GT_"), fixed = TRUE)
	if (!any(is_het)) {
		return(NA_character_)
	}
	proband <- which(is_het)[1]
	others <- setdiff(1:3, proband)

	ao <- trio_values(variant, "AO_")
	ro <- trio_values(variant, "RO_")
	alt_frac <- ao / (ro + ao)  # NaN when a sample has no reads at all

	min_frac <- switch(sv_type, DEL = 0.05, DUP = 0.1, INV = 0.2)
	min_ao <- switch(sv_type, DEL = 2, DUP = 2, INV = 4)
	read_checks <- c(
		alt_frac[proband] > min_frac,
		ao[proband] > min_ao,  # always the proband's own AO, for every sample slot
		alt_frac[others] < 0.1,
		ao[others] < 4
	)

	depth_checks <- switch(
		sv_type,
		DEL = {
			depth <- trio_values(variant, "DHFFC_")
			c(
				depth[proband] < 0.8,
				(depth[others[1]] + depth[others[2]]) / 2 > 0.8
			)
		},
		DUP = {
			depth <- trio_values(variant, "DHBFC_")
			c(
				depth[proband] > 1.1,
				(depth[others[1]] + depth[others[2]]) / 2 < 1.2
			)
		},
		INV = logical(0)  # inversions have no depth criterion
	)

	# all() gives FALSE if any check fails and NA if the only non-TRUE results
	# are NA; isTRUE() turns that NA into a plain "not passed" instead of
	# letting `if` abort on a missing value.
	if (isTRUE(all(c(depth_checks, read_checks)))) {
		"High_evidence"
	} else {
		"False"
	}
}

# Make sure the result column exists even when no row ends up classified.
if (!("DNM" %in% names(df))) {
	df$DNM <- rep(NA_character_, nrow(df))
}

# Walk the rows by position: this is safe for an empty table and does not
# depend on ID being unique or non-missing.
for (row_idx in seq_len(nrow(df))) {
	verdict <- classify_dnm_row(df[row_idx, ])
	if (is.na(verdict)) {
		next
	}
	print(if (verdict == "High_evidence") "DNM_candidate" else "False")
	df[row_idx, "DNM"] <- verdict
}


# Write the annotated table as tab-separated text with a header line,
# without row names and without quoting.
write.table(
	df,
	"results_dnm_candidate_sites_filter.txt",
	col.names = TRUE,
	row.names = FALSE,
	quote = FALSE,
	sep = "\t"
)

# End the session without saving the workspace. Passing save = "no" replaces
# the stray `n` line that used to answer the interactive save prompt.
quit(save = "no")

