package edu.mit.csail.cgs.deepseq;

import java.util.ArrayList;
import java.util.List;

/**
 * Represents a ChIP-Seq tag (fragment) generated by a short-read sequencer. <br>
 * Each read is unique and is therefore assigned a unique ID. <br>
 * A <tt>Read</tt> can map to several locations (<tt>ReadHit</tt>s) on the
 * genome when non-unique hits are supported.
 *
 * @author shaunmahony
 *
 */
public class Read {
	/** Genomic locations this read has been mapped to. */
	protected List<ReadHit> hits = new ArrayList<ReadHit>();
	/** Identifier given to this read at construction time. */
	protected int ID;
	/**
	 * Number of hits attributed to this read. Stored separately from
	 * <tt>hits.size()</tt> because the size of the hits list can't always be
	 * trusted; it can be overridden through {@link #setNumHits(double)}.
	 */
	protected double numHits=0;
	
	/**
	 * Creates a read with no hits.
	 * @param id the identifier of this read
	 */
	public Read(int id){
		this.ID=id;
	}
	
	//Accessors
	/**
	 * Returns the stored hit count. This is the value maintained by
	 * {@link #addHit(ReadHit, boolean)} and {@link #setNumHits(double)}, which
	 * may differ from the size of the list returned by {@link #getHits()}.
	 * @return the stored number of hits
	 */
	public double getNumHits(){return numHits;}
	
	/**
	 * Overrides the stored hit count and sets the weight of every hit currently
	 * in the list to <tt>1/n</tt>. <br>
	 * Note: the division is floating-point, so <tt>n == 0</tt> does not throw;
	 * it assigns an infinite weight to any hits present.
	 * @param n the number of hits to record for this read
	 */
	public void setNumHits(double n){
		numHits=n;
		double w = 1/numHits;
		for(ReadHit x : hits){
			x.setWeight(w);
		}
	}
	
	/**
	 * Adds a hit and re-weights all hits of this read.
	 * @param h the hit to add
	 */
	public void addHit(ReadHit h){addHit(h, true);}
	
	/**
	 * Adds a hit and increments the stored hit count.
	 * @param h the hit to add
	 * @param updateWeight if true, every hit of this read (including the new
	 *        one) gets its weight set to <tt>1/numHits</tt>
	 */
	public void addHit(ReadHit h, boolean updateWeight){
		//First add the hit
		hits.add(h);
		numHits++;
		
		if(updateWeight){
			//Now propagate the effect of adding the hit to the read weights
			double w = 1/numHits;
			for(ReadHit x : hits){
				x.setWeight(w);
			}
		}
	}
	
	/**
	 * Returns the internal list of hits (not a copy).
	 * @return the hits of this read
	 */
	public List<ReadHit> getHits(){return hits;}
	
	/**
	 * Produces a "filtered" version of this read, removing all sub-optimal hits. <br>
	 * Essentially, the "filtered" version of the read holds the <tt>ReadHits</tt>
	 * with the minimum number of mismatches. <br>
	 * The returned read has the same ID and references the same <tt>ReadHit</tt>
	 * objects as this read, so the weights of the retained hits are overwritten
	 * with <tt>1/(number of retained hits)</tt>.
	 * @return a new read containing only the hits with the fewest mismatches
	 */
	public Read filter() {
		Read newRead= new Read(ID);
		int minMis=Integer.MAX_VALUE;
		double count=0;
		for(ReadHit h : hits){if(h.getMisMatch()<minMis){minMis=h.getMisMatch();}}
		for(ReadHit h : hits){
			if(h.getMisMatch()==minMis){
				count++;
				//Skip the per-insertion re-weighting; setNumHits below assigns the final weights once
				newRead.addHit(h, false);
			}
		}
		newRead.setNumHits(count);
		return newRead;
	}
}
