001    /*
002     * The Broad Institute SOFTWARE COPYRIGHT NOTICE AGREEMENT This software and its documentation are copyright 2003 by
003     * the Broad Institute/Massachusetts Institute of Technology. All rights are reserved. This software is supplied
004     * without any warranty or guaranteed support whatsoever. Neither the Broad Institute nor MIT can be responsible for
005     * its use, misuse, or functionality.
006     */
007    package calhoun.util;
008    
009    import java.io.Serializable;
010    import java.util.ArrayList;
011    import java.util.Collections;
012    import java.util.HashMap;
013    import java.util.HashSet;
014    import java.util.Iterator;
015    import java.util.List;
016    import java.util.Map;
017    import java.util.Set;
018    import java.util.SortedMap;
019    import java.util.TreeMap;
020    
021    /**
022     * A rangeMap contains a set of intervals and maps each interval to a set of objects that exist in that interval. It
023     * allows very fast lookups of range queries.
024     *
025     * An interval is closed, that is, the interval from 20-30 includes both 20 and 30.
026     */
027    public class RangeMap implements Serializable {
028    
029            public static final long serialVersionUID = 413339879647819935L;
030            
031            /**
032             * Imagine a number line. Take the set of start and stop coordinates for each object in the RangeMap and place them
033             * on the number line. This divides the number line into a series of intervals. Each interval can be associated
034             * with a distinct list of objects that contain all points in that interval. The RangeMap is a sorted map that keys
035             * the start of the interval to this distinct list of objects.
036             * <p>
037             * Objects that fully contain the points in more than one interval will be contained in multiple lists. One list
038             * for each interval they contain.
039             */
040            SortedMap map = new TreeMap();
041    
042            /**
043             * Keeps track of the start and end for each object in the map so we can
044             * remove them easily if necessary.
045             *
046             * Maps Object -> int[2]. the first element in the array is the start of
047             * the key and the second element is the end of the key.
048             */
049            Map objectList = new HashMap();
050    
051            public RangeMap() {
052                    super();
053            }
054    
055            public int size() {
056                    return objectList.size();
057            }
058    
059            public Set values() {
060                    return objectList.keySet();
061            }
062    
063            /** Returns the start of the first non-empty interval.  If the RangeMap has no entries, returns 0. */
064            public int getStart() {
065                    return size() == 0 ? 0 : ((Integer) map.firstKey()).intValue();
066            }
067            
068            /** Returns the end of the last interval */
069            public int getStop() {
070                    return size() == 0 ? 0 : ((Integer) map.lastKey()).intValue() - 1;
071            }
072    
073            /**
074             * An interval is a convenient data structure used to return a section of a RangeMap. It contains a start, a stop,
075             * and a set of elements that occur in that range.
076             */
077            public static class Interval {
078                    public int start;
079                    public int stop;
080                    public Set elements;
081                    
082                    public Interval() {}
083                    
084                    public Interval(int start, int stop, Set elements)
085                    {
086                            this.start = start;
087                            this.stop = stop;
088                            this.elements = new HashSet(elements);
089                    }
090                    
091                    @Override
092                    public boolean equals(Object obj)
093                    {
094                            if (!(obj instanceof Interval)) {
095                                    return false;
096                            }
097    
098                            Interval interval = (Interval) obj;
099    
100                            if (this.start != interval.start) {
101                                    return false;
102                            } else if (this.stop != interval.stop) {
103                                    return false;
104                            } else if (!this.elements.containsAll(interval.elements)) {
105                                    return false;
106                            } else if (!interval.elements.containsAll(this.elements)) {
107                                    return false;
108                            }
109                    
110                            return true;
111                    }
112    
113                    @Override
114                    public String toString() {
115                            String result = "";
116                            for (Iterator iter = elements.iterator(); iter.hasNext(); ){
117                                    result += iter.next().toString();
118                            }
119                            return "interval " + start +" -> " +  stop + " : "+ result + "\n";
120                    }
121                    
122                    public int getLength()
123                    {
124                            return 1 + stop - start;
125                    }
126            }
127    
128            public SortedMap getMap()
129            {
130                    return Collections.unmodifiableSortedMap(map);
131            }
132            
133            public Map getObjectList()
134            {
135                    return Collections.unmodifiableMap(objectList);
136            }
137            
138            public List getDisjointRegions()
139            {
140                    return getRegions(true);
141            }
142            
143            public List getRegions()
144            {
145                    return getRegions(false);
146            }
147            
148            /**
149             * Returns a list of Interval objects that contain overlapping elements.
150             *
151             * @param splitDisjointRegions if true, return disjoint regions,
152             * if false, return "old-style" regions. Consider the following:
153             *
154             *    <---------Feature A------->
155             *                               <----------Feature B---------->
156             *    <---------Feature C------->
157             *
158             * - If we group these into a single region, then any two groups
159             *   returned by this method will be separated by at least one gap
160             *   (ie an area that contains no elements at all). This is more
161             *   convenient.
162             * - If we group them into two regions, then every element in
163             *   a region will overlap at least one other element in that
164             *   region (if the region contains more than one element).
165             *   This is more mathematically precise.
166             *
167             * There are good reasons to want either behavior so we support both.
168             * Disjoint regions are more mathematically consistent but may lie
169             * immediately adjacent to each other. Nondisjoint regions are better
170             * separated, but "old-style" regions may contain an element that
171             * does not overlap anything else within it.
172             *
173             * No element in a region will ever overlap an element outside the region.
174             *
175             * This method returns a List of Interval objects. The Interval objects
176             * returned will be in increasing order of their placement on the RangeMap.
177             *
178             * @see #getIntervals()
179             */
180            public List getRegions(boolean splitDisjointRegions) {
181                    List list = new ArrayList();
182                    Interval currentInterval = null;
183                    // Iterate through the map (in order). As long as there are intervals with at least one entry, add that entry
184                    // to the current cluster.
185                    // When you hit an interval with no values, finish the interval to return.
186                    Iterator it = map.entrySet().iterator();
187                    while (it.hasNext()) {
188                            Map.Entry entry = (Map.Entry) it.next();
189                            List currentValue = (List) entry.getValue();
190                            int currentKey = ((Integer) entry.getKey()).intValue();
191                            if (currentValue.size() == 0) {
192                                    Assert.a(currentInterval != null,
193                                      "Multiple 0 intervals in a row were detected.");
194                                    currentInterval.stop = currentKey - 1;
195                                    list.add(currentInterval);
196                                    currentInterval = null;
197                            } else {
198                                    if (currentInterval != null) {
199                                            /*
200                                             * If two intervals are adjacent but disjoint (no element
201                                             * spans them) then return them as two groups, not one.
202                                             */
203                                            boolean disjoint = true;
204                                            for (Iterator i = currentValue.iterator(); i.hasNext(); ) {
205                                                    if (currentInterval.elements.contains(i.next())) {
206                                                            disjoint = false;
207                                                    }
208                                            }
209                                            if (disjoint && splitDisjointRegions) {
210                                                    currentInterval.stop = currentKey - 1;
211                                                    list.add(currentInterval);
212                                                    currentInterval = null;
213                                            }
214                                    }
215                            
216                                    if (currentInterval == null) {
217                                            currentInterval = new Interval();
218                                            currentInterval.start = currentKey;
219                                            currentInterval.elements = new HashSet();
220                                    }
221                                    currentInterval.elements.addAll(currentValue);
222                            }
223                    }
224                    Assert.a(currentInterval == null, "RangeMap did not end with a 0 entry.");
225                    return list;
226            }
227    
228            /**
229             * Returns the individual intervals in the rangeMap.
230             *
231             * An interval is an area of a range map where every point in that
232             * area is overlapped by the same set of elements. Consider this:
233             *
234             *    <-----Feature A----->           <---Feature C--->
235             *            <---------Feature B--------->
236             *
237             *    |   1   |     2     |     3     | 4 |     5     |
238             *
239             * This would be represented by one region but counts as five
240             * intervals: interval 1 contains A only, interval 2 contains A & B,
241             * interval 3 holds B only, and so on.
242             *
243             * Callers should be prepared for Intervals that contain no elements.
244             *
245             * This method returns a List of Interval objects. The Interval objects
246             * returned will be in increasing order of their placement on the RangeMap.
247             *
248             * @see #getRegions()
249             */
250            public List getIntervals()
251            {
252                    List list = new ArrayList();
253                    Interval currentInterval = null;
254                    Iterator it = map.entrySet().iterator();
255    
256                    while (it.hasNext()) {
257                            Map.Entry entry = (Map.Entry) it.next();
258                            List currentValue = (List) entry.getValue();
259                            int currentKey = ((Integer) entry.getKey()).intValue();
260                            
261                            if (currentInterval != null) {
262                                    currentInterval.stop = currentKey - 1;
263                                    list.add(currentInterval);
264                            }
265                            
266                            currentInterval = new Interval();
267                            currentInterval.start = currentKey;
268                            currentInterval.elements = new HashSet();
269                            if (currentValue != null) {
270                                    currentInterval.elements.addAll(currentValue);
271                            }
272                    }
273                    
274                    Assert.a(currentInterval.elements.size() == 0,
275                      "RangeMap did not end with an empty entry");
276                    return list;
277            }
278    
279            /** Like getIntervals, but allows you to define a start and stop.  First and last interval are guaranteed to start and stop at the endpoints. */
280            public List getIntervals(int start, int stop)
281            {
282                    List list = new ArrayList();
283                    Interval currentInterval = null;
284                    Iterator it = getSubMap(start, stop+1).entrySet().iterator();
285    
286                    while (it.hasNext()) {
287                            Map.Entry entry = (Map.Entry) it.next();
288                            List currentValue = (List) entry.getValue();
289                            int currentKey = ((Integer) entry.getKey()).intValue();
290                            
291                            if(currentInterval == null && start < currentKey) {
292                                    // We started before the beginning of the range map, add in a dummy empty interval
293                                    currentInterval = new Interval();
294                                    currentInterval.start = start;
295                                    currentInterval.elements = Collections.EMPTY_SET;
296                            }
297    
298                            if (currentInterval != null) {
299                                    currentInterval.stop = currentKey - 1;
300                                    list.add(currentInterval);
301                            }
302    
303                            currentInterval = new Interval();
304                            currentInterval.start = currentKey > start ? currentKey : start;
305                            if (currentValue == null) {
306                                    currentInterval.elements = Collections.EMPTY_SET;
307                            }
308                            else {
309                                    currentInterval.elements = new HashSet(currentValue);
310                            }
311                    }
312    
313                    if(currentInterval != null) {
314                            currentInterval.stop = stop;
315                            list.add(currentInterval);
316                    }
317                    return list;
318            }       
319                            
320            /** Returns the empty intervals.  A start and stop are required to bound the edges of the first and last region.  These bounds can be within the existin region.
321             * The returned intervals are closed.
322            */
323            public List getEmptyIntervals(int start, int totalSize) {
324                    Assert.a(start <= totalSize, "Start (",new Integer(start),") must be less than stop (",new Integer(totalSize),")");
325                    List list = new ArrayList();
326                    Interval currentInterval = null;
327                    
328                    boolean beforeStart = true;
329                    Iterator it = map.entrySet().iterator();
330                    while (it.hasNext()) {
331                            Map.Entry entry = (Map.Entry) it.next();
332                            List currentValue = (List) entry.getValue();
333                            int currentKey = ((Integer) entry.getKey()).intValue();
334                            if(currentKey > totalSize)
335                                    break;
336                            if (currentValue.size() == 0) {
337                                    Assert.a(beforeStart == false, "Map started with a 0 entry.");
338                                    Assert.a(currentInterval == null, "Multiple 0 intervals in a row were detected.");
339                                    if(currentKey < start) {
340                                            beforeStart = true;
341                                    }
342                                    else {
343                                            currentInterval = new Interval();
344                                            currentInterval.start = currentKey;
345                                    }
346                            } else {
347                                    if(beforeStart == true) {
348                                            beforeStart = false;
349                                            if(start < currentKey) {
350                                                    currentInterval = new Interval();
351                                                    currentInterval.start = start;
352                                            }
353                                    }
354                                    if(currentInterval != null) {
355                                            currentInterval.stop = currentKey - 1;
356                                            list.add(currentInterval);
357                                            currentInterval = null;
358                                    }
359                            }
360                    }
361                    if(beforeStart == true) {
362                            Assert.a(currentInterval == null);
363                            currentInterval = new Interval();
364                            currentInterval.start = start;
365                    }
366                    if(currentInterval != null) {
367                            currentInterval.stop = totalSize;
368                            list.add(currentInterval);
369                    }
370                    return list;
371            }
372    
373            /**
374             * Returns objects in the range map that overlap this range.
375             *
376             * query:      *-----------------*      returned
377             *          *------------*              returned
378             *                *-------------*       returned
379             *           *---------------------*    returned
380             *        *----*                        returned
381             *                               *-*    returned
382             */
383            public Set find(int argLow, int argHigh) {
384                    Iterator it = getSubMap(argLow, argHigh+1).values().iterator();
385                    Set ret = new HashSet();
386                    while (it.hasNext()) {
387                            ArrayList l = (ArrayList) it.next();
388                            ret.addAll(l);
389                    }
390                    return ret;
391            }
392    
393            /**
394             * Returns objects in the range map that are fully contained by the range.
395             *
396             * query:      *-----------------*      returned
397             *          *------------*              not returned
398             *                *-------------*       returned
399             *           *---------------------*    not returned
400             *        *----*                        not returned
401             *                               *-*    not returned
402             */
403            public Set findContained(int argLow, int argHigh) {
404                    Iterator it = find(argLow, argHigh).iterator();
405    
406                    Set ret = new HashSet();
407                    while (it.hasNext()) {
408                            Object o = it.next();
409                            Integer [] bounds = (Integer[]) objectList.get(o);
410                            if(bounds[0].intValue() >= argLow && bounds[1].intValue() <= argHigh+1) {
411                                    ret.add(o);
412                            }
413                    }
414                    return ret;
415            }
416            
417            /**
418             * Returns objects in the range map that fully contain the range.
419             *
420             * query:      *-----------------*      returned
421             *          *------------*              not returned
422             *                *-------------*       not returned
423             *           *---------------------*    returned
424             *        *----*                        not returned
425             *                               *-*    not returned
426             */
427            public Set findContaining(int argLow, int argHigh) {
428                    Iterator it = find(argLow, argHigh).iterator();
429    
430                    Set ret = new HashSet();
431                    while (it.hasNext()) {
432                            Object o = it.next();
433                            Integer [] bounds = (Integer[]) objectList.get(o);
434                            if(bounds[0].intValue() <= argLow && bounds[1].intValue() >= argHigh+1) {
435                                    ret.add(o);
436                            }
437                    }
438                    return ret;
439            }
440            
441            /** Returns true if the map has an entry in the specfied range. */
442            public boolean hasEntry(int argLow, int argHigh) {
443                    Iterator it = getSubMap(argLow, argHigh+1).values().iterator();
444    
445                    // If there is no entry or a single empty entry return false.  Otherwise, return true
446                    if(!it.hasNext() || ((ArrayList) it.next()).size() == 0 && !it.hasNext()) {
447                            return false;
448                    }
449                    else
450                            return true;
451            }
452            
453            /** Returns the submap from argLow+1 included to argHigh excluded. */
454            protected SortedMap getSubMap(int argLow, int argHigh) {
455                    if (argLow > argHigh)
456                            throw new IllegalArgumentException(
457                                    "Low end of range (" + argLow + ") is greater than high end (" + argHigh + ")");
458                    Integer lowBound = new Integer(argLow + 1);
459                    Integer high = new Integer(argHigh);
460    
461                    SortedMap lowMap = map.headMap(lowBound);
462                    if (lowMap.size() != 0)
463                            lowBound = (Integer) lowMap.lastKey();
464                    return map.subMap(lowBound, high);
465            }
466    
467            /**
468             * Adds the object o into the map with interval lo - high. The interval is inclusive, with both the high and low being part of the interval.
469             */
470            public void add(int argLow, int argHigh, Object o) {
471                    if (argLow > argHigh)
472                            throw new IllegalArgumentException(
473                                    "Low end of range (" + argLow + ") is greater than high end (" + argHigh + ")");
474                    if (objectList.containsKey(o)) {
475                            throw new IllegalArgumentException("RangeMap already contains " + o);
476                    }
477                    Integer low = new Integer(argLow);
478                    Integer high = new Integer(argHigh+1);
479                    objectList.put(o, new Integer[] { low, high });
480    
481                    makeSplitAt(low);
482                    makeSplitAt(high);
483    
484                    // Take the new submap and add o to every element
485                    Iterator it = map.subMap(low, high).values().iterator();
486                    while (it.hasNext()) {
487                            ArrayList l = (ArrayList) it.next();
488                            l.add(o);
489                    }
490            }
491            
492    
493            /** Returns true if the object is in the RangeMap */
494            public boolean contains(Object o) {
495                    return objectList.get(o) != null;
496            }
497    
498            /**
499             * Removes the object o from the map.
500             */
501            public void remove(Object o) {
502                    Integer[] bounds = (Integer[]) objectList.get(o);
503                    if (bounds == null) {
504                            throw new IllegalArgumentException("RangeMap does not contain: " + o);
505                    }
506                    objectList.remove(o);
507                    // Remove the object from each interval it contained.
508                    Iterator it = map.subMap(bounds[0], bounds[1]).values().iterator();
509                    while (it.hasNext()) {
510                            ArrayList l = (ArrayList) it.next();
511                            l.remove(o);
512                    }
513                    /*
514                     * Check to see if we can remove an interval. This occurs if there is no change from the
515                     */
516                    mergeAt(bounds[0]);
517                    mergeAt(bounds[1]);
518            }
519    
520            private void mergeAt(Integer bound) {
521                    List startList = (List) map.get(bound);
522                    SortedMap lowMap = map.headMap(bound);
523                    if (lowMap.size() == 0) {
524                            if (startList.size() == 0)
525                                    map.remove(bound);
526                    } else {
527                            List previousList = (List) lowMap.get(lowMap.lastKey());
528                            if (previousList.equals(startList)) {
529                                    map.remove(bound);
530                            }
531                    }
532            }
533    
534            private void makeSplitAt(Integer bound) {
535                    // If there is already a split, we are done
536                    Object entry = map.get(bound);
537                    if (entry == null) {
538                            // Find the previous interval and duplicate it
539                            SortedMap headMap = map.headMap(bound);
540                            if (headMap.size() == 0) {
541                                    map.put(bound, new ArrayList());
542                            } else {
543                                    map.put(bound, new ArrayList((List) map.get(headMap.lastKey())));
544                            }
545                    }
546            }
547    
548            @Override
549            public String toString() {
550                    StringBuffer ret = new StringBuffer("Range Map (x-y:count) ");
551                    Integer lastKey = null;
552                    int lastCount = 0;
553                    Iterator it = map.keySet().iterator();
554                    while (it.hasNext()) {
555                            Integer key = (Integer) it.next();
556                            if (lastKey != null) {
557                                    ret.append(lastKey + "-" + (key.intValue() - 1) + ":" + lastCount + " ");
558                            }
559                            lastKey = key;
560                            lastCount = ((List) map.get(key)).size();
561                    }
562                    ret.append(lastKey + "-end:" + lastCount);
563                    return ret.toString();
564            }
565    }