001 /*
002 * The Broad Institute SOFTWARE COPYRIGHT NOTICE AGREEMENT This software and its documentation are copyright 2003 by
003 * the Broad Institute/Massachusetts Institute of Technology. All rights are reserved. This software is supplied
004 * without any warranty or guaranteed support whatsoever. Neither the Broad Institute nor MIT can be responsible for
005 * its use, misuse, or functionality.
006 */
007 package calhoun.util;
008
009 import java.io.Serializable;
010 import java.util.ArrayList;
011 import java.util.Collections;
012 import java.util.HashMap;
013 import java.util.HashSet;
014 import java.util.Iterator;
015 import java.util.List;
016 import java.util.Map;
017 import java.util.Set;
018 import java.util.SortedMap;
019 import java.util.TreeMap;
020
021 /**
022 * A rangeMap contains a set of intervals and maps each interval to a set of objects that exist in that interval. It
023 * allows very fast lookups of range queries.
024 *
025 * An interval is closed, that is, the interval from 20-30 includes both 20 and 30.
026 */
027 public class RangeMap implements Serializable {
028
029 public static final long serialVersionUID = 413339879647819935L;
030
031 /**
032 * Imagine a number line. Take the set of start and stop coordinates for each object in the RangeMap and place them
033 * on the number line. This divides the number line into a series of intervals. Each interval can be associated
034 * with a distinct list of objects that contain all points in that interval. The RangeMap is a sorted map that keys
035 * the start of the interval to this distinct list of objects.
036 * <p>
037 * Objects that fully contain the points in more than one interval will be contained in multiple lists. One list
038 * for each interval they contain.
039 */
040 SortedMap map = new TreeMap();
041
042 /**
043 * Keeps track of the start and end for each object in the map so we can
044 * remove them easily if necessary.
045 *
046 * Maps Object -> int[2]. the first element in the array is the start of
047 * the key and the second element is the end of the key.
048 */
049 Map objectList = new HashMap();
050
051 public RangeMap() {
052 super();
053 }
054
055 public int size() {
056 return objectList.size();
057 }
058
059 public Set values() {
060 return objectList.keySet();
061 }
062
063 /** Returns the start of the first non-empty interval. If the RangeMap has no entries, returns 0. */
064 public int getStart() {
065 return size() == 0 ? 0 : ((Integer) map.firstKey()).intValue();
066 }
067
068 /** Returns the end of the last interval */
069 public int getStop() {
070 return size() == 0 ? 0 : ((Integer) map.lastKey()).intValue() - 1;
071 }
072
073 /**
074 * An interval is a convenient data structure used to return a section of a RangeMap. It contains a start, a stop,
075 * and a set of elements that occur in that range.
076 */
077 public static class Interval {
078 public int start;
079 public int stop;
080 public Set elements;
081
082 public Interval() {}
083
084 public Interval(int start, int stop, Set elements)
085 {
086 this.start = start;
087 this.stop = stop;
088 this.elements = new HashSet(elements);
089 }
090
091 @Override
092 public boolean equals(Object obj)
093 {
094 if (!(obj instanceof Interval)) {
095 return false;
096 }
097
098 Interval interval = (Interval) obj;
099
100 if (this.start != interval.start) {
101 return false;
102 } else if (this.stop != interval.stop) {
103 return false;
104 } else if (!this.elements.containsAll(interval.elements)) {
105 return false;
106 } else if (!interval.elements.containsAll(this.elements)) {
107 return false;
108 }
109
110 return true;
111 }
112
113 @Override
114 public String toString() {
115 String result = "";
116 for (Iterator iter = elements.iterator(); iter.hasNext(); ){
117 result += iter.next().toString();
118 }
119 return "interval " + start +" -> " + stop + " : "+ result + "\n";
120 }
121
122 public int getLength()
123 {
124 return 1 + stop - start;
125 }
126 }
127
128 public SortedMap getMap()
129 {
130 return Collections.unmodifiableSortedMap(map);
131 }
132
133 public Map getObjectList()
134 {
135 return Collections.unmodifiableMap(objectList);
136 }
137
138 public List getDisjointRegions()
139 {
140 return getRegions(true);
141 }
142
143 public List getRegions()
144 {
145 return getRegions(false);
146 }
147
148 /**
149 * Returns a list of Interval objects that contain overlapping elements.
150 *
151 * @param splitDisjointRegions if true, return disjoint regions,
152 * if false, return "old-style" regions. Consider the following:
153 *
154 * <---------Feature A------->
155 * <----------Feature B---------->
156 * <---------Feature C------->
157 *
158 * - If we group these into a single region, then any two groups
159 * returned by this method will be separated by at least one gap
160 * (ie an area that contains no elements at all). This is more
161 * convenient.
162 * - If we group them into two regions, then every element in
163 * a region will overlap at least one other element in that
164 * region (if the region contains more than one element).
165 * This is more mathematically precise.
166 *
167 * There are good reasons to want either behavior so we support both.
168 * Disjoint regions are more mathematically consistent but may lie
169 * immediately adjacent to each other. Nondisjoint regions are better
170 * separated, but "old-style" regions may contain an element that
171 * does not overlap anything else within it.
172 *
173 * No element in a region will ever overlap an element outside the region.
174 *
175 * This method returns a List of Interval objects. The Interval objects
176 * returned will be in increasing order of their placement on the RangeMap.
177 *
178 * @see #getIntervals()
179 */
180 public List getRegions(boolean splitDisjointRegions) {
181 List list = new ArrayList();
182 Interval currentInterval = null;
183 // Iterate through the map (in order). As long as there are intervals with at least one entry, add that entry
184 // to the current cluster.
185 // When you hit an interval with no values, finish the interval to return.
186 Iterator it = map.entrySet().iterator();
187 while (it.hasNext()) {
188 Map.Entry entry = (Map.Entry) it.next();
189 List currentValue = (List) entry.getValue();
190 int currentKey = ((Integer) entry.getKey()).intValue();
191 if (currentValue.size() == 0) {
192 Assert.a(currentInterval != null,
193 "Multiple 0 intervals in a row were detected.");
194 currentInterval.stop = currentKey - 1;
195 list.add(currentInterval);
196 currentInterval = null;
197 } else {
198 if (currentInterval != null) {
199 /*
200 * If two intervals are adjacent but disjoint (no element
201 * spans them) then return them as two groups, not one.
202 */
203 boolean disjoint = true;
204 for (Iterator i = currentValue.iterator(); i.hasNext(); ) {
205 if (currentInterval.elements.contains(i.next())) {
206 disjoint = false;
207 }
208 }
209 if (disjoint && splitDisjointRegions) {
210 currentInterval.stop = currentKey - 1;
211 list.add(currentInterval);
212 currentInterval = null;
213 }
214 }
215
216 if (currentInterval == null) {
217 currentInterval = new Interval();
218 currentInterval.start = currentKey;
219 currentInterval.elements = new HashSet();
220 }
221 currentInterval.elements.addAll(currentValue);
222 }
223 }
224 Assert.a(currentInterval == null, "RangeMap did not end with a 0 entry.");
225 return list;
226 }
227
228 /**
229 * Returns the individual intervals in the rangeMap.
230 *
231 * An interval is an area of a range map where every point in that
232 * area is overlapped by the same set of elements. Consider this:
233 *
234 * <-----Feature A-----> <---Feature C--->
235 * <---------Feature B--------->
236 *
237 * | 1 | 2 | 3 | 4 | 5 |
238 *
239 * This would be represented by one region but counts as five
240 * intervals: interval 1 contains A only, interval 2 contains A & B,
241 * interval 3 holds B only, and so on.
242 *
243 * Callers should be prepared for Intervals that contain no elements.
244 *
245 * This method returns a List of Interval objects. The Interval objects
246 * returned will be in increasing order of their placement on the RangeMap.
247 *
248 * @see #getRegions()
249 */
250 public List getIntervals()
251 {
252 List list = new ArrayList();
253 Interval currentInterval = null;
254 Iterator it = map.entrySet().iterator();
255
256 while (it.hasNext()) {
257 Map.Entry entry = (Map.Entry) it.next();
258 List currentValue = (List) entry.getValue();
259 int currentKey = ((Integer) entry.getKey()).intValue();
260
261 if (currentInterval != null) {
262 currentInterval.stop = currentKey - 1;
263 list.add(currentInterval);
264 }
265
266 currentInterval = new Interval();
267 currentInterval.start = currentKey;
268 currentInterval.elements = new HashSet();
269 if (currentValue != null) {
270 currentInterval.elements.addAll(currentValue);
271 }
272 }
273
274 Assert.a(currentInterval.elements.size() == 0,
275 "RangeMap did not end with an empty entry");
276 return list;
277 }
278
279 /** Like getIntervals, but allows you to define a start and stop. First and last interval are guaranteed to start and stop at the endpoints. */
280 public List getIntervals(int start, int stop)
281 {
282 List list = new ArrayList();
283 Interval currentInterval = null;
284 Iterator it = getSubMap(start, stop+1).entrySet().iterator();
285
286 while (it.hasNext()) {
287 Map.Entry entry = (Map.Entry) it.next();
288 List currentValue = (List) entry.getValue();
289 int currentKey = ((Integer) entry.getKey()).intValue();
290
291 if(currentInterval == null && start < currentKey) {
292 // We started before the beginning of the range map, add in a dummy empty interval
293 currentInterval = new Interval();
294 currentInterval.start = start;
295 currentInterval.elements = Collections.EMPTY_SET;
296 }
297
298 if (currentInterval != null) {
299 currentInterval.stop = currentKey - 1;
300 list.add(currentInterval);
301 }
302
303 currentInterval = new Interval();
304 currentInterval.start = currentKey > start ? currentKey : start;
305 if (currentValue == null) {
306 currentInterval.elements = Collections.EMPTY_SET;
307 }
308 else {
309 currentInterval.elements = new HashSet(currentValue);
310 }
311 }
312
313 if(currentInterval != null) {
314 currentInterval.stop = stop;
315 list.add(currentInterval);
316 }
317 return list;
318 }
319
320 /** Returns the empty intervals. A start and stop are required to bound the edges of the first and last region. These bounds can be within the existin region.
321 * The returned intervals are closed.
322 */
323 public List getEmptyIntervals(int start, int totalSize) {
324 Assert.a(start <= totalSize, "Start (",new Integer(start),") must be less than stop (",new Integer(totalSize),")");
325 List list = new ArrayList();
326 Interval currentInterval = null;
327
328 boolean beforeStart = true;
329 Iterator it = map.entrySet().iterator();
330 while (it.hasNext()) {
331 Map.Entry entry = (Map.Entry) it.next();
332 List currentValue = (List) entry.getValue();
333 int currentKey = ((Integer) entry.getKey()).intValue();
334 if(currentKey > totalSize)
335 break;
336 if (currentValue.size() == 0) {
337 Assert.a(beforeStart == false, "Map started with a 0 entry.");
338 Assert.a(currentInterval == null, "Multiple 0 intervals in a row were detected.");
339 if(currentKey < start) {
340 beforeStart = true;
341 }
342 else {
343 currentInterval = new Interval();
344 currentInterval.start = currentKey;
345 }
346 } else {
347 if(beforeStart == true) {
348 beforeStart = false;
349 if(start < currentKey) {
350 currentInterval = new Interval();
351 currentInterval.start = start;
352 }
353 }
354 if(currentInterval != null) {
355 currentInterval.stop = currentKey - 1;
356 list.add(currentInterval);
357 currentInterval = null;
358 }
359 }
360 }
361 if(beforeStart == true) {
362 Assert.a(currentInterval == null);
363 currentInterval = new Interval();
364 currentInterval.start = start;
365 }
366 if(currentInterval != null) {
367 currentInterval.stop = totalSize;
368 list.add(currentInterval);
369 }
370 return list;
371 }
372
373 /**
374 * Returns objects in the range map that overlap this range.
375 *
376 * query: *-----------------* returned
377 * *------------* returned
378 * *-------------* returned
379 * *---------------------* returned
380 * *----* returned
381 * *-* returned
382 */
383 public Set find(int argLow, int argHigh) {
384 Iterator it = getSubMap(argLow, argHigh+1).values().iterator();
385 Set ret = new HashSet();
386 while (it.hasNext()) {
387 ArrayList l = (ArrayList) it.next();
388 ret.addAll(l);
389 }
390 return ret;
391 }
392
393 /**
394 * Returns objects in the range map that are fully contained by the range.
395 *
396 * query: *-----------------* returned
397 * *------------* not returned
398 * *-------------* returned
399 * *---------------------* not returned
400 * *----* not returned
401 * *-* not returned
402 */
403 public Set findContained(int argLow, int argHigh) {
404 Iterator it = find(argLow, argHigh).iterator();
405
406 Set ret = new HashSet();
407 while (it.hasNext()) {
408 Object o = it.next();
409 Integer [] bounds = (Integer[]) objectList.get(o);
410 if(bounds[0].intValue() >= argLow && bounds[1].intValue() <= argHigh+1) {
411 ret.add(o);
412 }
413 }
414 return ret;
415 }
416
417 /**
418 * Returns objects in the range map that fully contain the range.
419 *
420 * query: *-----------------* returned
421 * *------------* not returned
422 * *-------------* not returned
423 * *---------------------* returned
424 * *----* not returned
425 * *-* not returned
426 */
427 public Set findContaining(int argLow, int argHigh) {
428 Iterator it = find(argLow, argHigh).iterator();
429
430 Set ret = new HashSet();
431 while (it.hasNext()) {
432 Object o = it.next();
433 Integer [] bounds = (Integer[]) objectList.get(o);
434 if(bounds[0].intValue() <= argLow && bounds[1].intValue() >= argHigh+1) {
435 ret.add(o);
436 }
437 }
438 return ret;
439 }
440
441 /** Returns true if the map has an entry in the specfied range. */
442 public boolean hasEntry(int argLow, int argHigh) {
443 Iterator it = getSubMap(argLow, argHigh+1).values().iterator();
444
445 // If there is no entry or a single empty entry return false. Otherwise, return true
446 if(!it.hasNext() || ((ArrayList) it.next()).size() == 0 && !it.hasNext()) {
447 return false;
448 }
449 else
450 return true;
451 }
452
453 /** Returns the submap from argLow+1 included to argHigh excluded. */
454 protected SortedMap getSubMap(int argLow, int argHigh) {
455 if (argLow > argHigh)
456 throw new IllegalArgumentException(
457 "Low end of range (" + argLow + ") is greater than high end (" + argHigh + ")");
458 Integer lowBound = new Integer(argLow + 1);
459 Integer high = new Integer(argHigh);
460
461 SortedMap lowMap = map.headMap(lowBound);
462 if (lowMap.size() != 0)
463 lowBound = (Integer) lowMap.lastKey();
464 return map.subMap(lowBound, high);
465 }
466
467 /**
468 * Adds the object o into the map with interval lo - high. The interval is inclusive, with both the high and low being part of the interval.
469 */
470 public void add(int argLow, int argHigh, Object o) {
471 if (argLow > argHigh)
472 throw new IllegalArgumentException(
473 "Low end of range (" + argLow + ") is greater than high end (" + argHigh + ")");
474 if (objectList.containsKey(o)) {
475 throw new IllegalArgumentException("RangeMap already contains " + o);
476 }
477 Integer low = new Integer(argLow);
478 Integer high = new Integer(argHigh+1);
479 objectList.put(o, new Integer[] { low, high });
480
481 makeSplitAt(low);
482 makeSplitAt(high);
483
484 // Take the new submap and add o to every element
485 Iterator it = map.subMap(low, high).values().iterator();
486 while (it.hasNext()) {
487 ArrayList l = (ArrayList) it.next();
488 l.add(o);
489 }
490 }
491
492
493 /** Returns true if the object is in the RangeMap */
494 public boolean contains(Object o) {
495 return objectList.get(o) != null;
496 }
497
498 /**
499 * Removes the object o from the map.
500 */
501 public void remove(Object o) {
502 Integer[] bounds = (Integer[]) objectList.get(o);
503 if (bounds == null) {
504 throw new IllegalArgumentException("RangeMap does not contain: " + o);
505 }
506 objectList.remove(o);
507 // Remove the object from each interval it contained.
508 Iterator it = map.subMap(bounds[0], bounds[1]).values().iterator();
509 while (it.hasNext()) {
510 ArrayList l = (ArrayList) it.next();
511 l.remove(o);
512 }
513 /*
514 * Check to see if we can remove an interval. This occurs if there is no change from the
515 */
516 mergeAt(bounds[0]);
517 mergeAt(bounds[1]);
518 }
519
520 private void mergeAt(Integer bound) {
521 List startList = (List) map.get(bound);
522 SortedMap lowMap = map.headMap(bound);
523 if (lowMap.size() == 0) {
524 if (startList.size() == 0)
525 map.remove(bound);
526 } else {
527 List previousList = (List) lowMap.get(lowMap.lastKey());
528 if (previousList.equals(startList)) {
529 map.remove(bound);
530 }
531 }
532 }
533
534 private void makeSplitAt(Integer bound) {
535 // If there is already a split, we are done
536 Object entry = map.get(bound);
537 if (entry == null) {
538 // Find the previous interval and duplicate it
539 SortedMap headMap = map.headMap(bound);
540 if (headMap.size() == 0) {
541 map.put(bound, new ArrayList());
542 } else {
543 map.put(bound, new ArrayList((List) map.get(headMap.lastKey())));
544 }
545 }
546 }
547
548 @Override
549 public String toString() {
550 StringBuffer ret = new StringBuffer("Range Map (x-y:count) ");
551 Integer lastKey = null;
552 int lastCount = 0;
553 Iterator it = map.keySet().iterator();
554 while (it.hasNext()) {
555 Integer key = (Integer) it.next();
556 if (lastKey != null) {
557 ret.append(lastKey + "-" + (key.intValue() - 1) + ":" + lastCount + " ");
558 }
559 lastKey = key;
560 lastCount = ((List) map.get(key)).size();
561 }
562 ret.append(lastKey + "-end:" + lastCount);
563 return ret.toString();
564 }
565 }