+ * The main use case for this method is when mapping cDNA sequence to its + * protein product, based on CDS feature ranges which derive from spliced + * exons, but are contiguous on the cDNA sequence. For example + * + *
+ * CDS 1-20 // from exon1 + * CDS 21-35 // from exon2 + * CDS 36-71 // from exon3 + * 'coalesce' to range 1-71 + *+ * + * @param ranges + * @return the same list (if unchanged), else a new merged list, leaving the + * input list unchanged + */ + public static List
+ * Returns null if the mappings cannot be traversed (not all toRanges of this + * map correspond to fromRanges of the input), or if this.toRatio does not + * match map.fromRatio. + * + *
+ * Example 1: + * this: from [1-100] to [501-600] + * input: from [10-40] to [60-90] + * output: from [10-40] to [560-590] + * Example 2 ('reverse strand exons'): + * this: from [1-100] to [2000-1951], [1000-951] // transcript to loci + * input: from [1-50] to [41-90] // CDS to transcript + * output: from [1-50] to [1960-1951], [1000-961] // CDS to gene loci + *+ * + * @param map + * @return + */ + public MapList traverse(MapList map) + { + if (map == null) { - int f = mmap[0][2], t = mmap[0][3]; - while (f <= t) - { - System.out.println("Range " + f + " to " + t); - int rng[] = ml.locateInFrom(f, t); - if (rng != null) - { - for (int i = 0; i < rng.length; i++) - { - System.out.print(rng[i] + ((i % 2 == 0) ? "," : ";")); - } - } - else - { - System.out.println("No range!"); - } - System.out.print("\nReversed\n"); - rng = ml.locateInFrom(t, f); - if (rng != null) - { - for (int i = 0; i < rng.length; i++) - { - System.out.print(rng[i] + ((i % 2 == 0) ? "," : ";")); - } - } - else - { - System.out.println("No range!"); - } - System.out.print("\n"); - f++; - t--; - } + return null; } - System.out.print("\n"); - mmap = ml.makeToMap(); - System.out.println("ToMap : (" + mmap[0][0] + " " + mmap[0][1] + " " - + mmap[0][2] + " " + mmap[0][3] + " "); - for (int i = 1; i <= mmap[1].length; i++) - { - if (mmap[1][i - 1] == -1) - { - System.out.print(i + "=XXX"); - } - else + /* + * compound the ratios by this rule: + * A:B with M:N gives A*M:B*N + * reduced by greatest common divisor + * so 1:3 with 3:3 is 3:9 or 1:3 + * 1:3 with 3:1 is 3:3 or 1:1 + * 1:3 with 1:3 is 1:9 + * 2:5 with 3:7 is 6:35 + */ + int outFromRatio = getFromRatio() * map.getFromRatio(); + int outToRatio = getToRatio() * map.getToRatio(); + int gcd = MathUtils.gcd(outFromRatio, outToRatio); + outFromRatio /= gcd; + outToRatio /= gcd; + + List
+ * For example: + * 1:1 (e.g. gene to CDS): + * ranges { [10-20], [31-40] }, wordLengthFrom = wordLengthTo = 1 + * for start = 1, end = 9, returns a BitSet with no bits set + * for start = 1, end = 11, returns a BitSet with bits 0-1 set + * for start = 15, end = 35, returns a BitSet with bits 5-15 set + * 1:3 (peptide to codon): + * ranges { [1-200] }, wordLengthFrom = 1, wordLengthTo = 3 + * for start = 9, end = 9, returns a BitSet with bits 24-26 set + * 3:1 (codon to peptide): + * ranges { [101-150], [171-180] }, wordLengthFrom = 3, wordLengthTo = 1 + * for start = 101, end = 102 (partial first codon), returns a BitSet with bit 0 set + * for start = 150, end = 171 (partial 17th codon), returns a BitSet with bit 16 set + * 3:1 (circular DNA to peptide): + * ranges { [101-150], [21-30] }, wordLengthFrom = 3, wordLengthTo = 1 + * for start = 24, end = 40 (spans codons 18-20), returns a BitSet with bits 17-19 set + *+ * + * @param start + * @param end + * @param ranges + * @param wordLengthFrom + * @param wordLengthTo + * @return + */ + protected final static BitSet getMappedOffsetsForPositions(int start, + int end, List