- public boolean processGffKey(Map<String, List<String>> set, int nattr,
- SequenceI seq, SequenceFeature sf, AlignmentI align,
- List<SequenceI> newseqs, boolean relaxedIdMatching)
- throws InvalidGFF3FieldException
- {
- String attr;
- // decide how to interpret according to type
- if (sf.getType().equals("similarity"))
- {
- int strand = sf.getStrand();
- // exonerate cdna/protein map
- // look for fields
- List<SequenceI> querySeq = findNames(align, newseqs,
- relaxedIdMatching, set.get(attr="Query"));
- if (querySeq==null || querySeq.size()!=1)
- {
- throw new InvalidGFF3FieldException( attr, set,
- "Expecting exactly one sequence in Query field (got "
- + set.get(attr) + ")");
- }
- if (set.containsKey(attr="Align"))
- {
- // process the align maps and create cdna/protein maps
- // ideally, the query sequences are in the alignment, but maybe not...
-
- AlignedCodonFrame alco = new AlignedCodonFrame();
- MapList codonmapping = constructCodonMappingFromAlign(set, attr,
- strand);
-
- // add codon mapping, and hope!
- alco.addMap(seq, querySeq.get(0), codonmapping);
- align.addCodonFrame(alco);
- // everything that's needed to be done is done
- // no features to create here !
- return false;
- }
-
- }
- return true;
- }
-
- private MapList constructCodonMappingFromAlign(
- Map<String, List<String>> set,
- String attr, int strand) throws InvalidGFF3FieldException
- {
- if (strand == 0)
- {
- throw new InvalidGFF3FieldException(attr, set,
- "Invalid strand for a codon mapping (cannot be 0)");
- }
- List<Integer> fromrange = new ArrayList<Integer>(), torange = new ArrayList<Integer>();
- int lastppos = 0, lastpframe = 0;
- for (String range : set.get(attr))
- {
- List<Integer> ints = new ArrayList<Integer>();
- StringTokenizer st = new StringTokenizer(range, " ");
- while (st.hasMoreTokens())
- {
- String num = st.nextToken();
- try
- {
- ints.add(new Integer(num));
- } catch (NumberFormatException nfe)
- {
- throw new InvalidGFF3FieldException(attr, set,
- "Invalid number in field " + num);
- }
- }
- // Align positionInRef positionInQuery LengthInRef
- // contig_1146 exonerate:protein2genome:local similarity 8534 11269
- // 3652 - . alignment_id 0 ;
- // Query DDB_G0269124
- // Align 11270 143 120
- // corresponds to : 120 bases align at pos 143 in protein to 11270 on
- // dna in strand direction
- // Align 11150 187 282
- // corresponds to : 282 bases align at pos 187 in protein to 11150 on
- // dna in strand direction
- //
- // Align 10865 281 888
- // Align 9977 578 1068
- // Align 8909 935 375
- //
- if (ints.size() != 3)
- {
- throw new InvalidGFF3FieldException(attr, set,
- "Invalid number of fields for this attribute ("
- + ints.size() + ")");
- }
- fromrange.add(new Integer(ints.get(0).intValue()));
- fromrange.add(new Integer(ints.get(0).intValue() + strand
- * ints.get(2).intValue()));
- // how are intron/exon boundaries that do not align in codons
- // represented
- if (ints.get(1).equals(lastppos) && lastpframe > 0)
- {
- // extend existing to map
- lastppos += ints.get(2) / 3;
- lastpframe = ints.get(2) % 3;
- torange.set(torange.size() - 1, new Integer(lastppos));
- }
- else
- {
- // new to map range
- torange.add(ints.get(1));
- lastppos = ints.get(1) + ints.get(2) / 3;
- lastpframe = ints.get(2) % 3;
- torange.add(new Integer(lastppos));
- }
- }
- // from and to ranges must end up being a series of start/end intervals
- if (fromrange.size() % 2 == 1)
- {
- throw new InvalidGFF3FieldException(attr, set,
- "Couldn't parse the DNA alignment range correctly");
- }
- if (torange.size() % 2 == 1)
- {
- throw new InvalidGFF3FieldException(attr, set,
- "Couldn't parse the protein alignment range correctly");
- }
- // finally, build the map
- int[] frommap = new int[fromrange.size()], tomap = new int[torange
- .size()];
- int p = 0;
- for (Integer ip : fromrange)
- {
- frommap[p++] = ip.intValue();
- }
- p = 0;
- for (Integer ip : torange)
- {
- tomap[p++] = ip.intValue();
- }
-
- return new MapList(frommap, tomap, 3, 1);
- }
-
- private List<SequenceI> findNames(AlignmentI align,
- List<SequenceI> newseqs, boolean relaxedIdMatching,
- List<String> list)
- {
- List<SequenceI> found = new ArrayList<SequenceI>();
- for (String seqId : list)
- {
- SequenceI seq = findName(align, seqId, relaxedIdMatching, newseqs);
- if (seq != null)
- {
- found.add(seq);
- }
- }
- return found;
- }
-
- private AlignmentI lastmatchedAl = null; // NOTE(review): presumably the alignment the matcher below was last built for - confirm in findName/resetMatcher
-
- private SequenceIdMatcher matcher = null; // NOTE(review): presumably an ID matcher cached between lookups to speed up ID matching - confirm in findName/resetMatcher
-
- /**
- * clear any temporary handles used to speed up ID matching
- */
- private void resetMatcher()