+ }
+
+ /**
+  * Searches the dataset for a sequence holding the same residues as the
+  * sequence that the given cross-reference maps to. Only dataset sequences
+  * whose name equals the accession id, or starts with "source|accession", are
+  * compared.
+  *
+  * @param xref
+  *          the cross-reference to resolve; may be null
+  * @return the first dataset sequence that matches both by name and by
+  *         content (as judged by sameSequence), or null if there is none, or
+  *         if the cross-reference is null, has no map, or its map has no 'to'
+  *         sequence
+  */
+ SequenceI findInDataset(DBRefEntry xref)
+ {
+   if (xref == null || !xref.hasMap())
+   {
+     return null;
+   }
+   SequenceI target = xref.getMap().getTo();
+   if (target == null)
+   {
+     return null;
+   }
+
+   String accession = xref.getAccessionId();
+   String qualifiedName = xref.getSource() + "|" + accession;
+
+   /*
+    * compare against the dataset sequence where the target has one
+    */
+   SequenceI wanted = target.getDatasetSequence();
+   if (wanted == null)
+   {
+     wanted = target;
+   }
+
+   /*
+    * names are checked by hand rather than with SequenceIdMatcher, which
+    * currently also returns sequences that merely hold a dbref to the
+    * matched accession id - those are not wanted here
+    */
+   for (SequenceI candidate : dataset.getSequences())
+   {
+     String candidateName = candidate.getName();
+     boolean nameMatches = accession.equals(candidateName)
+             || candidateName.startsWith(qualifiedName);
+     if (nameMatches && sameSequence(candidate, wanted))
+     {
+       return candidate;
+     }
+   }
+   return null;
+ }
+
+ /**
+  * Answers true if seq1 and seq2 contain exactly the same characters (ignoring
+  * case), else false. This method compares the lengths, then each character in
+  * turn, in order to 'fail fast'. For case-sensitive comparison, it would be
+  * possible to use Arrays.equals(seq1.getSequence(), seq2.getSequence()).
+  * <p>
+  * Characters are compared after folding to upper case, so that only genuine
+  * case variants are treated as equal. Testing for a character code difference
+  * of 32 is not sufficient, as it would also equate unrelated characters such
+  * as the gap '-' and 'M', or '.' and 'N'.
+  *
+  * @param seq1
+  *          the first sequence, may be null
+  * @param seq2
+  *          the second sequence, may be null
+  * @return true if both arguments are the same object (including both null),
+  *         or both are non-null and hold character arrays of equal length
+  *         whose characters match ignoring case; otherwise false
+  */
+ // TODO move to Sequence / SequenceI
+ static boolean sameSequence(SequenceI seq1, SequenceI seq2)
+ {
+   if (seq1 == seq2)
+   {
+     return true;
+   }
+   if (seq1 == null || seq2 == null)
+   {
+     return false;
+   }
+   char[] c1 = seq1.getSequence();
+   char[] c2 = seq2.getSequence();
+   if (c1.length != c2.length)
+   {
+     return false;
+   }
+   for (int i = 0; i < c1.length; i++)
+   {
+     /*
+      * cheap identity test first; only fold case when the chars differ
+      */
+     if (c1[i] != c2[i]
+             && Character.toUpperCase(c1[i]) != Character
+                     .toUpperCase(c2[i]))
+     {
+       return false;
+     }
+   }
+   return true;
+ }
+
+ /**
+  * Updates any empty mappings in the cross-references with one to a compatible
+  * retrieved sequence if found, and adds any new mappings to the
+  * AlignedCodonFrame. Cross-references that already have a map are left
+  * untouched. A cross-reference for which no retrieved sequence matches
+  * "source|accession" is skipped, and processing continues with the next one.
+  *
+  * @param mapFrom
+  *          the sequence that owns the cross-references
+  * @param xrefs
+  *          the cross-references to complete; if null, nothing is done
+  * @param retrieved
+  *          the candidate sequences, matched to cross-references by id
+  * @param acf
+  *          the collection that receives any mapping that is constructed
+  * @param fromDna
+  *          passed through to constructMapping to select the direction of
+  *          mapping (presumably true when mapFrom is nucleotide - confirm
+  *          against callers)
+  */
+ void updateDbrefMappings(SequenceI mapFrom, DBRefEntry[] xrefs,
+         SequenceI[] retrieved, AlignedCodonFrame acf, boolean fromDna)
+ {
+   if (xrefs == null)
+   {
+     return;
+   }
+   SequenceIdMatcher idMatcher = new SequenceIdMatcher(retrieved);
+   for (DBRefEntry xref : xrefs)
+   {
+     if (xref.hasMap())
+     {
+       continue;
+     }
+     String targetSeqName = xref.getSource() + "|"
+             + xref.getAccessionId();
+     SequenceI[] matches = idMatcher.findAllIdMatches(targetSeqName);
+     if (matches == null)
+     {
+       /*
+        * nothing retrieved for this xref; carry on with the others
+        * rather than abandoning the whole update
+        */
+       continue;
+     }
+     for (SequenceI seq : matches)
+     {
+       constructMapping(mapFrom, seq, xref, acf, fromDna);
+     }
+   }
+ }
+
+ /**
+ * Tries to make a mapping between sequences. If successful, adds the mapping
+ * to the dbref and the mappings collection and answers true, otherwise
+ * answers false. The following methods of making a mapping are tried in
+ * turn:
+ * <ul>
+ * <li>if 'mapTo' holds a mapping to 'mapFrom', take the inverse; this is, for
+ * example, the case after fetching EMBL cross-references for a Uniprot
+ * sequence</li>
+ * <li>else check if the dna translates exactly to the protein (give or take
+ * start and stop codons)</li>
+ * <li>else try to map based on CDS features on the dna sequence</li>
+ * </ul>
+ *
+ * @param mapFrom
+ * @param mapTo
+ * @param xref
+ * @param mappings
+ * @return
+ */
+ boolean constructMapping(SequenceI mapFrom, SequenceI mapTo,
+ DBRefEntry xref, AlignedCodonFrame mappings, boolean fromDna)
+ {
+ MapList mapping = null;
+
+ /*
+ * look for a reverse mapping, if found make its inverse
+ */
+ if (mapTo.getDBRefs() != null)
+ {
+ for (DBRefEntry dbref : mapTo.getDBRefs())
+ {
+ String name = dbref.getSource() + "|" + dbref.getAccessionId();
+ if (dbref.hasMap() && mapFrom.getName().startsWith(name))
+ {
+ /*
+ * looks like we've found a map from 'mapTo' to 'mapFrom'
+ * - invert it to make the mapping the other way
+ */
+ MapList reverse = dbref.getMap().getMap().getInverse();
+ xref.setMap(new Mapping(mapTo, reverse));
+ mappings.addMap(mapFrom, mapTo, reverse);
+ return true;
+ }
+ }
+ }
+
+ if (fromDna)
+ {
+ mapping = AlignmentUtils.mapCdnaToProtein(mapTo, mapFrom);
+ }
+ else
+ {
+ mapping = AlignmentUtils.mapCdnaToProtein(mapFrom, mapTo);
+ if (mapping != null)
+ {
+ mapping = mapping.getInverse();
+ }
+ }
+ if (mapping == null)
+ {
+ return false;
+ }
+ xref.setMap(new Mapping(mapTo, mapping));
+
+ /*
+ * and add a reverse DbRef with the inverse mapping
+ */
+ if (mapFrom.getDatasetSequence() != null
+ && mapFrom.getDatasetSequence().getSourceDBRef() != null)
+ {
+ DBRefEntry dbref = new DBRefEntry(mapFrom.getDatasetSequence()
+ .getSourceDBRef());
+ dbref.setMap(new Mapping(mapFrom.getDatasetSequence(), mapping
+ .getInverse()));
+ mapTo.addDBRef(dbref);
+ }
+
+ if (fromDna)
+ {
+ AlignmentUtils.computeProteinFeatures(mapFrom, mapTo, mapping);
+ mappings.addMap(mapFrom, mapTo, mapping);
+ }
+ else
+ {
+ mappings.addMap(mapTo, mapFrom, mapping.getInverse());
+ }
+
+ return true;