+ * @param cdsSeq
+ * @param contig
+ * @param proteinProduct
+ * @param mapping
+ * @return list of DBRefEntrys added
+ */
+ protected static List<DBRefEntry> propagateDBRefsToCDS(SequenceI cdsSeq,
+         SequenceI contig, SequenceI proteinProduct, Mapping mapping)
+ {
+   /*
+    * Step 1: collect the contig's dbrefs that carry a triplet map equal
+    * to the supplied mapping's map, noting the source of each one
+    */
+   List<DBRefEntry> congruentRefs = new ArrayList<>();
+   HashSet<String> congruentSources = new HashSet<>();
+   if (contig.getDBRefs() != null)
+   {
+     for (DBRefEntry contigRef : contig.getDBRefs())
+     {
+       if (!contigRef.hasMap())
+       {
+         continue;
+       }
+       MapList contigMap = contigRef.getMap().getMap();
+       if (contigMap.isTripletMap() && mapping.getMap().equals(contigMap))
+       {
+         congruentRefs.add(contigRef);
+         congruentSources.add(contigRef.getSource());
+       }
+     }
+   }
+
+   /*
+    * Step 2: narrow the protein product's dbrefs to those sources only
+    */
+   DBRefEntry[] proteinRefsOnSource = DBRefUtils.selectRefs(
+           proteinProduct.getDBRefs(),
+           congruentSources.toArray(new String[0]));
+
+   /*
+    * Step 3: for each congruent contig dbref, build an equivalent dbref
+    * on the CDS sequence and record it
+    */
+   List<DBRefEntry> added = new ArrayList<>();
+   for (DBRefEntry contigRef : congruentRefs)
+   {
+     Mapping contigMapping = contigRef.getMap();
+
+     /*
+      * new map: the whole of the CDS sequence (start..end) onto the
+      * 'to' ranges of the contig dbref's map; the 3 and 1 arguments
+      * are presumably the from:to unit ratio (codon to residue)
+      */
+     int[] cdsExtent = new int[] { cdsSeq.getStart(), cdsSeq.getEnd() };
+     MapList cdsPositionMap = new MapList(
+             Arrays.asList(new int[][] { cdsExtent }),
+             contigMapping.getMap().getToRanges(), 3, 1);
+     Mapping contigMappingCopy = new Mapping(contigMapping.getTo(),
+             contigMapping.getMap());
+
+     DBRefEntry cdsRef = new DBRefEntry(contigRef.getSource(),
+             contigRef.getVersion(), contigRef.getAccessionId(),
+             new Mapping(contigMappingCopy.getTo(), cdsPositionMap));
+
+     /*
+      * If the contig dbref has no mapped-to sequence, look for a dbref on
+      * the protein product with the same accession and (ignoring case)
+      * the same source. proteinRefsOnSource is already restricted to the
+      * sources being transferred, so a match on accession is taken to
+      * mean the same database entry.
+      */
+     if (contigMappingCopy.getTo() == null && proteinRefsOnSource != null)
+     {
+       List<DBRefEntry> sameAccession = DBRefUtils
+               .searchRefs(proteinRefsOnSource, contigRef.getAccessionId());
+       if (sameAccession != null)
+       {
+         for (DBRefEntry proteinRef : sameAccession)
+         {
+           if (proteinRef.getSource()
+                   .equalsIgnoreCase(contigRef.getSource()))
+           {
+             // complementary dbref found on the protein product, so make
+             // the new dbref's mapping point at the protein product
+             cdsRef.getMap().setTo(proteinProduct);
+           }
+         }
+       }
+     }
+
+     cdsSeq.addDBRef(cdsRef);
+     added.add(cdsRef);
+   }
+   return added;
+ }
+
+ /**
+  * Copies positional features from 'fromSeq' onto the dataset sequence at
+  * the root of 'toSeq', with each feature's begin/end translated through
+  * the supplied mapping. A feature whose full range cannot be located is
+  * still copied if just its end or just its start can be located: in the
+  * first case the copy begins at position 1, in the second it ends at the
+  * length of 'toSeq'. Features for which neither lookup succeeds are
+  * skipped. Nothing is copied if 'fromSeq' is, or has as its dataset
+  * sequence, the dataset sequence that 'toSeq' resolves to.
+  *
+  * @param fromSeq
+  *          the sequence whose features are read
+  * @param toSeq
+  *          the sequence whose root dataset sequence receives the copies
+  * @param mapping
+  *          the mapping from 'fromSeq' to 'toSeq'
+  * @param select
+  *          if not null, only features of this type are copied (including
+  *          subtypes in the Sequence Ontology); if null, all positional
+  *          features are candidates
+  * @param omitting
+  *          feature types that are never copied (exact, case-sensitive
+  *          match on the feature's type)
+  * @return the number of features copied
+  */
+ protected static int transferFeatures(SequenceI fromSeq, SequenceI toSeq,
+         MapList mapping, String select, String... omitting)
+ {
+   /*
+    * walk up to the sequence that has no dataset sequence of its own
+    */
+   SequenceI target = toSeq;
+   for (SequenceI parent = target
+           .getDatasetSequence(); parent != null; parent = target
+                   .getDatasetSequence())
+   {
+     target = parent;
+   }
+   if (fromSeq == target || fromSeq.getDatasetSequence() == target)
+   {
+     return 0; // shared dataset sequence
+   }
+
+   /*
+    * candidate features: everything positional, or only those matching
+    * the requested ontology term
+    */
+   List<SequenceFeature> candidates;
+   if (select == null)
+   {
+     candidates = fromSeq.getFeatures().getPositionalFeatures();
+   }
+   else
+   {
+     candidates = fromSeq.getFeatures().getFeaturesByOntology(select);
+   }
+
+   int copied = 0;
+   for (SequenceFeature feature : candidates)
+   {
+     String featureType = feature.getType();
+     boolean excluded = false;
+     for (String excludedType : omitting)
+     {
+       if (featureType.equals(excludedType))
+       {
+         excluded = true;
+         break;
+       }
+     }
+     if (excluded)
+     {
+       continue;
+     }
+
+     /*
+      * locate the mapped range - null if either start or end is
+      * not mapped (no partial overlaps are calculated)
+      */
+     int featureStart = feature.getBegin();
+     int featureEnd = feature.getEnd();
+     int[] located = mapping.locateInTo(featureStart, featureEnd);
+     if (located == null)
+     {
+       /*
+        * whole range doesn't map: try just the end position first
+        * (feature overlaps the 5' end of the mapped region) ...
+        */
+       located = mapping.locateInTo(featureEnd, featureEnd);
+       if (located != null)
+       {
+         // ... so the copy runs from the start of the peptide
+         located[0] = 1;
+       }
+       else
+       {
+         /*
+          * ... then just the start position (feature overlaps
+          * the 3' end of the mapped region)
+          */
+         located = mapping.locateInTo(featureStart, featureStart);
+         if (located != null)
+         {
+           // ... so the copy runs up to the end of the peptide
+           located[1] = toSeq.getLength();
+         }
+       }
+     }
+     if (located == null)
+     {
+       continue; // neither end of the feature falls in the mapped range
+     }
+
+     // the located range may be in descending order; normalise it
+     int newBegin = Math.min(located[0], located[1]);
+     int newEnd = Math.max(located[0], located[1]);
+     target.addSequenceFeature(new SequenceFeature(feature, newBegin,
+             newEnd, feature.getFeatureGroup(), feature.getScore()));
+     copied++;
+   }
+   return copied;
+ }
+
+ /**
+ * Returns a mapping from dna to protein by inspecting sequence features of
+ * type "CDS" on the dna. A mapping is constructed if the total CDS feature
+ * length is 3 times the peptide length (optionally after dropping a trailing
+ * stop codon). This method does not check whether the CDS nucleotide sequence
+ * translates to the peptide sequence.
+ *
+ * @param dnaSeq