addedXref |= importCrossRefSeq(cf, newDsSeqs, doNotAdd, dss,
retrievedDss);
}
+ // JBPNote: What assumptions are made for dbref structures on
+ // retrieved sequences?
+ // If addedXref is true, importCrossRefSeq found sequences with
+ // dbrefs whose mappings point to sequences congruent with dss
+
if (!addedXref)
{
// try again, after looking for matching IDs
/**
* process sequence retrieved via a dbref on source sequence to resolve and
- * transfer data
+ * transfer data.
+ * JBPNote: as of 2022-02-03 this assumes retrievedSequence has dbRefs
+ * with Mapping references to a sequence congruent with sourceSequence
*
* @param cf
* @param sourceSequence
List<DBRefEntry> dbr = retrievedSequence.getDBRefs();
if (dbr != null)
{
- for (int ib = 0, nb = dbr.size(); ib < nb; ib++)
+ for (int ib = 0, nb = dbr.size(); ib < nb; ib++)
{
- DBRefEntry dbref = dbr.get(ib);
+ DBRefEntry dbref = dbr.get(ib);
+ // matched will be null if the dbref has no map
SequenceI matched = findInDataset(dbref);
if (matched == sourceSequence)
{
Mapping map = dbref.getMap();
if (map != null)
{
- SequenceI ms = map.getTo();
+ SequenceI ms = map.getTo();
if (ms != null && map.getMap() != null)
{
if (ms == sourceSequence)
* Returns null or the first sequence in the dataset which is identical to
* xref.mapTo, and has a) a primary dbref matching xref, or if none found, the
* first one with an ID source|xrefacc
- *
+ * JBPNote: Could refactor this to AlignmentI/DatasetI
* @param xref
* with map and mapped-to sequence
* @return
* Updates any empty mappings in the cross-references with one to a compatible
* retrieved sequence if found, and adds any new mappings to the
* AlignedCodonFrame
- *
+ * JBPNote: TODO: this relies on sequence IDs of the form UNIPROT|ACCESSION,
+ * which are not always present.
* @param mapFrom
* @param xrefs
* @param retrieved
*/
public boolean covers(SequenceI seq)
{
- List<int[]> mappedRanges = null;
+ return covers(seq,false,false);
+ }
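+ /**
+ * Tests whether this mapping covers the given sequence, optionally also
+ * accepting coverage of the sequence at the other end of the mapping.
+ *
+ * @param seq
+ *          the sequence to test
+ * @param localCover
+ *          when true, and seq is matched via its dataset sequence, compare
+ *          against the extent of the dataset sequence rather than that of seq
+ * @param either
+ *          when true, coverage of either seq or the sequence at the other
+ *          end of the mapping is sufficient
+ * @return true if the mapping covers the full length of the given sequence
+ *         (or of the other sequence if either==true)
+ */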
+ public boolean covers(SequenceI seq, boolean localCover,boolean either)
+ {
+ List<int[]> mappedRanges = null,otherRanges=null;
MapList mapList = mapping.getMap();
+ int mstart=seq.getStart(),mend=seq.getEnd(),ostart,oend;
+ ;
if (fromSeq == seq || fromSeq == seq.getDatasetSequence())
{
+ if (localCover && fromSeq !=seq)
+ {
+ mstart=fromSeq.getStart();
+ mend=fromSeq.getEnd();
+ }
mappedRanges = mapList.getFromRanges();
+ otherRanges=mapList.getToRanges();
+ ostart=mapping.to.getStart();
+ oend=mapping.to.getEnd();
}
else if (mapping.to == seq || mapping.to == seq.getDatasetSequence())
{
+ if (localCover && mapping.to !=seq)
+ {
+ mstart=mapping.to.getStart();
+ mend=mapping.to.getEnd();
+ }
mappedRanges = mapList.getToRanges();
+ otherRanges=mapList.getFromRanges();
+ ostart=fromSeq.getStart();
+ oend=fromSeq.getEnd();
}
else
{
* (necessary for circular CDS - example EMBL:J03321:AAA91567)
* and mapped length covers (at least) sequence length
*/
- int length = 0;
+ int length = countRange(mappedRanges,mstart,mend);
+
+ if (length != -1)
+ {
+ // add 3 to mapped length to allow for a mapped stop codon
+ if (length + 3 >= (mend - mstart + 1))
+ {
+ return true;
+ }
+ }
+ if (either)
+ {
+ // also check coverage of the other range
+ length = countRange(otherRanges, ostart, oend);
+ if (length != -1)
+ {
+ if (length + 1 >= (oend - ostart + 1))
+ {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+ private int countRange(List<int[]> mappedRanges,int mstart,int mend)
+ {
+ int length=0;
for (int[] range : mappedRanges)
{
int from = Math.min(range[0], range[1]);
int to = Math.max(range[0], range[1]);
- if (from < seq.getStart() || to > seq.getEnd())
+ if (from < mstart || to > mend)
{
- return false;
+ return -1;
}
length += (to - from + 1);
}
- // add 1 to mapped length to allow for a mapped stop codon
- if (length + 1 < (seq.getEnd() - seq.getStart() + 1))
- {
- return false;
- }
- return true;
+ return length;
}
/**
* Adds any regions mapped to or from position {@code pos} in sequence
* {@code seq} to the given search results
- *
+ * Note: it is recommended to first call {@code covers(seq, true, true)} to
+ * check that the mapping covers seq or the sequence it is mapped to
* @param seq
* @param pos
* @param sr
}
for (SequenceToSequenceMapping ssm : mappings)
{
- ssm.markMappedRegion(ds, index, results);
+ if (ssm.covers(seq,true,true)) {
+ ssm.markMappedRegion(ds, index, results);
+ }
}
}
JPopupMenu men = new JPopupMenu(MessageManager
.formatMessage("label.settings_for_param", new String[]
{ type }));
- final FeatureColourI featureColour = (FeatureColourI) typeCol;
-
- /*
- * menu option to select (or deselect) variable colour
- */
- final JCheckBoxMenuItem variableColourCB = new JCheckBoxMenuItem(
- MessageManager.getString("label.variable_colour"));
- variableColourCB.setSelected(!featureColour.isSimpleColour());
- men.add(variableColourCB);
-
- /*
- * checkbox action listener doubles up as listener to OK
- * from the variable colour / filters dialog
- */
- variableColourCB.addActionListener(new ActionListener()
- {
- @Override
- public void actionPerformed(ActionEvent e)
- {
- if (e.getSource() == variableColourCB)
- {
- // BH 2018 for JavaScript because this is a checkbox
- men.setVisible(true);
- men.setVisible(false);
- if (featureColour.isSimpleColour())
- {
- /*
- * toggle simple colour to variable colour - show dialog
- */
- FeatureTypeSettings fc = new FeatureTypeSettings(fr, type);
- fc.addActionListener(this);
- }
- else
- {
- /*
- * toggle variable to simple colour - show colour chooser
- */
- String title = MessageManager
- .formatMessage("label.select_colour_for", type);
- ColourChooserListener listener = new ColourChooserListener()
- {
- @Override
- public void colourSelected(Color c)
- {
- table.setValueAt(new FeatureColour(c), rowSelected,
- COLOUR_COLUMN);
- table.validate();
- updateFeatureRenderer(
- ((FeatureTableModel) table.getModel()).getData(),
- false);
- }
- };
- JalviewColourChooser.showColourChooser(FeatureSettings.this,
- title, featureColour.getMaxColour(), listener);
- }
- }
- else
- {
- if (e.getSource() instanceof FeatureTypeSettings)
- {
- /*
- * update after OK in feature colour dialog; the updated
- * colour will have already been set in the FeatureRenderer
- */
- FeatureColourI fci = fr.getFeatureColours().get(type);
- table.setValueAt(fci, rowSelected, COLOUR_COLUMN);
- // BH 2018 setting a table value does not invalidate it.
- // System.out.println("FeatureSettings is valied" +
- // table.validate();
- }
- }
- }
- });
-
- men.addSeparator();
JMenuItem scr = new JMenuItem(
MessageManager.getString("label.sort_by_score"));
/**
* Truncates (if necessary) the exon intervals to match 3 times the length of
- * the protein; also accepts 3 bases longer (for stop codon not included in
- * protein)
+ * the protein (this includes truncating a stop codon included in the exon)
*
* @param proteinLength
* @param exon
int exonLength = MappingUtils.getLength(Arrays.asList(exon));
/*
- * if exon length matches protein, or is shorter, or longer by the
- * length of a stop codon (3 bases), then leave it unchanged
+ * if exon length matches protein, or is shorter, then leave it unchanged
*/
- if (expectedCdsLength >= exonLength
- || expectedCdsLength == exonLength - 3)
+ if (expectedCdsLength >= exonLength)
{
return exon;
}
import jalview.datamodel.AlignmentOrder;
import jalview.datamodel.ColumnSelection;
import jalview.datamodel.HiddenColumns;
+import jalview.datamodel.Mapping;
import jalview.datamodel.SearchResultMatchI;
import jalview.datamodel.SearchResults;
import jalview.datamodel.SearchResultsI;
*/
int startResiduePos = selected.findPosition(firstUngappedPos);
int endResiduePos = selected.findPosition(lastUngappedPos);
-
- for (AlignedCodonFrame acf : codonFrames)
+ for (SequenceI seq : mapTo.getAlignment().getSequences())
{
- for (SequenceI seq : mapTo.getAlignment().getSequences())
+ int mappedStartResidue = 0;
+ int mappedEndResidue = 0;
+ for (AlignedCodonFrame acf : codonFrames)
{
- SequenceI peptide = targetIsNucleotide ? selected : seq;
- SequenceI cds = targetIsNucleotide ? seq : selected;
- SequenceToSequenceMapping s2s = acf.getCoveringMapping(cds,
- peptide);
- if (s2s == null)
+ // rather than use acf.getCoveringMapping() we iterate through all
+ // mappings to make sure all CDS are selected for a protein
+ for (SequenceToSequenceMapping map: acf.getMappings())
{
- continue;
- }
- int mappedStartResidue = 0;
- int mappedEndResidue = 0;
- List<AlignedCodonFrame> mapping = Arrays.asList(acf);
- SearchResultsI sr = buildSearchResults(selected, startResiduePos,
- mapping);
- for (SearchResultMatchI m : sr.getResults())
- {
- mappedStartResidue = m.getStart();
- mappedEndResidue = m.getEnd();
- }
- sr = buildSearchResults(selected, endResiduePos, mapping);
- for (SearchResultMatchI m : sr.getResults())
+ if (map.covers(selected) && map.covers(seq))
{
- mappedStartResidue = Math.min(mappedStartResidue, m.getStart());
- mappedEndResidue = Math.max(mappedEndResidue, m.getEnd());
- }
+ /*
+ * Found a sequence mapping. Locate the start/end mapped residues.
+ */
+ List<AlignedCodonFrame> mapping = Arrays
+ .asList(new AlignedCodonFrame[]
+ { acf });
+ // locate start
+ SearchResultsI sr = buildSearchResults(selected,
+ startResiduePos, mapping);
+ for (SearchResultMatchI m : sr.getResults())
+ {
+ mappedStartResidue = m.getStart();
+ mappedEndResidue = m.getEnd();
+ }
+ // locate end - allowing for adjustment of start range
+ sr = buildSearchResults(selected, endResiduePos, mapping);
+ for (SearchResultMatchI m : sr.getResults())
+ {
+ mappedStartResidue = Math.min(mappedStartResidue,
+ m.getStart());
+ mappedEndResidue = Math.max(mappedEndResidue, m.getEnd());
+ }
- /*
- * Find the mapped aligned columns, save the range. Note findIndex
- * returns a base 1 position, SequenceGroup uses base 0
- */
- int mappedStartCol = seq.findIndex(mappedStartResidue) - 1;
- minStartCol = minStartCol == -1 ? mappedStartCol
- : Math.min(minStartCol, mappedStartCol);
- int mappedEndCol = seq.findIndex(mappedEndResidue) - 1;
- maxEndCol = maxEndCol == -1 ? mappedEndCol
- : Math.max(maxEndCol, mappedEndCol);
- mappedGroup.addSequence(seq, false);
- break;
- }
+ /*
+ * Find the mapped aligned columns, save the range. Note findIndex
+ * returns a base 1 position, SequenceGroup uses base 0
+ */
+ int mappedStartCol = seq.findIndex(mappedStartResidue) - 1;
+ minStartCol = minStartCol == -1 ? mappedStartCol
+ : Math.min(minStartCol, mappedStartCol);
+ int mappedEndCol = seq.findIndex(mappedEndResidue) - 1;
+ maxEndCol = maxEndCol == -1 ? mappedEndCol
+ : Math.max(maxEndCol, mappedEndCol);
+ mappedGroup.addSequence(seq, false);
+ break;
+ }
+ }}
}
}
mappedGroup.setStartRes(minStartCol < 0 ? 0 : minStartCol);
@Test(groups = { "Functional_Failing" })
public void testFindXrefSequences_withFetch()
{
+ // JBPNote: this fails because pep1 and pep2 do not have DBRefEntry
+ // objects with mappings.
+ // Fix#1 would be to revise the test data so it fits with the Jalview
+ // 2.11.2+ assumption that ENA retrievals yield dbrefs with Mappings
+
SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC");
dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "Q9ZTS2"));
dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "P30419"));
dna1.addDBRef(new DBRefEntry("UNIPROT", "ENA:0", "P00314"));
final SequenceI pep1 = new Sequence("Q9ZTS2", "MYQLIRSSW");
- pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2"));
+ pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2",null,true));
final SequenceI pep2 = new Sequence("P00314", "MRKLLAASG");
- pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314"));
+ pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314",null,true));
/*
* argument false suppresses adding DAS sources
{
assertEquals((ranges = map.getFromRanges()).size(), 1);
assertEquals(ranges.get(0)[0], 1579);
- assertEquals(ranges.get(0)[1], 2934);
+ assertEquals(ranges.get(0)[1], 2931); // excludes stop 2934
assertEquals((ranges = map.getToRanges()).size(), 1);
assertEquals(ranges.get(0)[0], 1);
assertEquals(ranges.get(0)[1], 451);
{
assertEquals((ranges = map.getFromRanges()).size(), 1);
assertEquals(ranges.get(0)[0], 2928);
- assertEquals(ranges.get(0)[1], 3992);
+ assertEquals(ranges.get(0)[1], 3989); // excludes stop 3992
assertEquals((ranges = map.getToRanges()).size(), 1);
assertEquals(ranges.get(0)[0], 1);
assertEquals(ranges.get(0)[1], 354);
{
assertEquals((ranges = map.getFromRanges()).size(), 1);
assertEquals(ranges.get(0)[0], 4054);
- assertEquals(ranges.get(0)[1], 4848);
+ assertEquals(ranges.get(0)[1], 4845); // excludes stop 4848
assertEquals((ranges = map.getToRanges()).size(), 1);
assertEquals(ranges.get(0)[0], 1);
assertEquals(ranges.get(0)[1], 264);
assertEquals(ranges.get(0)[0], 7022);
assertEquals(ranges.get(0)[1], 7502);
assertEquals(ranges.get(1)[0], 1);
- assertEquals(ranges.get(1)[1], 437);
+ assertEquals(ranges.get(1)[1], 434); // excludes stop at 437
assertEquals((ranges = map.getToRanges()).size(), 1);
assertEquals(ranges.get(0)[0], 1);
assertEquals(ranges.get(0)[1], 305);
// complement(488..1480)
assertEquals((ranges = map.getFromRanges()).size(), 1);
assertEquals(ranges.get(0)[0], 1480);
- assertEquals(ranges.get(0)[1], 488);
+ assertEquals(ranges.get(0)[1], 491); // // excludes stop at 488
assertEquals((ranges = map.getToRanges()).size(), 1);
assertEquals(ranges.get(0)[0], 1);
assertEquals(ranges.get(0)[1], 330);
// exact length match:
assertSame(exons, EmblFlatFile.adjustForProteinLength(6, exons));
- // match if we assume exons include stop codon not in protein:
- assertSame(exons, EmblFlatFile.adjustForProteinLength(5, exons));
-
+ // patch from JAL-3725 in EmblXmlSource propagated to Flatfile:
+ // exons that exceed the protein by a stop codon (3bp) are now truncated
+ int[] truncated = EmblFlatFile.adjustForProteinLength(5, exons);
+ assertEquals(Arrays.toString(truncated), "[11, 15, 21, 25, 31, 35]");
+
// truncate last exon by 6bp
- int[] truncated = EmblFlatFile.adjustForProteinLength(4, exons);
- assertEquals("[11, 15, 21, 25, 31, 32]", Arrays.toString(truncated));
+ truncated = EmblFlatFile.adjustForProteinLength(4, exons);
+ assertEquals(Arrays.toString(truncated),"[11, 15, 21, 25, 31, 32]");
// remove last exon and truncate preceding by 1bp (so 3bp in total)
truncated = EmblFlatFile.adjustForProteinLength(3, exons);
- assertEquals("[11, 15, 21, 24]", Arrays.toString(truncated));
+ assertEquals(Arrays.toString(truncated),"[11, 15, 21, 24]");
// exact removal of exon case:
exons = new int[] { 11, 15, 21, 27, 33, 38 }; // 18 bp
truncated = EmblFlatFile.adjustForProteinLength(4, exons);
- assertEquals("[11, 15, 21, 27]", Arrays.toString(truncated));
+ assertEquals(Arrays.toString(truncated), "[11, 15, 21, 27]");
// what if exons are too short for protein?
truncated = EmblFlatFile.adjustForProteinLength(7, exons);
assertTrue(seqString.endsWith("FKQKS"));
map = mapping.getMap();
assertEquals(map.getFromLowest(), 6045);
- assertEquals(map.getFromHighest(), 6788);
+ assertEquals(map.getFromHighest(), 6785); // excludes stop at 6788
assertEquals(map.getToLowest(), 1);
assertEquals(map.getToHighest(), 247);
assertEquals(map.getFromRatio(), 3);
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
-import java.awt.Color;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.List;
-
-import org.testng.annotations.BeforeClass;
-import org.testng.annotations.Test;
-
import jalview.api.AlignViewportI;
import jalview.bin.Cache;
import jalview.commands.EditCommand;