From: gmungoc Date: Thu, 28 Jan 2016 12:07:40 +0000 (+0000) Subject: Merge branch 'develop' into JAL-1705_trialMerge X-Git-Tag: Release_2_10_0~296^2~53 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=a6b324e3f5edac3df0b968f0037b1cc8b651598e;p=jalview.git Merge branch 'develop' into JAL-1705_trialMerge Conflicts: src/jalview/analysis/AlignmentUtils.java src/jalview/analysis/CrossRef.java src/jalview/structure/StructureSelectionManager.java src/jalview/ws/dbsources/Uniprot.java test/jalview/analysis/AlignmentUtilsTests.java test/jalview/util/MappingUtilsTest.java test/jalview/ws/seqfetcher/DbRefFetcherTest.java --- a6b324e3f5edac3df0b968f0037b1cc8b651598e diff --cc resources/lang/Messages.properties index 6bbe798,ec5f592..876b815 --- a/resources/lang/Messages.properties +++ b/resources/lang/Messages.properties @@@ -1281,3 -1279,6 +1281,6 @@@ exception.pdb_rest_service_no_longer_av exception.resource_not_be_found = The requested resource could not be found exception.pdb_server_error = There seems to be an error from the PDB server exception.pdb_server_unreachable = Jalview is unable to reach the PDBe Solr server. \nPlease ensure that you are connected to the internet and try again. 
+ label.nw_mapping = Needleman & Wunsch Alignment + label.sifts_mapping = SIFTs Mapping -label.mapping_method = Sequence \u27f7 Structure mapping method ++label.mapping_method = Sequence \u27f7 Structure mapping method diff --cc src/jalview/analysis/AlignmentUtils.java index 2311dea,da5bc2f..fe95dca --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@@ -1386,186 -1369,65 +1386,186 @@@ public class AlignmentUtil * mapped protein sequences * @return */ - protected static List makeExonSequences(SequenceI dnaSeq, - AlignedCodonFrame mapping, AlignedCodonFrame newMapping) + protected static List makeCdsSequences(SequenceI dnaSeq, + AlignedCodonFrame mapping, AlignedCodonFrame newMappings) { - List exonSequences = new ArrayList(); + List cdsSequences = new ArrayList(); List seqMappings = mapping.getMappingsForSequence(dnaSeq); - final char[] dna = dnaSeq.getSequence(); + for (Mapping seqMapping : seqMappings) { - StringBuilder newSequence = new StringBuilder(dnaSeq.getLength()); + SequenceI cds = makeCdsSequence(dnaSeq, seqMapping); + cdsSequences.add(cds); /* - * Get the codon regions as { [2, 5], [7, 12], [14, 14] etc } + * add new mappings, from dna to cds, and from cds to peptide */ - final List dnaExonRanges = seqMapping.getMap().getFromRanges(); - for (int[] range : dnaExonRanges) + MapList dnaToCds = addCdsMappings(dnaSeq, cds, seqMapping, + newMappings); + + /* + * transfer any features on dna that overlap the CDS + */ + transferFeatures(dnaSeq, cds, dnaToCds, "CDS" /* SequenceOntology.CDS */); + } + return cdsSequences; + } + + /** + * Transfers any co-located features on 'fromSeq' to 'toSeq', adjusting the + * feature start/end ranges, optionally omitting specified feature types. + * + * @param fromSeq + * @param toSeq + * @param mapping + * the mapping from 'fromSeq' to 'toSeq' + * @param omitting + */ + protected static void transferFeatures(SequenceI fromSeq, + SequenceI toSeq, MapList mapping, String... 
omitting) + { + SequenceI copyTo = toSeq; + while (copyTo.getDatasetSequence() != null) + { + copyTo = copyTo.getDatasetSequence(); + } + + SequenceFeature[] sfs = fromSeq.getSequenceFeatures(); + if (sfs != null) + { + for (SequenceFeature sf : sfs) { - for (int pos = range[0]; pos <= range[1]; pos++) + String type = sf.getType(); + boolean omit = false; + for (String toOmit : omitting) + { + if (type.equals(toOmit)) + { + omit = true; + } + } + if (omit) + { + continue; + } + + /* + * locate the mapped range - null if either start or end is + * not mapped (no partial overlaps are calculated) + */ + int[] mappedTo = mapping.locateInTo(sf.getBegin(), sf.getEnd()); + if (mappedTo != null) { - newSequence.append(dna[pos - 1]); + SequenceFeature copy = new SequenceFeature(sf); + copy.setBegin(Math.min(mappedTo[0], mappedTo[1])); + copy.setEnd(Math.max(mappedTo[0], mappedTo[1])); + copyTo.addSequenceFeature(copy); } } + } + } - SequenceI exon = new Sequence(dnaSeq.getName(), - newSequence.toString()); + /** + * Creates and adds mappings + *
<ul>
+ * <li>from cds to peptide</li>
+ * <li>from dna to cds</li>
+ * </ul>
+ * and returns the dna-to-cds mapping + * + * @param dnaSeq + * @param cdsSeq + * @param dnaMapping + * @param newMappings + * @return + */ + protected static MapList addCdsMappings(SequenceI dnaSeq, + SequenceI cdsSeq, + Mapping dnaMapping, AlignedCodonFrame newMappings) + { + cdsSeq.createDatasetSequence(); - /* - * Locate any xrefs to CDS database on the protein product and attach to - * the CDS sequence. Also add as a sub-token of the sequence name. - */ - // default to "CDS" if we can't locate an actual gene id - String cdsAccId = FeatureProperties - .getCodingFeature(DBRefSource.EMBL); - DBRefEntry[] cdsRefs = DBRefUtils.selectRefs(seqMapping.getTo() - .getDBRefs(), DBRefSource.CODINGDBS); - if (cdsRefs != null) + /* + * CDS to peptide is just a contiguous 3:1 mapping, with + * the peptide ranges taken unchanged from the dna mapping + */ + List cdsRanges = new ArrayList(); + cdsRanges.add(new int[] { 1, cdsSeq.getLength() }); + MapList cdsToPeptide = new MapList(cdsRanges, dnaMapping.getMap() + .getToRanges(), 3, 1); + newMappings.addMap(cdsSeq.getDatasetSequence(), dnaMapping.getTo(), + cdsToPeptide); + + /* + * dna 'from' ranges map 1:1 to the contiguous extracted CDS + */ + MapList dnaToCds = new MapList( + dnaMapping.getMap().getFromRanges(), cdsRanges, 1, 1); + newMappings.addMap(dnaSeq, cdsSeq.getDatasetSequence(), dnaToCds); + return dnaToCds; + } + + /** + * Makes and returns a CDS-only sequence, where the CDS regions are identified + * as the 'from' ranges of the mapping on the dna. 
+ * + * @param dnaSeq + * nucleotide sequence + * @param seqMapping + * mappings from CDS regions of nucleotide + * @return + */ + protected static SequenceI makeCdsSequence(SequenceI dnaSeq, + Mapping seqMapping) + { + StringBuilder newSequence = new StringBuilder(dnaSeq.getLength()); + final char[] dna = dnaSeq.getSequence(); + int offset = dnaSeq.getStart() - 1; + + /* + * Get the codon regions as { [2, 5], [7, 12], [14, 14] etc } + */ + final List dnaCdsRanges = seqMapping.getMap().getFromRanges(); + for (int[] range : dnaCdsRanges) + { + // TODO handle reverse mapping as well (range[1] < range[0]) + for (int pos = range[0]; pos <= range[1]; pos++) { - for (DBRefEntry cdsRef : cdsRefs) - { - exon.addDBRef(new DBRefEntry(cdsRef)); - cdsAccId = cdsRef.getAccessionId(); - } + newSequence.append(dna[pos - offset - 1]); } - exon.setName(exon.getName() + "|" + cdsAccId); - exon.createDatasetSequence(); + } - /* - * Build new mappings - from the same protein regions, but now to - * contiguous exons - */ - List exonRange = new ArrayList(); - exonRange.add(new int[] { 1, newSequence.length() }); - MapList map = new MapList(exonRange, seqMapping.getMap() - .getToRanges(), 3, 1); - newMapping.addMap(exon.getDatasetSequence(), seqMapping.getTo(), map); - MapList cdsToDnaMap = new MapList(dnaExonRanges, exonRange, 1, 1); - newMapping.addMap(dnaSeq, exon.getDatasetSequence(), cdsToDnaMap); - - exonSequences.add(exon); + SequenceI cds = new Sequence(dnaSeq.getName(), + newSequence.toString()); + + transferDbRefs(seqMapping.getTo(), cds); + + return cds; + } + + /** + * Locate any xrefs to CDS databases on the protein product and attach to the + * CDS sequence. Also add as a sub-token of the sequence name. 
+ * + * @param from + * @param to + */ + protected static void transferDbRefs(SequenceI from, SequenceI to) + { + String cdsAccId = FeatureProperties.getCodingFeature(DBRefSource.EMBL); - DBRefEntry[] cdsRefs = DBRefUtils.selectRefs(from.getDBRef(), ++ DBRefEntry[] cdsRefs = DBRefUtils.selectRefs(from.getDBRefs(), + DBRefSource.CODINGDBS); + if (cdsRefs != null) + { + for (DBRefEntry cdsRef : cdsRefs) + { + to.addDBRef(new DBRefEntry(cdsRef)); + cdsAccId = cdsRef.getAccessionId(); + } + } + if (!to.getName().contains(cdsAccId)) + { + to.setName(to.getName() + "|" + cdsAccId); } - return exonSequences; } } diff --cc src/jalview/analysis/CrossRef.java index e96d9d7,a71e614..21fd08d --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@@ -98,45 -97,32 +98,45 @@@ public class CrossRe { dss = dss.getDatasetSequence(); } - DBRefEntry[] rfs = findXDbRefs(dna, dss.getDBRef()); + DBRefEntry[] rfs = findXDbRefs(dna, dss.getDBRefs()); - for (int r = 0; rfs != null && r < rfs.length; r++) + if (rfs != null) { - if (!refs.contains(rfs[r].getSource())) + for (DBRefEntry ref : rfs) { - refs.add(rfs[r].getSource()); + if (!refs.contains(ref.getSource())) + { + refs.add(ref.getSource()); + } } } if (dataset != null) { // search for references to this sequence's direct references. - DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seq.getDBRef()); - DBRefEntry[] lrfs = CrossRef - .findXDbRefs(!dna, seqs[s].getDBRefs()); ++ DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seq.getDBRefs()); List rseqs = new ArrayList(); - CrossRef.searchDatasetXrefs(seqs[s], !dna, lrfs, dataset, rseqs, + CrossRef.searchDatasetXrefs(seq, !dna, lrfs, dataset, rseqs, null); // don't need to specify codon frame for mapping here for (SequenceI rs : rseqs) { - DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRef()); - DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRefs()); // not used?? 
- for (int r = 0; rfs != null && r < rfs.length; r++) ++ DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRefs()); + if (xrs != null) { - if (!refs.contains(rfs[r].getSource())) + for (DBRefEntry ref : xrs) { - refs.add(rfs[r].getSource()); + if (!refs.contains(ref.getSource())) + { + refs.add(ref.getSource()); + } } } + // looks like copy and paste - change rfs to xrs? + // for (int r = 0; rfs != null && r < rfs.length; r++) + // { + // if (!refs.contains(rfs[r].getSource())) + // { + // refs.add(rfs[r].getSource()); + // } + // } } } } @@@ -235,8 -223,7 +235,8 @@@ if ((xrfs == null || xrfs.length == 0) && dataset != null) { System.out.println("Attempting to find ds Xrefs refs."); - DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seqs[s].getDBRefs()); + // FIXME should be dss not seq here? - DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seq.getDBRef()); ++ DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seq.getDBRefs()); // less ambiguous would be a 'find primary dbRefEntry' method. // filter for desired source xref here found = CrossRef.searchDatasetXrefs(dss, !dna, lrfs, dataset, @@@ -342,7 -329,8 +342,7 @@@ for (int rs = 0; rs < retrieved.length; rs++) { // TODO: examine each sequence for 'redundancy' - DBRefEntry[] dbr = retrieved[rs].getDBRef(); - jalview.datamodel.DBRefEntry[] dbr = retrieved[rs] - .getDBRefs(); ++ DBRefEntry[] dbr = retrieved[rs].getDBRefs(); if (dbr != null && dbr.length > 0) { for (int di = 0; di < dbr.length; di++) diff --cc src/jalview/structure/StructureSelectionManager.java index b497824,2f962b5..871f076 --- a/src/jalview/structure/StructureSelectionManager.java +++ b/src/jalview/structure/StructureSelectionManager.java @@@ -1146,8 -1236,29 +1234,34 @@@ public class StructureSelectionManage return null; } + public IProgressIndicator getProgressIndicator() + { + return progressIndicator; + } + + public void setProgressIndicator(IProgressIndicator progressIndicator) + { + this.progressIndicator = progressIndicator; + } + + public long 
getProgressSessionId() + { + return progressSessionId; + } + + public void setProgressSessionId(long progressSessionId) + { + this.progressSessionId = progressSessionId; + } + + public void setProgressBar(String message) + { + progressIndicator.setProgressBar(message, progressSessionId); + } + + public List getSequenceMappings() + { + return seqmappings; + } ++ } diff --cc src/jalview/util/MappingUtils.java index 22714b8,45d166d..1bbfc73 --- a/src/jalview/util/MappingUtils.java +++ b/src/jalview/util/MappingUtils.java @@@ -522,60 -523,154 +522,153 @@@ public final class MappingUtil char fromGapChar = mapFrom.getAlignment().getGapCharacter(); -- // FIXME allow for hidden columns -- /* * For each mapped column, find the range of columns that residues in that * column map to. */ - for (Object obj : colsel.getSelected()) + List fromSequences = mapFrom.getAlignment().getSequences(); + List toSequences = mapTo.getAlignment().getSequences(); + + for (Integer sel : colsel.getSelected()) { - int col = ((Integer) obj).intValue(); - int mappedToMin = Integer.MAX_VALUE; - int mappedToMax = Integer.MIN_VALUE; + mapColumn(sel.intValue(), codonFrames, mappedColumns, fromSequences, + toSequences, fromGapChar); + } + + for (int[] hidden : colsel.getHiddenColumns()) + { + mapHiddenColumns(hidden, codonFrames, mappedColumns, fromSequences, + toSequences, fromGapChar); + } + return mappedColumns; + } + + /** + * Helper method that maps a [start, end] hidden column range to its mapped + * equivalent + * + * @param hidden + * @param mappings + * @param mappedColumns + * @param fromSequences + * @param toSequences + * @param fromGapChar + */ + protected static void mapHiddenColumns(int[] hidden, - Set mappings, ++ List mappings, + ColumnSelection mappedColumns, List fromSequences, + List toSequences, char fromGapChar) + { + for (int col = hidden[0]; col <= hidden[1]; col++) + { + int[] mappedTo = findMappedColumns(col, mappings, fromSequences, + toSequences, fromGapChar); /* - * For each 
sequence in the 'from' alignment + * Add the range of hidden columns to the mapped selection (converting + * base 1 to base 0). */ - for (SequenceI fromSeq : mapFrom.getAlignment().getSequences()) + if (mappedTo != null) { - /* - * Ignore gaps (unmapped anyway) - */ - if (fromSeq.getCharAt(col) == fromGapChar) - { - continue; - } + mappedColumns.hideColumns(mappedTo[0] - 1, mappedTo[1] - 1); + } + } + } + + /** + * Helper method to map one column selection + * + * @param col + * the column number (base 0) + * @param mappings + * the sequence mappings + * @param mappedColumns + * the mapped column selections to add to + * @param fromSequences + * @param toSequences + * @param fromGapChar + */ - protected static void mapColumn(int col, Set mappings, ++ protected static void mapColumn(int col, ++ List mappings, + ColumnSelection mappedColumns, List fromSequences, + List toSequences, char fromGapChar) + { + int[] mappedTo = findMappedColumns(col, mappings, fromSequences, + toSequences, fromGapChar); + + /* + * Add the range of mapped columns to the mapped selection (converting + * base 1 to base 0). Note that this may include intron-only regions which + * lie between the start and end ranges of the selection. + */ + if (mappedTo != null) + { + for (int i = mappedTo[0]; i <= mappedTo[1]; i++) + { + mappedColumns.addElement(i - 1); + } + } + } + + /** + * Helper method to find the range of columns mapped to from one column. + * Returns the maximal range of columns mapped to from all sequences in the + * source column, or null if no mappings were found. 
+ * + * @param col + * @param mappings + * @param fromSequences + * @param toSequences + * @param fromGapChar + * @return + */ + protected static int[] findMappedColumns(int col, - Set mappings, List fromSequences, ++ List mappings, List fromSequences, + List toSequences, char fromGapChar) + { + int[] mappedTo = new int[] { Integer.MAX_VALUE, Integer.MIN_VALUE }; + boolean found = false; + + /* + * For each sequence in the 'from' alignment + */ + for (SequenceI fromSeq : fromSequences) + { + /* + * Ignore gaps (unmapped anyway) + */ + if (fromSeq.getCharAt(col) == fromGapChar) + { + continue; + } + + /* + * Get the residue position and find the mapped position. + */ + int residuePos = fromSeq.findPosition(col); + SearchResults sr = buildSearchResults(fromSeq, residuePos, + mappings); + for (Match m : sr.getResults()) + { + int mappedStartResidue = m.getStart(); + int mappedEndResidue = m.getEnd(); + SequenceI mappedSeq = m.getSequence(); /* - * Get the residue position and find the mapped position. + * Locate the aligned sequence whose dataset is mappedSeq. TODO a + * datamodel that can do this efficiently. */ - int residuePos = fromSeq.findPosition(col); - SearchResults sr = buildSearchResults(fromSeq, residuePos, - codonFrames); - for (Match m : sr.getResults()) + for (SequenceI toSeq : toSequences) { - int mappedStartResidue = m.getStart(); - int mappedEndResidue = m.getEnd(); - SequenceI mappedSeq = m.getSequence(); - - /* - * Locate the aligned sequence whose dataset is mappedSeq. TODO a - * datamodel that can do this efficiently. 
- */ - for (SequenceI toSeq : mapTo.getAlignment().getSequences()) + if (toSeq.getDatasetSequence() == mappedSeq) { - if (toSeq.getDatasetSequence() == mappedSeq) - { - int mappedStartCol = toSeq.findIndex(mappedStartResidue); - int mappedEndCol = toSeq.findIndex(mappedEndResidue); - mappedToMin = Math.min(mappedToMin, mappedStartCol); - mappedToMax = Math.max(mappedToMax, mappedEndCol); - // System.out.println(fromSeq.getName() + " mapped to cols " - // + mappedStartCol + ":" + mappedEndCol); - break; - // note: remove break if we ever want to map one to many sequences - } + int mappedStartCol = toSeq.findIndex(mappedStartResidue); + int mappedEndCol = toSeq.findIndex(mappedEndResidue); + mappedTo[0] = Math.min(mappedTo[0], mappedStartCol); + mappedTo[1] = Math.max(mappedTo[1], mappedEndCol); + found = true; + break; + // note: remove break if we ever want to map one to many sequences } } } diff --cc src/jalview/ws/dbsources/Pdb.java index b9fb8f3,7f8c76c..3fd7541 --- a/src/jalview/ws/dbsources/Pdb.java +++ b/src/jalview/ws/dbsources/Pdb.java @@@ -97,10 -95,9 +97,10 @@@ public class Pdb extends EbiFileRetriev * * @see jalview.ws.DbSourceProxy#getSequenceRecords(java.lang.String[]) */ + @Override public AlignmentI getSequenceRecords(String queries) throws Exception { - AlignmentI pdbfile = null; + AlignmentI pdbAlignment = null; Vector result = new Vector(); String chain = null; String id = null; diff --cc src/jalview/ws/dbsources/Uniprot.java index 0a252b1,843828b..02da009 --- a/src/jalview/ws/dbsources/Uniprot.java +++ b/src/jalview/ws/dbsources/Uniprot.java @@@ -34,10 -37,9 +36,11 @@@ import jalview.ws.seqfetcher.DbSourcePr import java.io.File; import java.io.FileReader; import java.io.Reader; +import java.net.URL; + import java.util.ArrayList; import java.util.Vector; +import org.exolab.castor.mapping.Mapping; import org.exolab.castor.xml.Unmarshaller; import com.stevesoft.pat.Regex; @@@ -46,14 -48,12 +49,14 @@@ * @author JimP * */ -public class Uniprot extends 
DbSourceProxyImpl implements DbSourceProxy +public class Uniprot extends DbSourceProxyImpl { -- private static final String BAR_DELIMITER = "|"; - private static final String NEWLINE = "\n"; - - private static org.exolab.castor.mapping.Mapping map; ++ /* ++ * Castor mapping loaded from uniprot_mapping.xml ++ */ + private static Mapping map; /** * Constructor diff --cc test/jalview/analysis/AlignmentUtilsTests.java index a48db4b,74e4940..09bd64e --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@@ -1118,15 -1117,15 +1118,15 @@@ public class AlignmentUtilsTest mappings.add(acf); AlignedCodonFrame newMapping = new AlignedCodonFrame(); - List exons = AlignmentUtils.makeExonSequences(dna1, acf, + List cdsSeqs = AlignmentUtils.makeCdsSequences(dna1, acf, newMapping); - assertEquals(1, exons.size()); - SequenceI exon = exons.get(0); + assertEquals(1, cdsSeqs.size()); + SequenceI cdsSeq = cdsSeqs.get(0); - assertEquals("GGGTTT", exon.getSequenceAsString()); - assertEquals("dna1|A12345", exon.getName()); - assertEquals(1, exon.getDBRefs().length); - DBRefEntry cdsRef = exon.getDBRefs()[0]; + assertEquals("GGGTTT", cdsSeq.getSequenceAsString()); + assertEquals("dna1|A12345", cdsSeq.getName()); - assertEquals(1, cdsSeq.getDBRef().length); - DBRefEntry cdsRef = cdsSeq.getDBRef()[0]; ++ assertEquals(1, cdsSeq.getDBRefs().length); ++ DBRefEntry cdsRef = cdsSeq.getDBRefs()[0]; assertEquals("EMBLCDS", cdsRef.getSource()); assertEquals("2", cdsRef.getVersion()); assertEquals("A12345", cdsRef.getAccessionId()); @@@ -1188,34 -1187,34 +1188,34 @@@ new SequenceI[] { dna1 }, mappings); /* - * Verify we have 3 exon sequences, mapped to pep1/2/3 respectively + * Verify we have 3 cds sequences, mapped to pep1/2/3 respectively */ - List exons = exal.getSequences(); - assertEquals(3, exons.size()); - - SequenceI exon = exons.get(0); - assertEquals("GGGTTT", exon.getSequenceAsString()); - assertEquals("dna1|A12345", exon.getName()); - 
assertEquals(1, exon.getDBRefs().length); - DBRefEntry cdsRef = exon.getDBRefs()[0]; + List cds = exal.getSequences(); + assertEquals(3, cds.size()); + + SequenceI cdsSeq = cds.get(0); + assertEquals("GGGTTT", cdsSeq.getSequenceAsString()); + assertEquals("dna1|A12345", cdsSeq.getName()); - assertEquals(1, cdsSeq.getDBRef().length); - DBRefEntry cdsRef = cdsSeq.getDBRef()[0]; ++ assertEquals(1, cdsSeq.getDBRefs().length); ++ DBRefEntry cdsRef = cdsSeq.getDBRefs()[0]; assertEquals("EMBLCDS", cdsRef.getSource()); assertEquals("2", cdsRef.getVersion()); assertEquals("A12345", cdsRef.getAccessionId()); - exon = exons.get(1); - assertEquals("aaaccc", exon.getSequenceAsString()); - assertEquals("dna1|A12346", exon.getName()); - assertEquals(1, exon.getDBRefs().length); - cdsRef = exon.getDBRefs()[0]; + cdsSeq = cds.get(1); + assertEquals("aaaccc", cdsSeq.getSequenceAsString()); + assertEquals("dna1|A12346", cdsSeq.getName()); - assertEquals(1, cdsSeq.getDBRef().length); - cdsRef = cdsSeq.getDBRef()[0]; ++ assertEquals(1, cdsSeq.getDBRefs().length); ++ cdsRef = cdsSeq.getDBRefs()[0]; assertEquals("EMBLCDS", cdsRef.getSource()); assertEquals("3", cdsRef.getVersion()); assertEquals("A12346", cdsRef.getAccessionId()); - exon = exons.get(2); - assertEquals("aaaTTT", exon.getSequenceAsString()); - assertEquals("dna1|A12347", exon.getName()); - assertEquals(1, exon.getDBRefs().length); - cdsRef = exon.getDBRefs()[0]; + cdsSeq = cds.get(2); + assertEquals("aaaTTT", cdsSeq.getSequenceAsString()); + assertEquals("dna1|A12347", cdsSeq.getName()); - assertEquals(1, cdsSeq.getDBRef().length); - cdsRef = cdsSeq.getDBRef()[0]; ++ assertEquals(1, cdsSeq.getDBRefs().length); ++ cdsRef = cdsSeq.getDBRefs()[0]; assertEquals("EMBLCDS", cdsRef.getSource()); assertEquals("4", cdsRef.getVersion()); assertEquals("A12347", cdsRef.getAccessionId()); diff --cc test/jalview/datamodel/SequenceTest.java index 851caf0,9c306a3..dcc8ef7 --- a/test/jalview/datamodel/SequenceTest.java +++ 
b/test/jalview/datamodel/SequenceTest.java @@@ -421,99 -417,4 +421,99 @@@ public class SequenceTes assertEquals("ABCDEF", derived.getDatasetSequence() .getSequenceAsString()); } + + @Test(groups = { "Functional" }) + public void testCopyConstructor_noDataset() + { + SequenceI seq1 = new Sequence("Seq1", "AB-C.D EF"); + seq1.setDescription("description"); + seq1.addAlignmentAnnotation(new AlignmentAnnotation("label", "desc", + 1.3d)); + seq1.addSequenceFeature(new SequenceFeature("type", "desc", 22, 33, + 12.4f, "group")); + seq1.addPDBId(new PDBEntry("1A70", "B", Type.PDB, "File")); + seq1.addDBRef(new DBRefEntry("EMBL", "1.2", "AZ12345")); + + SequenceI copy = new Sequence(seq1); + + assertNull(copy.getDatasetSequence()); + + verifyCopiedSequence(seq1, copy); + + // copy has a copy of the DBRefEntry + // this is murky - DBrefs are only copied for dataset sequences + // where the test for 'dataset sequence' is 'dataset is null' + // but that doesn't distinguish it from an aligned sequence + // which has not yet generated a dataset sequence + // NB getDBRef looks inside dataset sequence if not null - DBRefEntry[] dbrefs = copy.getDBRef(); ++ DBRefEntry[] dbrefs = copy.getDBRefs(); + assertEquals(1, dbrefs.length); - assertFalse(dbrefs[0] == seq1.getDBRef()[0]); - assertTrue(dbrefs[0].equals(seq1.getDBRef()[0])); ++ assertFalse(dbrefs[0] == seq1.getDBRefs()[0]); ++ assertTrue(dbrefs[0].equals(seq1.getDBRefs()[0])); + } + + @Test(groups = { "Functional" }) + public void testCopyConstructor_withDataset() + { + SequenceI seq1 = new Sequence("Seq1", "AB-C.D EF"); + seq1.createDatasetSequence(); + seq1.setDescription("description"); + seq1.addAlignmentAnnotation(new AlignmentAnnotation("label", "desc", + 1.3d)); + seq1.addSequenceFeature(new SequenceFeature("type", "desc", 22, 33, + 12.4f, "group")); + seq1.addPDBId(new PDBEntry("1A70", "B", Type.PDB, "File")); + // here we add DBRef to the dataset sequence: + seq1.getDatasetSequence().addDBRef( + new DBRefEntry("EMBL", 
"1.2", "AZ12345")); + + SequenceI copy = new Sequence(seq1); + + assertNotNull(copy.getDatasetSequence()); + assertSame(copy.getDatasetSequence(), seq1.getDatasetSequence()); + + verifyCopiedSequence(seq1, copy); + + // getDBRef looks inside dataset sequence and this is shared, + // so holds the same dbref objects - DBRefEntry[] dbrefs = copy.getDBRef(); ++ DBRefEntry[] dbrefs = copy.getDBRefs(); + assertEquals(1, dbrefs.length); - assertSame(dbrefs[0], seq1.getDBRef()[0]); ++ assertSame(dbrefs[0], seq1.getDBRefs()[0]); + } + + /** + * Helper to make assertions about a copied sequence + * + * @param seq1 + * @param copy + */ + protected void verifyCopiedSequence(SequenceI seq1, SequenceI copy) + { + // verify basic properties: + assertEquals(copy.getName(), seq1.getName()); + assertEquals(copy.getDescription(), seq1.getDescription()); + assertEquals(copy.getStart(), seq1.getStart()); + assertEquals(copy.getEnd(), seq1.getEnd()); + assertEquals(copy.getSequenceAsString(), seq1.getSequenceAsString()); + + // copy has a copy of the annotation: + AlignmentAnnotation[] anns = copy.getAnnotation(); + assertEquals(1, anns.length); + assertFalse(anns[0] == seq1.getAnnotation()[0]); + assertEquals(anns[0].label, seq1.getAnnotation()[0].label); + assertEquals(anns[0].description, seq1.getAnnotation()[0].description); + assertEquals(anns[0].score, seq1.getAnnotation()[0].score); + + // copy has a copy of the sequence feature: + SequenceFeature[] sfs = copy.getSequenceFeatures(); + assertEquals(1, sfs.length); + assertFalse(sfs[0] == seq1.getSequenceFeatures()[0]); + assertTrue(sfs[0].equals(seq1.getSequenceFeatures()[0])); + + // copy has a copy of the PDB entry + Vector pdbs = copy.getAllPDBEntries(); + assertEquals(1, pdbs.size()); + assertFalse(pdbs.get(0) == seq1.getAllPDBEntries().get(0)); + assertTrue(pdbs.get(0).equals(seq1.getAllPDBEntries().get(0))); + } } diff --cc test/jalview/util/MappingUtilsTest.java index b3a1d8a,51c99af..7100381 --- 
a/test/jalview/util/MappingUtilsTest.java +++ b/test/jalview/util/MappingUtilsTest.java @@@ -343,8 -343,8 +343,10 @@@ public class MappingUtilsTes protected void setupMappedAlignments() throws IOException { /* -- * Set up dna and protein Seq1/2/3 with mappings (held on the protein -- * viewport). Lower case for introns. ++ * Map (upper-case = coding): ++ * Seq1/10-18 AC-GctGtC-T to Seq1/40 -K-P ++ * Seq2/20-27 Tc-GA-G-T-T to Seq2/20-27 L--Q ++ * Seq3/30-38 TtTT-AaCGg- to Seq3/60-61\nG--S */ AlignmentI cdna = loadAlignment(">Seq1/10-18\nAC-GctGtC-T\n" + ">Seq2/20-27\nTc-GA-G-T-Tc\n" + ">Seq3/30-38\nTtTT-AaCGg-\n", @@@ -741,41 -738,73 +743,116 @@@ } /** + * Tests for the method that converts a series of [start, end] ranges to + * single positions, where the mapping is to a reverse strand i.e. start is + * greater than end point mapped to + */ + @Test(groups = { "Functional" }) + public void testFlattenRanges_reverseStrand() + { + assertEquals("[4, 3, 2, 1]", + Arrays.toString(MappingUtils.flattenRanges(new int[] { 4, 1 }))); + assertEquals( + "[4, 3, 2, 1]", + Arrays.toString(MappingUtils.flattenRanges(new int[] { 4, 3, 2, + 1 }))); + assertEquals( + "[4, 3, 2, 1]", + Arrays.toString(MappingUtils.flattenRanges(new int[] { 4, 4, 3, + 3, 2, 2, 1, 1 }))); + assertEquals( + "[12, 9, 8, 7, 4, 3, 2, 1]", + Arrays.toString(MappingUtils.flattenRanges(new int[] { 12, 12, + 9, 7, 4, 1 }))); + // forwards and backwards anyone? 
+ assertEquals( + "[4, 5, 6, 3, 2, 1]", + Arrays.toString(MappingUtils.flattenRanges(new int[] { 4, 6, 3, + 1 }))); + // backwards and forwards + assertEquals( + "[3, 2, 1, 4, 5, 6]", + Arrays.toString(MappingUtils.flattenRanges(new int[] { 3, 1, 4, + 6 }))); + // trailing unpaired start position is ignored: + assertEquals( + "[12, 9, 8, 7, 4, 3, 2]", + Arrays.toString(MappingUtils.flattenRanges(new int[] { 12, 12, + 9, 7, 4, 2, 1 }))); + } ++ ++ /** + * Test mapping a column selection including hidden columns + * + * @throws IOException + */ + @Test(groups = { "Functional" }) + public void testMapColumnSelection_hiddenColumns() throws IOException + { + setupMappedAlignments(); + - ColumnSelection colsel = new ColumnSelection(); ++ ColumnSelection proteinSelection = new ColumnSelection(); + + /* + * Column 0 in protein picks up Seq2/L, Seq3/G which map to cols 0-4 and 0-3 + * in dna respectively, overall 0-4 + */ - colsel.hideColumns(0); - ColumnSelection cs = MappingUtils.mapColumnSelection(colsel, ++ proteinSelection.hideColumns(0); ++ ColumnSelection dnaSelection = MappingUtils.mapColumnSelection(proteinSelection, + proteinView, dnaView); - assertEquals("[]", cs.getSelected().toString()); - List hidden = cs.getHiddenColumns(); ++ assertEquals("[]", dnaSelection.getSelected().toString()); ++ List hidden = dnaSelection.getHiddenColumns(); + assertEquals(1, hidden.size()); + assertEquals("[0, 4]", Arrays.toString(hidden.get(0))); + + /* + * Column 1 in protein picks up Seq1/K which maps to cols 0-3 in dna + */ - colsel.revealAllHiddenColumns(); - colsel.hideColumns(1); - cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView); - hidden = cs.getHiddenColumns(); ++ proteinSelection.revealAllHiddenColumns(); ++ // the unhidden columns are now marked selected! 
++ assertEquals("[0]", proteinSelection.getSelected().toString()); ++ // deselect these or hideColumns will be expanded to include 0 ++ proteinSelection.clear(); ++ proteinSelection.hideColumns(1); ++ dnaSelection = MappingUtils.mapColumnSelection(proteinSelection, proteinView, dnaView); ++ hidden = dnaSelection.getHiddenColumns(); + assertEquals(1, hidden.size()); + assertEquals("[0, 3]", Arrays.toString(hidden.get(0))); + + /* + * Column 2 in protein picks up gaps only - no mapping + */ - colsel.revealAllHiddenColumns(); - colsel.clear(); - colsel.hideColumns(2); - cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView); - assertTrue(cs.getHiddenColumns().isEmpty()); ++ proteinSelection.revealAllHiddenColumns(); ++ proteinSelection.clear(); ++ proteinSelection.hideColumns(2); ++ dnaSelection = MappingUtils.mapColumnSelection(proteinSelection, proteinView, dnaView); ++ assertTrue(dnaSelection.getHiddenColumns().isEmpty()); + + /* + * Column 3 in protein picks up Seq1/P, Seq2/Q, Seq3/S which map to columns + * 6-9, 6-10, 5-8 respectively, overall to 5-10 + */ - colsel.revealAllHiddenColumns(); - colsel.clear(); - colsel.hideColumns(3); // 5-10 hidden in dna - colsel.addElement(1); // 0-3 selected in dna - cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView); - assertEquals("[0, 1, 2, 3]", cs.getSelected().toString()); - hidden = cs.getHiddenColumns(); ++ proteinSelection.revealAllHiddenColumns(); ++ proteinSelection.clear(); ++ proteinSelection.hideColumns(3); // 5-10 hidden in dna ++ proteinSelection.addElement(1); // 0-3 selected in dna ++ dnaSelection = MappingUtils.mapColumnSelection(proteinSelection, proteinView, dnaView); ++ assertEquals("[0, 1, 2, 3]", dnaSelection.getSelected().toString()); ++ hidden = dnaSelection.getHiddenColumns(); + assertEquals(1, hidden.size()); + assertEquals("[5, 10]", Arrays.toString(hidden.get(0))); + + /* + * Combine hiding columns 1 and 3 to get discontiguous hidden columns + */ - 
colsel.revealAllHiddenColumns(); - colsel.clear(); - colsel.hideColumns(1); - colsel.hideColumns(3); - cs = MappingUtils.mapColumnSelection(colsel, proteinView, dnaView); - hidden = cs.getHiddenColumns(); ++ proteinSelection.revealAllHiddenColumns(); ++ proteinSelection.clear(); ++ proteinSelection.hideColumns(1); ++ proteinSelection.hideColumns(3); ++ dnaSelection = MappingUtils.mapColumnSelection(proteinSelection, proteinView, dnaView); ++ hidden = dnaSelection.getHiddenColumns(); + assertEquals(2, hidden.size()); + assertEquals("[0, 3]", Arrays.toString(hidden.get(0))); + assertEquals("[5, 10]", Arrays.toString(hidden.get(1))); + } } diff --cc test/jalview/ws/seqfetcher/DbRefFetcherTest.java index 4574a09,902498b..fae5778 --- a/test/jalview/ws/seqfetcher/DbRefFetcherTest.java +++ b/test/jalview/ws/seqfetcher/DbRefFetcherTest.java @@@ -169,9 -166,8 +169,9 @@@ public class DbRefFetcherTes FeatureProperties.isCodingFeature(embl.getDbSource(), sfs[0].getType())); assertEquals(embl.getDbSource(), sfs[0].getFeatureGroup()); - DBRefEntry[] dr = DBRefUtils.selectRefs(seq.getDBRef(), + DBRefEntry[] dr = DBRefUtils.selectRefs(seq.getDBRefs(), - DBRefSource.PROTEINSEQ); + new String[] { DBRefSource.UNIPROT, DBRefSource.UNIPROTKB, + DBRefSource.EMBLCDSProduct, DBRefSource.ENSEMBL }); assertNotNull(dr); assertEquals("Expected a single Uniprot cross reference", 1, dr.length); assertEquals("Expected cross reference map to be one amino acid", dr[0]