From: tcofoegbu Date: Wed, 6 Apr 2016 14:04:26 +0000 (+0100) Subject: merge X-Git-Tag: Release_2_10_0~270 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=c9df9ff2328d948b50aecced66df5f0b57edac82;hp=d8953149cfddc56c08aecac475e5154a49f4c2f7;p=jalview.git merge --- diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index db69823..14e3907 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -26,11 +26,8 @@ import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; -import jalview.datamodel.DBRefSource; -import jalview.datamodel.FeatureProperties; import jalview.datamodel.IncompleteCodonException; import jalview.datamodel.Mapping; -import jalview.datamodel.SearchResults; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceGroup; @@ -39,7 +36,6 @@ import jalview.io.gff.SequenceOntologyFactory; import jalview.io.gff.SequenceOntologyI; import jalview.schemes.ResidueProperties; import jalview.util.Comparison; -import jalview.util.DBRefUtils; import jalview.util.MapList; import jalview.util.MappingUtils; import jalview.util.StringUtils; @@ -564,7 +560,7 @@ public class AlignmentUtils AlignedCodonFrame mapping = null; for (AlignedCodonFrame mp : mappings) { - alignFrom = mp.findAlignedSequence(seq.getDatasetSequence(), al); + alignFrom = mp.findAlignedSequence(seq, al); if (alignFrom != null) { mapping = mp; @@ -813,148 +809,6 @@ public class AlignmentUtils } /** - * Returns a list of sequences mapped from the given sequences and aligned - * (gapped) in the same way. For example, the cDNA for aligned protein, where - * a single gap in protein generates three gaps in cDNA. 
- * - * @param sequences - * @param gapCharacter - * @param mappings - * @return - */ - public static List getAlignedTranslation( - List sequences, char gapCharacter, - Set mappings) - { - List alignedSeqs = new ArrayList(); - - for (SequenceI seq : sequences) - { - List mapped = getAlignedTranslation(seq, gapCharacter, - mappings); - alignedSeqs.addAll(mapped); - } - return alignedSeqs; - } - - /** - * Returns sequences aligned 'like' the source sequence, as mapped by the - * given mappings. Normally we expect zero or one 'mapped' sequences, but this - * will support 1-to-many as well. - * - * @param seq - * @param gapCharacter - * @param mappings - * @return - */ - protected static List getAlignedTranslation(SequenceI seq, - char gapCharacter, Set mappings) - { - List result = new ArrayList(); - for (AlignedCodonFrame mapping : mappings) - { - if (mapping.involvesSequence(seq)) - { - SequenceI mapped = getAlignedTranslation(seq, gapCharacter, mapping); - if (mapped != null) - { - result.add(mapped); - } - } - } - return result; - } - - /** - * Returns the translation of 'seq' (as held in the mapping) with - * corresponding alignment (gaps). - * - * @param seq - * @param gapCharacter - * @param mapping - * @return - */ - protected static SequenceI getAlignedTranslation(SequenceI seq, - char gapCharacter, AlignedCodonFrame mapping) - { - String gap = String.valueOf(gapCharacter); - boolean toDna = false; - int fromRatio = 1; - SequenceI mapTo = mapping.getDnaForAaSeq(seq); - if (mapTo != null) - { - // mapping is from protein to nucleotide - toDna = true; - // should ideally get gap count ratio from mapping - gap = String.valueOf(new char[] { gapCharacter, gapCharacter, - gapCharacter }); - } - else - { - // mapping is from nucleotide to protein - mapTo = mapping.getAaForDnaSeq(seq); - fromRatio = 3; - } - StringBuilder newseq = new StringBuilder(seq.getLength() - * (toDna ? 
3 : 1)); - - int residueNo = 0; // in seq, base 1 - int[] phrase = new int[fromRatio]; - int phraseOffset = 0; - int gapWidth = 0; - boolean first = true; - final Sequence alignedSeq = new Sequence("", ""); - - for (char c : seq.getSequence()) - { - if (c == gapCharacter) - { - gapWidth++; - if (gapWidth >= fromRatio) - { - newseq.append(gap); - gapWidth = 0; - } - } - else - { - phrase[phraseOffset++] = residueNo + 1; - if (phraseOffset == fromRatio) - { - /* - * Have read a whole codon (or protein residue), now translate: map - * source phrase to positions in target sequence add characters at - * these positions to newseq Note mapping positions are base 1, our - * sequence positions base 0. - */ - SearchResults sr = new SearchResults(); - for (int pos : phrase) - { - mapping.markMappedRegion(seq, pos, sr); - } - newseq.append(sr.getCharacters()); - if (first) - { - first = false; - // Hack: Copy sequence dataset, name and description from - // SearchResults.match[0].sequence - // TODO? carry over sequence names from original 'complement' - // alignment - SequenceI mappedTo = sr.getResultSequence(0); - alignedSeq.setName(mappedTo.getName()); - alignedSeq.setDescription(mappedTo.getDescription()); - alignedSeq.setDatasetSequence(mappedTo); - } - phraseOffset = 0; - } - residueNo++; - } - } - alignedSeq.setSequence(newseq.toString()); - return alignedSeq; - } - - /** * Realigns the given protein to match the alignment of the dna, using codon * mappings to translate aligned codon positions to protein residues. * @@ -1011,8 +865,7 @@ public class AlignmentUtils { for (AlignedCodonFrame mapping : mappings) { - SequenceI prot = mapping.findAlignedSequence( - dnaSeq.getDatasetSequence(), protein); + SequenceI prot = mapping.findAlignedSequence(dnaSeq, protein); if (prot != null) { Mapping seqMap = mapping.getMappingForSequence(dnaSeq); @@ -1027,6 +880,7 @@ public class AlignmentUtils * Finally add any unmapped peptide start residues (e.g. 
for incomplete * codons) as if at the codon position before the second residue */ + // TODO resolve JAL-2022 so this fudge can be removed int mappedSequenceCount = protein.getHeight() - unmappedProtein.size(); addUnmappedPeptideStarts(alignedCodons, mappedSequenceCount); @@ -1510,14 +1364,15 @@ public class AlignmentUtils /** * Constructs an alignment consisting of the mapped (CDS) regions in the given - * nucleotide sequences, and updates mappings to match. The new sequences are - * aligned as per the original sequence, with entirely gapped columns (codon - * interrupted by intron) omitted. + * nucleotide sequences, and updates mappings to match. The CDS sequences are + * added to the original alignment's dataset, which is shared by the new + * alignment. Mappings from nucleotide to CDS, and from CDS to protein, are + * added to the alignment dataset. * * @param dna * aligned dna sequences * @param mappings - * from dna to protein; these are replaced with new mappings + * from dna to protein * @param al * @return an alignment whose sequences are the cds-only parts of the dna * sequences (or null if no mappings are found) @@ -1525,228 +1380,108 @@ public class AlignmentUtils public static AlignmentI makeCdsAlignment(SequenceI[] dna, List mappings, AlignmentI al) { - List cdsColumns = findCdsColumns(dna); - - /* - * create CDS sequences and new mappings - * (from cdna to cds, and cds to peptide) - */ - List newMappings = new ArrayList(); - List cdsSequences = new ArrayList(); - char gap = al.getGapCharacter(); - - for (SequenceI dnaSeq : dna) + List cdsSeqs = new ArrayList(); + + for (SequenceI seq : dna) { - final SequenceI ds = dnaSeq.getDatasetSequence(); + AlignedCodonFrame cdsMappings = new AlignedCodonFrame(); List seqMappings = MappingUtils - .findMappingsForSequence(ds, mappings); - for (AlignedCodonFrame acf : seqMappings) + .findMappingsForSequence(seq, mappings); + List alignmentMappings = al.getCodonFrames(); + for (AlignedCodonFrame mapping : 
seqMappings) { - AlignedCodonFrame newMapping = new AlignedCodonFrame(); - final List mappedCds = makeCdsSequences(dnaSeq, acf, - cdsColumns, newMapping, gap); - if (!mappedCds.isEmpty()) + for (Mapping aMapping : mapping.getMappingsFromSequence(seq)) { - cdsSequences.addAll(mappedCds); - newMappings.add(newMapping); + SequenceI cdsSeq = makeCdsSequence(seq.getDatasetSequence(), + aMapping); + cdsSeqs.add(cdsSeq); + + /* + * add a mapping from CDS to the (unchanged) mapped to range + */ + List cdsRange = Collections.singletonList(new int[] { 1, + cdsSeq.getLength() }); + MapList map = new MapList(cdsRange, aMapping.getMap() + .getToRanges(), aMapping.getMap().getFromRatio(), + aMapping.getMap().getToRatio()); + cdsMappings.addMap(cdsSeq, aMapping.getTo(), map); + + /* + * add another mapping from original 'from' range to CDS + */ + map = new MapList(aMapping.getMap().getFromRanges(), cdsRange, 1, + 1); + cdsMappings.addMap(seq.getDatasetSequence(), cdsSeq, map); + + alignmentMappings.add(cdsMappings); + + /* + * transfer any features on dna that overlap the CDS + */ + transferFeatures(seq, cdsSeq, map, null, SequenceOntologyI.CDS); } } } - AlignmentI newAl = new Alignment( - cdsSequences.toArray(new SequenceI[cdsSequences.size()])); /* - * add new sequences to the shared dataset, set it on the new alignment + * add CDS seqs to shared dataset */ - List dsseqs = al.getDataset().getSequences(); - for (SequenceI seq : newAl.getSequences()) + Alignment dataset = al.getDataset(); + for (SequenceI seq : cdsSeqs) { - if (!dsseqs.contains(seq.getDatasetSequence())) + if (!dataset.getSequences().contains(seq.getDatasetSequence())) { - dsseqs.add(seq.getDatasetSequence()); + dataset.addSequence(seq.getDatasetSequence()); } } - newAl.setDataset(al.getDataset()); + AlignmentI cds = new Alignment(cdsSeqs.toArray(new SequenceI[cdsSeqs + .size()])); + cds.setDataset(dataset); - /* - * Replace the old mappings with the new ones - */ - mappings.clear(); - 
mappings.addAll(newMappings); - - return newAl; + return cds; } /** - * Returns a consolidated list of column ranges where at least one sequence - * has a CDS feature. This assumes CDS features are on genomic sequence i.e. - * are for contiguous CDS ranges (no gaps). + * Helper method that makes a CDS sequence as defined by the mappings from the + * given sequence i.e. extracts the 'mapped from' ranges (which may be on + * forward or reverse strand). * - * @param seqs + * @param seq + * @param mapping * @return */ - public static List findCdsColumns(SequenceI[] seqs) - { - // TODO use refactored code from AlignViewController - // markColumnsContainingFeatures, not reinvent the wheel! - - List result = new ArrayList(); - for (SequenceI seq : seqs) - { - result.addAll(findCdsColumns(seq)); - } - - /* - * sort and compact the list into ascending, non-overlapping ranges - */ - Collections.sort(result, new Comparator() - { - @Override - public int compare(int[] o1, int[] o2) - { - return Integer.compare(o1[0], o2[0]); - } - }); - result = MapList.coalesceRanges(result); - - return result; - } - - public static List findCdsColumns(SequenceI seq) + static SequenceI makeCdsSequence(SequenceI seq, Mapping mapping) { - List result = new ArrayList(); - SequenceOntologyI so = SequenceOntologyFactory.getInstance(); - SequenceFeature[] sfs = seq.getSequenceFeatures(); - if (sfs != null) - { - for (SequenceFeature sf : sfs) - { - if (so.isA(sf.getType(), SequenceOntologyI.CDS)) - { - int colStart = seq.findIndex(sf.getBegin()); - int colEnd = seq.findIndex(sf.getEnd()); - result.add(new int[] { colStart, colEnd }); - } - } - } - return result; - } + char[] seqChars = seq.getSequence(); + List fromRanges = mapping.getMap().getFromRanges(); + int cdsWidth = MappingUtils.getLength(fromRanges); + char[] newSeqChars = new char[cdsWidth]; - /** - * Answers true if all sequences have a gap at (or do not extend to) the - * specified column position (base 1) - * - * @param seqs - * @param 
col - * @return - */ - public static boolean isGappedColumn(List seqs, int col) - { - if (seqs != null) + int newPos = 0; + for (int[] range : fromRanges) { - for (SequenceI seq : seqs) + if (range[0] <= range[1]) { - if (!Comparison.isGap(seq.getCharAt(col - 1))) - { - return false; - } + // forward strand mapping - just copy the range + int length = range[1] - range[0] + 1; + System.arraycopy(seqChars, range[0] - 1, newSeqChars, newPos, + length); + newPos += length; } - } - return true; - } - - /** - * Returns the column ranges (base 1) of each aligned sequence that are - * involved in any mapping. This is a helper method for aligning protein - * products of aligned transcripts. - * - * @param mappedSequences - * (possibly gapped) dna sequences - * @param mappings - * @return - */ - protected static List> getMappedColumns( - List mappedSequences, List mappings) - { - List> result = new ArrayList>(); - for (SequenceI seq : mappedSequences) - { - List columns = new ArrayList(); - List seqMappings = MappingUtils - .findMappingsForSequence(seq, mappings); - for (AlignedCodonFrame mapping : seqMappings) + else { - List maps = mapping.getMappingsForSequence(seq); - for (Mapping map : maps) + // reverse strand mapping - copy and complement one by one + for (int i = range[0]; i >= range[1]; i--) { - /* - * Get the codon regions as { [2, 5], [7, 12], [14, 14] etc } - * Find and add the overall aligned column range for each - */ - for (int[] cdsRange : map.getMap().getFromRanges()) - { - int startPos = cdsRange[0]; - int endPos = cdsRange[1]; - int startCol = seq.findIndex(startPos); - int endCol = seq.findIndex(endPos); - columns.add(new int[] { startCol, endCol }); - } + newSeqChars[newPos++] = Dna.getComplement(seqChars[i - 1]); } } - result.add(columns); } - return result; - } - - /** - * Helper method to make cds-only sequences and populate their mappings to - * protein products - *

- * For example, if ggCCaTTcGAg has mappings [3, 4, 6, 7, 9, 10] to protein - * then generate a sequence CCTTGA with mapping [1, 6] to the same protein - * residues - *

- * Typically eukaryotic dna will include cds encoding for a single peptide - * sequence i.e. return a single result. Bacterial dna may have overlapping - * cds mappings coding for multiple peptides so return multiple results - * (example EMBL KF591215). - * - * @param dnaSeq - * a dna aligned sequence - * @param mapping - * containing one or more mappings of the sequence to protein - * @param ungappedCdsColumns - * @param newMappings - * the new mapping to populate, from the cds-only sequences to their - * mapped protein sequences - * @return - */ - protected static List makeCdsSequences(SequenceI dnaSeq, - AlignedCodonFrame mapping, List ungappedCdsColumns, - AlignedCodonFrame newMappings, char gapChar) - { - List cdsSequences = new ArrayList(); - List seqMappings = mapping.getMappingsForSequence(dnaSeq); - - for (Mapping seqMapping : seqMappings) - { - SequenceI cds = makeCdsSequence(dnaSeq, seqMapping, - ungappedCdsColumns, gapChar); - cds.createDatasetSequence(); - cdsSequences.add(cds); - - /* - * add new mappings, from dna to cds, and from cds to peptide - */ - MapList dnaToCds = addCdsMappings(dnaSeq.getDatasetSequence(), cds, - seqMapping, newMappings); - /* - * transfer any features on dna that overlap the CDS - */ - transferFeatures(dnaSeq, cds, dnaToCds, null, SequenceOntologyI.CDS); - } - return cdsSequences; + SequenceI newSeq = new Sequence(seq.getName() + "|" + + mapping.getTo().getName(), newSeqChars, 1, newPos); + newSeq.createDatasetSequence(); + return newSeq; } /** @@ -1846,128 +1581,6 @@ public class AlignmentUtils } /** - * Creates and adds mappings - *

    - *
  • from cds to peptide
  • - *
  • from dna to cds
  • - *
- * and returns the dna-to-cds mapping - * - * @param dnaSeq - * @param cdsSeq - * @param dnaMapping - * @param newMappings - * @return - */ - protected static MapList addCdsMappings(SequenceI dnaSeq, - SequenceI cdsSeq, Mapping dnaMapping, - AlignedCodonFrame newMappings) - { - cdsSeq.createDatasetSequence(); - - /* - * CDS to peptide is just a contiguous 3:1 mapping, with - * the peptide ranges taken unchanged from the dna mapping - */ - List cdsRanges = new ArrayList(); - SequenceI cdsDataset = cdsSeq.getDatasetSequence(); - cdsRanges.add(new int[] { 1, cdsDataset.getLength() }); - MapList cdsToPeptide = new MapList(cdsRanges, dnaMapping.getMap() - .getToRanges(), 3, 1); - newMappings.addMap(cdsDataset, dnaMapping.getTo(), cdsToPeptide); - - /* - * dna 'from' ranges map 1:1 to the contiguous extracted CDS - */ - MapList dnaToCds = new MapList(dnaMapping.getMap().getFromRanges(), - cdsRanges, 1, 1); - newMappings.addMap(dnaSeq, cdsDataset, dnaToCds); - return dnaToCds; - } - - /** - * Makes and returns a CDS-only sequence, where the CDS regions are identified - * as the 'from' ranges of the mapping on the dna. 
- * - * @param dnaSeq - * nucleotide sequence - * @param seqMapping - * mappings from CDS regions of nucleotide - * @param ungappedCdsColumns - * @return - */ - protected static SequenceI makeCdsSequence(SequenceI dnaSeq, - Mapping seqMapping, List ungappedCdsColumns, char gapChar) - { - int cdsWidth = MappingUtils.getLength(ungappedCdsColumns); - - /* - * populate CDS columns with the aligned - * column character if that column is mapped (which may be a gap - * if an intron interrupts a codon), else with a gap - */ - List fromRanges = seqMapping.getMap().getFromRanges(); - char[] cdsChars = new char[cdsWidth]; - int pos = 0; - for (int[] columns : ungappedCdsColumns) - { - for (int i = columns[0]; i <= columns[1]; i++) - { - char dnaChar = dnaSeq.getCharAt(i - 1); - if (Comparison.isGap(dnaChar)) - { - cdsChars[pos] = gapChar; - } - else - { - int seqPos = dnaSeq.findPosition(i - 1); - if (MappingUtils.contains(fromRanges, seqPos)) - { - cdsChars[pos] = dnaChar; - } - else - { - cdsChars[pos] = gapChar; - } - } - pos++; - } - } - SequenceI cdsSequence = new Sequence(dnaSeq.getName(), - String.valueOf(cdsChars)); - - transferDbRefs(seqMapping.getTo(), cdsSequence); - - return cdsSequence; - } - - /** - * Locate any xrefs to CDS databases on the protein product and attach to the - * CDS sequence. Also add as a sub-token of the sequence name. - * - * @param from - * @param to - */ - protected static void transferDbRefs(SequenceI from, SequenceI to) - { - String cdsAccId = FeatureProperties.getCodingFeature(DBRefSource.EMBL); - DBRefEntry[] cdsRefs = DBRefUtils.selectRefs(from.getDBRefs(), - DBRefSource.CODINGDBS); - if (cdsRefs != null) - { - for (DBRefEntry cdsRef : cdsRefs) - { - to.addDBRef(new DBRefEntry(cdsRef)); - cdsAccId = cdsRef.getAccessionId(); - } - } - if (!to.getName().contains(cdsAccId)) - { - to.setName(to.getName() + "|" + cdsAccId); - } - } - - /** * Returns a mapping from dna to protein by inspecting sequence features of * type "CDS" on the dna. 
* @@ -1980,11 +1593,11 @@ public class AlignmentUtils { List ranges = findCdsPositions(dnaSeq); int mappedDnaLength = MappingUtils.getLength(ranges); - + int proteinLength = proteinSeq.getLength(); int proteinStart = proteinSeq.getStart(); int proteinEnd = proteinSeq.getEnd(); - + /* * incomplete start codon may mean X at start of peptide * we ignore both for mapping purposes @@ -1996,7 +1609,7 @@ public class AlignmentUtils proteinLength--; } List proteinRange = new ArrayList(); - + /* * dna length should map to protein (or protein plus stop codon) */ @@ -2017,7 +1630,9 @@ public class AlignmentUtils /** * Returns a list of CDS ranges found (as sequence positions base 1), i.e. of * start/end positions of sequence features of type "CDS" (or a sub-type of - * CDS in the Sequence Ontology) + * CDS in the Sequence Ontology). The ranges are sorted into ascending start + * position order, so this method is only valid for linear CDS in the same + * sense as the protein product. * * @param dnaSeq * @return @@ -2030,7 +1645,10 @@ public class AlignmentUtils { return result; } + SequenceOntologyI so = SequenceOntologyFactory.getInstance(); + int startPhase = 0; + for (SequenceFeature sf : sfs) { /* @@ -2039,7 +1657,8 @@ public class AlignmentUtils if (so.isA(sf.getType(), SequenceOntologyI.CDS)) { int phase = 0; - try { + try + { phase = Integer.parseInt(sf.getPhase()); } catch (NumberFormatException e) { @@ -2053,16 +1672,44 @@ public class AlignmentUtils int end = sf.getEnd(); if (result.isEmpty()) { - // TODO JAL-2022 support start phase > 0 begin += phase; if (begin > end) { - continue; // shouldn't happen? + // shouldn't happen! 
+ System.err + .println("Error: start phase extends beyond start CDS in " + + dnaSeq.getName()); } } result.add(new int[] { begin, end }); } } + + /* + * remove 'startPhase' positions (usually 0) from the first range + * so we begin at the start of a complete codon + */ + if (!result.isEmpty()) + { + // TODO JAL-2022 correctly model start phase > 0 + result.get(0)[0] += startPhase; + } + + /* + * Finally sort ranges by start position. This avoids a dependency on + * keeping features in order on the sequence (if they are in order anyway, + * the sort will have almost no work to do). The implicit assumption is CDS + * ranges are assembled in order. Other cases should not use this method, + * but instead construct an explicit mapping for CDS (e.g. EMBL parsing). + */ + Collections.sort(result, new Comparator() + { + @Override + public int compare(int[] o1, int[] o2) + { + return Integer.compare(o1[0], o2[0]); + } + }); return result; } @@ -2087,13 +1734,19 @@ public class AlignmentUtils { peptide = peptide.getDatasetSequence(); } - - transferFeatures(dnaSeq, peptide, dnaToProtein, - SequenceOntologyI.EXON); - + + transferFeatures(dnaSeq, peptide, dnaToProtein, SequenceOntologyI.EXON); + + /* + * compute protein variants from dna variants and codon mappings; + * NB - alternatively we could retrieve this using the REST service e.g. 
+ * http://rest.ensembl.org/overlap/translation + * /ENSP00000288602?feature=transcript_variation;content-type=text/xml + * which would be a bit slower but possibly more reliable + */ LinkedHashMap variants = buildDnaVariantsMap( dnaSeq, dnaToProtein); - + /* * scan codon variations, compute peptide variants and add to peptide sequence */ @@ -2107,8 +1760,8 @@ public class AlignmentUtils residue); if (!peptideVariants.isEmpty()) { - String desc = StringUtils.listToDelimitedString(peptideVariants, - ", "); + String desc = residue + "," // include canonical residue in description + + StringUtils.listToDelimitedString(peptideVariants, ", "); SequenceFeature sf = new SequenceFeature( SequenceOntologyI.SEQUENCE_VARIANT, desc, peptidePos, peptidePos, 0f, null); @@ -2116,7 +1769,7 @@ public class AlignmentUtils count++; } } - + /* * ugly sort to get sequence features in start position order * - would be better to store in Sequence as a TreeSet instead? @@ -2152,17 +1805,17 @@ public class AlignmentUtils */ LinkedHashMap variants = new LinkedHashMap(); SequenceOntologyI so = SequenceOntologyFactory.getInstance(); - + SequenceFeature[] dnaFeatures = dnaSeq.getSequenceFeatures(); if (dnaFeatures == null) { return variants; } - + int dnaStart = dnaSeq.getStart(); int[] lastCodon = null; int lastPeptidePostion = 0; - + /* * build a map of codon variations for peptides */ @@ -2189,7 +1842,7 @@ public class AlignmentUtils codonVariants = new String[3][]; variants.put(peptidePosition, codonVariants); } - + /* * extract dna variants to a string array */ @@ -2204,7 +1857,7 @@ public class AlignmentUtils { alleles[i++] = allele.trim(); // lose any space characters "A, G" } - + /* * get this peptide's codon positions e.g. 
[3, 4, 5] or [4, 7, 10] */ @@ -2213,7 +1866,7 @@ public class AlignmentUtils peptidePosition, peptidePosition)); lastPeptidePostion = peptidePosition; lastCodon = codon; - + /* * save nucleotide (and this variant) for each codon position */ @@ -2257,8 +1910,8 @@ public class AlignmentUtils * the current residue translation * @return */ - static List computePeptideVariants( - String[][] codonVariants, String residue) + static List computePeptideVariants(String[][] codonVariants, + String residue) { List result = new ArrayList(); for (String base1 : codonVariants[0]) @@ -2285,13 +1938,13 @@ public class AlignmentUtils } } } - + /* * sort alphabetically with STOP at the end */ Collections.sort(result, new Comparator() { - + @Override public int compare(String o1, String o2) { @@ -2311,4 +1964,254 @@ public class AlignmentUtils }); return result; } + + /** + * Makes an alignment with a copy of the given sequences, adding in any + * non-redundant sequences which are mapped to by the cross-referenced + * sequences. + * + * @param seqs + * @param xrefs + * @return + */ + public static AlignmentI makeCopyAlignment(SequenceI[] seqs, + SequenceI[] xrefs) + { + AlignmentI copy = new Alignment(new Alignment(seqs)); + + SequenceIdMatcher matcher = new SequenceIdMatcher(seqs); + if (xrefs != null) + { + for (SequenceI xref : xrefs) + { + DBRefEntry[] dbrefs = xref.getDBRefs(); + if (dbrefs != null) + { + for (DBRefEntry dbref : dbrefs) + { + if (dbref.getMap() == null || dbref.getMap().getTo() == null) + { + continue; + } + SequenceI mappedTo = dbref.getMap().getTo(); + SequenceI match = matcher.findIdMatch(mappedTo); + if (match == null) + { + matcher.add(mappedTo); + copy.addSequence(mappedTo); + } + } + } + } + } + return copy; + } + + /** + * Try to align sequences in 'unaligned' to match the alignment of their + * mapped regions in 'aligned'. For example, could use this to align CDS + * sequences which are mapped to their parent cDNA sequences. 
+ * + * This method handles 1:1 mappings (dna-to-dna or protein-to-protein). For + * dna-to-protein or protein-to-dna use alternative methods. + * + * @param unaligned + * sequences to be aligned + * @param aligned + * holds aligned sequences and their mappings + * @return + */ + public static int alignAs(AlignmentI unaligned, AlignmentI aligned) + { + List unmapped = new ArrayList(); + Map> columnMap = buildMappedColumnsMap( + unaligned, aligned, unmapped); + int width = columnMap.size(); + char gap = unaligned.getGapCharacter(); + int realignedCount = 0; + + for (SequenceI seq : unaligned.getSequences()) + { + if (!unmapped.contains(seq)) + { + char[] newSeq = new char[width]; + Arrays.fill(newSeq, gap); + int newCol = 0; + int lastCol = 0; + + /* + * traverse the map to find columns populated + * by our sequence + */ + for (Integer column : columnMap.keySet()) + { + Character c = columnMap.get(column).get(seq); + if (c != null) + { + /* + * sequence has a character at this position + * + */ + newSeq[newCol] = c; + lastCol = newCol; + } + newCol++; + } + + /* + * trim trailing gaps + */ + if (lastCol < width) + { + char[] tmp = new char[lastCol + 1]; + System.arraycopy(newSeq, 0, tmp, 0, lastCol + 1); + newSeq = tmp; + } + seq.setSequence(String.valueOf(newSeq)); + realignedCount++; + } + } + return realignedCount; + } + + /** + * Returns a map whose key is alignment column number (base 1), and whose + * values are a map of sequence characters in that column. + * + * @param unaligned + * @param aligned + * @param unmapped + * @return + */ + static Map> buildMappedColumnsMap( + AlignmentI unaligned, AlignmentI aligned, List unmapped) + { + /* + * Map will hold, for each aligned column position, a map of + * {unalignedSequence, sequenceCharacter} at that position. + * TreeMap keeps the entries in ascending column order. 
+ */ + Map> map = new TreeMap>(); + + /* + * record any sequences that have no mapping so can't be realigned + */ + unmapped.addAll(unaligned.getSequences()); + + List mappings = aligned.getCodonFrames(); + + for (SequenceI seq : unaligned.getSequences()) + { + for (AlignedCodonFrame mapping : mappings) + { + SequenceI fromSeq = mapping.findAlignedSequence(seq, aligned); + if (fromSeq != null) + { + Mapping seqMap = mapping.getMappingBetween(fromSeq, seq); + if (addMappedPositions(seq, fromSeq, seqMap, map)) + { + unmapped.remove(seq); + } + } + } + } + return map; + } + + /** + * Helper method that adds to a map the mapped column positions of a sequence.
+ * For example if aaTT-Tg-gAAA is mapped to TTTAAA then the map should record + * that columns 3,4,6,10,11,12 map to characters T,T,T,A,A,A of the mapped to + * sequence. + * + * @param seq + * the sequence whose column positions we are recording + * @param fromSeq + * a sequence that is mapped to the first sequence + * @param seqMap + * the mapping from 'fromSeq' to 'seq' + * @param map + * a map to add the column positions (in fromSeq) of the mapped + * positions of seq + * @return + */ + static boolean addMappedPositions(SequenceI seq, SequenceI fromSeq, + Mapping seqMap, Map> map) + { + if (seqMap == null) + { + return false; + } + + char[] fromChars = fromSeq.getSequence(); + int toStart = seq.getStart(); + char[] toChars = seq.getSequence(); + + /* + * traverse [start, end, start, end...] ranges in fromSeq + */ + for (int[] fromRange : seqMap.getMap().getFromRanges()) + { + for (int i = 0; i < fromRange.length - 1; i += 2) + { + boolean forward = fromRange[i + 1] >= fromRange[i]; + + /* + * find the range mapped to (sequence positions base 1) + */ + int[] range = seqMap.locateMappedRange(fromRange[i], + fromRange[i + 1]); + if (range == null) + { + System.err.println("Error in mapping " + seqMap + " from " + + fromSeq.getName()); + return false; + } + int fromCol = fromSeq.findIndex(fromRange[i]); + int mappedCharPos = range[0]; + + /* + * walk over the 'from' aligned sequence in forward or reverse + * direction; when a non-gap is found, record the column position + * of the next character of the mapped-to sequence; stop when all + * the characters of the range have been counted + */ + while (mappedCharPos <= range[1]) + { + if (!Comparison.isGap(fromChars[fromCol - 1])) + { + /* + * mapped from sequence has a character in this column + * record the column position for the mapped to character + */ + Map seqsMap = map.get(fromCol); + if (seqsMap == null) + { + seqsMap = new HashMap(); + map.put(fromCol, seqsMap); + } + seqsMap.put(seq, toChars[mappedCharPos - 
toStart]); + mappedCharPos++; + } + fromCol += (forward ? 1 : -1); + } + } + } + return true; + } + + // strictly temporary hack until proper criteria for aligning protein to cds + // are in place; this is so Ensembl -> fetch xrefs Uniprot aligns the Uniprot + public static boolean looksLikeEnsembl(AlignmentI alignment) + { + for (SequenceI seq : alignment.getSequences()) + { + String name = seq.getName(); + if (!name.startsWith("ENSG") && !name.startsWith("ENST")) + { + return false; + } + } + return true; + } } diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index 0f7a754..3563eba 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -228,14 +228,10 @@ public class CrossRef * @param al * alignment to search for cross-referenced sequences (and possibly * add to) - * @param addedPeers - * a list of sequences to add to if 'peers' to the original sequences - * are found e.g. alternative protein products for a protein's gene * @return products (as dataset sequences) */ public static Alignment findXrefSequences(SequenceI[] seqs, - final boolean dna, final String source, AlignmentI al, - List addedPeers) + final boolean dna, final String source, AlignmentI al) { AlignmentI dataset = al.getDataset() == null ? al : al.getDataset(); List rseqs = new ArrayList(); @@ -298,7 +294,6 @@ public class CrossRef { found |= searchDataset(dss, xref, dataset, rseqs, cf, false, !dna); - // ,false,!dna); if (found) { xrfs[r] = null; // we've recovered seqs for this one. 
@@ -363,7 +358,6 @@ public class CrossRef SequenceIdMatcher matcher = new SequenceIdMatcher( dataset.getSequences()); - matcher.addAll(addedPeers); List copiedFeatures = new ArrayList(); CrossRef me = new CrossRef(); for (int rs = 0; rs < retrieved.length; rs++) @@ -392,7 +386,7 @@ public class CrossRef */ for (DBRefEntry ref : map.getTo().getDBRefs()) { - matched.addDBRef(ref); + matched.addDBRef(ref); // add or update mapping } map.setTo(matched); } @@ -426,7 +420,8 @@ public class CrossRef map.setTo(dss); /* * copy sequence features as well, avoiding - * duplication (e.g. from 2 transcripts) + * duplication (e.g. same variation from 2 + * transcripts) */ SequenceFeature[] sfs = ms .getSequenceFeatures(); @@ -453,10 +448,6 @@ public class CrossRef } else { - if (!addedPeers.contains(map.getTo())) - { - addedPeers.add(map.getTo()); - } cf.addMap(retrieved[rs].getDatasetSequence(), map.getTo(), map.getMap()); } diff --git a/src/jalview/appletgui/AlignViewport.java b/src/jalview/appletgui/AlignViewport.java index 09e6562..e5178cb 100644 --- a/src/jalview/appletgui/AlignViewport.java +++ b/src/jalview/appletgui/AlignViewport.java @@ -348,39 +348,6 @@ public class AlignViewport extends AlignmentViewport implements .getStructureSelectionManager(applet); } - /** - * synthesize a column selection if none exists so it covers the given - * selection group. if wholewidth is false, no column selection is made if the - * selection group covers the whole alignment width. 
- * - * @param sg - * @param wholewidth - */ - public void expandColSelection(SequenceGroup sg, boolean wholewidth) - { - int sgs, sge; - if (sg != null - && (sgs = sg.getStartRes()) >= 0 - && sg.getStartRes() <= (sge = sg.getEndRes()) - && (colSel == null || colSel.getSelected() == null || colSel - .getSelected().size() == 0)) - { - if (!wholewidth && alignment.getWidth() == (1 + sge - sgs)) - { - // do nothing - return; - } - if (colSel == null) - { - colSel = new ColumnSelection(); - } - for (int cspos = sg.getStartRes(); cspos <= sg.getEndRes(); cspos++) - { - colSel.addElement(cspos); - } - } - } - @Override public boolean isNormaliseSequenceLogo() { diff --git a/src/jalview/appletgui/AnnotationPanel.java b/src/jalview/appletgui/AnnotationPanel.java index d642c14..77700d0 100755 --- a/src/jalview/appletgui/AnnotationPanel.java +++ b/src/jalview/appletgui/AnnotationPanel.java @@ -160,6 +160,9 @@ public class AnnotationPanel extends Panel implements AwtRenderPanelI, { for (int sel : av.getColumnSelection().getSelected()) { + // TODO: JAL-2001 check if applet has faulty 'REMOVE' selected columns + // of + // annotation if selection includes hidden columns anot[sel] = null; } } @@ -179,6 +182,8 @@ public class AnnotationPanel extends Panel implements AwtRenderPanelI, for (int index : av.getColumnSelection().getSelected()) { + // TODO: JAL-2001 - provide a fast method to list visible selected + // columns if (!av.getColumnSelection().isVisible(index)) { continue; diff --git a/src/jalview/appletgui/ScalePanel.java b/src/jalview/appletgui/ScalePanel.java index 3c6a4f1..9106385 100755 --- a/src/jalview/appletgui/ScalePanel.java +++ b/src/jalview/appletgui/ScalePanel.java @@ -410,6 +410,8 @@ public class ScalePanel extends Panel implements MouseMotionListener, int avcharWidth = av.getCharWidth(), avcharHeight = av.getCharHeight(); for (int sel : cs.getSelected()) { + // TODO: JAL-2001 - provide a fast method to list visible selected in a + // given range if 
(av.hasHiddenColumns()) { sel = av.getColumnSelection().findColumnPosition(sel); diff --git a/src/jalview/bin/JalviewLite.java b/src/jalview/bin/JalviewLite.java index ae84ba5..13e4b7e 100644 --- a/src/jalview/bin/JalviewLite.java +++ b/src/jalview/bin/JalviewLite.java @@ -100,6 +100,7 @@ public class JalviewLite extends Applet implements * * @see jalview.bin.JalviewLiteJsApi#getSelectedSequences() */ + @Override public String getSelectedSequences() { return getSelectedSequencesFrom(getDefaultTargetFrame()); @@ -110,6 +111,7 @@ public class JalviewLite extends Applet implements * * @see jalview.bin.JalviewLiteJsApi#getSelectedSequences(java.lang.String) */ + @Override public String getSelectedSequences(String sep) { return getSelectedSequencesFrom(getDefaultTargetFrame(), sep); @@ -122,6 +124,7 @@ public class JalviewLite extends Applet implements * jalview.bin.JalviewLiteJsApi#getSelectedSequencesFrom(jalview.appletgui * .AlignFrame) */ + @Override public String getSelectedSequencesFrom(AlignFrame alf) { return getSelectedSequencesFrom(alf, separator); // ""+0x00AC); @@ -134,6 +137,7 @@ public class JalviewLite extends Applet implements * jalview.bin.JalviewLiteJsApi#getSelectedSequencesFrom(jalview.appletgui * .AlignFrame, java.lang.String) */ + @Override public String getSelectedSequencesFrom(AlignFrame alf, String sep) { StringBuffer result = new StringBuffer(""); @@ -162,6 +166,7 @@ public class JalviewLite extends Applet implements * @see jalview.bin.JalviewLiteJsApi#highlight(java.lang.String, * java.lang.String, java.lang.String) */ + @Override public void highlight(String sequenceId, String position, String alignedPosition) { @@ -175,6 +180,7 @@ public class JalviewLite extends Applet implements * @see jalview.bin.JalviewLiteJsApi#highlightIn(jalview.appletgui.AlignFrame, * java.lang.String, java.lang.String, java.lang.String) */ + @Override public void highlightIn(final AlignFrame alf, final String sequenceId, final String position, final String 
alignedPosition) { @@ -231,6 +237,7 @@ public class JalviewLite extends Applet implements * @see jalview.bin.JalviewLiteJsApi#select(java.lang.String, * java.lang.String) */ + @Override public void select(String sequenceIds, String columns) { selectIn(getDefaultTargetFrame(), sequenceIds, columns, separator); @@ -242,6 +249,7 @@ public class JalviewLite extends Applet implements * @see jalview.bin.JalviewLiteJsApi#select(java.lang.String, * java.lang.String, java.lang.String) */ + @Override public void select(String sequenceIds, String columns, String sep) { selectIn(getDefaultTargetFrame(), sequenceIds, columns, sep); @@ -253,6 +261,7 @@ public class JalviewLite extends Applet implements * @see jalview.bin.JalviewLiteJsApi#selectIn(jalview.appletgui.AlignFrame, * java.lang.String, java.lang.String) */ + @Override public void selectIn(AlignFrame alf, String sequenceIds, String columns) { selectIn(alf, sequenceIds, columns, separator); @@ -264,6 +273,7 @@ public class JalviewLite extends Applet implements * @see jalview.bin.JalviewLiteJsApi#selectIn(jalview.appletgui.AlignFrame, * java.lang.String, java.lang.String, java.lang.String) */ + @Override public void selectIn(final AlignFrame alf, String sequenceIds, String columns, String sep) { @@ -459,6 +469,9 @@ public class JalviewLite extends Applet implements if (csel != null) { List cs = csel.getSelected(); + // note - the following actually clears cs as well, since + // csel.getSelected returns a reference. Need to check if we need to + // have a concurrentModification exception thrown here csel.clear(); for (Integer selectedCol : cs) { @@ -486,6 +499,7 @@ public class JalviewLite extends Applet implements * jalview.bin.JalviewLiteJsApi#getSelectedSequencesAsAlignment(java.lang. 
* String, java.lang.String) */ + @Override public String getSelectedSequencesAsAlignment(String format, String suffix) { return getSelectedSequencesAsAlignmentFrom(getDefaultTargetFrame(), @@ -499,6 +513,7 @@ public class JalviewLite extends Applet implements * jalview.bin.JalviewLiteJsApi#getSelectedSequencesAsAlignmentFrom(jalview * .appletgui.AlignFrame, java.lang.String, java.lang.String) */ + @Override public String getSelectedSequencesAsAlignmentFrom(AlignFrame alf, String format, String suffix) { @@ -528,6 +543,7 @@ public class JalviewLite extends Applet implements * * @see jalview.bin.JalviewLiteJsApi#getAlignmentOrder() */ + @Override public String getAlignmentOrder() { return getAlignmentOrderFrom(getDefaultTargetFrame()); @@ -540,6 +556,7 @@ public class JalviewLite extends Applet implements * jalview.bin.JalviewLiteJsApi#getAlignmentOrderFrom(jalview.appletgui.AlignFrame * ) */ + @Override public String getAlignmentOrderFrom(AlignFrame alf) { return getAlignmentOrderFrom(alf, separator); @@ -552,6 +569,7 @@ public class JalviewLite extends Applet implements * jalview.bin.JalviewLiteJsApi#getAlignmentOrderFrom(jalview.appletgui.AlignFrame * , java.lang.String) */ + @Override public String getAlignmentOrderFrom(AlignFrame alf, String sep) { AlignmentI alorder = alf.getAlignViewport().getAlignment(); @@ -569,6 +587,7 @@ public class JalviewLite extends Applet implements * @see jalview.bin.JalviewLiteJsApi#orderBy(java.lang.String, * java.lang.String) */ + @Override public String orderBy(String order, String undoName) { return orderBy(order, undoName, separator); @@ -580,6 +599,7 @@ public class JalviewLite extends Applet implements * @see jalview.bin.JalviewLiteJsApi#orderBy(java.lang.String, * java.lang.String, java.lang.String) */ + @Override public String orderBy(String order, String undoName, String sep) { return orderAlignmentBy(getDefaultTargetFrame(), order, undoName, sep); @@ -592,6 +612,7 @@ public class JalviewLite extends Applet implements * 
jalview.bin.JalviewLiteJsApi#orderAlignmentBy(jalview.appletgui.AlignFrame, * java.lang.String, java.lang.String, java.lang.String) */ + @Override public String orderAlignmentBy(AlignFrame alf, String order, String undoName, String sep) { @@ -648,6 +669,7 @@ public class JalviewLite extends Applet implements * * @see jalview.bin.JalviewLiteJsApi#getAlignment(java.lang.String) */ + @Override public String getAlignment(String format) { return getAlignmentFrom(getDefaultTargetFrame(), format, TRUE); @@ -660,6 +682,7 @@ public class JalviewLite extends Applet implements * jalview.bin.JalviewLiteJsApi#getAlignmentFrom(jalview.appletgui.AlignFrame, * java.lang.String) */ + @Override public String getAlignmentFrom(AlignFrame alf, String format) { return getAlignmentFrom(alf, format, TRUE); @@ -671,6 +694,7 @@ public class JalviewLite extends Applet implements * @see jalview.bin.JalviewLiteJsApi#getAlignment(java.lang.String, * java.lang.String) */ + @Override public String getAlignment(String format, String suffix) { return getAlignmentFrom(getDefaultTargetFrame(), format, suffix); @@ -683,6 +707,7 @@ public class JalviewLite extends Applet implements * jalview.bin.JalviewLiteJsApi#getAlignmentFrom(jalview.appletgui.AlignFrame, * java.lang.String, java.lang.String) */ + @Override public String getAlignmentFrom(AlignFrame alf, String format, String suffix) { @@ -705,6 +730,7 @@ public class JalviewLite extends Applet implements * * @see jalview.bin.JalviewLiteJsApi#loadAnnotation(java.lang.String) */ + @Override public void loadAnnotation(String annotation) { loadAnnotationFrom(getDefaultTargetFrame(), annotation); @@ -717,6 +743,7 @@ public class JalviewLite extends Applet implements * jalview.bin.JalviewLiteJsApi#loadAnnotationFrom(jalview.appletgui.AlignFrame * , java.lang.String) */ + @Override public void loadAnnotationFrom(AlignFrame alf, String annotation) { if (new AnnotationFile().annotateAlignmentView(alf.getAlignViewport(), @@ -736,6 +763,7 @@ public class 
JalviewLite extends Applet implements * * @see jalview.bin.JalviewLiteJsApi#loadAnnotation(java.lang.String) */ + @Override public void loadFeatures(String features, boolean autoenabledisplay) { loadFeaturesFrom(getDefaultTargetFrame(), features, autoenabledisplay); @@ -748,6 +776,7 @@ public class JalviewLite extends Applet implements * jalview.bin.JalviewLiteJsApi#loadAnnotationFrom(jalview.appletgui.AlignFrame * , java.lang.String) */ + @Override public boolean loadFeaturesFrom(AlignFrame alf, String features, boolean autoenabledisplay) { @@ -760,6 +789,7 @@ public class JalviewLite extends Applet implements * * @see jalview.bin.JalviewLiteJsApi#getFeatures(java.lang.String) */ + @Override public String getFeatures(String format) { return getFeaturesFrom(getDefaultTargetFrame(), format); @@ -772,6 +802,7 @@ public class JalviewLite extends Applet implements * jalview.bin.JalviewLiteJsApi#getFeaturesFrom(jalview.appletgui.AlignFrame, * java.lang.String) */ + @Override public String getFeaturesFrom(AlignFrame alf, String format) { return alf.outputFeatures(false, format); @@ -782,6 +813,7 @@ public class JalviewLite extends Applet implements * * @see jalview.bin.JalviewLiteJsApi#getAnnotation() */ + @Override public String getAnnotation() { return getAnnotationFrom(getDefaultTargetFrame()); @@ -794,6 +826,7 @@ public class JalviewLite extends Applet implements * jalview.bin.JalviewLiteJsApi#getAnnotationFrom(jalview.appletgui.AlignFrame * ) */ + @Override public String getAnnotationFrom(AlignFrame alf) { return alf.outputAnnotations(false); @@ -804,6 +837,7 @@ public class JalviewLite extends Applet implements * * @see jalview.bin.JalviewLiteJsApi#newView() */ + @Override public AlignFrame newView() { return newViewFrom(getDefaultTargetFrame()); @@ -814,6 +848,7 @@ public class JalviewLite extends Applet implements * * @see jalview.bin.JalviewLiteJsApi#newView(java.lang.String) */ + @Override public AlignFrame newView(String name) { return 
newViewFrom(getDefaultTargetFrame(), name); @@ -824,6 +859,7 @@ public class JalviewLite extends Applet implements * * @see jalview.bin.JalviewLiteJsApi#newViewFrom(jalview.appletgui.AlignFrame) */ + @Override public AlignFrame newViewFrom(AlignFrame alf) { return alf.newView(null); @@ -835,6 +871,7 @@ public class JalviewLite extends Applet implements * @see jalview.bin.JalviewLiteJsApi#newViewFrom(jalview.appletgui.AlignFrame, * java.lang.String) */ + @Override public AlignFrame newViewFrom(AlignFrame alf, String name) { return alf.newView(name); @@ -846,6 +883,7 @@ public class JalviewLite extends Applet implements * @see jalview.bin.JalviewLiteJsApi#loadAlignment(java.lang.String, * java.lang.String) */ + @Override public AlignFrame loadAlignment(String text, String title) { AlignmentI al = null; @@ -872,6 +910,7 @@ public class JalviewLite extends Applet implements * * @see jalview.bin.JalviewLiteJsApi#setMouseoverListener(java.lang.String) */ + @Override public void setMouseoverListener(String listener) { setMouseoverListener(currentAlignFrame, listener); @@ -886,6 +925,7 @@ public class JalviewLite extends Applet implements * jalview.bin.JalviewLiteJsApi#setMouseoverListener(jalview.appletgui.AlignFrame * , java.lang.String) */ + @Override public void setMouseoverListener(AlignFrame af, String listener) { if (listener != null) @@ -918,6 +958,7 @@ public class JalviewLite extends Applet implements * * @see jalview.bin.JalviewLiteJsApi#setSelectionListener(java.lang.String) */ + @Override public void setSelectionListener(String listener) { setSelectionListener(null, listener); @@ -930,6 +971,7 @@ public class JalviewLite extends Applet implements * jalview.bin.JalviewLiteJsApi#setSelectionListener(jalview.appletgui.AlignFrame * , java.lang.String) */ + @Override public void setSelectionListener(AlignFrame af, String listener) { if (listener != null) @@ -968,6 +1010,7 @@ public class JalviewLite extends Applet implements * @see 
jalview.bin.JalviewLiteJsApi#setStructureListener(java.lang.String, * java.lang.String) */ + @Override public void setStructureListener(String listener, String modelSet) { if (listener != null) @@ -1001,6 +1044,7 @@ public class JalviewLite extends Applet implements * jalview.bin.JalviewLiteJsApi#removeJavascriptListener(jalview.appletgui * .AlignFrame, java.lang.String) */ + @Override public void removeJavascriptListener(AlignFrame af, String listener) { if (listener != null) @@ -1050,12 +1094,14 @@ public class JalviewLite extends Applet implements } } + @Override public void stop() { System.err.println("Applet " + getName() + " stop()."); tidyUp(); } + @Override public void destroy() { System.err.println("Applet " + getName() + " destroy()."); @@ -1112,6 +1158,7 @@ public class JalviewLite extends Applet implements * @see jalview.bin.JalviewLiteJsApi#mouseOverStructure(java.lang.String, * java.lang.String, java.lang.String) */ + @Override public void mouseOverStructure(final String pdbResNum, final String chain, final String pdbfile) { @@ -1149,6 +1196,7 @@ public class JalviewLite extends Applet implements * jalview.bin.JalviewLiteJsApi#scrollViewToIn(jalview.appletgui.AlignFrame, * java.lang.String, java.lang.String) */ + @Override public void scrollViewToIn(final AlignFrame alf, final String topRow, final String leftHandColumn) { @@ -1488,6 +1536,7 @@ public class JalviewLite extends Applet implements add(launcher); launcher.addActionListener(new java.awt.event.ActionListener() { + @Override public void actionPerformed(ActionEvent e) { LoadingThread loader = new LoadingThread(file, file2, @@ -1610,6 +1659,7 @@ public class JalviewLite extends Applet implements frame.setTitle(title); frame.addWindowListener(new WindowAdapter() { + @Override public void windowClosing(WindowEvent e) { if (frame instanceof AlignFrame) @@ -1634,6 +1684,7 @@ public class JalviewLite extends Applet implements frame.dispose(); } + @Override public void windowActivated(WindowEvent e) 
{ if (frame instanceof AlignFrame) @@ -1671,6 +1722,7 @@ public class JalviewLite extends Applet implements * @param g * graphics context */ + @Override public void paint(Graphics g) { if (!fileFound) @@ -1722,6 +1774,7 @@ public class JalviewLite extends Applet implements { private boolean running = false; + @Override public void run() { if (running || checkedForJmol) @@ -1866,6 +1919,7 @@ public class JalviewLite extends Applet implements applet = _applet; } + @Override public void run() { LoadJmolThread jmolchecker = new LoadJmolThread(); @@ -2566,6 +2620,7 @@ public class JalviewLite extends Applet implements * * @see jalview.bin.JalviewLiteJsApi#getFeatureGroups() */ + @Override public String getFeatureGroups() { String lst = arrayToSeparatorList(getDefaultTargetFrame() @@ -2580,6 +2635,7 @@ public class JalviewLite extends Applet implements * jalview.bin.JalviewLiteJsApi#getFeatureGroupsOn(jalview.appletgui.AlignFrame * ) */ + @Override public String getFeatureGroupsOn(AlignFrame alf) { String lst = arrayToSeparatorList(alf.getFeatureGroups()); @@ -2591,6 +2647,7 @@ public class JalviewLite extends Applet implements * * @see jalview.bin.JalviewLiteJsApi#getFeatureGroupsOfState(boolean) */ + @Override public String getFeatureGroupsOfState(boolean visible) { return arrayToSeparatorList(getDefaultTargetFrame() @@ -2604,6 +2661,7 @@ public class JalviewLite extends Applet implements * jalview.bin.JalviewLiteJsApi#getFeatureGroupsOfStateOn(jalview.appletgui * .AlignFrame, boolean) */ + @Override public String getFeatureGroupsOfStateOn(AlignFrame alf, boolean visible) { return arrayToSeparatorList(alf.getFeatureGroupsOfState(visible)); @@ -2615,6 +2673,7 @@ public class JalviewLite extends Applet implements * @see jalview.bin.JalviewLiteJsApi#setFeatureGroupStateOn(jalview.appletgui. 
* AlignFrame, java.lang.String, boolean) */ + @Override public void setFeatureGroupStateOn(final AlignFrame alf, final String groups, boolean state) { @@ -2636,6 +2695,7 @@ public class JalviewLite extends Applet implements * @see jalview.bin.JalviewLiteJsApi#setFeatureGroupState(java.lang.String, * boolean) */ + @Override public void setFeatureGroupState(String groups, boolean state) { setFeatureGroupStateOn(getDefaultTargetFrame(), groups, state); @@ -2646,6 +2706,7 @@ public class JalviewLite extends Applet implements * * @see jalview.bin.JalviewLiteJsApi#getSeparator() */ + @Override public String getSeparator() { return separator; @@ -2656,6 +2717,7 @@ public class JalviewLite extends Applet implements * * @see jalview.bin.JalviewLiteJsApi#setSeparator(java.lang.String) */ + @Override public void setSeparator(String separator) { if (separator == null || separator.length() < 1) @@ -2700,6 +2762,7 @@ public class JalviewLite extends Applet implements * @see jalview.bin.JalviewLiteJsApi#addPdbFile(jalview.appletgui.AlignFrame, * java.lang.String, java.lang.String, java.lang.String) */ + @Override public boolean addPdbFile(AlignFrame alFrame, String sequenceId, String pdbEntryString, String pdbFile) { @@ -2716,6 +2779,7 @@ public class JalviewLite extends Applet implements return alignPdbStructures; } + @Override public void start() { // callInitCallback(); @@ -2749,6 +2813,7 @@ public class JalviewLite extends Applet implements * @see jalview.bin.JalviewLiteJsApi#getJsMessage(java.lang.String, * java.lang.String) */ + @Override public String getJsMessage(String messageclass, String viewId) { Hashtable msgset = jsmessages.get(messageclass); diff --git a/src/jalview/datamodel/AlignedCodonFrame.java b/src/jalview/datamodel/AlignedCodonFrame.java index 3fc8c28..6d6cdb5 100644 --- a/src/jalview/datamodel/AlignedCodonFrame.java +++ b/src/jalview/datamodel/AlignedCodonFrame.java @@ -303,7 +303,8 @@ public class AlignedCodonFrame /** * Convenience method to return the 
first aligned sequence in the given - * alignment whose dataset has a mapping with the given dataset sequence. + * alignment whose dataset has a mapping with the given (aligned or dataset) + * sequence. * * @param seq * @@ -317,7 +318,7 @@ public class AlignedCodonFrame */ for (SequenceToSequenceMapping ssm : mappings) { - if (ssm.fromSeq == seq) + if (ssm.fromSeq == seq || ssm.fromSeq == seq.getDatasetSequence()) { for (SequenceI sourceAligned : al.getSequences()) { @@ -335,7 +336,8 @@ public class AlignedCodonFrame */ for (SequenceToSequenceMapping ssm : mappings) { - if (ssm.mapping.to == seq) + if (ssm.mapping.to == seq + || ssm.mapping.to == seq.getDatasetSequence()) { for (SequenceI sourceAligned : al.getSequences()) { @@ -444,13 +446,13 @@ public class AlignedCodonFrame } /** - * Returns any mappings found which are to (or from) the given sequence, and - * to distinct sequences. + * Returns any mappings found which are from the given sequence, and to + * distinct sequences. * * @param seq * @return */ - public List getMappingsForSequence(SequenceI seq) + public List getMappingsFromSequence(SequenceI seq) { List result = new ArrayList(); List related = new ArrayList(); @@ -460,7 +462,7 @@ public class AlignedCodonFrame for (SequenceToSequenceMapping ssm : mappings) { final Mapping mapping = ssm.mapping; - if (ssm.fromSeq == seqDs || mapping.to == seqDs) + if (ssm.fromSeq == seqDs) { if (!related.contains(mapping.to)) { @@ -638,4 +640,38 @@ public class AlignedCodonFrame { return mappings.isEmpty(); } + + /** + * Method for debug / inspection purposes only, may change in future + */ + @Override + public String toString() + { + return mappings == null ? 
"null" : mappings.toString(); + } + + /** + * Returns the first mapping found that is from 'fromSeq' to 'toSeq', or null + * if none found + * + * @param fromSeq + * aligned or dataset sequence + * @param toSeq + * aligned or dataset sequence + * @return + */ + public Mapping getMappingBetween(SequenceI fromSeq, SequenceI toSeq) + { + for (SequenceToSequenceMapping mapping : mappings) + { + SequenceI from = mapping.fromSeq; + SequenceI to = mapping.mapping.to; + if ((from == fromSeq || from == fromSeq.getDatasetSequence()) + && (to == toSeq || to == toSeq.getDatasetSequence())) + { + return mapping.mapping; + } + } + return null; + } } diff --git a/src/jalview/datamodel/Alignment.java b/src/jalview/datamodel/Alignment.java index 1134857..d1ea70d 100755 --- a/src/jalview/datamodel/Alignment.java +++ b/src/jalview/datamodel/Alignment.java @@ -1704,31 +1704,13 @@ public class Alignment implements AlignmentI boolean preserveUnmappedGaps) { // TODO should this method signature be the one in the interface? - int count = 0; boolean thisIsNucleotide = this.isNucleotide(); boolean thatIsProtein = !al.isNucleotide(); if (!thatIsProtein && !thisIsNucleotide) { return AlignmentUtils.alignProteinAsDna(this, al); } - - char thisGapChar = this.getGapCharacter(); - String gap = thisIsNucleotide && thatIsProtein ? String - .valueOf(new char[] { thisGapChar, thisGapChar, thisGapChar }) - : String.valueOf(thisGapChar); - - // TODO handle intron regions? Needs a 'holistic' alignment of dna, - // not just sequence by sequence. But how to 'gap' intron regions? - - /* - * Get mappings from 'that' alignment's sequences to this. - */ - for (SequenceI alignTo : getSequences()) - { - count += AlignmentUtils.alignSequenceAs(alignTo, al, gap, - preserveMappedGaps, preserveUnmappedGaps) ? 
1 : 0; - } - return count; + return AlignmentUtils.alignAs(this, al); } /** diff --git a/src/jalview/datamodel/ColumnSelection.java b/src/jalview/datamodel/ColumnSelection.java index 6fb584c..c63b6cd 100644 --- a/src/jalview/datamodel/ColumnSelection.java +++ b/src/jalview/datamodel/ColumnSelection.java @@ -25,6 +25,7 @@ import jalview.viewmodel.annotationfilter.AnnotationFilterParameter; import jalview.viewmodel.annotationfilter.AnnotationFilterParameter.SearchableAnnotationField; import java.util.ArrayList; +import java.util.BitSet; import java.util.Collections; import java.util.List; import java.util.Vector; @@ -34,11 +35,158 @@ import java.util.Vector; */ public class ColumnSelection { - /* - * list of selected columns (not ordered) - */ - Vector selected = new Vector(); + private class IntList + { + /* + * list of selected columns (ordered by selection order, not column order) + */ + private List order = new ArrayList(); + + /** + * bitfield for column selection - allows quick lookup + */ + private BitSet selected = new BitSet(); + + /** + * adds a new column i to the selection - only if i is not already selected + * + * @param i + */ + public void add(int i) + { + if (!selected.get(i)) + { + order.add(Integer.valueOf(i)); + selected.set(i); + } + } + + public void clear() + { + order.clear(); + selected.clear(); + } + + public void remove(int col) + { + + Integer colInt = new Integer(col); + + if (selected.get(col)) + { + // if this ever changes to List.remove(), ensure Integer not int + // argument + // as List.remove(int i) removes the i'th item which is wrong + order.remove(colInt); + selected.clear(col); + } + } + + public boolean contains(Integer colInt) + { + return selected.get(colInt); + } + + public boolean isEmpty() + { + return order.isEmpty(); + } + + public List getList() + { + return order; + } + + public int size() + { + return order.size(); + } + + /** + * gets the column that was selected first, second or i'th + * + * @param i + * @return + 
*/ + public int elementAt(int i) + { + return order.get(i); + } + + protected boolean pruneColumnList(final List shifts) + { + int s = 0, t = shifts.size(); + int[] sr = shifts.get(s++); + boolean pruned = false; + int i = 0, j = order.size(); + while (i < j && s <= t) + { + int c = order.get(i++).intValue(); + if (sr[0] <= c) + { + if (sr[1] + sr[0] >= c) + { // sr[1] -ve means inseriton. + order.remove(--i); + selected.clear(c); + j--; + } + else + { + if (s < t) + { + sr = shifts.get(s); + } + s++; + } + } + } + return pruned; + } + + /** + * shift every selected column at or above start by change + * + * @param start + * - leftmost column to be shifted + * @param change + * - delta for shift + */ + public void compensateForEdits(int start, int change) + { + BitSet mask = new BitSet(); + for (int i = 0; i < order.size(); i++) + { + int temp = order.get(i); + if (temp >= start) + { + // clear shifted bits and update List of selected columns + selected.clear(temp); + mask.set(temp - change); + order.set(i, new Integer(temp - change)); + } + } + // lastly update the bitfield all at once + selected.or(mask); + } + + public boolean isSelected(int column) + { + return selected.get(column); + } + + public int getMaxColumn() + { + return selected.length() - 1; + } + + public int getMinColumn() + { + return selected.get(0) ? 
0 : selected.nextSetBit(0); + } + } + + IntList selected = new IntList(); /* * list of hidden column [start, end] ranges; the list is maintained in * ascending start column order @@ -53,11 +201,7 @@ public class ColumnSelection */ public void addElement(int col) { - Integer column = new Integer(col); - if (!selected.contains(column)) - { - selected.addElement(column); - } + selected.add(col); } /** @@ -65,7 +209,7 @@ public class ColumnSelection */ public void clear() { - selected.removeAllElements(); + selected.clear(); } /** @@ -76,14 +220,7 @@ public class ColumnSelection */ public void removeElement(int col) { - Integer colInt = new Integer(col); - - if (selected.contains(colInt)) - { - // if this ever changes to List.remove(), ensure Integer not int argument - // as List.remove(int i) removes the i'th item which is wrong - selected.removeElement(colInt); - } + selected.remove(col); } /** @@ -102,18 +239,19 @@ public class ColumnSelection colInt = new Integer(i); if (selected.contains(colInt)) { - selected.removeElement(colInt); + selected.remove(colInt); } } } /** * Returns a list of selected columns. The list contains no duplicates but is - * not necessarily ordered. + * not necessarily ordered. It also may include columns hidden from the + * current view */ public List getSelected() { - return selected; + return selected.getList(); } /** @@ -121,11 +259,11 @@ public class ColumnSelection * @param col * index to search for in column selection * - * @return true if Integer(col) is in selection. + * @return true if col is selected */ public boolean contains(int col) { - return selected.contains(new Integer(col)); + return (col > -1) ? 
selected.isSelected(col) : false; } /** @@ -143,17 +281,11 @@ public class ColumnSelection */ public int getMax() { - int max = -1; - - for (int sel : getSelected()) + if (selected.isEmpty()) { - if (sel > max) - { - max = sel; - } + return -1; } - - return max; + return selected.getMaxColumn(); } /** @@ -163,17 +295,11 @@ public class ColumnSelection */ public int getMin() { - int min = 1000000000; - - for (int sel : getSelected()) + if (selected.isEmpty()) { - if (sel < min) - { - min = sel; - } + return 1000000000; } - - return min; + return selected.getMinColumn(); } /** @@ -187,16 +313,7 @@ public class ColumnSelection public List compensateForEdit(int start, int change) { List deletedHiddenColumns = null; - for (int i = 0; i < selected.size(); i++) - { - int temp = selected.get(i); - - if (temp >= start) - { - // if this ever changes to List.set(), swap parameter order!! - selected.setElementAt(new Integer(temp - change), i); - } - } + selected.compensateForEdits(start, change); if (hiddenColumns != null) { @@ -245,16 +362,8 @@ public class ColumnSelection */ private void compensateForDelEdits(int start, int change) { - for (int i = 0; i < selected.size(); i++) - { - int temp = selected.get(i); - if (temp >= start) - { - // if this ever changes to List.set(), must swap parameter order!!! - selected.setElementAt(new Integer(temp - change), i); - } - } + selected.compensateForEdits(start, change); if (hiddenColumns != null) { @@ -409,36 +518,6 @@ public class ColumnSelection // operations. } - private boolean pruneColumnList(final List shifts, - Vector list) - { - int s = 0, t = shifts.size(); - int[] sr = shifts.get(s++); - boolean pruned = false; - int i = 0, j = list.size(); - while (i < j && s <= t) - { - int c = list.elementAt(i++).intValue(); - if (sr[0] <= c) - { - if (sr[1] + sr[0] >= c) - { // sr[1] -ve means inseriton. 
- list.removeElementAt(--i); - j--; - } - else - { - if (s < t) - { - sr = shifts.get(s); - } - s++; - } - } - } - return pruned; - } - /** * remove any hiddenColumns or selected columns and shift remaining based on a * series of position, range deletions. @@ -463,7 +542,7 @@ public class ColumnSelection } if (selected != null && selected.size() > 0) { - pruneColumnList(shifts, selected); + selected.pruneColumnList(shifts); if (selected != null && selected.size() == 0) { selected = null; @@ -631,7 +710,7 @@ public class ColumnSelection { while (!selected.isEmpty()) { - int column = selected.get(0).intValue(); + int column = selected.elementAt(0); hideColumns(column); } @@ -812,10 +891,10 @@ public class ColumnSelection { if (copy.selected != null) { - selected = new Vector(); + selected = new IntList(); for (int i = 0, j = copy.selected.size(); i < j; i++) { - selected.addElement(copy.selected.elementAt(i)); + selected.add(copy.selected.elementAt(i)); } } if (copy.hiddenColumns != null) @@ -1112,10 +1191,7 @@ public class ColumnSelection { if (hiddenColumns != null && isVisible(col.intValue())) { - if (!selected.contains(col)) - { - selected.addElement(col); - } + selected.add(col); } } } @@ -1129,7 +1205,7 @@ public class ColumnSelection */ public void setElementsFrom(ColumnSelection colsel) { - selected = new Vector(); + selected = new IntList(); if (colsel.selected != null && colsel.selected.size() > 0) { if (hiddenColumns != null && hiddenColumns.size() > 0) diff --git a/src/jalview/datamodel/SequenceFeature.java b/src/jalview/datamodel/SequenceFeature.java index 8146400..4a7706f 100755 --- a/src/jalview/datamodel/SequenceFeature.java +++ b/src/jalview/datamodel/SequenceFeature.java @@ -423,6 +423,12 @@ public class SequenceFeature return strand; } + /** + * Set the value of strand + * + * @param strand + * should be "+" for forward, or "-" for reverse + */ public void setStrand(String strand) { setValue(STRAND, strand); diff --git 
a/src/jalview/datamodel/xdb/embl/EmblEntry.java b/src/jalview/datamodel/xdb/embl/EmblEntry.java index d830130..691a4c9 100644 --- a/src/jalview/datamodel/xdb/embl/EmblEntry.java +++ b/src/jalview/datamodel/xdb/embl/EmblEntry.java @@ -33,6 +33,7 @@ import jalview.util.MapList; import jalview.util.MappingUtils; import jalview.util.StringUtils; +import java.util.Arrays; import java.util.Hashtable; import java.util.List; import java.util.Map; @@ -400,7 +401,6 @@ public class EmblEntry } } - // SequenceI product = null; DBRefEntry protEMBLCDS = null; exon = MappingUtils.removeStartPositions(codonStart - 1, exon); boolean noProteinDbref = true; @@ -436,7 +436,8 @@ public class EmblEntry .println("Not allowing for additional stop codon at end of cDNA fragment... !"); // this might occur for CDS sequences where no features are // marked. - exon = new int[] { dna.getStart() + (codonStart - 1), dna.getEnd() }; + exon = new int[] { dna.getStart() + (codonStart - 1), + dna.getEnd() }; map = new Mapping(product, exon, new int[] { 1, prseq.length() }, 3, 1); } @@ -469,9 +470,9 @@ public class EmblEntry // TODO should from range include stop codon even if not in protein // in order to include stop codon in CDS sequence (as done for // Ensembl)? 
- int[] cdsRanges = adjustForProteinLength(prseq.length(), - exon); - map = new Mapping(product, cdsRanges, new int[] { 1, prseq.length() }, 3, 1); + int[] cdsRanges = adjustForProteinLength(prseq.length(), exon); + map = new Mapping(product, cdsRanges, new int[] { 1, + prseq.length() }, 3, 1); // reconstruct the EMBLCDS entry // TODO: this is only necessary when there codon annotation is // complete (I think JBPNote) @@ -536,8 +537,7 @@ public class EmblEntry if (proteinSeq == null) { proteinSeq = new Sequence(proteinSeqName, - product - .getSequenceAsString()); + product.getSequenceAsString()); matcher.add(proteinSeq); peptides.add(proteinSeq); } @@ -623,11 +623,11 @@ public class EmblEntry SequenceFeature sf = new SequenceFeature(); sf.setBegin(Math.min(exons[exonStartIndex], exons[exonStartIndex + 1])); sf.setEnd(Math.max(exons[exonStartIndex], exons[exonStartIndex + 1])); - sf.setDescription(String.format( - "Exon %d for protein '%s' EMBLCDS:%s", exonNumber, proteinName, - proteinAccessionId)); + sf.setDescription(String.format("Exon %d for protein '%s' EMBLCDS:%s", + exonNumber, proteinName, proteinAccessionId)); sf.setPhase(String.valueOf(codonStart - 1)); - sf.setStrand(exons[exonStartIndex] <= exons[exonStartIndex + 1] ? "+" : "-"); + sf.setStrand(exons[exonStartIndex] <= exons[exonStartIndex + 1] ? 
"+" + : "-"); sf.setValue(FeatureProperties.EXONPOS, exonNumber); sf.setValue(FeatureProperties.EXONPRODUCT, proteinName); if (!vals.isEmpty()) @@ -689,45 +689,69 @@ public class EmblEntry * @param exon * @return new exon */ - private int[] adjustForProteinLength(int prlength, int[] exon) + static int[] adjustForProteinLength(int prlength, int[] exon) { + if (prlength <= 0 || exon == null) + { + return exon; + } + int desiredCdsLength = prlength * 3; + int exonLength = MappingUtils.getLength(Arrays.asList(exon)); + + /* + * assuming here exon might include stop codon in addition to protein codons + */ + if (desiredCdsLength == exonLength + || desiredCdsLength == exonLength - 3) + { + return exon; + } - int origxon[], sxpos = -1, endxon = 0, cdslength = prlength * 3; - // first adjust range for codon start attribute - if (prlength >= 1 && exon != null) + int origxon[]; + int sxpos = -1; + int endxon = 0; + origxon = new int[exon.length]; + System.arraycopy(exon, 0, origxon, 0, exon.length); + int cdspos = 0; + for (int x = 0; x < exon.length; x += 2) { - origxon = new int[exon.length]; - System.arraycopy(exon, 0, origxon, 0, exon.length); - int cdspos = 0; - for (int x = 0; x < exon.length && sxpos == -1; x += 2) + cdspos += Math.abs(exon[x + 1] - exon[x]) + 1; + if (desiredCdsLength <= cdspos) { - cdspos += Math.abs(exon[x + 1] - exon[x]) + 1; - if (cdslength <= cdspos) + // advanced beyond last codon. + sxpos = x; + if (desiredCdsLength != cdspos) { - // advanced beyond last codon. 
- sxpos = x; - if (cdslength != cdspos) - { - System.err - .println("Truncating final exon interval on region by " - + (cdspos - cdslength)); - } - // locate the new end boundary of final exon as endxon - endxon = exon[x + 1] - cdspos + cdslength; - break; + // System.err + // .println("Truncating final exon interval on region by " + // + (cdspos - cdslength)); } - } - if (sxpos != -1) - { - // and trim the exon interval set if necessary - int[] nxon = new int[sxpos + 2]; - System.arraycopy(exon, 0, nxon, 0, sxpos + 2); - nxon[sxpos + 1] = endxon; // update the end boundary for the new exon - // set - exon = nxon; + /* + * shrink the final exon - reduce end position if forward + * strand, increase it if reverse + */ + if (exon[x + 1] >= exon[x]) + { + endxon = exon[x + 1] - cdspos + desiredCdsLength; + } + else + { + endxon = exon[x + 1] + cdspos - desiredCdsLength; + } + break; } } + + if (sxpos != -1) + { + // and trim the exon interval set if necessary + int[] nxon = new int[sxpos + 2]; + System.arraycopy(exon, 0, nxon, 0, sxpos + 2); + nxon[sxpos + 1] = endxon; // update the end boundary for the new exon + // set + exon = nxon; + } return exon; } } diff --git a/src/jalview/datamodel/xdb/embl/EmblFile.java b/src/jalview/datamodel/xdb/embl/EmblFile.java index e0b5ede..69870b6 100644 --- a/src/jalview/datamodel/xdb/embl/EmblFile.java +++ b/src/jalview/datamodel/xdb/embl/EmblFile.java @@ -20,6 +20,9 @@ */ package jalview.datamodel.xdb.embl; +import jalview.datamodel.DBRefEntry; +import jalview.ws.dbsources.Uniprot; + import java.io.File; import java.io.FileReader; import java.io.PrintWriter; @@ -129,6 +132,8 @@ public class EmblFile unmar.setMapping(map); unmar.setLogWriter(new PrintWriter(System.out)); record = (EmblFile) unmar.unmarshal(file); + + canonicaliseDbRefs(record); } catch (Exception e) { e.printStackTrace(System.err); @@ -137,4 +142,45 @@ public class EmblFile return record; } + + /** + * Change blank version to "0" in any DBRefEntry, to ensure 
consistent + * comparison with other DBRefEntry in Jalview + * + * @param record + * @see Uniprot#getDbVersion + */ + static void canonicaliseDbRefs(EmblFile record) + { + for (EmblEntry entry : record.getEntries()) + { + if (entry.getDbRefs() != null) + { + for (DBRefEntry dbref : entry.getDbRefs()) + { + if ("".equals(dbref.getVersion())) + { + dbref.setVersion("0"); + } + } + } + + if (entry.getFeatures() != null) + { + for (EmblFeature feature : entry.getFeatures()) + { + if (feature.getDbRefs() != null) + { + for (DBRefEntry dbref : feature.getDbRefs()) + { + if ("".equals(dbref.getVersion())) + { + dbref.setVersion("0"); + } + } + } + } + } + } + } } diff --git a/src/jalview/ext/ensembl/EnsemblCdna.java b/src/jalview/ext/ensembl/EnsemblCdna.java index 856be74..e141db4 100644 --- a/src/jalview/ext/ensembl/EnsemblCdna.java +++ b/src/jalview/ext/ensembl/EnsemblCdna.java @@ -4,9 +4,6 @@ import jalview.datamodel.SequenceFeature; import jalview.io.gff.SequenceOntologyFactory; import jalview.io.gff.SequenceOntologyI; -import java.util.Arrays; -import java.util.List; - import com.stevesoft.pat.Regex; /** @@ -19,9 +16,6 @@ import com.stevesoft.pat.Regex; */ public class EnsemblCdna extends EnsemblSeqProxy { - private static final List CROSS_REFERENCES = Arrays - .asList(new String[] { "Uniprot/SWISSPROT", "Uniprot/SPTREMBL" }); - /* * accepts ENST or ENSTG with 11 digits * or ENSMUST or similar for other species @@ -114,12 +108,4 @@ public class EnsemblCdna extends EnsemblSeqProxy return false; } - @Override - protected List getCrossReferenceDatabases() - { - return CROSS_REFERENCES; - // 30/01/16 also found Vega_transcript, OTTT, ENS_LRG_transcript, UCSC, - // HGNC_trans_name, RefSeq_mRNA, RefSeq_mRNA_predicted - } - } diff --git a/src/jalview/ext/ensembl/EnsemblGene.java b/src/jalview/ext/ensembl/EnsemblGene.java index fa1e474..0bc6a74 100644 --- a/src/jalview/ext/ensembl/EnsemblGene.java +++ b/src/jalview/ext/ensembl/EnsemblGene.java @@ -29,9 +29,6 @@ import 
com.stevesoft.pat.Regex; */ public class EnsemblGene extends EnsemblSeqProxy { - private static final List CROSS_REFERENCES = Arrays - .asList(new String[] { "CCDS" }); - private static final String GENE_PREFIX = "gene:"; /* @@ -465,15 +462,6 @@ public class EnsemblGene extends EnsemblSeqProxy return false; } - @Override - protected List getCrossReferenceDatabases() - { - // found these for ENSG00000157764 on 30/01/2016: - // return new String[] {"Vega_gene", "OTTG", "ENS_LRG_gene", "ArrayExpress", - // "EntrezGene", "HGNC", "MIM_GENE", "MIM_MORBID", "WikiGene"}; - return CROSS_REFERENCES; - } - /** * Override to do nothing as Ensembl doesn't return a protein sequence for a * gene identifier diff --git a/src/jalview/ext/ensembl/EnsemblProtein.java b/src/jalview/ext/ensembl/EnsemblProtein.java index 97796a5..0facbb5 100644 --- a/src/jalview/ext/ensembl/EnsemblProtein.java +++ b/src/jalview/ext/ensembl/EnsemblProtein.java @@ -3,7 +3,6 @@ package jalview.ext.ensembl; import jalview.datamodel.AlignmentI; import jalview.datamodel.SequenceFeature; -import java.util.Arrays; import java.util.List; import com.stevesoft.pat.Regex; @@ -24,9 +23,6 @@ public class EnsemblProtein extends EnsemblSeqProxy private static final Regex ACCESSION_REGEX = new Regex( "(ENS([A-Z]{3}|)P[0-9]{11}$)" + "|" + "(CCDS[0-9.]{3,}$)"); - private static final List CROSSREFS = Arrays.asList(new String[] { - "PDB", "Uniprot/SPTREMBL", "Uniprot/SWISSPROT" }); - /** * Default constructor (to use rest.ensembl.org) */ @@ -99,12 +95,6 @@ public class EnsemblProtein extends EnsemblSeqProxy } @Override - protected List getCrossReferenceDatabases() - { - return CROSSREFS; - } - - @Override public Regex getAccessionValidator() { return ACCESSION_REGEX; diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java index 5f3f1c8..0ceb29c 100644 --- a/src/jalview/ext/ensembl/EnsemblSeqProxy.java +++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java @@ -36,10 +36,6 @@ 
public abstract class EnsemblSeqProxy extends EnsemblRestClient { private static final String ALLELES = "alleles"; - private static final List CROSS_REFERENCES = Arrays - .asList(new String[] { "CCDS", "Uniprot/SWISSPROT", - "Uniprot/SPTREMBL" }); - protected static final String CONSEQUENCE_TYPE = "consequence_type"; protected static final String PARENT = "Parent"; @@ -316,8 +312,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient } EnsemblXref xrefFetcher = new EnsemblXref(getDomain()); - List xrefs = xrefFetcher.getCrossReferences(seq.getName(), - getCrossReferenceDatabases()); + List xrefs = xrefFetcher.getCrossReferences(seq.getName()); for (DBRefEntry xref : xrefs) { seq.addDBRef(xref); @@ -338,18 +333,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient } /** - * Returns a list of database names to be used when fetching cross-references. - * Specifically, the names are used to filter data returned by the Ensembl - * xrefs REST service on the value in field 'dbname'. - * - * @return - */ - protected List getCrossReferenceDatabases() - { - return CROSS_REFERENCES; - } - - /** * Fetches sequences for the list of accession ids and adds them to the * alignment. Returns the extended (or created) alignment. 
* diff --git a/src/jalview/ext/ensembl/EnsemblSymbol.java b/src/jalview/ext/ensembl/EnsemblSymbol.java index 08f26c7..1c47f11 100644 --- a/src/jalview/ext/ensembl/EnsemblSymbol.java +++ b/src/jalview/ext/ensembl/EnsemblSymbol.java @@ -40,7 +40,7 @@ public class EnsemblSymbol extends EnsemblXref * @return * @throws IOException */ - protected String parseResponse(BufferedReader br) + protected String parseSymbolResponse(BufferedReader br) throws IOException { JSONParser jp = new JSONParser(); @@ -108,7 +108,7 @@ public class EnsemblSymbol extends EnsemblXref { br = getHttpResponse(url, ids); } - String geneId = parseResponse(br); + String geneId = parseSymbolResponse(br); if (geneId != null) { result.add(geneId); diff --git a/src/jalview/ext/ensembl/EnsemblXref.java b/src/jalview/ext/ensembl/EnsemblXref.java index 7b5f9da..fa86865 100644 --- a/src/jalview/ext/ensembl/EnsemblXref.java +++ b/src/jalview/ext/ensembl/EnsemblXref.java @@ -27,6 +27,8 @@ import org.json.simple.parser.ParseException; class EnsemblXref extends EnsemblRestClient { + private static final String GO_GENE_ONTOLOGY = "GO"; + /** * Constructor given the target domain to fetch data from * @@ -77,17 +79,14 @@ class EnsemblXref extends EnsemblRestClient * Calls the Ensembl xrefs REST endpoint and retrieves any cross-references * ("primary_id") for the given identifier (Ensembl accession id) and database * names. The "dbname" returned by Ensembl is canonicalised to Jalview's - * standard version, and a DBRefEntry constructed. If no databases are - * specified, all available cross-references are retrieved. + * standard version, and a DBRefEntry constructed. Currently takes all + * identifiers apart from GO terms and synonyms. 
* * @param identifier * an Ensembl stable identifier - * @param databases - * optional list of Ensembl cross-referenced databases of interest * @return */ - public List getCrossReferences(String identifier, - List databases) + public List getCrossReferences(String identifier) { List result = new ArrayList(); List ids = new ArrayList(); @@ -101,7 +100,7 @@ class EnsemblXref extends EnsemblRestClient { br = getHttpResponse(url, ids); } - return (parseResponse(br, databases)); + return (parseResponse(br)); } catch (IOException e) { // ignore @@ -124,17 +123,15 @@ class EnsemblXref extends EnsemblRestClient /** * Parses "primary_id" and "dbname" values from the JSON response and - * constructs a DBRefEntry if the dbname is in the list supplied. Returns a - * list of the DBRefEntry created. Note we don't parse "synonyms" as they - * appear to be either redirected or obsolete in Uniprot. + * constructs a DBRefEntry. Returns a list of the DBRefEntry created. Note we + * don't parse "synonyms" as they appear to be either redirected or obsolete + * in Uniprot. 
* * @param br - * @param databases * @return * @throws IOException */ - protected List parseResponse(BufferedReader br, - List databases) + protected List parseResponse(BufferedReader br) throws IOException { JSONParser jp = new JSONParser(); @@ -147,8 +144,7 @@ class EnsemblXref extends EnsemblRestClient { JSONObject val = (JSONObject) rvals.next(); String dbName = val.get("dbname").toString(); - if (databases != null && !databases.isEmpty() - && !databases.contains(dbName)) + if (dbName.equals(GO_GENE_ONTOLOGY)) { continue; } diff --git a/src/jalview/ext/rbvi/chimera/JalviewChimeraBinding.java b/src/jalview/ext/rbvi/chimera/JalviewChimeraBinding.java index af87e44..1ce0d2b 100644 --- a/src/jalview/ext/rbvi/chimera/JalviewChimeraBinding.java +++ b/src/jalview/ext/rbvi/chimera/JalviewChimeraBinding.java @@ -1127,6 +1127,8 @@ public abstract class JalviewChimeraBinding extends AAStructureBindingModel /** * Send a 'show' command for all atoms in the currently selected columns * + * TODO: pull up to abstract structure viewer interface + * * @param vp */ public void highlightSelection(AlignmentViewPanel vp) diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java index fc145ed..d1f3421 100644 --- a/src/jalview/gui/AlignFrame.java +++ b/src/jalview/gui/AlignFrame.java @@ -102,7 +102,6 @@ import jalview.ws.SequenceFetcher; import jalview.ws.jws1.Discoverer; import jalview.ws.jws2.Jws2Discoverer; import jalview.ws.jws2.jabaws2.Jws2Instance; -import jalview.ws.seqfetcher.ASequenceFetcher; import jalview.ws.seqfetcher.DbSourceProxy; import java.awt.BorderLayout; @@ -4723,33 +4722,18 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, new Object[] { source }), sttime); try { - /* - * 'peer' sequences are any to add to this alignment, for example - * alternative protein products for my protein's gene - */ - List addedPeers = new ArrayList(); AlignmentI alignment = AlignFrame.this.getViewport() .getAlignment(); - Alignment xrefs 
= CrossRef.findXrefSequences(sel, dna, source, - alignment, addedPeers); + AlignmentI xrefs = CrossRef.findXrefSequences(sel, dna, source, + alignment); if (xrefs != null) { /* - * figure out colour scheme if any to apply to features + * get display scheme (if any) to apply to features */ - ASequenceFetcher sftch = new SequenceFetcher(); - List proxies = sftch.getSourceProxy(source); - FeatureSettingsModelI featureColourScheme = null; - for (DbSourceProxy proxy : proxies) - { - FeatureSettingsModelI preferredColours = proxy - .getFeatureColourScheme(); - if (preferredColours != null) - { - featureColourScheme = preferredColours; - break; - } - } + FeatureSettingsModelI featureColourScheme = new SequenceFetcher() + .getFeatureColourScheme(source); + AlignmentI al = makeCrossReferencesAlignment( alignment.getDataset(), xrefs); @@ -4761,140 +4745,142 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, .getString("label.for"), getTitle()); newFrame.setTitle(newtitle); - boolean asSplitFrame = Cache.getDefault( - Preferences.ENABLE_SPLIT_FRAME, true); - if (asSplitFrame) + if (!Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true)) { /* - * Make a copy of this alignment (sharing the same dataset - * sequences). 
If we are DNA, drop introns and update mappings + * split frame display is turned off in preferences file */ - AlignmentI copyAlignment = null; - final SequenceI[] sequenceSelection = AlignFrame.this.viewport - .getSequenceSelection(); - List cf = xrefs.getCodonFrames(); - if (dna) - { - copyAlignment = AlignmentUtils.makeCdsAlignment( - sequenceSelection, cf, alignment); - if (copyAlignment.getHeight() == 0) - { - System.err.println("Failed to make CDS alignment"); - } - al.getCodonFrames().clear(); - al.getCodonFrames().addAll(cf); - } - else + Desktop.addInternalFrame(newFrame, newtitle, DEFAULT_WIDTH, + DEFAULT_HEIGHT); + return; // via finally clause + } + + /* + * Make a copy of this alignment (sharing the same dataset + * sequences). If we are DNA, drop introns and update mappings + */ + AlignmentI copyAlignment = null; + final SequenceI[] sequenceSelection = AlignFrame.this.viewport + .getSequenceSelection(); + List cf = xrefs.getCodonFrames(); + boolean copyAlignmentIsAligned = false; + if (dna) + { + copyAlignment = AlignmentUtils.makeCdsAlignment( + sequenceSelection, cf, alignment); + if (copyAlignment.getHeight() == 0) { - copyAlignment = new Alignment(new Alignment( - sequenceSelection)); - copyAlignment.getCodonFrames().addAll(cf); + System.err.println("Failed to make CDS alignment"); } - copyAlignment.setGapCharacter(AlignFrame.this.viewport - .getGapCharacter()); - StructureSelectionManager ssm = StructureSelectionManager - .getStructureSelectionManager(Desktop.instance); - ssm.registerMappings(cf); + al.getCodonFrames().clear(); + al.getCodonFrames().addAll(copyAlignment.getCodonFrames()); /* - * add in any extra 'peer' sequences discovered - * (e.g. 
alternative protein products) + * pending getting Embl transcripts to 'align', + * we are only doing this for Ensembl */ - for (SequenceI peer : addedPeers) + // TODO proper criteria for 'can align as cdna' + if (DBRefSource.ENSEMBL.equalsIgnoreCase(source) + || AlignmentUtils.looksLikeEnsembl(alignment)) { - copyAlignment.addSequence(peer); + copyAlignment.alignAs(alignment); + copyAlignmentIsAligned = true; } + } + else + { + copyAlignment = AlignmentUtils.makeCopyAlignment( + sequenceSelection, xrefs.getSequencesArray()); + copyAlignment.getCodonFrames().addAll(cf); + } + copyAlignment.setGapCharacter(AlignFrame.this.viewport + .getGapCharacter()); - if (copyAlignment.getHeight() > 0) - { - /* - * align protein to dna - */ - // FIXME what if the dna is not aligned :-O - if (dna) - { - al.alignAs(copyAlignment); - } - else - { - /* - * align cdna to protein - currently only if - * fetching and aligning Ensembl transcripts! - */ - if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)) - { - copyAlignment.alignAs(al); - } - } + StructureSelectionManager ssm = StructureSelectionManager + .getStructureSelectionManager(Desktop.instance); + ssm.registerMappings(cf); - AlignFrame copyThis = new AlignFrame(copyAlignment, - AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); - copyThis.setTitle(AlignFrame.this.getTitle()); - - boolean showSequenceFeatures = viewport - .isShowSequenceFeatures(); - newFrame.setShowSeqFeatures(showSequenceFeatures); - copyThis.setShowSeqFeatures(showSequenceFeatures); - FeatureRenderer myFeatureStyling = alignPanel.getSeqPanel().seqCanvas - .getFeatureRenderer(); - - /* - * copy feature rendering settings to split frame - */ - newFrame.alignPanel.getSeqPanel().seqCanvas - .getFeatureRenderer().transferSettings( - myFeatureStyling); - copyThis.alignPanel.getSeqPanel().seqCanvas - .getFeatureRenderer().transferSettings( - myFeatureStyling); - - /* - * apply 'database source' feature configuration - * if any was found - */ - // TODO is this the 
feature colouring for the original - // alignment or the fetched xrefs? either could be Ensembl - newFrame.getViewport().applyFeaturesStyle( - featureColourScheme); - copyThis.getViewport().applyFeaturesStyle( - featureColourScheme); - - SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame, - dna ? newFrame : copyThis); - newFrame.setVisible(true); - copyThis.setVisible(true); - String linkedTitle = MessageManager - .getString("label.linked_view_title"); - Desktop.addInternalFrame(sf, linkedTitle, -1, -1); - sf.adjustDivider(); - } + if (copyAlignment.getHeight() <= 0) + { + System.err.println("No Sequences generated for xRef type " + + source); + return; + } + /* + * align protein to dna + */ + if (dna && copyAlignmentIsAligned) + { + al.alignAs(copyAlignment); } else { - Desktop.addInternalFrame(newFrame, newtitle, DEFAULT_WIDTH, - DEFAULT_HEIGHT); + /* + * align cdna to protein - currently only if + * fetching and aligning Ensembl transcripts! + */ + if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)) + { + copyAlignment.alignAs(al); + } } - } - else - { - System.err.println("No Sequences generated for xRef type " - + source); + + AlignFrame copyThis = new AlignFrame(copyAlignment, + AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); + copyThis.setTitle(AlignFrame.this.getTitle()); + + boolean showSequenceFeatures = viewport + .isShowSequenceFeatures(); + newFrame.setShowSeqFeatures(showSequenceFeatures); + copyThis.setShowSeqFeatures(showSequenceFeatures); + FeatureRenderer myFeatureStyling = alignPanel.getSeqPanel().seqCanvas + .getFeatureRenderer(); + + /* + * copy feature rendering settings to split frame + */ + newFrame.alignPanel.getSeqPanel().seqCanvas + .getFeatureRenderer() + .transferSettings(myFeatureStyling); + copyThis.alignPanel.getSeqPanel().seqCanvas + .getFeatureRenderer() + .transferSettings(myFeatureStyling); + + /* + * apply 'database source' feature configuration + * if any was found + */ + // TODO is this the feature colouring for the 
original + // alignment or the fetched xrefs? either could be Ensembl + newFrame.getViewport().applyFeaturesStyle(featureColourScheme); + copyThis.getViewport().applyFeaturesStyle(featureColourScheme); + + SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame, + dna ? newFrame : copyThis); + newFrame.setVisible(true); + copyThis.setVisible(true); + String linkedTitle = MessageManager + .getString("label.linked_view_title"); + Desktop.addInternalFrame(sf, linkedTitle, -1, -1); + sf.adjustDivider(); } } catch (Exception e) { - jalview.bin.Cache.log.error( + Cache.log.error( "Exception when finding crossreferences", e); } catch (OutOfMemoryError e) { new OOMWarning("whilst fetching crossreferences", e); - } catch (Error e) + } catch (Throwable e) { - jalview.bin.Cache.log.error("Error when finding crossreferences", + Cache.log.error("Error when finding crossreferences", e); + } finally + { + AlignFrame.this.setProgressBar(MessageManager.formatMessage( + "status.finished_searching_for_sequences_from", + new Object[] { source }), sttime); } - AlignFrame.this.setProgressBar(MessageManager.formatMessage( - "status.finished_searching_for_sequences_from", - new Object[] { source }), sttime); } /** @@ -4945,23 +4931,6 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, frunner.start(); } - public boolean canShowTranslationProducts(SequenceI[] selection, - AlignmentI alignment) - { - // old way - try - { - return (jalview.analysis.Dna.canTranslate(selection, - viewport.getViewAsVisibleContigs(true))); - } catch (Exception e) - { - jalview.bin.Cache.log - .warn("canTranslate threw an exception - please report to help@jalview.org", - e); - return false; - } - } - /** * Construct and display a new frame containing the translation of this * frame's DNA sequences to their aligned protein (amino acid) equivalents. 
diff --git a/src/jalview/gui/AlignViewport.java b/src/jalview/gui/AlignViewport.java index 7d8d4fe..692cd18 100644 --- a/src/jalview/gui/AlignViewport.java +++ b/src/jalview/gui/AlignViewport.java @@ -663,39 +663,6 @@ public class AlignViewport extends AlignmentViewport implements } /** - * synthesize a column selection if none exists so it covers the given - * selection group. if wholewidth is false, no column selection is made if the - * selection group covers the whole alignment width. - * - * @param sg - * @param wholewidth - */ - public void expandColSelection(SequenceGroup sg, boolean wholewidth) - { - int sgs, sge; - if (sg != null - && (sgs = sg.getStartRes()) >= 0 - && sg.getStartRes() <= (sge = sg.getEndRes()) - && (colSel == null || colSel.getSelected() == null || colSel - .getSelected().size() == 0)) - { - if (!wholewidth && alignment.getWidth() == (1 + sge - sgs)) - { - // do nothing - return; - } - if (colSel == null) - { - colSel = new ColumnSelection(); - } - for (int cspos = sg.getStartRes(); cspos <= sg.getEndRes(); cspos++) - { - colSel.addElement(cspos); - } - } - } - - /** * Returns the (Desktop) instance of the StructureSelectionManager */ @Override diff --git a/src/jalview/gui/ScalePanel.java b/src/jalview/gui/ScalePanel.java index b2c9a12..7e17f46 100755 --- a/src/jalview/gui/ScalePanel.java +++ b/src/jalview/gui/ScalePanel.java @@ -467,6 +467,9 @@ public class ScalePanel extends JPanel implements MouseMotionListener, for (int sel : cs.getSelected()) { + // TODO: JAL-2001 - provide a fast method to list visible selected in a + // given range + if (av.hasHiddenColumns()) { if (cs.isVisible(sel)) diff --git a/src/jalview/renderer/AnnotationRenderer.java b/src/jalview/renderer/AnnotationRenderer.java index 007df3e..75099c2 100644 --- a/src/jalview/renderer/AnnotationRenderer.java +++ b/src/jalview/renderer/AnnotationRenderer.java @@ -598,12 +598,9 @@ public class AnnotationRenderer if (columnSelection != null) { - for (int v : 
columnSelection.getSelected()) + if (columnSelection.contains(column)) { - if (v == column) - { - g.fillRect(x * charWidth, y, charWidth, charHeight); - } + g.fillRect(x * charWidth, y, charWidth, charHeight); } } } diff --git a/src/jalview/util/MapList.java b/src/jalview/util/MapList.java index 8a014c6..e51442c 100644 --- a/src/jalview/util/MapList.java +++ b/src/jalview/util/MapList.java @@ -972,7 +972,9 @@ public class MapList { sb.append(" ").append(Arrays.toString(shift)); } - sb.append(" ] To ["); + sb.append(" ] "); + sb.append(fromRatio).append(":").append(toRatio); + sb.append(" to ["); for (int[] shift : toShifts) { sb.append(" ").append(Arrays.toString(shift)); diff --git a/src/jalview/util/MappingUtils.java b/src/jalview/util/MappingUtils.java index c2cad1f..ae4e55d 100644 --- a/src/jalview/util/MappingUtils.java +++ b/src/jalview/util/MappingUtils.java @@ -770,7 +770,8 @@ public final class MappingUtils } /** - * Returns the total length of the supplied ranges + * Returns the total length of the supplied ranges, which may be as single + * [start, end] or multiple [start, end, start, end ...] 
* * @param ranges * @return @@ -784,7 +785,16 @@ public final class MappingUtils int length = 0; for (int[] range : ranges) { - length += Math.abs(range[1] - range[0]) + 1; + if (range.length % 2 != 0) + { + System.err.println("Error unbalance start/end ranges: " + + ranges.toString()); + return 0; + } + for (int i = 0; i < range.length - 1; i += 2) + { + length += Math.abs(range[i + 1] - range[i]) + 1; + } } return length; } @@ -848,7 +858,6 @@ public final class MappingUtils int cdspos = 0; for (int x = 0; x < copy.length && sxpos == -1; x += 2) { - // fixme handle reverse strand cdspos += Math.abs(copy[x + 1] - copy[x]) + 1; if (removeCount < cdspos) { diff --git a/src/jalview/viewmodel/AlignmentViewport.java b/src/jalview/viewmodel/AlignmentViewport.java index 6322243..b70e92b 100644 --- a/src/jalview/viewmodel/AlignmentViewport.java +++ b/src/jalview/viewmodel/AlignmentViewport.java @@ -2655,4 +2655,37 @@ public abstract class AlignmentViewport implements AlignViewportI, sequence.findPosition(middleColumn), mappings); return seqOffset; } + + /** + * synthesize a column selection if none exists so it covers the given + * selection group. if wholewidth is false, no column selection is made if the + * selection group covers the whole alignment width. 
+ * + * @param sg + * @param wholewidth + */ + public void expandColSelection(SequenceGroup sg, boolean wholewidth) + { + int sgs, sge; + if (sg != null + && (sgs = sg.getStartRes()) >= 0 + && sg.getStartRes() <= (sge = sg.getEndRes()) + && (colSel == null || colSel.getSelected() == null || colSel + .getSelected().size() == 0)) + { + if (!wholewidth && alignment.getWidth() == (1 + sge - sgs)) + { + // do nothing + return; + } + if (colSel == null) + { + colSel = new ColumnSelection(); + } + for (int cspos = sg.getStartRes(); cspos <= sg.getEndRes(); cspos++) + { + colSel.addElement(cspos); + } + } + } } diff --git a/src/jalview/ws/SequenceFetcher.java b/src/jalview/ws/SequenceFetcher.java index 902ce27..1f8c28a 100644 --- a/src/jalview/ws/SequenceFetcher.java +++ b/src/jalview/ws/SequenceFetcher.java @@ -39,9 +39,7 @@ import java.util.ArrayList; import java.util.List; /** - * This is the the concrete implementation of the sequence retrieval interface - * and abstract class in jalview.ws.seqfetcher. This implements the run-time - * discovery of sequence database clientss. + * This implements the run-time discovery of sequence database clients. * */ public class SequenceFetcher extends ASequenceFetcher diff --git a/src/jalview/ws/seqfetcher/ASequenceFetcher.java b/src/jalview/ws/seqfetcher/ASequenceFetcher.java index 2ed3263..2392476 100644 --- a/src/jalview/ws/seqfetcher/ASequenceFetcher.java +++ b/src/jalview/ws/seqfetcher/ASequenceFetcher.java @@ -20,6 +20,7 @@ */ package jalview.ws.seqfetcher; +import jalview.api.FeatureSettingsModelI; import jalview.bin.Cache; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; @@ -435,4 +436,28 @@ public class ASequenceFetcher return prlist.toArray(new DbSourceProxy[0]); } + /** + * Returns a preferred feature colouring scheme for the given source, or null + * if none is defined. 
+ * + * @param source + * @return + */ + public FeatureSettingsModelI getFeatureColourScheme(String source) + { + /* + * return the first non-null colour scheme for any proxy for + * this database source + */ + for (DbSourceProxy proxy : getSourceProxy(source)) + { + FeatureSettingsModelI preferredColours = proxy + .getFeatureColourScheme(); + if (preferredColours != null) + { + return preferredColours; + } + } + return null; + } } diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 8bdd740..7ccbf97 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -46,52 +46,16 @@ import jalview.util.MappingUtils; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Set; +import java.util.TreeMap; import org.testng.annotations.Test; public class AlignmentUtilsTests { - // @formatter:off - private static final String TEST_DATA = - "# STOCKHOLM 1.0\n" + - "#=GS D.melanogaster.1 AC AY119185.1/838-902\n" + - "#=GS D.melanogaster.2 AC AC092237.1/57223-57161\n" + - "#=GS D.melanogaster.3 AC AY060611.1/560-627\n" + - "D.melanogaster.1 G.AGCC.CU...AUGAUCGA\n" + - "#=GR D.melanogaster.1 SS ................((((\n" + - "D.melanogaster.2 C.AUUCAACU.UAUGAGGAU\n" + - "#=GR D.melanogaster.2 SS ................((((\n" + - "D.melanogaster.3 G.UGGCGCU..UAUGACGCA\n" + - "#=GR D.melanogaster.3 SS (.(((...(....(((((((\n" + - "//"; - - private static final String AA_SEQS_1 = - ">Seq1Name\n" + - "K-QY--L\n" + - ">Seq2Name\n" + - "-R-FP-W-\n"; - - private static final String CDNA_SEQS_1 = - ">Seq1Name\n" + - "AC-GG--CUC-CAA-CT\n" + - ">Seq2Name\n" + - "-CG-TTA--ACG---AAGT\n"; - - private static final String CDNA_SEQS_2 = - ">Seq1Name\n" + - "GCTCGUCGTACT\n" + - ">Seq2Name\n" + - "GGGTCAGGCAGT\n"; - // 
@formatter:on - - // public static Sequence ts=new - // Sequence("short","ASDASDASDASDASDASDASDASDASDASDASDASDASD"); public static Sequence ts = new Sequence("short", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm"); @@ -498,30 +462,6 @@ public class AlignmentUtilsTests } /** - * Test for the method that generates an aligned translated sequence from one - * mapping. - */ - @Test(groups = { "Functional" }) - public void testGetAlignedTranslation_dnaLikeProtein() - { - // dna alignment will be replaced - SequenceI dna = new Sequence("Seq1", "T-G-CC-A--T-TAC-CAG-"); - dna.createDatasetSequence(); - // protein alignment will be 'applied' to dna - SequenceI protein = new Sequence("Seq1", "-CH-Y--Q-"); - protein.createDatasetSequence(); - MapList map = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, 3, 1); - AlignedCodonFrame acf = new AlignedCodonFrame(); - acf.addMap(dna.getDatasetSequence(), protein.getDatasetSequence(), map); - - final SequenceI aligned = AlignmentUtils.getAlignedTranslation(protein, - '-', acf); - assertEquals("---TGCCAT---TAC------CAG---", - aligned.getSequenceAsString()); - assertSame(aligned.getDatasetSequence(), dna.getDatasetSequence()); - } - - /** * Test the method that realigns protein to match mapped codon alignment. 
*/ @Test(groups = { "Functional" }) @@ -1066,12 +1006,16 @@ public class AlignmentUtilsTests acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map); mappings.add(acf); + /* + * execute method under test: + */ AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] { dna1, dna2 }, mappings, dna); + assertEquals(2, cds.getSequences().size()); - assertEquals("---GGG---TTT---", cds.getSequenceAt(0) + assertEquals("GGGTTT", cds.getSequenceAt(0) .getSequenceAsString()); - assertEquals("GGG---TTT---CCC", cds.getSequenceAt(1) + assertEquals("GGGTTTCCC", cds.getSequenceAt(1) .getSequenceAsString()); /* @@ -1084,18 +1028,22 @@ public class AlignmentUtilsTests .contains(cds.getSequenceAt(1).getDatasetSequence())); /* - * Verify updated mappings + * Verify mappings from CDS to peptide and cDNA to CDS + * the mappings are on the shared alignment dataset */ - assertEquals(2, mappings.size()); - + assertSame(dna.getCodonFrames(), cds.getCodonFrames()); + List cdsMappings = cds.getCodonFrames(); + assertEquals(2, cdsMappings.size()); + /* * Mapping from pep1 to GGGTTT in first new exon sequence */ List pep1Mapping = MappingUtils - .findMappingsForSequence(pep1, mappings); + .findMappingsForSequence(pep1, cdsMappings); assertEquals(1, pep1Mapping.size()); // map G to GGG - SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings); + SearchResults sr = MappingUtils + .buildSearchResults(pep1, 1, cdsMappings); assertEquals(1, sr.getResults().size()); Match m = sr.getResults().get(0); assertSame(cds.getSequenceAt(0).getDatasetSequence(), @@ -1103,7 +1051,7 @@ public class AlignmentUtilsTests assertEquals(1, m.getStart()); assertEquals(3, m.getEnd()); // map F to TTT - sr = MappingUtils.buildSearchResults(pep1, 2, mappings); + sr = MappingUtils.buildSearchResults(pep1, 2, cdsMappings); m = sr.getResults().get(0); assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence()); @@ -1114,10 +1062,10 @@ public class AlignmentUtilsTests * Mapping 
from pep2 to GGGTTTCCC in second new exon sequence */ List pep2Mapping = MappingUtils - .findMappingsForSequence(pep2, mappings); + .findMappingsForSequence(pep2, cdsMappings); assertEquals(1, pep2Mapping.size()); // map G to GGG - sr = MappingUtils.buildSearchResults(pep2, 1, mappings); + sr = MappingUtils.buildSearchResults(pep2, 1, cdsMappings); assertEquals(1, sr.getResults().size()); m = sr.getResults().get(0); assertSame(cds.getSequenceAt(1).getDatasetSequence(), @@ -1125,14 +1073,14 @@ public class AlignmentUtilsTests assertEquals(1, m.getStart()); assertEquals(3, m.getEnd()); // map F to TTT - sr = MappingUtils.buildSearchResults(pep2, 2, mappings); + sr = MappingUtils.buildSearchResults(pep2, 2, cdsMappings); m = sr.getResults().get(0); assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); assertEquals(4, m.getStart()); assertEquals(6, m.getEnd()); // map P to CCC - sr = MappingUtils.buildSearchResults(pep2, 3, mappings); + sr = MappingUtils.buildSearchResults(pep2, 3, cdsMappings); m = sr.getResults().get(0); assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); @@ -1141,52 +1089,6 @@ public class AlignmentUtilsTests } /** - * Test the method that makes a cds-only sequence from a DNA sequence and its - * product mapping. Test includes the expected case that the DNA sequence - * already has a protein product (Uniprot translation) which in turn has an - * x-ref to the EMBLCDS record. - */ - @Test(groups = { "Functional" }) - public void testMakeCdsSequences() - { - SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa"); - SequenceI pep1 = new Sequence("pep1", "GF"); - dna1.createDatasetSequence(); - pep1.createDatasetSequence(); - pep1.getDatasetSequence().addDBRef( - new DBRefEntry("EMBLCDS", "2", "A12345")); - - /* - * Make the mapping from dna to protein. The protein sequence has a DBRef to - * EMBLCDS|A12345. 
- */ - Set mappings = new HashSet(); - MapList map = new MapList(new int[] { 4, 6, 10, 12 }, - new int[] { 1, 2 }, 3, 1); - AlignedCodonFrame acf = new AlignedCodonFrame(); - acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map); - mappings.add(acf); - - AlignedCodonFrame newMapping = new AlignedCodonFrame(); - List ungappedColumns = new ArrayList(); - ungappedColumns.add(new int[] { 4, 6 }); - ungappedColumns.add(new int[] { 10, 12 }); - List cdsSeqs = AlignmentUtils.makeCdsSequences(dna1, acf, - ungappedColumns, - newMapping, '-'); - assertEquals(1, cdsSeqs.size()); - SequenceI cdsSeq = cdsSeqs.get(0); - - assertEquals("GGGTTT", cdsSeq.getSequenceAsString()); - assertEquals("dna1|A12345", cdsSeq.getName()); - assertEquals(1, cdsSeq.getDBRefs().length); - DBRefEntry cdsRef = cdsSeq.getDBRefs()[0]; - assertEquals("EMBLCDS", cdsRef.getSource()); - assertEquals("2", cdsRef.getVersion()); - assertEquals("A12345", cdsRef.getAccessionId()); - } - - /** * Test the method that makes a cds-only alignment from a DNA sequence and its * product mappings, for the case where there are multiple exon mappings to * different protein products. 
@@ -1245,24 +1147,28 @@ public class AlignmentUtilsTests mappings.add(acf); /* - * Create the Exon alignment; also replaces the dna-to-protein mappings with + * Create the CDS alignment; also augments the dna-to-protein mappings with * exon-to-protein and exon-to-dna mappings */ AlignmentI dna = new Alignment(new SequenceI[] { dna1 }); dna.setDataset(null); - AlignmentI exal = AlignmentUtils.makeCdsAlignment( + + /* + * execute method under test + */ + AlignmentI cdsal = AlignmentUtils.makeCdsAlignment( new SequenceI[] { dna1 }, mappings, dna); /* * Verify we have 3 cds sequences, mapped to pep1/2/3 respectively */ - List cds = exal.getSequences(); + List cds = cdsal.getSequences(); assertEquals(3, cds.size()); /* * verify shared, extended alignment dataset */ - assertSame(exal.getDataset(), dna.getDataset()); + assertSame(cdsal.getDataset(), dna.getDataset()); assertTrue(dna.getDataset().getSequences() .contains(cds.get(0).getDatasetSequence())); assertTrue(dna.getDataset().getSequences() @@ -1274,72 +1180,72 @@ public class AlignmentUtilsTests * verify aligned cds sequences and their xrefs */ SequenceI cdsSeq = cds.get(0); - assertEquals("---GGG---TTT", cdsSeq.getSequenceAsString()); - assertEquals("dna1|A12345", cdsSeq.getName()); - assertEquals(1, cdsSeq.getDBRefs().length); - DBRefEntry cdsRef = cdsSeq.getDBRefs()[0]; - assertEquals("EMBLCDS", cdsRef.getSource()); - assertEquals("2", cdsRef.getVersion()); - assertEquals("A12345", cdsRef.getAccessionId()); + assertEquals("GGGTTT", cdsSeq.getSequenceAsString()); + // assertEquals("dna1|A12345", cdsSeq.getName()); + assertEquals("dna1|pep1", cdsSeq.getName()); + // assertEquals(1, cdsSeq.getDBRefs().length); + // DBRefEntry cdsRef = cdsSeq.getDBRefs()[0]; + // assertEquals("EMBLCDS", cdsRef.getSource()); + // assertEquals("2", cdsRef.getVersion()); + // assertEquals("A12345", cdsRef.getAccessionId()); cdsSeq = cds.get(1); - assertEquals("aaa---ccc---", cdsSeq.getSequenceAsString()); - assertEquals("dna1|A12346", 
cdsSeq.getName()); - assertEquals(1, cdsSeq.getDBRefs().length); - cdsRef = cdsSeq.getDBRefs()[0]; - assertEquals("EMBLCDS", cdsRef.getSource()); - assertEquals("3", cdsRef.getVersion()); - assertEquals("A12346", cdsRef.getAccessionId()); + assertEquals("aaaccc", cdsSeq.getSequenceAsString()); + // assertEquals("dna1|A12346", cdsSeq.getName()); + assertEquals("dna1|pep2", cdsSeq.getName()); + // assertEquals(1, cdsSeq.getDBRefs().length); + // cdsRef = cdsSeq.getDBRefs()[0]; + // assertEquals("EMBLCDS", cdsRef.getSource()); + // assertEquals("3", cdsRef.getVersion()); + // assertEquals("A12346", cdsRef.getAccessionId()); cdsSeq = cds.get(2); - assertEquals("aaa------TTT", cdsSeq.getSequenceAsString()); - assertEquals("dna1|A12347", cdsSeq.getName()); - assertEquals(1, cdsSeq.getDBRefs().length); - cdsRef = cdsSeq.getDBRefs()[0]; - assertEquals("EMBLCDS", cdsRef.getSource()); - assertEquals("4", cdsRef.getVersion()); - assertEquals("A12347", cdsRef.getAccessionId()); + assertEquals("aaaTTT", cdsSeq.getSequenceAsString()); + // assertEquals("dna1|A12347", cdsSeq.getName()); + assertEquals("dna1|pep3", cdsSeq.getName()); + // assertEquals(1, cdsSeq.getDBRefs().length); + // cdsRef = cdsSeq.getDBRefs()[0]; + // assertEquals("EMBLCDS", cdsRef.getSource()); + // assertEquals("4", cdsRef.getVersion()); + // assertEquals("A12347", cdsRef.getAccessionId()); /* * Verify there are mappings from each cds sequence to its protein product * and also to its dna source */ - Iterator newMappingsIterator = mappings.iterator(); + Iterator newMappingsIterator = cdsal + .getCodonFrames().iterator(); // mappings for dna1 - exon1 - pep1 AlignedCodonFrame cdsMapping = newMappingsIterator.next(); - List dnaMappings = cdsMapping.getMappingsForSequence(dna1); - assertEquals(1, dnaMappings.size()); + List dnaMappings = cdsMapping.getMappingsFromSequence(dna1); + assertEquals(3, dnaMappings.size()); assertSame(cds.get(0).getDatasetSequence(), dnaMappings.get(0) .getTo()); assertEquals("G(1) in 
CDS should map to G(4) in DNA", 4, dnaMappings .get(0).getMap().getToPosition(1)); - List peptideMappings = cdsMapping - .getMappingsForSequence(pep1); + List peptideMappings = cdsMapping.getMappingsFromSequence(cds + .get(0).getDatasetSequence()); assertEquals(1, peptideMappings.size()); assertSame(pep1.getDatasetSequence(), peptideMappings.get(0).getTo()); // mappings for dna1 - cds2 - pep2 - cdsMapping = newMappingsIterator.next(); - dnaMappings = cdsMapping.getMappingsForSequence(dna1); - assertEquals(1, dnaMappings.size()); - assertSame(cds.get(1).getDatasetSequence(), dnaMappings.get(0) + assertSame(cds.get(1).getDatasetSequence(), dnaMappings.get(1) .getTo()); assertEquals("c(4) in CDS should map to c(7) in DNA", 7, dnaMappings - .get(0).getMap().getToPosition(4)); - peptideMappings = cdsMapping.getMappingsForSequence(pep2); + .get(1).getMap().getToPosition(4)); + peptideMappings = cdsMapping.getMappingsFromSequence(cds.get(1) + .getDatasetSequence()); assertEquals(1, peptideMappings.size()); assertSame(pep2.getDatasetSequence(), peptideMappings.get(0).getTo()); // mappings for dna1 - cds3 - pep3 - cdsMapping = newMappingsIterator.next(); - dnaMappings = cdsMapping.getMappingsForSequence(dna1); - assertEquals(1, dnaMappings.size()); - assertSame(cds.get(2).getDatasetSequence(), dnaMappings.get(0) + assertSame(cds.get(2).getDatasetSequence(), dnaMappings.get(2) .getTo()); assertEquals("T(4) in CDS should map to T(10) in DNA", 10, dnaMappings - .get(0).getMap().getToPosition(4)); - peptideMappings = cdsMapping.getMappingsForSequence(pep3); + .get(2).getMap().getToPosition(4)); + peptideMappings = cdsMapping.getMappingsFromSequence(cds.get(2) + .getDatasetSequence()); assertEquals(1, peptideMappings.size()); assertSame(pep3.getDatasetSequence(), peptideMappings.get(0).getTo()); } @@ -1623,7 +1529,7 @@ public class AlignmentUtilsTests List cdsSeqs = cds.getSequences(); assertEquals(2, cdsSeqs.size()); assertEquals("GGGCCCTTTGGG", 
cdsSeqs.get(0).getSequenceAsString()); - assertEquals("GGGCC---TGGG", cdsSeqs.get(1).getSequenceAsString()); + assertEquals("GGGCCTGGG", cdsSeqs.get(1).getSequenceAsString()); /* * verify shared, extended alignment dataset @@ -1637,33 +1543,35 @@ public class AlignmentUtilsTests /* * Verify updated mappings */ - assertEquals(2, mappings.size()); + List cdsMappings = cds.getCodonFrames(); + assertEquals(2, cdsMappings.size()); /* * Mapping from pep1 to GGGTTT in first new CDS sequence */ List pep1Mapping = MappingUtils - .findMappingsForSequence(pep1, mappings); + .findMappingsForSequence(pep1, cdsMappings); assertEquals(1, pep1Mapping.size()); /* * maps GPFG to 1-3,4-6,7-9,10-12 */ - SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings); + SearchResults sr = MappingUtils + .buildSearchResults(pep1, 1, cdsMappings); assertEquals(1, sr.getResults().size()); Match m = sr.getResults().get(0); assertEquals(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence()); assertEquals(1, m.getStart()); assertEquals(3, m.getEnd()); - sr = MappingUtils.buildSearchResults(pep1, 2, mappings); + sr = MappingUtils.buildSearchResults(pep1, 2, cdsMappings); m = sr.getResults().get(0); assertEquals(4, m.getStart()); assertEquals(6, m.getEnd()); - sr = MappingUtils.buildSearchResults(pep1, 3, mappings); + sr = MappingUtils.buildSearchResults(pep1, 3, cdsMappings); m = sr.getResults().get(0); assertEquals(7, m.getStart()); assertEquals(9, m.getEnd()); - sr = MappingUtils.buildSearchResults(pep1, 4, mappings); + sr = MappingUtils.buildSearchResults(pep1, 4, cdsMappings); m = sr.getResults().get(0); assertEquals(10, m.getStart()); assertEquals(12, m.getEnd()); @@ -1672,98 +1580,26 @@ public class AlignmentUtilsTests * GPG in pep2 map to 1-3,4-6,7-9 in second CDS sequence */ List pep2Mapping = MappingUtils - .findMappingsForSequence(pep2, mappings); + .findMappingsForSequence(pep2, cdsMappings); assertEquals(1, pep2Mapping.size()); - sr = 
MappingUtils.buildSearchResults(pep2, 1, mappings); + sr = MappingUtils.buildSearchResults(pep2, 1, cdsMappings); assertEquals(1, sr.getResults().size()); m = sr.getResults().get(0); assertEquals(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); assertEquals(1, m.getStart()); assertEquals(3, m.getEnd()); - sr = MappingUtils.buildSearchResults(pep2, 2, mappings); + sr = MappingUtils.buildSearchResults(pep2, 2, cdsMappings); m = sr.getResults().get(0); assertEquals(4, m.getStart()); assertEquals(6, m.getEnd()); - sr = MappingUtils.buildSearchResults(pep2, 3, mappings); + sr = MappingUtils.buildSearchResults(pep2, 3, cdsMappings); m = sr.getResults().get(0); assertEquals(7, m.getStart()); assertEquals(9, m.getEnd()); } /** - * Tests for gapped column in sequences - */ - @Test(groups = { "Functional" }) - public void testIsGappedColumn() - { - SequenceI seq1 = new Sequence("Seq1", "a--c.tc-a-g"); - SequenceI seq2 = new Sequence("Seq2", "aa---t--a-g"); - SequenceI seq3 = new Sequence("Seq3", "ag-c t-g-"); - List seqs = Arrays - .asList(new SequenceI[] { seq1, seq2, seq3 }); - // the column number is base 1 - assertFalse(AlignmentUtils.isGappedColumn(seqs, 1)); - assertFalse(AlignmentUtils.isGappedColumn(seqs, 2)); - assertTrue(AlignmentUtils.isGappedColumn(seqs, 3)); - assertFalse(AlignmentUtils.isGappedColumn(seqs, 4)); - assertTrue(AlignmentUtils.isGappedColumn(seqs, 5)); - assertFalse(AlignmentUtils.isGappedColumn(seqs, 6)); - assertFalse(AlignmentUtils.isGappedColumn(seqs, 7)); - assertFalse(AlignmentUtils.isGappedColumn(seqs, 8)); - assertFalse(AlignmentUtils.isGappedColumn(seqs, 9)); - assertTrue(AlignmentUtils.isGappedColumn(seqs, 10)); - assertFalse(AlignmentUtils.isGappedColumn(seqs, 11)); - // out of bounds: - assertTrue(AlignmentUtils.isGappedColumn(seqs, 0)); - assertTrue(AlignmentUtils.isGappedColumn(seqs, 100)); - assertTrue(AlignmentUtils.isGappedColumn(seqs, -100)); - assertTrue(AlignmentUtils.isGappedColumn(null, 0)); - } - - @Test(groups = { 
"Functional" }) - public void testFindCdsColumns() - { - // TODO target method belongs in a general-purpose alignment - // analysis method to find columns for feature - - /* - * NB this method assumes CDS ranges are contiguous (no introns) - */ - SequenceI gene = new Sequence("gene", "aaacccgggtttaaacccgggttt"); - SequenceI seq1 = new Sequence("Seq1", "--ac-cgGG-GGaaACC--GGtt-"); - SequenceI seq2 = new Sequence("Seq2", "AA--CCGG--g-AAA--cG-GTTt"); - seq1.createDatasetSequence(); - seq2.createDatasetSequence(); - seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 5, 6, 0f, - null)); - seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 7, 8, 0f, - null)); - seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 11, 13, 0f, - null)); - seq1.addSequenceFeature(new SequenceFeature("CDS", "cds", 14, 15, 0f, - null)); - seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 1, 2, 0f, - null)); - seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 3, 6, 0f, - null)); - seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 8, 10, 0f, - null)); - seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 12, 12, 0f, - null)); - seq2.addSequenceFeature(new SequenceFeature("CDS", "cds", 13, 15, 0f, - null)); - - List cdsColumns = AlignmentUtils.findCdsColumns(new SequenceI[] { - seq1, seq2 }); - assertEquals(4, cdsColumns.size()); - assertEquals("[1, 2]", Arrays.toString(cdsColumns.get(0))); - assertEquals("[5, 9]", Arrays.toString(cdsColumns.get(1))); - assertEquals("[11, 17]", Arrays.toString(cdsColumns.get(2))); - assertEquals("[19, 23]", Arrays.toString(cdsColumns.get(3))); - } - - /** * Test the method that realigns protein to match mapped codon alignment. */ @Test(groups = { "Functional" }) @@ -1819,7 +1655,7 @@ public class AlignmentUtilsTests * (or subtype) feature - case where the start codon is incomplete. 
*/ @Test(groups = "Functional") - public void testGetCdsRanges_fivePrimeIncomplete() + public void testFindCdsPositions_fivePrimeIncomplete() { SequenceI dnaSeq = new Sequence("dna", "aaagGGCCCaaaTTTttt"); dnaSeq.createDatasetSequence(); @@ -1851,23 +1687,31 @@ public class AlignmentUtilsTests * (or subtype) feature. */ @Test(groups = "Functional") - public void testGetCdsRanges() + public void testFindCdsPositions() { SequenceI dnaSeq = new Sequence("dna", "aaaGGGcccAAATTTttt"); dnaSeq.createDatasetSequence(); SequenceI ds = dnaSeq.getDatasetSequence(); - // CDS for dna 3-6 - SequenceFeature sf = new SequenceFeature("CDS", "", 4, 6, 0f, null); + // CDS for dna 10-12 + SequenceFeature sf = new SequenceFeature("CDS_predicted", "", 10, 12, + 0f, null); + sf.setStrand("+"); + ds.addSequenceFeature(sf); + // CDS for dna 4-6 + sf = new SequenceFeature("CDS", "", 4, 6, 0f, null); + sf.setStrand("+"); ds.addSequenceFeature(sf); // exon feature should be ignored here sf = new SequenceFeature("exon", "", 7, 9, 0f, null); ds.addSequenceFeature(sf); - // CDS for dna 10-12 - sf = new SequenceFeature("CDS_predicted", "", 10, 12, 0f, null); - ds.addSequenceFeature(sf); List ranges = AlignmentUtils.findCdsPositions(dnaSeq); + /* + * verify ranges { [4-6], [12-10] } + * note CDS ranges are ordered ascending even if the CDS + * features are not + */ assertEquals(6, MappingUtils.getLength(ranges)); assertEquals(2, ranges.size()); assertEquals(4, ranges.get(0)[0]); @@ -2006,4 +1850,169 @@ public class AlignmentUtilsTests variants = AlignmentUtils.computePeptideVariants(codonVariants, "S"); assertEquals("[C, R, T, W]", variants.toString()); } + + /** + * Tests for the method that maps the subset of a dna sequence that has CDS + * (or subtype) feature, with CDS strand = '-' (reverse) + */ + // test turned off as currently findCdsPositions is not strand-dependent + // left in case it comes around again... 
+ @Test(groups = "Functional", enabled = false) + public void testFindCdsPositions_reverseStrand() + { + SequenceI dnaSeq = new Sequence("dna", "aaaGGGcccAAATTTttt"); + dnaSeq.createDatasetSequence(); + SequenceI ds = dnaSeq.getDatasetSequence(); + + // CDS for dna 4-6 + SequenceFeature sf = new SequenceFeature("CDS", "", 4, 6, 0f, null); + sf.setStrand("-"); + ds.addSequenceFeature(sf); + // exon feature should be ignored here + sf = new SequenceFeature("exon", "", 7, 9, 0f, null); + ds.addSequenceFeature(sf); + // CDS for dna 10-12 + sf = new SequenceFeature("CDS_predicted", "", 10, 12, 0f, null); + sf.setStrand("-"); + ds.addSequenceFeature(sf); + + List ranges = AlignmentUtils.findCdsPositions(dnaSeq); + /* + * verify ranges { [12-10], [6-4] } + */ + assertEquals(6, MappingUtils.getLength(ranges)); + assertEquals(2, ranges.size()); + assertEquals(12, ranges.get(0)[0]); + assertEquals(10, ranges.get(0)[1]); + assertEquals(6, ranges.get(1)[0]); + assertEquals(4, ranges.get(1)[1]); + } + + /** + * Tests for the method that maps the subset of a dna sequence that has CDS + * (or subtype) feature - reverse strand case where the start codon is + * incomplete. + */ + @Test(groups = "Functional", enabled = false) + // test turned off as currently findCdsPositions is not strand-dependent + // left in case it comes around again... 
+ public void testFindCdsPositions_reverseStrandThreePrimeIncomplete() + { + SequenceI dnaSeq = new Sequence("dna", "aaagGGCCCaaaTTTttt"); + dnaSeq.createDatasetSequence(); + SequenceI ds = dnaSeq.getDatasetSequence(); + + // CDS for dna 5-9 + SequenceFeature sf = new SequenceFeature("CDS", "", 5, 9, 0f, null); + sf.setStrand("-"); + ds.addSequenceFeature(sf); + // CDS for dna 13-15 + sf = new SequenceFeature("CDS_predicted", "", 13, 15, 0f, null); + sf.setStrand("-"); + sf.setPhase("2"); // skip 2 bases to start of next codon + ds.addSequenceFeature(sf); + + List ranges = AlignmentUtils.findCdsPositions(dnaSeq); + + /* + * check the mapping starts with the first complete codon + * expect ranges [13, 13], [9, 5] + */ + assertEquals(6, MappingUtils.getLength(ranges)); + assertEquals(2, ranges.size()); + assertEquals(13, ranges.get(0)[0]); + assertEquals(13, ranges.get(0)[1]); + assertEquals(9, ranges.get(1)[0]); + assertEquals(5, ranges.get(1)[1]); + } + + @Test(groups = "Functional") + public void testAlignAs_alternateTranscriptsUngapped() + { + SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa"); + SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA"); + AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 }); + ((Alignment) dna).createDatasetAlignment(); + SequenceI cds1 = new Sequence("cds1", "GGGTTT"); + SequenceI cds2 = new Sequence("cds2", "CCCAAA"); + AlignmentI cds = new Alignment(new SequenceI[] { cds1, cds2 }); + ((Alignment) cds).createDatasetAlignment(); + + AlignedCodonFrame acf = new AlignedCodonFrame(); + MapList map = new MapList(new int[] { 4, 9 }, new int[] { 1, 6 }, 1, 1); + acf.addMap(dna1.getDatasetSequence(), cds1.getDatasetSequence(), map); + map = new MapList(new int[] { 1, 3, 10, 12 }, new int[] { 1, 6 }, 1, 1); + acf.addMap(dna2.getDatasetSequence(), cds2.getDatasetSequence(), map); + + /* + * verify CDS alignment is as: + * cccGGGTTTaaa (cdna) + * CCCgggtttAAA (cdna) + * + * ---GGGTTT--- (cds) + * CCC------AAA (cds) + */ + 
dna.addCodonFrame(acf); + AlignmentUtils.alignAs(cds, dna); + assertEquals("---GGGTTT", cds.getSequenceAt(0).getSequenceAsString()); + assertEquals("CCC------AAA", cds.getSequenceAt(1).getSequenceAsString()); + } + + @Test(groups = { "Functional" }) + public void testAddMappedPositions() + { + SequenceI from = new Sequence("dna", "ggAA-ATcc-TT-g"); + SequenceI seq1 = new Sequence("cds", "AAATTT"); + from.createDatasetSequence(); + seq1.createDatasetSequence(); + Mapping mapping = new Mapping(seq1, new MapList( + new int[] { 3, 6, 9, 10 }, + new int[] { 1, 6 }, 1, 1)); + Map> map = new TreeMap>(); + AlignmentUtils.addMappedPositions(seq1, from, mapping, map); + + /* + * verify map has seq1 residues in columns 3,4,6,7,11,12 + */ + assertEquals(6, map.size()); + assertEquals('A', map.get(3).get(seq1).charValue()); + assertEquals('A', map.get(4).get(seq1).charValue()); + assertEquals('A', map.get(6).get(seq1).charValue()); + assertEquals('T', map.get(7).get(seq1).charValue()); + assertEquals('T', map.get(11).get(seq1).charValue()); + assertEquals('T', map.get(12).get(seq1).charValue()); + + /* + * + */ + } + + /** + * Test case where the mapping 'from' range includes a stop codon which is + * absent in the 'to' range + */ + @Test(groups = { "Functional" }) + public void testAddMappedPositions_withStopCodon() + { + SequenceI from = new Sequence("dna", "ggAA-ATcc-TT-g"); + SequenceI seq1 = new Sequence("cds", "AAATTT"); + from.createDatasetSequence(); + seq1.createDatasetSequence(); + Mapping mapping = new Mapping(seq1, new MapList( + new int[] { 3, 6, 9, 10 }, + new int[] { 1, 6 }, 1, 1)); + Map> map = new TreeMap>(); + AlignmentUtils.addMappedPositions(seq1, from, mapping, map); + + /* + * verify map has seq1 residues in columns 3,4,6,7,11,12 + */ + assertEquals(6, map.size()); + assertEquals('A', map.get(3).get(seq1).charValue()); + assertEquals('A', map.get(4).get(seq1).charValue()); + assertEquals('A', map.get(6).get(seq1).charValue()); + assertEquals('T', 
map.get(7).get(seq1).charValue()); + assertEquals('T', map.get(11).get(seq1).charValue()); + assertEquals('T', map.get(12).get(seq1).charValue()); + } } diff --git a/test/jalview/datamodel/AlignedCodonFrameTest.java b/test/jalview/datamodel/AlignedCodonFrameTest.java index 989ed7c..cd8a1e3 100644 --- a/test/jalview/datamodel/AlignedCodonFrameTest.java +++ b/test/jalview/datamodel/AlignedCodonFrameTest.java @@ -74,6 +74,9 @@ public class AlignedCodonFrameTest */ assertEquals(aa.getSequenceAt(1), acf.findAlignedSequence(cdna .getSequenceAt(0).getDatasetSequence(), aa)); + // can also find this from the dna aligned sequence + assertEquals(aa.getSequenceAt(1), + acf.findAlignedSequence(cdna.getSequenceAt(0), aa)); assertEquals(cdna.getSequenceAt(0), acf.findAlignedSequence(aa .getSequenceAt(1).getDatasetSequence(), cdna)); diff --git a/test/jalview/datamodel/AlignmentTest.java b/test/jalview/datamodel/AlignmentTest.java index b4b0e12..bd445c4 100644 --- a/test/jalview/datamodel/AlignmentTest.java +++ b/test/jalview/datamodel/AlignmentTest.java @@ -181,12 +181,12 @@ public class AlignmentTest * Make mappings between sequences. The 'aligned cDNA' is playing the role * of what would normally be protein here. 
*/ - makeMappings(al2, al1); + makeMappings(al1, al2); ((Alignment) al2).alignAs(al1, false, true); - assertEquals("GC-TC--GUC-GTA-CT", al2.getSequenceAt(0) + assertEquals("GC-TC--GUC-GTACT", al2.getSequenceAt(0) .getSequenceAsString()); - assertEquals("-GG-GTC--AGG---CAGT", al2.getSequenceAt(1) + assertEquals("-GG-GTC--AGG--CAGT", al2.getSequenceAt(1) .getSequenceAsString()); } @@ -203,38 +203,21 @@ public class AlignmentTest AlignmentI al2 = loadAlignment(AA_SEQS_1, "FASTA"); makeMappings(al1, al2); + // Fudge - alignProteinAsCdna expects mappings to be on protein + al2.getCodonFrames().addAll(al1.getCodonFrames()); + ((Alignment) al2).alignAs(al1, false, true); assertEquals("K-Q-Y-L-", al2.getSequenceAt(0).getSequenceAsString()); assertEquals("-R-F-P-W", al2.getSequenceAt(1).getSequenceAsString()); } /** - * Aligning protein from cDNA for a single sequence. This is the 'simple' case - * (as there is no need to compute codon 'alignments') but worth testing - * before tackling the multiple sequence case. - * - * @throws IOException - */ - @Test(groups = { "Functional" }) - public void testAlignAs_proteinAsCdna_singleSequence() throws IOException - { - /* - * simplest case remove all gaps - */ - verifyAlignAs(">protein\n-Q-K-\n", ">dna\nCAAaaa\n", "QK"); - - /* - * with sequence offsets - */ - verifyAlignAs(">protein/12-13\n-Q-K-\n", ">dna/20-25\nCAAaaa\n", "QK"); - } - - /** * Test aligning cdna as per protein alignment. 
* * @throws IOException */ - @Test(groups = { "Functional" }) + @Test(groups = { "Functional" }, enabled = false) + // TODO review / update this test after redesign of alignAs method public void testAlignAs_cdnaAsProtein() throws IOException { /* @@ -259,7 +242,8 @@ public class AlignmentTest * * @throws IOException */ - @Test(groups = { "Functional" }) + @Test(groups = { "Functional" }, enabled = false) + // TODO review / update this test after redesign of alignAs method public void testAlignAs_cdnaAsProtein_singleSequence() throws IOException { /* @@ -308,32 +292,29 @@ public class AlignmentTest } /** - * Helper method to make mappings from protein to dna sequences, and add the - * mappings to the protein alignment + * Helper method to make mappings between sequences, and add the mappings to + * the 'mapped from' alignment * * @param alFrom * @param alTo */ public void makeMappings(AlignmentI alFrom, AlignmentI alTo) { - AlignmentI prot = !alFrom.isNucleotide() ? alFrom : alTo; - AlignmentI nuc = alFrom == prot ? alTo : alFrom; - int ratio = (alFrom.isNucleotide() == alTo.isNucleotide() ? 
1 : 3); AlignedCodonFrame acf = new AlignedCodonFrame(); - for (int i = 0; i < nuc.getHeight(); i++) + for (int i = 0; i < alFrom.getHeight(); i++) { - SequenceI seqFrom = nuc.getSequenceAt(i); - SequenceI seqTo = prot.getSequenceAt(i); + SequenceI seqFrom = alFrom.getSequenceAt(i); + SequenceI seqTo = alTo.getSequenceAt(i); MapList ml = new MapList(new int[] { seqFrom.getStart(), seqFrom.getEnd() }, new int[] { seqTo.getStart(), seqTo.getEnd() }, ratio, 1); acf.addMap(seqFrom, seqTo, ml); } - prot.addCodonFrame(acf); + alFrom.addCodonFrame(acf); } /** @@ -342,7 +323,8 @@ public class AlignmentTest * * @throws IOException */ - @Test(groups = { "Functional" }) + @Test(groups = { "Functional" }, enabled = false) + // TODO review / update this test after redesign of alignAs method public void testAlignAs_dnaAsProtein_withIntrons() throws IOException { /* @@ -350,14 +332,13 @@ public class AlignmentTest */ String dna1 = "A-Aa-gG-GCC-cT-TT"; String dna2 = "c--CCGgg-TT--T-AA-A"; - AlignmentI al1 = loadAlignment(">Seq1/6-17\n" + dna1 - + "\n>Seq2/20-31\n" + dna2 + "\n", "FASTA"); + AlignmentI al1 = loadAlignment(">Dna1/6-17\n" + dna1 + + "\n>Dna2/20-31\n" + dna2 + "\n", "FASTA"); AlignmentI al2 = loadAlignment( - ">Seq1/7-9\n-P--YK\n>Seq2/11-13\nG-T--F\n", "FASTA"); + ">Pep1/7-9\n-P--YK\n>Pep2/11-13\nG-T--F\n", "FASTA"); AlignedCodonFrame acf = new AlignedCodonFrame(); // Seq1 has intron at dna positions 3,4,9 so splice is AAG GCC TTT // Seq2 has intron at dna positions 1,5,6 so splice is CCG TTT AAA - // TODO sequence offsets MapList ml1 = new MapList(new int[] { 6, 7, 10, 13, 15, 17 }, new int[] { 7, 9 }, 3, 1); acf.addMap(al1.getSequenceAt(0), al2.getSequenceAt(0), ml1); diff --git a/test/jalview/datamodel/ColumnSelectionTest.java b/test/jalview/datamodel/ColumnSelectionTest.java index 698f259..0f08ceb 100644 --- a/test/jalview/datamodel/ColumnSelectionTest.java +++ b/test/jalview/datamodel/ColumnSelectionTest.java @@ -343,4 +343,36 @@ public class ColumnSelectionTest 
cs.invertColumnSelection(1, 9); assertEquals("[1, 4, 8]", cs.getSelected().toString()); } + + @Test(groups = { "Functional" }) + public void testMaxColumnSelection() + { + ColumnSelection cs = new ColumnSelection(); + cs.addElement(0); + cs.addElement(513); + cs.addElement(1); + assertEquals(513, cs.getMax()); + cs.removeElement(513); + assertEquals(1, cs.getMax()); + cs.removeElement(1); + assertEquals(0, cs.getMax()); + cs.addElement(512); + cs.addElement(513); + assertEquals(513, cs.getMax()); + + } + + @Test(groups = { "Functional" }) + public void testMinColumnSelection() + { + ColumnSelection cs = new ColumnSelection(); + cs.addElement(0); + cs.addElement(513); + cs.addElement(1); + assertEquals(0, cs.getMin()); + cs.removeElement(0); + assertEquals(1, cs.getMin()); + cs.addElement(0); + assertEquals(0, cs.getMin()); + } } diff --git a/test/jalview/datamodel/MappingTest.java b/test/jalview/datamodel/MappingTest.java index cbecad5..3131ad7 100644 --- a/test/jalview/datamodel/MappingTest.java +++ b/test/jalview/datamodel/MappingTest.java @@ -66,13 +66,13 @@ public class MappingTest MapList fk = new MapList(new int[] { 1, 6, 8, 13 }, new int[] { 4, 7 }, 3, 1); Mapping m = new Mapping(fk); - assertEquals("[ [1, 6] [8, 13] ] To [ [4, 7] ] ", m.toString()); + assertEquals("[ [1, 6] [8, 13] ] 3:1 to [ [4, 7] ] ", m.toString()); /* * with a sequence */ SequenceI seq = new Sequence("Seq1", ""); m = new Mapping(seq, fk); - assertEquals("[ [1, 6] [8, 13] ] To [ [4, 7] ] Seq1", m.toString()); + assertEquals("[ [1, 6] [8, 13] ] 3:1 to [ [4, 7] ] Seq1", m.toString()); } } diff --git a/test/jalview/datamodel/SequenceTest.java b/test/jalview/datamodel/SequenceTest.java index b8116f5..95755ee 100644 --- a/test/jalview/datamodel/SequenceTest.java +++ b/test/jalview/datamodel/SequenceTest.java @@ -26,6 +26,7 @@ import static org.testng.AssertJUnit.assertNotNull; import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.assertSame; import static 
org.testng.AssertJUnit.assertTrue; +import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals; import jalview.datamodel.PDBEntry.Type; @@ -38,7 +39,7 @@ import org.testng.annotations.Test; public class SequenceTest { - SequenceI seq; + Sequence seq; @BeforeMethod(alwaysRun = true) public void setUp() @@ -382,20 +383,23 @@ public class SequenceTest @Test(groups = { "Functional" }) public void testDeriveSequence_existingDataset() { - SequenceI sq = new Sequence("Seq1", "CD"); + Sequence sq = new Sequence("Seq1", "CD"); sq.setDatasetSequence(new Sequence("Seq1", "ABCDEF")); sq.getDatasetSequence().addSequenceFeature( new SequenceFeature("", "", 1, 2, 0f, null)); sq.setStart(3); sq.setEnd(4); - SequenceI derived = sq.deriveSequence(); + + Sequence derived = (Sequence) sq.deriveSequence(); assertEquals("CD", derived.getSequenceAsString()); assertSame(sq.getDatasetSequence(), derived.getDatasetSequence()); - assertNull(((Sequence) seq).sequenceFeatures); - assertNull(((Sequence) derived).sequenceFeatures); - assertNotNull(seq.getSequenceFeatures()); - assertSame(seq.getSequenceFeatures(), derived.getSequenceFeatures()); + assertNull(sq.sequenceFeatures); + // assertNull(derived.sequenceFeatures); + assertNotNull(sq.getSequenceFeatures()); + // derived sequence has a copy of the sequence features (is this right?) 
+ assertArrayEquals(sq.getSequenceFeatures(), + derived.getSequenceFeatures()); } /** diff --git a/test/jalview/datamodel/xdb/embl/EmblFileTest.java b/test/jalview/datamodel/xdb/embl/EmblFileTest.java index d7fd7e4..c6a94d7 100644 --- a/test/jalview/datamodel/xdb/embl/EmblFileTest.java +++ b/test/jalview/datamodel/xdb/embl/EmblFileTest.java @@ -41,6 +41,7 @@ public class EmblFileTest + "Chicken LDH-A mRNA for lactate dehydrogenase A chain (EC 1.1.1.27)" + "L-lactate dehydrogenasechutney" + "" + + "" + "" + "L-lactate dehydrogenase A-chainpickle" + "MSLKDHLIHNKeith" @@ -79,11 +80,16 @@ public class EmblFileTest /* * dbrefs */ - assertEquals(1, entry.getDbRefs().size()); + assertEquals(2, entry.getDbRefs().size()); DBRefEntry dbref = entry.getDbRefs().get(0); assertEquals("EuropePMC", dbref.getSource()); assertEquals("PMC1460223", dbref.getAccessionId()); assertEquals("9649548", dbref.getVersion()); + dbref = entry.getDbRefs().get(1); + assertEquals("MD5", dbref.getSource()); + assertEquals("d3b68", dbref.getAccessionId()); + // blank version has been converted to "0" + assertEquals("0", dbref.getVersion()); /* * sequence features @@ -99,7 +105,8 @@ public class EmblFileTest dbref = ef.getDbRefs().get(1); assertEquals("InterPro", dbref.getSource()); assertEquals("IPR001236", dbref.getAccessionId()); - assertEquals("", dbref.getVersion()); + // blank version converted to "0": + assertEquals("0", dbref.getVersion()); assertEquals(2, ef.getQualifiers().size()); // feature qualifiers @@ -142,5 +149,12 @@ public class EmblFileTest assertEquals("mRNA", seq.getType()); assertEquals("2", seq.getVersion()); assertEquals("GTGACG", seq.getSequence()); + + /* + * getSequence() converts empty DBRefEntry.version to "0" + */ + assertEquals("0", entry.getDbRefs().get(1).getVersion()); + assertEquals("0", entry.getFeatures().get(0).getDbRefs().get(1) + .getVersion()); } } diff --git a/test/jalview/ext/ensembl/EnsemblXrefTest.java b/test/jalview/ext/ensembl/EnsemblXrefTest.java index 
5073423..cde4afe 100644 --- a/test/jalview/ext/ensembl/EnsemblXrefTest.java +++ b/test/jalview/ext/ensembl/EnsemblXrefTest.java @@ -1,7 +1,6 @@ package jalview.ext.ensembl; import static org.testng.AssertJUnit.assertEquals; -import static org.testng.AssertJUnit.assertTrue; import jalview.datamodel.DBRefEntry; @@ -9,7 +8,6 @@ import java.io.BufferedReader; import java.io.IOException; import java.io.StringReader; import java.net.URL; -import java.util.Arrays; import java.util.List; import org.testng.annotations.Test; @@ -19,7 +17,8 @@ public class EnsemblXrefTest //@formatter:off private static final String JSON = "[{\"primary_id\":\"CCDS5863\",\"dbname\":\"CCDS\"}," + - "{\"primary_id\":\"P15056\",\"dbname\":\"Uniprot/SWISSPROT\"}]"; + "{\"primary_id\":\"P15056\",\"dbname\":\"Uniprot/SWISSPROT\",\"synonyms\":[\"C21\"]}," + + "{\"primary_id\":\"GO:0000165\",\"dbname\":\"GO\"}]"; //@formatter:on @Test(groups = "functional") @@ -36,33 +35,13 @@ public class EnsemblXrefTest } }; - /* - * with no filter - */ - List dbrefs = testee.getCrossReferences("ABCDE", null); + // synonyms and GO terms are not returned + List dbrefs = testee.getCrossReferences("ABCDE"); assertEquals(2, dbrefs.size()); assertEquals("CCDS", dbrefs.get(0).getSource()); assertEquals("CCDS5863", dbrefs.get(0).getAccessionId()); // Uniprot name should get converted to Jalview canonical form assertEquals("UNIPROT", dbrefs.get(1).getSource()); assertEquals("P15056", dbrefs.get(1).getAccessionId()); - - /* - * filter for Uniprot only - */ - dbrefs = testee.getCrossReferences( - "ABCDE", - Arrays.asList(new String[] { "Uniprot/SWISSPROT", - "Uniprot/SPTREMBL" })); - assertEquals(1, dbrefs.size()); - assertEquals("UNIPROT", dbrefs.get(0).getSource()); - assertEquals("P15056", dbrefs.get(0).getAccessionId()); - - /* - * filter for PDB only - */ - dbrefs = testee.getCrossReferences("ABCDE", - Arrays.asList(new String[] { "PDB" })); - assertTrue(dbrefs.isEmpty()); } } diff --git 
a/test/jalview/io/FeaturesFileTest.java b/test/jalview/io/FeaturesFileTest.java index 7112c77..6da160d 100644 --- a/test/jalview/io/FeaturesFileTest.java +++ b/test/jalview/io/FeaturesFileTest.java @@ -24,14 +24,10 @@ import static org.testng.AssertJUnit.assertEquals; import static org.testng.AssertJUnit.assertFalse; import static org.testng.AssertJUnit.assertNotNull; import static org.testng.AssertJUnit.assertNull; -import static org.testng.AssertJUnit.assertSame; import static org.testng.AssertJUnit.assertTrue; -import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals; -import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; -import jalview.datamodel.Mapping; import jalview.datamodel.SequenceDummy; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; @@ -42,9 +38,7 @@ import jalview.schemes.GraduatedColor; import java.awt.Color; import java.io.File; import java.io.IOException; -import java.util.Iterator; import java.util.Map; -import java.util.Set; import org.testng.annotations.Test; @@ -167,7 +161,7 @@ public class FeaturesFileTest .getSequenceFeatures(); assertEquals(1, sfs.length); SequenceFeature sf = sfs[0]; - assertEquals("Iron-sulfur; 2Fe-2S", sf.description); + assertEquals("Iron-sulfur,2Fe-2S", sf.description); assertEquals(44, sf.begin); assertEquals(45, sf.end); assertEquals("uniprot", sf.featureGroup); @@ -268,7 +262,7 @@ public class FeaturesFileTest assertEquals(1, sfs.length); SequenceFeature sf = sfs[0]; // description parsed from Note attribute - assertEquals("Iron-sulfur (2Fe-2S); another note", sf.description); + assertEquals("Iron-sulfur (2Fe-2S),another note", sf.description); assertEquals(39, sf.begin); assertEquals(39, sf.end); assertEquals("uniprot", sf.featureGroup); diff --git a/test/jalview/util/MapListTest.java b/test/jalview/util/MapListTest.java index e9bcf24..d4ed0ea 100644 --- a/test/jalview/util/MapListTest.java +++ 
b/test/jalview/util/MapListTest.java @@ -535,7 +535,7 @@ public class MapListTest MapList ml = new MapList(new int[] { 1, 5, 10, 15, 25, 20 }, new int[] { 51, 1 }, 1, 3); String s = ml.toString(); - assertEquals("[ [1, 5] [10, 15] [25, 20] ] To [ [51, 1] ]", + assertEquals("[ [1, 5] [10, 15] [25, 20] ] 1:3 to [ [51, 1] ]", s); } @@ -559,7 +559,7 @@ public class MapListTest String s = ml.toString(); assertEquals( - "[ [11, 15] [20, 25] [35, 30] [2, 4] [37, 40] ] To [ [72, 22] [12, 17] [78, 83] [88, 96] ]", + "[ [11, 15] [20, 25] [35, 30] [2, 4] [37, 40] ] 1:3 to [ [72, 22] [12, 17] [78, 83] [88, 96] ]", s); } @@ -572,7 +572,7 @@ public class MapListTest MapList ml2 = new MapList(new int[] { 15, 16 }, new int[] { 58, 53 }, 1, 3); ml.addMapList(ml2); - assertEquals("[ [11, 16] ] To [ [72, 53] ]", ml.toString()); + assertEquals("[ [11, 16] ] 1:3 to [ [72, 53] ]", ml.toString()); } @Test(groups = "Functional") diff --git a/test/jalview/util/MappingUtilsTest.java b/test/jalview/util/MappingUtilsTest.java index b53d513..3c417c3 100644 --- a/test/jalview/util/MappingUtilsTest.java +++ b/test/jalview/util/MappingUtilsTest.java @@ -861,6 +861,10 @@ public class MappingUtilsTest public void testGetLength() { assertEquals(0, MappingUtils.getLength(null)); + + /* + * [start, end] ranges + */ List ranges = new ArrayList(); assertEquals(0, MappingUtils.getLength(ranges)); ranges.add(new int[] { 1, 1 }); @@ -869,6 +873,15 @@ public class MappingUtilsTest assertEquals(10, MappingUtils.getLength(ranges)); ranges.add(new int[] { 20, 10 }); assertEquals(21, MappingUtils.getLength(ranges)); + + /* + * [start, end, start, end...] 
ranges + */ + ranges.clear(); + ranges.add(new int[] { 1, 5, 8, 4 }); + ranges.add(new int[] { 8, 2 }); + ranges.add(new int[] { 12, 12 }); + assertEquals(18, MappingUtils.getLength(ranges)); } @Test(groups = { "Functional" }) diff --git a/test/jalview/ws/SequenceFetcherTest.java b/test/jalview/ws/SequenceFetcherTest.java index d7058d0..a54ce8b 100644 --- a/test/jalview/ws/SequenceFetcherTest.java +++ b/test/jalview/ws/SequenceFetcherTest.java @@ -7,7 +7,6 @@ import jalview.datamodel.SequenceI; import jalview.ws.seqfetcher.ASequenceFetcher; import jalview.ws.seqfetcher.DbSourceProxy; -import java.util.ArrayList; import java.util.Enumeration; import java.util.List; import java.util.Vector; @@ -26,7 +25,7 @@ public class SequenceFetcherTest // assertions AlignmentI ds = null; - Vector noProds = new Vector(); + Vector noProds = new Vector(); String usage = "SequenceFetcher.main [-nodas] [ []]\n" + "With no arguments, all DbSources will be queried with their test Accession number.\n" + "With one argument, the argument will be resolved to one or more db sources and each will be queried with their test accession only.\n" @@ -117,7 +116,7 @@ public class SequenceFetcherTest System.out.println("Type: " + types[t]); SequenceI[] prod = jalview.analysis.CrossRef .findXrefSequences(al.getSequencesArray(), dna, - types[t], null, new ArrayList()) + types[t], null) .getSequencesArray(); System.out.println("Found " + ((prod == null) ? "no" : "" + prod.length) @@ -186,11 +185,11 @@ public class SequenceFetcherTest } if (noProds.size() > 0) { - Enumeration ts = noProds.elements(); + Enumeration ts = noProds.elements(); while (ts.hasMoreElements()) { - Object[] typeSq = (Object[]) ts.nextElement(); + Object[] typeSq = ts.nextElement(); boolean dna = (typeSq.length > 1); AlignmentI al = (AlignmentI) typeSq[0]; System.out.println("Trying getProducts for " @@ -201,7 +200,7 @@ public class SequenceFetcherTest // sequences. 
SequenceI[] seqs = al.getSequencesArray(); Alignment prodal = jalview.analysis.CrossRef.findXrefSequences( - seqs, dna, null, ds, new ArrayList()); + seqs, dna, null, ds); System.out.println("Found " + ((prodal == null) ? "no" : "" + prodal.getHeight()) + " products"); diff --git a/test/jalview/ws/dbsources/UniprotTest.java b/test/jalview/ws/dbsources/UniprotTest.java index ed3ac77..72e599d 100644 --- a/test/jalview/ws/dbsources/UniprotTest.java +++ b/test/jalview/ws/dbsources/UniprotTest.java @@ -135,8 +135,9 @@ public class UniprotTest /* * name formatted as source | accession ids | names + * source database converted to Jalview canonical name */ - String expectedName = "UniProt/Swiss-Prot|A9CKP4|A9CKP5|A9CKP4_AGRT5|A9CKP4_AGRT6"; + String expectedName = "UNIPROT|A9CKP4|A9CKP5|A9CKP4_AGRT5|A9CKP4_AGRT6"; assertEquals(expectedName, Uniprot.getUniprotEntryId(entry)); } diff --git a/test/jalview/ws/seqfetcher/DbRefFetcherTest.java b/test/jalview/ws/seqfetcher/DbRefFetcherTest.java index b9e209f..63b1b9c 100644 --- a/test/jalview/ws/seqfetcher/DbRefFetcherTest.java +++ b/test/jalview/ws/seqfetcher/DbRefFetcherTest.java @@ -179,8 +179,7 @@ public class DbRefFetcherTest assertEquals("Expected local reference map to be 3 nucleotides", dr[0] .getMap().getWidth(), 3); AlignmentI sprods = CrossRef.findXrefSequences( - alsq.getSequencesArray(), true, dr[0].getSource(), alsq, - new ArrayList()); + alsq.getSequencesArray(), true, dr[0].getSource(), alsq); assertNotNull( "Couldn't recover cross reference sequence from dataset. Was it ever added ?", sprods);