From: gmungoc Date: Sat, 9 Jul 2016 06:12:09 +0000 (+0100) Subject: Merge branch 'develop' into features/JAL-2110_crossRefDuplications X-Git-Tag: Release_2_10_0~140^2~5^2~10 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=4eb1ed22600411fe5b6e9ac93084b45429ccfad6;hp=0bec4854366c4bc4608ed0d7dc1506fc6afe2285;p=jalview.git Merge branch 'develop' into features/JAL-2110_crossRefDuplications --- diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 33a54e8..17aab15 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -22,8 +22,10 @@ package jalview.analysis; import static jalview.io.gff.GffConstants.CLINICAL_SIGNIFICANCE; +import jalview.api.DBRefEntryI; import jalview.datamodel.AlignedCodon; import jalview.datamodel.AlignedCodonFrame; +import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; @@ -38,6 +40,7 @@ import jalview.io.gff.SequenceOntologyFactory; import jalview.io.gff.SequenceOntologyI; import jalview.schemes.ResidueProperties; import jalview.util.Comparison; +import jalview.util.DBRefUtils; import jalview.util.MapList; import jalview.util.MappingUtils; import jalview.util.StringUtils; @@ -849,6 +852,11 @@ public class AlignmentUtils */ public static int alignProteinAsDna(AlignmentI protein, AlignmentI dna) { + if (protein.isNucleotide() || !dna.isNucleotide()) + { + System.err.println("Wrong alignment type in alignProteinAsDna"); + return 0; + } List unmappedProtein = new ArrayList(); Map> alignedCodons = buildCodonColumnsMap( protein, dna, unmappedProtein); @@ -856,6 +864,162 @@ public class AlignmentUtils } /** + * Realigns the given dna to match the alignment of the protein, using codon + * mappings to translate aligned peptide positions to codons. 
+ * + * @param dna + * the alignment whose sequences are realigned by this method + * @param protein + * the protein alignment whose alignment we are 'copying' + * @return the number of sequences that were realigned + */ + public static int alignCdsAsProtein(AlignmentI dna, AlignmentI protein) + { + if (protein.isNucleotide() || !dna.isNucleotide()) + { + System.err.println("Wrong alignment type in alignProteinAsDna"); + return 0; + } + // todo: implement this + List mappings = protein.getCodonFrames(); + int alignedCount = 0; + for (SequenceI dnaSeq : dna.getSequences()) + { + if (alignCdsSequenceAsProtein(dnaSeq, protein, mappings, + dna.getGapCharacter())) + { + alignedCount++; + } + } + return alignedCount; + } + + /** + * Helper method to align (if possible) the dna sequence to match the + * alignment of a mapped protein sequence. This is currently limited to + * handling coding sequence only. + * + * @param cdsSeq + * @param protein + * @param mappings + * @param gapChar + * @return + */ + static boolean alignCdsSequenceAsProtein(SequenceI cdsSeq, + AlignmentI protein, List mappings, char gapChar) + { + SequenceI cdsDss = cdsSeq.getDatasetSequence(); + if (cdsDss == null) + { + System.err + .println("alignCdsSequenceAsProtein needs aligned sequence!"); + return false; + } + + List dnaMappings = MappingUtils + .findMappingsForSequence(cdsSeq, mappings); + for (AlignedCodonFrame mapping : dnaMappings) + { + SequenceI peptide = mapping.findAlignedSequence(cdsSeq, protein); + int peptideLength = peptide.getLength(); + if (peptide != null) + { + Mapping map = mapping.getMappingBetween(cdsSeq, peptide); + if (map != null) + { + MapList mapList = map.getMap(); + if (map.getTo() == peptide.getDatasetSequence()) + { + mapList = mapList.getInverse(); + } + int cdsLength = cdsDss.getLength(); + int mappedFromLength = MappingUtils.getLength(mapList + .getFromRanges()); + int mappedToLength = MappingUtils + .getLength(mapList.getToRanges()); + boolean addStopCodon = 
(cdsLength == mappedFromLength * 3 + 3) + || (peptide.getDatasetSequence().getLength() == mappedFromLength - 1); + if (cdsLength != mappedToLength && !addStopCodon) + { + System.err + .println(String + .format("Can't align cds as protein (length mismatch %d/%d): %s", + cdsLength, mappedToLength, + cdsSeq.getName())); + } + + /* + * pre-fill the aligned cds sequence with gaps + */ + char[] alignedCds = new char[peptideLength * 3 + + (addStopCodon ? 3 : 0)]; + Arrays.fill(alignedCds, gapChar); + + /* + * walk over the aligned peptide sequence and insert mapped + * codons for residues in the aligned cds sequence + */ + char[] alignedPeptide = peptide.getSequence(); + char[] nucleotides = cdsDss.getSequence(); + int copiedBases = 0; + int cdsStart = cdsDss.getStart(); + int proteinPos = peptide.getStart() - 1; + int cdsCol = 0; + for (char residue : alignedPeptide) + { + if (Comparison.isGap(residue)) + { + cdsCol += 3; + } + else + { + proteinPos++; + int[] codon = mapList.locateInTo(proteinPos, proteinPos); + if (codon == null) + { + // e.g. 
incomplete start codon, X in peptide + cdsCol += 3; + } + else + { + for (int j = codon[0]; j <= codon[1]; j++) + { + char mappedBase = nucleotides[j - cdsStart]; + alignedCds[cdsCol++] = mappedBase; + copiedBases++; + } + } + } + } + + /* + * append stop codon if not mapped from protein, + * closing it up to the end of the mapped sequence + */ + if (copiedBases == nucleotides.length - 3) + { + for (int i = alignedCds.length - 1; i >= 0; i--) + { + if (!Comparison.isGap(alignedCds[i])) + { + cdsCol = i + 1; // gap just after end of sequence + break; + } + } + for (int i = nucleotides.length - 3; i < nucleotides.length; i++) + { + alignedCds[cdsCol++] = nucleotides[i]; + } + } + cdsSeq.setSequence(new String(alignedCds)); + return true; + } + } + } + return false; + } + + /** * Builds a map whose key is an aligned codon position (3 alignment column * numbers base 0), and whose value is a map from protein sequence to each * protein's peptide residue for that codon. The map generates an ordering of @@ -1403,75 +1567,272 @@ public class AlignmentUtils * added to the alignment dataset. 
* * @param dna - * aligned dna sequences - * @param mappings - * from dna to protein - * @param al + * aligned nucleotide (dna or cds) sequences + * @param dataset + * the alignment dataset the sequences belong to + * @param products + * (optional) to restrict results to CDS that map to specified + * protein products * @return an alignment whose sequences are the cds-only parts of the dna * sequences (or null if no mappings are found) */ public static AlignmentI makeCdsAlignment(SequenceI[] dna, - List mappings, AlignmentI al) + AlignmentI dataset, SequenceI[] products) { + if (dataset == null || dataset.getDataset() != null) + { + throw new IllegalArgumentException( + "IMPLEMENTATION ERROR: dataset.getDataset() must be null!"); + } + List foundSeqs = new ArrayList(); List cdsSeqs = new ArrayList(); - - for (SequenceI seq : dna) + List mappings = dataset.getCodonFrames(); + HashSet productSeqs = null; + if (products != null) + { + productSeqs = new HashSet(); + for (SequenceI seq : products) + { + productSeqs.add(seq.getDatasetSequence() == null ? seq : seq + .getDatasetSequence()); + } + } + + /* + * Construct CDS sequences from mappings on the alignment dataset. + * The logic is: + * - find the protein product(s) mapped to from each dna sequence + * - if the mapping covers the whole dna sequence (give or take start/stop + * codon), take the dna as the CDS sequence + * - else search dataset mappings for a suitable dna sequence, i.e. one + * whose whole sequence is mapped to the protein + * - if no sequence found, construct one from the dna sequence and mapping + * (and add it to dataset so it is found if this is repeated) + */ + for (SequenceI dnaSeq : dna) { - AlignedCodonFrame cdsMappings = new AlignedCodonFrame(); + SequenceI dnaDss = dnaSeq.getDatasetSequence() == null ? 
dnaSeq + : dnaSeq.getDatasetSequence(); + List seqMappings = MappingUtils - .findMappingsForSequence(seq, mappings); - List alignmentMappings = al.getCodonFrames(); + .findMappingsForSequence(dnaSeq, mappings); for (AlignedCodonFrame mapping : seqMappings) { - for (Mapping aMapping : mapping.getMappingsFromSequence(seq)) + List mappingsFromSequence = mapping + .getMappingsFromSequence(dnaSeq); + + for (Mapping aMapping : mappingsFromSequence) { - SequenceI cdsSeq = makeCdsSequence(seq.getDatasetSequence(), - aMapping); + MapList mapList = aMapping.getMap(); + if (mapList.getFromRatio() == 1) + { + /* + * not a dna-to-protein mapping (likely dna-to-cds) + */ + continue; + } + + /* + * skip if mapping is not to one of the target set of proteins + */ + SequenceI proteinProduct = aMapping.getTo(); + if (productSeqs != null && !productSeqs.contains(proteinProduct)) + { + continue; + } + + /* + * try to locate the CDS from the dataset mappings; + * guard against duplicate results (for the case that protein has + * dbrefs to both dna and cds sequences) + */ + SequenceI cdsSeq = findCdsForProtein(mappings, dnaSeq, + seqMappings, aMapping); + if (cdsSeq != null) + { + if (!foundSeqs.contains(cdsSeq)) + { + foundSeqs.add(cdsSeq); + SequenceI derivedSequence = cdsSeq.deriveSequence(); + cdsSeqs.add(derivedSequence); + if (!dataset.getSequences().contains(cdsSeq)) + { + dataset.addSequence(cdsSeq); + } + } + continue; + } + + /* + * didn't find mapped CDS sequence - construct it and add + * its dataset sequence to the dataset + */ + cdsSeq = makeCdsSequence(dnaSeq.getDatasetSequence(), aMapping); + SequenceI cdsSeqDss = cdsSeq.createDatasetSequence(); cdsSeqs.add(cdsSeq); - + if (!dataset.getSequences().contains(cdsSeqDss)) + { + dataset.addSequence(cdsSeqDss); + } + /* * add a mapping from CDS to the (unchanged) mapped to range */ List cdsRange = Collections.singletonList(new int[] { 1, cdsSeq.getLength() }); - MapList map = new MapList(cdsRange, aMapping.getMap() - 
.getToRanges(), aMapping.getMap().getFromRatio(), - aMapping.getMap().getToRatio()); - cdsMappings.addMap(cdsSeq, aMapping.getTo(), map); + MapList cdsToProteinMap = new MapList(cdsRange, mapList.getToRanges(), + mapList.getFromRatio(), mapList.getToRatio()); + AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame(); + cdsToProteinMapping.addMap(cdsSeq, proteinProduct, cdsToProteinMap); + + /* + * guard against duplicating the mapping if repeating this action + */ + if (!mappings.contains(cdsToProteinMapping)) + { + mappings.add(cdsToProteinMapping); + } + + /* + * copy protein's dbrefs to CDS sequence + * this enables Get Cross-References from CDS alignment + */ + DBRefEntry[] proteinRefs = DBRefUtils.selectDbRefs(false, + proteinProduct.getDBRefs()); + if (proteinRefs != null) + { + for (DBRefEntry ref : proteinRefs) + { + DBRefEntry cdsToProteinRef = new DBRefEntry(ref); + cdsToProteinRef.setMap(new Mapping(proteinProduct, + cdsToProteinMap)); + cdsSeqDss.addDBRef(cdsToProteinRef); + } + } /* * add another mapping from original 'from' range to CDS */ - map = new MapList(aMapping.getMap().getFromRanges(), cdsRange, 1, + AlignedCodonFrame dnaToCdsMapping = new AlignedCodonFrame(); + MapList dnaToCdsMap = new MapList(mapList.getFromRanges(), + cdsRange, 1, 1); - cdsMappings.addMap(seq.getDatasetSequence(), cdsSeq, map); + dnaToCdsMapping.addMap(dnaSeq.getDatasetSequence(), cdsSeq, + dnaToCdsMap); + if (!mappings.contains(dnaToCdsMapping)) + { + mappings.add(dnaToCdsMapping); + } - alignmentMappings.add(cdsMappings); + /* + * add DBRef with mapping from protein to CDS + * (this enables Get Cross-References from protein alignment) + * This is tricky because we can't have two DBRefs with the + * same source and accession, so need a different accession for + * the CDS from the dna sequence + */ + DBRefEntryI dnaRef = dnaDss.getSourceDBRef(); + if (dnaRef != null) + { + // assuming cds version same as dna ?!? 
+ DBRefEntry proteinToCdsRef = new DBRefEntry(dnaRef.getSource(), + dnaRef.getVersion(), cdsSeq.getName()); + proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap + .getInverse())); + proteinProduct.addDBRef(proteinToCdsRef); + } /* * transfer any features on dna that overlap the CDS */ - transferFeatures(seq, cdsSeq, map, null, SequenceOntologyI.CDS); + transferFeatures(dnaSeq, cdsSeq, cdsToProteinMap, null, + SequenceOntologyI.CDS); } } } + AlignmentI cds = new Alignment(cdsSeqs.toArray(new SequenceI[cdsSeqs + .size()])); + cds.setDataset(dataset); + + return cds; + } + + /** + * A helper method that finds a CDS sequence in the alignment dataset that is + * mapped to the given protein sequence, and either is, or has a mapping from, + * the given dna sequence. + * + * @param mappings + * set of all mappings on the dataset + * @param dnaSeq + * a dna (or cds) sequence we are searching from + * @param seqMappings + * the set of mappings involving dnaSeq + * @param aMapping + * an initial candidate from seqMappings + * @return + */ + static SequenceI findCdsForProtein(List mappings, + SequenceI dnaSeq, List seqMappings, + Mapping aMapping) + { + /* + * TODO a better dna-cds-protein mapping data representation to allow easy + * navigation; until then this clunky looping around lists of mappings + */ + SequenceI seqDss = dnaSeq.getDatasetSequence() == null ? dnaSeq + : dnaSeq.getDatasetSequence(); + SequenceI proteinProduct = aMapping.getTo(); + + /* + * is this mapping from the whole dna sequence (i.e. CDS)? 
+ * allowing for possible stop codon on dna but not peptide + */ + int mappedFromLength = MappingUtils.getLength(aMapping.getMap() + .getFromRanges()); + int dnaLength = seqDss.getLength(); + if (mappedFromLength == dnaLength || mappedFromLength == dnaLength - 3) + { + return seqDss; + } + /* - * add CDS seqs to shared dataset + * looks like we found the dna-to-protein mapping; search for the + * corresponding cds-to-protein mapping */ - Alignment dataset = al.getDataset(); - for (SequenceI seq : cdsSeqs) + List mappingsToPeptide = MappingUtils + .findMappingsForSequence(proteinProduct, mappings); + for (AlignedCodonFrame acf : mappingsToPeptide) { - if (!dataset.getSequences().contains(seq.getDatasetSequence())) + for (SequenceToSequenceMapping map : acf.getMappings()) { - dataset.addSequence(seq.getDatasetSequence()); + Mapping mapping = map.getMapping(); + if (mapping != aMapping && mapping.getMap().getFromRatio() == 3 + && proteinProduct == mapping.getTo() + && seqDss != map.getFromSeq()) + { + mappedFromLength = MappingUtils.getLength(mapping.getMap() + .getFromRanges()); + if (mappedFromLength == map.getFromSeq().getLength()) + { + /* + * found a 3:1 mapping to the protein product which covers + * the whole dna sequence i.e. 
is from CDS; finally check it + * is from the dna start sequence + */ + SequenceI cdsSeq = map.getFromSeq(); + List dnaToCdsMaps = MappingUtils + .findMappingsForSequence(cdsSeq, seqMappings); + if (!dnaToCdsMaps.isEmpty()) + { + return cdsSeq; + } + } + } } } - AlignmentI cds = new Alignment(cdsSeqs.toArray(new SequenceI[cdsSeqs - .size()])); - cds.setDataset(dataset); - - return cds; + return null; } /** @@ -1481,7 +1842,7 @@ public class AlignmentUtils * * @param seq * @param mapping - * @return + * @return CDS sequence (as a dataset sequence) */ static SequenceI makeCdsSequence(SequenceI seq, Mapping mapping) { @@ -1511,9 +1872,15 @@ public class AlignmentUtils } } - SequenceI newSeq = new Sequence(seq.getName() + "|" - + mapping.getTo().getName(), newSeqChars, 1, newPos); - newSeq.createDatasetSequence(); + /* + * assign 'from id' held in the mapping if set (e.g. EMBL protein_id), + * else generate a sequence name + */ + String mapFromId = mapping.getMappedFromId(); + String seqId = "CDS|" + (mapFromId != null ? mapFromId : seq.getName()); + SequenceI newSeq = new Sequence(seqId, newSeqChars, 1, newPos); + // newSeq.setDescription(mapFromId); + return newSeq; } @@ -1799,17 +2166,20 @@ public class AlignmentUtils * sort to get sequence features in start position order * - would be better to store in Sequence as a TreeSet or NCList? */ - Arrays.sort(peptide.getSequenceFeatures(), - new Comparator() - { - @Override - public int compare(SequenceFeature o1, SequenceFeature o2) + if (peptide.getSequenceFeatures() != null) + { + Arrays.sort(peptide.getSequenceFeatures(), + new Comparator() { - int c = Integer.compare(o1.getBegin(), o2.getBegin()); - return c == 0 ? Integer.compare(o1.getEnd(), o2.getEnd()) - : c; - } - }); + @Override + public int compare(SequenceFeature o1, SequenceFeature o2) + { + int c = Integer.compare(o1.getBegin(), o2.getBegin()); + return c == 0 ? 
Integer.compare(o1.getEnd(), o2.getEnd()) + : c; + } + }); + } return count; } @@ -2111,32 +2481,15 @@ public class AlignmentUtils * * @param seqs * @param xrefs + * @param dataset + * the alignment dataset shared by the new copy * @return */ public static AlignmentI makeCopyAlignment(SequenceI[] seqs, - SequenceI[] xrefs) + SequenceI[] xrefs, AlignmentI dataset) { AlignmentI copy = new Alignment(new Alignment(seqs)); - - /* - * add mappings between sequences to the new alignment - */ - AlignedCodonFrame mappings = new AlignedCodonFrame(); - copy.addCodonFrame(mappings); - for (int i = 0; i < copy.getHeight(); i++) - { - SequenceI from = seqs[i]; - SequenceI to = copy.getSequenceAt(i); - if (to.getDatasetSequence() != null) - { - to = to.getDatasetSequence(); - } - int start = from.getStart(); - int end = from.getEnd(); - MapList map = new MapList(new int[] { start, end }, new int[] { - start, end }, 1, 1); - mappings.addMap(to, from, map); - } + copy.setDataset(dataset); SequenceIdMatcher matcher = new SequenceIdMatcher(seqs); if (xrefs != null) @@ -2182,6 +2535,17 @@ public class AlignmentUtils */ public static int alignAs(AlignmentI unaligned, AlignmentI aligned) { + /* + * easy case - aligning a copy of aligned sequences + */ + if (alignAsSameSequences(unaligned, aligned)) + { + return unaligned.getHeight(); + } + + /* + * fancy case - aligning via mappings between sequences + */ List unmapped = new ArrayList(); Map> columnMap = buildMappedColumnsMap( unaligned, aligned, unmapped); @@ -2234,6 +2598,54 @@ public class AlignmentUtils } /** + * If unaligned and aligned sequences share the same dataset sequences, then + * simply copies the aligned sequences to the unaligned sequences and returns + * true; else returns false + * + * @param unaligned + * @param aligned + * @return + */ + static boolean alignAsSameSequences(AlignmentI unaligned, + AlignmentI aligned) + { + if (aligned.getDataset() == null || unaligned.getDataset() == null) + { + return false; // 
should only pass alignments with datasets here + } + + Map alignedDatasets = new HashMap(); + for (SequenceI seq : aligned.getSequences()) + { + alignedDatasets.put(seq.getDatasetSequence(), seq); + } + + /* + * first pass - check whether all sequences to be aligned share a dataset + * sequence with an aligned sequence + */ + for (SequenceI seq : unaligned.getSequences()) + { + if (!alignedDatasets.containsKey(seq.getDatasetSequence())) + { + return false; + } + } + + /* + * second pass - copy aligned sequences + */ + for (SequenceI seq : unaligned.getSequences()) + { + SequenceI alignedSequence = alignedDatasets.get(seq + .getDatasetSequence()); + seq.setSequence(alignedSequence.getSequenceAsString()); + } + + return true; + } + + /** * Returns a map whose key is alignment column number (base 1), and whose * values are a map of sequence characters in that column. * @@ -2247,13 +2659,13 @@ public class AlignmentUtils { /* * Map will hold, for each aligned column position, a map of - * {unalignedSequence, sequenceCharacter} at that position. + * {unalignedSequence, characterPerSequence} at that position. * TreeMap keeps the entries in ascending column order. 
*/ Map> map = new TreeMap>(); /* - * r any sequences that have no mapping so can't be realigned + * record any sequences that have no mapping so can't be realigned */ unmapped.addAll(unaligned.getSequences()); @@ -2302,6 +2714,15 @@ public class AlignmentUtils return false; } + /* + * invert mapping if it is from unaligned to aligned sequence + */ + if (seqMap.getTo() == fromSeq.getDatasetSequence()) + { + seqMap = new Mapping(seq.getDatasetSequence(), seqMap.getMap() + .getInverse()); + } + char[] fromChars = fromSeq.getSequence(); int toStart = seq.getStart(); char[] toChars = seq.getSequence(); @@ -2335,7 +2756,8 @@ public class AlignmentUtils * of the next character of the mapped-to sequence; stop when all * the characters of the range have been counted */ - while (mappedCharPos <= range[1]) + while (mappedCharPos <= range[1] && fromCol <= fromChars.length + && fromCol >= 0) { if (!Comparison.isGap(fromChars[fromCol - 1])) { diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index 7e77fc1..288d60e 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -24,23 +24,21 @@ import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; -import jalview.datamodel.DBRefSource; import jalview.datamodel.Mapping; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.util.DBRefUtils; import jalview.util.MapList; -import jalview.ws.SequenceFetcher; +import jalview.ws.SequenceFetcherFactory; import jalview.ws.seqfetcher.ASequenceFetcher; import java.util.ArrayList; +import java.util.Iterator; import java.util.List; -import java.util.Vector; /** - * Functions for cross-referencing sequence databases. user must first specify - * if cross-referencing from protein or dna (set dna==true) + * Functions for cross-referencing sequence databases. 
* * @author JimP * @@ -48,195 +46,173 @@ import java.util.Vector; public class CrossRef { /* - * A sub-class that ignores Parent attribute when comparing sequence - * features. This avoids 'duplicate' CDS features that only - * differ in their parent Transcript ids. + * the dataset of the alignment for which we are searching for + * cross-references; in some cases we may resolve xrefs by + * searching in the dataset */ - class MySequenceFeature extends SequenceFeature - { - private SequenceFeature feat; + private AlignmentI dataset; - MySequenceFeature(SequenceFeature sf) - { - this.feat = sf; - } + /* + * the sequences for which we are seeking cross-references + */ + private SequenceI[] fromSeqs; - @Override - public boolean equals(Object o) - { - return feat.equals(o, true); - } - } + /** + * matcher built from dataset + */ + SequenceIdMatcher matcher; /** - * Select just the DNA or protein references for a protein or dna sequence - * - * @param fromDna - * if true, select references from DNA (i.e. Protein databases), else - * DNA database references - * @param refs - * a set of references to select from - * @return + * sequences found by cross-ref searches to fromSeqs */ - public static DBRefEntry[] findXDbRefs(boolean fromDna, DBRefEntry[] refs) - { - return DBRefUtils.selectRefs(refs, fromDna ? 
DBRefSource.PROTEINDBS - : DBRefSource.DNACODINGDBS); - // could attempt to find other cross - // refs here - ie PDB xrefs - // (not dna, not protein seq) - } + List rseqs; /** - * @param dna - * true if seqs are DNA seqs + * Constructor + * * @param seqs - * @return a list of sequence database cross reference source types + * the sequences for which we are seeking cross-references + * @param ds + * the containing alignment dataset (may be searched to resolve + * cross-references) */ - public static String[] findSequenceXrefTypes(boolean dna, SequenceI[] seqs) + public CrossRef(SequenceI[] seqs, AlignmentI ds) { - return findSequenceXrefTypes(dna, seqs, null); + fromSeqs = seqs; + dataset = ds.getDataset() == null ? ds : ds.getDataset(); } /** - * Indirect references are references from other sequences from the dataset to - * any of the direct DBRefEntrys on the given sequences. + * Returns a list of distinct database sources for which sequences have either + *
    + *
  • a (dna-to-protein or protein-to-dna) cross-reference
  • + *
  • an indirect cross-reference - a (dna-to-protein or protein-to-dna) + * reference from another sequence in the dataset which has a cross-reference + * to a direct DBRefEntry on the given sequence
  • + *
* * @param dna - * true if seqs are DNA seqs - * @param seqs - * @return a list of sequence database cross reference source types + * - when true, cross-references *from* dna returned. When false, + * cross-references *from* protein are returned + * @return */ - public static String[] findSequenceXrefTypes(boolean dna, - SequenceI[] seqs, AlignmentI dataset) + public List findXrefSourcesForSequences(boolean dna) { - String[] dbrefs = null; - List refs = new ArrayList(); - for (SequenceI seq : seqs) + List sources = new ArrayList(); + for (SequenceI seq : fromSeqs) { if (seq != null) { - SequenceI dss = seq; - while (dss.getDatasetSequence() != null) - { - dss = dss.getDatasetSequence(); - } - DBRefEntry[] rfs = findXDbRefs(dna, dss.getDBRefs()); - if (rfs != null) - { - for (DBRefEntry ref : rfs) - { - if (!refs.contains(ref.getSource())) - { - refs.add(ref.getSource()); - } - } - } - if (dataset != null) - { - // search for references to this sequence's direct references. - DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seq.getDBRefs()); - List rseqs = new ArrayList(); - CrossRef.searchDatasetXrefs(seq, !dna, lrfs, dataset, rseqs, - null); // don't need to specify codon frame for mapping here - for (SequenceI rs : rseqs) - { - DBRefEntry[] xrs = findXDbRefs(dna, rs.getDBRefs()); - if (xrs != null) - { - for (DBRefEntry ref : xrs) - { - if (!refs.contains(ref.getSource())) - { - refs.add(ref.getSource()); - } - } - } - // looks like copy and paste - change rfs to xrs? - // for (int r = 0; rfs != null && r < rfs.length; r++) - // { - // if (!refs.contains(rfs[r].getSource())) - // { - // refs.add(rfs[r].getSource()); - // } - // } - } - } + findXrefSourcesForSequence(seq, dna, sources); } } - if (refs.size() > 0) - { - dbrefs = new String[refs.size()]; - refs.toArray(dbrefs); - } - return dbrefs; + return sources; } - public static boolean hasCdnaMap(SequenceI[] seqs) + /** + * Returns a list of distinct database sources for which a sequence has either + *
    + *
  • a (dna-to-protein or protein-to-dna) cross-reference
  • + *
  • an indirect cross-reference - a (dna-to-protein or protein-to-dna) + * reference from another sequence in the dataset which has a cross-reference + * to a direct DBRefEntry on the given sequence
  • + *
+ * + * @param seq + * the sequence whose dbrefs we are searching against + * @param fromDna + * when true, context is DNA - so sources identifying protein + * products will be returned. + * @param sources + * a list of sources to add matches to + */ + void findXrefSourcesForSequence(SequenceI seq, boolean fromDna, + List sources) { - // TODO unused - remove? - String[] reftypes = findSequenceXrefTypes(false, seqs); - for (int s = 0; s < reftypes.length; s++) + /* + * first find seq's xrefs (dna-to-peptide or peptide-to-dna) + */ + DBRefEntry[] rfs = DBRefUtils.selectDbRefs(!fromDna, seq.getDBRefs()); + addXrefsToSources(rfs, sources); + if (dataset != null) { - if (reftypes.equals(DBRefSource.EMBLCDS)) + /* + * find sequence's direct (dna-to-dna, peptide-to-peptide) xrefs + */ + DBRefEntry[] lrfs = DBRefUtils.selectDbRefs(fromDna, seq.getDBRefs()); + List foundSeqs = new ArrayList(); + + /* + * find sequences in the alignment which xref one of these DBRefs + * i.e. is xref-ed to a common sequence identifier + */ + searchDatasetXrefs(fromDna, seq, lrfs, foundSeqs, null); + + /* + * add those sequences' (dna-to-peptide or peptide-to-dna) dbref sources + */ + for (SequenceI rs : foundSeqs) { - return true; - // no map + DBRefEntry[] xrs = DBRefUtils + .selectDbRefs(!fromDna, rs.getDBRefs()); + addXrefsToSources(xrs, sources); } } - return false; } - public static SequenceI[] getCdnaMap(SequenceI[] seqs) + /** + * Helper method that adds the source identifiers of some cross-references to + * a (non-redundant) list of database sources + * + * @param xrefs + * @param sources + */ + void addXrefsToSources(DBRefEntry[] xrefs, List sources) { - // TODO unused - remove? 
- Vector cseqs = new Vector(); - for (int s = 0; s < seqs.length; s++) + if (xrefs != null) { - DBRefEntry[] cdna = findXDbRefs(true, seqs[s].getDBRefs()); - for (int c = 0; c < cdna.length; c++) + for (DBRefEntry ref : xrefs) { - if (cdna[c].getSource().equals(DBRefSource.EMBLCDS)) + /* + * avoid duplication e.g. ENSEMBL and Ensembl + */ + String source = DBRefUtils.getCanonicalName(ref.getSource()); + if (!sources.contains(source)) { - System.err - .println("TODO: unimplemented sequence retrieval for coding region sequence."); - // TODO: retrieve CDS dataset sequences - // need global dataset sequence retriever/resolver to reuse refs - // and construct Mapping entry. - // insert gaps in CDS according to peptide gaps. - // add gapped sequence to cseqs + sources.add(source); } } } - if (cseqs.size() > 0) - { - SequenceI[] rsqs = new SequenceI[cseqs.size()]; - cseqs.copyInto(rsqs); - return rsqs; - } - return null; - } /** + * Attempts to find cross-references from the sequences provided in the + * constructor to the given source database. Cross-references may be found + *
    + *
  • in dbrefs on the sequence which hold a mapping to a sequence + *
      + *
    • provided with a fetched sequence (e.g. ENA translation), or
    • + *
    • populated previously after getting cross-references
    • + *
    + *
  • as other sequences in the alignment which share a dbref identifier with + * the sequence
  • + *
  • by fetching from the remote database
  • + *
+ * The cross-referenced sequences, and mappings to them, are added to the + * alignment dataset. * - * @param seqs - * sequences whose xrefs are being retrieved - * @param dna - * true if sequences are nucleotide * @param source - * @param al - * alignment to search for cross-referenced sequences (and possibly - * add to) - * @return products (as dataset sequences) + * @return cross-referenced sequences (as dataset sequences) */ - public static Alignment findXrefSequences(SequenceI[] seqs, - final boolean dna, final String source, AlignmentI al) + public Alignment findXrefSequences(String source, boolean fromDna) { - AlignmentI dataset = al.getDataset() == null ? al : al.getDataset(); - List rseqs = new ArrayList(); + + rseqs = new ArrayList(); AlignedCodonFrame cf = new AlignedCodonFrame(); - for (SequenceI seq : seqs) + matcher = new SequenceIdMatcher( + dataset.getSequences()); + + for (SequenceI seq : fromSeqs) { SequenceI dss = seq; while (dss.getDatasetSequence() != null) @@ -244,242 +220,389 @@ public class CrossRef dss = dss.getDatasetSequence(); } boolean found = false; - DBRefEntry[] xrfs = CrossRef.findXDbRefs(dna, dss.getDBRefs()); + DBRefEntry[] xrfs = DBRefUtils + .selectDbRefs(!fromDna, dss.getDBRefs()); if ((xrfs == null || xrfs.length == 0) && dataset != null) { - System.out.println("Attempting to find ds Xrefs refs."); - // FIXME should be dss not seq here? - DBRefEntry[] lrfs = CrossRef.findXDbRefs(!dna, seq.getDBRefs()); - // less ambiguous would be a 'find primary dbRefEntry' method. 
- // filter for desired source xref here - found = CrossRef.searchDatasetXrefs(dss, !dna, lrfs, dataset, - rseqs, cf); + /* + * found no suitable dbrefs on sequence - look for sequences in the + * alignment which share a dbref with this one + */ + DBRefEntry[] lrfs = DBRefUtils.selectDbRefs(fromDna, + seq.getDBRefs()); + + /* + * find sequences (except this one!), of complementary type, + * which have a dbref to an accession id for this sequence, + * and add them to the results + */ + found = searchDatasetXrefs(fromDna, dss, lrfs, rseqs, cf); } - for (int r = 0; xrfs != null && r < xrfs.length; r++) + if (xrfs == null && !found) { - DBRefEntry xref = xrfs[r]; - if (source != null && !source.equals(xref.getSource())) - { - continue; - } + /* + * no dbref to source on this sequence or matched + * complementary sequence in the dataset + */ + continue; + } + List sourceRefs = DBRefUtils.searchRefsForSource(xrfs, + source); + Iterator refIterator = sourceRefs.iterator(); + while (refIterator.hasNext()) + { + DBRefEntry xref = refIterator.next(); + found = false; if (xref.hasMap()) { - if (xref.getMap().getTo() != null) + SequenceI mappedTo = xref.getMap().getTo(); + if (mappedTo != null) { - SequenceI rsq = new Sequence(xref.getMap().getTo()); + /* + * dbref contains the sequence it maps to; add it to the + * results unless we have done so already (could happen if + * fetching xrefs for sequences which have xrefs in common) + * for example: UNIPROT {P0CE19, P0CE20} -> EMBL {J03321, X06707} + */ + found = true; + /* + * problem: matcher.findIdMatch() is lenient - returns a sequence + * with a dbref to the search arg e.g. ENST for ENSP - wrong + * but findInDataset() matches ENSP when looking for Uniprot... 
+ */ + SequenceI matchInDataset = findInDataset(xref); + /*matcher.findIdMatch(mappedTo);*/ + if (matchInDataset != null) + { + if (!rseqs.contains(matchInDataset)) + { + rseqs.add(matchInDataset); + } + refIterator.remove(); + continue; + } + SequenceI rsq = new Sequence(mappedTo); rseqs.add(rsq); - if (xref.getMap().getMap().getFromRatio() != xref - .getMap().getMap().getToRatio()) + if (xref.getMap().getMap().getFromRatio() != xref.getMap() + .getMap().getToRatio()) { // get sense of map correct for adding to product alignment. - if (dna) + if (fromDna) { // map is from dna seq to a protein product - cf.addMap(dss, rsq, xref.getMap().getMap()); + cf.addMap(dss, rsq, xref.getMap().getMap(), xref.getMap() + .getMappedFromId()); } else { // map should be from protein seq to its coding dna - cf.addMap(rsq, dss, xref.getMap().getMap().getInverse()); + cf.addMap(rsq, dss, xref.getMap().getMap().getInverse(), + xref.getMap().getMappedFromId()); } } - found = true; } } + if (!found) { - // do a bit more work - search for sequences with references matching - // xrefs on this sequence. - if (dataset != null) + SequenceI matchedSeq = matcher.findIdMatch(xref.getSource() + "|" + + xref.getAccessionId()); + if (matchedSeq != null) { - found |= searchDataset(dss, xref, dataset, rseqs, cf, false, - !dna); - if (found) + if (constructMapping(seq, matchedSeq, xref, cf, fromDna)) { - xrfs[r] = null; // we've recovered seqs for this one. + found = true; } } } + + if (!found) + { + // do a bit more work - search for sequences with references matching + // xrefs on this sequence. 
+ found = searchDataset(fromDna, dss, xref, rseqs, cf, false); + } + if (found) + { + refIterator.remove(); + } + } + + /* + * fetch from source database any dbrefs we haven't resolved up to here + */ + if (!sourceRefs.isEmpty()) + { + retrieveCrossRef(sourceRefs, seq, xrfs, fromDna, cf); + } + } + + Alignment ral = null; + if (rseqs.size() > 0) + { + ral = new Alignment(rseqs.toArray(new SequenceI[rseqs.size()])); + if (!cf.isEmpty()) + { + dataset.addCodonFrame(cf); } - if (!found) + } + return ral; + } + + private void retrieveCrossRef(List sourceRefs, SequenceI seq, + DBRefEntry[] xrfs, boolean fromDna, AlignedCodonFrame cf) + { + ASequenceFetcher sftch = SequenceFetcherFactory.getSequenceFetcher(); + SequenceI[] retrieved = null; + SequenceI dss = seq.getDatasetSequence() == null ? seq : seq + .getDatasetSequence(); + try + { + retrieved = sftch.getSequences(sourceRefs, !fromDna); + } catch (Exception e) + { + System.err + .println("Problem whilst retrieving cross references for Sequence : " + + seq.getName()); + e.printStackTrace(); + } + + if (retrieved != null) + { + updateDbrefMappings(seq, xrfs, retrieved, cf, fromDna); + for (SequenceI retrievedSequence : retrieved) { - if (xrfs != null && xrfs.length > 0) + // dataset gets contaminated ccwith non-ds sequences. why ??! + // try: Ensembl -> Nuc->Ensembl, Nuc->Uniprot-->Protein->EMBL-> + SequenceI retrievedDss = retrievedSequence.getDatasetSequence() == null ? retrievedSequence + : retrievedSequence.getDatasetSequence(); + DBRefEntry[] dbr = retrievedSequence.getDBRefs(); + if (dbr != null) { - // Try and get the sequence reference... 
- /* - * Ideal world - we ask for a sequence fetcher implementation here if - * (jalview.io.RunTimeEnvironment.getSequenceFetcher()) ( - */ - ASequenceFetcher sftch = new SequenceFetcher(); - SequenceI[] retrieved = null; - int l = xrfs.length; - for (int r = 0; r < xrfs.length; r++) - { - // filter out any irrelevant or irretrievable references - if (xrfs[r] == null - || ((source != null && !source.equals(xrfs[r] - .getSource())) || !sftch.isFetchable(xrfs[r] - .getSource()))) - { - l--; - xrfs[r] = null; - } - } - if (l > 0) + for (DBRefEntry dbref : dbr) { - // System.out - // .println("Attempting to retrieve cross referenced sequences."); - DBRefEntry[] t = new DBRefEntry[l]; - l = 0; - for (int r = 0; r < xrfs.length; r++) + // find any entry where we should put in the sequence being + // cross-referenced into the map + Mapping map = dbref.getMap(); + if (map != null) { - if (xrfs[r] != null) + if (map.getTo() != null && map.getMap() != null) { - t[l++] = xrfs[r]; - } - } - xrfs = t; - try - { - retrieved = sftch.getSequences(xrfs, !dna); - // problem here is we don't know which of xrfs resulted in which - // retrieved element - } catch (Exception e) - { - System.err - .println("Problem whilst retrieving cross references for Sequence : " - + seq.getName()); - e.printStackTrace(); - } - - if (retrieved != null) - { - updateDbrefMappings(dna, seq, xrfs, retrieved, cf); - - SequenceIdMatcher matcher = new SequenceIdMatcher( - dataset.getSequences()); - List copiedFeatures = new ArrayList(); - CrossRef me = new CrossRef(); - for (int rs = 0; rs < retrieved.length; rs++) - { - // TODO: examine each sequence for 'redundancy' - DBRefEntry[] dbr = retrieved[rs].getDBRefs(); - if (dbr != null && dbr.length > 0) + // TODO findInDataset requires exact sequence match but + // 'congruent' test is only for the mapped part + // maybe not a problem in practice since only ENA provide a + // mapping and it is to the full protein translation of CDS + SequenceI matched = 
findInDataset(dbref); + // matcher.findIdMatch(map.getTo()); + if (matched != null) { - for (int di = 0; di < dbr.length; di++) + /* + * already got an xref to this sequence; update this + * map to point to the same sequence, and add + * any new dbrefs to it + */ + DBRefEntry[] toRefs = map.getTo().getDBRefs(); + if (toRefs != null) { - // find any entry where we should put in the sequence being - // cross-referenced into the map - Mapping map = dbr[di].getMap(); - if (map != null) + for (DBRefEntry ref : toRefs) { - if (map.getTo() != null && map.getMap() != null) + matched.addDBRef(ref); // add or update mapping + } + } + map.setTo(matched); + } + else + { + matcher.add(map.getTo()); + } + try + { + // compare ms with dss and replace with dss in mapping + // if map is congruent + SequenceI ms = map.getTo(); + int sf = map.getMap().getToLowest(); + int st = map.getMap().getToHighest(); + SequenceI mappedrg = ms.getSubSequence(sf, st); + // SequenceI loc = dss.getSubSequence(sf, st); + if (mappedrg.getLength() > 0 + && ms.getSequenceAsString().equals( + dss.getSequenceAsString())) + // && mappedrg.getSequenceAsString().equals( + // loc.getSequenceAsString())) + { + String msg = "Mapping updated from " + ms.getName() + + " to retrieved crossreference " + + dss.getName(); + System.out.println(msg); + map.setTo(dss); + + /* + * give the reverse reference the inverse mapping + * (if it doesn't have one already) + */ + setReverseMapping(dss, dbref, cf); + + /* + * copy sequence features as well, avoiding + * duplication (e.g. 
same variation from two + * transcripts) + */ + SequenceFeature[] sfs = ms.getSequenceFeatures(); + if (sfs != null) + { + for (SequenceFeature feat : sfs) { - SequenceI matched = matcher - .findIdMatch(map.getTo()); - if (matched != null) - { - /* - * already got an xref to this sequence; update this - * map to point to the same sequence, and add - * any new dbrefs to it - */ - for (DBRefEntry ref : map.getTo().getDBRefs()) - { - matched.addDBRef(ref); // add or update mapping - } - map.setTo(matched); - } - else + /* + * make a flyweight feature object which ignores Parent + * attribute in equality test; this avoids creating many + * otherwise duplicate exon features on genomic sequence + */ + SequenceFeature newFeature = new SequenceFeature( + feat) { - matcher.add(map.getTo()); - } - try - { - // compare ms with dss and replace with dss in mapping - // if map is congruent - SequenceI ms = map.getTo(); - int sf = map.getMap().getToLowest(); - int st = map.getMap().getToHighest(); - SequenceI mappedrg = ms.getSubSequence(sf, st); - // SequenceI loc = dss.getSubSequence(sf, st); - if (mappedrg.getLength() > 0 - && ms.getSequenceAsString().equals( - dss.getSequenceAsString())) - // && mappedrg.getSequenceAsString().equals( - // loc.getSequenceAsString())) - { - String msg = "Mapping updated from " - + ms.getName() - + " to retrieved crossreference " - + dss.getName(); - System.out.println(msg); - // method to update all refs of existing To on - // retrieved sequence with dss and merge any props - // on To onto dss. - map.setTo(dss); - /* - * copy sequence features as well, avoiding - * duplication (e.g. same variation from 2 - * transcripts) - */ - SequenceFeature[] sfs = ms - .getSequenceFeatures(); - if (sfs != null) - { - for (SequenceFeature feat : sfs) - { - /* - * we override SequenceFeature.equals here (but - * not elsewhere) to ignore Parent attribute - * TODO not quite working yet! 
- */ - if (!copiedFeatures - .contains(me.new MySequenceFeature( - feat))) - { - dss.addSequenceFeature(feat); - copiedFeatures.add(feat); - } - } - } - cf.addMap(retrieved[rs].getDatasetSequence(), - dss, map.getMap()); - } - else + @Override + public boolean equals(Object o) { - cf.addMap(retrieved[rs].getDatasetSequence(), - map.getTo(), map.getMap()); + return super.equals(o, true); } - } catch (Exception e) - { - System.err - .println("Exception when consolidating Mapped sequence set..."); - e.printStackTrace(System.err); - } + }; + dss.addSequenceFeature(newFeature); } } } + cf.addMap(retrievedDss, map.getTo(), map.getMap()); + } catch (Exception e) + { + System.err + .println("Exception when consolidating Mapped sequence set..."); + e.printStackTrace(System.err); } - retrieved[rs].updatePDBIds(); - rseqs.add(retrieved[rs]); } } } } + retrievedSequence.updatePDBIds(); + rseqs.add(retrievedDss); + dataset.addSequence(retrievedDss); + matcher.add(retrievedDss); + } + } + } + /** + * Sets the inverse sequence mapping in the corresponding dbref of the mapped + * to sequence (if any). This is used after fetching a cross-referenced + * sequence, if the fetched sequence has a mapping to the original sequence, + * to set the mapping in the original sequence's dbref. 
+ * + * @param mapFrom + * the sequence mapped from + * @param dbref + * @param mappings + */ + void setReverseMapping(SequenceI mapFrom, DBRefEntry dbref, + AlignedCodonFrame mappings) + { + SequenceI mapTo = dbref.getMap().getTo(); + if (mapTo == null) + { + return; + } + DBRefEntry[] dbrefs = mapTo.getDBRefs(); + if (dbrefs == null) + { + return; + } + for (DBRefEntry toRef : dbrefs) + { + if (toRef.hasMap() && mapFrom == toRef.getMap().getTo()) + { + /* + * found the reverse dbref; update its mapping if null + */ + if (toRef.getMap().getMap() == null) + { + MapList inverse = dbref.getMap().getMap().getInverse(); + toRef.getMap().setMap(inverse); + mappings.addMap(mapTo, mapFrom, inverse); + } } } + } - Alignment ral = null; - if (rseqs.size() > 0) + /** + * Returns the first identical sequence in the dataset if any, else null + * + * @param xref + * @return + */ + SequenceI findInDataset(DBRefEntry xref) + { + if (xref == null || !xref.hasMap() || xref.getMap().getTo() == null) { - ral = new Alignment(rseqs.toArray(new SequenceI[rseqs.size()])); - if (cf != null && !cf.isEmpty()) + return null; + } + SequenceI mapsTo = xref.getMap().getTo(); + String name = xref.getAccessionId(); + String name2 = xref.getSource() + "|" + name; + SequenceI dss = mapsTo.getDatasetSequence() == null ? mapsTo : mapsTo + .getDatasetSequence(); + for (SequenceI seq : dataset.getSequences()) + { + /* + * clumsy alternative to using SequenceIdMatcher which currently + * returns sequences with a dbref to the matched accession id + * which we don't want + */ + if (name.equals(seq.getName()) || seq.getName().startsWith(name2)) { - ral.addCodonFrame(cf); + if (sameSequence(seq, dss)) + { + return seq; + } } } - return ral; + return null; + } + + /** + * Answers true if seq1 and seq2 contain exactly the same characters (ignoring + * case), else false. This method compares the lengths, then each character in + * turn, in order to 'fail fast'. 
For case-sensitive comparison, it would be + * possible to use Arrays.equals(seq1.getSequence(), seq2.getSequence()). + * + * @param seq1 + * @param seq2 + * @return + */ + // TODO move to Sequence / SequenceI + static boolean sameSequence(SequenceI seq1, SequenceI seq2) + { + if (seq1 == seq2) + { + return true; + } + if (seq1 == null || seq2 == null) + { + return false; + } + char[] c1 = seq1.getSequence(); + char[] c2 = seq2.getSequence(); + if (c1.length != c2.length) + { + return false; + } + for (int i = 0; i < c1.length; i++) + { + int diff = c1[i] - c2[i]; + /* + * same char or differ in case only ('a'-'A' == 32) + */ + if (diff != 0 && diff != 32 && diff != -32) + { + return false; + } + } + return true; } /** @@ -487,62 +610,123 @@ public class CrossRef * retrieved sequence if found, and adds any new mappings to the * AlignedCodonFrame * - * @param dna * @param mapFrom * @param xrefs * @param retrieved * @param acf */ - static void updateDbrefMappings(boolean dna, SequenceI mapFrom, - DBRefEntry[] xrefs, SequenceI[] retrieved, AlignedCodonFrame acf) + void updateDbrefMappings(SequenceI mapFrom, DBRefEntry[] xrefs, + SequenceI[] retrieved, AlignedCodonFrame acf, boolean fromDna) { - SequenceIdMatcher matcher = new SequenceIdMatcher(retrieved); + SequenceIdMatcher idMatcher = new SequenceIdMatcher(retrieved); for (DBRefEntry xref : xrefs) { if (!xref.hasMap()) { String targetSeqName = xref.getSource() + "|" + xref.getAccessionId(); - SequenceI[] matches = matcher.findAllIdMatches(targetSeqName); + SequenceI[] matches = idMatcher.findAllIdMatches(targetSeqName); if (matches == null) { return; } for (SequenceI seq : matches) { - MapList mapping = null; - if (dna) - { - mapping = AlignmentUtils.mapCdnaToProtein(seq, mapFrom); - } - else - { - mapping = AlignmentUtils.mapCdnaToProtein(mapFrom, seq); - if (mapping != null) - { - mapping = mapping.getInverse(); - } - } - if (mapping != null) - { - xref.setMap(new Mapping(seq, mapping)); - if (dna) - { - 
AlignmentUtils.computeProteinFeatures(mapFrom, seq, mapping); - } - if (dna) - { - acf.addMap(mapFrom, seq, mapping); - } - else - { - acf.addMap(seq, mapFrom, mapping.getInverse()); - } - continue; - } + constructMapping(mapFrom, seq, xref, acf, fromDna); + } + } + } + } + + /** + * Tries to make a mapping between sequences. If successful, adds the mapping + * to the dbref and the mappings collection and answers true, otherwise + * answers false. The following methods of making a mapping are tried in + * turn: + *
    + *
  • if 'mapTo' holds a mapping to 'mapFrom', take the inverse; this is, for + * example, the case after fetching EMBL cross-references for a Uniprot + * sequence
  • + *
  • else check if the dna translates exactly to the protein (give or take + * start and stop codons)
  • + *
  • else try to map based on CDS features on the dna sequence
  • + *
+ * + * @param mapFrom + * @param mapTo + * @param xref + * @param mappings + * @return + */ + boolean constructMapping(SequenceI mapFrom, SequenceI mapTo, + DBRefEntry xref, AlignedCodonFrame mappings, boolean fromDna) + { + MapList mapping = null; + + /* + * look for a reverse mapping, if found make its inverse + */ + if (mapTo.getDBRefs() != null) + { + for (DBRefEntry dbref : mapTo.getDBRefs()) + { + String name = dbref.getSource() + "|" + dbref.getAccessionId(); + if (dbref.hasMap() && mapFrom.getName().startsWith(name)) + { + /* + * looks like we've found a map from 'mapTo' to 'mapFrom' + * - invert it to make the mapping the other way + */ + MapList reverse = dbref.getMap().getMap().getInverse(); + xref.setMap(new Mapping(mapTo, reverse)); + mappings.addMap(mapFrom, mapTo, reverse); + return true; } } } + + if (fromDna) + { + mapping = AlignmentUtils.mapCdnaToProtein(mapTo, mapFrom); + } + else + { + mapping = AlignmentUtils.mapCdnaToProtein(mapFrom, mapTo); + if (mapping != null) + { + mapping = mapping.getInverse(); + } + } + if (mapping == null) + { + return false; + } + xref.setMap(new Mapping(mapTo, mapping)); + + /* + * and add a reverse DbRef with the inverse mapping + */ + if (mapFrom.getDatasetSequence() != null + && mapFrom.getDatasetSequence().getSourceDBRef() != null) + { + DBRefEntry dbref = new DBRefEntry(mapFrom.getDatasetSequence() + .getSourceDBRef()); + dbref.setMap(new Mapping(mapFrom.getDatasetSequence(), mapping + .getInverse())); + mapTo.addDBRef(dbref); + } + + if (fromDna) + { + AlignmentUtils.computeProteinFeatures(mapFrom, mapTo, mapping); + mappings.addMap(mapFrom, mapTo, mapping); + } + else + { + mappings.addMap(mapTo, mapFrom, mapping.getInverse()); + } + + return true; } /** @@ -550,15 +734,16 @@ public class CrossRef * dataset (that is not equal to sequenceI) Identifies matching DBRefEntry * based on source and accession string only - Map and Version are nulled. 
* + * @param fromDna + * - true if context was searching from Dna sequences, false if + * context was searching from Protein sequences * @param sequenceI * @param lrfs - * @param dataset - * @param rseqs + * @param foundSeqs * @return true if matches were found. */ - private static boolean searchDatasetXrefs(SequenceI sequenceI, - boolean dna, DBRefEntry[] lrfs, AlignmentI dataset, - List rseqs, AlignedCodonFrame cf) + private boolean searchDatasetXrefs(boolean fromDna, SequenceI sequenceI, + DBRefEntry[] lrfs, List foundSeqs, AlignedCodonFrame cf) { boolean found = false; if (lrfs == null) @@ -571,50 +756,44 @@ public class CrossRef // add in wildcards xref.setVersion(null); xref.setMap(null); - found = searchDataset(sequenceI, xref, dataset, rseqs, cf, false, dna); + found |= searchDataset(fromDna, sequenceI, xref, foundSeqs, cf, false); } return found; } /** - * search a given sequence dataset for references matching cross-references to - * the given sequence - * - * @param sequenceI - * @param xrf - * @param dataset - * @param rseqs - * set of unique sequences - * @param cf - * @return true if one or more unique sequences were found and added - */ - public static boolean searchDataset(SequenceI sequenceI, DBRefEntry xrf, - AlignmentI dataset, List rseqs, AlignedCodonFrame cf) - { - return searchDataset(sequenceI, xrf, dataset, rseqs, cf, true, false); - } - - /** - * TODO: generalise to different protein classifications Search dataset for - * DBRefEntrys matching the given one (xrf) and add the associated sequence to - * rseq. 
+ * Searches dataset for DBRefEntrys matching the given one (xrf) and adds the + * associated sequence to rseqs * - * @param sequenceI + * @param fromDna + * true if context was searching for refs *from* dna sequence, false + * if context was searching for refs *from* protein sequence + * @param fromSeq + * a sequence to ignore (start point of search) * @param xrf - * @param dataset - * @param rseqs + * a cross-reference to try to match + * @param foundSeqs + * result list to add to + * @param mappings + * a set of sequence mappings to add to * @param direct - * - search all references or only subset - * @param dna - * search dna or protein xrefs (if direct=false) + * - indicates the type of relationship between returned sequences, + * xrf, and sequenceI that is required. + *
    + *
  • direct implies xrf is a primary reference for sequenceI AND + * the sequences to be located (eg a uniprot ID for a protein + * sequence, and a uniprot ref on a transcript sequence).
  • + *
  • indirect means xrf is a cross reference with respect to + * sequenceI or all the returned sequences (eg a genomic reference + * associated with a locus and one or more transcripts)
  • + *
* @return true if relationship found and sequence added. */ - public static boolean searchDataset(SequenceI sequenceI, DBRefEntry xrf, - AlignmentI dataset, List rseqs, AlignedCodonFrame cf, - boolean direct, boolean dna) + boolean searchDataset(boolean fromDna, SequenceI fromSeq, + DBRefEntry xrf, List foundSeqs, AlignedCodonFrame mappings, + boolean direct) { boolean found = false; - SequenceI[] typer = new SequenceI[1]; if (dataset == null) { return false; @@ -634,107 +813,84 @@ public class CrossRef if (nxt.getDatasetSequence() != null) { System.err - .println("Implementation warning: getProducts passed a dataset alignment without dataset sequences in it!"); + .println("Implementation warning: CrossRef initialised with a dataset alignment with non-dataset sequences in it! (" + + nxt.getDisplayId(true) + + " has ds reference " + + nxt.getDatasetSequence().getDisplayId(true) + + ")"); + } + if (nxt == fromSeq || nxt == fromSeq.getDatasetSequence()) + { + continue; } - if (nxt != sequenceI && nxt != sequenceI.getDatasetSequence()) + /* + * only look at same molecule type if 'direct', or + * complementary type if !direct + */ { - // check if this is the correct sequence type + boolean isDna = !nxt.isProtein(); + if (direct ? 
(isDna != fromDna) : (isDna == fromDna)) { - typer[0] = nxt; - boolean isDna = jalview.util.Comparison.isNucleotide(typer); - if ((direct && isDna == dna) || (!direct && isDna != dna)) - { - // skip this sequence because it is same molecule type - continue; - } + // skip this sequence because it is wrong molecule type + continue; } + } - // look for direct or indirect references in common - DBRefEntry[] poss = nxt.getDBRefs(), cands = null; - if (direct) - { - cands = jalview.util.DBRefUtils.searchRefs(poss, xrf); - } - else - { - poss = CrossRef.findXDbRefs(dna, poss); // - cands = jalview.util.DBRefUtils.searchRefs(poss, xrf); - } - if (cands != null) + // look for direct or indirect references in common + DBRefEntry[] poss = nxt.getDBRefs(); + List cands = null; + + // todo: indirect specifies we select either direct references to nxt + // that match xrf which is indirect to sequenceI, or indirect + // references to nxt that match xrf which is direct to sequenceI + cands = DBRefUtils.searchRefs(poss, xrf); + // else + // { + // poss = DBRefUtils.selectDbRefs(nxt.isProtein()!fromDna, poss); + // cands = DBRefUtils.searchRefs(poss, xrf); + // } + if (!cands.isEmpty()) + { + if (!foundSeqs.contains(nxt)) { - if (!rseqs.contains(nxt)) + found = true; + foundSeqs.add(nxt); + if (mappings != null && !direct) { - rseqs.add(nxt); - boolean foundmap = cf != null; - // don't search if we aren't given a codon map object - for (int r = 0; foundmap && r < cands.length; r++) + /* + * if the matched sequence has mapped dbrefs to + * protein product / cdna, add equivalent mappings to + * our source sequence + */ + for (DBRefEntry candidate : cands) { - if (cands[r].hasMap()) + Mapping mapping = candidate.getMap(); + if (mapping != null) { - if (cands[r].getMap().getTo() != null - && cands[r].getMap().getMap().getFromRatio() != cands[r] - .getMap().getMap().getToRatio()) + MapList map = mapping.getMap(); + if (mapping.getTo() != null + && map.getFromRatio() != map.getToRatio()) 
{ - foundmap = true; - // get sense of map correct for adding to product - // alignment. - if (dna) + /* + * add a mapping, as from dna to peptide sequence + */ + if (map.getFromRatio() == 3) { - // map is from dna seq to a protein product - cf.addMap(sequenceI, nxt, cands[r].getMap() - .getMap()); + mappings.addMap(nxt, fromSeq, map); } else { - // map should be from protein seq to its coding dna - cf.addMap(nxt, sequenceI, cands[r].getMap() - .getMap().getInverse()); + mappings.addMap(nxt, fromSeq, map.getInverse()); } } } } - // TODO: add mapping between sequences if necessary - found = true; } } - } } } } return found; } - - /** - * precalculate different products that can be found for seqs in dataset and - * return them. - * - * @param dna - * @param seqs - * @param dataset - * @param fake - * - don't actually build lists - just get types - * @return public static Object[] buildXProductsList(boolean dna, SequenceI[] - * seqs, AlignmentI dataset, boolean fake) { String types[] = - * jalview.analysis.CrossRef.findSequenceXrefTypes( dna, seqs, - * dataset); if (types != null) { System.out.println("Xref Types for: - * "+(dna ? "dna" : "prot")); for (int t = 0; t < types.length; t++) { - * System.out.println("Type: " + types[t]); SequenceI[] prod = - * jalview.analysis.CrossRef.findXrefSequences(seqs, dna, types[t]); - * System.out.println("Found " + ((prod == null) ? "no" : "" + - * prod.length) + " products"); if (prod!=null) { for (int p=0; - * p -1)) : false; } } + + /** + * toString method returns the wrapped sequence id. For debugging purposes + * only, behaviour not guaranteed not to change. 
+ */ + @Override + public String toString() + { + return id; + } } } diff --git a/src/jalview/datamodel/AlignedCodonFrame.java b/src/jalview/datamodel/AlignedCodonFrame.java index 6d6cdb5..326cc4e 100644 --- a/src/jalview/datamodel/AlignedCodonFrame.java +++ b/src/jalview/datamodel/AlignedCodonFrame.java @@ -23,6 +23,7 @@ package jalview.datamodel; import jalview.util.MapList; import jalview.util.MappingUtils; +import java.util.AbstractList; import java.util.ArrayList; import java.util.List; @@ -36,7 +37,7 @@ public class AlignedCodonFrame /* * Data bean to hold mappings from one sequence to another */ - private class SequenceToSequenceMapping + public class SequenceToSequenceMapping { private SequenceI fromSeq; @@ -57,6 +58,54 @@ public class AlignedCodonFrame return String.format("From %s %s", fromSeq.getName(), mapping.toString()); } + + /** + * Returns a hashCode derived from the hashcodes of the mappings and fromSeq + * + * @see SequenceToSequenceMapping#hashCode() + */ + @Override + public int hashCode() + { + return (fromSeq == null ? 
0 : fromSeq.hashCode() * 31) + + mapping.hashCode(); + } + + /** + * Answers true if the objects hold the same mapping between the same two + * sequences + * + * @see Mapping#equals + */ + @Override + public boolean equals(Object obj) + { + if (!(obj instanceof SequenceToSequenceMapping)) + { + return false; + } + SequenceToSequenceMapping that = (SequenceToSequenceMapping) obj; + if (this.mapping == null) + { + return that.mapping == null; + } + // TODO: can simplify by asserting fromSeq is a dataset sequence + return (this.fromSeq == that.fromSeq || (this.fromSeq != null + && that.fromSeq != null + && this.fromSeq.getDatasetSequence() != null && this.fromSeq + .getDatasetSequence() == that.fromSeq + .getDatasetSequence())) && this.mapping.equals(that.mapping); + } + + public SequenceI getFromSeq() + { + return fromSeq; + } + + public Mapping getMapping() + { + return mapping; + } } private List mappings; @@ -79,6 +128,21 @@ public class AlignedCodonFrame */ public void addMap(SequenceI dnaseq, SequenceI aaseq, MapList map) { + addMap(dnaseq, aaseq, map, null); + } + + /** + * Adds a mapping between the dataset sequences for the associated dna and + * protein sequence objects + * + * @param dnaseq + * @param aaseq + * @param map + * @param mapFromId + */ + public void addMap(SequenceI dnaseq, SequenceI aaseq, MapList map, + String mapFromId) + { // JBPNote DEBUG! THIS ! 
// dnaseq.transferAnnotation(aaseq, mp); // aaseq.transferAnnotation(dnaseq, new Mapping(map.getInverse())); @@ -90,6 +154,8 @@ public class AlignedCodonFrame /* * if we already hold a mapping between these sequences, just add to it + * note that 'adding' a duplicate map does nothing; this protects against + * creating duplicate mappings in AlignedCodonFrame */ for (SequenceToSequenceMapping ssm : mappings) { @@ -104,6 +170,7 @@ public class AlignedCodonFrame * otherwise, add a new sequence mapping */ Mapping mp = new Mapping(toSeq, map); + mp.setMappedFromId(mapFromId); mappings.add(new SequenceToSequenceMapping(fromSeq, mp)); } @@ -421,7 +488,8 @@ public class AlignedCodonFrame for (SequenceToSequenceMapping ssm : mappings) { - if (ssm.mapping.to == protein) + if (ssm.mapping.to == protein + && ssm.mapping.getMap().getFromRatio() == 3) { ml = ssm.mapping.map; dnaSeq = ssm.fromSeq; @@ -651,7 +719,7 @@ public class AlignedCodonFrame } /** - * Returns the first mapping found that is from 'fromSeq' to 'toSeq', or null + * Returns the first mapping found that is between 'fromSeq' and 'toSeq', or null * if none found * * @param fromSeq @@ -662,16 +730,54 @@ public class AlignedCodonFrame */ public Mapping getMappingBetween(SequenceI fromSeq, SequenceI toSeq) { + SequenceI dssFrom = fromSeq.getDatasetSequence() == null ? fromSeq + : fromSeq.getDatasetSequence(); + SequenceI dssTo = toSeq.getDatasetSequence() == null ? 
toSeq : toSeq + .getDatasetSequence(); + for (SequenceToSequenceMapping mapping : mappings) { SequenceI from = mapping.fromSeq; SequenceI to = mapping.mapping.to; - if ((from == fromSeq || from == fromSeq.getDatasetSequence()) - && (to == toSeq || to == toSeq.getDatasetSequence())) + if ((from == dssFrom && to == dssTo) + || (from == dssTo && to == dssFrom)) { return mapping.mapping; } } return null; } + + /** + * Returns a hashcode derived from the list of sequence mappings + * + * @see SequenceToSequenceMapping#hashCode() + * @see AbstractList#hashCode() + */ + @Override + public int hashCode() + { + return this.mappings.hashCode(); + } + + /** + * Two AlignedCodonFrame objects are equal if they hold the same ordered list + * of mappings + * + * @see SequenceToSequenceMapping# + */ + @Override + public boolean equals(Object obj) + { + if (!(obj instanceof AlignedCodonFrame)) + { + return false; + } + return this.mappings.equals(((AlignedCodonFrame) obj).mappings); + } + + public List getMappings() + { + return mappings; + } } diff --git a/src/jalview/datamodel/Alignment.java b/src/jalview/datamodel/Alignment.java index f14539b..f5665db 100755 --- a/src/jalview/datamodel/Alignment.java +++ b/src/jalview/datamodel/Alignment.java @@ -30,7 +30,6 @@ import java.util.Collections; import java.util.Enumeration; import java.util.HashSet; import java.util.Hashtable; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -45,7 +44,7 @@ import java.util.Vector; */ public class Alignment implements AlignmentI { - protected Alignment dataset; + private Alignment dataset; protected List sequences; @@ -110,7 +109,10 @@ public class Alignment implements AlignmentI /* * Share the same dataset sequence mappings (if any). 
*/ - this.setCodonFrames(al.getCodonFrames()); + if (dataset == null && al.getDataset() == null) + { + this.setCodonFrames(al.getCodonFrames()); + } } /** @@ -987,7 +989,7 @@ public class Alignment implements AlignmentI } @Override - public void setDataset(Alignment data) + public void setDataset(AlignmentI data) { if (dataset == null && data == null) { @@ -995,7 +997,12 @@ public class Alignment implements AlignmentI } else if (dataset == null && data != null) { - dataset = data; + if (!(data instanceof Alignment)) + { + throw new Error( + "Implementation Error: jalview.datamodel.Alignment does not yet support other implementations of AlignmentI as its dataset reference"); + } + dataset = (Alignment) data; for (int i = 0; i < getHeight(); i++) { SequenceI currentSeq = getSequenceAt(i); @@ -1288,22 +1295,6 @@ public class Alignment implements AlignmentI } } - /** - * adds a set of mappings (while ignoring any duplicates) - */ - @Override - public void addCodonFrames(Iterable codons) - { - if (codons != null) - { - Iterator it = codons.iterator(); - while (it.hasNext()) - { - addCodonFrame(it.next()); - } - } - } - /* * (non-Javadoc) * @@ -1357,6 +1348,10 @@ public class Alignment implements AlignmentI @Override public List getCodonFrames() { + // TODO: Fix this method to fix failing AlignedCodonFrame tests + // this behaviour is currently incorrect. method should return codon frames + // for just the alignment, + // selected from dataset return dataset != null ? 
dataset.getCodonFrames() : codonFrameList; } @@ -1418,6 +1413,7 @@ public class Alignment implements AlignmentI addAnnotation(alan[a]); } + // use add method getCodonFrames().addAll(toappend.getCodonFrames()); List sg = toappend.getGroups(); @@ -1727,6 +1723,10 @@ public class Alignment implements AlignmentI { return AlignmentUtils.alignProteinAsDna(this, al); } + else if (thatIsProtein && thisIsNucleotide) + { + return AlignmentUtils.alignCdsAsProtein(this, al); + } return AlignmentUtils.alignAs(this, al); } diff --git a/src/jalview/datamodel/AlignmentI.java b/src/jalview/datamodel/AlignmentI.java index 4ae8ba2..c15bb99 100755 --- a/src/jalview/datamodel/AlignmentI.java +++ b/src/jalview/datamodel/AlignmentI.java @@ -305,7 +305,7 @@ public interface AlignmentI extends AnnotatedCollectionI * @return Alignment containing dataset sequences or null of this is a * dataset. */ - Alignment getDataset(); + AlignmentI getDataset(); /** * Set the associated dataset for the alignment, or create one. @@ -313,7 +313,7 @@ public interface AlignmentI extends AnnotatedCollectionI * @param dataset * The dataset alignment or null to construct one. 
*/ - void setDataset(Alignment dataset); + void setDataset(AlignmentI dataset); /** * pads sequences with gaps (to ensure the set looks like an alignment) @@ -363,14 +363,6 @@ public interface AlignmentI extends AnnotatedCollectionI void addCodonFrame(AlignedCodonFrame codons); /** - * add a set of aligned codons mappings for this alignment, apart from any - * duplicates which are ignored - * - * @param codons - */ - void addCodonFrames(Iterable codons); - - /** * remove a particular codon frame reference from this alignment * * @param codons diff --git a/src/jalview/datamodel/DBRefEntry.java b/src/jalview/datamodel/DBRefEntry.java index 66a075e..efdf0ac 100755 --- a/src/jalview/datamodel/DBRefEntry.java +++ b/src/jalview/datamodel/DBRefEntry.java @@ -150,6 +150,7 @@ public class DBRefEntry implements DBRefEntryI * otherwise the versions have to match */ String otherVersion = other.getVersion(); + if ((version == null || version.equals("0") || version.endsWith(":0")) && otherVersion != null) { @@ -157,7 +158,9 @@ public class DBRefEntry implements DBRefEntryI } else { - if (!version.equalsIgnoreCase(otherVersion)) + if (version != null + && (otherVersion == null || !version + .equalsIgnoreCase(otherVersion))) { return false; } diff --git a/src/jalview/datamodel/Mapping.java b/src/jalview/datamodel/Mapping.java index bd83fe9..1c196be 100644 --- a/src/jalview/datamodel/Mapping.java +++ b/src/jalview/datamodel/Mapping.java @@ -20,6 +20,7 @@ */ package jalview.datamodel; +import jalview.util.Comparison; import jalview.util.MapList; import java.util.Iterator; @@ -258,7 +259,8 @@ public class Mapping int truePos = sequencePos - (start - 1); while (alignedBases < truePos && alignedColumn < alignedSeq.length) { - if (alignedSeq[alignedColumn++] != gap) + char c = alignedSeq[alignedColumn++]; + if (c != gap && !Comparison.isGap(c)) { alignedBases++; } @@ -274,18 +276,23 @@ public class Mapping } - /** + /* * Contains the start-end pairs mapping from the associated sequence 
to the * sequence in the database coordinate system. It also takes care of step * difference between coordinate systems. */ MapList map = null; - /** + /* * The sequence that map maps the associated sequence to (if any). */ SequenceI to = null; + /* + * optional sequence id for the 'from' ranges + */ + private String mappedFromId; + public Mapping(MapList map) { super(); @@ -333,6 +340,7 @@ public class Mapping map = new MapList(map2.map); } to = map2.to; + mappedFromId = map2.mappedFromId; } } @@ -356,14 +364,13 @@ public class Mapping /** * Equals that compares both the to references and MapList mappings. * - * @param other + * @param o * @return + * @see MapList#equals */ @Override public boolean equals(Object o) { - // TODO should override Object.hashCode() to ensure that equal objects have - // equal hashcodes if (o == null || !(o instanceof Mapping)) { return false; @@ -390,6 +397,21 @@ public class Mapping } /** + * Returns a hashCode made from the sequence and maplist + */ + @Override + public int hashCode() + { + int hashCode = (this.to == null ? 
1 : this.to.hashCode()); + if (this.map != null) + { + hashCode = hashCode * 31 + this.map.hashCode(); + } + + return hashCode; + } + + /** * get the 'initial' position in the associated sequence for a position in the * mapped reference frame * @@ -728,4 +750,22 @@ public class Mapping : this.to.getName()); } + /** + * Returns the identifier for the 'from' range sequence, or null if not set + * + * @return + */ + public String getMappedFromId() + { + return mappedFromId; + } + + /** + * Sets the identifier for the 'from' range sequence + */ + public void setMappedFromId(String mappedFromId) + { + this.mappedFromId = mappedFromId; + } + } diff --git a/src/jalview/datamodel/Sequence.java b/src/jalview/datamodel/Sequence.java index 151d8c4..31ffdfd 100755 --- a/src/jalview/datamodel/Sequence.java +++ b/src/jalview/datamodel/Sequence.java @@ -1086,6 +1086,25 @@ public class Sequence extends ASequence implements SequenceI return new Sequence(this); } + private boolean _isNa; + + private long _seqhash = 0; + + @Override + public boolean isProtein() + { + if (datasetSequence != null) + { + return datasetSequence.isProtein(); + } + if (_seqhash != sequence.hashCode()) + { + _seqhash = sequence.hashCode(); + _isNa=jalview.util.Comparison.isNucleotide(new SequenceI[] { this }); + } + return !_isNa; + }; + /* * (non-Javadoc) * diff --git a/src/jalview/datamodel/SequenceI.java b/src/jalview/datamodel/SequenceI.java index 69eb1d4..355e271 100755 --- a/src/jalview/datamodel/SequenceI.java +++ b/src/jalview/datamodel/SequenceI.java @@ -219,6 +219,12 @@ public interface SequenceI extends ASequenceI public int[] findPositionMap(); /** + * + * @return true if sequence is composed of amino acid characters + */ + public boolean isProtein(); + + /** * Delete a range of aligned sequence columns, creating a new dataset sequence * if necessary and adjusting start and end positions accordingly. 
* diff --git a/src/jalview/datamodel/xdb/embl/EmblEntry.java b/src/jalview/datamodel/xdb/embl/EmblEntry.java index f8c0bbe..56b1325 100644 --- a/src/jalview/datamodel/xdb/embl/EmblEntry.java +++ b/src/jalview/datamodel/xdb/embl/EmblEntry.java @@ -187,21 +187,26 @@ public class EmblEntry */ public SequenceI getSequence(String sourceDb, List peptides) { - SequenceI dna = new Sequence(sourceDb + "|" + accession, - sequence.getSequence()); + SequenceI dna = makeSequence(sourceDb); dna.setDescription(description); DBRefEntry retrievedref = new DBRefEntry(sourceDb, getSequenceVersion(), accession); dna.addDBRef(retrievedref); + dna.setSourceDBRef(retrievedref); // add map to indicate the sequence is a valid coordinate frame for the // dbref retrievedref.setMap(new Mapping(null, new int[] { 1, dna.getLength() }, new int[] { 1, dna.getLength() }, 1, 1)); + + /* + * transform EMBL Database refs to canonical form + */ if (dbRefs != null) { for (DBRefEntry dbref : dbRefs) { + dbref.setSource(DBRefUtils.getCanonicalName(dbref.getSource())); dna.addDBRef(dbref); } } @@ -211,13 +216,6 @@ public class EmblEntry { for (EmblFeature feature : features) { - if (feature.dbRefs != null) - { - for (DBRefEntry dbref : feature.dbRefs) - { - dna.addDBRef(dbref); - } - } if (FeatureProperties.isCodingFeature(sourceDb, feature.getName())) { parseCodingFeature(feature, sourceDb, dna, peptides, matcher); @@ -237,6 +235,17 @@ public class EmblEntry } /** + * @param sourceDb + * @return + */ + SequenceI makeSequence(String sourceDb) + { + SequenceI dna = new Sequence(sourceDb + "|" + accession, + sequence.getSequence()); + return dna; + } + + /** * Extracts coding region and product from a CDS feature and properly decorate * it with annotations. 
* @@ -248,17 +257,19 @@ public class EmblEntry * parent dna sequence for this record * @param peptides * list of protein product sequences for Embl entry + * @param matcher + * helper to match xrefs in already retrieved sequences */ void parseCodingFeature(EmblFeature feature, String sourceDb, SequenceI dna, List peptides, SequenceIdMatcher matcher) { boolean isEmblCdna = sourceDb.equals(DBRefSource.EMBLCDS); - int[] exon = getCdsRanges(feature); + int[] exons = getCdsRanges(feature); - String prseq = null; - String prname = ""; - String prid = null; + String translation = null; + String proteinName = ""; + String proteinId = null; Map vals = new Hashtable(); /* @@ -279,11 +290,11 @@ public class EmblEntry if (qname.equals("translation")) { // remove all spaces (precompiled String.replaceAll(" ", "")) - prseq = SPACE_PATTERN.matcher(q.getValues()[0]).replaceAll(""); + translation = SPACE_PATTERN.matcher(q.getValues()[0]).replaceAll(""); } else if (qname.equals("protein_id")) { - prid = q.getValues()[0].trim(); + proteinId = q.getValues()[0].trim(); } else if (qname.equals("codon_start")) { @@ -299,7 +310,7 @@ public class EmblEntry else if (qname.equals("product")) { // sometimes name is returned e.g. 
for V00488 - prname = q.getValues()[0].trim(); + proteinName = q.getValues()[0].trim(); } else { @@ -315,54 +326,59 @@ public class EmblEntry } } - DBRefEntry protEMBLCDS = null; - exon = MappingUtils.removeStartPositions(codonStart - 1, exon); - boolean noProteinDbref = true; + DBRefEntry proteinToEmblProteinRef = null; + exons = MappingUtils.removeStartPositions(codonStart - 1, exons); SequenceI product = null; - Mapping map = null; - if (prseq != null && prname != null && prid != null) + Mapping dnaToProteinMapping = null; + if (translation != null && proteinName != null && proteinId != null) { + int translationLength = translation.length(); + /* * look for product in peptides list, if not found, add it */ - product = matcher.findIdMatch(prid); + product = matcher.findIdMatch(proteinId); if (product == null) { - product = new Sequence(prid, prseq, 1, prseq.length()); - product.setDescription(((prname.length() == 0) ? "Protein Product from " + product = new Sequence(proteinId, translation, 1, translationLength); + product.setDescription(((proteinName.length() == 0) ? "Protein Product from " + sourceDb - : prname)); + : proteinName)); peptides.add(product); matcher.add(product); } // we have everything - create the mapping and perhaps the protein // sequence - if (exon == null || exon.length == 0) + if (exons == null || exons.length == 0) { + /* + * workaround until we handle dna location for CDS sequence + * e.g. location="X53828.1:60..1058" correctly + */ System.err .println("Implementation Notice: EMBLCDS records not properly supported yet - Making up the CDNA region of this sequence... may be incorrect (" + sourceDb + ":" + getAccession() + ")"); - if (prseq.length() * 3 == (1 - codonStart + dna.getSequence().length)) + if (translationLength * 3 == (1 - codonStart + dna.getSequence().length)) { System.err .println("Not allowing for additional stop codon at end of cDNA fragment... !"); - // this might occur for CDS sequences where no features are - // marked. 
- exon = new int[] { dna.getStart() + (codonStart - 1), + // this might occur for CDS sequences where no features are marked + exons = new int[] { dna.getStart() + (codonStart - 1), dna.getEnd() }; - map = new Mapping(product, exon, new int[] { 1, prseq.length() }, - 3, 1); + dnaToProteinMapping = new Mapping(product, exons, new int[] { 1, + translationLength }, 3, 1); } - if ((prseq.length() + 1) * 3 == (1 - codonStart + dna.getSequence().length)) + if ((translationLength + 1) * 3 == (1 - codonStart + dna + .getSequence().length)) { System.err .println("Allowing for additional stop codon at end of cDNA fragment... will probably cause an error in VAMSAs!"); - exon = new int[] { dna.getStart() + (codonStart - 1), + exons = new int[] { dna.getStart() + (codonStart - 1), dna.getEnd() - 3 }; - map = new Mapping(product, exon, new int[] { 1, prseq.length() }, - 3, 1); + dnaToProteinMapping = new Mapping(product, exons, new int[] { 1, + translationLength }, 3, 1); } } else @@ -381,38 +397,49 @@ public class EmblEntry else { // final product length truncation check - // TODO should from range include stop codon even if not in protein - // in order to include stop codon in CDS sequence (as done for - // Ensembl)? - int[] cdsRanges = adjustForProteinLength(prseq.length(), exon); - map = new Mapping(product, cdsRanges, new int[] { 1, - prseq.length() }, 3, 1); - // reconstruct the EMBLCDS entry - // TODO: this is only necessary when there codon annotation is - // complete (I think JBPNote) - DBRefEntry pcdnaref = new DBRefEntry(); - pcdnaref.setAccessionId(prid); - pcdnaref.setSource(DBRefSource.EMBLCDS); - pcdnaref.setVersion(getSequenceVersion()); // same as parent EMBL - // version. 
- MapList mp = new MapList(new int[] { 1, prseq.length() }, - new int[] { 1 + (codonStart - 1), - (codonStart - 1) + 3 * prseq.length() }, 1, 3); - pcdnaref.setMap(new Mapping(mp)); + int[] cdsRanges = adjustForProteinLength(translationLength, exons); + dnaToProteinMapping = new Mapping(product, cdsRanges, new int[] { + 1, translationLength }, 3, 1); if (product != null) { - product.addDBRef(pcdnaref); - protEMBLCDS = new DBRefEntry(pcdnaref); - protEMBLCDS.setSource(DBRefSource.EMBLCDSProduct); - product.addDBRef(protEMBLCDS); + /* + * make xref with mapping from protein to EMBL dna + */ + DBRefEntry proteinToEmblRef = new DBRefEntry(DBRefSource.EMBL, + getSequenceVersion(), proteinId, new Mapping( + dnaToProteinMapping.getMap().getInverse())); + product.addDBRef(proteinToEmblRef); + + /* + * make xref from protein to EMBLCDS; we assume here that the + * CDS sequence version is same as dna sequence (?!) + */ + MapList proteinToCdsMapList = new MapList(new int[] { 1, + translationLength }, new int[] { 1 + (codonStart - 1), + (codonStart - 1) + 3 * translationLength }, 1, 3); + DBRefEntry proteinToEmblCdsRef = new DBRefEntry( + DBRefSource.EMBLCDS, getSequenceVersion(), proteinId, + new Mapping(proteinToCdsMapList)); + product.addDBRef(proteinToEmblCdsRef); + + /* + * make 'direct' xref from protein to EMBLCDSPROTEIN + */ + proteinToEmblProteinRef = new DBRefEntry(proteinToEmblCdsRef); + proteinToEmblProteinRef.setSource(DBRefSource.EMBLCDSProduct); + proteinToEmblProteinRef.setMap(null); + product.addDBRef(proteinToEmblProteinRef); } } } - // add cds feature to dna seq - this may include the stop codon - for (int xint = 0; exon != null && xint < exon.length; xint += 2) + + /* + * add cds features to dna sequence + */ + for (int xint = 0; exons != null && xint < exons.length; xint += 2) { - SequenceFeature sf = makeCdsFeature(exon, xint, prname, prid, vals, - codonStart); + SequenceFeature sf = makeCdsFeature(exons, xint, proteinName, + proteinId, vals, codonStart); 
sf.setType(feature.getName()); // "CDS" sf.setEnaLocation(feature.getLocation()); sf.setFeatureGroup(sourceDb); @@ -421,19 +448,26 @@ public class EmblEntry } /* - * add dbRefs to sequence, and mappings for Uniprot xrefs + * add feature dbRefs to sequence, and mappings for Uniprot xrefs */ + boolean hasUniprotDbref = false; if (feature.dbRefs != null) { boolean mappingUsed = false; for (DBRefEntry ref : feature.dbRefs) { - ref.setSource(DBRefUtils.getCanonicalName(ref.getSource())); - if (ref.getSource().equals(DBRefSource.UNIPROT)) + /* + * ensure UniProtKB/Swiss-Prot converted to UNIPROT + */ + String source = DBRefUtils.getCanonicalName(ref.getSource()); + ref.setSource(source); + DBRefEntry proteinDbRef = new DBRefEntry(ref.getSource(), ref.getVersion(), ref + .getAccessionId()); + if (source.equals(DBRefSource.UNIPROT)) { String proteinSeqName = DBRefSource.UNIPROT + "|" + ref.getAccessionId(); - if (map != null && map.getTo() != null) + if (dnaToProteinMapping != null && dnaToProteinMapping.getTo() != null) { if (mappingUsed) { @@ -441,13 +475,14 @@ public class EmblEntry * two or more Uniprot xrefs for the same CDS - * each needs a distinct Mapping (as to a different sequence) */ - map = new Mapping(map); + dnaToProteinMapping = new Mapping(dnaToProteinMapping); } mappingUsed = true; /* * try to locate the protein mapped to (possibly by a - * previous CDS feature) + * previous CDS feature); if not found, construct it from + * the EMBL translation */ SequenceI proteinSeq = matcher.findIdMatch(proteinSeqName); if (proteinSeq == null) @@ -457,61 +492,64 @@ public class EmblEntry matcher.add(proteinSeq); peptides.add(proteinSeq); } - map.setTo(proteinSeq); - map.getTo().addDBRef( - new DBRefEntry(ref.getSource(), ref.getVersion(), ref - .getAccessionId())); - ref.setMap(map); + dnaToProteinMapping.setTo(proteinSeq); + dnaToProteinMapping.setMappedFromId(proteinId); + proteinSeq.addDBRef(proteinDbRef); + proteinSeq.setSourceDBRef(proteinDbRef); + 
ref.setMap(dnaToProteinMapping); } - noProteinDbref = false; + hasUniprotDbref = true; } if (product != null) { - DBRefEntry pref = new DBRefEntry(ref.getSource(), - ref.getVersion(), ref.getAccessionId()); + /* + * copy feature dbref to our protein product + */ + DBRefEntry pref = proteinDbRef; pref.setMap(null); // reference is direct product.addDBRef(pref); // Add converse mapping reference - if (map != null) + if (dnaToProteinMapping != null) { - Mapping pmap = new Mapping(dna, map.getMap().getInverse()); + Mapping pmap = new Mapping(dna, dnaToProteinMapping.getMap() + .getInverse()); pref = new DBRefEntry(sourceDb, getSequenceVersion(), this.getAccession()); pref.setMap(pmap); - if (map.getTo() != null) + if (dnaToProteinMapping.getTo() != null) { - map.getTo().addDBRef(pref); + dnaToProteinMapping.getTo().addDBRef(pref); } } } dna.addDBRef(ref); } - if (noProteinDbref && product != null) + } + + /* + * if we have a product (translation) but no explicit Uniprot dbref + * (example: EMBL AAFI02000057 protein_id EAL65544.1) + * then construct mappings to an assumed EMBLCDSPROTEIN accession + */ + if (!hasUniprotDbref && product != null) + { + if (proteinToEmblProteinRef == null) { - // add protein coding reference to dna sequence so xref matches - if (protEMBLCDS == null) - { - protEMBLCDS = new DBRefEntry(); - protEMBLCDS.setAccessionId(prid); - protEMBLCDS.setSource(DBRefSource.EMBLCDSProduct); - protEMBLCDS.setVersion(getSequenceVersion()); - protEMBLCDS - .setMap(new Mapping(product, map.getMap().getInverse())); - } - product.addDBRef(protEMBLCDS); + // assuming CDSPROTEIN sequence version = dna version (?!) 
+ proteinToEmblProteinRef = new DBRefEntry( + DBRefSource.EMBLCDSProduct, getSequenceVersion(), proteinId); + } + product.addDBRef(proteinToEmblProteinRef); + product.setSourceDBRef(proteinToEmblProteinRef); - // Add converse mapping reference - if (map != null) - { - Mapping pmap = new Mapping(product, protEMBLCDS.getMap().getMap() - .getInverse()); - DBRefEntry ncMap = new DBRefEntry(protEMBLCDS); - ncMap.setMap(pmap); - if (map.getTo() != null) - { - dna.addDBRef(ncMap); - } - } + if (dnaToProteinMapping != null + && dnaToProteinMapping.getTo() != null) + { + DBRefEntry dnaToEmblProteinRef = new DBRefEntry( + DBRefSource.EMBLCDSProduct, getSequenceVersion(), proteinId); + dnaToEmblProteinRef.setMap(dnaToProteinMapping); + dnaToProteinMapping.setMappedFromId(proteinId); + dna.addDBRef(dnaToEmblProteinRef); } } } @@ -612,26 +650,30 @@ public class EmblEntry } /** - * truncate the last exon interval to the prlength'th codon + * Truncates (if necessary) the exon intervals to match 3 times the length of + * the protein; also accepts 3 bases longer (for stop codon not included in + * protein) * - * @param prlength + * @param proteinLength * @param exon - * @return new exon + * an array of [start, end, start, end...] 
intervals + * @return the same array (if unchanged) or a truncated copy */ - static int[] adjustForProteinLength(int prlength, int[] exon) + static int[] adjustForProteinLength(int proteinLength, int[] exon) { - if (prlength <= 0 || exon == null) + if (proteinLength <= 0 || exon == null) { return exon; } - int desiredCdsLength = prlength * 3; + int expectedCdsLength = proteinLength * 3; int exonLength = MappingUtils.getLength(Arrays.asList(exon)); /* - * assuming here exon might include stop codon in addition to protein codons + * if exon length matches protein, or is shorter, or longer by the + * length of a stop codon (3 bases), then leave it unchanged */ - if (desiredCdsLength == exonLength - || desiredCdsLength == exonLength - 3) + if (expectedCdsLength >= exonLength + || expectedCdsLength == exonLength - 3) { return exon; } @@ -645,11 +687,11 @@ public class EmblEntry for (int x = 0; x < exon.length; x += 2) { cdspos += Math.abs(exon[x + 1] - exon[x]) + 1; - if (desiredCdsLength <= cdspos) + if (expectedCdsLength <= cdspos) { // advanced beyond last codon. 
sxpos = x; - if (desiredCdsLength != cdspos) + if (expectedCdsLength != cdspos) { // System.err // .println("Truncating final exon interval on region by " @@ -662,11 +704,11 @@ public class EmblEntry */ if (exon[x + 1] >= exon[x]) { - endxon = exon[x + 1] - cdspos + desiredCdsLength; + endxon = exon[x + 1] - cdspos + expectedCdsLength; } else { - endxon = exon[x + 1] + cdspos - desiredCdsLength; + endxon = exon[x + 1] + cdspos - expectedCdsLength; } break; } diff --git a/src/jalview/ext/ensembl/EnsemblSeqProxy.java b/src/jalview/ext/ensembl/EnsemblSeqProxy.java index c86469f..31552af 100644 --- a/src/jalview/ext/ensembl/EnsemblSeqProxy.java +++ b/src/jalview/ext/ensembl/EnsemblSeqProxy.java @@ -5,7 +5,6 @@ import jalview.analysis.Dna; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; -import jalview.datamodel.DBRefSource; import jalview.datamodel.Mapping; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; @@ -315,13 +314,6 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient for (DBRefEntry xref : xrefs) { seq.addDBRef(xref); - /* - * Save any Uniprot xref to be the reference for SIFTS mapping - */ - if (DBRefSource.UNIPROT.equals(xref.getSource())) - { - seq.setSourceDBRef(xref); - } } /* @@ -330,6 +322,7 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient DBRefEntry self = new DBRefEntry(getDbSource(), getEnsemblDataVersion(), seq.getName()); seq.addDBRef(self); + seq.setSourceDBRef(self); } /** @@ -387,8 +380,9 @@ public abstract class EnsemblSeqProxy extends EnsemblRestClient if (ids.contains(name) || ids.contains(name.replace("ENSP", "ENST"))) { - DBRefUtils.parseToDbRef(sq, getDbSource(), + DBRefEntry dbref = DBRefUtils.parseToDbRef(sq, getDbSource(), getEnsemblDataVersion(), name); + sq.setSourceDBRef(dbref); } } if (alignment == null) diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java index 133aab4..88b0c35 100644 
--- a/src/jalview/gui/AlignFrame.java +++ b/src/jalview/gui/AlignFrame.java @@ -4633,38 +4633,38 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, } /** - * Searches selected sequences for xRef products and builds the Show - * Cross-References menu (formerly called Show Products) + * Searches the alignment sequences for xRefs and builds the Show + * Cross-References menu (formerly called Show Products), with database + * sources for which cross-references are found (protein sources for a + * nucleotide alignment and vice versa) * - * @return true if Show Cross-references menu should be enabled. + * @return true if Show Cross-references menu should be enabled */ public boolean canShowProducts() { - SequenceI[] selection = viewport.getSequenceSelection(); + SequenceI[] seqs = viewport.getAlignment().getSequencesArray(); AlignmentI dataset = viewport.getAlignment().getDataset(); boolean showp = false; try { showProducts.removeAll(); final boolean dna = viewport.getAlignment().isNucleotide(); - String[] ptypes = (selection == null || selection.length == 0) ? null - : CrossRef.findSequenceXrefTypes(dna, selection, dataset); + List ptypes = (seqs == null || seqs.length == 0) ? 
null + : new CrossRef(seqs, dataset) + .findXrefSourcesForSequences(dna); - for (int t = 0; ptypes != null && t < ptypes.length; t++) + for (final String source : ptypes) { showp = true; final AlignFrame af = this; - final String source = ptypes[t]; - JMenuItem xtype = new JMenuItem(ptypes[t]); + JMenuItem xtype = new JMenuItem(source); xtype.addActionListener(new ActionListener() { - @Override public void actionPerformed(ActionEvent e) { showProductsFor(af.viewport.getSequenceSelection(), dna, source); } - }); showProducts.add(xtype); } @@ -4672,7 +4672,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, showProducts.setEnabled(showp); } catch (Exception e) { - jalview.bin.Cache.log + Cache.log .warn("canShowProducts threw an exception - please report to help@jalview.org", e); return false; @@ -4691,7 +4691,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, * @param source * the database to show cross-references for */ - protected void showProductsFor(final SequenceI[] sel, final boolean dna, + protected void showProductsFor(final SequenceI[] sel, final boolean _odna, final String source) { Runnable foo = new Runnable() @@ -4708,156 +4708,168 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, { AlignmentI alignment = AlignFrame.this.getViewport() .getAlignment(); - AlignmentI xrefs = CrossRef.findXrefSequences(sel, dna, source, - alignment); - if (xrefs != null) + AlignmentI dataset = alignment.getDataset() == null ? alignment + : alignment.getDataset(); + boolean dna = alignment.isNucleotide(); + if (_odna!=dna) { - /* - * get display scheme (if any) to apply to features - */ - FeatureSettingsModelI featureColourScheme = new SequenceFetcher() - .getFeatureColourScheme(source); + System.err + .println("Conflict: showProducts for alignment originally " + + "thought to be " + + (_odna ? "DNA" : "Protein") + + " now searching for " + + (dna ? 
"DNA" : "Protein") + " Context."); + } + AlignmentI xrefs = new CrossRef(sel, dataset) + .findXrefSequences(source, dna); + if (xrefs == null) + { + return; + } + /* + * get display scheme (if any) to apply to features + */ + FeatureSettingsModelI featureColourScheme = new SequenceFetcher() + .getFeatureColourScheme(source); + + AlignmentI xrefsAlignment = makeCrossReferencesAlignment(dataset, + xrefs); + final SequenceI[] sequenceSelection = AlignFrame.this.viewport + .getSequenceSelection(); + if (!dna) + { + xrefsAlignment = AlignmentUtils.makeCdsAlignment( + xrefsAlignment.getSequencesArray(), dataset, + sequenceSelection); + xrefsAlignment.alignAs(alignment); + } - AlignmentI al = makeCrossReferencesAlignment( - alignment.getDataset(), xrefs); + AlignFrame newFrame = new AlignFrame(xrefsAlignment, DEFAULT_WIDTH, + DEFAULT_HEIGHT); + if (Cache.getDefault("HIDE_INTRONS", true)) + { + newFrame.hideFeatureColumns(SequenceOntologyI.EXON, false); + } + String newtitle = String.format("%s %s %s", MessageManager + .getString(dna ? "label.proteins" : "label.nucleotides"), + MessageManager.getString("label.for"), getTitle()); + newFrame.setTitle(newtitle); - AlignFrame newFrame = new AlignFrame(al, DEFAULT_WIDTH, + if (!Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true)) + { + /* + * split frame display is turned off in preferences file + */ + Desktop.addInternalFrame(newFrame, newtitle, DEFAULT_WIDTH, DEFAULT_HEIGHT); - if (Cache.getDefault("HIDE_INTRONS", true)) - { - newFrame.hideFeatureColumns(SequenceOntologyI.EXON, false); - } - String newtitle = String.format("%s %s %s", - MessageManager.getString(dna ? "label.proteins" - : "label.nucleotides"), MessageManager - .getString("label.for"), getTitle()); - newFrame.setTitle(newtitle); + return; // via finally clause + } - if (!Cache.getDefault(Preferences.ENABLE_SPLIT_FRAME, true)) + /* + * Make a copy of this alignment (sharing the same dataset + * sequences). 
If we are DNA, drop introns and update mappings + */ + AlignmentI copyAlignment = null; + boolean copyAlignmentIsAligned = false; + if (dna) + { + copyAlignment = AlignmentUtils.makeCdsAlignment( + sequenceSelection, dataset, + xrefsAlignment.getSequencesArray()); + if (copyAlignment.getHeight() == 0) { - /* - * split frame display is turned off in preferences file - */ - Desktop.addInternalFrame(newFrame, newtitle, DEFAULT_WIDTH, - DEFAULT_HEIGHT); - return; // via finally clause + System.err.println("Failed to make CDS alignment"); } /* - * Make a copy of this alignment (sharing the same dataset - * sequences). If we are DNA, drop introns and update mappings + * pending getting Embl transcripts to 'align', + * we are only doing this for Ensembl */ - AlignmentI copyAlignment = null; - final SequenceI[] sequenceSelection = AlignFrame.this.viewport - .getSequenceSelection(); - List cf = xrefs.getCodonFrames(); - boolean copyAlignmentIsAligned = false; - if (dna) - { - copyAlignment = AlignmentUtils.makeCdsAlignment( - sequenceSelection, cf, alignment); - if (copyAlignment.getHeight() == 0) - { - System.err.println("Failed to make CDS alignment"); - } - al.getCodonFrames().clear(); - al.addCodonFrames(copyAlignment.getCodonFrames()); - al.addCodonFrames(cf); - - /* - * pending getting Embl transcripts to 'align', - * we are only doing this for Ensembl - */ - // TODO proper criteria for 'can align as cdna' - if (DBRefSource.ENSEMBL.equalsIgnoreCase(source) - || AlignmentUtils.looksLikeEnsembl(alignment)) - { - copyAlignment.alignAs(alignment); - copyAlignmentIsAligned = true; - } - } - else + // TODO proper criteria for 'can align as cdna' + if (DBRefSource.ENSEMBL.equalsIgnoreCase(source) + || AlignmentUtils.looksLikeEnsembl(alignment)) { - copyAlignment = AlignmentUtils.makeCopyAlignment( - sequenceSelection, xrefs.getSequencesArray()); - copyAlignment.addCodonFrames(cf); - al.addCodonFrames(copyAlignment.getCodonFrames()); - al.addCodonFrames(cf); + 
copyAlignment.alignAs(alignment); + copyAlignmentIsAligned = true; } - copyAlignment.setGapCharacter(AlignFrame.this.viewport - .getGapCharacter()); + } + else + { + copyAlignment = AlignmentUtils.makeCopyAlignment( + sequenceSelection, xrefs.getSequencesArray(), dataset); + } + copyAlignment.setGapCharacter(AlignFrame.this.viewport + .getGapCharacter()); - StructureSelectionManager ssm = StructureSelectionManager - .getStructureSelectionManager(Desktop.instance); - ssm.registerMappings(cf); + StructureSelectionManager ssm = StructureSelectionManager + .getStructureSelectionManager(Desktop.instance); - if (copyAlignment.getHeight() <= 0) - { - System.err.println("No Sequences generated for xRef type " - + source); - return; - } + /* + * register any new mappings for sequence mouseover etc + * (will not duplicate any previously registered mappings) + */ + ssm.registerMappings(dataset.getCodonFrames()); + + if (copyAlignment.getHeight() <= 0) + { + System.err.println("No Sequences generated for xRef type " + + source); + return; + } + /* + * align protein to dna + */ + if (dna && copyAlignmentIsAligned) + { + xrefsAlignment.alignAs(copyAlignment); + } + else + { /* - * align protein to dna + * align cdna to protein - currently only if + * fetching and aligning Ensembl transcripts! */ - if (dna && copyAlignmentIsAligned) + if (dna && DBRefSource.ENSEMBL.equalsIgnoreCase(source)) { - al.alignAs(copyAlignment); - } - else - { - /* - * align cdna to protein - currently only if - * fetching and aligning Ensembl transcripts! 
- */ - if (DBRefSource.ENSEMBL.equalsIgnoreCase(source)) - { - copyAlignment.alignAs(al); - } + copyAlignment.alignAs(xrefsAlignment); } + } - AlignFrame copyThis = new AlignFrame(copyAlignment, - AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); - copyThis.setTitle(AlignFrame.this.getTitle()); + AlignFrame copyThis = new AlignFrame(copyAlignment, + AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); + copyThis.setTitle(AlignFrame.this.getTitle()); - boolean showSequenceFeatures = viewport - .isShowSequenceFeatures(); - newFrame.setShowSeqFeatures(showSequenceFeatures); - copyThis.setShowSeqFeatures(showSequenceFeatures); - FeatureRenderer myFeatureStyling = alignPanel.getSeqPanel().seqCanvas - .getFeatureRenderer(); + boolean showSequenceFeatures = viewport.isShowSequenceFeatures(); + newFrame.setShowSeqFeatures(showSequenceFeatures); + copyThis.setShowSeqFeatures(showSequenceFeatures); + FeatureRenderer myFeatureStyling = alignPanel.getSeqPanel().seqCanvas + .getFeatureRenderer(); - /* - * copy feature rendering settings to split frame - */ - newFrame.alignPanel.getSeqPanel().seqCanvas - .getFeatureRenderer() - .transferSettings(myFeatureStyling); - copyThis.alignPanel.getSeqPanel().seqCanvas - .getFeatureRenderer() - .transferSettings(myFeatureStyling); + /* + * copy feature rendering settings to split frame + */ + newFrame.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer() + .transferSettings(myFeatureStyling); + copyThis.alignPanel.getSeqPanel().seqCanvas.getFeatureRenderer() + .transferSettings(myFeatureStyling); - /* - * apply 'database source' feature configuration - * if any was found - */ - // TODO is this the feature colouring for the original - // alignment or the fetched xrefs? either could be Ensembl - newFrame.getViewport().applyFeaturesStyle(featureColourScheme); - copyThis.getViewport().applyFeaturesStyle(featureColourScheme); - - SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame, - dna ? 
newFrame : copyThis); - newFrame.setVisible(true); - copyThis.setVisible(true); - String linkedTitle = MessageManager - .getString("label.linked_view_title"); - Desktop.addInternalFrame(sf, linkedTitle, -1, -1); - sf.adjustDivider(); - } - } catch (Exception e) - { - Cache.log.error("Exception when finding crossreferences", e); + /* + * apply 'database source' feature configuration + * if any was found + */ + // TODO is this the feature colouring for the original + // alignment or the fetched xrefs? either could be Ensembl + newFrame.getViewport().applyFeaturesStyle(featureColourScheme); + copyThis.getViewport().applyFeaturesStyle(featureColourScheme); + + SplitFrame sf = new SplitFrame(dna ? copyThis : newFrame, + dna ? newFrame : copyThis); + newFrame.setVisible(true); + copyThis.setVisible(true); + String linkedTitle = MessageManager + .getString("label.linked_view_title"); + Desktop.addInternalFrame(sf, linkedTitle, -1, -1); + sf.adjustDivider(); } catch (OutOfMemoryError e) { new OOMWarning("whilst fetching crossreferences", e); @@ -4873,11 +4885,8 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, } /** - * Makes an alignment containing the given sequences. If this is of the - * same type as the given dataset (nucleotide/protein), then the new - * alignment shares the same dataset, and its dataset sequences are added - * to it. Otherwise a new dataset sequence is created for the - * cross-references. 
+ * Makes an alignment containing the given sequences, and adds them to the + * given dataset, which is also set as the dataset for the new alignment * * @param dataset * @param seqs @@ -4886,32 +4895,20 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, protected AlignmentI makeCrossReferencesAlignment(AlignmentI dataset, AlignmentI seqs) { - boolean sameType = dataset.isNucleotide() == seqs.isNucleotide(); - SequenceI[] sprods = new SequenceI[seqs.getHeight()]; for (int s = 0; s < sprods.length; s++) { sprods[s] = (seqs.getSequenceAt(s)).deriveSequence(); - if (sameType) + if (dataset.getSequences() == null + || !dataset.getSequences().contains( + sprods[s].getDatasetSequence())) { - if (dataset.getSequences() == null - || !dataset.getSequences().contains( - sprods[s].getDatasetSequence())) - { - dataset.addSequence(sprods[s].getDatasetSequence()); - } + dataset.addSequence(sprods[s].getDatasetSequence()); } sprods[s].updatePDBIds(); } Alignment al = new Alignment(sprods); - if (sameType) - { - al.setDataset((Alignment) dataset); - } - else - { - al.createDatasetAlignment(); - } + al.setDataset(dataset); return al; } diff --git a/src/jalview/gui/Jalview2XML.java b/src/jalview/gui/Jalview2XML.java index 945651b..ac85aad 100644 --- a/src/jalview/gui/Jalview2XML.java +++ b/src/jalview/gui/Jalview2XML.java @@ -2713,7 +2713,7 @@ public class Jalview2XML SequenceI[] orderedSeqs = tmpseqs .toArray(new SequenceI[tmpseqs.size()]); - Alignment al = new Alignment(orderedSeqs); + AlignmentI al = new Alignment(orderedSeqs); if (referenceseqForView != null) { @@ -4096,7 +4096,7 @@ public class Jalview2XML } AlignFrame loadViewport(String file, JSeq[] JSEQ, - List hiddenSeqs, Alignment al, + List hiddenSeqs, AlignmentI al, JalviewModelSequence jms, Viewport view, String uniqueSeqSetId, String viewId, List autoAlan) { @@ -4449,7 +4449,7 @@ public class Jalview2XML } private ColourSchemeI constructAnnotationColour( - AnnotationColours viewAnnColour, 
AlignFrame af, Alignment al, + AnnotationColours viewAnnColour, AlignFrame af, AlignmentI al, JalviewModelSequence jms, boolean checkGroupAnnColour) { boolean propagateAnnColour = false; @@ -4573,7 +4573,7 @@ public class Jalview2XML return cs; } - private void reorderAutoannotation(AlignFrame af, Alignment al, + private void reorderAutoannotation(AlignFrame af, AlignmentI al, List autoAlan) { // copy over visualization settings for autocalculated annotation in the @@ -4728,10 +4728,11 @@ public class Jalview2XML } } - private void recoverDatasetFor(SequenceSet vamsasSet, Alignment al, + private void recoverDatasetFor(SequenceSet vamsasSet, AlignmentI al, boolean ignoreUnrefed) { - jalview.datamodel.Alignment ds = getDatasetFor(vamsasSet.getDatasetId()); + jalview.datamodel.AlignmentI ds = getDatasetFor(vamsasSet + .getDatasetId()); Vector dseqs = null; if (ds == null) { @@ -4881,15 +4882,15 @@ public class Jalview2XML * TODO use AlignmentI here and in related methods - needs * AlignmentI.getDataset() changed to return AlignmentI instead of Alignment */ - Hashtable datasetIds = null; + Hashtable datasetIds = null; - IdentityHashMap dataset2Ids = null; + IdentityHashMap dataset2Ids = null; - private Alignment getDatasetFor(String datasetId) + private AlignmentI getDatasetFor(String datasetId) { if (datasetIds == null) { - datasetIds = new Hashtable(); + datasetIds = new Hashtable(); return null; } if (datasetIds.containsKey(datasetId)) @@ -4899,11 +4900,11 @@ public class Jalview2XML return null; } - private void addDatasetRef(String datasetId, Alignment dataset) + private void addDatasetRef(String datasetId, AlignmentI dataset) { if (datasetIds == null) { - datasetIds = new Hashtable(); + datasetIds = new Hashtable(); } datasetIds.put(datasetId, dataset); } @@ -4914,7 +4915,7 @@ public class Jalview2XML * @param dataset * @return */ - private String getDatasetIdRef(Alignment dataset) + private String getDatasetIdRef(AlignmentI dataset) { if (dataset.getDataset() != 
null) { @@ -4926,7 +4927,7 @@ public class Jalview2XML // make a new datasetId and record it if (dataset2Ids == null) { - dataset2Ids = new IdentityHashMap(); + dataset2Ids = new IdentityHashMap(); } else { diff --git a/src/jalview/gui/PCAPanel.java b/src/jalview/gui/PCAPanel.java index 47add28..2b09eb6 100644 --- a/src/jalview/gui/PCAPanel.java +++ b/src/jalview/gui/PCAPanel.java @@ -21,6 +21,7 @@ package jalview.gui; import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentI; import jalview.datamodel.AlignmentView; import jalview.datamodel.ColumnSelection; import jalview.datamodel.SeqCigar; @@ -383,8 +384,8 @@ public class PCAPanel extends GPCAPanel implements Runnable, { // AlignmentOrder origorder = new AlignmentOrder(alAndColsel[0]); - Alignment al = new Alignment((SequenceI[]) alAndColsel[0]); - Alignment dataset = (av != null && av.getAlignment() != null) ? av + AlignmentI al = new Alignment((SequenceI[]) alAndColsel[0]); + AlignmentI dataset = (av != null && av.getAlignment() != null) ? 
av .getAlignment().getDataset() : null; if (dataset != null) { diff --git a/src/jalview/gui/SequenceFetcher.java b/src/jalview/gui/SequenceFetcher.java index 03bb375..828a2aa 100755 --- a/src/jalview/gui/SequenceFetcher.java +++ b/src/jalview/gui/SequenceFetcher.java @@ -836,10 +836,8 @@ public class SequenceFetcher extends JPanel implements Runnable Cache.log.info( "Error retrieving " + accession + " from " + proxy.getDbName(), e); - } finally - { - return success; } + return success; } /** @@ -859,7 +857,6 @@ public class SequenceFetcher extends JPanel implements Runnable for (String q : queries) { - DBRefEntry[] found = null; DBRefEntry dbr = new DBRefEntry(); dbr.setSource(proxy.getDbSource()); dbr.setVersion(null); @@ -870,8 +867,9 @@ public class SequenceFetcher extends JPanel implements Runnable { if (rs[r] != null) { - found = DBRefUtils.searchRefs(rs[r].getDBRefs(), accId); - if (found != null && found.length > 0) + List found = DBRefUtils.searchRefs(rs[r].getDBRefs(), + accId); + if (!found.isEmpty()) { rfound = true; break; diff --git a/src/jalview/gui/SplitFrame.java b/src/jalview/gui/SplitFrame.java index 617224f..3b96be8 100644 --- a/src/jalview/gui/SplitFrame.java +++ b/src/jalview/gui/SplitFrame.java @@ -206,6 +206,7 @@ public class SplitFrame extends GSplitFrame implements SplitContainerI final AlignmentI bottomAlignment = bottomViewport.getAlignment(); boolean topAnnotations = topViewport.isShowAnnotation(); boolean bottomAnnotations = bottomViewport.isShowAnnotation(); + // TODO need number of visible sequences here, not #sequences - how? int topCount = topAlignment.getHeight(); int bottomCount = bottomAlignment.getHeight(); int topCharHeight = topViewport.getViewStyle().getCharHeight(); @@ -223,6 +224,11 @@ public class SplitFrame extends GSplitFrame implements SplitContainerI + (bottomAnnotations ? 
bottomViewport.calcPanelHeight() : 0); double ratio = ((double) topHeight) / (topHeight + bottomHeight); + /* + * limit to 0.2 <= ratio <= 0.8 to avoid concealing all sequences + */ + ratio = Math.min(ratio, 0.8d); + ratio = Math.max(ratio, 0.2d); setRelativeDividerLocation(ratio); } diff --git a/src/jalview/gui/TreePanel.java b/src/jalview/gui/TreePanel.java index d78350d..fafa610 100755 --- a/src/jalview/gui/TreePanel.java +++ b/src/jalview/gui/TreePanel.java @@ -520,8 +520,8 @@ public class TreePanel extends GTreePanel { // AlignmentOrder origorder = new AlignmentOrder(alAndColsel[0]); - Alignment al = new Alignment((SequenceI[]) alAndColsel[0]); - Alignment dataset = (av != null && av.getAlignment() != null) ? av + AlignmentI al = new Alignment((SequenceI[]) alAndColsel[0]); + AlignmentI dataset = (av != null && av.getAlignment() != null) ? av .getAlignment().getDataset() : null; if (dataset != null) { diff --git a/src/jalview/util/Comparison.java b/src/jalview/util/Comparison.java index 5605a53..0beb45b 100644 --- a/src/jalview/util/Comparison.java +++ b/src/jalview/util/Comparison.java @@ -249,6 +249,18 @@ public class Comparison } /** + * Overloaded method signature to test whether a single sequence is nucleotide + * (that is, more than 85% CGTA) + * + * @param seq + * @return + */ + public static final boolean isNucleotide(SequenceI seq) + { + return isNucleotide(new SequenceI[] { seq }); + } + + /** * Answers true if more than 85% of the sequence residues (ignoring gaps) are * A, G, C, T or U, else false. This is just a heuristic guess and may give a * wrong answer (as AGCT are also amino acid codes). 
diff --git a/src/jalview/util/DBRefUtils.java b/src/jalview/util/DBRefUtils.java index 424d40b..d5d0cf5 100755 --- a/src/jalview/util/DBRefUtils.java +++ b/src/jalview/util/DBRefUtils.java @@ -67,11 +67,14 @@ public class DBRefUtils } /** + * Returns those DBRefEntry objects whose source identifier (once converted to + * Jalview's canonical form) is in the list of sources to search for. Returns + * null if no matches found. * * @param dbrefs - * array of DBRef objects to search + * DBRefEntry objects to search * @param sources - * String[] array of source DBRef IDs to retrieve + * array of sources to select * @return */ public static DBRefEntry[] selectRefs(DBRefEntry[] dbrefs, @@ -148,8 +151,8 @@ public class DBRefUtils } /** - * Returns an array of those references that match the given entry, or null if - * no matches. Currently uses a comparator which matches if + * Returns a (possibly empty) list of those references that match the given + * entry. Currently uses a comparator which matches if *
    *
  • database sources are the same
  • *
  • accession ids are the same
  • @@ -162,34 +165,35 @@ public class DBRefUtils * pattern to match * @return */ - public static DBRefEntry[] searchRefs(DBRefEntry[] ref, DBRefEntry entry) + public static List searchRefs(DBRefEntry[] ref, + DBRefEntry entry) { return searchRefs(ref, entry, matchDbAndIdAndEitherMapOrEquivalentMapList); } /** - * Returns an array of those references that match the given accession id + * Returns a list of those references that match the given accession id *
      *
    • database sources are the same
    • *
    • accession ids are the same
    • *
    • both have no mapping, or the mappings are the same
    • *
    * - * @param ref + * @param refs * Set of references to search - * @param entry - * pattern to match + * @param accId + * accession id to match * @return */ - public static DBRefEntry[] searchRefs(DBRefEntry[] ref, String accId) + public static List searchRefs(DBRefEntry[] refs, String accId) { - return searchRefs(ref, new DBRefEntry("", "", accId), matchId); + return searchRefs(refs, new DBRefEntry("", "", accId), matchId); } /** - * Returns an array of those references that match the given entry, according - * to the given comparator. Returns null if no matches. + * Returns a (possibly empty) list of those references that match the given + * entry, according to the given comparator. * * @param refs * an array of database references to search @@ -198,14 +202,14 @@ public class DBRefUtils * @param comparator * @return */ - static DBRefEntry[] searchRefs(DBRefEntry[] refs, DBRefEntry entry, + static List searchRefs(DBRefEntry[] refs, DBRefEntry entry, DbRefComp comparator) { + List rfs = new ArrayList(); if (refs == null || entry == null) { - return null; + return rfs; } - List rfs = new ArrayList(); for (int i = 0; i < refs.length; i++) { if (comparator.matches(entry, refs[i])) @@ -213,7 +217,7 @@ public class DBRefUtils rfs.add(refs[i]); } } - return rfs.size() == 0 ? null : rfs.toArray(new DBRefEntry[rfs.size()]); + return rfs; } interface DbRefComp @@ -380,9 +384,9 @@ public class DBRefUtils }; /** - * accession ID and DB must be identical. Version is ignored. No map on either - * or map but no maplist on either or maplist of map on a is equivalent to the - * maplist of map on b. + * accession ID and DB must be identical, or null on a. Version is ignored. No + * map on either or map but no maplist on either or maplist of map on a is + * equivalent to the maplist of map on b. 
*/ public static DbRefComp matchDbAndIdAndEitherMapOrEquivalentMapList = new DbRefComp() { @@ -393,8 +397,9 @@ public class DBRefUtils && refb.getSource().equals(refa.getSource())) { // We dont care about version - if (refa.getAccessionId() != null && refb.getAccessionId() != null - && refb.getAccessionId().equals(refa.getAccessionId())) + + if (refa.getAccessionId() == null + || refa.getAccessionId().equals(refb.getAccessionId())) { if (refa.getMap() == null || refb.getMap() == null) { @@ -406,7 +411,7 @@ public class DBRefUtils || (refb.getMap().getMap() != null && refa.getMap().getMap() != null && (refb .getMap().getMap().equals(refa.getMap().getMap())))) - { // getMap().getMap().containsEither(false,refa.getMap().getMap()) + { return true; } } @@ -519,4 +524,49 @@ public class DBRefUtils return (o1 == null ? o2.equals(o1) : o1.equals(o2)); } + /** + * Selects just the DNA or protein references from a set of references + * + * @param selectDna + * if true, select references to 'standard' DNA databases, else to + * 'standard' peptide databases + * @param refs + * a set of references to select from + * @return + */ + public static DBRefEntry[] selectDbRefs(boolean selectDna, + DBRefEntry[] refs) + { + return selectRefs(refs, selectDna ? 
DBRefSource.DNACODINGDBS + : DBRefSource.PROTEINDBS); + // could attempt to find other cross + // refs here - ie PDB xrefs + // (not dna, not protein seq) + } + + /** + * Returns the (possibly empty) list of those supplied dbrefs which have the + * specified source database, with a case-insensitive match of source name + * + * @param dbRefs + * @param source + * @return + */ + public static List searchRefsForSource(DBRefEntry[] dbRefs, + String source) + { + List matches = new ArrayList(); + if (dbRefs != null && source != null) + { + for (DBRefEntry dbref : dbRefs) + { + if (source.equalsIgnoreCase(dbref.getSource())) + { + matches.add(dbref); + } + } + } + return matches; + } + } diff --git a/src/jalview/util/MapList.java b/src/jalview/util/MapList.java index e51442c..cae968e 100644 --- a/src/jalview/util/MapList.java +++ b/src/jalview/util/MapList.java @@ -88,8 +88,6 @@ public class MapList @Override public boolean equals(Object o) { - // TODO should also override hashCode to ensure equal objects have equal - // hashcodes if (o == null || !(o instanceof MapList)) { return false; @@ -112,6 +110,19 @@ public class MapList } /** + * Returns a hashcode made from the fromRatio, toRatio, and from/to ranges + */ + @Override + public int hashCode() + { + int hashCode = 31 * fromRatio; + hashCode = 31 * hashCode + toRatio; + hashCode = 31 * hashCode + fromShifts.toArray().hashCode(); + hashCode = 31 * hashCode + toShifts.toArray().hashCode(); + return hashCode; + } + + /** * Returns the 'from' ranges as {[start1, end1], [start2, end2], ...} * * @return @@ -215,7 +226,7 @@ public class MapList { /* * note lowest and highest values - bearing in mind the - * direction may be revesed + * direction may be reversed */ fromLowest = Math.min(fromLowest, Math.min(from[i], from[i + 1])); fromHighest = Math.max(fromHighest, Math.max(from[i], from[i + 1])); @@ -992,6 +1003,10 @@ public class MapList */ public void addMapList(MapList map) { + if (this.equals(map)) + { + return; + } 
this.fromLowest = Math.min(fromLowest, map.fromLowest); this.toLowest = Math.min(toLowest, map.toLowest); this.fromHighest = Math.max(fromHighest, map.fromHighest); @@ -1087,4 +1102,5 @@ public class MapList } return forwardStrand; } + } diff --git a/src/jalview/ws/AWSThread.java b/src/jalview/ws/AWSThread.java index b158448..2ef5256 100644 --- a/src/jalview/ws/AWSThread.java +++ b/src/jalview/ws/AWSThread.java @@ -31,9 +31,8 @@ import jalview.gui.WebserviceInfo; import jalview.util.MessageManager; import jalview.viewmodel.seqfeatures.FeatureRendererSettings; -import java.util.LinkedHashSet; +import java.util.ArrayList; import java.util.List; -import java.util.Set; public abstract class AWSThread extends Thread { @@ -61,7 +60,7 @@ public abstract class AWSThread extends Thread /** * dataset sequence relationships to be propagated onto new results */ - protected Set codonframe = null; + protected List codonframe = null; /** * are there jobs still running in this thread. @@ -384,7 +383,7 @@ public abstract class AWSThread extends Thread .getCodonFrames(); if (cf != null) { - codonframe = new LinkedHashSet(); + codonframe = new ArrayList(); codonframe.addAll(cf); } } diff --git a/src/jalview/ws/SequenceFetcherFactory.java b/src/jalview/ws/SequenceFetcherFactory.java new file mode 100644 index 0000000..2b8f364 --- /dev/null +++ b/src/jalview/ws/SequenceFetcherFactory.java @@ -0,0 +1,32 @@ +package jalview.ws; + +import jalview.ws.seqfetcher.ASequenceFetcher; + +public class SequenceFetcherFactory +{ + + private static SequenceFetcher instance; + + /** + * Returns a new SequenceFetcher object, or a mock object if one has been set + * + * @return + */ + public static ASequenceFetcher getSequenceFetcher() + { + return instance == null ? new SequenceFetcher() : instance; + } + + /** + * Set the instance object to use (intended for unit testing with mock + * objects). + * + * Be sure to reset to null in the tearDown method of any tests! 
+ * + * @param sf + */ + public static void setSequenceFetcher(SequenceFetcher sf) + { + instance = sf; + } +} diff --git a/src/jalview/ws/jws1/MsaWSClient.java b/src/jalview/ws/jws1/MsaWSClient.java index 95f5527..aad72b1 100644 --- a/src/jalview/ws/jws1/MsaWSClient.java +++ b/src/jalview/ws/jws1/MsaWSClient.java @@ -20,7 +20,7 @@ */ package jalview.ws.jws1; -import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentI; import jalview.datamodel.AlignmentView; import jalview.gui.AlignFrame; import jalview.gui.Desktop; @@ -34,7 +34,6 @@ import javax.swing.JMenu; import javax.swing.JMenuItem; import javax.swing.JOptionPane; -import ext.vamsas.MuscleWS; import ext.vamsas.MuscleWSServiceLocator; import ext.vamsas.MuscleWSSoapBindingStub; import ext.vamsas.ServiceHandle; @@ -72,7 +71,7 @@ public class MsaWSClient extends WS1Client public MsaWSClient(ext.vamsas.ServiceHandle sh, String altitle, jalview.datamodel.AlignmentView msa, boolean submitGaps, - boolean preserveOrder, Alignment seqdataset, + boolean preserveOrder, AlignmentI seqdataset, AlignFrame _alignFrame) { super(); @@ -109,7 +108,7 @@ public class MsaWSClient extends WS1Client } private void startMsaWSClient(String altitle, AlignmentView msa, - boolean submitGaps, boolean preserveOrder, Alignment seqdataset) + boolean submitGaps, boolean preserveOrder, AlignmentI seqdataset) { if (!locateWebService()) { @@ -159,7 +158,7 @@ public class MsaWSClient extends WS1Client try { - this.server = (MuscleWS) loc.getMuscleWS(new java.net.URL(WsURL)); + this.server = loc.getMuscleWS(new java.net.URL(WsURL)); ((MuscleWSSoapBindingStub) this.server).setTimeout(60000); // One minute // timeout } catch (Exception ex) @@ -201,6 +200,7 @@ public class MsaWSClient extends WS1Client return (WebServiceName.indexOf("lustal") > -1); // cheat! 
} + @Override public void attachWSMenuEntry(JMenu msawsmenu, final ServiceHandle serviceHandle, final AlignFrame alignFrame) { @@ -209,6 +209,7 @@ public class MsaWSClient extends WS1Client method.setToolTipText(WsURL); method.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { AlignmentView msa = alignFrame.gatherSequencesForAlignment(); @@ -228,6 +229,7 @@ public class MsaWSClient extends WS1Client methodR.setToolTipText(WsURL); methodR.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { AlignmentView msa = alignFrame.gatherSequencesForAlignment(); diff --git a/src/jalview/ws/jws1/MsaWSThread.java b/src/jalview/ws/jws1/MsaWSThread.java index be21de7..3fd7c5a 100644 --- a/src/jalview/ws/jws1/MsaWSThread.java +++ b/src/jalview/ws/jws1/MsaWSThread.java @@ -23,6 +23,7 @@ package jalview.ws.jws1; import jalview.analysis.AlignSeq; import jalview.bin.Cache; import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentI; import jalview.datamodel.AlignmentOrder; import jalview.datamodel.AlignmentView; import jalview.datamodel.ColumnSelection; @@ -147,6 +148,7 @@ class MsaWSThread extends JWS1Thread implements WSClientI * * @return true if getAlignment will return a valid alignment result. */ + @Override public boolean hasResults() { if (subjobComplete && result != null && result.isFinished() @@ -273,6 +275,7 @@ class MsaWSThread extends JWS1Thread implements WSClientI * * @return boolean true if job can be submitted. */ + @Override public boolean hasValidInput() { if (seqs.getSeqs() != null) @@ -285,7 +288,7 @@ class MsaWSThread extends JWS1Thread implements WSClientI String alTitle; // name which will be used to form new alignment window. - Alignment dataset; // dataset to which the new alignment will be + AlignmentI dataset; // dataset to which the new alignment will be // associated. 
@@ -327,7 +330,7 @@ class MsaWSThread extends JWS1Thread implements WSClientI MsaWSThread(ext.vamsas.MuscleWS server, String wsUrl, WebserviceInfo wsinfo, jalview.gui.AlignFrame alFrame, String wsname, String title, AlignmentView _msa, boolean subgaps, - boolean presorder, Alignment seqset) + boolean presorder, AlignmentI seqset) { this(server, wsUrl, wsinfo, alFrame, _msa, wsname, subgaps, presorder); OutputHeader = wsInfo.getProgressText(); @@ -359,11 +362,13 @@ class MsaWSThread extends JWS1Thread implements WSClientI } } + @Override public boolean isCancellable() { return true; } + @Override public void cancelJob() { if (!jobComplete && jobs != null) @@ -430,11 +435,13 @@ class MsaWSThread extends JWS1Thread implements WSClientI } } + @Override public void pollJob(AWsJob job) throws Exception { ((MsaWSJob) job).result = server.getResult(((MsaWSJob) job).getJobId()); } + @Override public void StartJob(AWsJob job) { if (!(job instanceof MsaWSJob)) @@ -521,6 +528,7 @@ class MsaWSThread extends JWS1Thread implements WSClientI return msa; } + @Override public void parseResult() { int results = 0; // number of result sets received @@ -571,6 +579,7 @@ class MsaWSThread extends JWS1Thread implements WSClientI wsInfo.showResultsNewFrame .addActionListener(new java.awt.event.ActionListener() { + @Override public void actionPerformed(java.awt.event.ActionEvent evt) { displayResults(true); @@ -579,6 +588,7 @@ class MsaWSThread extends JWS1Thread implements WSClientI wsInfo.mergeResults .addActionListener(new java.awt.event.ActionListener() { + @Override public void actionPerformed(java.awt.event.ActionEvent evt) { displayResults(false); @@ -661,7 +671,7 @@ class MsaWSThread extends JWS1Thread implements WSClientI while (j < l) { if (((AlignmentOrder) alorders.get(i)) - .equals(((AlignmentOrder) alorders.get(j)))) + .equals((alorders.get(j)))) { alorders.remove(j); l--; @@ -704,6 +714,7 @@ class MsaWSThread extends JWS1Thread implements WSClientI } } + @Override public 
boolean canMergeResults() { return false; diff --git a/src/jalview/ws/jws1/SeqSearchWSClient.java b/src/jalview/ws/jws1/SeqSearchWSClient.java index d731ced..2d83bf9 100644 --- a/src/jalview/ws/jws1/SeqSearchWSClient.java +++ b/src/jalview/ws/jws1/SeqSearchWSClient.java @@ -20,7 +20,7 @@ */ package jalview.ws.jws1; -import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentI; import jalview.datamodel.AlignmentView; import jalview.gui.AlignFrame; import jalview.gui.Desktop; @@ -39,7 +39,6 @@ import javax.swing.JMenu; import javax.swing.JMenuItem; import javax.swing.JOptionPane; -import ext.vamsas.SeqSearchI; import ext.vamsas.SeqSearchServiceLocator; import ext.vamsas.SeqSearchServiceSoapBindingStub; import ext.vamsas.ServiceHandle; @@ -77,7 +76,7 @@ public class SeqSearchWSClient extends WS1Client public SeqSearchWSClient(ext.vamsas.ServiceHandle sh, String altitle, jalview.datamodel.AlignmentView msa, String db, - Alignment seqdataset, AlignFrame _alignFrame) + AlignmentI seqdataset, AlignFrame _alignFrame) { super(); alignFrame = _alignFrame; @@ -128,7 +127,7 @@ public class SeqSearchWSClient extends WS1Client } private void startSeqSearchClient(String altitle, AlignmentView msa, - String db, Alignment seqdataset) + String db, AlignmentI seqdataset) { if (!locateWebService()) { @@ -173,7 +172,7 @@ public class SeqSearchWSClient extends WS1Client try { - this.server = (SeqSearchI) loc.getSeqSearchService(new java.net.URL( + this.server = loc.getSeqSearchService(new java.net.URL( WsURL)); ((SeqSearchServiceSoapBindingStub) this.server).setTimeout(60000); // One // minute @@ -241,6 +240,7 @@ public class SeqSearchWSClient extends WS1Client return dbs; } + @Override public void attachWSMenuEntry(JMenu wsmenu, final ServiceHandle sh, final AlignFrame af) { @@ -281,6 +281,7 @@ public class SeqSearchWSClient extends WS1Client method.setToolTipText(sh.getEndpointURL()); method.addActionListener(new ActionListener() { + @Override public void 
actionPerformed(ActionEvent e) { // use same input gatherer as for secondary structure prediction @@ -305,6 +306,7 @@ public class SeqSearchWSClient extends WS1Client final String searchdb = dbs[db]; method.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { AlignmentView msa = af.gatherSeqOrMsaForSecStrPrediction(); diff --git a/src/jalview/ws/jws1/SeqSearchWSThread.java b/src/jalview/ws/jws1/SeqSearchWSThread.java index 66fddd1..70056a6 100644 --- a/src/jalview/ws/jws1/SeqSearchWSThread.java +++ b/src/jalview/ws/jws1/SeqSearchWSThread.java @@ -24,6 +24,7 @@ import jalview.analysis.AlignSeq; import jalview.api.FeatureColourI; import jalview.bin.Cache; import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentI; import jalview.datamodel.AlignmentView; import jalview.datamodel.SequenceI; import jalview.gui.AlignFrame; @@ -172,7 +173,7 @@ class SeqSearchWSThread extends JWS1Thread implements WSClientI * * @return null or { Alignment(+features and annotation), NewickFile)} */ - public Object[] getAlignment(Alignment dataset, + public Object[] getAlignment(AlignmentI dataset, Map featureColours) { @@ -303,7 +304,7 @@ class SeqSearchWSThread extends JWS1Thread implements WSClientI String alTitle; // name which will be used to form new alignment window. - Alignment dataset; // dataset to which the new alignment will be + AlignmentI dataset; // dataset to which the new alignment will be // associated. 
@@ -345,7 +346,7 @@ class SeqSearchWSThread extends JWS1Thread implements WSClientI SeqSearchWSThread(ext.vamsas.SeqSearchI server, String wsUrl, WebserviceInfo wsinfo, jalview.gui.AlignFrame alFrame, String wsname, String title, AlignmentView _msa, String db, - Alignment seqset) + AlignmentI seqset) { this(server, wsUrl, wsinfo, alFrame, _msa, wsname, db); OutputHeader = wsInfo.getProgressText(); diff --git a/src/jalview/ws/jws2/MsaWSClient.java b/src/jalview/ws/jws2/MsaWSClient.java index c83ef0f..758d941 100644 --- a/src/jalview/ws/jws2/MsaWSClient.java +++ b/src/jalview/ws/jws2/MsaWSClient.java @@ -20,7 +20,7 @@ */ package jalview.ws.jws2; -import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentI; import jalview.datamodel.AlignmentView; import jalview.gui.AlignFrame; import jalview.gui.Desktop; @@ -58,7 +58,7 @@ public class MsaWSClient extends Jws2Client public MsaWSClient(Jws2Instance sh, String altitle, jalview.datamodel.AlignmentView msa, boolean submitGaps, - boolean preserveOrder, Alignment seqdataset, + boolean preserveOrder, AlignmentI seqdataset, AlignFrame _alignFrame) { this(sh, null, null, false, altitle, msa, submitGaps, preserveOrder, @@ -68,7 +68,7 @@ public class MsaWSClient extends Jws2Client public MsaWSClient(Jws2Instance sh, WsParamSetI preset, String altitle, jalview.datamodel.AlignmentView msa, boolean submitGaps, - boolean preserveOrder, Alignment seqdataset, + boolean preserveOrder, AlignmentI seqdataset, AlignFrame _alignFrame) { this(sh, preset, null, false, altitle, msa, submitGaps, preserveOrder, @@ -95,7 +95,7 @@ public class MsaWSClient extends Jws2Client public MsaWSClient(Jws2Instance sh, WsParamSetI preset, List arguments, boolean editParams, String altitle, jalview.datamodel.AlignmentView msa, boolean submitGaps, - boolean preserveOrder, Alignment seqdataset, + boolean preserveOrder, AlignmentI seqdataset, AlignFrame _alignFrame) { super(_alignFrame, preset, arguments); @@ -138,7 +138,7 @@ public class 
MsaWSClient extends Jws2Client } private void startMsaWSClient(String altitle, AlignmentView msa, - boolean submitGaps, boolean preserveOrder, Alignment seqdataset) + boolean submitGaps, boolean preserveOrder, AlignmentI seqdataset) { // if (!locateWebService()) // { diff --git a/src/jalview/ws/jws2/MsaWSThread.java b/src/jalview/ws/jws2/MsaWSThread.java index e2f3a7c..e425624 100644 --- a/src/jalview/ws/jws2/MsaWSThread.java +++ b/src/jalview/ws/jws2/MsaWSThread.java @@ -176,6 +176,7 @@ class MsaWSThread extends AWS2Thread implements WSClientI * * @return true if getAlignment will return a valid alignment result. */ + @Override public boolean hasResults() { if (subjobComplete @@ -316,6 +317,7 @@ class MsaWSThread extends AWS2Thread implements WSClientI * * @return boolean true if job can be submitted. */ + @Override public boolean hasValidInput() { // TODO: get attributes for this MsaWS instance to check if it can do two @@ -436,7 +438,7 @@ class MsaWSThread extends AWS2Thread implements WSClientI String alTitle; // name which will be used to form new alignment window. - Alignment dataset; // dataset to which the new alignment will be + AlignmentI dataset; // dataset to which the new alignment will be // associated. 
@@ -479,7 +481,7 @@ class MsaWSThread extends AWS2Thread implements WSClientI String wsUrl, WebserviceInfo wsinfo, jalview.gui.AlignFrame alFrame, String wsname, String title, AlignmentView _msa, boolean subgaps, boolean presorder, - Alignment seqset) + AlignmentI seqset) { this(server2, wsUrl, wsinfo, alFrame, _msa, wsname, subgaps, presorder); OutputHeader = wsInfo.getProgressText(); @@ -530,11 +532,13 @@ class MsaWSThread extends AWS2Thread implements WSClientI return validInput; } + @Override public boolean isCancellable() { return true; } + @Override public void cancelJob() { if (!jobComplete && jobs != null) @@ -605,6 +609,7 @@ class MsaWSThread extends AWS2Thread implements WSClientI } } + @Override public void pollJob(AWsJob job) throws Exception { // TODO: investigate if we still need to cast here in J1.6 @@ -650,6 +655,7 @@ class MsaWSThread extends AWS2Thread implements WSClientI return changed; } + @Override public void StartJob(AWsJob job) { Exception lex = null; @@ -775,6 +781,7 @@ class MsaWSThread extends AWS2Thread implements WSClientI } } + @Override public void parseResult() { long progbar = System.currentTimeMillis(); @@ -889,6 +896,7 @@ class MsaWSThread extends AWS2Thread implements WSClientI wsInfo.showResultsNewFrame .addActionListener(new java.awt.event.ActionListener() { + @Override public void actionPerformed(java.awt.event.ActionEvent evt) { displayResults(true); @@ -897,6 +905,7 @@ class MsaWSThread extends AWS2Thread implements WSClientI wsInfo.mergeResults .addActionListener(new java.awt.event.ActionListener() { + @Override public void actionPerformed(java.awt.event.ActionEvent evt) { displayResults(false); @@ -1023,6 +1032,10 @@ class MsaWSThread extends AWS2Thread implements WSClientI // becomes null if the alignment window was closed before the alignment // job finished. AlignmentI copyComplement = new Alignment(complement); + // todo should this be done by copy constructor? 
+ copyComplement.setGapCharacter(complement.getGapCharacter()); + // share the same dataset (and the mappings it holds) + copyComplement.setDataset(complement.getDataset()); copyComplement.alignAs(al); if (copyComplement.getHeight() > 0) { @@ -1101,6 +1114,7 @@ class MsaWSThread extends AWS2Thread implements WSClientI } } + @Override public boolean canMergeResults() { return false; diff --git a/src/jalview/ws/seqfetcher/ASequenceFetcher.java b/src/jalview/ws/seqfetcher/ASequenceFetcher.java index 2392476..33a917e 100644 --- a/src/jalview/ws/seqfetcher/ASequenceFetcher.java +++ b/src/jalview/ws/seqfetcher/ASequenceFetcher.java @@ -55,7 +55,7 @@ public class ASequenceFetcher /** * Constructor */ - public ASequenceFetcher() + protected ASequenceFetcher() { super(); @@ -125,20 +125,20 @@ public class ASequenceFetcher * if true, only fetch from nucleotide data sources, else peptide * @return */ - public SequenceI[] getSequences(DBRefEntry[] refs, boolean dna) + public SequenceI[] getSequences(List refs, boolean dna) { Vector rseqs = new Vector(); Hashtable> queries = new Hashtable>(); - for (int r = 0; r < refs.length; r++) + for (DBRefEntry ref : refs) { - if (!queries.containsKey(refs[r].getSource())) + if (!queries.containsKey(ref.getSource())) { - queries.put(refs[r].getSource(), new ArrayList()); + queries.put(ref.getSource(), new ArrayList()); } - List qset = queries.get(refs[r].getSource()); - if (!qset.contains(refs[r].getAccessionId())) + List qset = queries.get(ref.getSource()); + if (!qset.contains(ref.getAccessionId())) { - qset.add(refs[r].getAccessionId()); + qset.add(ref.getAccessionId()); } } Enumeration e = queries.keys(); @@ -205,15 +205,12 @@ public class ASequenceFetcher for (int is = 0; is < seqs.length; is++) { rseqs.addElement(seqs[is]); - DBRefEntry[] frefs = DBRefUtils.searchRefs(seqs[is] + List frefs = DBRefUtils.searchRefs(seqs[is] .getDBRefs(), new DBRefEntry(db, null, null)); - if (frefs != null) + for (DBRefEntry dbr : frefs) { - for 
(DBRefEntry dbr : frefs) - { - queriesFound.add(dbr.getAccessionId()); - queriesMade.remove(dbr.getAccessionId()); - } + queriesFound.add(dbr.getAccessionId()); + queriesMade.remove(dbr.getAccessionId()); } seqs[is] = null; } diff --git a/test/jalview/analysis/AlignmentUtilsTests.java b/test/jalview/analysis/AlignmentUtilsTests.java index 2fc5325..5c75992 100644 --- a/test/jalview/analysis/AlignmentUtilsTests.java +++ b/test/jalview/analysis/AlignmentUtilsTests.java @@ -22,6 +22,7 @@ package jalview.analysis; import static org.testng.AssertJUnit.assertEquals; import static org.testng.AssertJUnit.assertFalse; +import static org.testng.AssertJUnit.assertNotNull; import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.assertSame; import static org.testng.AssertJUnit.assertTrue; @@ -47,7 +48,6 @@ import jalview.util.MappingUtils; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -974,117 +974,167 @@ public class AlignmentUtilsTests @Test(groups = { "Functional" }) public void testMakeCdsAlignment() { + /* + * scenario: + * dna1 --> [4, 6] [10,12] --> pep1 + * dna2 --> [1, 3] [7, 9] [13,15] --> pep1 + */ SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa"); SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC"); SequenceI pep1 = new Sequence("pep1", "GF"); SequenceI pep2 = new Sequence("pep2", "GFP"); + pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "pep1")); + pep2.addDBRef(new DBRefEntry("UNIPROT", "0", "pep2")); dna1.createDatasetSequence(); dna2.createDatasetSequence(); pep1.createDatasetSequence(); pep2.createDatasetSequence(); - dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 6, 0f, - null)); - dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 10, 12, 0f, - null)); - dna2.addSequenceFeature(new SequenceFeature("CDS", "cds3", 1, 3, 0f, - null)); - 
dna2.addSequenceFeature(new SequenceFeature("CDS", "cds4", 7, 9, 0f, - null)); - dna2.addSequenceFeature(new SequenceFeature("CDS", "cds5", 13, 15, 0f, - null)); AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 }); dna.setDataset(null); - List mappings = new ArrayList(); + /* + * need a sourceDbRef if we are to construct dbrefs to the CDS + * sequence + */ + DBRefEntry dbref = new DBRefEntry("ENSEMBL", "0", "dna1"); + dna1.getDatasetSequence().setSourceDBRef(dbref); + dbref = new DBRefEntry("ENSEMBL", "0", "dna2"); + dna2.getDatasetSequence().setSourceDBRef(dbref); + + /* + * CDS sequences are 'discovered' from dna-to-protein mappings on the alignment + * dataset (e.g. added from dbrefs by CrossRef.findXrefSequences) + */ MapList map = new MapList(new int[] { 4, 6, 10, 12 }, new int[] { 1, 2 }, 3, 1); AlignedCodonFrame acf = new AlignedCodonFrame(); acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map); - mappings.add(acf); + dna.addCodonFrame(acf); map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 }, 3, 1); acf = new AlignedCodonFrame(); acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map); - mappings.add(acf); + dna.addCodonFrame(acf); /* * execute method under test: */ AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] { - dna1, dna2 }, mappings, dna); + dna1, dna2 }, dna.getDataset(), null); + /* + * verify cds sequences + */ assertEquals(2, cds.getSequences().size()); - assertEquals("GGGTTT", cds.getSequenceAt(0) - .getSequenceAsString()); - assertEquals("GGGTTTCCC", cds.getSequenceAt(1) - .getSequenceAsString()); + assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString()); + assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString()); /* * verify shared, extended alignment dataset */ assertSame(dna.getDataset(), cds.getDataset()); - assertTrue(dna.getDataset().getSequences() - .contains(cds.getSequenceAt(0).getDatasetSequence())); - 
assertTrue(dna.getDataset().getSequences() - .contains(cds.getSequenceAt(1).getDatasetSequence())); + SequenceI cds1Dss = cds.getSequenceAt(0).getDatasetSequence(); + SequenceI cds2Dss = cds.getSequenceAt(1).getDatasetSequence(); + assertTrue(dna.getDataset().getSequences().contains(cds1Dss)); + assertTrue(dna.getDataset().getSequences().contains(cds2Dss)); + + /* + * verify CDS has a dbref with mapping to peptide + */ + assertNotNull(cds1Dss.getDBRefs()); + assertEquals(1, cds1Dss.getDBRefs().length); + dbref = cds1Dss.getDBRefs()[0]; + assertEquals("UNIPROT", dbref.getSource()); + assertEquals("0", dbref.getVersion()); + assertEquals("pep1", dbref.getAccessionId()); + assertNotNull(dbref.getMap()); + assertSame(pep1.getDatasetSequence(), dbref.getMap().getTo()); + MapList cdsMapping = new MapList(new int[] { 1, 6 }, + new int[] { 1, 2 }, 3, 1); + assertEquals(cdsMapping, dbref.getMap().getMap()); + + /* + * verify peptide has added a dbref with reverse mapping to CDS + */ + assertNotNull(pep1.getDBRefs()); + assertEquals(2, pep1.getDBRefs().length); + dbref = pep1.getDBRefs()[1]; + assertEquals("ENSEMBL", dbref.getSource()); + assertEquals("0", dbref.getVersion()); + assertEquals("CDS|dna1", dbref.getAccessionId()); + assertNotNull(dbref.getMap()); + assertSame(cds1Dss, dbref.getMap().getTo()); + assertEquals(cdsMapping.getInverse(), dbref.getMap().getMap()); /* - * Verify mappings from CDS to peptide and cDNA to CDS + * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide * the mappings are on the shared alignment dataset + * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep) */ - assertSame(dna.getCodonFrames(), cds.getCodonFrames()); - List cdsMappings = cds.getCodonFrames(); - assertEquals(2, cdsMappings.size()); - + List cdsMappings = cds.getDataset().getCodonFrames(); + assertEquals(6, cdsMappings.size()); + + /* + * verify that mapping sets for dna and cds alignments are different + * [not current behaviour - all mappings are on the 
alignment dataset] + */ + // select -> subselect type to test. + // Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames()); + // assertEquals(4, dna.getCodonFrames().size()); + // assertEquals(4, cds.getCodonFrames().size()); + /* + * Two mappings involve pep1 (dna to pep1, cds to pep1) * Mapping from pep1 to GGGTTT in first new exon sequence */ - List pep1Mapping = MappingUtils + List pep1Mappings = MappingUtils .findMappingsForSequence(pep1, cdsMappings); - assertEquals(1, pep1Mapping.size()); + assertEquals(2, pep1Mappings.size()); + List mappings = MappingUtils + .findMappingsForSequence(cds.getSequenceAt(0), pep1Mappings); + assertEquals(1, mappings.size()); + // map G to GGG - SearchResults sr = MappingUtils - .buildSearchResults(pep1, 1, cdsMappings); + SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, mappings); assertEquals(1, sr.getResults().size()); Match m = sr.getResults().get(0); - assertSame(cds.getSequenceAt(0).getDatasetSequence(), - m.getSequence()); + assertSame(cds1Dss, m.getSequence()); assertEquals(1, m.getStart()); assertEquals(3, m.getEnd()); // map F to TTT - sr = MappingUtils.buildSearchResults(pep1, 2, cdsMappings); + sr = MappingUtils.buildSearchResults(pep1, 2, mappings); m = sr.getResults().get(0); - assertSame(cds.getSequenceAt(0).getDatasetSequence(), - m.getSequence()); + assertSame(cds1Dss, m.getSequence()); assertEquals(4, m.getStart()); assertEquals(6, m.getEnd()); /* - * Mapping from pep2 to GGGTTTCCC in second new exon sequence + * Two mappings involve pep2 (dna to pep2, cds to pep2) + * Verify mapping from pep2 to GGGTTTCCC in second new exon sequence */ - List pep2Mapping = MappingUtils + List pep2Mappings = MappingUtils .findMappingsForSequence(pep2, cdsMappings); - assertEquals(1, pep2Mapping.size()); + assertEquals(2, pep2Mappings.size()); + mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1), + pep2Mappings); + assertEquals(1, mappings.size()); // map G to GGG - sr = 
MappingUtils.buildSearchResults(pep2, 1, cdsMappings); + sr = MappingUtils.buildSearchResults(pep2, 1, mappings); assertEquals(1, sr.getResults().size()); m = sr.getResults().get(0); - assertSame(cds.getSequenceAt(1).getDatasetSequence(), - m.getSequence()); + assertSame(cds2Dss, m.getSequence()); assertEquals(1, m.getStart()); assertEquals(3, m.getEnd()); // map F to TTT - sr = MappingUtils.buildSearchResults(pep2, 2, cdsMappings); + sr = MappingUtils.buildSearchResults(pep2, 2, mappings); m = sr.getResults().get(0); - assertSame(cds.getSequenceAt(1).getDatasetSequence(), - m.getSequence()); + assertSame(cds2Dss, m.getSequence()); assertEquals(4, m.getStart()); assertEquals(6, m.getEnd()); // map P to CCC - sr = MappingUtils.buildSearchResults(pep2, 3, cdsMappings); + sr = MappingUtils.buildSearchResults(pep2, 3, mappings); m = sr.getResults().get(0); - assertSame(cds.getSequenceAt(1).getDatasetSequence(), - m.getSequence()); + assertSame(cds2Dss, m.getSequence()); assertEquals(7, m.getStart()); assertEquals(9, m.getEnd()); } @@ -1105,18 +1155,6 @@ public class AlignmentUtilsTests pep1.createDatasetSequence(); pep2.createDatasetSequence(); pep3.createDatasetSequence(); - dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 6, 0f, - null)); - dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 10, 12, 0f, - null)); - dna1.addSequenceFeature(new SequenceFeature("CDS", "cds3", 1, 3, 0f, - null)); - dna1.addSequenceFeature(new SequenceFeature("CDS", "cds4", 7, 9, 0f, - null)); - dna1.addSequenceFeature(new SequenceFeature("CDS", "cds5", 1, 3, 0f, - null)); - dna1.addSequenceFeature(new SequenceFeature("CDS", "cds6", 10, 12, 0f, - null)); pep1.getDatasetSequence().addDBRef( new DBRefEntry("EMBLCDS", "2", "A12345")); pep2.getDatasetSequence().addDBRef( @@ -1125,40 +1163,38 @@ public class AlignmentUtilsTests new DBRefEntry("EMBLCDS", "4", "A12347")); /* + * Create the CDS alignment + */ + AlignmentI dna = new Alignment(new SequenceI[] { dna1 }); + 
dna.setDataset(null); + + /* * Make the mappings from dna to protein */ - List mappings = new ArrayList(); // map ...GGG...TTT to GF MapList map = new MapList(new int[] { 4, 6, 10, 12 }, new int[] { 1, 2 }, 3, 1); AlignedCodonFrame acf = new AlignedCodonFrame(); acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map); - mappings.add(acf); + dna.addCodonFrame(acf); // map aaa...ccc to KP map = new MapList(new int[] { 1, 3, 7, 9 }, new int[] { 1, 2 }, 3, 1); acf = new AlignedCodonFrame(); acf.addMap(dna1.getDatasetSequence(), pep2.getDatasetSequence(), map); - mappings.add(acf); + dna.addCodonFrame(acf); // map aaa......TTT to KF map = new MapList(new int[] { 1, 3, 10, 12 }, new int[] { 1, 2 }, 3, 1); acf = new AlignedCodonFrame(); acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map); - mappings.add(acf); - - /* - * Create the CDS alignment; also augments the dna-to-protein mappings with - * exon-to-protein and exon-to-dna mappings - */ - AlignmentI dna = new Alignment(new SequenceI[] { dna1 }); - dna.setDataset(null); + dna.addCodonFrame(acf); /* * execute method under test */ AlignmentI cdsal = AlignmentUtils.makeCdsAlignment( - new SequenceI[] { dna1 }, mappings, dna); + new SequenceI[] { dna1 }, dna.getDataset(), null); /* * Verify we have 3 cds sequences, mapped to pep1/2/3 respectively @@ -1183,7 +1219,7 @@ public class AlignmentUtilsTests SequenceI cdsSeq = cds.get(0); assertEquals("GGGTTT", cdsSeq.getSequenceAsString()); // assertEquals("dna1|A12345", cdsSeq.getName()); - assertEquals("dna1|pep1", cdsSeq.getName()); + assertEquals("CDS|dna1", cdsSeq.getName()); // assertEquals(1, cdsSeq.getDBRefs().length); // DBRefEntry cdsRef = cdsSeq.getDBRefs()[0]; // assertEquals("EMBLCDS", cdsRef.getSource()); @@ -1193,7 +1229,7 @@ public class AlignmentUtilsTests cdsSeq = cds.get(1); assertEquals("aaaccc", cdsSeq.getSequenceAsString()); // assertEquals("dna1|A12346", cdsSeq.getName()); - assertEquals("dna1|pep2", cdsSeq.getName()); + 
assertEquals("CDS|dna1", cdsSeq.getName()); // assertEquals(1, cdsSeq.getDBRefs().length); // cdsRef = cdsSeq.getDBRefs()[0]; // assertEquals("EMBLCDS", cdsRef.getSource()); @@ -1203,7 +1239,7 @@ public class AlignmentUtilsTests cdsSeq = cds.get(2); assertEquals("aaaTTT", cdsSeq.getSequenceAsString()); // assertEquals("dna1|A12347", cdsSeq.getName()); - assertEquals("dna1|pep3", cdsSeq.getName()); + assertEquals("CDS|dna1", cdsSeq.getName()); // assertEquals(1, cdsSeq.getDBRefs().length); // cdsRef = cdsSeq.getDBRefs()[0]; // assertEquals("EMBLCDS", cdsRef.getSource()); @@ -1214,41 +1250,74 @@ public class AlignmentUtilsTests * Verify there are mappings from each cds sequence to its protein product * and also to its dna source */ - Iterator newMappingsIterator = cdsal - .getCodonFrames().iterator(); + List newMappings = cdsal.getCodonFrames(); - // mappings for dna1 - exon1 - pep1 - AlignedCodonFrame cdsMapping = newMappingsIterator.next(); - List dnaMappings = cdsMapping.getMappingsFromSequence(dna1); - assertEquals(3, dnaMappings.size()); - assertSame(cds.get(0).getDatasetSequence(), dnaMappings.get(0) - .getTo()); - assertEquals("G(1) in CDS should map to G(4) in DNA", 4, dnaMappings - .get(0).getMap().getToPosition(1)); - List peptideMappings = cdsMapping.getMappingsFromSequence(cds - .get(0).getDatasetSequence()); - assertEquals(1, peptideMappings.size()); - assertSame(pep1.getDatasetSequence(), peptideMappings.get(0).getTo()); - - // mappings for dna1 - cds2 - pep2 - assertSame(cds.get(1).getDatasetSequence(), dnaMappings.get(1) - .getTo()); - assertEquals("c(4) in CDS should map to c(7) in DNA", 7, dnaMappings - .get(1).getMap().getToPosition(4)); - peptideMappings = cdsMapping.getMappingsFromSequence(cds.get(1) - .getDatasetSequence()); - assertEquals(1, peptideMappings.size()); - assertSame(pep2.getDatasetSequence(), peptideMappings.get(0).getTo()); - - // mappings for dna1 - cds3 - pep3 - assertSame(cds.get(2).getDatasetSequence(), dnaMappings.get(2) + /* 
+ * 6 mappings involve dna1 (to pep1/2/3, cds1/2/3) + */ + List dnaMappings = MappingUtils + .findMappingsForSequence(dna1, newMappings); + assertEquals(6, dnaMappings.size()); + + /* + * dna1 to pep1 + */ + List mappings = MappingUtils + .findMappingsForSequence(pep1, dnaMappings); + assertEquals(1, mappings.size()); + assertEquals(1, mappings.get(0).getMappings().size()); + assertSame(pep1.getDatasetSequence(), mappings.get(0).getMappings() + .get(0).getMapping().getTo()); + + /* + * dna1 to cds1 + */ + List dnaToCds1Mappings = MappingUtils + .findMappingsForSequence(cds.get(0), dnaMappings); + Mapping mapping = dnaToCds1Mappings.get(0).getMappings().get(0) + .getMapping(); + assertSame(cds.get(0).getDatasetSequence(), mapping .getTo()); - assertEquals("T(4) in CDS should map to T(10) in DNA", 10, dnaMappings - .get(2).getMap().getToPosition(4)); - peptideMappings = cdsMapping.getMappingsFromSequence(cds.get(2) - .getDatasetSequence()); - assertEquals(1, peptideMappings.size()); - assertSame(pep3.getDatasetSequence(), peptideMappings.get(0).getTo()); + assertEquals("G(1) in CDS should map to G(4) in DNA", 4, mapping + .getMap().getToPosition(1)); + + /* + * dna1 to pep2 + */ + mappings = MappingUtils.findMappingsForSequence(pep2, dnaMappings); + assertEquals(1, mappings.size()); + assertEquals(1, mappings.get(0).getMappings().size()); + assertSame(pep2.getDatasetSequence(), mappings.get(0).getMappings() + .get(0).getMapping().getTo()); + + /* + * dna1 to cds2 + */ + List dnaToCds2Mappings = MappingUtils + .findMappingsForSequence(cds.get(1), dnaMappings); + mapping = dnaToCds2Mappings.get(0).getMappings().get(0).getMapping(); + assertSame(cds.get(1).getDatasetSequence(), mapping.getTo()); + assertEquals("c(4) in CDS should map to c(7) in DNA", 7, mapping + .getMap().getToPosition(4)); + + /* + * dna1 to pep3 + */ + mappings = MappingUtils.findMappingsForSequence(pep3, dnaMappings); + assertEquals(1, mappings.size()); + assertEquals(1, 
mappings.get(0).getMappings().size()); + assertSame(pep3.getDatasetSequence(), mappings.get(0).getMappings() + .get(0).getMapping().getTo()); + + /* + * dna1 to cds3 + */ + List dnaToCds3Mappings = MappingUtils + .findMappingsForSequence(cds.get(2), dnaMappings); + mapping = dnaToCds3Mappings.get(0).getMappings().get(0).getMapping(); + assertSame(cds.get(2).getDatasetSequence(), mapping.getTo()); + assertEquals("T(4) in CDS should map to T(10) in DNA", 10, mapping + .getMap().getToPosition(4)); } @Test(groups = { "Functional" }) @@ -1497,36 +1566,24 @@ public class AlignmentUtilsTests dna3.createDatasetSequence(); pep1.createDatasetSequence(); pep2.createDatasetSequence(); - dna1.addSequenceFeature(new SequenceFeature("CDS", "cds1", 4, 8, 0f, - null)); - dna1.addSequenceFeature(new SequenceFeature("CDS", "cds2", 9, 12, 0f, - null)); - dna1.addSequenceFeature(new SequenceFeature("CDS", "cds3", 16, 18, 0f, - null)); - dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 4, 8, 0f, - null)); - dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 12, 12, 0f, - null)); - dna2.addSequenceFeature(new SequenceFeature("CDS", "cds", 16, 18, 0f, - null)); + + AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 }); + dna.setDataset(null); - List mappings = new ArrayList(); MapList map = new MapList(new int[] { 4, 12, 16, 18 }, new int[] { 1, 4 }, 3, 1); AlignedCodonFrame acf = new AlignedCodonFrame(); acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map); - mappings.add(acf); + dna.addCodonFrame(acf); map = new MapList(new int[] { 4, 8, 12, 12, 16, 18 }, new int[] { 1, 3 }, 3, 1); acf = new AlignedCodonFrame(); acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map); - mappings.add(acf); + dna.addCodonFrame(acf); - AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2, dna3 }); - dna.setDataset(null); AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] { - dna1, dna2, dna3 }, mappings, dna); + dna1, dna2, 
dna3 }, dna.getDataset(), null); List cdsSeqs = cds.getSequences(); assertEquals(2, cdsSeqs.size()); assertEquals("GGGCCCTTTGGG", cdsSeqs.get(0).getSequenceAsString()); @@ -1542,59 +1599,69 @@ public class AlignmentUtilsTests .contains(cdsSeqs.get(1).getDatasetSequence())); /* - * Verify updated mappings + * Verify 6 mappings: dna1 to cds1, cds1 to pep1, dna1 to pep1 + * and the same for dna2/cds2/pep2 */ - List cdsMappings = cds.getCodonFrames(); - assertEquals(2, cdsMappings.size()); + List mappings = cds.getCodonFrames(); + assertEquals(6, mappings.size()); /* - * Mapping from pep1 to GGGTTT in first new CDS sequence + * 2 mappings involve pep1 */ - List pep1Mapping = MappingUtils - .findMappingsForSequence(pep1, cdsMappings); - assertEquals(1, pep1Mapping.size()); + List pep1Mappings = MappingUtils + .findMappingsForSequence(pep1, mappings); + assertEquals(2, pep1Mappings.size()); + /* + * Get mapping of pep1 to cds1 and verify it * maps GPFG to 1-3,4-6,7-9,10-12 */ - SearchResults sr = MappingUtils - .buildSearchResults(pep1, 1, cdsMappings); + List pep1CdsMappings = MappingUtils + .findMappingsForSequence(cds.getSequenceAt(0), pep1Mappings); + assertEquals(1, pep1CdsMappings.size()); + SearchResults sr = MappingUtils.buildSearchResults(pep1, 1, + pep1CdsMappings); assertEquals(1, sr.getResults().size()); Match m = sr.getResults().get(0); assertEquals(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence()); assertEquals(1, m.getStart()); assertEquals(3, m.getEnd()); - sr = MappingUtils.buildSearchResults(pep1, 2, cdsMappings); + sr = MappingUtils.buildSearchResults(pep1, 2, pep1CdsMappings); m = sr.getResults().get(0); assertEquals(4, m.getStart()); assertEquals(6, m.getEnd()); - sr = MappingUtils.buildSearchResults(pep1, 3, cdsMappings); + sr = MappingUtils.buildSearchResults(pep1, 3, pep1CdsMappings); m = sr.getResults().get(0); assertEquals(7, m.getStart()); assertEquals(9, m.getEnd()); - sr = MappingUtils.buildSearchResults(pep1, 4, cdsMappings); + sr 
= MappingUtils.buildSearchResults(pep1, 4, pep1CdsMappings); m = sr.getResults().get(0); assertEquals(10, m.getStart()); assertEquals(12, m.getEnd()); /* - * GPG in pep2 map to 1-3,4-6,7-9 in second CDS sequence + * Get mapping of pep2 to cds2 and verify it + * maps GPG in pep2 to 1-3,4-6,7-9 in second CDS sequence */ - List pep2Mapping = MappingUtils - .findMappingsForSequence(pep2, cdsMappings); - assertEquals(1, pep2Mapping.size()); - sr = MappingUtils.buildSearchResults(pep2, 1, cdsMappings); + List pep2Mappings = MappingUtils + .findMappingsForSequence(pep2, mappings); + assertEquals(2, pep2Mappings.size()); + List pep2CdsMappings = MappingUtils + .findMappingsForSequence(cds.getSequenceAt(1), pep2Mappings); + assertEquals(1, pep2CdsMappings.size()); + sr = MappingUtils.buildSearchResults(pep2, 1, pep2CdsMappings); assertEquals(1, sr.getResults().size()); m = sr.getResults().get(0); assertEquals(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); assertEquals(1, m.getStart()); assertEquals(3, m.getEnd()); - sr = MappingUtils.buildSearchResults(pep2, 2, cdsMappings); + sr = MappingUtils.buildSearchResults(pep2, 2, pep2CdsMappings); m = sr.getResults().get(0); assertEquals(4, m.getStart()); assertEquals(6, m.getEnd()); - sr = MappingUtils.buildSearchResults(pep2, 3, cdsMappings); + sr = MappingUtils.buildSearchResults(pep2, 3, pep2CdsMappings); m = sr.getResults().get(0); assertEquals(7, m.getStart()); assertEquals(9, m.getEnd()); @@ -2193,4 +2260,187 @@ public class AlignmentUtilsTests assertEquals('T', map.get(11).get(seq1).charValue()); assertEquals('T', map.get(12).get(seq1).charValue()); } + + /** + * Test for the case where the products for which we want CDS are specified. + * This is to represent the case where EMBL has CDS mappings to both Uniprot + * and EMBLCDSPROTEIN. makeCdsAlignment() should only return the mappings for + * the protein sequences specified. 
+ */ + @Test(groups = { "Functional" }) + public void testMakeCdsAlignment_filterProducts() + { + SequenceI dna1 = new Sequence("dna1", "aaaGGGcccTTTaaa"); + SequenceI dna2 = new Sequence("dna2", "GGGcccTTTaaaCCC"); + SequenceI pep1 = new Sequence("Uniprot|pep1", "GF"); + SequenceI pep2 = new Sequence("Uniprot|pep2", "GFP"); + SequenceI pep3 = new Sequence("EMBL|pep3", "GF"); + SequenceI pep4 = new Sequence("EMBL|pep4", "GFP"); + dna1.createDatasetSequence(); + dna2.createDatasetSequence(); + pep1.createDatasetSequence(); + pep2.createDatasetSequence(); + pep3.createDatasetSequence(); + pep4.createDatasetSequence(); + AlignmentI dna = new Alignment(new SequenceI[] { dna1, dna2 }); + dna.setDataset(null); + AlignmentI emblPeptides = new Alignment(new SequenceI[] { pep3, pep4 }); + emblPeptides.setDataset(null); + + AlignedCodonFrame acf = new AlignedCodonFrame(); + MapList map = new MapList(new int[] { 4, 6, 10, 12 }, + new int[] { 1, 2 }, 3, 1); + acf.addMap(dna1.getDatasetSequence(), pep1.getDatasetSequence(), map); + acf.addMap(dna1.getDatasetSequence(), pep3.getDatasetSequence(), map); + dna.addCodonFrame(acf); + + acf = new AlignedCodonFrame(); + map = new MapList(new int[] { 1, 3, 7, 9, 13, 15 }, new int[] { 1, 3 }, + 3, 1); + acf.addMap(dna2.getDatasetSequence(), pep2.getDatasetSequence(), map); + acf.addMap(dna2.getDatasetSequence(), pep4.getDatasetSequence(), map); + dna.addCodonFrame(acf); + + /* + * execute method under test to find CDS for EMBL peptides only + */ + AlignmentI cds = AlignmentUtils.makeCdsAlignment(new SequenceI[] { + dna1, dna2 }, dna.getDataset(), emblPeptides.getSequencesArray()); + + assertEquals(2, cds.getSequences().size()); + assertEquals("GGGTTT", cds.getSequenceAt(0).getSequenceAsString()); + assertEquals("GGGTTTCCC", cds.getSequenceAt(1).getSequenceAsString()); + + /* + * verify shared, extended alignment dataset + */ + assertSame(dna.getDataset(), cds.getDataset()); + assertTrue(dna.getDataset().getSequences() + 
.contains(cds.getSequenceAt(0).getDatasetSequence())); + assertTrue(dna.getDataset().getSequences() + .contains(cds.getSequenceAt(1).getDatasetSequence())); + + /* + * Verify mappings from CDS to peptide, cDNA to CDS, and cDNA to peptide + * the mappings are on the shared alignment dataset + */ + List cdsMappings = cds.getDataset().getCodonFrames(); + /* + * 6 mappings, 2*(DNA->CDS), 2*(DNA->Pep), 2*(CDS->Pep) + */ + assertEquals(6, cdsMappings.size()); + + /* + * verify that mapping sets for dna and cds alignments are different + * [not current behaviour - all mappings are on the alignment dataset] + */ + // select -> subselect type to test. + // Assert.assertNotSame(dna.getCodonFrames(), cds.getCodonFrames()); + // assertEquals(4, dna.getCodonFrames().size()); + // assertEquals(4, cds.getCodonFrames().size()); + + /* + * Two mappings involve pep3 (dna to pep3, cds to pep3) + * Mapping from pep3 to GGGTTT in first new exon sequence + */ + List pep3Mappings = MappingUtils + .findMappingsForSequence(pep3, cdsMappings); + assertEquals(2, pep3Mappings.size()); + List mappings = MappingUtils + .findMappingsForSequence(cds.getSequenceAt(0), pep3Mappings); + assertEquals(1, mappings.size()); + + // map G to GGG + SearchResults sr = MappingUtils.buildSearchResults(pep3, 1, mappings); + assertEquals(1, sr.getResults().size()); + Match m = sr.getResults().get(0); + assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence()); + assertEquals(1, m.getStart()); + assertEquals(3, m.getEnd()); + // map F to TTT + sr = MappingUtils.buildSearchResults(pep3, 2, mappings); + m = sr.getResults().get(0); + assertSame(cds.getSequenceAt(0).getDatasetSequence(), m.getSequence()); + assertEquals(4, m.getStart()); + assertEquals(6, m.getEnd()); + + /* + * Two mappings involve pep4 (dna to pep4, cds to pep4) + * Verify mapping from pep4 to GGGTTTCCC in second new exon sequence + */ + List pep4Mappings = MappingUtils + .findMappingsForSequence(pep4, cdsMappings); + assertEquals(2, 
pep4Mappings.size()); + mappings = MappingUtils.findMappingsForSequence(cds.getSequenceAt(1), + pep4Mappings); + assertEquals(1, mappings.size()); + // map G to GGG + sr = MappingUtils.buildSearchResults(pep4, 1, mappings); + assertEquals(1, sr.getResults().size()); + m = sr.getResults().get(0); + assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); + assertEquals(1, m.getStart()); + assertEquals(3, m.getEnd()); + // map F to TTT + sr = MappingUtils.buildSearchResults(pep4, 2, mappings); + m = sr.getResults().get(0); + assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); + assertEquals(4, m.getStart()); + assertEquals(6, m.getEnd()); + // map P to CCC + sr = MappingUtils.buildSearchResults(pep4, 3, mappings); + m = sr.getResults().get(0); + assertSame(cds.getSequenceAt(1).getDatasetSequence(), m.getSequence()); + assertEquals(7, m.getStart()); + assertEquals(9, m.getEnd()); + } + + /** + * Test the method that just copies aligned sequences, provided all sequences + * to be aligned share the aligned sequence's dataset + */ + @Test(groups = "Functional") + public void testAlignAsSameSequences() + { + SequenceI dna1 = new Sequence("dna1", "cccGGGTTTaaa"); + SequenceI dna2 = new Sequence("dna2", "CCCgggtttAAA"); + AlignmentI al1 = new Alignment(new SequenceI[] { dna1, dna2 }); + ((Alignment) al1).createDatasetAlignment(); + + SequenceI dna3 = new Sequence(dna1); + SequenceI dna4 = new Sequence(dna2); + assertSame(dna3.getDatasetSequence(), dna1.getDatasetSequence()); + assertSame(dna4.getDatasetSequence(), dna2.getDatasetSequence()); + String seq1 = "-cc-GG-GT-TT--aaa"; + dna3.setSequence(seq1); + String seq2 = "C--C-Cgg--gtt-tAA-A-"; + dna4.setSequence(seq2); + AlignmentI al2 = new Alignment(new SequenceI[] { dna3, dna4 }); + ((Alignment) al2).createDatasetAlignment(); + + assertTrue(AlignmentUtils.alignAsSameSequences(al1, al2)); + assertEquals(seq1, al1.getSequenceAt(0).getSequenceAsString()); + assertEquals(seq2, 
al1.getSequenceAt(1).getSequenceAsString()); + + /* + * add another sequence to 'aligned' - should still succeed, since + * unaligned sequences still share a dataset with aligned sequences + */ + SequenceI dna5 = new Sequence("dna5", "CCCgggtttAAA"); + dna5.createDatasetSequence(); + al2.addSequence(dna5); + assertTrue(AlignmentUtils.alignAsSameSequences(al1, al2)); + assertEquals(seq1, al1.getSequenceAt(0).getSequenceAsString()); + assertEquals(seq2, al1.getSequenceAt(1).getSequenceAsString()); + + /* + * add another sequence to 'unaligned' - should fail, since now not + * all unaligned sequences share a dataset with aligned sequences + */ + SequenceI dna6 = new Sequence("dna6", "CCCgggtttAAA"); + dna6.createDatasetSequence(); + al1.addSequence(dna6); + assertFalse(AlignmentUtils.alignAsSameSequences(al1, al2)); + } + } diff --git a/test/jalview/analysis/CrossRefTest.java b/test/jalview/analysis/CrossRefTest.java index bbc23e5..62bcae8 100644 --- a/test/jalview/analysis/CrossRefTest.java +++ b/test/jalview/analysis/CrossRefTest.java @@ -21,10 +21,31 @@ package jalview.analysis; import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertFalse; +import static org.testng.AssertJUnit.assertNotNull; +import static org.testng.AssertJUnit.assertNotSame; +import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.assertSame; +import static org.testng.AssertJUnit.assertTrue; +import jalview.datamodel.AlignedCodonFrame; +import jalview.datamodel.AlignedCodonFrame.SequenceToSequenceMapping; +import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefEntry; +import jalview.datamodel.Mapping; +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; +import jalview.util.DBRefUtils; +import jalview.util.MapList; +import jalview.ws.SequenceFetcher; +import jalview.ws.SequenceFetcherFactory; +import 
java.util.ArrayList; +import java.util.List; + +import org.testng.annotations.AfterClass; import org.testng.annotations.Test; public class CrossRefTest @@ -40,27 +61,674 @@ public class CrossRefTest DBRefEntry ref6 = new DBRefEntry("emblCDS", "1", "A123"); DBRefEntry ref7 = new DBRefEntry("GeneDB", "1", "A123"); DBRefEntry ref8 = new DBRefEntry("PFAM", "1", "A123"); + // ENSEMBL is a source of either dna or protein sequence data + DBRefEntry ref9 = new DBRefEntry("ENSEMBL", "1", "A123"); DBRefEntry[] refs = new DBRefEntry[] { ref1, ref2, ref3, ref4, ref5, - ref6, ref7, ref8 }; + ref6, ref7, ref8, ref9 }; /* * Just the DNA refs: */ - DBRefEntry[] found = CrossRef.findXDbRefs(false, refs); - assertEquals(3, found.length); + DBRefEntry[] found = DBRefUtils.selectDbRefs(true, refs); + assertEquals(4, found.length); assertSame(ref5, found[0]); assertSame(ref6, found[1]); assertSame(ref7, found[2]); + assertSame(ref9, found[3]); /* * Just the protein refs: */ - found = CrossRef.findXDbRefs(true, refs); - assertEquals(4, found.length); + found = DBRefUtils.selectDbRefs(false, refs); + assertEquals(5, found.length); assertSame(ref1, found[0]); assertSame(ref2, found[1]); assertSame(ref3, found[2]); assertSame(ref4, found[3]); + assertSame(ref9, found[4]); + } + + /** + * Test the method that finds a sequence's "product" xref source databases, + * which may be direct (dbrefs on the sequence), or indirect (dbrefs on + * sequences which share a dbref with the sequence + */ + @Test(groups = { "Functional" }, enabled = true) + public void testFindXrefSourcesForSequence_proteinToDna() + { + SequenceI seq = new Sequence("Seq1", "MGKYQARLSS"); + List sources = new ArrayList(); + AlignmentI al = new Alignment(new SequenceI[] {}); + + /* + * first with no dbrefs to search + */ + sources = new CrossRef(new SequenceI[] { seq }, al) + .findXrefSourcesForSequences(false); + assertTrue(sources.isEmpty()); + + /* + * add some dbrefs to sequence + */ + // protein db is not a candidate for 
findXrefSources + seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234")); + // dna coding databases are + seq.addDBRef(new DBRefEntry("EMBL", "0", "E2345")); + // a second EMBL xref should not result in a duplicate + seq.addDBRef(new DBRefEntry("EMBL", "0", "E2346")); + seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347")); + seq.addDBRef(new DBRefEntry("GENEDB", "0", "E2348")); + seq.addDBRef(new DBRefEntry("ENSEMBL", "0", "E2349")); + seq.addDBRef(new DBRefEntry("ENSEMBLGENOMES", "0", "E2350")); + sources = new CrossRef(new SequenceI[] { seq }, al) + .findXrefSourcesForSequences(false); + assertEquals(4, sources.size()); + assertEquals("[EMBL, EMBLCDS, GENEDB, ENSEMBL]", sources.toString()); + + /* + * add a sequence to the alignment which has a dbref to UNIPROT|A1234 + * and others to dna coding databases + */ + sources.clear(); + seq.setDBRefs(null); + seq.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234")); + seq.addDBRef(new DBRefEntry("EMBLCDS", "0", "E2347")); + SequenceI seq2 = new Sequence("Seq2", "MGKYQARLSS"); + seq2.addDBRef(new DBRefEntry("UNIPROT", "0", "A1234")); + seq2.addDBRef(new DBRefEntry("EMBL", "0", "E2345")); + seq2.addDBRef(new DBRefEntry("GENEDB", "0", "E2348")); + // TODO include ENSEMBLGENOMES in DBRefSource.DNACODINGDBS ? 
+ al.addSequence(seq2); + sources = new CrossRef(new SequenceI[] { seq, seq2 }, al) + .findXrefSourcesForSequences(false); + assertEquals(3, sources.size()); + assertEquals("[EMBLCDS, EMBL, GENEDB]", sources.toString()); } + /** + * Test for finding 'product' sequences for the case where only an indirect + * xref is found - not on the nucleotide sequence but on a peptide sequence in + * the alignment with which it shares a nucleotide dbref + */ + @Test(groups = { "Functional" }, enabled = true) + public void testFindXrefSequences_indirectDbrefToProtein() + { + /* + * Alignment setup: + * - nucleotide dbref EMBL|AF039662 + * - peptide dbrefs EMBL|AF039662, UNIPROT|Q9ZTS2 + */ + SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); + emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662")); + SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS"); + uniprotSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662")); + uniprotSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); + + /* + * Find UNIPROT xrefs for nucleotide + * - it has no UNIPROT dbref of its own + * - but peptide with matching nucleotide dbref does, so is returned + */ + AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq }); + Alignment xrefs = new CrossRef(new SequenceI[] { emblSeq }, al) + .findXrefSequences("UNIPROT", true); + assertEquals(1, xrefs.getHeight()); + assertSame(uniprotSeq, xrefs.getSequenceAt(0)); + } + + /** + * Test for finding 'product' sequences for the case where only an indirect + * xref is found - not on the peptide sequence but on a nucleotide sequence in + * the alignment with which it shares a protein dbref + */ + @Test(groups = { "Functional" }, enabled = true) + public void testFindXrefSequences_indirectDbrefToNucleotide() + { + /* + * Alignment setup: + * - peptide dbref UNIPROT|Q9ZTS2 + * - nucleotide dbref EMBL|AF039662, UNIPROT|Q9ZTS2 + */ + SequenceI uniprotSeq = new Sequence("Q9ZTS2", "MASVSATMISTS"); + uniprotSeq.addDBRef(new 
DBRefEntry("UNIPROT", "0", "Q9ZTS2")); + SequenceI emblSeq = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); + emblSeq.addDBRef(new DBRefEntry("EMBL", "0", "AF039662")); + emblSeq.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); + + /* + * find EMBL xrefs for peptide sequence - it has no direct + * dbrefs, but the 'corresponding' nucleotide sequence does, so is returned + */ + /* + * Find EMBL xrefs for peptide + * - it has no EMBL dbref of its own + * - but nucleotide with matching peptide dbref does, so is returned + */ + AlignmentI al = new Alignment(new SequenceI[] { emblSeq, uniprotSeq }); + Alignment xrefs = new CrossRef(new SequenceI[] { uniprotSeq }, al) + .findXrefSequences("EMBL", false); + assertEquals(1, xrefs.getHeight()); + assertSame(emblSeq, xrefs.getSequenceAt(0)); + } + + /** + * Test for finding 'product' sequences for the case where the selected + * sequence has no dbref to the desired source, and there are no indirect + * references via another sequence in the alignment + */ + @Test(groups = { "Functional" }) + public void testFindXrefSequences_noDbrefs() + { + /* + * two nucleotide sequences, one with UNIPROT dbref + */ + SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); + dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); + SequenceI dna2 = new Sequence("AJ307031", "AAACCCTTT"); + + /* + * find UNIPROT xrefs for nucleotide sequence dna2 - it has no direct + * dbrefs, and the other sequence (which has a UNIPROT dbref) is not + * equatable to it, so no results found + */ + AlignmentI al = new Alignment(new SequenceI[] { dna1, dna2 }); + Alignment xrefs = new CrossRef(new SequenceI[] { dna2 }, al) + .findXrefSequences("UNIPROT", true); + assertNull(xrefs); + } + + /** + * Tests for the method that searches an alignment (with one sequence + * excluded) for protein/nucleotide sequences with a given cross-reference + */ + @Test(groups = { "Functional" }, enabled = true) + public void testSearchDataset() + { + /* + * nucleotide sequence 
with UNIPROT AND EMBL dbref + * peptide sequence with UNIPROT dbref + */ + SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); + Mapping map = new Mapping(new Sequence("pep2", "MLAVSRG"), new MapList( + new int[] { 1, 21 }, new int[] { + 1, 7 }, 3, 1)); + DBRefEntry dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map); + dna1.addDBRef(dbref); + dna1.addDBRef(new DBRefEntry("EMBL", "0", "AF039662")); + SequenceI pep1 = new Sequence("Q9ZTS2", "MLAVSRGQ"); + dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2"); + pep1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); + AlignmentI al = new Alignment(new SequenceI[] { dna1, pep1 }); + + List result = new ArrayList(); + + /* + * first search for a dbref nowhere on the alignment: + */ + dbref = new DBRefEntry("UNIPROT", "0", "P30419"); + CrossRef testee = new CrossRef(al.getSequencesArray(), al); + AlignedCodonFrame acf = new AlignedCodonFrame(); + boolean found = testee.searchDataset(true, dna1, dbref, result, acf, + true); + assertFalse(found); + assertTrue(result.isEmpty()); + assertTrue(acf.isEmpty()); + + /* + * search for a protein sequence with dbref UNIPROT:Q9ZTS2 + */ + acf = new AlignedCodonFrame(); + dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2"); + found = testee.searchDataset(!dna1.isProtein(), dna1, dbref, result, + acf, false); // search dataset with a protein xref from a dna + // sequence to locate the protein product + assertTrue(found); + assertEquals(1, result.size()); + assertSame(pep1, result.get(0)); + assertTrue(acf.isEmpty()); + + /* + * search for a nucleotide sequence with dbref UNIPROT:Q9ZTS2 + */ + result.clear(); + acf = new AlignedCodonFrame(); + dbref = new DBRefEntry("UNIPROT", "0", "Q9ZTS2"); + found = testee.searchDataset(!pep1.isProtein(), pep1, dbref, result, + acf, false); // search dataset with a protein's direct dbref to + // locate dna sequences with matching xref + assertTrue(found); + assertEquals(1, result.size()); + assertSame(dna1, result.get(0)); + // should now 
have a mapping from dna to pep1 + List mappings = acf.getMappings(); + assertEquals(1, mappings.size()); + SequenceToSequenceMapping mapping = mappings.get(0); + assertSame(dna1, mapping.getFromSeq()); + assertSame(pep1, mapping.getMapping().getTo()); + MapList mapList = mapping.getMapping().getMap(); + assertEquals(1, mapList.getToRatio()); + assertEquals(3, mapList.getFromRatio()); + assertEquals(1, mapList.getFromRanges().size()); + assertEquals(1, mapList.getFromRanges().get(0)[0]); + assertEquals(21, mapList.getFromRanges().get(0)[1]); + assertEquals(1, mapList.getToRanges().size()); + assertEquals(1, mapList.getToRanges().get(0)[0]); + assertEquals(7, mapList.getToRanges().get(0)[1]); + } + + /** + * Test for finding 'product' sequences for the case where the selected + * sequence has a dbref with a mapping to a sequence. This represents the case + * where either + *
      + *
    • a fetched sequence is already decorated with its cross-reference (e.g. + * EMBL + translation), or
    • + *
    • Get Cross-References has been done once resulting in instantiated + * cross-reference mappings
    • + *
    + */ + @Test(groups = { "Functional" }) + public void testFindXrefSequences_fromDbRefMap() + { + /* + * scenario: nucleotide sequence AF039662 + * with dbref + mapping to Q9ZTS2 and P30419 + * which themselves each have a dbref and feature + */ + SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); + SequenceI pep1 = new Sequence("Q9ZTS2", "MALFQRSV"); + SequenceI pep2 = new Sequence("P30419", "MTRRSQIF"); + dna1.createDatasetSequence(); + pep1.createDatasetSequence(); + pep2.createDatasetSequence(); + + pep1.getDatasetSequence().addDBRef( + new DBRefEntry("Pfam", "0", "PF00111")); + pep1.addSequenceFeature(new SequenceFeature("type", "desc", 12, 14, 1f, + "group")); + pep2.getDatasetSequence().addDBRef(new DBRefEntry("PDB", "0", "3JTK")); + pep2.addSequenceFeature(new SequenceFeature("type2", "desc2", 13, 15, + 12f, "group2")); + + MapList mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, + 3, 1); + Mapping map = new Mapping(pep1, mapList); + DBRefEntry dbRef1 = new DBRefEntry("UNIPROT", "0", "Q9ZTS2", map); + dna1.getDatasetSequence().addDBRef(dbRef1); + mapList = new MapList(new int[] { 1, 24 }, new int[] { 1, 3 }, 3, 1); + map = new Mapping(pep2, mapList); + DBRefEntry dbRef2 = new DBRefEntry("UNIPROT", "0", "P30419", map); + dna1.getDatasetSequence().addDBRef(dbRef2); + + /* + * find UNIPROT xrefs for nucleotide sequence - it should pick up + * mapped sequences + */ + AlignmentI al = new Alignment(new SequenceI[] { dna1 }); + Alignment xrefs = new CrossRef(new SequenceI[] { dna1 }, al) + .findXrefSequences("UNIPROT", true); + assertEquals(2, xrefs.getHeight()); + + /* + * cross-refs alignment holds copies of the mapped sequences + * including copies of their dbrefs and features + */ + checkCopySequence(pep1, xrefs.getSequenceAt(0)); + checkCopySequence(pep2, xrefs.getSequenceAt(1)); + } + + /** + * Helper method that verifies that 'copy' has the same name, start, end, + * sequence and dataset sequence object as 'original' (but is not 
the same + * object) + * + * @param copy + * @param original + */ + private void checkCopySequence(SequenceI copy, SequenceI original) + { + assertNotSame(copy, original); + assertSame(copy.getDatasetSequence(), original.getDatasetSequence()); + assertEquals(copy.getName(), original.getName()); + assertEquals(copy.getStart(), original.getStart()); + assertEquals(copy.getEnd(), original.getEnd()); + assertEquals(copy.getSequenceAsString(), original.getSequenceAsString()); + } + + /** + * Test for finding 'product' sequences for the case where the selected + * sequence has a dbref with no mapping, triggering a fetch from database + */ + @Test(groups = { "Functional" }) + public void testFindXrefSequences_withFetch() + { + SequenceI dna1 = new Sequence("AF039662", "GGGGCAGCACAAGAAC"); + dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "Q9ZTS2")); + dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P30419")); + dna1.addDBRef(new DBRefEntry("UNIPROT", "0", "P00314")); + final SequenceI pep1 = new Sequence("Q9ZTS2", "MYQLIRSSW"); + final SequenceI pep2 = new Sequence("P00314", "MRKLLAASG"); + + /* + * argument false suppresses adding DAS sources + * todo: define an interface type SequenceFetcherI and mock that + */ + SequenceFetcher mockFetcher = new SequenceFetcher(false) + { + @Override + public boolean isFetchable(String source) + { + return true; + } + + @Override + public SequenceI[] getSequences(List refs, boolean dna) + { + return new SequenceI[] { pep1, pep2 }; + } + }; + SequenceFetcherFactory.setSequenceFetcher(mockFetcher); + + /* + * find UNIPROT xrefs for nucleotide sequence + */ + AlignmentI al = new Alignment(new SequenceI[] { dna1 }); + Alignment xrefs = new CrossRef(new SequenceI[] { dna1 }, al) + .findXrefSequences("UNIPROT", true); + assertEquals(2, xrefs.getHeight()); + assertSame(pep1, xrefs.getSequenceAt(0)); + assertSame(pep2, xrefs.getSequenceAt(1)); + } + + @AfterClass + public void tearDown() + { + SequenceFetcherFactory.setSequenceFetcher(null); + } + 
+ /** + * Test for finding 'product' sequences for the case where both gene and + * transcript sequences have dbrefs to Uniprot. + */ + @Test(groups = { "Functional" }) + public void testFindXrefSequences_forGeneAndTranscripts() + { + /* + * 'gene' sequence + */ + SequenceI gene = new Sequence("ENSG00000157764", "CGCCTCCCTTCCCC"); + gene.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056")); + gene.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3")); + + /* + * 'transcript' with CDS feature (supports mapping to protein) + */ + SequenceI braf001 = new Sequence("ENST00000288602", "taagATGGCGGCGCTGa"); + braf001.addDBRef(new DBRefEntry("UNIPROT", "0", "P15056")); + braf001.addSequenceFeature(new SequenceFeature("CDS", "", 5, 16, 0f, + null)); + + /* + * 'spliced transcript' with CDS ranges + */ + SequenceI braf002 = new Sequence("ENST00000497784", "gCAGGCtaTCTGTTCaa"); + braf002.addDBRef(new DBRefEntry("UNIPROT", "0", "H7C5K3")); + braf002.addSequenceFeature(new SequenceFeature("CDS", "", 2, 6, 0f, + null)); + braf002.addSequenceFeature(new SequenceFeature("CDS", "", 9, 15, 0f, + null)); + + /* + * TODO code is fragile - use of SequenceIdMatcher depends on fetched + * sequences having a name starting Source|Accession + * which happens to be true for Uniprot,PDB,EMBL but not Pfam,Rfam,Ensembl + */ + final SequenceI pep1 = new Sequence("UNIPROT|P15056", "MAAL"); + final SequenceI pep2 = new Sequence("UNIPROT|H7C5K3", "QALF"); + + /* + * argument false suppresses adding DAS sources + * todo: define an interface type SequenceFetcherI and mock that + */ + SequenceFetcher mockFetcher = new SequenceFetcher(false) + { + @Override + public boolean isFetchable(String source) + { + return true; + } + + @Override + public SequenceI[] getSequences(List refs, boolean dna) + { + return new SequenceI[] { pep1, pep2 }; + } + }; + SequenceFetcherFactory.setSequenceFetcher(mockFetcher); + + /* + * find UNIPROT xrefs for gene and transcripts + * verify that + * - the two proteins are 
retrieved but not duplicated + * - mappings are built from transcript (CDS) to proteins + * - no mappings from gene to proteins + */ + SequenceI[] seqs = new SequenceI[] { gene, braf001, braf002 }; + AlignmentI al = new Alignment(seqs); + Alignment xrefs = new CrossRef(seqs, al).findXrefSequences("UNIPROT", + true); + assertEquals(2, xrefs.getHeight()); + assertSame(pep1, xrefs.getSequenceAt(0)); + assertSame(pep2, xrefs.getSequenceAt(1)); + } + + /** + *
    +   * Test that emulates this (real but simplified) case:
    +   * Alignment:          DBrefs
    +   *     UNIPROT|P0CE19  EMBL|J03321, EMBL|X06707, EMBL|M19487
    +   *     UNIPROT|P0CE20  EMBL|J03321, EMBL|X06707, EMBL|X07547
    +   * Find cross-references for EMBL. These are mocked here as
    +   *     EMBL|J03321     with mappings to P0CE18, P0CE19, P0CE20
    +   *     EMBL|X06707     with mappings to P0CE17, P0CE19, P0CE20
    +   *     EMBL|M19487     with mappings to P0CE19, Q46432
    +   *     EMBL|X07547     with mappings to P0CE20, B0BCM4
    +   * EMBL sequences are first 'fetched' (mocked here) for P0CE19.
    +   * The 3 EMBL sequences are added to the alignment dataset.
    +   * Their dbrefs to Uniprot products P0CE19 and P0CE20 should be matched in the
    +   * alignment dataset and updated to reference the original Uniprot sequences.
    +   * For the second Uniprot sequence, the J03321 and X06707 xrefs should be 
    +   * resolved from the dataset, and only the X07547 dbref fetched.
    +   * So the end state to verify is:
    +   * - 4 cross-ref sequences returned: J03321, X06707,  M19487, X07547
    +   * - P0CE19/20 dbrefs to EMBL sequences now have mappings
    +   * - J03321 dbrefs to P0CE19/20 mapped to original Uniprot sequences
    +   * - X06707 dbrefs to P0CE19/20 mapped to original Uniprot sequences
    +   * 
    + */ + @Test(groups = { "Functional" }) + public void testFindXrefSequences_uniprotEmblManyToMany() + { + /* + * Uniprot sequences, both with xrefs to EMBL|J03321 + * and EMBL|X07547 + */ + SequenceI p0ce19 = new Sequence("UNIPROT|P0CE19", "KPFG"); + p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "J03321")); + p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "X06707")); + p0ce19.addDBRef(new DBRefEntry("EMBL", "0", "M19487")); + SequenceI p0ce20 = new Sequence("UNIPROT|P0CE20", "PFGK"); + p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "J03321")); + p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X06707")); + p0ce20.addDBRef(new DBRefEntry("EMBL", "0", "X07547")); + + /* + * EMBL sequences to be 'fetched', complete with dbrefs and mappings + * to their protein products (CDS location and translations are provided + * in EMBL XML); these should be matched to, and replaced with, + * the corresponding uniprot sequences after fetching + */ + + /* + * J03321 with mappings to P0CE19 and P0CE20 + */ + final SequenceI j03321 = new Sequence("EMBL|J03321", "AAACCCTTTGGGAAAA"); + DBRefEntry dbref1 = new DBRefEntry("UNIPROT", "0", "P0CE19"); + MapList mapList = new MapList(new int[] { 1, 12 }, new int[] { 1, 4 }, + 3, 1); + Mapping map = new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), + mapList); + // add a dbref to the mapped to sequence - should get copied to p0ce19 + map.getTo().addDBRef(new DBRefEntry("PIR", "0", "S01875")); + dbref1.setMap(map); + j03321.addDBRef(dbref1); + DBRefEntry dbref2 = new DBRefEntry("UNIPROT", "0", "P0CE20"); + mapList = new MapList(new int[] { 4, 15 }, new int[] { 2, 5 }, 3, 1); + dbref2.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"), + new MapList(mapList))); + j03321.addDBRef(dbref2); + + /* + * X06707 with mappings to P0CE19 and P0CE20 + */ + final SequenceI x06707 = new Sequence("EMBL|X06707", "atgAAACCCTTTGGG"); + DBRefEntry dbref3 = new DBRefEntry("UNIPROT", "0", "P0CE19"); + MapList map2 = new MapList(new int[] { 4, 15 }, new int[] { 
1, 4 }, 3, + 1); + dbref3.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), map2)); + x06707.addDBRef(dbref3); + DBRefEntry dbref4 = new DBRefEntry("UNIPROT", "0", "P0CE20"); + MapList map3 = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, 3, + 1); + dbref4.setMap(new Mapping(new Sequence("UNIPROT|P0CE20", "PFGK"), map3)); + x06707.addDBRef(dbref4); + + /* + * M19487 with mapping to P0CE19 and Q46432 + */ + final SequenceI m19487 = new Sequence("EMBL|M19487", "AAACCCTTTGGG"); + DBRefEntry dbref5 = new DBRefEntry("UNIPROT", "0", "P0CE19"); + dbref5.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), + new MapList(mapList))); + m19487.addDBRef(dbref5); + DBRefEntry dbref6 = new DBRefEntry("UNIPROT", "0", "Q46432"); + dbref6.setMap(new Mapping(new Sequence("UNIPROT|Q46432", "KPFG"), + new MapList(mapList))); + m19487.addDBRef(dbref6); + + /* + * X07547 with mapping to P0CE20 and B0BCM4 + */ + final SequenceI x07547 = new Sequence("EMBL|X07547", "cccAAACCCTTTGGG"); + DBRefEntry dbref7 = new DBRefEntry("UNIPROT", "0", "P0CE20"); + dbref7.setMap(new Mapping(new Sequence("UNIPROT|P0CE19", "KPFG"), + new MapList(map2))); + x07547.addDBRef(dbref7); + DBRefEntry dbref8 = new DBRefEntry("UNIPROT", "0", "B0BCM4"); + dbref8.setMap(new Mapping(new Sequence("UNIPROT|B0BCM4", "KPFG"), + new MapList(map2))); + x07547.addDBRef(dbref8); + + /* + * mock sequence fetcher to 'return' the EMBL sequences + * TODO: Mockito would allow .thenReturn().thenReturn() here, + * and also capture and verification of the parameters + * passed in calls to getSequences() - important to verify that + * duplicate sequence fetches are not requested + */ + SequenceFetcher mockFetcher = new SequenceFetcher(false) + { + int call = 0; + + @Override + public boolean isFetchable(String source) + { + return true; + } + + @Override + public SequenceI[] getSequences(List refs, boolean dna) + { + call++; + if (call == 1) + { + assertEquals("Expected 3 embl seqs in first fetch", 3, + 
refs.size()); + return new SequenceI[] { j03321, x06707, m19487 }; + } + else + { + assertEquals("Expected 1 embl seq in second fetch", 1, + refs.size()); + return new SequenceI[] { x07547 }; + } + } + }; + SequenceFetcherFactory.setSequenceFetcher(mockFetcher); + + /* + * find EMBL xrefs for Uniprot seqs and verify that + * - the EMBL xref'd sequences are retrieved without duplicates + * - mappings are added to the Uniprot dbrefs + * - mappings in the EMBL-to-Uniprot dbrefs are updated to the + * alignment sequences + * - dbrefs on the EMBL sequences are added to the original dbrefs + */ + SequenceI[] seqs = new SequenceI[] { p0ce19, p0ce20 }; + AlignmentI al = new Alignment(seqs); + Alignment xrefs = new CrossRef(seqs, al).findXrefSequences("EMBL", + false); + + /* + * verify retrieved sequences + */ + assertNotNull(xrefs); + assertEquals(4, xrefs.getHeight()); + assertSame(j03321, xrefs.getSequenceAt(0)); + assertSame(x06707, xrefs.getSequenceAt(1)); + assertSame(m19487, xrefs.getSequenceAt(2)); + assertSame(x07547, xrefs.getSequenceAt(3)); + + /* + * verify mappings added to Uniprot-to-EMBL dbrefs + */ + Mapping mapping = p0ce19.getDBRefs()[0].getMap(); + assertSame(j03321, mapping.getTo()); + mapping = p0ce19.getDBRefs()[1].getMap(); + assertSame(x06707, mapping.getTo()); + mapping = p0ce20.getDBRefs()[0].getMap(); + assertSame(j03321, mapping.getTo()); + mapping = p0ce20.getDBRefs()[1].getMap(); + assertSame(x06707, mapping.getTo()); + + /* + * verify dbrefs on EMBL are mapped to alignment seqs + */ + assertSame(p0ce19, j03321.getDBRefs()[0].getMap().getTo()); + assertSame(p0ce20, j03321.getDBRefs()[1].getMap().getTo()); + assertSame(p0ce19, x06707.getDBRefs()[0].getMap().getTo()); + assertSame(p0ce20, x06707.getDBRefs()[1].getMap().getTo()); + + /* + * verify new dbref on EMBL dbref mapping is copied to the + * original Uniprot sequence + */ + assertEquals(4, p0ce19.getDBRefs().length); + assertEquals("PIR", p0ce19.getDBRefs()[3].getSource()); + 
assertEquals("S01875", p0ce19.getDBRefs()[3].getAccessionId()); + } + + @Test(groups = "Functional") + public void testSameSequence() + { + assertTrue(CrossRef.sameSequence(null, null)); + SequenceI seq1 = new Sequence("seq1", "ABCDEF"); + assertFalse(CrossRef.sameSequence(seq1, null)); + assertFalse(CrossRef.sameSequence(null, seq1)); + assertTrue(CrossRef.sameSequence(seq1, new Sequence("seq2", "ABCDEF"))); + assertTrue(CrossRef.sameSequence(seq1, new Sequence("seq2", "abcdef"))); + assertFalse(CrossRef + .sameSequence(seq1, new Sequence("seq2", "ABCDE-F"))); + assertFalse(CrossRef.sameSequence(seq1, new Sequence("seq2", "BCDEF"))); + } } diff --git a/test/jalview/analysis/SequenceIdMatcherTest.java b/test/jalview/analysis/SequenceIdMatcherTest.java index 9d3e3b6..a17270d 100644 --- a/test/jalview/analysis/SequenceIdMatcherTest.java +++ b/test/jalview/analysis/SequenceIdMatcherTest.java @@ -90,5 +90,11 @@ public class SequenceIdMatcherTest * case insensitive matching */ assertTrue(testee.equals("a12345")); + + testee = sequenceIdMatcher.new SeqIdName("UNIPROT|A12345"); + assertFalse(testee.equals("A12345")); + assertFalse(testee.equals("UNIPROT|B98765")); + assertFalse(testee.equals("UNIPROT|")); + assertTrue(testee.equals("UNIPROT")); } } diff --git a/test/jalview/datamodel/AlignedCodonFrameTest.java b/test/jalview/datamodel/AlignedCodonFrameTest.java index cd8a1e3..f2dd968 100644 --- a/test/jalview/datamodel/AlignedCodonFrameTest.java +++ b/test/jalview/datamodel/AlignedCodonFrameTest.java @@ -451,4 +451,30 @@ public class AlignedCodonFrameTest assertArrayEquals(new int[] { 2, 2 }, acf.getMappedRegion(seq2, seq1, 6)); } + + /** + * Tests for addMap. 
See also tests for MapList.addMapList + */ + @Test(groups = { "Functional" }) + public void testAddMap() + { + final Sequence seq1 = new Sequence("Seq1", "c-G-TA-gC-gT-T"); + seq1.createDatasetSequence(); + final Sequence aseq1 = new Sequence("Seq1", "-V-L"); + aseq1.createDatasetSequence(); + + AlignedCodonFrame acf = new AlignedCodonFrame(); + MapList map = new MapList(new int[] { 2, 4, 6, 6, 8, 9 }, new int[] { + 1, 2 }, 3, 1); + acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map); + assertEquals(1, acf.getMappingsFromSequence(seq1).size()); + Mapping before = acf.getMappingsFromSequence(seq1).get(0); + + /* + * add the same map again, verify it doesn't get duplicated + */ + acf.addMap(seq1.getDatasetSequence(), aseq1.getDatasetSequence(), map); + assertEquals(1, acf.getMappingsFromSequence(seq1).size()); + assertSame(before, acf.getMappingsFromSequence(seq1).get(0)); + } } diff --git a/test/jalview/datamodel/AlignmentTest.java b/test/jalview/datamodel/AlignmentTest.java index 5a45176..07b8abf 100644 --- a/test/jalview/datamodel/AlignmentTest.java +++ b/test/jalview/datamodel/AlignmentTest.java @@ -217,7 +217,7 @@ public class AlignmentTest * * @throws IOException */ - @Test(groups = { "Functional" }, enabled = false) + @Test(groups = { "Functional" }, enabled = true) // TODO review / update this test after redesign of alignAs method public void testAlignAs_cdnaAsProtein() throws IOException { @@ -243,7 +243,7 @@ public class AlignmentTest * * @throws IOException */ - @Test(groups = { "Functional" }, enabled = false) + @Test(groups = { "Functional" }, enabled = true) // TODO review / update this test after redesign of alignAs method public void testAlignAs_cdnaAsProtein_singleSequence() throws IOException { @@ -315,7 +315,12 @@ public class AlignmentTest acf.addMap(seqFrom, seqTo, ml); } + /* + * not sure whether mappings 'belong' or protein or nucleotide + * alignment, so adding to both ;~) + */ alFrom.addCodonFrame(acf); + 
alTo.addCodonFrame(acf); } /** @@ -398,6 +403,8 @@ public class AlignmentTest // TODO should the copy constructor copy the dataset? // or make a new one referring to the same dataset sequences?? assertNull(copy.getDataset()); + // TODO test metadata is copied when AlignmentI is a dataset + // assertArrayEquals(copy.getDataset().getSequencesArray(), protein // .getDataset().getSequencesArray()); } @@ -436,8 +443,7 @@ public class AlignmentTest // TODO promote this method to AlignmentI ((Alignment) protein).createDatasetAlignment(); - // TODO this method should return AlignmentI not Alignment !! - Alignment ds = protein.getDataset(); + AlignmentI ds = protein.getDataset(); // side-effect: dataset created on second sequence assertNotNull(protein.getSequenceAt(1).getDatasetSequence()); diff --git a/test/jalview/datamodel/MappingTest.java b/test/jalview/datamodel/MappingTest.java index 3131ad7..b326d90 100644 --- a/test/jalview/datamodel/MappingTest.java +++ b/test/jalview/datamodel/MappingTest.java @@ -21,6 +21,7 @@ package jalview.datamodel; import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertSame; import jalview.util.MapList; @@ -75,4 +76,18 @@ public class MappingTest m = new Mapping(seq, fk); assertEquals("[ [1, 6] [8, 13] ] 3:1 to [ [4, 7] ] Seq1", m.toString()); } + + @Test(groups = { "Functional" }) + public void testCopyConstructor() + { + MapList ml = new MapList(new int[] { 1, 6, 8, 13 }, new int[] { 4, 7 }, + 3, 1); + SequenceI seq = new Sequence("seq1", "agtacg"); + Mapping m = new Mapping(seq, ml); + m.setMappedFromId("abc"); + Mapping copy = new Mapping(m); + assertEquals("abc", copy.getMappedFromId()); + assertEquals(ml, copy.getMap()); + assertSame(seq, copy.getTo()); + } } diff --git a/test/jalview/datamodel/SequenceTest.java b/test/jalview/datamodel/SequenceTest.java index 17dfcdc..71719dd 100644 --- a/test/jalview/datamodel/SequenceTest.java +++ b/test/jalview/datamodel/SequenceTest.java @@ -65,6 +65,30 @@ 
public class SequenceTest assertEquals("Gap interval 2 end wrong", 8, gapInt.get(1)[1]); } + @Test(groups = ("Functional")) + public void testIsProtein() + { + // test Protein + assertTrue(new Sequence("prot","ASDFASDFASDF").isProtein()); + // test DNA + assertFalse(new Sequence("prot","ACGTACGTACGT").isProtein()); + // test RNA + SequenceI sq = new Sequence("prot","ACGUACGUACGU"); + assertFalse(sq.isProtein()); + // change sequence, should trigger an update of cached result + sq.setSequence("ASDFASDFADSF"); + assertTrue(sq.isProtein()); + /* + * in situ change of sequence doesn't change hashcode :-O + * (sequence should not expose internal implementation) + */ + for (int i = 0; i < sq.getSequence().length; i++) + { + sq.getSequence()[i] = "acgtu".charAt(i % 5); + } + assertTrue(sq.isProtein()); // but it isn't + } + @Test(groups = { "Functional" }) public void testGetAnnotation() { @@ -388,6 +412,20 @@ public class SequenceTest } /** + * test createDatasetSequence behaves to doc + */ + @Test(groups = { "Functional" }) + public void testCreateDatasetSequence() + { + SequenceI sq = new Sequence("my","ASDASD"); + assertNull(sq.getDatasetSequence()); + SequenceI rds = sq.createDatasetSequence(); + assertNotNull(rds); + assertNull(rds.getDatasetSequence()); + assertEquals(sq.getDatasetSequence(), rds); + } + + /** * Test for deriveSequence applied to a sequence with a dataset */ @Test(groups = { "Functional" }) diff --git a/test/jalview/datamodel/xdb/embl/EmblEntryTest.java b/test/jalview/datamodel/xdb/embl/EmblEntryTest.java index 3de5e3f..4b71417 100644 --- a/test/jalview/datamodel/xdb/embl/EmblEntryTest.java +++ b/test/jalview/datamodel/xdb/embl/EmblEntryTest.java @@ -1,12 +1,14 @@ package jalview.datamodel.xdb.embl; import static org.testng.AssertJUnit.assertEquals; +import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.assertSame; import jalview.analysis.SequenceIdMatcher; import jalview.datamodel.DBRefEntry; -import 
jalview.datamodel.Sequence; +import jalview.datamodel.DBRefSource; import jalview.datamodel.SequenceI; +import jalview.util.MapList; import java.util.ArrayList; import java.util.Arrays; @@ -22,7 +24,7 @@ public class EmblEntryTest EmblEntry testee = new EmblEntry(); /* - * Make a (CDS) Feature with 4 locations + * Make a (CDS) Feature with 5 locations */ EmblFeature cds = new EmblFeature(); cds.setLocation("join(10..20,complement(30..40),50..60,70..80,complement(110..120))"); @@ -36,30 +38,32 @@ public class EmblEntryTest public void testParseCodingFeature() { // not the whole sequence but enough for this test... - SequenceI dna = new Sequence("J03321", "GGATCCGTAAGTTAGACGAAATT"); List peptides = new ArrayList(); SequenceIdMatcher matcher = new SequenceIdMatcher(peptides); EmblFile ef = EmblTestHelper.getEmblFile(); + assertEquals(1, ef.getEntries().size()); + EmblEntry testee = ef.getEntries().get(0); + String sourceDb = "EMBL"; + SequenceI dna = testee.makeSequence(sourceDb); /* - * parse two CDS features, one with two Uniprot cross-refs, - * the other with one + * parse three CDS features, with two/one/no Uniprot cross-refs */ - EmblEntry testee = new EmblEntry(); for (EmblFeature feature : ef.getEntries().get(0).getFeatures()) { if ("CDS".equals(feature.getName())) { - testee.parseCodingFeature(feature, "EMBL", dna, peptides, matcher); + testee.parseCodingFeature(feature, sourceDb, dna, peptides, matcher); } } /* * peptides should now have five entries: * EMBL product and two Uniprot accessions for the first CDS / translation - * EMBL product and one Uniprot accession for the second CDS / translation + * EMBL product and one Uniprot accession for the second CDS / " + * EMBL product only for the third */ - assertEquals(5, peptides.size()); + assertEquals(6, peptides.size()); assertEquals("CAA30420.1", peptides.get(0).getName()); assertEquals("MLCF", peptides.get(0).getSequenceAsString()); assertEquals("UNIPROT|B0BCM4", peptides.get(1).getName()); @@ -70,49 
+74,161 @@ public class EmblEntryTest assertEquals("MSSS", peptides.get(3).getSequenceAsString()); assertEquals("UNIPROT|B0BCM3", peptides.get(4).getName()); assertEquals("MSSS", peptides.get(4).getSequenceAsString()); + assertEquals("CAA12345.6", peptides.get(5).getName()); + assertEquals("MSS", peptides.get(5).getSequenceAsString()); /* - * verify dna sequence has dbrefs with mappings to the peptide 'products' + * verify dna sequence has dbrefs with CDS mappings to the peptide 'products' */ + MapList cds1Map = new MapList(new int[] { 57, 46 }, new int[] { 1, 4 }, + 3, 1); + MapList cds2Map = new MapList(new int[] { 4, 15 }, new int[] { 1, 4 }, + 3, 1); + MapList cds3Map = new MapList(new int[] { 4, 6, 10, 15 }, new int[] { + 1, 3 }, 3, 1); DBRefEntry[] dbrefs = dna.getDBRefs(); - assertEquals(3, dbrefs.length); + assertEquals(4, dbrefs.length); DBRefEntry dbRefEntry = dbrefs[0]; assertEquals("UNIPROT", dbRefEntry.getSource()); assertEquals("B0BCM4", dbRefEntry.getAccessionId()); assertSame(peptides.get(1), dbRefEntry.getMap().getTo()); - List fromRanges = dbRefEntry.getMap().getMap().getFromRanges(); - assertEquals(1, fromRanges.size()); - assertEquals(57, fromRanges.get(0)[0]); - assertEquals(46, fromRanges.get(0)[1]); - List toRanges = dbRefEntry.getMap().getMap().getToRanges(); - assertEquals(1, toRanges.size()); - assertEquals(1, toRanges.get(0)[0]); - assertEquals(4, toRanges.get(0)[1]); + assertEquals(cds1Map, dbRefEntry.getMap().getMap()); dbRefEntry = dbrefs[1]; assertEquals("UNIPROT", dbRefEntry.getSource()); assertEquals("P0CE20", dbRefEntry.getAccessionId()); assertSame(peptides.get(2), dbRefEntry.getMap().getTo()); - fromRanges = dbRefEntry.getMap().getMap().getFromRanges(); - assertEquals(1, fromRanges.size()); - assertEquals(57, fromRanges.get(0)[0]); - assertEquals(46, fromRanges.get(0)[1]); - toRanges = dbRefEntry.getMap().getMap().getToRanges(); - assertEquals(1, toRanges.size()); - assertEquals(1, toRanges.get(0)[0]); - assertEquals(4, 
toRanges.get(0)[1]); + assertEquals(cds1Map, dbRefEntry.getMap().getMap()); dbRefEntry = dbrefs[2]; assertEquals("UNIPROT", dbRefEntry.getSource()); assertEquals("B0BCM3", dbRefEntry.getAccessionId()); assertSame(peptides.get(4), dbRefEntry.getMap().getTo()); - fromRanges = dbRefEntry.getMap().getMap().getFromRanges(); - assertEquals(1, fromRanges.size()); - assertEquals(4, fromRanges.get(0)[0]); - assertEquals(15, fromRanges.get(0)[1]); - toRanges = dbRefEntry.getMap().getMap().getToRanges(); - assertEquals(1, toRanges.size()); - assertEquals(1, toRanges.get(0)[0]); - assertEquals(4, toRanges.get(0)[1]); + assertEquals(cds2Map, dbRefEntry.getMap().getMap()); + + dbRefEntry = dbrefs[3]; + assertEquals("EMBLCDSPROTEIN", dbRefEntry.getSource()); + assertEquals("CAA12345.6", dbRefEntry.getAccessionId()); + assertSame(peptides.get(5), dbRefEntry.getMap().getTo()); + assertEquals(cds3Map, dbRefEntry.getMap().getMap()); + + /* + * verify peptides have dbrefs + * - to EMBL sequence (with inverse 1:3 cds mapping) + * - to EMBLCDS (with 1:3 mapping) + * - direct (no mapping) to other protein accessions + */ + MapList proteinToCdsMap1 = new MapList(new int[] { 1, 4 }, new int[] { + 1, 12 }, 1, 3); + MapList proteinToCdsMap2 = new MapList(new int[] { 1, 3 }, new int[] { + 1, 9 }, 1, 3); + + // dbrefs for first CDS EMBL product CAA30420.1 + dbrefs = peptides.get(0).getDBRefs(); + assertEquals(5, dbrefs.length); + assertEquals(DBRefSource.EMBL, dbrefs[0].getSource()); + assertEquals("CAA30420.1", dbrefs[0].getAccessionId()); + assertEquals(cds1Map.getInverse(), dbrefs[0].getMap().getMap()); + assertEquals(DBRefSource.EMBLCDS, dbrefs[1].getSource()); + assertEquals("CAA30420.1", dbrefs[1].getAccessionId()); + assertEquals(proteinToCdsMap1, dbrefs[1].getMap().getMap()); + assertEquals(DBRefSource.EMBLCDSProduct, dbrefs[2].getSource()); + assertEquals("CAA30420.1", dbrefs[2].getAccessionId()); + assertNull(dbrefs[2].getMap()); + assertEquals(new DBRefEntry(DBRefSource.UNIPROT, 
"2.1", "B0BCM4"), + dbrefs[3]); + assertNull(dbrefs[3].getMap()); + assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "0", "P0CE20"), + dbrefs[4]); + assertNull(dbrefs[4].getMap()); + + // dbrefs for first CDS first Uniprot xref + dbrefs = peptides.get(1).getDBRefs(); + assertEquals(2, dbrefs.length); + assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "2.1", "B0BCM4"), + dbrefs[0]); + assertNull(dbrefs[0].getMap()); + assertEquals(DBRefSource.EMBL, dbrefs[1].getSource()); + assertEquals("X07547", dbrefs[1].getAccessionId()); + assertEquals(cds1Map.getInverse(), dbrefs[1].getMap().getMap()); + + // dbrefs for first CDS second Uniprot xref + dbrefs = peptides.get(2).getDBRefs(); + assertEquals(2, dbrefs.length); + assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "0", "P0CE20"), + dbrefs[0]); + assertNull(dbrefs[0].getMap()); + assertEquals(DBRefSource.EMBL, dbrefs[1].getSource()); + assertEquals("X07547", dbrefs[1].getAccessionId()); + assertEquals(cds1Map.getInverse(), dbrefs[1].getMap().getMap()); + + // dbrefs for second CDS EMBL product CAA30421.1 + dbrefs = peptides.get(3).getDBRefs(); + assertEquals(4, dbrefs.length); + assertEquals(DBRefSource.EMBL, dbrefs[0].getSource()); + assertEquals("CAA30421.1", dbrefs[0].getAccessionId()); + assertEquals(cds2Map.getInverse(), dbrefs[0].getMap().getMap()); + assertEquals(DBRefSource.EMBLCDS, dbrefs[1].getSource()); + assertEquals("CAA30421.1", dbrefs[1].getAccessionId()); + assertEquals(proteinToCdsMap1, dbrefs[1].getMap().getMap()); + assertEquals(DBRefSource.EMBLCDSProduct, dbrefs[2].getSource()); + assertEquals("CAA30421.1", dbrefs[2].getAccessionId()); + assertNull(dbrefs[2].getMap()); + assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "0", "B0BCM3"), + dbrefs[3]); + assertNull(dbrefs[3].getMap()); + + // dbrefs for second CDS second Uniprot xref + dbrefs = peptides.get(4).getDBRefs(); + assertEquals(2, dbrefs.length); + assertEquals(new DBRefEntry(DBRefSource.UNIPROT, "0", "B0BCM3"), + dbrefs[0]); + 
assertNull(dbrefs[0].getMap()); + assertEquals(DBRefSource.EMBL, dbrefs[1].getSource()); + assertEquals("X07547", dbrefs[1].getAccessionId()); + assertEquals(cds2Map.getInverse(), dbrefs[1].getMap().getMap()); + + // dbrefs for third CDS inferred EMBL product CAA12345.6 + dbrefs = peptides.get(5).getDBRefs(); + assertEquals(3, dbrefs.length); + assertEquals(DBRefSource.EMBL, dbrefs[0].getSource()); + assertEquals("CAA12345.6", dbrefs[0].getAccessionId()); + assertEquals(cds3Map.getInverse(), dbrefs[0].getMap().getMap()); + assertEquals(DBRefSource.EMBLCDS, dbrefs[1].getSource()); + assertEquals("CAA12345.6", dbrefs[1].getAccessionId()); + assertEquals(proteinToCdsMap2, dbrefs[1].getMap().getMap()); + assertEquals(DBRefSource.EMBLCDSProduct, dbrefs[2].getSource()); + assertEquals("CAA12345.6", dbrefs[2].getAccessionId()); + assertNull(dbrefs[2].getMap()); + } + + @Test(groups = "Functional") + public void testAdjustForProteinLength() + { + int[] exons = new int[] { 11, 15, 21, 25, 31, 38 }; // 18 bp + + // exact length match: + assertSame(exons, EmblEntry.adjustForProteinLength(6, exons)); + + // match if we assume exons include stop codon not in protein: + assertSame(exons, EmblEntry.adjustForProteinLength(5, exons)); + + // truncate last exon by 6bp + int[] truncated = EmblEntry.adjustForProteinLength(4, exons); + assertEquals("[11, 15, 21, 25, 31, 32]", + Arrays.toString(truncated)); + + // remove last exon and truncate preceding by 1bp + truncated = EmblEntry.adjustForProteinLength(3, exons); + assertEquals("[11, 15, 21, 24]", Arrays.toString(truncated)); + + // exact removal of exon case: + exons = new int[] { 11, 15, 21, 27, 33, 38 }; // 18 bp + truncated = EmblEntry.adjustForProteinLength(4, exons); + assertEquals("[11, 15, 21, 27]", Arrays.toString(truncated)); + + // what if exons are too short for protein? 
+ truncated = EmblEntry.adjustForProteinLength(7, exons); + assertSame(exons, truncated); } } diff --git a/test/jalview/datamodel/xdb/embl/EmblFileTest.java b/test/jalview/datamodel/xdb/embl/EmblFileTest.java index 6955833..906436f 100644 --- a/test/jalview/datamodel/xdb/embl/EmblFileTest.java +++ b/test/jalview/datamodel/xdb/embl/EmblFileTest.java @@ -80,9 +80,9 @@ public class EmblFileTest assertEquals("0", dbref.getVersion()); /* - * two sequence features for CDS + * three sequence features for CDS */ - assertEquals(2, entry.getFeatures().size()); + assertEquals(3, entry.getFeatures().size()); /* * first CDS */ @@ -140,6 +140,23 @@ public class EmblFileTest assertEquals("MSSS", q.getValues()[0]); /* + * third CDS + */ + ef = entry.getFeatures().get(2); + assertEquals("CDS", ef.getName()); + assertEquals("join(4..6,10..15)", ef.getLocation()); + assertNull(ef.getDbRefs()); + assertEquals(2, ef.getQualifiers().size()); + q = ef.getQualifiers().get(0); + assertEquals("protein_id", q.getName()); + assertEquals(1, q.getValues().length); + assertEquals("CAA12345.6", q.getValues()[0]); + q = ef.getQualifiers().get(1); + assertEquals("translation", q.getName()); + assertEquals(1, q.getValues().length); + assertEquals("MSS", q.getValues()[0]); + + /* * Sequence - verify newline not converted to space (JAL-2029) */ EmblSequence seq = entry.getSequence(); diff --git a/test/jalview/datamodel/xdb/embl/EmblTestHelper.java b/test/jalview/datamodel/xdb/embl/EmblTestHelper.java index 9957c72..6349164 100644 --- a/test/jalview/datamodel/xdb/embl/EmblTestHelper.java +++ b/test/jalview/datamodel/xdb/embl/EmblTestHelper.java @@ -38,6 +38,14 @@ public class EmblTestHelper + "MSSS" + "" /* + * third CDS is made up - has no xref - code should synthesize + * one to an assumed EMBLCDSPROTEIN accession + */ + + "" + + "CAA12345.6" + + "MSS" + + "" + /* * sequence (modified for test purposes) * emulates EMBL XML 1.2 which splits sequence data every 60 characters * see 
EmblSequence.setSequence diff --git a/test/jalview/gui/AlignViewportTest.java b/test/jalview/gui/AlignViewportTest.java index c956da1..2b72914 100644 --- a/test/jalview/gui/AlignViewportTest.java +++ b/test/jalview/gui/AlignViewportTest.java @@ -36,6 +36,7 @@ import jalview.datamodel.SequenceI; import jalview.io.FileLoader; import jalview.io.FormatAdapter; import jalview.structure.StructureSelectionManager; +import jalview.util.MapList; import java.util.ArrayList; import java.util.List; @@ -133,8 +134,13 @@ public class AlignViewportTest AlignFrame af1 = new FileLoader().LoadFileWaitTillLoaded( ">Seq1\nCAGT\n", FormatAdapter.PASTE); + SequenceI s1 = af1.getViewport().getAlignment().getSequenceAt(0); AlignedCodonFrame acf1 = new AlignedCodonFrame(); + acf1.addMap(s1, s1, new MapList(new int[] { 1, 4 }, new int[] { 1, 4 }, + 1, 1)); AlignedCodonFrame acf2 = new AlignedCodonFrame(); + acf2.addMap(s1, s1, new MapList(new int[] { 1, 4 }, new int[] { 4, 1 }, + 1, 1)); List mappings = new ArrayList(); mappings.add(acf1); @@ -178,10 +184,20 @@ public class AlignViewportTest ">Seq1\nRSVQ\n", FormatAdapter.PASTE); AlignFrame af2 = new FileLoader().LoadFileWaitTillLoaded( ">Seq2\nDGEL\n", FormatAdapter.PASTE); - + SequenceI cs1 = new Sequence("cseq1", "CCCGGGTTTAAA"); + SequenceI cs2 = new Sequence("cseq2", "CTTGAGTCTAGA"); + SequenceI s1 = af1.getViewport().getAlignment().getSequenceAt(0); + SequenceI s2 = af2.getViewport().getAlignment().getSequenceAt(0); + // need to be distinct AlignedCodonFrame acf1 = new AlignedCodonFrame(); + acf1.addMap(cs1, s1, new MapList(new int[] { 1, 4 }, + new int[] { 1, 12 }, 1, 3)); AlignedCodonFrame acf2 = new AlignedCodonFrame(); + acf2.addMap(cs2, s2, new MapList(new int[] { 1, 4 }, + new int[] { 1, 12 }, 1, 3)); AlignedCodonFrame acf3 = new AlignedCodonFrame(); + acf3.addMap(cs2, cs2, new MapList(new int[] { 1, 12 }, new int[] { 1, + 12 }, 1, 1)); List mappings1 = new ArrayList(); mappings1.add(acf1); @@ -231,10 +247,20 @@ public class 
AlignViewportTest ">Seq1\nRSVQ\n", FormatAdapter.PASTE); AlignFrame af2 = new FileLoader().LoadFileWaitTillLoaded( ">Seq2\nDGEL\n", FormatAdapter.PASTE); - + SequenceI cs1 = new Sequence("cseq1", "CCCGGGTTTAAA"); + SequenceI cs2 = new Sequence("cseq2", "CTTGAGTCTAGA"); + SequenceI s1 = af1.getViewport().getAlignment().getSequenceAt(0); + SequenceI s2 = af2.getViewport().getAlignment().getSequenceAt(0); + // need to be distinct AlignedCodonFrame acf1 = new AlignedCodonFrame(); + acf1.addMap(cs1, s1, new MapList(new int[] { 1, 4 }, + new int[] { 1, 12 }, 1, 3)); AlignedCodonFrame acf2 = new AlignedCodonFrame(); + acf2.addMap(cs2, s2, new MapList(new int[] { 1, 4 }, + new int[] { 1, 12 }, 1, 3)); AlignedCodonFrame acf3 = new AlignedCodonFrame(); + acf3.addMap(cs2, cs2, new MapList(new int[] { 1, 12 }, new int[] { 1, + 12 }, 1, 1)); List mappings1 = new ArrayList(); mappings1.add(acf1); diff --git a/test/jalview/schemes/UserColourSchemeTest.java b/test/jalview/schemes/UserColourSchemeTest.java index 88f4331..e524cb4 100644 --- a/test/jalview/schemes/UserColourSchemeTest.java +++ b/test/jalview/schemes/UserColourSchemeTest.java @@ -10,7 +10,7 @@ import org.testng.annotations.Test; public class UserColourSchemeTest { - @Test(groups = "functional") + @Test(groups = "Functional") public void testGetColourFromString() { /* diff --git a/test/jalview/structure/StructureSelectionManagerTest.java b/test/jalview/structure/StructureSelectionManagerTest.java index 16f56a6..2074fb4 100644 --- a/test/jalview/structure/StructureSelectionManagerTest.java +++ b/test/jalview/structure/StructureSelectionManagerTest.java @@ -29,6 +29,7 @@ import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.io.FormatAdapter; import jalview.io.StructureFile; +import jalview.util.MapList; import java.util.ArrayList; import java.util.List; @@ -51,7 +52,11 @@ public class StructureSelectionManagerTest public void testRegisterMapping() { AlignedCodonFrame acf1 = new 
AlignedCodonFrame(); + acf1.addMap(new Sequence("s1", "ttt"), new Sequence("p1", "p"), + new MapList(new int[] { 1, 3 }, new int[] { 1, 1 }, 1, 1)); AlignedCodonFrame acf2 = new AlignedCodonFrame(); + acf2.addMap(new Sequence("s2", "ttt"), new Sequence("p2", "p"), + new MapList(new int[] { 1, 3 }, new int[] { 1, 1 }, 1, 1)); ssm.registerMapping(acf1); assertEquals(1, ssm.getSequenceMappings().size()); @@ -75,8 +80,14 @@ public class StructureSelectionManagerTest public void testRegisterMappings() { AlignedCodonFrame acf1 = new AlignedCodonFrame(); + acf1.addMap(new Sequence("s1", "ttt"), new Sequence("p1", "p"), + new MapList(new int[] { 1, 3 }, new int[] { 1, 1 }, 1, 1)); AlignedCodonFrame acf2 = new AlignedCodonFrame(); + acf2.addMap(new Sequence("s2", "ttt"), new Sequence("p2", "p"), + new MapList(new int[] { 1, 3 }, new int[] { 1, 1 }, 1, 1)); AlignedCodonFrame acf3 = new AlignedCodonFrame(); + acf3.addMap(new Sequence("s3", "ttt"), new Sequence("p3", "p"), + new MapList(new int[] { 1, 3 }, new int[] { 1, 1 }, 1, 1)); List set1 = new ArrayList(); set1.add(acf1); diff --git a/test/jalview/util/DBRefUtilsTest.java b/test/jalview/util/DBRefUtilsTest.java index c5e8ef5..96935ce 100644 --- a/test/jalview/util/DBRefUtilsTest.java +++ b/test/jalview/util/DBRefUtilsTest.java @@ -33,6 +33,8 @@ import jalview.datamodel.PDBEntry; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceI; +import java.util.List; + import org.testng.annotations.Test; public class DBRefUtilsTest @@ -191,12 +193,13 @@ public class DBRefUtilsTest ref5.setMap(new Mapping(new MapList(new int[] { 1, 1 }, new int[] { 1, 1 }, 1, 1))); - DBRefEntry[] matches = DBRefUtils.searchRefs(new DBRefEntry[] { ref1, + List matches = DBRefUtils.searchRefs(new DBRefEntry[] { + ref1, ref2, ref3, ref4, ref5 }, target); - assertEquals(3, matches.length); - assertSame(ref1, matches[0]); - assertSame(ref2, matches[1]); - assertSame(ref5, matches[2]); + assertEquals(3, matches.size()); + assertSame(ref1, 
matches.get(0)); + assertSame(ref2, matches.get(1)); + assertSame(ref5, matches.get(2)); } /** @@ -224,11 +227,12 @@ public class DBRefUtilsTest new int[] { 1, 1 }, 2, 2)); ref3.setMap(map3); - DBRefEntry[] matches = DBRefUtils.searchRefs(new DBRefEntry[] { ref1, + List matches = DBRefUtils.searchRefs(new DBRefEntry[] { + ref1, ref2, ref3 }, target); - assertEquals(2, matches.length); - assertSame(ref1, matches[0]); - assertSame(ref2, matches[1]); + assertEquals(2, matches.size()); + assertSame(ref1, matches.get(0)); + assertSame(ref2, matches.get(1)); } /** @@ -249,11 +253,42 @@ public class DBRefUtilsTest ref5.setMap(new Mapping(new MapList(new int[] { 1, 1 }, new int[] { 1, 1 }, 1, 1))); - DBRefEntry[] matches = DBRefUtils.searchRefs(new DBRefEntry[] { ref1, - ref2, ref3, ref4, ref5 }, "A1234"); - assertEquals(3, matches.length); - assertSame(ref1, matches[0]); - assertSame(ref2, matches[1]); - assertSame(ref5, matches[2]); + DBRefEntry[] dbrefs = new DBRefEntry[] { ref1, + ref2, ref3, ref4, ref5 }; + List matches = DBRefUtils.searchRefs(dbrefs, "A1234"); + assertEquals(3, matches.size()); + assertSame(ref1, matches.get(0)); + assertSame(ref2, matches.get(1)); + assertSame(ref5, matches.get(2)); + } + + /** + * Test the method that searches for matches references - case when we are + * matching a reference with null (any) accession id + */ + @Test(groups = { "Functional" }) + public void testSearchRefs_wildcardAccessionid() + { + DBRefEntry target = new DBRefEntry("EMBL", "2", null); + + DBRefEntry ref1 = new DBRefEntry("EMBL", "1", "A1234"); // matches + // constructor changes embl to EMBL + DBRefEntry ref2 = new DBRefEntry("embl", "1", "A1235"); // matches + // constructor does not upper-case accession id + DBRefEntry ref3 = new DBRefEntry("EMBL", "1", "A1236"); // matches + DBRefEntry ref4 = new DBRefEntry("EMBLCDS", "1", "A1234"); // no match + // ref5 matches although it has a mapping - ignored + DBRefEntry ref5 = new DBRefEntry("EMBL", "1", "A1237"); + 
ref5.setMap(new Mapping(new MapList(new int[] { 1, 1 }, new int[] { 1, + 1 }, 1, 1))); + + List matches = DBRefUtils.searchRefs(new DBRefEntry[] { + ref1, + ref2, ref3, ref4, ref5 }, target); + assertEquals(4, matches.size()); + assertSame(ref1, matches.get(0)); + assertSame(ref2, matches.get(1)); + assertSame(ref3, matches.get(2)); + assertSame(ref5, matches.get(3)); } } diff --git a/test/jalview/util/MapListTest.java b/test/jalview/util/MapListTest.java index d4ed0ea..ba298c5 100644 --- a/test/jalview/util/MapListTest.java +++ b/test/jalview/util/MapListTest.java @@ -563,6 +563,21 @@ public class MapListTest s); } + /** + * Test that confirms adding a map twice does nothing + */ + @Test(groups = { "Functional" }) + public void testAddMapList_sameMap() + { + MapList ml = new MapList(new int[] { 11, 15, 20, 25, 35, 30 }, + new int[] { 72, 22 }, 1, 3); + String before = ml.toString(); + ml.addMapList(ml); + assertEquals(before, ml.toString()); + ml.addMapList(new MapList(ml)); + assertEquals(before, ml.toString()); + } + @Test(groups = { "Functional" }) public void testAddMapList_contiguous() { diff --git a/test/jalview/ws/SequenceFetcherTest.java b/test/jalview/ws/SequenceFetcherTest.java index a54ce8b..94bf979 100644 --- a/test/jalview/ws/SequenceFetcherTest.java +++ b/test/jalview/ws/SequenceFetcherTest.java @@ -1,5 +1,6 @@ package jalview.ws; +import jalview.analysis.CrossRef; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; import jalview.datamodel.DBRefSource; @@ -24,8 +25,6 @@ public class SequenceFetcherTest // TODO: extracted from SequenceFetcher - convert to proper unit test with // assertions - AlignmentI ds = null; - Vector noProds = new Vector(); String usage = "SequenceFetcher.main [-nodas] [ []]\n" + "With no arguments, all DbSources will be queried with their test Accession number.\n" + "With one argument, the argument will be resolved to one or more db sources and each will be queried with their test accession only.\n" @@ 
-44,7 +43,7 @@ public class SequenceFetcherTest { List sps = new SequenceFetcher(withDas) .getSourceProxy(argv[0]); - + if (sps != null) { for (DbSourceProxy sp : sps) @@ -52,7 +51,8 @@ public class SequenceFetcherTest AlignmentI al = null; try { - al = sp.getSequenceRecords(argv.length > 1 ? argv[1] : sp + testRetrieval(argv[0], sp, + argv.length > 1 ? argv[1] : sp .getTestQuery()); } catch (Exception e) { @@ -61,16 +61,6 @@ public class SequenceFetcherTest + (argv.length > 1 ? argv[1] : sp.getTestQuery()) + " from " + argv[0] + "\nUsage: " + usage); } - SequenceI[] prod = al.getSequencesArray(); - if (al != null) - { - for (int p = 0; p < prod.length; p++) - { - System.out.println("Prod " + p + ": " - + prod[p].getDisplayId(true) + " : " - + prod[p].getDescription()); - } - } } return; } @@ -95,139 +85,135 @@ public class SequenceFetcherTest } for (DbSourceProxy sp : sfetcher.getSourceProxy(db)) { - System.out.println("Source: " + sp.getDbName() + " (" + db - + "): retrieving test:" + sp.getTestQuery()); - AlignmentI al = null; - try + testRetrieval(db, sp, sp.getTestQuery()); + } + } + + } + + private static void testRetrieval(String db, DbSourceProxy sp, + String testQuery) + { + AlignmentI ds = null; + Vector noProds = new Vector(); + System.out.println("Source: " + sp.getDbName() + " (" + db + + "): retrieving test:" + sp.getTestQuery()); + { + AlignmentI al = null; + try + { + al = sp.getSequenceRecords(testQuery); + if (al != null && al.getHeight() > 0) { - al = sp.getSequenceRecords(sp.getTestQuery()); - if (al != null && al.getHeight() > 0) + boolean dna = sp.isDnaCoding(); + al.setDataset(null); + AlignmentI alds = al.getDataset(); + // try and find products + CrossRef crossRef = new CrossRef(al.getSequencesArray(), alds); + List types = crossRef.findXrefSourcesForSequences(dna); + if (types != null) { - boolean dna = sp.isDnaCoding(); - // try and find products - String types[] = jalview.analysis.CrossRef - .findSequenceXrefTypes(dna, 
al.getSequencesArray()); - if (types != null) + System.out.println("Xref Types for: " + (dna ? "dna" : "prot")); + for (String source : types) { - System.out.println("Xref Types for: " - + (dna ? "dna" : "prot")); - for (int t = 0; t < types.length; t++) + System.out.println("Type: " + source); + SequenceI[] prod = crossRef.findXrefSequences(source, dna) + .getSequencesArray(); + System.out.println("Found " + + ((prod == null) ? "no" : "" + prod.length) + + " products"); + if (prod != null) { - System.out.println("Type: " + types[t]); - SequenceI[] prod = jalview.analysis.CrossRef - .findXrefSequences(al.getSequencesArray(), dna, - types[t], null) - .getSequencesArray(); - System.out.println("Found " - + ((prod == null) ? "no" : "" + prod.length) - + " products"); - if (prod != null) + for (int p = 0; p < prod.length; p++) { - for (int p = 0; p < prod.length; p++) - { - System.out.println("Prod " + p + ": " - + prod[p].getDisplayId(true)); - } + System.out.println("Prod " + p + ": " + + prod[p].getDisplayId(true)); } } } - else - { - noProds.addElement((dna ? new Object[] { al, al } - : new Object[] { al })); - } - - } - } catch (Exception ex) - { - System.out.println("ERROR:Failed to retrieve test query."); - ex.printStackTrace(System.out); - } - - if (al == null) - { - System.out.println("ERROR:No alignment retrieved."); - StringBuffer raw = sp.getRawRecords(); - if (raw != null) - { - System.out.println(raw.toString()); } else { - System.out.println("ERROR:No Raw results."); + noProds.addElement((dna ? 
new Object[] { al, al } + : new Object[] { al })); } + + } + } catch (Exception ex) + { + System.out.println("ERROR:Failed to retrieve test query."); + ex.printStackTrace(System.out); + } + + if (al == null) + { + System.out.println("ERROR:No alignment retrieved."); + StringBuffer raw = sp.getRawRecords(); + if (raw != null) + { + System.out.println(raw.toString()); } else { - System.out.println("Retrieved " + al.getHeight() + " sequences."); - for (int s = 0; s < al.getHeight(); s++) - { - SequenceI sq = al.getSequenceAt(s); - while (sq.getDatasetSequence() != null) - { - sq = sq.getDatasetSequence(); - - } - if (ds == null) - { - ds = new Alignment(new SequenceI[] { sq }); - - } - else - { - ds.addSequence(sq); - } - } + System.out.println("ERROR:No Raw results."); + } + } + else + { + System.out.println("Retrieved " + al.getHeight() + " sequences."); + if (ds == null) + { + ds = al.getDataset(); + } + else + { + ds.append(al.getDataset()); + al.setDataset(ds); } - System.out.flush(); - System.err.flush(); - } - if (noProds.size() > 0) + System.out.flush(); + System.err.flush(); + } + if (noProds.size() > 0) + { + Enumeration ts = noProds.elements(); + while (ts.hasMoreElements()) + { - Enumeration ts = noProds.elements(); - while (ts.hasMoreElements()) - + Object[] typeSq = ts.nextElement(); + boolean dna = (typeSq.length > 1); + AlignmentI al = (AlignmentI) typeSq[0]; + System.out.println("Trying getProducts for " + + al.getSequenceAt(0).getDisplayId(true)); + System.out.println("Search DS Xref for: " + (dna ? "dna" : "prot")); + // have a bash at finding the products amongst all the retrieved + // sequences. + SequenceI[] seqs = al.getSequencesArray(); + Alignment prodal = new CrossRef(seqs, ds).findXrefSequences(null, + dna); + System.out.println("Found " + + ((prodal == null) ? 
"no" : "" + prodal.getHeight()) + + " products"); + if (prodal != null) { - Object[] typeSq = ts.nextElement(); - boolean dna = (typeSq.length > 1); - AlignmentI al = (AlignmentI) typeSq[0]; - System.out.println("Trying getProducts for " - + al.getSequenceAt(0).getDisplayId(true)); - System.out.println("Search DS Xref for: " - + (dna ? "dna" : "prot")); - // have a bash at finding the products amongst all the retrieved - // sequences. - SequenceI[] seqs = al.getSequencesArray(); - Alignment prodal = jalview.analysis.CrossRef.findXrefSequences( - seqs, dna, null, ds); - System.out.println("Found " - + ((prodal == null) ? "no" : "" + prodal.getHeight()) - + " products"); - if (prodal != null) + SequenceI[] prod = prodal.getSequencesArray(); // note + // should + // test + // rather + // than + // throw + // away + // codon + // mapping + // (if + // present) + for (int p = 0; p < prod.length; p++) { - SequenceI[] prod = prodal.getSequencesArray(); // note - // should - // test - // rather - // than - // throw - // away - // codon - // mapping - // (if - // present) - for (int p = 0; p < prod.length; p++) - { - System.out.println("Prod " + p + ": " - + prod[p].getDisplayId(true)); - } + System.out.println("Prod " + p + ": " + + prod[p].getDisplayId(true)); } } - } - } } - } diff --git a/test/jalview/ws/seqfetcher/DbRefFetcherTest.java b/test/jalview/ws/seqfetcher/DbRefFetcherTest.java index 341d9ef..b3c7e10 100644 --- a/test/jalview/ws/seqfetcher/DbRefFetcherTest.java +++ b/test/jalview/ws/seqfetcher/DbRefFetcherTest.java @@ -178,8 +178,8 @@ public class DbRefFetcherTest .getMap().getMappedWidth(), 1); assertEquals("Expected local reference map to be 3 nucleotides", dr[0] .getMap().getWidth(), 3); - AlignmentI sprods = CrossRef.findXrefSequences( - alsq.getSequencesArray(), true, dr[0].getSource(), alsq); + AlignmentI sprods = new CrossRef(alsq.getSequencesArray(), alsq) + .findXrefSequences(dr[0].getSource(), true); assertNotNull( "Couldn't recover cross reference 
sequence from dataset. Was it ever added ?", sprods);