From 87a85be7fc7678455c298287349b03fdd12fd3ad Mon Sep 17 00:00:00 2001 From: gmungoc Date: Fri, 12 May 2017 16:00:20 +0100 Subject: [PATCH] JAL-2525 get sequence features for Ontology term(s), tidy feature sort methods --- src/jalview/analysis/AlignmentUtils.java | 342 ++++++++++---------- .../datamodel/features/SequenceFeatures.java | 96 +++++- .../datamodel/features/SequenceFeaturesI.java | 16 +- src/jalview/ext/ensembl/EnsemblGene.java | 48 ++- .../datamodel/features/SequenceFeaturesTest.java | 103 ++++++ test/jalview/ext/ensembl/EnsemblGeneTest.java | 36 +-- test/jalview/ext/ensembl/EnsemblSeqProxyTest.java | 23 +- 7 files changed, 430 insertions(+), 234 deletions(-) diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 955de28..7b867ac 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -35,14 +35,14 @@ import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; -import jalview.io.gff.SequenceOntologyFactory; +import jalview.datamodel.features.SequenceFeatures; import jalview.io.gff.SequenceOntologyI; import jalview.schemes.ResidueProperties; import jalview.util.Comparison; import jalview.util.DBRefUtils; +import jalview.util.IntRangeComparator; import jalview.util.MapList; import jalview.util.MappingUtils; -import jalview.util.RangeComparator; import jalview.util.StringUtils; import java.io.UnsupportedEncodingException; @@ -51,7 +51,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -2055,11 +2054,11 @@ public class AlignmentUtils * * @param fromSeq * @param toSeq + * @param mapping + * the mapping from 'fromSeq' to 'toSeq' * @param select * if not null, only features of this type are copied (including * subtypes in the Sequence Ontology) - * @param mapping - * the mapping from 'fromSeq' to 'toSeq' * @param omitting */ public static int transferFeatures(SequenceI fromSeq, SequenceI toSeq, @@ -2071,76 +2070,74 @@ public class AlignmentUtils copyTo = copyTo.getDatasetSequence(); } - SequenceOntologyI so = SequenceOntologyFactory.getInstance(); + /* + * get features, optionally restricted by an ontology term + */ + List sfs = select == null ? fromSeq.getFeatures() + .getPositionalFeatures() : fromSeq.getFeatures() + .getFeaturesByOntology(select); + int count = 0; - SequenceFeature[] sfs = fromSeq.getSequenceFeatures(); - if (sfs != null) + for (SequenceFeature sf : sfs) { - for (SequenceFeature sf : sfs) + String type = sf.getType(); + boolean omit = false; + for (String toOmit : omitting) { - String type = sf.getType(); - if (select != null && !so.isA(type, select)) + if (type.equals(toOmit)) { - continue; - } - boolean omit = false; - for (String toOmit : omitting) - { - if (type.equals(toOmit)) - { - omit = true; - } - } - if (omit) - { - continue; + omit = true; } + } + if (omit) + { + continue; + } - /* - * locate the mapped range - null if either start or end is - * not mapped (no partial overlaps are calculated) - */ - int start = sf.getBegin(); - int end = sf.getEnd(); - int[] mappedTo = mapping.locateInTo(start, end); - /* - * if whole exon range doesn't map, try interpreting it - * as 5' or 3' exon overlapping the CDS range - */ - if (mappedTo == null) - { - mappedTo = mapping.locateInTo(end, end); - if (mappedTo != null) - { - /* - * end of exon is in CDS range - 5' overlap - * to a range from the start of the peptide - */ - mappedTo[0] = 1; - } - } - if (mappedTo == null) + /* + * locate the mapped range - null if either start or end is + * not mapped (no partial overlaps are calculated) + */ + int start = sf.getBegin(); + int end = sf.getEnd(); + int[] mappedTo = mapping.locateInTo(start, end); + /* + * if whole exon range doesn't map, try interpreting it + * as 5' or 3' exon overlapping the CDS range + */ + if (mappedTo == null) + { + mappedTo = mapping.locateInTo(end, end); + if (mappedTo != null) { - mappedTo = mapping.locateInTo(start, start); - if (mappedTo != null) - { - /* - * start of exon is in CDS range - 3' overlap - * to a range up to the end of the peptide - */ - mappedTo[1] = toSeq.getLength(); - } + /* + * end of exon is in CDS range - 5' overlap + * to a range from the start of the peptide + */ + mappedTo[0] = 1; } + } + if (mappedTo == null) + { + mappedTo = mapping.locateInTo(start, start); if (mappedTo != null) { - int newBegin = Math.min(mappedTo[0], mappedTo[1]); - int newEnd = Math.max(mappedTo[0], mappedTo[1]); - SequenceFeature copy = new SequenceFeature(sf, newBegin, newEnd, - sf.getFeatureGroup()); - copyTo.addSequenceFeature(copy); - count++; + /* + * start of exon is in CDS range - 3' overlap + * to a range up to the end of the peptide + */ + mappedTo[1] = toSeq.getLength(); } } + if (mappedTo != null) + { + int newBegin = Math.min(mappedTo[0], mappedTo[1]); + int newEnd = Math.max(mappedTo[0], mappedTo[1]); + SequenceFeature copy = new SequenceFeature(sf, newBegin, newEnd, + sf.getFeatureGroup()); + copyTo.addSequenceFeature(copy); + count++; + } } return count; } @@ -2205,49 +2202,44 @@ public class AlignmentUtils public static List findCdsPositions(SequenceI dnaSeq) { List result = new ArrayList(); - SequenceFeature[] sfs = dnaSeq.getSequenceFeatures(); - if (sfs == null) + + List sfs = dnaSeq.getFeatures().getFeaturesByOntology( + SequenceOntologyI.CDS); + if (sfs.isEmpty()) { return result; } - - SequenceOntologyI so = SequenceOntologyFactory.getInstance(); + SequenceFeatures.sortFeatures(sfs, true); int startPhase = 0; for (SequenceFeature sf : sfs) { + int phase = 0; + try + { + phase = Integer.parseInt(sf.getPhase()); + } catch (NumberFormatException e) + { + // ignore + } /* - * process a CDS feature (or a sub-type of CDS) + * phase > 0 on first codon means 5' incomplete - skip to the start + * of the next codon; example ENST00000496384 */ - if (so.isA(sf.getType(), SequenceOntologyI.CDS)) + int begin = sf.getBegin(); + int end = sf.getEnd(); + if (result.isEmpty()) { - int phase = 0; - try - { - phase = Integer.parseInt(sf.getPhase()); - } catch (NumberFormatException e) + begin += phase; + if (begin > end) { - // ignore - } - /* - * phase > 0 on first codon means 5' incomplete - skip to the start - * of the next codon; example ENST00000496384 - */ - int begin = sf.getBegin(); - int end = sf.getEnd(); - if (result.isEmpty()) - { - begin += phase; - if (begin > end) - { - // shouldn't happen! - System.err - .println("Error: start phase extends beyond start CDS in " - + dnaSeq.getName()); - } + // shouldn't happen! + System.err + .println("Error: start phase extends beyond start CDS in " + + dnaSeq.getName()); } - result.add(new int[] { begin, end }); } + result.add(new int[] { begin, end }); } /* @@ -2267,7 +2259,7 @@ public class AlignmentUtils * ranges are assembled in order. Other cases should not use this method, * but instead construct an explicit mapping for CDS (e.g. EMBL parsing). */ - Collections.sort(result, new RangeComparator(true)); + Collections.sort(result, IntRangeComparator.ASCENDING); return result; } @@ -2324,20 +2316,20 @@ public class AlignmentUtils * sort to get sequence features in start position order * - would be better to store in Sequence as a TreeSet or NCList? */ - if (peptide.getSequenceFeatures() != null) - { - Arrays.sort(peptide.getSequenceFeatures(), - new Comparator() - { - @Override - public int compare(SequenceFeature o1, SequenceFeature o2) - { - int c = Integer.compare(o1.getBegin(), o2.getBegin()); - return c == 0 ? Integer.compare(o1.getEnd(), o2.getEnd()) - : c; - } - }); - } + // if (peptide.getSequenceFeatures() != null) + // { + // Arrays.sort(peptide.getSequenceFeatures(), + // new Comparator() + // { + // @Override + // public int compare(SequenceFeature o1, SequenceFeature o2) + // { + // int c = Integer.compare(o1.getBegin(), o2.getBegin()); + // return c == 0 ? Integer.compare(o1.getEnd(), o2.getEnd()) + // : c; + // } + // }); + // } return count; } @@ -2528,10 +2520,10 @@ public class AlignmentUtils * LinkedHashMap ensures we keep the peptide features in sequence order */ LinkedHashMap[]> variants = new LinkedHashMap[]>(); - SequenceOntologyI so = SequenceOntologyFactory.getInstance(); - SequenceFeature[] dnaFeatures = dnaSeq.getSequenceFeatures(); - if (dnaFeatures == null) + List dnaFeatures = dnaSeq.getFeatures() + .getFeaturesByOntology(SequenceOntologyI.SEQUENCE_VARIANT); + if (dnaFeatures.isEmpty()) { return variants; } @@ -2551,84 +2543,80 @@ public class AlignmentUtils // not handling multi-locus variant features continue; } - if (so.isA(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT)) + int[] mapsTo = dnaToProtein.locateInTo(dnaCol, dnaCol); + if (mapsTo == null) { - int[] mapsTo = dnaToProtein.locateInTo(dnaCol, dnaCol); - if (mapsTo == null) - { - // feature doesn't lie within coding region - continue; - } - int peptidePosition = mapsTo[0]; - List[] codonVariants = variants.get(peptidePosition); - if (codonVariants == null) - { - codonVariants = new ArrayList[CODON_LENGTH]; - codonVariants[0] = new ArrayList(); - codonVariants[1] = new ArrayList(); - codonVariants[2] = new ArrayList(); - variants.put(peptidePosition, codonVariants); - } + // feature doesn't lie within coding region + continue; + } + int peptidePosition = mapsTo[0]; + List[] codonVariants = variants.get(peptidePosition); + if (codonVariants == null) + { + codonVariants = new ArrayList[CODON_LENGTH]; + codonVariants[0] = new ArrayList(); + codonVariants[1] = new ArrayList(); + codonVariants[2] = new ArrayList(); + variants.put(peptidePosition, codonVariants); + } - /* - * extract dna variants to a string array - */ - String alls = (String) sf.getValue("alleles"); - if (alls == null) - { - continue; - } - String[] alleles = alls.toUpperCase().split(","); - int i = 0; - for (String allele : alleles) - { - alleles[i++] = allele.trim(); // lose any space characters "A, G" - } + /* + * extract dna variants to a string array + */ + String alls = (String) sf.getValue("alleles"); + if (alls == null) + { + continue; + } + String[] alleles = alls.toUpperCase().split(","); + int i = 0; + for (String allele : alleles) + { + alleles[i++] = allele.trim(); // lose any space characters "A, G" + } - /* - * get this peptide's codon positions e.g. [3, 4, 5] or [4, 7, 10] - */ - int[] codon = peptidePosition == lastPeptidePostion ? lastCodon - : MappingUtils.flattenRanges(dnaToProtein.locateInFrom( - peptidePosition, peptidePosition)); - lastPeptidePostion = peptidePosition; - lastCodon = codon; + /* + * get this peptide's codon positions e.g. [3, 4, 5] or [4, 7, 10] + */ + int[] codon = peptidePosition == lastPeptidePostion ? lastCodon + : MappingUtils.flattenRanges(dnaToProtein.locateInFrom( + peptidePosition, peptidePosition)); + lastPeptidePostion = peptidePosition; + lastCodon = codon; - /* - * save nucleotide (and any variant) for each codon position - */ - for (int codonPos = 0; codonPos < CODON_LENGTH; codonPos++) + /* + * save nucleotide (and any variant) for each codon position + */ + for (int codonPos = 0; codonPos < CODON_LENGTH; codonPos++) + { + String nucleotide = String.valueOf( + dnaSeq.getCharAt(codon[codonPos] - dnaStart)).toUpperCase(); + List codonVariant = codonVariants[codonPos]; + if (codon[codonPos] == dnaCol) { - String nucleotide = String.valueOf( - dnaSeq.getCharAt(codon[codonPos] - dnaStart)) - .toUpperCase(); - List codonVariant = codonVariants[codonPos]; - if (codon[codonPos] == dnaCol) + if (!codonVariant.isEmpty() + && codonVariant.get(0).variant == null) { - if (!codonVariant.isEmpty() - && codonVariant.get(0).variant == null) - { - /* - * already recorded base value, add this variant - */ - codonVariant.get(0).variant = sf; - } - else - { - /* - * add variant with base value - */ - codonVariant.add(new DnaVariant(nucleotide, sf)); - } + /* + * already recorded base value, add this variant + */ + codonVariant.get(0).variant = sf; } - else if (codonVariant.isEmpty()) + else { /* - * record (possibly non-varying) base value + * add variant with base value */ - codonVariant.add(new DnaVariant(nucleotide)); + codonVariant.add(new DnaVariant(nucleotide, sf)); } } + else if (codonVariant.isEmpty()) + { + /* + * record (possibly non-varying) base value + */ + codonVariant.add(new DnaVariant(nucleotide)); + } } } return variants; diff --git a/src/jalview/datamodel/features/SequenceFeatures.java b/src/jalview/datamodel/features/SequenceFeatures.java index 5fa9a3c..73ddac7 100644 --- a/src/jalview/datamodel/features/SequenceFeatures.java +++ b/src/jalview/datamodel/features/SequenceFeatures.java @@ -1,10 +1,13 @@ package jalview.datamodel.features; import jalview.datamodel.SequenceFeature; +import jalview.io.gff.SequenceOntologyFactory; +import jalview.io.gff.SequenceOntologyI; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Comparator; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -22,6 +25,29 @@ import java.util.TreeMap; */ public class SequenceFeatures implements SequenceFeaturesI { + /** + * a comparator for sorting features by start position ascending + */ + private static Comparator FORWARD_STRAND = new Comparator() + { + @Override + public int compare(ContiguousI o1, ContiguousI o2) + { + return Integer.compare(o1.getBegin(), o2.getBegin()); + } + }; + + /** + * a comparator for sorting features by end position descending + */ + private static Comparator REVERSE_STRAND = new Comparator() + { + @Override + public int compare(ContiguousI o1, ContiguousI o2) + { + return Integer.compare(o2.getEnd(), o1.getEnd()); + } + }; /* * map from feature type to structured store of features for that type @@ -38,8 +64,9 @@ public class SequenceFeatures implements SequenceFeaturesI * use a TreeMap so that features are returned in alphabetical order of type * wrap as a synchronized map for add and delete operations */ - featureStore = Collections - .synchronizedSortedMap(new TreeMap()); + // featureStore = Collections + // .synchronizedSortedMap(new TreeMap()); + featureStore = new TreeMap(); } /** @@ -102,6 +129,22 @@ public class SequenceFeatures implements SequenceFeaturesI * {@inheritDoc} */ @Override + public List getFeaturesByOntology(String... ontologyTerm) + { + if (ontologyTerm == null || ontologyTerm.length == 0) + { + return new ArrayList(); + } + + Set featureTypes = getFeatureTypes(ontologyTerm); + return getAllFeatures(featureTypes.toArray(new String[featureTypes + .size()])); + } + + /** + * {@inheritDoc} + */ + @Override public int getFeatureCount(boolean positional, String... type) { int result = 0; @@ -309,20 +352,47 @@ public class SequenceFeatures implements SequenceFeaturesI * {@inheritDoc} */ @Override - public Set getFeatureTypes() + public Set getFeatureTypes(String... soTerm) { Set types = new HashSet(); for (Entry entry : featureStore.entrySet()) { - if (!entry.getValue().isEmpty()) + String type = entry.getKey(); + if (!entry.getValue().isEmpty() && isOntologyTerm(type, soTerm)) { - types.add(entry.getKey()); + types.add(type); } } return types; } /** + * Answers true if the given type is one of the specified sequence ontology + * terms (or a sub-type of one), or if no terms are supplied. Answers false if + * filter terms are specified and the given term does not match any of them. + * + * @param type + * @param soTerm + * @return + */ + protected boolean isOntologyTerm(String type, String... soTerm) + { + if (soTerm == null || soTerm.length == 0) + { + return true; + } + SequenceOntologyI so = SequenceOntologyFactory.getInstance(); + for (String term : soTerm) + { + if (so.isA(type, term)) + { + return true; + } + } + return false; + } + + /** * {@inheritDoc} */ @Override @@ -341,4 +411,18 @@ public class SequenceFeatures implements SequenceFeaturesI return featureStore.containsKey(type) ? featureStore.get(type) .getMaximumScore(positional) : Float.NaN; } -} + + /** + * A convenience method to sort features by start position ascending (if on + * forward strand), or end position descending (if on reverse strand) + * + * @param features + * @param forwardStrand + */ + public static void sortFeatures(List features, + final boolean forwardStrand) + { + Collections.sort(features, forwardStrand ? FORWARD_STRAND + : REVERSE_STRAND); + } +} \ No newline at end of file diff --git a/src/jalview/datamodel/features/SequenceFeaturesI.java b/src/jalview/datamodel/features/SequenceFeaturesI.java index cfcdc76..ed966e7 100644 --- a/src/jalview/datamodel/features/SequenceFeaturesI.java +++ b/src/jalview/datamodel/features/SequenceFeaturesI.java @@ -42,6 +42,16 @@ public interface SequenceFeaturesI List getAllFeatures(String... type); /** + * Answers a list of all features stored, whose type either matches one of the + * given ontology terms, or is a specialisation of a term in the Sequence + * Ontology. Results are returned in no particular guaranteed order. + * + * @param ontologyTerm + * @return + */ + List getFeaturesByOntology(String... ontologyTerm); + + /** * Answers the number of (positional or non-positional) features, optionally * restricted to specified feature types. Contact features are counted as 1. * @@ -134,11 +144,13 @@ public interface SequenceFeaturesI boolean positionalFeatures, String... groups); /** - * Answers a set of the distinct feature types for which a feature is stored + * Answers a set of the distinct feature types for which a feature is stored. + * The types may optionally be restricted to those which match, or are a + * subtype of, given sequence ontology terms * * @return */ - Set getFeatureTypes(); + Set getFeatureTypes(String... soTerm); /** * Answers the minimum score held for positional or non-positional features diff --git a/src/jalview/ext/ensembl/EnsemblGene.java b/src/jalview/ext/ensembl/EnsemblGene.java index 24e3e95..223e54a 100644 --- a/src/jalview/ext/ensembl/EnsemblGene.java +++ b/src/jalview/ext/ensembl/EnsemblGene.java @@ -26,6 +26,7 @@ import jalview.datamodel.AlignmentI; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; +import jalview.datamodel.features.SequenceFeatures; import jalview.io.gff.SequenceOntologyFactory; import jalview.io.gff.SequenceOntologyI; import jalview.schemes.FeatureColour; @@ -267,22 +268,19 @@ public class EnsemblGene extends EnsemblSeqProxy */ protected void clearGeneFeatures(SequenceI gene) { - SequenceFeature[] sfs = gene.getSequenceFeatures(); - if (sfs != null) + /* + * Note we include NMD_transcript_variant here because it behaves like + * 'transcript' in Ensembl, although strictly speaking it is not + * (it is a sub-type of sequence_variant) + */ + String[] soTerms = new String[] { + SequenceOntologyI.NMD_TRANSCRIPT_VARIANT, SequenceOntologyI.EXON, + SequenceOntologyI.CDS }; + List sfs = gene.getFeatures().getFeaturesByOntology( + soTerms); + for (SequenceFeature sf : sfs) { - SequenceOntologyI so = SequenceOntologyFactory.getInstance(); - List filtered = new ArrayList(); - for (SequenceFeature sf : sfs) - { - String type = sf.getType(); - if (!isTranscript(type) && !so.isA(type, SequenceOntologyI.EXON) - && !so.isA(type, SequenceOntologyI.CDS)) - { - filtered.add(sf); - } - } - gene.setSequenceFeatures(filtered - .toArray(new SequenceFeature[filtered.size()])); + gene.deleteFeature(sf); } } @@ -332,6 +330,7 @@ public class EnsemblGene extends EnsemblSeqProxy { splices = findFeatures(gene, SequenceOntologyI.CDS, parentId); } + SequenceFeatures.sortFeatures(splices, true); int transcriptLength = 0; final char[] geneChars = gene.getSequence(); @@ -381,7 +380,7 @@ public class EnsemblGene extends EnsemblSeqProxy mapTo.add(new int[] { 1, transcriptLength }); MapList mapping = new MapList(mappedFrom, mapTo, 1, 1); EnsemblCdna cdna = new EnsemblCdna(getDomain()); - cdna.transferFeatures(gene.getSequenceFeatures(), + cdna.transferFeatures(gene.getFeatures().getPositionalFeatures(), transcript.getDatasetSequence(), mapping, parentId); /* @@ -422,19 +421,18 @@ public class EnsemblGene extends EnsemblSeqProxy List transcriptFeatures = new ArrayList(); String parentIdentifier = GENE_PREFIX + accId; - SequenceFeature[] sfs = geneSequence.getSequenceFeatures(); + // todo optimise here by transcript type! + List sfs = geneSequence.getFeatures() + .getPositionalFeatures(); - if (sfs != null) + for (SequenceFeature sf : sfs) { - for (SequenceFeature sf : sfs) + if (isTranscript(sf.getType())) { - if (isTranscript(sf.getType())) + String parent = (String) sf.getValue(PARENT); + if (parentIdentifier.equals(parent)) { - String parent = (String) sf.getValue(PARENT); - if (parentIdentifier.equals(parent)) - { - transcriptFeatures.add(sf); - } + transcriptFeatures.add(sf); } } } diff --git a/test/jalview/datamodel/features/SequenceFeaturesTest.java b/test/jalview/datamodel/features/SequenceFeaturesTest.java index 0d1d89d..5ff2d7b 100644 --- a/test/jalview/datamodel/features/SequenceFeaturesTest.java +++ b/test/jalview/datamodel/features/SequenceFeaturesTest.java @@ -2,10 +2,12 @@ package jalview.datamodel.features; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertSame; import static org.testng.Assert.assertTrue; import jalview.datamodel.SequenceFeature; +import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Set; @@ -909,4 +911,105 @@ public class SequenceFeaturesTest assertEquals(iterator.next(), "Helix"); assertFalse(iterator.hasNext()); } + + @Test(groups = "Functional") + public void testGetFeatureTypes_byOntology() + { + SequenceFeaturesI store = new SequenceFeatures(); + + SequenceFeature sf1 = new SequenceFeature("transcript", "desc", 10, 20, + Float.NaN, null); + store.add(sf1); + // mRNA isA mature_transcript isA transcript + SequenceFeature sf2 = new SequenceFeature("mRNA", "desc", 10, 20, + Float.NaN, null); + store.add(sf2); + // just to prove non-positional feature types are included + SequenceFeature sf3 = new SequenceFeature("mRNA", "desc", 0, 0, + Float.NaN, null); + store.add(sf3); + SequenceFeature sf4 = new SequenceFeature("CDS", "desc", 0, 0, + Float.NaN, null); + store.add(sf4); + + Set types = store.getFeatureTypes("transcript"); + assertEquals(types.size(), 2); + assertTrue(types.contains("transcript")); + assertTrue(types.contains("mRNA")); + + // matches include arguments whether SO terms or not + types = store.getFeatureTypes("transcript", "CDS"); + assertEquals(types.size(), 3); + assertTrue(types.contains("transcript")); + assertTrue(types.contains("mRNA")); + assertTrue(types.contains("CDS")); + + types = store.getFeatureTypes("exon"); + assertTrue(types.isEmpty()); + } + + @Test(groups = "Functional") + public void testGetFeaturesByOntology() + { + SequenceFeaturesI store = new SequenceFeatures(); + List features = store.getFeaturesByOntology(); + assertTrue(features.isEmpty()); + assertTrue(store.getFeaturesByOntology(new String[] {}).isEmpty()); + assertTrue(store.getFeaturesByOntology((String[]) null).isEmpty()); + + SequenceFeature sf1 = new SequenceFeature("transcript", "desc", 10, 20, + Float.NaN, null); + store.add(sf1); + + // mRNA isA transcript; added here 'as if' non-positional + // just to show that non-positional features are included in results + SequenceFeature sf2 = new SequenceFeature("mRNA", "desc", 0, 0, + Float.NaN, null); + store.add(sf2); + + SequenceFeature sf3 = new SequenceFeature("Pfam", "desc", 30, 40, + Float.NaN, null); + store.add(sf3); + + features = store.getFeaturesByOntology("transcript"); + assertEquals(features.size(), 2); + assertTrue(features.contains(sf1)); + assertTrue(features.contains(sf2)); + + features = store.getFeaturesByOntology("mRNA"); + assertEquals(features.size(), 1); + assertTrue(features.contains(sf2)); + + features = store.getFeaturesByOntology("mRNA", "Pfam"); + assertEquals(features.size(), 2); + assertTrue(features.contains(sf2)); + assertTrue(features.contains(sf3)); + } + + @Test(groups = "Functional") + public void testSortFeatures() + { + List sfs = new ArrayList(); + SequenceFeature sf1 = new SequenceFeature("Pfam", "desc", 30, 80, + Float.NaN, null); + sfs.add(sf1); + SequenceFeature sf2 = new SequenceFeature("Rfam", "desc", 40, 50, + Float.NaN, null); + sfs.add(sf2); + SequenceFeature sf3 = new SequenceFeature("Rfam", "desc", 50, 60, + Float.NaN, null); + sfs.add(sf3); + + // sort by end position descending + SequenceFeatures.sortFeatures(sfs, false); + assertSame(sfs.get(0), sf1); + assertSame(sfs.get(1), sf3); + assertSame(sfs.get(2), sf2); + + // sort by start position ascending + SequenceFeatures.sortFeatures(sfs, true); + assertSame(sfs.get(0), sf1); + assertSame(sfs.get(1), sf2); + assertSame(sfs.get(2), sf3); + } } diff --git a/test/jalview/ext/ensembl/EnsemblGeneTest.java b/test/jalview/ext/ensembl/EnsemblGeneTest.java index 6cfd85b..edecc23 100644 --- a/test/jalview/ext/ensembl/EnsemblGeneTest.java +++ b/test/jalview/ext/ensembl/EnsemblGeneTest.java @@ -22,7 +22,6 @@ package jalview.ext.ensembl; import static org.testng.AssertJUnit.assertEquals; import static org.testng.AssertJUnit.assertFalse; -import static org.testng.AssertJUnit.assertSame; import static org.testng.AssertJUnit.assertTrue; import jalview.api.FeatureSettingsModelI; @@ -76,7 +75,9 @@ public class EnsemblGeneTest genomic.setEnd(50000); String geneId = "ABC123"; - // gene at (start+10000) length 501 + // gene at (start+20000) length 501 + // should be ignored - the first 'gene' found defines the whole range + // (note features are found in position order, not addition order) SequenceFeature sf = new SequenceFeature("gene", "", 20000, 20500, 0f, null); sf.setValue("ID", "gene:" + geneId); @@ -84,7 +85,6 @@ public class EnsemblGeneTest genomic.addSequenceFeature(sf); // gene at (start + 10500) length 101 - // should be ignored - the first 'gene' found defines the whole range sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null); sf.setValue("ID", "gene:" + geneId); sf.setStrand("+"); @@ -94,13 +94,13 @@ public class EnsemblGeneTest 23); List fromRanges = ranges.getFromRanges(); assertEquals(1, fromRanges.size()); - assertEquals(20000, fromRanges.get(0)[0]); - assertEquals(20500, fromRanges.get(0)[1]); + assertEquals(10500, fromRanges.get(0)[0]); + assertEquals(10600, fromRanges.get(0)[1]); // to range should start from given start numbering List toRanges = ranges.getToRanges(); assertEquals(1, toRanges.size()); assertEquals(23, toRanges.get(0)[0]); - assertEquals(523, toRanges.get(0)[1]); + assertEquals(123, toRanges.get(0)[1]); } /** @@ -115,7 +115,9 @@ public class EnsemblGeneTest genomic.setEnd(50000); String geneId = "ABC123"; - // gene at (start+10000) length 501 + // gene at (start+20000) length 501 + // should be ignored - the first 'gene' found defines the whole range + // (real data would only have one such feature) SequenceFeature sf = new SequenceFeature("ncRNA_gene", "", 20000, 20500, 0f, null); sf.setValue("ID", "gene:" + geneId); @@ -123,8 +125,6 @@ public class EnsemblGeneTest genomic.addSequenceFeature(sf); // gene at (start + 10500) length 101 - // should be ignored - the first 'gene' found defines the whole range - // (real data would only have one such feature) sf = new SequenceFeature("gene", "", 10500, 10600, 0f, null); sf.setValue("ID", "gene:" + geneId); sf.setStrand("+"); @@ -135,13 +135,13 @@ public class EnsemblGeneTest List fromRanges = ranges.getFromRanges(); assertEquals(1, fromRanges.size()); // from range on reverse strand: - assertEquals(20500, fromRanges.get(0)[0]); - assertEquals(20000, fromRanges.get(0)[1]); + assertEquals(10500, fromRanges.get(0)[0]); + assertEquals(10600, fromRanges.get(0)[1]); // to range should start from given start numbering List toRanges = ranges.getToRanges(); assertEquals(1, toRanges.size()); assertEquals(23, toRanges.get(0)[0]); - assertEquals(523, toRanges.get(0)[1]); + assertEquals(123, toRanges.get(0)[1]); } /** @@ -164,7 +164,7 @@ public class EnsemblGeneTest genomic.addSequenceFeature(sf1); // transcript sub-type feature - SequenceFeature sf2 = new SequenceFeature("snRNA", "", 20000, 20500, + SequenceFeature sf2 = new SequenceFeature("snRNA", "", 21000, 21500, 0f, null); sf2.setValue("Parent", "gene:" + geneId); sf2.setValue("transcript_id", "transcript2"); @@ -172,13 +172,13 @@ public class EnsemblGeneTest // NMD_transcript_variant treated like transcript in Ensembl SequenceFeature sf3 = new SequenceFeature("NMD_transcript_variant", "", - 20000, 20500, 0f, null); + 22000, 22500, 0f, null); sf3.setValue("Parent", "gene:" + geneId); sf3.setValue("transcript_id", "transcript3"); genomic.addSequenceFeature(sf3); // transcript for a different gene - ignored - SequenceFeature sf4 = new SequenceFeature("snRNA", "", 20000, 20500, + SequenceFeature sf4 = new SequenceFeature("snRNA", "", 23000, 23500, 0f, null); sf4.setValue("Parent", "gene:XYZ"); sf4.setValue("transcript_id", "transcript4"); @@ -192,9 +192,9 @@ public class EnsemblGeneTest List features = testee.getTranscriptFeatures(geneId, genomic); assertEquals(3, features.size()); - assertSame(sf1, features.get(0)); - assertSame(sf2, features.get(1)); - assertSame(sf3, features.get(2)); + assertTrue(features.contains(sf1)); + assertTrue(features.contains(sf2)); + assertTrue(features.contains(sf3)); } /** diff --git a/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java index e977233..c8fa3c2 100644 --- a/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java +++ b/test/jalview/ext/ensembl/EnsemblSeqProxyTest.java @@ -22,12 +22,13 @@ package jalview.ext.ensembl; import static org.testng.AssertJUnit.assertEquals; import static org.testng.AssertJUnit.assertFalse; +import static org.testng.AssertJUnit.assertSame; import static org.testng.AssertJUnit.assertTrue; -import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals; import jalview.datamodel.Alignment; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; +import jalview.datamodel.features.SequenceFeatures; import jalview.gui.JvOptionPane; import jalview.io.DataSourceType; import jalview.io.FastaFile; @@ -37,6 +38,7 @@ import jalview.io.gff.SequenceOntologyLite; import java.lang.reflect.Method; import java.util.Arrays; +import java.util.List; import org.testng.Assert; import org.testng.annotations.AfterClass; @@ -166,6 +168,8 @@ public class EnsemblSeqProxyTest Alignment ral = new Alignment(sqs); for (SequenceI tr : trueSqs) { + // 12/05/2017 failing for EnsemblCdna which is returning protein + // Ensembl helpdesk ticket 187998 SequenceI[] rseq; Assert.assertNotNull( rseq = ral.findSequenceMatch(tr.getName()), @@ -269,15 +273,22 @@ public class EnsemblSeqProxyTest SequenceFeature sf2 = new SequenceFeature("", "", 8, 12, 0f, null); SequenceFeature sf3 = new SequenceFeature("", "", 8, 13, 0f, null); SequenceFeature sf4 = new SequenceFeature("", "", 11, 11, 0f, null); - SequenceFeature[] sfs = new SequenceFeature[] { sf1, sf2, sf3, sf4 }; + List sfs = Arrays.asList(new SequenceFeature[] { sf1, + sf2, sf3, sf4 }); // sort by start position ascending (forward strand) // sf2 and sf3 tie and should not be reordered by sorting - EnsemblSeqProxy.sortFeatures(sfs, true); - assertArrayEquals(new SequenceFeature[] { sf2, sf3, sf1, sf4 }, sfs); + SequenceFeatures.sortFeatures(sfs, true); + assertSame(sfs.get(0), sf2); + assertSame(sfs.get(1), sf3); + assertSame(sfs.get(2), sf1); + assertSame(sfs.get(3), sf4); // sort by end position descending (reverse strand) - EnsemblSeqProxy.sortFeatures(sfs, false); - assertArrayEquals(new SequenceFeature[] { sf1, sf3, sf2, sf4 }, sfs); + SequenceFeatures.sortFeatures(sfs, false); + assertSame(sfs.get(0), sf1); + assertSame(sfs.get(1), sf3); + assertSame(sfs.get(2), sf2); + assertSame(sfs.get(3), sf4); } } -- 1.7.10.2