import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceGroup;
import jalview.datamodel.SequenceI;
-import jalview.io.gff.SequenceOntology;
+import jalview.io.gff.SequenceOntologyFactory;
+import jalview.io.gff.SequenceOntologyI;
import jalview.schemes.ResidueProperties;
import jalview.util.DBRefUtils;
import jalview.util.MapList;
copyTo = copyTo.getDatasetSequence();
}
- SequenceOntology so = SequenceOntology.getInstance();
+ SequenceOntologyI so = SequenceOntologyFactory.getInstance();
int count = 0;
SequenceFeature[] sfs = fromSeq.getSequenceFeatures();
if (sfs != null)
package jalview.ext.ensembl;
import jalview.datamodel.SequenceFeature;
-import jalview.io.gff.SequenceOntology;
+import jalview.io.gff.SequenceOntologyFactory;
+import jalview.io.gff.SequenceOntologyI;
import java.util.List;
@Override
protected boolean identifiesSequence(SequenceFeature sf, String accId)
{
- if (SequenceOntology.getInstance().isA(sf.getType(),
- SequenceOntology.EXON))
+ if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
+ SequenceOntologyI.EXON))
{
String parentFeature = (String) sf.getValue(PARENT);
if (("transcript:" + accId).equals(parentFeature))
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
-import jalview.io.gff.SequenceOntology;
+import jalview.io.gff.SequenceOntologyFactory;
+import jalview.io.gff.SequenceOntologyI;
import java.util.List;
@Override
protected boolean retainFeature(SequenceFeature sf, String accessionId)
{
- if (SequenceOntology.getInstance().isA(sf.getType(),
- SequenceOntology.CDS))
+ if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
+ SequenceOntologyI.CDS))
{
return false;
}
@Override
protected boolean identifiesSequence(SequenceFeature sf, String accId)
{
- if (SequenceOntology.getInstance().isA(sf.getType(),
- SequenceOntology.CDS))
+ if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
+ SequenceOntologyI.CDS))
{
String parentFeature = (String) sf.getValue(PARENT);
if (("transcript:" + accId).equals(parentFeature))
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
-import jalview.io.gff.SequenceOntology;
+import jalview.io.gff.SequenceOntologyFactory;
+import jalview.io.gff.SequenceOntologyI;
import jalview.util.MapList;
import java.util.ArrayList;
*/
String parentId = "transcript:" + accId;
List<SequenceFeature> splices = findFeatures(gene,
- SequenceOntology.EXON, parentId);
+ SequenceOntologyI.EXON, parentId);
if (splices.isEmpty())
{
- splices = findFeatures(gene, SequenceOntology.CDS, parentId);
+ splices = findFeatures(gene, SequenceOntologyI.CDS, parentId);
}
int transcriptLength = 0;
/*
* and finally fetch the protein product and save as a cross-reference
*/
- addProteinProduct(transcript);
+ new EnsemblCdna().addProteinProduct(transcript);
return transcript;
}
@Override
protected boolean identifiesSequence(SequenceFeature sf, String accId)
{
- if (SequenceOntology.getInstance().isA(sf.getType(),
- SequenceOntology.GENE))
+ if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
+ SequenceOntologyI.GENE))
{
String id = (String) sf.getValue(ID);
if (("gene:" + accId).equals(id))
@Override
protected boolean retainFeature(SequenceFeature sf, String accessionId)
{
- if (SequenceOntology.getInstance().isA(sf.getType(),
- SequenceOntology.GENE))
+ if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
+ SequenceOntologyI.GENE))
{
return false;
}
return super.getCrossReferenceDatabases();
}
+ /**
+ * Override to do nothing as Ensembl doesn't return a protein sequence for a
+ * gene identifier
+ */
+ @Override
+ protected void addProteinProduct(SequenceI querySeq)
+ {
+ }
+
}
@Override
public AlignmentI getSequenceRecords(String query) throws IOException
{
- long now = System.currentTimeMillis();
// TODO: use a vararg String... for getSequenceRecords instead?
List<String> queries = new ArrayList<String>();
queries.add(query);
FileParse fp = getSequenceReader(queries);
FeaturesFile fr = new FeaturesFile(fp);
- System.out.println(getClass().getName() + " took "
- + (System.currentTimeMillis() - now) + "ms to fetch");
return new Alignment(fr.getSeqsAsArray());
}
protected BufferedReader getHttpResponse(URL url, List<String> ids)
throws IOException
{
+ long now = System.currentTimeMillis();
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
/*
"Response code was not 200. Detected response was "
+ responseCode);
}
+ System.out.println(getClass().getName() + " took "
+ + (System.currentTimeMillis() - now) + "ms to fetch");
BufferedReader reader = null;
reader = new BufferedReader(new InputStreamReader(response, "UTF-8"));
import jalview.exceptions.JalviewException;
import jalview.io.FastaFile;
import jalview.io.FileParse;
-import jalview.io.gff.SequenceOntology;
+import jalview.io.gff.SequenceOntologyFactory;
+import jalview.io.gff.SequenceOntologyI;
import jalview.schemes.ResidueProperties;
import jalview.util.DBRefUtils;
import jalview.util.MapList;
@Override
public AlignmentI getSequenceRecords(String query) throws Exception
{
- long now = System.currentTimeMillis();
// TODO use a String... query vararg instead?
// danger: accession separator used as a regex here, a string elsewhere
+ " chunks. Unexpected problem (" + r.getLocalizedMessage()
+ ")";
System.err.println(msg);
- if (alignment != null)
- {
- break; // return what we got
- }
- else
- {
- throw new JalviewException(msg, r);
- }
+ break;
+ // if (alignment != null)
+ // {
+ // break; // return what we got
+ // }
+ // else
+ // {
+ // throw new JalviewException(msg, r);
+ // }
}
}
getCrossReferences(seq);
}
- System.out.println(getClass().getName() + " took "
- + (System.currentTimeMillis() - now) + "ms to fetch");
return alignment;
}
}
/**
- * Adds CDS ranges to the ranges list, and returns the total length mapped.
+ * Adds CDS ranges to the ranges list, and returns the total length mapped
+ * from.
*
- * No need to worry about reverse strand dna here since the retrieved sequence
- * is as transcribed (reverse complement for reverse strand), i.e in the same
- * sense as the peptide.
+ * No need to worry about reverse strand dna here, since the retrieved
+ * sequence is as transcribed (reverse complement for reverse strand), i.e. in
+ * the same sense as the peptide.
*
* @param dnaSeq
* @param ranges
/*
* process a CDS feature (or a sub-type of CDS)
*/
- if (SequenceOntology.getInstance().isA(sf.getType(), SequenceOntology.CDS))
+ if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
+ SequenceOntologyI.CDS))
{
int phase = 0;
try {
* the start position of the sequence we are mapping to
* @return
*/
- protected MapList getGenomicRanges(SequenceI sourceSequence,
+ protected MapList getGenomicRangesFromFeatures(SequenceI sourceSequence,
String accId, int start)
{
SequenceFeature[] sfs = sourceSequence.getSequenceFeatures();
*/
if (identifiesSequence(sf, accId))
{
- int strand = sf.getStrand();
-
- if (directionSet && strand != direction)
- {
- // abort - mix of forward and backward
+ int strand = sf.getStrand();
+ strand = strand == 0 ? 1 : strand; // treat unknown as forward
+
+ if (directionSet && strand != direction)
+ {
+ // abort - mix of forward and backward
System.err.println("Error: forward and backward strand for "
+ accId);
return null;
*/
Collections.sort(regions, new RangeSorter(direction == 1));
- List<int[]> to = new ArrayList<int[]>();
- to.add(new int[] { start, start + mappedLength - 1 });
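+ // the mapped-to side is a single contiguous range [start, start + mappedLength - 1]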
+ List<int[]> to = Arrays.asList(new int[] { start,
+ start + mappedLength - 1 });
return new MapList(regions, to, 1, 1);
}
/*
* for sequence_variant, make an additional feature with consequence
*/
- if (SequenceOntology.getInstance().isSequenceVariant(sf.getType()))
+ if (SequenceOntologyFactory.getInstance().isA(sf.getType(),
+ SequenceOntologyI.SEQUENCE_VARIANT))
{
String consequence = (String) sf.getValue(CONSEQUENCE_TYPE);
if (consequence != null)
}
SequenceFeature[] sfs = sourceSequence.getSequenceFeatures();
- MapList mapping = getGenomicRanges(sourceSequence, accessionId,
+ MapList mapping = getGenomicRangesFromFeatures(sourceSequence, accessionId,
targetSequence.getStart());
if (mapping == null)
{
SequenceFeature[] sfs = sequence.getSequenceFeatures();
if (sfs != null) {
- SequenceOntology so = SequenceOntology.getInstance();
+ SequenceOntologyI so = SequenceOntologyFactory.getInstance();
for (SequenceFeature sf :sfs) {
if (so.isA(sf.getType(), type))
{
}
AlignmentUtils.transferFeatures(dnaSeq, peptide, dnaToProtein,
- SequenceOntology.EXON);
+ SequenceOntologyI.EXON);
LinkedHashMap<Integer, String[][]> variants = buildDnaVariantsMap(
dnaSeq, dnaToProtein);
String desc = StringUtils.listToDelimitedString(peptideVariants,
", ");
SequenceFeature sf = new SequenceFeature(
- SequenceOntology.SEQUENCE_VARIANT, desc, peptidePos,
+ SequenceOntologyI.SEQUENCE_VARIANT, desc, peptidePos,
peptidePos, 0f, null);
peptide.addSequenceFeature(sf);
count++;
* LinkedHashMap ensures we add the peptide features in sequence order
*/
LinkedHashMap<Integer, String[][]> variants = new LinkedHashMap<Integer, String[][]>();
- SequenceOntology so = SequenceOntology.getInstance();
+ SequenceOntologyI so = SequenceOntologyFactory.getInstance();
SequenceFeature[] dnaFeatures = dnaSeq.getSequenceFeatures();
if (dnaFeatures == null)
// not handling multi-locus variant features
continue;
}
- if (so.isSequenceVariant(sf.getType()))
+ if (so.isA(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT))
{
int[] mapsTo = dnaToProtein.locateInTo(dnaCol, dnaCol);
if (mapsTo == null)
public static boolean isTranscript(String featureType)
{
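+ // NB: Ensembl reports NMD_transcript_variant as if a transcript,
+ // though the SO models it as a sequence_variant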
return NMD_VARIANT.equals(featureType)
- || SequenceOntology.getInstance().isA(featureType, SequenceOntology.TRANSCRIPT);
+ || SequenceOntologyFactory.getInstance().isA(featureType,
+ SequenceOntologyI.TRANSCRIPT);
}
}
String atts = gff[ATTRIBUTES_COL];
Map<String, List<String>> attributes = parseNameValuePairs(atts);
- if (SequenceOntology.getInstance().isProteinMatch(soTerm))
+ SequenceOntologyI so = SequenceOntologyFactory.getInstance();
+ if (so.isA(soTerm, SequenceOntologyI.PROTEIN_MATCH))
{
- sf = processProteinMatch(attributes, seq, gff, align,
- newseqs, relaxedIdMatching);
+ sf = processProteinMatch(attributes, seq, gff, align, newseqs,
+ relaxedIdMatching);
}
- else if (SequenceOntology.getInstance().isNucleotideMatch(soTerm))
+ else if (so.isA(soTerm, SequenceOntologyI.NUCLEOTIDE_MATCH))
{
sf = processNucleotideMatch(attributes, seq, gff, align,
newseqs, relaxedIdMatching);
desc = target.split(" ")[0];
}
- SequenceOntology so = SequenceOntology.getInstance();
+ SequenceOntologyI so = SequenceOntologyFactory.getInstance();
String type = sf.getType();
- if (so.isSequenceVariant(type))
+ if (so.isA(type, SequenceOntologyI.SEQUENCE_VARIANT))
{
/*
* Ensembl returns dna variants as 'alleles'
*/
public static boolean recognises(String[] columns)
{
- SequenceOntology so = SequenceOntology.getInstance();
+ SequenceOntologyI so = SequenceOntologyFactory.getInstance();
String type = columns[TYPE_COL];
- if (so.isProteinMatch(type)
- || (".".equals(columns[SOURCE_COL]) && so.isPolypeptide(type)))
+ if (so.isA(type, SequenceOntologyI.PROTEIN_MATCH)
+ || (".".equals(columns[SOURCE_COL]) && so.isA(type,
+ SequenceOntologyI.POLYPEPTIDE)))
{
return true;
}
import java.io.InputStreamReader;
import java.text.ParseException;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
* A wrapper class that parses the Sequence Ontology and exposes useful access
* methods. This version uses the BioJava parser.
*/
-public class SequenceOntology
+class SequenceOntology implements SequenceOntologyI
{
-
- /*
- * selected commonly used values for quick reference
- */
- // SO:0000316
- public static final String CDS = "CDS";
-
- // SO:0001060
- public static final String SEQUENCE_VARIANT = "sequence_variant";
-
- // SO:0000147
- public static final String EXON = "exon";
-
- // SO:0000673
- public static final String TRANSCRIPT = "transcript";
-
- // SO:0000704
- public static final String GENE = "gene";
-
- /*
- * singleton instance of this class
- */
- private static SequenceOntology instance;
-
/*
* the parsed Ontology data as modelled by BioJava
*/
*/
private Map<Term, List<Term>> termIsA;
- /**
- * Returns singleton instance
- *
- * @return
- */
- public synchronized static SequenceOntology getInstance()
- {
- if (instance == null)
- {
- instance = new SequenceOntology();
- }
- return instance;
- }
+ private List<String> termsFound;
+
+ private List<String> termsNotFound;
/**
- * Private constructor to enforce use of singleton. Parses and caches the SO
- * OBO data file.
+ * Package private constructor, to enforce use of the factory. Parses and
+ * caches the SO OBO data file.
*/
- private SequenceOntology()
+ SequenceOntology()
{
+ termsFound = new ArrayList<String>();
+ termsNotFound = new ArrayList<String>();
termsByDescription = new HashMap<String, Term>();
termIsA = new HashMap<Term, List<Term>>();
*/
public boolean isNucleotideMatch(String soTerm)
{
- return isA(soTerm, "nucleotide_match");
+ return isA(soTerm, NUCLEOTIDE_MATCH);
}
/**
*/
public boolean isProteinMatch(String soTerm)
{
- return isA(soTerm, "protein_match");
+ return isA(soTerm, PROTEIN_MATCH);
}
/**
*/
public boolean isPolypeptide(String soTerm)
{
- return isA(soTerm, "polypeptide");
+ return isA(soTerm, POLYPEPTIDE);
}
/**
* @param parent
* @return
*/
+ @Override
public boolean isA(String child, String parent)
{
+ if (child == null || parent == null)
+ {
+ return false;
+ }
/*
* optimise trivial checks like isA("CDS", "CDS")
*/
if (child.equals(parent))
{
+ termFound(child);
return true;
}
Term childTerm = getTerm(child);
+ if (childTerm != null)
+ {
+ termFound(child);
+ }
+ else
+ {
+ termNotFound(child);
+ }
Term parentTerm = getTerm(parent);
return termIsA(childTerm, parentTerm);
}
/**
+ * Records a valid term queried for, for reporting purposes
+ *
+ * @param term
+ */
+ private void termFound(String term)
+ {
+ synchronized (termsFound)
+ {
+ if (!termsFound.contains(term))
+ {
+ termsFound.add(term);
+ }
+ }
+ }
+
+ /**
+ * Records an invalid term queried for, for reporting purposes
+ *
+ * @param term
+ */
+ private void termNotFound(String term)
+ {
+ synchronized (termsNotFound)
+ {
+ if (!termsNotFound.contains(term))
+ {
+ System.err.println("SO term " + term + " invalid");
+ termsNotFound.add(term);
+ }
+ }
+ }
+
+ /**
* Returns true if the childTerm 'isA' parentTerm (directly or indirectly).
*
* @param childTerm
public boolean isSequenceVariant(String soTerm)
{
- return isA(soTerm, "sequence_variant");
+ return isA(soTerm, SEQUENCE_VARIANT);
+ }
+
+ /**
+ * Sorts (case-insensitive) and returns the list of valid terms queried for
+ */
+ @Override
+ public List<String> termsFound()
+ {
+ synchronized (termsFound)
+ {
+ Collections.sort(termsFound, String.CASE_INSENSITIVE_ORDER);
+ return termsFound;
+ }
+ }
+
+ /**
+ * Sorts (case-insensitive) and returns the list of invalid terms queried for
+ */
+ @Override
+ public List<String> termsNotFound()
+ {
+ synchronized (termsNotFound)
+ {
+ Collections.sort(termsNotFound, String.CASE_INSENSITIVE_ORDER);
+ return termsNotFound;
+ }
}
}
--- /dev/null
+package jalview.io.gff;
+
+public class SequenceOntologyFactory
+{
+ private static SequenceOntologyI instance;
+
+ public static synchronized SequenceOntologyI getInstance()
+ {
+ if (instance == null)
+ {
+ // instance = new SequenceOntology();
+ instance = new SequenceOntologyLite();
+ }
+ return instance;
+ }
+
+ public static void setInstance(SequenceOntologyI so)
+ {
+ instance = so;
+ }
+}
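A minimal usage sketch for the new factory (illustrative only, not part of the patch): callers ask the factory for the shared instance, and tests can swap in the hard-coded implementation.

  SequenceOntologyI so = SequenceOntologyFactory.getInstance();
  boolean isExon = so.isA("exon", SequenceOntologyI.EXON); // true for the term or a sub-type

  // in a unit test set-up, substitute the lightweight stand-in
  SequenceOntologyFactory.setInstance(new SequenceOntologyLite());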
--- /dev/null
+package jalview.io.gff;
+
+import java.util.List;
+
+public interface SequenceOntologyI
+{
+ /*
+ * selected commonly used values for quick reference
+ */
+ public static final String POLYPEPTIDE = "polypeptide";
+
+ public static final String PROTEIN_MATCH = "protein_match";
+
+ public static final String NUCLEOTIDE_MATCH = "nucleotide_match";
+
+ // SO:0000316
+ public static final String CDS = "CDS";
+
+ // SO:0001060
+ public static final String SEQUENCE_VARIANT = "sequence_variant";
+
+ // SO:0000147
+ public static final String EXON = "exon";
+
+ // SO:0000673
+ public static final String TRANSCRIPT = "transcript";
+
+ // SO:0000704
+ public static final String GENE = "gene";
+
+ public boolean isA(String childTerm, String parentTerm);
+
+ /**
+ * Returns a sorted list of all valid terms queried for (i.e. terms processed
+ * which were valid in the SO), using the friendly description.
+ *
+ * This can be used to check that any hard-coded stand-in for the full SO
+ * includes all the terms needed for correct processing.
+ *
+ * @return
+ */
+ public List<String> termsFound();
+
+ /**
+ * Returns a sorted list of all invalid terms queried for (i.e. terms
+ * processed which were not found in the SO), using the friendly description.
+ *
+ * This can be used to report any 'non-compliance' in data, and/or to report
+ * valid terms missing from any hard-coded stand-in for the full SO.
+ *
+ * @return
+ */
+ public List<String> termsNotFound();
+}
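A hedged sketch (not part of the patch) of how the reporting methods could be used: after processing a data file, a caller might log any terms that were queried for but not found.

  SequenceOntologyI so = SequenceOntologyFactory.getInstance();
  for (String term : so.termsNotFound())
  {
    // report data terms that are invalid, or missing from a hard-coded stand-in
    System.out.println("SO term not recognised: " + term);
  }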
--- /dev/null
+package jalview.io.gff;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * An implementation of SequenceOntologyI that hard codes terms of interest.
+ *
+ * Use this in unit testing by calling
+ * SequenceOntologyFactory.setInstance(new SequenceOntologyLite()).
+ *
+ * May also become a stand-in for SequenceOntology in the applet if we want to
+ * avoid the additional jars needed for parsing the full SO.
+ *
+ * @author gmcarstairs
+ *
+ */
+public class SequenceOntologyLite implements SequenceOntologyI
+{
+ /*
+ * initial selection of types of interest when processing Ensembl features
+ */
+ // @formatter:off
+ private final String[][] TERMS = new String[][] {
+
+ /*
+ * gene sub-types:
+ */
+ { "gene", "gene" },
+ { "ncRNA_gene", "gene" },
+ { "snRNA_gene", "gene" },
+
+ /*
+ * transcript sub-types:
+ */
+ { "transcript", "transcript" },
+ { "mature_transcript", "transcript" },
+ { "ncRNA", "transcript" },
+ { "snRNA", "transcript" },
+ { "aberrant_processed_transcript", "transcript" },
+
+ /*
+ * sequence_variant sub-types:
+ */
+ { "sequence_variant", "sequence_variant" },
+ { "feature_variant", "sequence_variant" },
+ { "gene_variant", "sequence_variant" },
+ // NB Ensembl uses NMD_transcript_variant as if a 'transcript'
+ // but we model it here correctly as per the SO
+ { "NMD_transcript_variant", "sequence_variant" },
+ { "transcript_variant", "sequence_variant" },
+ { "structural_variant", "sequence_variant" },
+
+ /*
+ * no sub-types of exon or CDS yet encountered; add if needed
+ */
+ { "exon", "exon" },
+ { "CDS", "CDS" }
+ };
+ // @formatter:on
+
+ /*
+ * hard-coded list of any parents (direct or indirect)
+ * that we care about for a term
+ */
+ private Map<String, List<String>> parents;
+
+ private List<String> termsFound;
+
+ private List<String> termsNotFound;
+
+ public SequenceOntologyLite()
+ {
+ termsFound = new ArrayList<String>();
+ termsNotFound = new ArrayList<String>();
+ loadStaticData();
+ }
+
+ /**
+ * Loads hard-coded data into a lookup table of {term, {list_of_parents}}
+ */
+ private void loadStaticData()
+ {
+ parents = new HashMap<String, List<String>>();
+ for (String[] pair : TERMS)
+ {
+ List<String> p = parents.get(pair[0]);
+ if (p == null)
+ {
+ p = new ArrayList<String>();
+ parents.put(pair[0], p);
+ }
+ p.add(pair[1]);
+ }
+ }
+
+ /**
+ * Answers true if 'child' isA 'parent' (including equality). In this
+ * implementation, based only on hard-coded values.
+ */
+ @Override
+ public boolean isA(String child, String parent)
+ {
+ if (child == null || parent == null)
+ {
+ return false;
+ }
+ if (child.equals(parent))
+ {
+ termFound(child);
+ return true;
+ }
+
+ List<String> p = parents.get(child);
+ if (p == null)
+ {
+ termNotFound(child);
+ return false;
+ }
+ termFound(child);
+ return p.contains(parent);
+ }
+
+ /**
+ * Records a valid term queried for, for reporting purposes
+ *
+ * @param term
+ */
+ private void termFound(String term)
+ {
+ synchronized (termsFound)
+ {
+ if (!termsFound.contains(term))
+ {
+ termsFound.add(term);
+ }
+ }
+ }
+
+ /**
+ * Records an invalid term queried for, for reporting purposes
+ *
+ * @param term
+ */
+ private void termNotFound(String term)
+ {
+ synchronized (termsNotFound)
+ {
+ if (!termsNotFound.contains(term))
+ {
+ System.out.println("SO term " + term
+ + " not known - either invalid or needs modelled in "
+ + getClass().getName());
+ termsNotFound.add(term);
+ }
+ }
+ }
+
+ /**
+ * Sorts (case-insensitive) and returns the list of valid terms queried for
+ */
+ @Override
+ public List<String> termsFound()
+ {
+ synchronized (termsFound)
+ {
+ Collections.sort(termsFound, String.CASE_INSENSITIVE_ORDER);
+ return termsFound;
+ }
+ }
+
+ /**
+ * Sorts (case-insensitive) and returns the list of invalid terms queried for
+ */
+ @Override
+ public List<String> termsNotFound()
+ {
+ synchronized (termsNotFound)
+ {
+ Collections.sort(termsNotFound, String.CASE_INSENSITIVE_ORDER);
+ return termsNotFound;
+ }
+ }
+}
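A brief sketch (not part of the patch) of the behaviour the hard-coded TERMS table gives; 'mRNA' is just an example of a term not yet modelled here.

  SequenceOntologyI so = new SequenceOntologyLite();
  so.isA("snRNA", "transcript"); // true - listed as a transcript sub-type
  so.isA("CDS", "CDS");          // true - trivial equality check
  so.isA("mRNA", "transcript");  // false - not in TERMS, so recorded in termsNotFound()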
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.io.AppletFormatAdapter;
import jalview.io.FastaFile;
import jalview.io.FileParse;
+import jalview.io.gff.SequenceOntologyFactory;
+import jalview.io.gff.SequenceOntologyLite;
import java.lang.reflect.Method;
import java.net.MalformedURLException;
import java.net.URL;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
variants = EnsemblSeqProxy.computePeptideVariants(codonVariants, "S");
assertEquals("[C, R, T, W]", variants.toString());
}
+
+ /**
+ * Tests for the method that maps the subset of a dna sequence that has a CDS
+ * (or sub-type) feature.
+ */
+ @Test(groups = "Functional")
+ public void testGetCdsRanges()
+ {
+ EnsemblSeqProxy testee = new EnsemblSeqProxyAdapter();
+
+ SequenceI dnaSeq = new Sequence("dna", "aaaGGGcccAAATTTttt");
+ dnaSeq.createDatasetSequence();
+ SequenceI ds = dnaSeq.getDatasetSequence();
+
+ // CDS for dna 4-6
+ SequenceFeature sf = new SequenceFeature("CDS", "", 4, 6, 0f, null);
+ ds.addSequenceFeature(sf);
+ // exon feature should be ignored here
+ sf = new SequenceFeature("exon", "", 7, 9, 0f, null);
+ ds.addSequenceFeature(sf);
+ // CDS for dna 10-12
+ sf = new SequenceFeature("some_cds", "", 10, 12, 0f, null);
+ ds.addSequenceFeature(sf);
+
+ SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
+ List<int[]> ranges = new ArrayList<int[]>();
+ int mappedLength = testee.getCdsRanges(dnaSeq, ranges);
+ assertEquals(6, mappedLength);
+ assertEquals(2, ranges.size());
+ assertEquals(4, ranges.get(0)[0]);
+ assertEquals(6, ranges.get(0)[1]);
+ assertEquals(10, ranges.get(1)[0]);
+ assertEquals(12, ranges.get(1)[1]);
+
+ }
+
+ @Test(groups = "Functional")
+ public void getGenomicRangesFromFeatures()
+ {
+
+ }
+
+ /**
+ * Tests for the method that maps the subset of a dna sequence that has a CDS
+ * (or sub-type) feature - the case where the start codon is incomplete.
+ */
+ @Test(groups = "Functional")
+ public void testGetCdsRanges_fivePrimeIncomplete()
+ {
+ EnsemblSeqProxy testee = new EnsemblSeqProxyAdapter();
+
+ SequenceI dnaSeq = new Sequence("dna", "aaagGGCCCaaaTTTttt");
+ dnaSeq.createDatasetSequence();
+ SequenceI ds = dnaSeq.getDatasetSequence();
+
+ // CDS for dna 5-6 (incomplete codon), 7-9
+ SequenceFeature sf = new SequenceFeature("CDS", "", 5, 9, 0f, null);
+ sf.setPhase("2"); // skip 2 bases to start of next codon
+ ds.addSequenceFeature(sf);
+ // CDS for dna 13-15
+ sf = new SequenceFeature("some_cds", "", 13, 15, 0f, null);
+ ds.addSequenceFeature(sf);
+
+ SequenceOntologyFactory.setInstance(new SequenceOntologyLite());
+ List<int[]> ranges = new ArrayList<int[]>();
+ int mappedLength = testee.getCdsRanges(dnaSeq, ranges);
+
+ /*
+ * check the mapping starts with the first complete codon
+ */
+ assertEquals(6, mappedLength);
+ assertEquals(2, ranges.size());
+ assertEquals(7, ranges.get(0)[0]);
+ assertEquals(9, ranges.get(0)[1]);
+ assertEquals(13, ranges.get(1)[0]);
+ assertEquals(15, ranges.get(1)[1]);
+ }
}
\ No newline at end of file
public class SequenceOntologyTest
{
- private SequenceOntology so;
+ private SequenceOntologyI so;
@BeforeMethod
public void setUp() {
long now = System.currentTimeMillis();
- so = SequenceOntology.getInstance();
+ so = SequenceOntologyFactory.getInstance();
long elapsed = System.currentTimeMillis() - now;
System.out.println("Load and cache of Sequence Ontology took "
+ elapsed + "ms");
}
@Test(groups = "Functional")
- public void testIsProteinMatch()
- {
- assertTrue(so.isProteinMatch("protein_match"));
- assertTrue(so.isProteinMatch("protein_hmm_match"));
- assertFalse(so.isProteinMatch("Protein_match")); // case-sensitive
- }
-
- @Test(groups = "Functional")
- public void testIsNucleotideMatch()
- {
- assertTrue(so.isNucleotideMatch("nucleotide_match"));
- assertTrue(so.isNucleotideMatch("primer_match"));
- assertTrue(so.isNucleotideMatch("cross_genome_match"));
- assertTrue(so.isNucleotideMatch("expressed_sequence_match"));
- assertTrue(so.isNucleotideMatch("translated_nucleotide_match"));
- assertTrue(so.isNucleotideMatch("UST_match"));
- assertTrue(so.isNucleotideMatch("RST_match"));
- assertTrue(so.isNucleotideMatch("cDNA_match"));
- assertTrue(so.isNucleotideMatch("EST_match"));
- assertFalse(so.isNucleotideMatch("match")); // parent
- }
-
- @Test(groups = "Functional")
public void testIsCDS()
{
assertTrue(so.isA("CDS", "CDS"));