X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2Fgff%2FSequenceOntologyLite.java;h=b3f81611d761880e2941186d622688d0a0625118;hb=b5f2dfda36c463b93f34db95fe5bc5ff2a1516bf;hp=173dea6324e5003e3c89e79f7e0d66b735ab831d;hpb=3cccac4697c371b1964236e17b532fd3d180e1c4;p=jalview.git diff --git a/src/jalview/io/gff/SequenceOntologyLite.java b/src/jalview/io/gff/SequenceOntologyLite.java index 173dea6..b3f8161 100644 --- a/src/jalview/io/gff/SequenceOntologyLite.java +++ b/src/jalview/io/gff/SequenceOntologyLite.java @@ -22,6 +22,9 @@ public class SequenceOntologyLite implements SequenceOntologyI { /* * initial selection of types of interest when processing Ensembl features + * NB unlike the full SequenceOntology we don't traverse indirect + * child-parent relationships here so e.g. need to list every sub-type + * of gene (direct or indirect) that is of interest */ // @formatter:off private final String[][] TERMS = new String[][] { @@ -32,15 +35,23 @@ public class SequenceOntologyLite implements SequenceOntologyI { "gene", "gene" }, { "ncRNA_gene", "gene" }, { "snRNA_gene", "gene" }, + { "miRNA_gene", "gene" }, + { "lincRNA_gene", "gene" }, + { "rRNA_gene", "gene" }, /* * transcript sub-types: */ { "transcript", "transcript" }, { "mature_transcript", "transcript" }, + { "processed_transcript", "transcript" }, + { "aberrant_processed_transcript", "transcript" }, { "ncRNA", "transcript" }, { "snRNA", "transcript" }, - { "aberrant_processed_transcript", "transcript" }, + { "miRNA", "transcript" }, + { "lincRNA", "transcript" }, + { "rRNA", "transcript" }, + // there are many more sub-types of ncRNA... /* * sequence_variant sub-types: @@ -55,10 +66,25 @@ public class SequenceOntologyLite implements SequenceOntologyI { "structural_variant", "sequence_variant" }, /* - * no sub-types of exon or CDS yet encountered; add if needed + * no sub-types of exon or CDS yet seen in Ensembl + * some added here for testing purposes */ { "exon", "exon" }, - { "CDS", "CDS" } + { "coding_exon", "exon" }, + { "CDS", "CDS" }, + { "CDS_predicted", "CDS" }, + + /* + * terms used in exonerate or PASA GFF + */ + { "protein_match", "protein_match"}, + { "nucleotide_match", "nucleotide_match"}, + { "cDNA_match", "nucleotide_match"}, + + /* + * used in InterProScan GFF + */ + { "polypeptide", "polypeptide" } }; // @formatter:on @@ -155,7 +181,7 @@ public class SequenceOntologyLite implements SequenceOntologyI if (!termsNotFound.contains(term)) { System.out.println("SO term " + term - + " not known - either invalid or needs modelled in " + + " not known - may be invalid, or model if needed in " + getClass().getName()); termsNotFound.add(term); }