X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2Fgff%2FSequenceOntologyLite.java;h=72e906c96cf8abf05a407ed0e299a3211d025602;hb=b5889c572976c81f068d9743363695ca84e7d413;hp=6719ae6e749235eeaa4fe6d0a91d077108bb84e3;hpb=b96accb6a3904ea9b424f1dbd8b2b3b4164c521b;p=jalview.git diff --git a/src/jalview/io/gff/SequenceOntologyLite.java b/src/jalview/io/gff/SequenceOntologyLite.java index 6719ae6..72e906c 100644 --- a/src/jalview/io/gff/SequenceOntologyLite.java +++ b/src/jalview/io/gff/SequenceOntologyLite.java @@ -1,3 +1,23 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see <http://www.gnu.org/licenses/>. + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ package jalview.io.gff; import java.util.ArrayList; @@ -22,6 +42,9 @@ public class SequenceOntologyLite implements SequenceOntologyI { /* * initial selection of types of interest when processing Ensembl features + * NB unlike the full SequenceOntology we don't traverse indirect + * child-parent relationships here so e.g. 
need to list every sub-type + * (direct or indirect) that is of interest */ // @formatter:off private final String[][] TERMS = new String[][] { @@ -32,39 +55,62 @@ public class SequenceOntologyLite implements SequenceOntologyI { "gene", "gene" }, { "ncRNA_gene", "gene" }, { "snRNA_gene", "gene" }, + { "miRNA_gene", "gene" }, + { "lincRNA_gene", "gene" }, + { "rRNA_gene", "gene" }, /* * transcript sub-types: */ { "transcript", "transcript" }, { "mature_transcript", "transcript" }, + { "processed_transcript", "transcript" }, + { "aberrant_processed_transcript", "transcript" }, { "ncRNA", "transcript" }, { "snRNA", "transcript" }, - { "aberrant_processed_transcript", "transcript" }, + { "miRNA", "transcript" }, + { "lincRNA", "transcript" }, + { "rRNA", "transcript" }, + { "mRNA", "transcript" }, + // there are many more sub-types of ncRNA... /* - * sequence_variant sub-types: + * sequence_variant sub-types */ { "sequence_variant", "sequence_variant" }, + { "structural_variant", "sequence_variant" }, { "feature_variant", "sequence_variant" }, { "gene_variant", "sequence_variant" }, + { "transcript_variant", "sequence_variant" }, // NB Ensembl uses NMD_transcript_variant as if a 'transcript' // but we model it here correctly as per the SO { "NMD_transcript_variant", "sequence_variant" }, - { "transcript_variant", "sequence_variant" }, - { "structural_variant", "sequence_variant" }, + { "missense_variant", "sequence_variant" }, + { "synonymous_variant", "sequence_variant" }, + { "frameshift_variant", "sequence_variant" }, + { "5_prime_UTR_variant", "sequence_variant" }, + { "3_prime_UTR_variant", "sequence_variant" }, + { "stop_gained", "sequence_variant" }, + { "stop_lost", "sequence_variant" }, + { "inframe_deletion", "sequence_variant" }, + { "inframe_insertion", "sequence_variant" }, + { "splice_region_variant", "sequence_variant" }, /* - * no sub-types of exon or CDS yet encountered; add if needed + * no sub-types of exon or CDS yet seen in Ensembl + * some added 
here for testing purposes */ { "exon", "exon" }, + { "coding_exon", "exon" }, { "CDS", "CDS" }, + { "CDS_predicted", "CDS" }, /* - * used in exonerate GFF + * terms used in exonerate or PASA GFF */ { "protein_match", "protein_match"}, { "nucleotide_match", "nucleotide_match"}, + { "cDNA_match", "nucleotide_match"}, /* * used in InterProScan GFF @@ -85,8 +131,8 @@ public class SequenceOntologyLite implements SequenceOntologyI public SequenceOntologyLite() { - termsFound = new ArrayList(); - termsNotFound = new ArrayList(); + termsFound = new ArrayList<>(); + termsNotFound = new ArrayList<>(); loadStaticData(); } @@ -95,12 +141,13 @@ public class SequenceOntologyLite implements SequenceOntologyI */ private void loadStaticData() { - parents = new HashMap>(); - for (String [] pair : TERMS) { + parents = new HashMap<>(); + for (String[] pair : TERMS) + { List p = parents.get(pair[0]); if (p == null) { - p = new ArrayList(); + p = new ArrayList<>(); parents.put(pair[0], p); } p.add(pair[1]); @@ -165,9 +212,11 @@ public class SequenceOntologyLite implements SequenceOntologyI { if (!termsNotFound.contains(term)) { - System.out.println("SO term " + term - + " not known - either invalid or needs modelled in " - + getClass().getName()); + // suppress logging here as it reports Uniprot sequence features + // (which do not use SO terms) when auto-configuring feature colours + // System.out.println("SO term " + term + // + " not known - add to model if needed in " + // + getClass().getName()); termsNotFound.add(term); } }