// Copyright (C) 2010 Christian M Zmasek
// Copyright (C) 2010 Sanford-Burnham Medical Research Institute
// All rights reserved
-//
+//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
-//
+//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
public final class UniProtTaxonomy {
- private final String[] _lineage;
+ private static final String ARCHAEA = "Archaea";
+ private static final String BACTERIA = "Bacteria";
+ private static final String EUKARYOTA = "Eukaryota";
+ private final List<String> _lineage;
private final String _code;
private final String _scientific_name;
private final String _common_name;
private final String _synonym;
private final String _rank;
private final String _id;
- public final static UniProtTaxonomy DROSOPHILA_GENUS = new UniProtTaxonomy( new String[] { "Eukaryota",
- "Metazoa", "Arthropoda", "Hexapoda", "Insecta", "Pterygota", "Neoptera", "Endopterygota", "Diptera",
- "Brachycera", "Muscomorpha", "Ephydroidea", "Drosophilidae" },
+ public final static String CELLULAR_ORGANISMS = "cellular organisms";
+ public final static UniProtTaxonomy DROSOPHILA_GENUS = new UniProtTaxonomy( new String[] {
+ CELLULAR_ORGANISMS, EUKARYOTA, "Metazoa", "Arthropoda", "Hexapoda", "Insecta", "Pterygota", "Neoptera",
+ "Endopterygota", "Diptera", "Brachycera", "Muscomorpha", "Ephydroidea", "Drosophilidae", "Drosophila" },
"",
"fruit flies",
"Drosophila",
"",
"genus",
"7215" );
- public final static UniProtTaxonomy XENOPUS_GENUS = new UniProtTaxonomy( new String[] { "Eukaryota",
- "Metazoa", "Chordata", "Craniata", "Vertebrata", "Euteleostomi", "Amphibia", "Batrachia", "Anura",
- "Mesobatrachia", "Pipoidea", "Pipidae", "Xenopodinae" }, "", "", "Xenopus", "", "genus", "8353" );
- public final static UniProtTaxonomy CAPITELLA_TELATA_SPECIES = new UniProtTaxonomy( new String[] { "Eukaryota",
- "Metazoa", "Annelida", "Polychaeta", "Scolecida", "Capitellida", "Capitellidae", "Capitella" },
+ public final static UniProtTaxonomy XENOPUS_GENUS = new UniProtTaxonomy( new String[] {
+ CELLULAR_ORGANISMS, EUKARYOTA, "Metazoa", "Chordata", "Craniata", "Vertebrata", "Euteleostomi", "Amphibia",
+ "Batrachia", "Anura", "Mesobatrachia", "Pipoidea", "Pipidae", "Xenopodinae", "Xenopus" },
+ "",
+ "",
+ "Xenopus",
+ "",
+ "genus",
+ "8353" );
+ public final static UniProtTaxonomy CAPITELLA_TELATA_SPECIES = new UniProtTaxonomy( new String[] {
+ CELLULAR_ORGANISMS, EUKARYOTA, "Metazoa", "Annelida", "Polychaeta", "Scolecida", "Capitellida",
+ "Capitellidae", "Capitella", "Capitella teleta" },
"",
"",
"Capitella teleta",
"species",
"283909" );
+ // public final static UniProtTaxonomy NUCLEARIIDAE_AND_FONTICULA = new UniProtTaxonomy( new String[] {
+ // CELLULAR_ORGANISMS, EUKARYOTA, "Nucleariidae and Fonticula group" }, "", "", "", "", "", "1001604" );
public UniProtTaxonomy( final String line ) {
final String[] items = line.split( "\t" );
if ( items.length < 5 ) {
_rank = "";
}
String[] lin = null;
- if ( items.length > 7 ) {
+ if ( items.length > 8 ) {
lin = items[ 8 ].split( "; " );
}
+ _lineage = new ArrayList<String>();
if ( ( lin != null ) && ( lin.length > 0 ) ) {
final List<String> temp = new ArrayList<String>();
for( final String t : lin ) {
temp.add( t.trim() );
}
}
- _lineage = new String[ temp.size() ];
for( int i = 0; i < temp.size(); ++i ) {
- _lineage[ i ] = temp.get( i );
+ if ( ( i == 0 )
+ && ( temp.get( i ).equalsIgnoreCase( EUKARYOTA ) || temp.get( i ).equalsIgnoreCase( BACTERIA ) || temp
+ .get( i ).equalsIgnoreCase( ARCHAEA ) ) ) {
+ _lineage.add( CELLULAR_ORGANISMS );
+ }
+ _lineage.add( temp.get( i ) );
}
}
- else {
- _lineage = new String[ 0 ];
+ if ( _lineage.isEmpty()
+ && ( _scientific_name.equalsIgnoreCase( EUKARYOTA ) || _scientific_name.equalsIgnoreCase( BACTERIA ) || _scientific_name
+ .equalsIgnoreCase( ARCHAEA ) ) ) {
+ _lineage.add( CELLULAR_ORGANISMS );
+ }
+ _lineage.add( _scientific_name );
+ if ( _lineage.isEmpty() ) {
+ throw new IllegalArgumentException( "lineage in a UniProt Taxonomy can not be empty\n: " + line );
}
}
- public UniProtTaxonomy( final String[] lineage,
+ public UniProtTaxonomy( final List<String> lineage,
final String code,
final String common_name,
final String scientific_name,
_synonym = synonym;
_rank = rank;
_id = id;
+ if ( ( _lineage != null ) && !_lineage.get( _lineage.size() - 1 ).equalsIgnoreCase( _scientific_name ) ) {
+ _lineage.add( _scientific_name );
+ }
+ }
+
+ /**
+  * Creates a taxonomy entry from an array-based lineage.
+  * <p>
+  * The lineage elements are copied, in order, into a newly created list, so
+  * the caller's array is never modified. If the resulting lineage is empty,
+  * or its last element differs from the scientific name (compared ignoring
+  * case), the scientific name is appended as the final lineage element.
+  *
+  * @param lineage         lineage names in order; may be null or empty
+  * @param code            value stored as the code
+  * @param common_name     value stored as the common name
+  * @param scientific_name value stored as the scientific name
+  * @param synonym         value stored as the synonym
+  * @param rank            value stored as the rank
+  * @param id              value stored as the id
+  */
+ public UniProtTaxonomy( final String[] lineage,
+                         final String code,
+                         final String common_name,
+                         final String scientific_name,
+                         final String synonym,
+                         final String rank,
+                         final String id ) {
+     _lineage = new ArrayList<String>();
+     if ( lineage != null ) {
+         for( final String l : lineage ) {
+             _lineage.add( l );
+         }
+     }
+     _code = code;
+     _scientific_name = scientific_name;
+     _common_name = common_name;
+     _synonym = synonym;
+     _rank = rank;
+     _id = id;
+     // An empty lineage has no last element to inspect: check isEmpty() first so
+     // that get( size - 1 ) is never called with index -1.
+     if ( _lineage.isEmpty() || !_lineage.get( _lineage.size() - 1 ).equalsIgnoreCase( _scientific_name ) ) {
+         _lineage.add( _scientific_name );
+     }
}
/**
return _id;
}
/**
 * Returns the lineage of this taxonomy entry.
 * <p>
 * The value returned is the internal {@code _lineage} list itself, not a copy.
 * NOTE(review): a caller that modifies the returned list therefore modifies
 * this object's state -- confirm that no caller does so unintentionally.
 *
 * @return the lineage list held by this object
 */
- public String[] getLineage() {
+ public List<String> getLineage() {
return _lineage;
}