From 5fa5ccc8d3c9a6f68d3b1078c40113d8b316c6dc Mon Sep 17 00:00:00 2001
From: cmzmasek
Date: Fri, 15 Nov 2013 09:11:48 +0000
Subject: [PATCH] in progress

---
 .../analysis/AncestralTaxonomyInference.java       |    4 +-
 .../org/forester/analysis/TaxonomyDataManager.java |   10 +++
 forester/java/src/org/forester/test/Test.java      |    1 +
 .../src/org/forester/ws/seqdb/UniProtTaxonomy.java |   75 +++++++++++++-------
 4 files changed, 61 insertions(+), 29 deletions(-)

diff --git a/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java b/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java
index 9f18f81..61b9465 100644
--- a/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java
+++ b/forester/java/src/org/forester/analysis/AncestralTaxonomyInference.java
@@ -138,8 +138,8 @@ public final class AncestralTaxonomyInference {
                 }
             }
             if ( saw_cellular_organism && saw_viruses ) {
-                last_common_lineage.add( UniProtTaxonomy.CELLULAR_ORGANISMS );
-                last_common = UniProtTaxonomy.CELLULAR_ORGANISMS;
+                //last_common_lineage.add( UniProtTaxonomy.CELLULAR_ORGANISMS );
+                // last_common = UniProtTaxonomy.CELLULAR_ORGANISMS;
             }
             else {
                 String msg = "no common lineage for:\n";
diff --git a/forester/java/src/org/forester/analysis/TaxonomyDataManager.java b/forester/java/src/org/forester/analysis/TaxonomyDataManager.java
index 1a1bef6..79ab713 100644
--- a/forester/java/src/org/forester/analysis/TaxonomyDataManager.java
+++ b/forester/java/src/org/forester/analysis/TaxonomyDataManager.java
@@ -184,6 +184,16 @@ public final class TaxonomyDataManager extends RunnableProcess {
     }
 
     private final static List getTaxonomiesFromScientificName( final String query ) throws IOException {
+        if ( query.equalsIgnoreCase( UniProtTaxonomy.BACTERIA ) ||
+                query.equalsIgnoreCase( UniProtTaxonomy.ARCHAEA ) ||
+                query.equalsIgnoreCase( UniProtTaxonomy.VIRUSES ) ||
+                query.equalsIgnoreCase( UniProtTaxonomy.EUKARYOTA )
+                ) {
+            final List l = new ArrayList();
+            l.add( UniProtTaxonomy.createSpecialFromScientificName( query ) );
+            return l;
+        }
+
         return SequenceDbWsTools.getTaxonomiesFromScientificNameStrict( query, MAX_TAXONOMIES_TO_RETURN );
     }
 
diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java
index 92cd3d8..47516e4 100644
--- a/forester/java/src/org/forester/test/Test.java
+++ b/forester/java/src/org/forester/test/Test.java
@@ -11511,6 +11511,7 @@ public final class Test {
                 System.out.println( entry4.getMap() );
                 return false;
             }
+            //TODO FIXME gi...
             //
             //TODO fails:
             // final SequenceDatabaseEntry entry5 = SequenceDbWsTools.obtainEntry( "M30539" );
diff --git a/forester/java/src/org/forester/ws/seqdb/UniProtTaxonomy.java b/forester/java/src/org/forester/ws/seqdb/UniProtTaxonomy.java
index 2cdd93e..b959899 100644
--- a/forester/java/src/org/forester/ws/seqdb/UniProtTaxonomy.java
+++ b/forester/java/src/org/forester/ws/seqdb/UniProtTaxonomy.java
@@ -32,9 +32,9 @@ import org.forester.util.ForesterUtil;
 
 public final class UniProtTaxonomy {
 
-    private static final String ARCHAEA = "Archaea";
-    private static final String BACTERIA = "Bacteria";
-    private static final String EUKARYOTA = "Eukaryota";
+    public static final String ARCHAEA = "Archaea";
+    public static final String BACTERIA = "Bacteria";
+    public static final String EUKARYOTA = "Eukaryota";
     private final List _lineage;
     private final String _code;
     private final String _scientific_name;
@@ -107,34 +107,13 @@ public final class UniProtTaxonomy {
         _synonym = synonym;
         _rank = rank;
         _id = id;
-        if ( ( _lineage != null ) && !_lineage.get( _lineage.size() - 1 ).equalsIgnoreCase( _scientific_name ) ) {
+        if ( ( _lineage != null && _lineage.isEmpty() ) || ( ( !ForesterUtil.isEmpty( _lineage ) ) && !_lineage.get( _lineage.size() - 1 ).equalsIgnoreCase( _scientific_name ) ) ) {
             _lineage.add( _scientific_name );
         }
+
     }
 
-    public UniProtTaxonomy( final String[] lineage,
-                            final String code,
-                            final String common_name,
-                            final String scientific_name,
-                            final String synonym,
-                            final String rank,
-                            final String id ) {
-        _lineage = new ArrayList();
-        if ( lineage != null ) {
-            for( final String l : lineage ) {
-                _lineage.add( l );
-            }
-        }
-        _code = code;
-        _scientific_name = scientific_name;
-        _common_name = common_name;
-        _synonym = synonym;
-        _rank = rank;
-        _id = id;
-        if ( ( _lineage != null ) && !_lineage.get( _lineage.size() - 1 ).equalsIgnoreCase( _scientific_name ) ) {
-            _lineage.add( _scientific_name );
-        }
-    }
+
+
 
     /**
      * Creates deep copy for all fields, except lineage.
@@ -178,4 +157,46 @@ public final class UniProtTaxonomy {
     public String getSynonym() {
         return _synonym;
     }
+
+    public final static UniProtTaxonomy createSpecialFromScientificName( final String sn ) {
+
+        List lineage = new ArrayList();
+        String code = "";
+        String common_name = "";
+        String scientific_name = "";
+        String synonym = "";
+        String rank = "";
+        String id = "";
+
+        if ( sn.equalsIgnoreCase( BACTERIA ) ) {
+            scientific_name = BACTERIA;
+            lineage.add( "cellular organisms" );
+            rank = "superkingdom";
+            id = "2";
+        }
+        else if ( sn.equalsIgnoreCase( ARCHAEA ) ) {
+            scientific_name = ARCHAEA;
+            lineage.add( "cellular organisms" );
+
+            rank = "superkingdom";
+            id = "2157";
+        }
+        else if ( sn.equalsIgnoreCase( EUKARYOTA ) ) {
+            scientific_name = EUKARYOTA;
+            lineage.add( "cellular organisms" );
+            rank = "superkingdom";
+            id = "2759";
+        }
+        else if ( sn.equalsIgnoreCase( VIRUSES ) ) {
+            scientific_name = VIRUSES;
+            rank = "superkingdom";
+            id = "10239";
+        }
+        else {
+            throw new IllegalArgumentException( "illegal attempt to make UniProt taxonomy for :" + sn );
+        }
+        return new UniProtTaxonomy( lineage, code, common_name, scientific_name, synonym, rank, id );
+
+
+    }
 }
-- 
1.7.10.2