From 1c57d9dd98190445f28ff5b2d447614fb14dd1aa Mon Sep 17 00:00:00 2001 From: cmzmasek Date: Thu, 31 Oct 2013 03:45:25 +0000 Subject: [PATCH] in progress --- .../surfacing/PrintableDomainSimilarity.java | 63 ++++++++++++++++++++ .../src/org/forester/surfacing/SurfacingUtil.java | 57 ++++++++++++++++++ forester/java/src/org/forester/test/Test.java | 20 +++---- .../org/forester/ws/seqdb/SequenceDbWsTools.java | 9 +++ 4 files changed, 139 insertions(+), 10 deletions(-) diff --git a/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java b/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java index 22ddb1e..63db3b2 100644 --- a/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java +++ b/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java @@ -26,6 +26,7 @@ package org.forester.surfacing; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.SortedMap; @@ -373,6 +374,68 @@ public class PrintableDomainSimilarity implements DomainSimilarity { return sb; } + + + private StringBuffer getTaxonomyGroupDistribution( Phylogeny tol ) { + //TODO work on me + + final SortedMap> m = new TreeMap>(); + for( final Species species : getSpeciesData().keySet() ) { + for( final String combable_dom : getCombinableDomainIds( species ) ) { + if ( !m.containsKey( combable_dom ) ) { + m.put( combable_dom, new TreeSet() ); + } + m.get( combable_dom ).add( species.getSpeciesId() ); + } + } + Map countz = new HashMap(); + for( final Map.Entry> e : m.entrySet() ) { + for( final String tax_code : e.getValue() ) { + final String group = SurfacingUtil.obtainTaxonomyGroup( tax_code, tol ); + if ( !ForesterUtil.isEmpty( group ) ) { + if ( !countz.containsKey( group ) ) { + countz.put( group, 1 ); + } + else { + countz.put( group, countz.get( group) + 1 ); + } + + } + else { + return null; + } + + } + + } + final StringBuffer sb = new StringBuffer(); + + // i am just a template and need to be modified for "printout" TODO + for( final Map.Entry> e : m.entrySet() ) { + sb.append( "" + e.getKey() + "" ); + sb.append( ": " ); + sb.append( "" ); + for( final String tax : e.getValue() ) { + final String hex = SurfacingUtil.obtainHexColorStringDependingOnTaxonomyGroup( tax, null ); + if ( !ForesterUtil.isEmpty( hex ) ) { + sb.append( "" ); + sb.append( tax ); + sb.append( "" ); + } + else { + sb.append( tax ); + } + sb.append( " " ); + } + sb.append( "" ); + sb.append( "
\n" ); + } + return sb; + } + + private StringBuffer getSpeciesDataInAlphabeticalOrder( final boolean html, final Map tax_code_to_id_map, final Phylogeny phy ) { diff --git a/forester/java/src/org/forester/surfacing/SurfacingUtil.java b/forester/java/src/org/forester/surfacing/SurfacingUtil.java index f23f865..f9fb313 100644 --- a/forester/java/src/org/forester/surfacing/SurfacingUtil.java +++ b/forester/java/src/org/forester/surfacing/SurfacingUtil.java @@ -101,6 +101,11 @@ public final class SurfacingUtil { public final static Pattern PATTERN_SP_STYLE_TAXONOMY = Pattern.compile( "^[A-Z0-9]{3,5}$" ); private final static Map _TAXCODE_HEXCOLORSTRING_MAP = new HashMap(); + + + private final static Map _TAXCODE_TAXGROUP_MAP = new HashMap(); + + private static final Comparator ASCENDING_CONFIDENCE_VALUE_ORDER = new Comparator() { @Override @@ -1378,6 +1383,58 @@ public final class SurfacingUtil { } return _TAXCODE_HEXCOLORSTRING_MAP.get( tax_code ); } + + + public static String obtainTaxonomyGroup( final String tax_code, final Phylogeny phy ) + throws IllegalArgumentException { + if ( !_TAXCODE_TAXGROUP_MAP.containsKey( tax_code ) ) { + if ( ( phy != null ) && !phy.isEmpty() ) { + final List nodes = phy.getNodesViaTaxonomyCode( tax_code ); + + if ( ( nodes == null ) || nodes.isEmpty() ) { + throw new IllegalArgumentException( "code " + tax_code + " is not found" ); + } + if ( nodes.size() != 1 ) { + throw new IllegalArgumentException( "code " + tax_code + " is not unique" ); + } + PhylogenyNode n = nodes.get( 0 ); + String group = null; + Color c = null; + while ( n != null ) { + if ( n.getNodeData().isHasTaxonomy() + && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) { + c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getNodeData().getTaxonomy() + .getScientificName(), tax_code ); + + group = n.getNodeData().getTaxonomy() + .getScientificName(); + } + if ( ( c == null ) && !ForesterUtil.isEmpty( n.getName() ) ) { + c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getName(), tax_code ); + group = n.getName(); + } + if ( c != null ) { + break; + } + group = null; + n = n.getParent(); + } + if ( c == null ) { + throw new IllegalArgumentException( "no group found for taxonomy code \"" + tax_code + "\"" ); + } + + _TAXCODE_TAXGROUP_MAP.put( tax_code, group ); + } + else { + throw new IllegalArgumentException( "unable to obtain group for code " + tax_code + + " (tree is null or empty and code is not in map)" ); + } + } + return _TAXCODE_TAXGROUP_MAP.get( tax_code ); + } + + + public static void performDomainArchitectureAnalysis( final SortedMap> domain_architecutures, final SortedMap domain_architecuture_counts, diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index a57974a..7391763 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -127,7 +127,7 @@ import org.forester.ws.wabi.TxSearch.TAX_RANK; @SuppressWarnings( "unused") public final class Test { - private final static boolean PERFORM_DB_TESTS = false; + private final static boolean PERFORM_DB_TESTS = true; private final static double ZERO_DIFF = 1.0E-9; private final static String PATH_TO_TEST_DATA = System.getProperty( "user.dir" ) + ForesterUtil.getFileSeparator() + "test_data" @@ -501,7 +501,7 @@ public final class Test { failed++; } } - ///////////////////////////////////////// System.exit( 0 ); + System.exit( 0 ); System.out.print( "UniProtKB id extraction: " ); if ( Test.testExtractUniProtKbProteinSeqIdentifier() ) { System.out.println( "OK." ); @@ -11503,14 +11503,14 @@ public final class Test { System.out.println( entry4.getGeneName() ); return false; } - // if ( !entry4.getChromosome().equals( "ras" ) ) { - // System.out.println( entry4.getChromosome() ); - // return false; - // } - // if ( !entry4.getMap().equals( "ras" ) ) { - // System.out.println( entry4.getMap() ); - // return false; - // } + if ( !entry4.getChromosome().equals( "ras" ) ) { + System.out.println( entry4.getChromosome() ); + return false; + } + if ( !entry4.getMap().equals( "ras" ) ) { + System.out.println( entry4.getMap() ); + return false; + } // //TODO fails: // final SequenceDatabaseEntry entry5 = SequenceDbWsTools.obtainEntry( "M30539" ); diff --git a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java index c65fb2a..29376bd 100644 --- a/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java +++ b/forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java @@ -383,6 +383,15 @@ public final class SequenceDbWsTools { seq.addCrossReference( x ); } } + if ( !ForesterUtil.isEmpty( db_entry.getChromosome() ) && !ForesterUtil.isEmpty( db_entry.getMap() ) ) { + seq.setLocation( "chr " + db_entry.getChromosome() + ", " + db_entry.getMap() ); + } + else if ( !ForesterUtil.isEmpty( db_entry.getChromosome() ) ) { + seq.setLocation( "chr " + db_entry.getChromosome() ); + } + else if ( !ForesterUtil.isEmpty( db_entry.getMap() ) ) { + seq.setLocation( db_entry.getMap() ); + } final Taxonomy tax = node.getNodeData().isHasTaxonomy() ? node.getNodeData().getTaxonomy() : new Taxonomy(); if ( !ForesterUtil.isEmpty( db_entry.getTaxonomyScientificName() ) ) { tax.setScientificName( db_entry.getTaxonomyScientificName() ); -- 1.7.10.2