From 3b5c2fab212d221196ed1734ee9c623f45310bb7 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Wed, 6 Nov 2013 03:04:05 +0000 Subject: [PATCH] inprogress --- .../src/org/forester/application/surfacing.java | 3 +- .../org/forester/archaeopteryx/Configuration.java | 1 - .../src/org/forester/archaeopteryx/MainFrame.java | 11 +- .../archaeopteryx/MainFrameApplication.java | 11 +- .../src/org/forester/archaeopteryx/Options.java | 5 +- .../org/forester/archaeopteryx/TaxonomyUtil.java | 13 +- .../src/org/forester/archaeopteryx/TreePanel.java | 6 +- .../surfacing/PairwiseGenomeComparator.java | 3 +- .../surfacing/PrintableDomainSimilarity.java | 95 +++++++++---- .../src/org/forester/surfacing/SurfacingUtil.java | 102 +++++++------- .../java/src/org/forester/util/ForesterUtil.java | 143 ++++++++++---------- .../java/src/org/forester/util/TaxonomyGroups.java | 31 +++++ .../java/src/org/forester/ws/seqdb/EbiDbEntry.java | 16 +-- .../forester/ws/seqdb/SequenceDatabaseEntry.java | 4 +- .../src/org/forester/ws/seqdb/UniProtEntry.java | 6 +- 15 files changed, 245 insertions(+), 205 deletions(-) create mode 100644 forester/java/src/org/forester/util/TaxonomyGroups.java diff --git a/forester/java/src/org/forester/application/surfacing.java b/forester/java/src/org/forester/application/surfacing.java index ec4878c..413f45e 100644 --- a/forester/java/src/org/forester/application/surfacing.java +++ b/forester/java/src/org/forester/application/surfacing.java @@ -1829,7 +1829,8 @@ public class surfacing { scoring, true, tax_code_to_id_map, - intree_0_orig ); + intree_0_orig, + positive_filter_file != null ? filter : null ); simple_tab_writer.close(); ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote main output (includes domain similarities) to: \"" + ( out_dir == null ? my_outfile : out_dir + ForesterUtil.FILE_SEPARATOR + my_outfile ) + "\"" ); diff --git a/forester/java/src/org/forester/archaeopteryx/Configuration.java b/forester/java/src/org/forester/archaeopteryx/Configuration.java index d423ac1..b09e7b9 100644 --- a/forester/java/src/org/forester/archaeopteryx/Configuration.java +++ b/forester/java/src/org/forester/archaeopteryx/Configuration.java @@ -1694,7 +1694,6 @@ public final class Configuration { } public boolean isColorByTaxonomicGroup() { - return false; } } diff --git a/forester/java/src/org/forester/archaeopteryx/MainFrame.java b/forester/java/src/org/forester/archaeopteryx/MainFrame.java index 4e76d1f..c4b2425 100644 --- a/forester/java/src/org/forester/archaeopteryx/MainFrame.java +++ b/forester/java/src/org/forester/archaeopteryx/MainFrame.java @@ -97,7 +97,7 @@ public abstract class MainFrame extends JFrame implements ActionListener { static final String SEARCH_CASE_SENSITIVE_LABEL = "Case Sensitive"; static final String INVERSE_SEARCH_RESULT_LABEL = "Negate Result"; static final String DISPLAY_BRANCH_LENGTH_VALUES_LABEL = "Display Branch Length Values"; - static final String COLOR_BY_TAXONOMIC_GROUP = "Color by Taxonomic Group"; + static final String COLOR_BY_TAXONOMIC_GROUP = "Color by Taxonomic Group"; static final String DISPLAY_SCALE_LABEL = "Display Scale"; static final String NON_LINED_UP_CLADOGRAMS_LABEL = "Non-Lined Up Cladograms"; static final String UNIFORM_CLADOGRAMS_LABEL = "Total Node Sum Dependent Cladograms"; @@ -178,7 +178,6 @@ public abstract class MainFrame extends JFrame implements ActionListener { JRadioButtonMenuItem _ext_node_dependent_cladogram_rbmi; JCheckBoxMenuItem _show_branch_length_values_cbmi; JCheckBoxMenuItem _color_by_taxonomic_group_cbmi; - JCheckBoxMenuItem _show_scale_cbmi; //TODO fix me JCheckBoxMenuItem _show_overview_cbmi; JCheckBoxMenuItem _show_domain_labels; @@ -443,7 +442,6 @@ public abstract class MainFrame extends JFrame implements ActionListener { else if ( o == _color_by_taxonomic_group_cbmi ) { updateOptions( getOptions() ); } - else if ( o == _show_confidence_stddev_cbmi ) { updateOptions( getOptions() ); } @@ -1311,12 +1309,9 @@ public abstract class MainFrame extends JFrame implements ActionListener { if ( ( _show_branch_length_values_cbmi != null ) && _show_branch_length_values_cbmi.isEnabled() ) { options.setShowBranchLengthValues( _show_branch_length_values_cbmi.isSelected() ); } - - if ( ( _color_by_taxonomic_group_cbmi != null ) && _color_by_taxonomic_group_cbmi.isEnabled() ) { - options.setColorByTaxonomicGroup( _color_by_taxonomic_group_cbmi.isSelected() ); + if ( ( _color_by_taxonomic_group_cbmi != null ) && _color_by_taxonomic_group_cbmi.isEnabled() ) { + options.setColorByTaxonomicGroup( _color_by_taxonomic_group_cbmi.isSelected() ); } - - options.setPrintUsingActualSize( ( _print_using_actual_size_cbmi != null ) && ( _print_using_actual_size_cbmi.isSelected() ) ); options.setGraphicsExportUsingActualSize( ( _graphics_export_using_actual_size_cbmi != null ) diff --git a/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java b/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java index c01828e..368499a 100644 --- a/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java +++ b/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java @@ -888,20 +888,14 @@ public final class MainFrameApplication extends MainFrame { _options_jmenu.add( _show_scale_cbmi = new JCheckBoxMenuItem( DISPLAY_SCALE_LABEL ) ); _options_jmenu .add( _show_branch_length_values_cbmi = new JCheckBoxMenuItem( DISPLAY_BRANCH_LENGTH_VALUES_LABEL ) ); - - _options_jmenu .add( _show_default_node_shapes_internal_cbmi = new JCheckBoxMenuItem( DISPLAY_NODE_BOXES_LABEL_INT ) ); _options_jmenu .add( _show_default_node_shapes_external_cbmi = new JCheckBoxMenuItem( DISPLAY_NODE_BOXES_LABEL_EXT ) ); - - _options_jmenu - .add( _color_by_taxonomic_group_cbmi = new JCheckBoxMenuItem( COLOR_BY_TAXONOMIC_GROUP ) ); - + _options_jmenu.add( _color_by_taxonomic_group_cbmi = new JCheckBoxMenuItem( COLOR_BY_TAXONOMIC_GROUP ) ); _options_jmenu .add( _taxonomy_colorize_node_shapes_cbmi = new JCheckBoxMenuItem( MainFrame.TAXONOMY_COLORIZE_NODE_SHAPES_LABEL ) ); _options_jmenu.add( _show_confidence_stddev_cbmi = new JCheckBoxMenuItem( SHOW_CONF_STDDEV_LABEL ) ); - _options_jmenu.add( _cycle_node_shape_mi = new JMenuItem( MainFrame.CYCLE_NODE_SHAPE_LABEL ) ); _options_jmenu.add( _cycle_node_fill_mi = new JMenuItem( MainFrame.CYCLE_NODE_FILL_LABEL ) ); _options_jmenu.add( _choose_node_size_mi = new JMenuItem( MainFrame.CHOOSE_NODE_SIZE_LABEL ) ); @@ -986,8 +980,7 @@ public final class MainFrameApplication extends MainFrame { customizeJMenuItem( _choose_node_size_mi ); customizeCheckBoxMenuItem( _color_labels_same_as_parent_branch, getOptions().isColorLabelsSameAsParentBranch() ); customizeCheckBoxMenuItem( _color_by_taxonomic_group_cbmi, getOptions().isColorByTaxonomicGroup() ); - - customizeCheckBoxMenuItem( _screen_antialias_cbmi, getOptions().isAntialiasScreen() ); + customizeCheckBoxMenuItem( _screen_antialias_cbmi, getOptions().isAntialiasScreen() ); customizeCheckBoxMenuItem( _background_gradient_cbmi, getOptions().isBackgroundColorGradient() ); customizeCheckBoxMenuItem( _show_domain_labels, getOptions().isShowDomainLabels() ); customizeCheckBoxMenuItem( _show_annotation_ref_source, getOptions().isShowAnnotationRefSource() ); diff --git a/forester/java/src/org/forester/archaeopteryx/Options.java b/forester/java/src/org/forester/archaeopteryx/Options.java index 34a225e..b533aa6 100644 --- a/forester/java/src/org/forester/archaeopteryx/Options.java +++ b/forester/java/src/org/forester/archaeopteryx/Options.java @@ -261,8 +261,7 @@ final public class Options { final boolean isColorByTaxonomicGroup() { return _color_by_taxonomic_group; } - - + boolean isShowConfidenceStddev() { return _show_confidence_stddev; } @@ -408,7 +407,7 @@ final public class Options { final void setColorByTaxonomicGroup( final boolean color_by_taxonomic_group ) { _color_by_taxonomic_group = color_by_taxonomic_group; } - + void setShowConfidenceStddev( final boolean show_confidence_stddev ) { _show_confidence_stddev = show_confidence_stddev; } diff --git a/forester/java/src/org/forester/archaeopteryx/TaxonomyUtil.java b/forester/java/src/org/forester/archaeopteryx/TaxonomyUtil.java index 899dfa4..9a29863 100644 --- a/forester/java/src/org/forester/archaeopteryx/TaxonomyUtil.java +++ b/forester/java/src/org/forester/archaeopteryx/TaxonomyUtil.java @@ -6,16 +6,13 @@ import java.util.Map; final class TaxonomyUtil { - static String getTaxGroupByTaxCode( String code ) { + static String getTaxGroupByTaxCode( final String code ) { return _default_taxcode_taxgroup_map.get( code ); } - - private final static Map _default_taxcode_taxgroup_map = new HashMap(); static { _default_taxcode_taxgroup_map.put( "HUMAN", "deuterostomia" ); _default_taxcode_taxgroup_map.put( "HOMSA", "deuterostomia" ); - _default_taxcode_taxgroup_map.put( "PANTR", "deuterostomia" ); _default_taxcode_taxgroup_map.put( "GORGO", "deuterostomia" ); _default_taxcode_taxgroup_map.put( "PONAB", "deuterostomia" ); @@ -146,21 +143,17 @@ final class TaxonomyUtil { _default_taxcode_taxgroup_map.put( "NEMVE", "cnidaria" ); _default_taxcode_taxgroup_map.put( "HMAXX", "cnidaria" ); _default_taxcode_taxgroup_map.put( "HYDMA", "cnidaria" ); //TODO - _default_taxcode_taxgroup_map.put( "TRIAD", "placozoa" ); _default_taxcode_taxgroup_map.put( "MNELE", "ctenophora" ); _default_taxcode_taxgroup_map.put( "AMPQE", "porifera" ); _default_taxcode_taxgroup_map.put( "AMPQU", "porifera" ); //TODO - _default_taxcode_taxgroup_map.put( "MONBE", "choanoflagellida" ); _default_taxcode_taxgroup_map.put( "SALS5", "choanoflagellida" ); _default_taxcode_taxgroup_map.put( "AMOPA", "ichthyophonida & filasterea" ); _default_taxcode_taxgroup_map.put( "SARXX", "ichthyophonida & filasterea" ); _default_taxcode_taxgroup_map.put( "SPHAR", "ichthyophonida & filasterea" ); //TODO - _default_taxcode_taxgroup_map.put( "CAPO3", "ichthyophonida & filasterea" ); _default_taxcode_taxgroup_map.put( "CAPOW", "ichthyophonida & filasterea" ); //TODO - _default_taxcode_taxgroup_map.put( "AALXX", "dikarya" ); _default_taxcode_taxgroup_map.put( "GIBZE", "dikarya" ); _default_taxcode_taxgroup_map.put( "HYPVG", "dikarya" ); @@ -208,7 +201,6 @@ final class TaxonomyUtil { _default_taxcode_taxgroup_map.put( "COPC7", "dikarya" ); _default_taxcode_taxgroup_map.put( "LACBS", "dikarya" ); _default_taxcode_taxgroup_map.put( "LACBI", "dikarya" ); //TODO - _default_taxcode_taxgroup_map.put( "PLEOS", "dikarya" ); _default_taxcode_taxgroup_map.put( "CPUXX", "dikarya" ); _default_taxcode_taxgroup_map.put( "SERL9", "dikarya" ); @@ -349,12 +341,9 @@ final class TaxonomyUtil { _default_taxcode_taxgroup_map.put( "NAEGR", "excavata" ); _default_taxcode_taxgroup_map.put( "GIAIC", "excavata" ); _default_taxcode_taxgroup_map.put( "GIALA", "excavata" ); //TODO - _default_taxcode_taxgroup_map.put( "TRIVA", "excavata" ); _default_taxcode_taxgroup_map.put( "TTRXX", "apusozoa" ); _default_taxcode_taxgroup_map.put( "THETR", "apusozoa" ); //TODO //FIXME - - _default_taxcode_taxgroup_map.put( "METAC", "archaea" ); _default_taxcode_taxgroup_map.put( "METBF", "archaea" ); _default_taxcode_taxgroup_map.put( "METMA", "archaea" ); diff --git a/forester/java/src/org/forester/archaeopteryx/TreePanel.java b/forester/java/src/org/forester/archaeopteryx/TreePanel.java index ca479f6..c548685 100644 --- a/forester/java/src/org/forester/archaeopteryx/TreePanel.java +++ b/forester/java/src/org/forester/archaeopteryx/TreePanel.java @@ -706,13 +706,13 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee boolean ex = false; String group = null; try { - group =TaxonomyUtil.getTaxGroupByTaxCode( tax.getTaxonomyCode() ); + group = TaxonomyUtil.getTaxGroupByTaxCode( tax.getTaxonomyCode() ); } - catch ( Exception e ) { + catch ( final Exception e ) { ex = true; } if ( !ex && !ForesterUtil.isEmpty( group ) ) { - Color c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( group ); + final Color c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( group ); if ( c != null ) { return c; } diff --git a/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java b/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java index 07ece5b..4a78a13 100644 --- a/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java +++ b/forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java @@ -218,7 +218,8 @@ public class PairwiseGenomeComparator { scoring, false, tax_code_to_id_map, - phy ); + phy, + null ); } catch ( final IOException e ) { ForesterUtil.fatalError( command_line_prg_name, "Failed to write similarites to: \"" diff --git a/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java b/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java index 2734a29..6247714 100644 --- a/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java +++ b/forester/java/src/org/forester/surfacing/PrintableDomainSimilarity.java @@ -32,6 +32,7 @@ import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.SortedMap; import java.util.SortedSet; @@ -378,7 +379,7 @@ public class PrintableDomainSimilarity implements DomainSimilarity { return sb; } - private StringBuffer getTaxonomyGroupDistribution( Phylogeny tol ) { + private StringBuffer getTaxonomyGroupDistribution( final Phylogeny tol ) { //TODO work on me final SortedMap> domain_to_species_set_map = new TreeMap>(); for( final Species species : getSpeciesData().keySet() ) { @@ -390,49 +391,84 @@ public class PrintableDomainSimilarity implements DomainSimilarity { } } final StringBuffer sb = new StringBuffer(); + sb.append( "" ); for( final Map.Entry> domain_to_species_set : domain_to_species_set_map.entrySet() ) { - final Map countz = new HashMap(); - final ValueComparator bvc = new ValueComparator( countz ); - final SortedMap sorted_countz = new TreeMap( bvc ); + final Map counts = new HashMap(); + // final ValueComparator bvc = new ValueComparator( counts ); + // final SortedMap sorted_counts = new TreeMap( bvc ); for( final String tax_code : domain_to_species_set.getValue() ) { final String group = SurfacingUtil.obtainTaxonomyGroup( tax_code, tol ); if ( !ForesterUtil.isEmpty( group ) ) { - if ( !countz.containsKey( group ) ) { - countz.put( group, 1 ); + if ( !counts.containsKey( group ) ) { + counts.put( group, 1 ); } else { - countz.put( group, countz.get( group ) + 1 ); + counts.put( group, counts.get( group ) + 1 ); } } else { return null; } } - sorted_countz.putAll( countz ); - sb.append( "" + domain_to_species_set.getKey() + "" ); + final SortedMap> counts_to_groups = new TreeMap>( new Comparator() { + + @Override + public int compare( final Integer first, final Integer second ) { + return second.compareTo( first ); + } + } ); + for( final Map.Entry group_to_counts : counts.entrySet() ) { + final int c = group_to_counts.getValue(); + if ( !counts_to_groups.containsKey( c ) ) { + counts_to_groups.put( c, new TreeSet() ); + } + counts_to_groups.get( c ).add( group_to_counts.getKey() ); + } + // sorted_counts.putAll( counts ); + sb.append( "" ); + sb.append( "" ); + // sb.append( "" ); + boolean first = true; + for( final Entry> count_to_groups : counts_to_groups.entrySet() ) { + if ( first ) { + first = false; + } + else { + sb.append( "" ); + sb.append( "" ); + } + sb.append( "" ); + sb.append( "" ); } - sb.append( "" ); - + // sb.append( "" ); + sb.append( ForesterUtil.getLineSeparator() ); } + sb.append( "
" ); + sb.append( "" + + domain_to_species_set.getKey() + "" ); sb.append( ": " ); - sb.append( "" ); - for( final Map.Entry group_to_counts : sorted_countz.entrySet() ) { - final String group = group_to_counts.getKey(); - final Color c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( group ); - if ( c == null ) { - throw new IllegalArgumentException( "no color found for taxonomy group\"" + group + "\"" ); - } - final String hex = String.format( "#%02x%02x%02x", c.getRed(), c.getGreen(), c.getBlue() ); - - sb.append( "" ); - sb.append( group ); - sb.append( ": " ); - sb.append( group_to_counts.getValue() ); - sb.append( "" ); - sb.append( " " ); - sb.append( "
\n" ); + sb.append( "
" ); + sb.append( "" ); + final SortedSet groups = count_to_groups.getValue(); + sb.append( count_to_groups.getKey() ); + sb.append( ":" ); + for( final String group : groups ) { + final Color color = ForesterUtil.obtainColorDependingOnTaxonomyGroup( group ); + if ( color == null ) { + throw new IllegalArgumentException( "no color found for taxonomy group\"" + group + "\"" ); + } + final String hex = String.format( "#%02x%02x%02x", + color.getRed(), + color.getGreen(), + color.getBlue() ); + sb.append( "" ); + sb.append( " " ); + sb.append( group ); + sb.append( "" ); + } + sb.append( "
" ); // i am just a template and need to be modified for "printout" TODO // for( final Map.Entry> e : m.entrySet() ) { // sb.append( "" + e.getKey() + "" ); @@ -582,7 +618,6 @@ public class PrintableDomainSimilarity implements DomainSimilarity { sb.append( getSpeciesDataInCustomOrder( true, tax_code_to_id_map, phy ) ); sb.append( getDomainDataInAlphabeticalOrder() ); sb.append( getTaxonomyGroupDistribution( phy ) ); - sb.append( "" ); } sb.append( "" ); diff --git a/forester/java/src/org/forester/surfacing/SurfacingUtil.java b/forester/java/src/org/forester/surfacing/SurfacingUtil.java index 00ee155..73cee01 100644 --- a/forester/java/src/org/forester/surfacing/SurfacingUtil.java +++ b/forester/java/src/org/forester/surfacing/SurfacingUtil.java @@ -101,11 +101,7 @@ public final class SurfacingUtil { public final static Pattern PATTERN_SP_STYLE_TAXONOMY = Pattern.compile( "^[A-Z0-9]{3,5}$" ); private final static Map _TAXCODE_HEXCOLORSTRING_MAP = new HashMap(); - - - private final static Map _TAXCODE_TAXGROUP_MAP = new HashMap(); - - + private final static Map _TAXCODE_TAXGROUP_MAP = new HashMap(); private static final Comparator ASCENDING_CONFIDENCE_VALUE_ORDER = new Comparator() { @Override @@ -1347,34 +1343,35 @@ public final class SurfacingUtil { throws IllegalArgumentException { if ( !_TAXCODE_HEXCOLORSTRING_MAP.containsKey( tax_code ) ) { if ( ( phy != null ) && !phy.isEmpty() ) { -// final List nodes = phy.getNodesViaTaxonomyCode( tax_code ); -// Color c = null; -// if ( ( nodes == null ) || nodes.isEmpty() ) { -// throw new IllegalArgumentException( "code " + tax_code + " is not found" ); -// } -// if ( nodes.size() != 1 ) { -// throw new IllegalArgumentException( "code " + tax_code + " is not unique" ); -// } -// PhylogenyNode n = nodes.get( 0 ); -// while ( n != null ) { -// if ( n.getNodeData().isHasTaxonomy() -// && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) { -// c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getNodeData().getTaxonomy() -// .getScientificName(), tax_code ); -// } -// if ( ( c == null ) && !ForesterUtil.isEmpty( n.getName() ) ) { -// c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getName(), tax_code ); -// } -// if ( c != null ) { -// break; -// } -// n = n.getParent(); -// } + // final List nodes = phy.getNodesViaTaxonomyCode( tax_code ); + // Color c = null; + // if ( ( nodes == null ) || nodes.isEmpty() ) { + // throw new IllegalArgumentException( "code " + tax_code + " is not found" ); + // } + // if ( nodes.size() != 1 ) { + // throw new IllegalArgumentException( "code " + tax_code + " is not unique" ); + // } + // PhylogenyNode n = nodes.get( 0 ); + // while ( n != null ) { + // if ( n.getNodeData().isHasTaxonomy() + // && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) { + // c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getNodeData().getTaxonomy() + // .getScientificName(), tax_code ); + // } + // if ( ( c == null ) && !ForesterUtil.isEmpty( n.getName() ) ) { + // c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( n.getName(), tax_code ); + // } + // if ( c != null ) { + // break; + // } + // n = n.getParent(); + // } final String group = obtainTaxonomyGroup( tax_code, phy ); - Color c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( group ); + final Color c = ForesterUtil.obtainColorDependingOnTaxonomyGroup( group ); if ( c == null ) { - throw new IllegalArgumentException( "no color found for taxonomy code \"" + tax_code + "\"" ); - } + throw new IllegalArgumentException( "no color found for taxonomy group \"" + group + + "\" for code \"" + tax_code + "\"" ); + } final String hex = String.format( "#%02x%02x%02x", c.getRed(), c.getGreen(), c.getBlue() ); _TAXCODE_HEXCOLORSTRING_MAP.put( tax_code, hex ); } @@ -1385,14 +1382,12 @@ public final class SurfacingUtil { } return _TAXCODE_HEXCOLORSTRING_MAP.get( tax_code ); } - - + public static String obtainTaxonomyGroup( final String tax_code, final Phylogeny species_tree ) throws IllegalArgumentException { if ( !_TAXCODE_TAXGROUP_MAP.containsKey( tax_code ) ) { if ( ( species_tree != null ) && !species_tree.isEmpty() ) { final List nodes = species_tree.getNodesViaTaxonomyCode( tax_code ); - if ( ( nodes == null ) || nodes.isEmpty() ) { throw new IllegalArgumentException( "code " + tax_code + " is not found" ); } @@ -1401,28 +1396,23 @@ public final class SurfacingUtil { } PhylogenyNode n = nodes.get( 0 ); String group = null; - while ( n != null ) { if ( n.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) { group = ForesterUtil.obtainNormalizedTaxonomyGroup( n.getNodeData().getTaxonomy() .getScientificName() ); - } - if ( ForesterUtil.isEmpty( group ) && !ForesterUtil.isEmpty( n.getName() ) ) { + if ( ForesterUtil.isEmpty( group ) && !ForesterUtil.isEmpty( n.getName() ) ) { group = ForesterUtil.obtainNormalizedTaxonomyGroup( n.getName() ); - } - if ( !ForesterUtil.isEmpty( group ) ) { + if ( !ForesterUtil.isEmpty( group ) ) { break; } - n = n.getParent(); } - if ( ForesterUtil.isEmpty( group ) ) { + if ( ForesterUtil.isEmpty( group ) ) { throw new IllegalArgumentException( "no group found for taxonomy code \"" + tax_code + "\"" ); } - _TAXCODE_TAXGROUP_MAP.put( tax_code, group ); } else { @@ -1432,9 +1422,6 @@ public final class SurfacingUtil { } return _TAXCODE_TAXGROUP_MAP.get( tax_code ); } - - - public static void performDomainArchitectureAnalysis( final SortedMap> domain_architecutures, final SortedMap domain_architecuture_counts, @@ -2238,7 +2225,8 @@ public final class SurfacingUtil { final DomainSimilarity.DomainSimilarityScoring scoring, final boolean verbose, final Map tax_code_to_id_map, - final Phylogeny phy ) throws IOException { + final Phylogeny phy, + final Set pos_filter_doms ) throws IOException { if ( ( single_writer != null ) && ( ( split_writers == null ) || split_writers.isEmpty() ) ) { split_writers = new HashMap(); split_writers.put( '_', single_writer ); @@ -2279,8 +2267,15 @@ public final class SurfacingUtil { ( ( PrintableDomainSimilarity ) similarity ).setSpeciesOrder( species_order ); } if ( single_writer != null ) { - single_writer.write( "" - + similarity.getDomainId() + "" ); + if ( !ForesterUtil.isEmpty( pos_filter_doms ) && pos_filter_doms.contains( similarity.getDomainId() ) ) { + single_writer.write( "" + similarity.getDomainId() + + "" ); + } + else { + single_writer.write( "" + + similarity.getDomainId() + "" ); + } single_writer.write( SurfacingConstants.NL ); } else { @@ -2289,8 +2284,15 @@ public final class SurfacingUtil { if ( local_writer == null ) { local_writer = split_writers.get( '0' ); } - local_writer.write( "" - + similarity.getDomainId() + "" ); + if ( !ForesterUtil.isEmpty( pos_filter_doms ) && pos_filter_doms.contains( similarity.getDomainId() ) ) { + local_writer.write( "" + similarity.getDomainId() + + "" ); + } + else { + local_writer.write( "" + + similarity.getDomainId() + "" ); + } local_writer.write( SurfacingConstants.NL ); } } diff --git a/forester/java/src/org/forester/util/ForesterUtil.java b/forester/java/src/org/forester/util/ForesterUtil.java index eb1ea8d..1908f8f 100644 --- a/forester/java/src/org/forester/util/ForesterUtil.java +++ b/forester/java/src/org/forester/util/ForesterUtil.java @@ -1206,76 +1206,77 @@ public final class ForesterUtil { public final static Color obtainColorDependingOnTaxonomyGroup( final String tax_group ) { if ( !ForesterUtil.isEmpty( tax_group ) ) { - if ( tax_group.equals( "deuterostomia" ) ) { + if ( tax_group.equals( TaxonomyGroups.DEUTEROSTOMIA ) ) { return TaxonomyColors.DEUTEROSTOMIA_COLOR; } - else if ( tax_group.equals( "protostomia" ) ) { + else if ( tax_group.equals( TaxonomyGroups.PROTOSTOMIA ) ) { return TaxonomyColors.PROTOSTOMIA_COLOR; } - else if ( tax_group.equals( "cnidaria" ) ) { + else if ( tax_group.equals( TaxonomyGroups.CNIDARIA ) ) { return TaxonomyColors.CNIDARIA_COLOR; } - else if ( tax_group.equals( "placozoa" ) ) { + else if ( tax_group.equals( TaxonomyGroups.PLACOZOA ) ) { return TaxonomyColors.PLACOZOA_COLOR; } - else if ( tax_group.equals( "ctenophora" ) ) { + else if ( tax_group.equals( TaxonomyGroups.CTENOPHORA ) ) { return TaxonomyColors.CTENOPHORA_COLOR; } - else if ( tax_group.equals( "porifera" ) ) { + else if ( tax_group.equals( TaxonomyGroups.PORIFERA ) ) { return TaxonomyColors.PORIFERA_COLOR; } - else if ( tax_group.equals( "choanoflagellida" ) ) { + else if ( tax_group.equals( TaxonomyGroups.CHOANOFLAGELLIDA ) ) { return TaxonomyColors.CHOANOFLAGELLIDA; } - else if ( tax_group.equals( "ichthyophonida & filasterea" ) ) { + else if ( tax_group.equals( TaxonomyGroups.ICHTHYOPHONIDA_FILASTEREA ) ) { return TaxonomyColors.ICHTHYOSPOREA_AND_FILASTEREA; } - else if ( tax_group.equals( "dikarya" ) ) { + else if ( tax_group.equals( TaxonomyGroups.DIKARYA ) ) { return TaxonomyColors.DIKARYA_COLOR; } - else if ( tax_group.equalsIgnoreCase( "fungi" ) || tax_group.equalsIgnoreCase( "other fungi" ) ) { + else if ( tax_group.equalsIgnoreCase( TaxonomyGroups.FUNGI ) + || tax_group.equalsIgnoreCase( TaxonomyGroups.OTHER_FUNGI ) ) { return TaxonomyColors.OTHER_FUNGI_COLOR; } - else if ( tax_group.equals( "nucleariidae and fonticula" ) ) { + else if ( tax_group.equals( TaxonomyGroups.NUCLEARIIDAE_AND_FONTICULA_GROUP ) ) { return TaxonomyColors.NUCLEARIIDAE_AND_FONTICULA_GROUP_COLOR; } - else if ( tax_group.equals( "amoebozoa" ) ) { + else if ( tax_group.equals( TaxonomyGroups.AMOEBOZOA ) ) { return TaxonomyColors.AMOEBOZOA_COLOR; } - else if ( tax_group.equals( "embryophyta" ) ) { + else if ( tax_group.equals( TaxonomyGroups.EMBRYOPHYTA ) ) { return TaxonomyColors.EMBRYOPHYTA_COLOR; } - else if ( tax_group.equals( "chlorophyta" ) ) { + else if ( tax_group.equals( TaxonomyGroups.CHLOROPHYTA ) ) { return TaxonomyColors.CHLOROPHYTA_COLOR; } - else if ( tax_group.equals( "rhodophyta" ) ) { + else if ( tax_group.equals( TaxonomyGroups.RHODOPHYTA ) ) { return TaxonomyColors.RHODOPHYTA_COLOR; } - else if ( tax_group.equals( "hacrobia" ) ) { + else if ( tax_group.equals( TaxonomyGroups.HACROBIA ) ) { return TaxonomyColors.HACROBIA_COLOR; } - else if ( tax_group.equals( "glaucocystophyceae" ) ) { + else if ( tax_group.equals( TaxonomyGroups.GLAUCOCYSTOPHYCEAE ) ) { return TaxonomyColors.GLAUCOPHYTA_COLOR; } - else if ( tax_group.equals( "stramenopiles" ) ) { + else if ( tax_group.equals( TaxonomyGroups.STRAMENOPILES ) ) { return TaxonomyColors.STRAMENOPILES_COLOR; } - else if ( tax_group.equals( "alveolata" ) ) { + else if ( tax_group.equals( TaxonomyGroups.ALVEOLATA ) ) { return TaxonomyColors.ALVEOLATA_COLOR; } - else if ( tax_group.equals( "rhizaria" ) ) { + else if ( tax_group.equals( TaxonomyGroups.RHIZARIA ) ) { return TaxonomyColors.RHIZARIA_COLOR; } - else if ( tax_group.equals( "excavata" ) ) { + else if ( tax_group.equals( TaxonomyGroups.EXCAVATA ) ) { return TaxonomyColors.EXCAVATA_COLOR; } - else if ( tax_group.equals( "apusozoa" ) ) { + else if ( tax_group.equals( TaxonomyGroups.APUSOZOA ) ) { return TaxonomyColors.APUSOZOA_COLOR; } - else if ( tax_group.equals( "archaea" ) ) { + else if ( tax_group.equals( TaxonomyGroups.ARCHAEA ) ) { return TaxonomyColors.ARCHAEA_COLOR; } - else if ( tax_group.equals( "bacteria" ) ) { + else if ( tax_group.equals( TaxonomyGroups.BACTERIA ) ) { return TaxonomyColors.BACTERIA_COLOR; } } @@ -1283,80 +1284,80 @@ public final class ForesterUtil { } public final static String obtainNormalizedTaxonomyGroup( final String tax ) { - if ( tax.equalsIgnoreCase( "deuterostomia" ) ) { - return "deuterostomia"; + if ( tax.equalsIgnoreCase( TaxonomyGroups.DEUTEROSTOMIA ) ) { + return TaxonomyGroups.DEUTEROSTOMIA; } - else if ( tax.equalsIgnoreCase( "protostomia" ) ) { - return "protostomia"; + else if ( tax.equalsIgnoreCase( TaxonomyGroups.PROTOSTOMIA ) ) { + return TaxonomyGroups.PROTOSTOMIA; } - else if ( tax.equalsIgnoreCase( "cnidaria" ) ) { - return "cnidaria"; + else if ( tax.equalsIgnoreCase( TaxonomyGroups.CNIDARIA ) ) { + return TaxonomyGroups.CNIDARIA; } - else if ( tax.toLowerCase().startsWith( "trichoplax" ) || tax.equalsIgnoreCase( "placozoa" ) ) { - return "placozoa"; + else if ( tax.toLowerCase().startsWith( "trichoplax" ) || tax.equalsIgnoreCase( TaxonomyGroups.PLACOZOA ) ) { + return TaxonomyGroups.PLACOZOA; } - else if ( tax.toLowerCase().startsWith( "mnemiopsis" ) || tax.equalsIgnoreCase( "ctenophora" ) ) { - return "ctenophora"; + else if ( tax.toLowerCase().startsWith( "mnemiopsis" ) || tax.equalsIgnoreCase( TaxonomyGroups.CTENOPHORA ) ) { + return TaxonomyGroups.CTENOPHORA; } - else if ( tax.toLowerCase().startsWith( "amphimedon" ) || tax.equalsIgnoreCase( "porifera" ) ) { - return "porifera"; + else if ( tax.toLowerCase().startsWith( "amphimedon" ) || tax.equalsIgnoreCase( TaxonomyGroups.PORIFERA ) ) { + return TaxonomyGroups.PORIFERA; } - else if ( tax.equalsIgnoreCase( "codonosigidae" ) || tax.equalsIgnoreCase( "choanoflagellida" ) ) { - return "choanoflagellida"; + else if ( tax.equalsIgnoreCase( "codonosigidae" ) || tax.equalsIgnoreCase( TaxonomyGroups.CHOANOFLAGELLIDA ) ) { + return TaxonomyGroups.CHOANOFLAGELLIDA; } - else if ( tax.toLowerCase().startsWith( "ichthyophonida & filasterea" ) + else if ( tax.toLowerCase().startsWith( TaxonomyGroups.ICHTHYOPHONIDA_FILASTEREA ) || tax.toLowerCase().startsWith( "ichthyophonida and filasterea" ) || tax.toLowerCase().startsWith( "ichthyosporea & filasterea" ) || tax.toLowerCase().startsWith( "ichthyosporea and filasterea" ) ) { - return "ichthyophonida & filasterea"; + return TaxonomyGroups.ICHTHYOPHONIDA_FILASTEREA; } - else if ( tax.equalsIgnoreCase( "dikarya" ) ) { - return "dikarya"; + else if ( tax.equalsIgnoreCase( TaxonomyGroups.DIKARYA ) ) { + return TaxonomyGroups.DIKARYA; } - else if ( tax.equalsIgnoreCase( "other fungi" ) ) { - return "other fungi"; + else if ( tax.equalsIgnoreCase( TaxonomyGroups.FUNGI ) || tax.equalsIgnoreCase( TaxonomyGroups.OTHER_FUNGI ) ) { + return TaxonomyGroups.OTHER_FUNGI; } else if ( tax.toLowerCase().startsWith( "nucleariidae and fonticula" ) ) { - return "nucleariidae and fonticula group"; + return TaxonomyGroups.NUCLEARIIDAE_AND_FONTICULA_GROUP; } - else if ( tax.equalsIgnoreCase( "amoebozoa" ) ) { - return "amoebozoa"; + else if ( tax.equalsIgnoreCase( TaxonomyGroups.AMOEBOZOA ) ) { + return TaxonomyGroups.AMOEBOZOA; } - else if ( tax.equalsIgnoreCase( "embryophyta" ) ) { - return "embryophyta"; + else if ( tax.equalsIgnoreCase( TaxonomyGroups.EMBRYOPHYTA ) ) { + return TaxonomyGroups.EMBRYOPHYTA; } - else if ( tax.equalsIgnoreCase( "chlorophyta" ) ) { - return "chlorophyta"; + else if ( tax.equalsIgnoreCase( TaxonomyGroups.CHLOROPHYTA ) ) { + return TaxonomyGroups.CHLOROPHYTA; } - else if ( tax.equalsIgnoreCase( "rhodophyta" ) ) { - return "rhodophyta"; + else if ( tax.equalsIgnoreCase( TaxonomyGroups.RHODOPHYTA ) ) { + return TaxonomyGroups.RHODOPHYTA; } - else if ( tax.toLowerCase().startsWith( "hacrobia" ) ) { - return "hacrobia"; + else if ( tax.toLowerCase().startsWith( TaxonomyGroups.HACROBIA ) ) { + return TaxonomyGroups.HACROBIA; } - else if ( tax.equalsIgnoreCase( "glaucocystophyceae" ) || tax.equalsIgnoreCase( "glaucophyta" ) ) { - return "glaucocystophyceae"; + else if ( tax.equalsIgnoreCase( TaxonomyGroups.GLAUCOCYSTOPHYCEAE ) || tax.equalsIgnoreCase( "glaucophyta" ) ) { + return TaxonomyGroups.GLAUCOCYSTOPHYCEAE; } - else if ( tax.equalsIgnoreCase( "stramenopiles" ) ) { - return "stramenopiles"; + else if ( tax.equalsIgnoreCase( TaxonomyGroups.STRAMENOPILES ) ) { + return TaxonomyGroups.STRAMENOPILES; } - else if ( tax.equalsIgnoreCase( "alveolata" ) ) { - return "alveolata"; + else if ( tax.equalsIgnoreCase( TaxonomyGroups.ALVEOLATA ) ) { + return TaxonomyGroups.ALVEOLATA; } - else if ( tax.equalsIgnoreCase( "rhizaria" ) ) { - return "rhizaria"; + else if ( tax.equalsIgnoreCase( TaxonomyGroups.RHIZARIA ) ) { + return TaxonomyGroups.RHIZARIA; } - else if ( tax.equalsIgnoreCase( "excavata" ) ) { - return "excavata"; + else if ( tax.equalsIgnoreCase( TaxonomyGroups.EXCAVATA ) ) { + return TaxonomyGroups.EXCAVATA; } - else if ( tax.equalsIgnoreCase( "apusozoa" ) ) { - return "apusozoa"; + else if ( tax.equalsIgnoreCase( TaxonomyGroups.APUSOZOA ) ) { + return TaxonomyGroups.APUSOZOA; } - else if ( tax.equalsIgnoreCase( "archaea" ) ) { - return "archaea"; + else if ( tax.equalsIgnoreCase( TaxonomyGroups.ARCHAEA ) ) { + return TaxonomyGroups.ARCHAEA; } - else if ( tax.equalsIgnoreCase( "bacteria" ) ) { - return "bacteria"; + else if ( tax.equalsIgnoreCase( TaxonomyGroups.BACTERIA ) ) { + return TaxonomyGroups.BACTERIA; } return null; } diff --git a/forester/java/src/org/forester/util/TaxonomyGroups.java b/forester/java/src/org/forester/util/TaxonomyGroups.java new file mode 100644 index 0000000..9c982b2 --- /dev/null +++ b/forester/java/src/org/forester/util/TaxonomyGroups.java @@ -0,0 +1,31 @@ + +package org.forester.util; + +final class TaxonomyGroups { + + static final String ALVEOLATA = "alveolata"; + static final String AMOEBOZOA = "amoebozoa"; + static final String APUSOZOA = "apusozoa"; + static final String ARCHAEA = "archaea"; + static final String BACTERIA = "bacteria"; + static final String CHLOROPHYTA = "chlorophyta"; + static final String CHOANOFLAGELLIDA = "choanoflagellida"; + static final String CNIDARIA = "cnidaria"; + static final String CTENOPHORA = "ctenophora"; + static final String DEUTEROSTOMIA = "deuterostomia"; + static final String DIKARYA = "dikarya"; + static final String EMBRYOPHYTA = "embryophyta"; + static final String EXCAVATA = "excavata"; + static final String FUNGI = "fungi"; + static final String GLAUCOCYSTOPHYCEAE = "glaucocystophyceae"; + static final String HACROBIA = "hacrobia"; + static final String ICHTHYOPHONIDA_FILASTEREA = "ichthyophonida & filasterea"; + static final String NUCLEARIIDAE_AND_FONTICULA_GROUP = "nucleariidae and fonticula group"; + static final String OTHER_FUNGI = "other fungi"; + static final String PLACOZOA = "placozoa"; + static final String PORIFERA = "porifera"; + static final String PROTOSTOMIA = "protostomia"; + static final String RHIZARIA = "rhizaria"; + static final String RHODOPHYTA = "rhodophyta"; + static final String STRAMENOPILES = "stramenopiles"; +} diff --git a/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java b/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java index 340fc3b..e32a2a4 100644 --- a/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java +++ b/forester/java/src/org/forester/ws/seqdb/EbiDbEntry.java @@ -270,28 +270,24 @@ public final class EbiDbEntry implements SequenceDatabaseEntry { private String _map; private String _chromosome; - private void setMap( String map ) { + private void setMap( final String map ) { _map = map; - } - private void setChromosome( String chromosome ) { + private void setChromosome( final String chromosome ) { _chromosome = chromosome; - } @Override - public String getMap( ) { + public String getMap() { return _map; - } + @Override - public String getChromosome() { + public String getChromosome() { return _chromosome; - } - - + private static void x( final StringBuilder sb, final String s ) { if ( sb.length() > 0 ) { sb.append( " " ); diff --git a/forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java b/forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java index 2bf42b9..808da38 100644 --- a/forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java +++ b/forester/java/src/org/forester/ws/seqdb/SequenceDatabaseEntry.java @@ -55,7 +55,7 @@ public interface SequenceDatabaseEntry { public SortedSet getCrossReferences(); - public String getMap( ); + public String getMap(); - public String getChromosome( ); + public String getChromosome(); } \ No newline at end of file diff --git a/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java b/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java index 13ee113..00f4b67 100644 --- a/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java +++ b/forester/java/src/org/forester/ws/seqdb/UniProtEntry.java @@ -296,14 +296,12 @@ public final class UniProtEntry implements SequenceDatabaseEntry { } @Override - public String getMap( ) { - + public String getMap() { return null; } @Override - public String getChromosome( ) { - + public String getChromosome() { return null; } } -- 1.7.10.2