From 7f4318a3ef37864b5453e3cd56270b8e91e76b9f Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Thu, 23 Feb 2012 05:02:50 +0000 Subject: [PATCH] in progress --- .../forester/application/phyloxml_converter.java | 3 + .../src/org/forester/application/surfacing.java | 12 +- .../forester/application/surfacing_hmmpfam.java | 5 +- .../org/forester/archaeopteryx/ControlPanel.java | 6 + .../src/org/forester/archaeopteryx/TreePanel.java | 10 ++ .../surfacing/DomainCountsDifferenceUtil.java | 7 +- .../src/org/forester/surfacing/SurfacingUtil.java | 116 +++++++++++--------- .../java/src/org/forester/tools/SupportCount.java | 3 + 8 files changed, 102 insertions(+), 60 deletions(-) diff --git a/forester/java/src/org/forester/application/phyloxml_converter.java b/forester/java/src/org/forester/application/phyloxml_converter.java index bee52a0..6d45207 100644 --- a/forester/java/src/org/forester/application/phyloxml_converter.java +++ b/forester/java/src/org/forester/application/phyloxml_converter.java @@ -255,6 +255,9 @@ public class phyloxml_converter { if ( order_subtrees ) { for( final Phylogeny phy : phys ) { PhylogenyMethods.orderAppearance( phy.getRoot(), true, true, DESCENDANT_SORT_PRIORITY.TAXONOMY ); + phy.externalNodesHaveChanged(); + phy.hashIDs(); + phy.recalculateNumberOfExternalDescendants( true ); } } try { diff --git a/forester/java/src/org/forester/application/surfacing.java b/forester/java/src/org/forester/application/surfacing.java index 4218c4b..951ce21 100644 --- a/forester/java/src/org/forester/application/surfacing.java +++ b/forester/java/src/org/forester/application/surfacing.java @@ -235,7 +235,7 @@ public class surfacing { final static private String SEQ_EXTRACT_OPTION = "prot_extract"; final static private char SEPARATOR_FOR_INPUT_VALUES = '#'; final static private String PRG_VERSION = "2.210"; - final static private String PRG_DATE = "2011.12.08"; + final static private String PRG_DATE = "2012.02.21"; final static private String E_MAIL = "czmasek@burnham.org"; final static private String WWW = "www.phylosoft.org/forester/applications/surfacing"; final static private boolean IGNORE_DUFS_DEFAULT = true; @@ -278,6 +278,7 @@ public class surfacing { public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists.txt"; public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping.txt"; public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique.txt"; + public static final String LIMIT_SPEC_FOR_PROT_EX = null; // e.g. "HUMAN"; set to null for not using this feature (default). private static void checkWriteabilityForPairwiseComparisons( final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option, final String[][] input_file_properties, @@ -1946,7 +1947,8 @@ public class surfacing { SurfacingUtil.extractProteinNames( protein_list, query_domain_ids_array[ j ], query_domains_writer_ary[ j ], - "\t" ); + "\t", + LIMIT_SPEC_FOR_PROT_EX ); query_domains_writer_ary[ j ].flush(); } catch ( final IOException e ) { @@ -2788,7 +2790,11 @@ public class surfacing { SurfacingUtil.checkForOutputFileWriteability( out ); try { final Writer proteins_file_writer = new BufferedWriter( new FileWriter( out ) ); - SurfacingUtil.extractProteinNames( protein_lists_per_species, domain, proteins_file_writer, "\t" ); + SurfacingUtil.extractProteinNames( protein_lists_per_species, + domain, + proteins_file_writer, + "\t", + LIMIT_SPEC_FOR_PROT_EX ); proteins_file_writer.close(); } catch ( final IOException e ) { diff --git a/forester/java/src/org/forester/application/surfacing_hmmpfam.java b/forester/java/src/org/forester/application/surfacing_hmmpfam.java index 476640b..dfbed8a 100644 --- a/forester/java/src/org/forester/application/surfacing_hmmpfam.java +++ b/forester/java/src/org/forester/application/surfacing_hmmpfam.java @@ -1868,7 +1868,8 @@ public class surfacing_hmmpfam { SurfacingUtil.extractProteinNames( protein_list, query_domain_ids_array[ j ], query_domains_writer_ary[ j ], - "\t" ); + "\t", + surfacing.LIMIT_SPEC_FOR_PROT_EX ); query_domains_writer_ary[ j ].flush(); } catch ( final IOException e ) { @@ -2576,7 +2577,7 @@ public class surfacing_hmmpfam { SurfacingUtil.checkForOutputFileWriteability( out ); try { final Writer proteins_file_writer = new BufferedWriter( new FileWriter( out ) ); - SurfacingUtil.extractProteinNames( protein_lists_per_species, domain, proteins_file_writer, "\t" ); + SurfacingUtil.extractProteinNames( protein_lists_per_species, domain, proteins_file_writer, "\t", null ); proteins_file_writer.close(); } catch ( final IOException e ) { diff --git a/forester/java/src/org/forester/archaeopteryx/ControlPanel.java b/forester/java/src/org/forester/archaeopteryx/ControlPanel.java index 21c4891..419f2c5 100644 --- a/forester/java/src/org/forester/archaeopteryx/ControlPanel.java +++ b/forester/java/src/org/forester/archaeopteryx/ControlPanel.java @@ -252,6 +252,12 @@ final class ControlPanel extends JPanel implements ActionListener { } PhylogenyMethods.orderAppearance( tp.getPhylogeny().getRoot(), _order_of_appearance, true, pri ); _order_of_appearance = !_order_of_appearance; + tp.setNodeInPreorderToNull(); + tp.getPhylogeny().externalNodesHaveChanged(); + tp.getPhylogeny().hashIDs(); + tp.getPhylogeny().recalculateNumberOfExternalDescendants( true ); + tp.resetNodeIdToDistToLeafMap(); + tp.setEdited( true ); displayedPhylogenyMightHaveChanged( false ); } else if ( e.getSource() == _uncollapse_all ) { diff --git a/forester/java/src/org/forester/archaeopteryx/TreePanel.java b/forester/java/src/org/forester/archaeopteryx/TreePanel.java index a9c154b..52d4bfb 100644 --- a/forester/java/src/org/forester/archaeopteryx/TreePanel.java +++ b/forester/java/src/org/forester/archaeopteryx/TreePanel.java @@ -4931,6 +4931,11 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee if ( !node.isExternal() ) { node.swapChildren(); setNodeInPreorderToNull(); + _phylogeny.externalNodesHaveChanged(); + _phylogeny.hashIDs(); + _phylogeny.recalculateNumberOfExternalDescendants( true ); + resetNodeIdToDistToLeafMap(); + setEdited( true ); } repaint(); } @@ -4950,6 +4955,11 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee } PhylogenyMethods.sortNodeDescendents( node, pri ); setNodeInPreorderToNull(); + _phylogeny.externalNodesHaveChanged(); + _phylogeny.hashIDs(); + _phylogeny.recalculateNumberOfExternalDescendants( true ); + resetNodeIdToDistToLeafMap(); + setEdited( true ); } repaint(); } diff --git a/forester/java/src/org/forester/surfacing/DomainCountsDifferenceUtil.java b/forester/java/src/org/forester/surfacing/DomainCountsDifferenceUtil.java index 6996ef3..9414d39 100644 --- a/forester/java/src/org/forester/surfacing/DomainCountsDifferenceUtil.java +++ b/forester/java/src/org/forester/surfacing/DomainCountsDifferenceUtil.java @@ -44,6 +44,7 @@ import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; +import org.forester.application.surfacing; import org.forester.go.GoId; import org.forester.go.GoTerm; import org.forester.util.BasicDescriptiveStatistics; @@ -820,7 +821,11 @@ public final class DomainCountsDifferenceUtil { + domain_id + PLUS_MINUS_PROTEINS_FILE_DOM_SUFFIX ); SurfacingUtil.checkForOutputFileWriteability( my_proteins_file ); final Writer proteins_file_writer = new BufferedWriter( new FileWriter( my_proteins_file ) ); - SurfacingUtil.extractProteinNames( protein_lists_per_species, domain_id, proteins_file_writer, "\t" ); + SurfacingUtil.extractProteinNames( protein_lists_per_species, + domain_id, + proteins_file_writer, + "\t", + surfacing.LIMIT_SPEC_FOR_PROT_EX ); proteins_file_writer.close(); System.out.println( "Wrote proteins list to \"" + my_proteins_file + "\"" ); } diff --git a/forester/java/src/org/forester/surfacing/SurfacingUtil.java b/forester/java/src/org/forester/surfacing/SurfacingUtil.java index c6df0b2..f979897 100644 --- a/forester/java/src/org/forester/surfacing/SurfacingUtil.java +++ b/forester/java/src/org/forester/surfacing/SurfacingUtil.java @@ -845,65 +845,35 @@ public final class SurfacingUtil { public static void extractProteinNames( final List proteins, final List query_domain_ids_nc_order, final Writer out, - final String separator ) throws IOException { + final String separator, + final String limit_to_species ) throws IOException { for( final Protein protein : proteins ) { - if ( protein.contains( query_domain_ids_nc_order, true ) ) { - out.write( protein.getSpecies().getSpeciesId() ); - out.write( separator ); - out.write( protein.getProteinId().getId() ); - out.write( separator ); - out.write( "[" ); - final Set visited_domain_ids = new HashSet(); - boolean first = true; - for( final Domain domain : protein.getProteinDomains() ) { - if ( !visited_domain_ids.contains( domain.getDomainId() ) ) { - visited_domain_ids.add( domain.getDomainId() ); - if ( first ) { - first = false; - } - else { - out.write( " " ); - } - out.write( domain.getDomainId().getId() ); - out.write( " {" ); - out.write( "" + domain.getTotalCount() ); - out.write( "}" ); - } - } - out.write( "]" ); - out.write( separator ); - if ( !( ForesterUtil.isEmpty( protein.getDescription() ) || protein.getDescription() - .equals( SurfacingConstants.NONE ) ) ) { - out.write( protein.getDescription() ); - } - out.write( separator ); - if ( !( ForesterUtil.isEmpty( protein.getAccession() ) || protein.getAccession() - .equals( SurfacingConstants.NONE ) ) ) { - out.write( protein.getAccession() ); - } - out.write( SurfacingConstants.NL ); - } - } - out.flush(); - } - - public static void extractProteinNames( final SortedMap> protein_lists_per_species, - final DomainId domain_id, - final Writer out, - final String separator ) throws IOException { - for( final Species species : protein_lists_per_species.keySet() ) { - for( final Protein protein : protein_lists_per_species.get( species ) ) { - final List domains = protein.getProteinDomains( domain_id ); - if ( domains.size() > 0 ) { - final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); - for( final Domain domain : domains ) { - stats.addValue( domain.getPerSequenceEvalue() ); - } + if ( ForesterUtil.isEmpty( limit_to_species ) + || protein.getSpecies().getSpeciesId().equalsIgnoreCase( limit_to_species ) ) { + if ( protein.contains( query_domain_ids_nc_order, true ) ) { out.write( protein.getSpecies().getSpeciesId() ); out.write( separator ); out.write( protein.getProteinId().getId() ); out.write( separator ); - out.write( "[" + FORMATTER.format( stats.median() ) + "]" ); + out.write( "[" ); + final Set visited_domain_ids = new HashSet(); + boolean first = true; + for( final Domain domain : protein.getProteinDomains() ) { + if ( !visited_domain_ids.contains( domain.getDomainId() ) ) { + visited_domain_ids.add( domain.getDomainId() ); + if ( first ) { + first = false; + } + else { + out.write( " " ); + } + out.write( domain.getDomainId().getId() ); + out.write( " {" ); + out.write( "" + domain.getTotalCount() ); + out.write( "}" ); + } + } + out.write( "]" ); out.write( separator ); if ( !( ForesterUtil.isEmpty( protein.getDescription() ) || protein.getDescription() .equals( SurfacingConstants.NONE ) ) ) { @@ -921,6 +891,44 @@ public final class SurfacingUtil { out.flush(); } + public static void extractProteinNames( final SortedMap> protein_lists_per_species, + final DomainId domain_id, + final Writer out, + final String separator, + final String limit_to_species ) throws IOException { + for( final Species species : protein_lists_per_species.keySet() ) { + for( final Protein protein : protein_lists_per_species.get( species ) ) { + if ( ForesterUtil.isEmpty( limit_to_species ) + || protein.getSpecies().getSpeciesId().equalsIgnoreCase( limit_to_species ) ) { + final List domains = protein.getProteinDomains( domain_id ); + if ( domains.size() > 0 ) { + final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); + for( final Domain domain : domains ) { + stats.addValue( domain.getPerSequenceEvalue() ); + } + out.write( protein.getSpecies().getSpeciesId() ); + out.write( separator ); + out.write( protein.getProteinId().getId() ); + out.write( separator ); + out.write( "[" + FORMATTER.format( stats.median() ) + "]" ); + out.write( separator ); + if ( !( ForesterUtil.isEmpty( protein.getDescription() ) || protein.getDescription() + .equals( SurfacingConstants.NONE ) ) ) { + out.write( protein.getDescription() ); + } + out.write( separator ); + if ( !( ForesterUtil.isEmpty( protein.getAccession() ) || protein.getAccession() + .equals( SurfacingConstants.NONE ) ) ) { + out.write( protein.getAccession() ); + } + out.write( SurfacingConstants.NL ); + } + } + } + } + out.flush(); + } + public static SortedSet getAllDomainIds( final List gwcd_list ) { final SortedSet all_domains_ids = new TreeSet(); for( final GenomeWideCombinableDomains gwcd : gwcd_list ) { diff --git a/forester/java/src/org/forester/tools/SupportCount.java b/forester/java/src/org/forester/tools/SupportCount.java index e554d4e..e542c5e 100644 --- a/forester/java/src/org/forester/tools/SupportCount.java +++ b/forester/java/src/org/forester/tools/SupportCount.java @@ -204,6 +204,9 @@ public final class SupportCount { // comparison if // phylos are saved // to file. + evaluator_phylogeny.externalNodesHaveChanged(); + evaluator_phylogeny.hashIDs(); + evaluator_phylogeny.recalculateNumberOfExternalDescendants( true ); } final double s = SupportCount.compare( phylogeny, evaluator_phylogenies[ i ], -- 1.7.10.2