X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fapplication%2Fgene_tree_preprocess.java;h=06f28037a56056dbf9afd4ee22b01226d7078d9c;hb=c2f18ef75d93bcf32df987ba5a817150b73ad93e;hp=525a3fc87c13f7373c2573fbe4eb189a9e10b9b4;hpb=6e555309fae3a0e65296c94bbe45c151660b449e;p=jalview.git diff --git a/forester/java/src/org/forester/application/gene_tree_preprocess.java b/forester/java/src/org/forester/application/gene_tree_preprocess.java index 525a3fc..06f2803 100644 --- a/forester/java/src/org/forester/application/gene_tree_preprocess.java +++ b/forester/java/src/org/forester/application/gene_tree_preprocess.java @@ -21,7 +21,7 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.application; @@ -29,32 +29,31 @@ import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; -import java.util.HashSet; -import java.util.Set; import java.util.SortedSet; +import java.util.TreeSet; -import org.forester.archaeopteryx.tools.SequenceDataRetriver; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; -import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.CommandLineArguments; import org.forester.util.ForesterUtil; +import org.forester.ws.seqdb.SequenceDbWsTools; public class gene_tree_preprocess { - final static private String HELP_OPTION_1 = "help"; - final static private String HELP_OPTION_2 = "h"; - final static private String PRG_NAME = "gene_tree_preprocess"; - final static private String PRG_DESC = "gene tree preprocessing for SDI analysis"; - final static private String PRG_VERSION = "1.00"; - final static private String PRG_DATE = "2012.06.07"; - final static private String E_MAIL = "phylosoft@gmail.com"; - final static private String WWW = "www.phylosoft.org/forester/"; + final static private String HELP_OPTION_1 = "help"; + final static private String HELP_OPTION_2 = "h"; + final static private String PRG_NAME = "gene_tree_preprocess"; + final static private String PRG_DESC = "gene tree preprocessing for SDI analysis"; + final static private String PRG_VERSION = "1.01"; + final static private String PRG_DATE = "2012.06.07"; + final static private String E_MAIL = "phylosoft@gmail.com"; + final static private String WWW = "www.phylosoft.org/forester"; + private final static int DEFAULT_LINES_TO_RETURN = 50; public static void main( final String[] args ) { try { @@ -84,11 +83,15 @@ public class gene_tree_preprocess { ForesterUtil.fatalError( PRG_NAME, "phylogeny has " + phy.getNumberOfExternalNodes() + " external node(s), aborting" ); } - final SortedSet not_found = SequenceDataRetriver.obtainSeqInformation( phy, true ); + final SortedSet not_found = SequenceDbWsTools.obtainSeqInformation( phy, + true, + false, + DEFAULT_LINES_TO_RETURN ); for( final String remove_me : not_found ) { - // System.out.println( " not found: " + remove_me ); - PhylogenyMethods.removeNode( phy.getNode( remove_me ), phy ); + phy.deleteSubtree( phy.getNode( remove_me ), true ); } + phy.clearHashIdToNodeMap(); + phy.externalNodesHaveChanged(); if ( phy.getNumberOfExternalNodes() < 2 ) { ForesterUtil.fatalError( PRG_NAME, "after removal of unresolvable external nodes, phylogeny has " @@ -102,21 +105,21 @@ public class gene_tree_preprocess { ForesterUtil.fatalError( PRG_NAME, "failed to write to [" + outtree + "]: " + e.getLocalizedMessage() ); } ForesterUtil.programMessage( PRG_NAME, "wrote output phylogeny to: " + outtree ); - final Set species_found = new HashSet(); + final SortedSet species_set = new TreeSet(); + for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + if ( node.getNodeData().isHasTaxonomy() ) { + final String sn = node.getNodeData().getTaxonomy().getScientificName(); + if ( !ForesterUtil.isEmpty( sn ) ) { + species_set.add( sn ); + } + } + } try { final BufferedWriter out = new BufferedWriter( new FileWriter( present_species ) ); - for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - if ( node.getNodeData().isHasTaxonomy() ) { - final String sn = node.getNodeData().getTaxonomy().getScientificName(); - if ( !ForesterUtil.isEmpty( sn ) ) { - if ( !species_found.contains( sn ) ) { - species_found.add( sn ); - out.write( node.getNodeData().getTaxonomy().getScientificName() ); - out.newLine(); - } - } - } + for( final String species : species_set ) { + out.write( species ); + out.newLine(); } out.close(); } @@ -145,7 +148,7 @@ public class gene_tree_preprocess { } } - public static void checkForOutputFileWriteability( final File outfile ) { + private static void checkForOutputFileWriteability( final File outfile ) { final String error = ForesterUtil.isWritableFile( outfile ); if ( !ForesterUtil.isEmpty( error ) ) { ForesterUtil.fatalError( PRG_NAME, error );