X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fapplication%2Fgene_tree_preprocess.java;h=f755d910d2df81bfdba6dc86cf1dd13f60f914be;hb=4acc1443d8c6c6f47c26a7c57357bdcdad27ecf8;hp=525a3fc87c13f7373c2573fbe4eb189a9e10b9b4;hpb=6e555309fae3a0e65296c94bbe45c151660b449e;p=jalview.git diff --git a/forester/java/src/org/forester/application/gene_tree_preprocess.java b/forester/java/src/org/forester/application/gene_tree_preprocess.java index 525a3fc..f755d91 100644 --- a/forester/java/src/org/forester/application/gene_tree_preprocess.java +++ b/forester/java/src/org/forester/application/gene_tree_preprocess.java @@ -29,15 +29,13 @@ import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; import java.io.IOException; -import java.util.HashSet; -import java.util.Set; import java.util.SortedSet; +import java.util.TreeSet; import org.forester.archaeopteryx.tools.SequenceDataRetriver; import org.forester.io.parsers.util.ParserUtils; import org.forester.io.writers.PhylogenyWriter; import org.forester.phylogeny.Phylogeny; -import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; import org.forester.phylogeny.factories.PhylogenyFactory; @@ -51,10 +49,10 @@ public class gene_tree_preprocess { final static private String HELP_OPTION_2 = "h"; final static private String PRG_NAME = "gene_tree_preprocess"; final static private String PRG_DESC = "gene tree preprocessing for SDI analysis"; - final static private String PRG_VERSION = "1.00"; + final static private String PRG_VERSION = "1.01"; final static private String PRG_DATE = "2012.06.07"; final static private String E_MAIL = "phylosoft@gmail.com"; - final static private String WWW = "www.phylosoft.org/forester/"; + final static private String WWW = "www.phylosoft.org/forester"; public static void main( final String[] args ) { try { @@ -84,11 +82,12 @@ public class gene_tree_preprocess { ForesterUtil.fatalError( PRG_NAME, "phylogeny has " + phy.getNumberOfExternalNodes() + " external node(s), aborting" ); } - final SortedSet not_found = SequenceDataRetriver.obtainSeqInformation( phy, true ); + final SortedSet not_found = SequenceDataRetriver.obtainSeqInformation( phy, true, false ); for( final String remove_me : not_found ) { - // System.out.println( " not found: " + remove_me ); - PhylogenyMethods.removeNode( phy.getNode( remove_me ), phy ); + phy.deleteSubtree( phy.getNode( remove_me ), true ); } + phy.clearHashIdToNodeMap(); + phy.externalNodesHaveChanged(); if ( phy.getNumberOfExternalNodes() < 2 ) { ForesterUtil.fatalError( PRG_NAME, "after removal of unresolvable external nodes, phylogeny has " @@ -102,21 +101,21 @@ public class gene_tree_preprocess { ForesterUtil.fatalError( PRG_NAME, "failed to write to [" + outtree + "]: " + e.getLocalizedMessage() ); } ForesterUtil.programMessage( PRG_NAME, "wrote output phylogeny to: " + outtree ); - final Set species_found = new HashSet(); + final SortedSet species_set = new TreeSet(); + for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + if ( node.getNodeData().isHasTaxonomy() ) { + final String sn = node.getNodeData().getTaxonomy().getScientificName(); + if ( !ForesterUtil.isEmpty( sn ) ) { + species_set.add( sn ); + } + } + } try { final BufferedWriter out = new BufferedWriter( new FileWriter( present_species ) ); - for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) { - final PhylogenyNode node = iter.next(); - if ( node.getNodeData().isHasTaxonomy() ) { - final String sn = node.getNodeData().getTaxonomy().getScientificName(); - if ( !ForesterUtil.isEmpty( sn ) ) { - if ( !species_found.contains( sn ) ) { - species_found.add( sn ); - out.write( node.getNodeData().getTaxonomy().getScientificName() ); - out.newLine(); - } - } - } + for( final String species : species_set ) { + out.write( species ); + out.newLine(); } out.close(); }