}
}
+ /**
+  * Collects the distinct, non-empty taxonomies attached to the external
+  * descendants of node. External descendants without a taxonomy, or with
+  * an empty one, are skipped.
+  *
+  * @param node the node whose external descendants are inspected
+  * @return the set of taxonomies found (empty if none was found)
+  */
+ public static Set<Taxonomy> obtainAllDistinctTaxonomies( final PhylogenyNode node ) {
+     final Set<Taxonomy> taxonomies = new HashSet<Taxonomy>();
+     for( final PhylogenyNode ext_node : node.getAllExternalDescendants() ) {
+         if ( !ext_node.getNodeData().isHasTaxonomy() ) {
+             continue;
+         }
+         final Taxonomy tax = ext_node.getNodeData().getTaxonomy();
+         if ( !tax.isEmpty() ) {
+             taxonomies.add( tax );
+         }
+     }
+     return taxonomies;
+ }
+
+ /**
+  * Returns the set of distinct taxonomies of all external
+  * descendants of node.
+  * If at least one of these external nodes has no taxonomy
+  * (or only an empty one), null is returned instead.
+  *
+  * @param node the node whose external descendants are inspected
+  * @return the set of distinct taxonomies, or null if any external
+  *         descendant lacks a non-empty taxonomy
+  */
+ public static Set<Taxonomy> obtainDistinctTaxonomies( final PhylogenyNode node ) {
+     final Set<Taxonomy> taxonomies = new HashSet<Taxonomy>();
+     for( final PhylogenyNode ext_node : node.getAllExternalDescendants() ) {
+         final boolean has_usable_taxonomy = ext_node.getNodeData().isHasTaxonomy()
+                 && !ext_node.getNodeData().getTaxonomy().isEmpty();
+         if ( !has_usable_taxonomy ) {
+             // One incomplete external node invalidates the whole result.
+             return null;
+         }
+         taxonomies.add( ext_node.getNodeData().getTaxonomy() );
+     }
+     return taxonomies;
+ }
+
public final static Accession obtainSequenceAccessionFromName( final String sequence_name ) {
final String n = sequence_name.trim();
final Matcher matcher1 = seq_identifier_pattern_1.matcher( n );
}
}
- /**
- * Returns the set of distinct taxonomies of
- * all external nodes of node.
- * If at least one the external nodes has no taxonomy,
- * null is returned.
- *
- */
- public static Set<Taxonomy> obtainDistinctTaxonomies( final PhylogenyNode node ) {
- final List<PhylogenyNode> descs = node.getAllExternalDescendants();
- final Set<Taxonomy> tax_set = new HashSet<Taxonomy>();
- for( final PhylogenyNode n : descs ) {
- if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
- return null;
- }
- tax_set.add( n.getNodeData().getTaxonomy() );
- }
- return tax_set;
- }
-
- public static Set<Taxonomy> obtainAllDistinctTaxonomies( final PhylogenyNode node ) {
- final List<PhylogenyNode> descs = node.getAllExternalDescendants();
- final Set<Taxonomy> tax_set = new HashSet<Taxonomy>();
- for( final PhylogenyNode n : descs ) {
- if ( n.getNodeData().isHasTaxonomy() && !n.getNodeData().getTaxonomy().isEmpty() ) {
- tax_set.add( n.getNodeData().getTaxonomy() );
- System.out.println( n.getNodeData().getTaxonomy() );
- }
- }
- for( final Taxonomy taxonomy : tax_set ) {
- System.out.println( taxonomy );
- }
- return tax_set;
- }
-
final static void collapseSubtree( final PhylogenyNode node, final boolean collapse ) {
node.setCollapse( collapse );
if ( node.isExternal() ) {
desc.append( "Rerootable: " );
desc.append( phy.isRerootable() );
desc.append( "\n" );
- desc.append( "Node sum: " );
+ desc.append( "Nodes: " );
desc.append( phy.getNodeCount() );
desc.append( "\n" );
- desc.append( "External node sum: " );
+ desc.append( "External nodes: " );
desc.append( phy.getNumberOfExternalNodes() );
desc.append( "\n" );
- desc.append( "Internal node sum: " );
+ desc.append( "Internal nodes: " );
desc.append( phy.getNodeCount() - phy.getNumberOfExternalNodes() );
desc.append( "\n" );
- desc.append( "Branche sum: " );
+ desc.append( "Internal nodes with polytomies: " );
+ desc.append( PhylogenyMethods.countNumberOfPolytomies( phy ) );
+ desc.append( "\n" );
+ desc.append( "Branches: " );
desc.append( phy.getNumberOfBranches() );
desc.append( "\n" );
desc.append( "Depth: " );
}
}
+ /**
+  * Reports an out-of-memory condition and terminates the program.
+  * A hint to increase the Java heap size and the stack trace of e are
+  * written to standard error, the same hint is shown in an error dialog,
+  * and the JVM is then exited with status -1.
+  *
+  * @param e the out-of-memory error that was encountered
+  */
+ final static void outOfMemoryError( final OutOfMemoryError e ) {
+     final String hint = "Java memory allocation might be too small, try \"-Xmx2048m\" java command line option";
+     System.err.println();
+     System.err.println( hint );
+     System.err.println();
+     e.printStackTrace();
+     System.err.println();
+     final String dialog_message = hint + "\n\nError: " + e.getLocalizedMessage();
+     final String dialog_title = "Out of Memory Error [" + Constants.PRG_NAME + " " + Constants.VERSION + "]";
+     JOptionPane.showMessageDialog( null, dialog_message, dialog_title, JOptionPane.ERROR_MESSAGE );
+     System.exit( -1 );
+ }
+
final static void printAppletMessage( final String applet_name, final String message ) {
System.out.println( "[" + applet_name + "] > " + message );
}
System.exit( -1 );
}
- final static void outOfMemoryError( final OutOfMemoryError e ) {
- System.err.println();
- System.err.println( "Java memory allocation might be too small, try \"-Xmx2048m\" java command line option" );
- System.err.println();
- e.printStackTrace();
- System.err.println();
- JOptionPane.showMessageDialog( null,
- "Java memory allocation might be too small, try \"-Xmx2048m\" java command line option"
- + "\n\nError: " + e.getLocalizedMessage(),
- "Out of Memory Error [" + Constants.PRG_NAME + " " + Constants.VERSION + "]",
- JOptionPane.ERROR_MESSAGE );
- System.exit( -1 );
- }
-
final static void unexpectedException( final Exception e ) {
System.err.println();
e.printStackTrace( System.err );
--- /dev/null
+
+package org.forester.archaeopteryx;
+
+import java.io.File;
+
+import org.forester.io.parsers.phyloxml.PhyloXmlParser;
+import org.forester.io.writers.PhylogenyWriter;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Taxonomy;
+import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
+import org.forester.phylogeny.factories.PhylogenyFactory;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.util.CommandLineArguments;
+
+/**
+ * Small command line utility: parses a phyloXML file, takes the first
+ * phylogeny found in it, runs {@code processNode} on every node (visited
+ * in postorder) and writes the modified phylogeny to a new phyloXML file.
+ * <p>
+ * Input and output files are obtained from the command line via
+ * {@code CommandLineArguments.getFile( 0 )} and {@code getFile( 1 )}.
+ * The program refuses to overwrite an existing output file. All failures
+ * are reported on standard error and end the JVM with status -1.
+ */
+public class simple_node_processor {
+
+    /**
+     * Program entry point.
+     *
+     * @param args the command line arguments (input file, output file)
+     */
+    public static void main( final String args[] ) {
+        try {
+            final CommandLineArguments cla = new CommandLineArguments( args );
+            final File in = cla.getFile( 0 );
+            final File out = cla.getFile( 1 );
+            if ( out.exists() ) {
+                System.err.println( out + " already exists" );
+                System.exit( -1 );
+            }
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final Phylogeny[] phylogenies = factory.create( in, new PhyloXmlParser() );
+            // Fail with a clear message instead of an ArrayIndexOutOfBoundsException
+            // when the input does not contain any phylogeny.
+            if ( ( phylogenies == null ) || ( phylogenies.length < 1 ) ) {
+                System.err.println( "no phylogeny found in " + in );
+                System.exit( -1 );
+            }
+            final Phylogeny phylogeny = phylogenies[ 0 ];
+            int i = 0;
+            for( final PhylogenyNodeIterator it = phylogeny.iteratorPostorder(); it.hasNext(); ) {
+                processNode( it.next(), i );
+                i++;
+            }
+            final PhylogenyWriter writer = new PhylogenyWriter();
+            writer.toPhyloXML( out, phylogeny, 0 );
+        }
+        catch ( final Exception e ) {
+            System.err.println( e.getLocalizedMessage() );
+            e.printStackTrace();
+            System.exit( -1 );
+        }
+    }
+
+    /**
+     * Per-node processing step: for an external node that has a taxonomy,
+     * the identifier of that taxonomy is cleared (set to null). All other
+     * nodes are left unchanged.
+     *
+     * @param node the node to process
+     * @param i    running index of the node in the traversal (currently unused)
+     */
+    private static void processNode( final PhylogenyNode node, final int i ) {
+        if ( node.isExternal() && node.getNodeData().isHasTaxonomy() ) {
+            final Taxonomy t = node.getNodeData().getTaxonomy();
+            t.setIdentifier( null );
+        }
+    }
+}
\ No newline at end of file
public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParser {
public static final TAXONOMY_EXTRACTION TAXONOMY_EXTRACTION_DEFAULT = TAXONOMY_EXTRACTION.NO;
- public final static Pattern UC_LETTERS_NUMBERS_PATTERN = Pattern.compile( "^[A-Z0-9]+$" );
public final static Pattern NUMBERS_ONLY_PATTERN = Pattern.compile( "^[0-9\\.]+$" );
public final static Pattern MB_PROB_PATTERN = Pattern.compile( "prob=([^,]+)" );
public final static Pattern MB_PROB_SD_PATTERN = Pattern.compile( "prob_stddev=([^,]+)" );
}
}
}
- else if ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) {
+ if ( ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
+ || ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGRESSIVE ) ) {
final Matcher m1 = TAXOMONY_CODE_PATTERN_1.matcher( name );
if ( m1.matches() ) {
return name;
public class PhylogenyMethods {
- //private static PhylogenyMethods _instance = null;
- //private final PhylogenyNode _farthest_1 = null;
- //private final PhylogenyNode _farthest_2 = null;
private PhylogenyMethods() {
// Hidden constructor.
}
- // public double calculateFurthestDistance( final Phylogeny phylogeny ) {
- // if ( phylogeny.getNumberOfExternalNodes() < 2 ) {
- // return 0.0;
- // }
- // _farthest_1 = null;
- // _farthest_2 = null;
- // PhylogenyNode node_1 = null;
- // PhylogenyNode node_2 = null;
- // double farthest_d = -Double.MAX_VALUE;
- // final PhylogenyMethods methods = PhylogenyMethods.getInstance();
- // final List<PhylogenyNode> ext_nodes = phylogeny.getRoot().getAllExternalDescendants();
- // for( int i = 1; i < ext_nodes.size(); ++i ) {
- // for( int j = 0; j < i; ++j ) {
- // final double d = methods.calculateDistance( ext_nodes.get( i ), ext_nodes.get( j ) );
- // if ( d < 0.0 ) {
- // throw new RuntimeException( "distance cannot be negative" );
- // }
- // if ( d > farthest_d ) {
- // farthest_d = d;
- // node_1 = ext_nodes.get( i );
- // node_2 = ext_nodes.get( j );
- // }
- // }
- // }
- // _farthest_1 = node_1;
- // _farthest_2 = node_2;
- // return farthest_d;
- // }
@Override
public Object clone() throws CloneNotSupportedException {
throw new CloneNotSupportedException();
}
- // public PhylogenyNode getFarthestNode1() {
- // return _farthest_1;
- // }
- // public PhylogenyNode getFarthestNode2() {
- // return _farthest_2;
- // }
public static DescriptiveStatistics calculatBranchLengthStatistics( final Phylogeny phy ) {
final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
for( final PhylogenyNode n : ext ) {
nodes.put( n.getName(), n );
}
- // for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
- // final PhylogenyNode n = iter.next();
- // nodes.put( n.getName(), n );
- // }
return nodes;
}
phylogeny.recalculateNumberOfExternalDescendants( true );
}
- public static void midpointRootOLD( final Phylogeny phylogeny ) {
- // if ( phylogeny.getNumberOfExternalNodes() < 2 ) {
- // return;
- // }
- // final PhylogenyMethods methods = getInstance();
- //final double farthest_d = methods.calculateFurthestDistance( phylogeny );
- // final PhylogenyNode f1 = methods.getFarthestNode1();
- // final PhylogenyNode f2 = methods.getFarthestNode2();
- // if ( farthest_d <= 0.0 ) {
- // return;
- // }
- // double x = farthest_d / 2.0;
- // PhylogenyNode n = f1;
- // if ( PhylogenyMethods.getDistance( f1, phylogeny.getRoot() ) < PhylogenyMethods.getDistance( f2, phylogeny
- // .getRoot() ) ) {
- // n = f2;
- // }
- // while ( ( x > n.getDistanceToParent() ) && !n.isRoot() ) {
- // x -= ( n.getDistanceToParent() > 0 ? n.getDistanceToParent() : 0 );
- // n = n.getParent();
- // }
- // phylogeny.reRoot( n, x );
- // phylogeny.recalculateNumberOfExternalDescendants( true );
- // final PhylogenyNode a = getFurthestDescendant( phylogeny.getRoot().getChildNode1() );
- // final PhylogenyNode b = getFurthestDescendant( phylogeny.getRoot().getChildNode2() );
- // final double da = getDistance( a, phylogeny.getRoot() );
- // final double db = getDistance( b, phylogeny.getRoot() );
- // if ( Math.abs( da - db ) > 0.000001 ) {
- // throw new FailedConditionCheckException( "this should not have happened: midpoint rooting failed: da="
- // + da + ", db=" + db + ", diff=" + Math.abs( da - db ) );
- // }
- }
-
public static void normalizeBootstrapValues( final Phylogeny phylogeny,
final double max_bootstrap_value,
final double max_normalized_value ) {
final NHXParser nhx = new NHXParser();
nhx.setReplaceUnderscores( false );
nhx.setIgnoreQuotes( true );
- nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.AGRESSIVE );
final String gene_trees_1_str = "(((((MOUSE,RAT),HUMAN),CAEEL),YEAST),ARATH);"
+ "((((MOUSE,RAT),HUMAN),(ARATH,YEAST)),CAEEL);" + "((MOUSE,RAT),(((ARATH,YEAST),CAEEL),HUMAN));"
+ "(((((MOUSE,HUMAN),RAT),CAEEL),YEAST),ARATH);" + "((((HUMAN,MOUSE),RAT),(ARATH,YEAST)),CAEEL);";
final NHXParser nhx = new NHXParser();
nhx.setReplaceUnderscores( false );
nhx.setIgnoreQuotes( true );
- nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.AGRESSIVE );
//
final String gene_trees_00_str = "(MOUSE,RAT);(MOUSE,RAT);(MOUSE,RAT);(RAT,MOUSE);";
final Phylogeny[] gene_trees_00 = factory.create( gene_trees_00_str, nhx );