import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
public final class Analysis2 {
- public static Result2 execute( final Phylogeny p, final String query, final String separator ) {
- final PhylogenyNode qnode = p.getNode( query );
- if ( qnode.isRoot() ) {
- throw new IllegalStateException( "Unexpected error: Query " + query
- + " is root. This should have never happened" );
- }
- if ( qnode.getParent().isRoot() ) {
- throw new IllegalStateException( "Unexpected error: Parent of query " + query
- + " is root. This should have never happened" );
- }
- PhylogenyNode qnode_p = qnode.getParent();
- PhylogenyNode qnode_pp = qnode.getParent().getParent();
- while ( qnode_p.getNumberOfDescendants() == 1 ) {
- qnode_p = qnode_p.getParent();
- }
- while ( qnode_pp.getNumberOfDescendants() == 1 ) {
- qnode_pp = qnode_pp.getParent();
+ public static Result2 execute( final Phylogeny p, final Pattern query, final String separator ) {
+ final List<PhylogenyNode> qnodes = p.getNodes( query );
+ final Result2 res = new Result2();
+ for( int i = 0; i < qnodes.size(); ++i ) {
+ final PhylogenyNode qnode = qnodes.get( i );
+ System.out.println( ">>" + qnode.getName() );
+ if ( qnode.isRoot() ) {
+ throw new IllegalArgumentException( "Query " + query + " is root." );
+ }
+ if ( qnode.getParent().isRoot() ) {
+ throw new IllegalArgumentException( "Parent of query " + query + " is root." );
+ }
+ PhylogenyNode qnode_p = qnode.getParent();
+ PhylogenyNode qnode_pp = qnode.getParent().getParent();
+ //This is to deal with internal nodes with 1 descendant.
+ while ( qnode_p.getNumberOfDescendants() == 1 ) {
+ qnode_p = qnode_p.getParent();
+ }
+ while ( qnode_pp.getNumberOfDescendants() == 1 ) {
+ qnode_pp = qnode_pp.getParent();
+ }
+ // final List<PhylogenyNode> qnode_ext_nodes = new ArrayList<PhylogenyNode>();
+ final List<String> qnode_ext_nodes_names = new ArrayList<>();
+ for( final PhylogenyNode qnode_ext_node : qnode_pp.getAllExternalDescendants() ) {
+ final String name = qnode_ext_node.getName();
+ if ( ForesterUtil.isEmptyTrimmed( name ) ) {
+ throw new IllegalArgumentException( "external node(s) with empty names found" );
+ }
+ final Matcher m = query.matcher( name );
+ if ( !m.find() ) {
+ qnode_ext_nodes_names.add( name );
+ }
+ }
+ final int lec_ext_nodes = qnode_ext_nodes_names.size();
+ final int p_ext_nodes = p.getNumberOfExternalNodes() - 1;
+ final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( qnode_ext_nodes_names, separator );
+ System.out.println( greatest_common_prefix );
+ Matcher matcher = query.matcher( qnode.getName() );
+ String conf_str = null;
+ if ( matcher.find() ) {
+ conf_str = matcher.group( 1 );
+ }
+ else {
+ throw new IllegalStateException( "pattern did not match -- this should have never happened!" );
+ }
+ res.setLeastEncompassingCladeSize( lec_ext_nodes );
+ res.setTreeSize( p_ext_nodes );
+ final double conf = Double.parseDouble( conf_str );
+ if ( !ForesterUtil.isEmpty( greatest_common_prefix ) ) {
+ res.addGreatestCommonPrefix( greatest_common_prefix, conf );
+ }
+ else {
+ res.addGreatestCommonPrefix( "?", conf );
+ }
}
- final List<PhylogenyNode> qnode_ext_nodes = qnode_pp.getAllExternalDescendants();
- final int lec_ext_nodes = qnode_ext_nodes.size() - 1;
- final int p_ext_nodes = p.getNumberOfExternalNodes() - 1;
- final List<String> qnode_ext_nodes_names = new ArrayList<>();
- for( final PhylogenyNode qnode_ext_node : qnode_ext_nodes ) {
+ /* for( final PhylogenyNode qnode_ext_node : qnode_ext_nodes ) {
String name = qnode_ext_node.getName();
if ( ForesterUtil.isEmptyTrimmed( name ) ) {
throw new IllegalArgumentException( "external node(s) with empty names found" );
if ( !name.equals( query ) ) {
qnode_ext_nodes_names.add( name );
}
- }
- final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( qnode_ext_nodes_names, separator );
- final Result2 res = new Result2();
- if ( greatest_common_prefix.length() < 1 ) {
- res.addWarning( "No greatest common prefix" );
- //res.setGreatestCommonPrefix( "" );
- }
- else {
- // res.setGreatestCommonPrefix( greatest_common_prefix );
- // res.addGreatestCommonPrefix( prefix, confidence, separator ); //TODO
- }
- if ( qnode_pp.isRoot() ) {
- res.addWarning( "Least Encompassing Clade is entire tree" );
- }
- res.setLeastEncompassingCladeSize( lec_ext_nodes );
- res.setTreeSize( p_ext_nodes );
-
- final String conf = obtainConfidence( qnode_pp );
+ }*/
+ // if ( greatest_common_prefix.length() < 1 ) {
+ // res.addWarning( "No greatest common prefix" );
+ //res.setGreatestCommonPrefix( "" );
+ // }
+ // else {
+ // // res.setGreatestCommonPrefix( greatest_common_prefix );
+ // res.addGreatestCommonPrefix( prefix, confidence, separator ); //TODO
+ // }
+ // if ( qnode_pp.isRoot() ) {
+ // res.addWarning( "Least Encompassing Clade is entire tree" );
+ // }
+ /* final String conf = obtainConfidence( qnode_pp );
if ( conf != null ) {
res.setGreatestCommonCladeSubtreeConfidence(conf);
- }
-
- final String greatest_common_prefix_up[] = analyzeSiblings( qnode_p, qnode_pp, separator );
+ }*/
+ /* final String greatest_common_prefix_up[] = analyzeSiblings( qnode_p, qnode_pp, separator );
res.setGreatestCommonPrefixUp( greatest_common_prefix_up[ 0 ] );
if ( greatest_common_prefix_up[ 1 ] != null ) {
res.setGreatestCommonCladeUpSubtreeConfidence( greatest_common_prefix_up[ 1 ] );
res.setGreatestCommonPrefixDown( greatest_common_prefix_down[ 0 ] );
if ( greatest_common_prefix_down[ 1 ] != null ) {
res.setGreatestCommonCladeDownSubtreeConfidence( greatest_common_prefix_down[ 1 ] );
- }
+ }*/
return res;
}
-
-
private final static String[] analyzeSiblings( final PhylogenyNode child,
final PhylogenyNode parent,
final String separator ) {
final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( ext_nodes_names, separator );
return new String[] { greatest_common_prefix, conf };
}
-
+
private final static String obtainConfidence( final PhylogenyNode n ) {
if ( n.getBranchData().getConfidences() != null && n.getBranchData().getConfidences().size() > 0 ) {
final List<Confidence> confidences = n.getBranchData().getConfidences();
import java.io.File;
import java.util.List;
+import java.util.regex.Pattern;
import org.forester.io.parsers.PhylogenyParser;
import org.forester.io.parsers.util.ParserUtils;
System.out.println( "Clade analysis 3 failed" );
failed = true;
}
+ // Run the pattern-based clade analysis test and report it under its own number.
+ if ( !testCladeAnalysis4() ) {
+     System.out.println( "Clade analysis 4 failed" );
+     failed = true;
+ }
if ( !failed ) {
System.out.println( "OK" );
}
if ( !testCladeAnalysis3() ) {
return false;
}
+ if ( !testCladeAnalysis4() ) {
+ return false;
+ }
return true;
}
}
return true;
}
+
+ /**
+  * Smoke test for the pattern-based {@code Analysis2.execute}: parses the first
+  * phylogeny from "pplacer_2.tre", runs the analysis for all nodes selected by
+  * the query pattern, post-processes the result with a cutoff of 0.3 and prints it.
+  *
+  * @return {@code false} if any exception is thrown, {@code true} otherwise
+  */
+ private static boolean testCladeAnalysis4() {
+     try {
+         final File tree_file = new File( PATH_TO_TEST_DATA + "pplacer_2.tre" );
+         final PhylogenyFactory phylogeny_factory = ParserBasedPhylogenyFactory.getInstance();
+         final PhylogenyParser parser = ParserUtils.createParserDependingOnFileType( tree_file, true );
+         final Phylogeny phylogeny = phylogeny_factory.create( tree_file, parser )[ 0 ];
+         // Group 1 of the pattern captures the text following "_M=" in a node name.
+         final Pattern query_pattern = Pattern.compile( ".+#\\d+_M=(.+)" );
+         final Result2 result = Analysis2.execute( phylogeny, query_pattern, "." );
+         result.analyzeGreatestCommonPrefixes( 0.3 );
+         System.out.print( result.toString() );
+         System.out.println( "------------------------- " );
+         System.out.println();
+         // TODO: no assertions on the result yet; checks on the greatest common
+         // prefixes, least encompassing clade size, tree size and warnings are
+         // still to be written for this multi-query case.
+         return true;
+     }
+     catch ( final Exception e ) {
+         e.printStackTrace( System.out );
+         return false;
+     }
+ }
}
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<phyloxml xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.phyloxml.org http://www.phyloxml.org/1.20/phyloxml.xsd" xmlns="http://www.phyloxml.org">
+<phylogeny rooted="true" rerootable="true">
+ <clade>
+ <branch_length>0.0</branch_length>
+ <clade>
+ <branch_length>0.0679195</branch_length>
+ <clade>
+ <branch_length>0.21174</branch_length>
+ <clade>
+ <branch_length>0.477305</branch_length>
+ <clade>
+ <branch_length>0.309716</branch_length>
+ <clade>
+ <branch_length>0.0152436</branch_length>
+ <clade>
+ <branch_length>0.0857918</branch_length>
+ <clade>
+ <branch_length>0.162176</branch_length>
+ <clade>
+ <branch_length>9.756E-6</branch_length>
+ <clade>
+ <branch_length>0.0802987</branch_length>
+ <clade>
+ <branch_length>0.0684959</branch_length>
+ <clade>
+ <branch_length>0.0761231</branch_length>
+ <clade>
+ <branch_length>0.107021</branch_length>
+ <clade>
+ <branch_length>1.14092</branch_length>
+ <clade>
+ <name>A.1.1.1</name>
+ <branch_length>1.0E-6</branch_length>
+ </clade>
+ <clade>
+ <name>A.1.1.2</name>
+ <branch_length>0.043972</branch_length>
+ </clade>
+ </clade>
+ <clade>
+ <name>CED9_CAEBR_#5_M=0.0277996</name>
+ <branch_length>1.49689</branch_length>
+ </clade>
+ </clade>
+ <clade>
+ <name>A.1.1.3</name>
+ <branch_length>1.11622</branch_length>
+ </clade>
+ </clade>
+ <clade>
+ <name>CED9_CAEBR_#6_M=0.0273544</name>
+ <branch_length>1.58319</branch_length>
+ </clade>
+ </clade>
+ <clade>
+ <branch_length>0.760242</branch_length>
+ <clade>
+ <name>A.1.2.1</name>
+ <branch_length>0.130667</branch_length>
+ </clade>
+ <clade>
+ <name>A.1.2.2</name>
+ <branch_length>0.127953</branch_length>
+ </clade>
+ </clade>
+ </clade>
+ <clade>
+ <name>CED9_CAEBR_#4_M=0.0552666</name>
+ <branch_length>1.60222</branch_length>
+ </clade>
+ </clade>
+ <clade>
+ <branch_length>7.591E-6</branch_length>
+ <clade>
+ <name>A.2.1.1</name>
+ <branch_length>1.00994</branch_length>
+ </clade>
+ <clade>
+ <name>CED9_CAEBR_#3_M=0.0552703</name>
+ <branch_length>1.60221</branch_length>
+ </clade>
+ </clade>
+ </clade>
+ <clade>
+ <name>CED9_CAEBR_#0_M=0.380211</name>
+ <branch_length>1.54796</branch_length>
+ </clade>
+ </clade>
+ <clade>
+ <branch_length>1.257E-5</branch_length>
+ <clade>
+ <branch_length>1.11517</branch_length>
+ <clade>
+ <branch_length>0.0852309</branch_length>
+ <clade>
+ <name>A.3.1.1</name>
+ <branch_length>0.022644</branch_length>
+ </clade>
+ <clade>
+ <name>A.3.1.1</name>
+ <branch_length>0.017626</branch_length>
+ </clade>
+ </clade>
+ <clade>
+ <name>A.3.2.1</name>
+ <branch_length>0.156409</branch_length>
+ </clade>
+ </clade>
+ <clade>
+ <name>CED9_CAEBR_#2_M=0.224819</name>
+ <branch_length>1.56994</branch_length>
+ </clade>
+ </clade>
+ </clade>
+ <clade>
+ <name>CED9_CAEBR_#1_M=0.229279</name>
+ <branch_length>1.56987</branch_length>
+ </clade>
+ </clade>
+ <clade>
+ <name>C.5</name>
+ <branch_length>0.367867</branch_length>
+ <taxonomy>
+ </taxonomy>
+ </clade>
+ </clade>
+ <clade>
+ <name>A.6</name>
+ <branch_length>0.030507</branch_length>
+ </clade>
+ </clade>
+ <clade>
+ <name>A.7</name>
+ <branch_length>0.026535</branch_length>
+ </clade>
+ </clade>
+ <clade>
+ <name>A.8</name>
+ <branch_length>0.035019</branch_length>
+ </clade>
+ <clade>
+ <name>B.9</name>
+ <branch_length>1.0E-6</branch_length>
+ </clade>
+ </clade>
+</phylogeny>
+</phyloxml>
\ No newline at end of file