import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import org.forester.io.parsers.FastaParser;
import org.forester.io.parsers.PhylogenyParser;
import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
throw new CloneNotSupportedException();
}
+ /**
+  * Matches the name of every external node of the given phylogeny against
+  * {@code FastaParser.FASTA_DESC_LINE} and, for each name the pattern matches
+  * from its start, copies the non-empty captured groups into the node data:
+  * groups 1 and 2 (both required) become the accession source and value of
+  * the first sequence, group 3 becomes the name of the first sequence, and
+  * group 4 becomes the scientific name of the first taxonomy.
+  *
+  * @param phy the phylogeny whose external nodes are inspected and updated
+  * @return true if the pattern matched the name of at least one external
+  *         node (even if all captured groups were empty), false otherwise
+  */
+ public static boolean extractFastaInformation( final Phylogeny phy ) {
+     boolean any_matched = false;
+     final PhylogenyNodeIterator ext_nodes = phy.iteratorExternalForward();
+     while ( ext_nodes.hasNext() ) {
+         final PhylogenyNode ext_node = ext_nodes.next();
+         if ( ForesterUtil.isEmpty( ext_node.getName() ) ) {
+             continue;
+         }
+         final Matcher desc_matcher = FastaParser.FASTA_DESC_LINE.matcher( ext_node.getName() );
+         if ( !desc_matcher.lookingAt() ) {
+             continue;
+         }
+         any_matched = true;
+         final String accession_source = desc_matcher.group( 1 );
+         final String accession_value = desc_matcher.group( 2 );
+         final String sequence_name = desc_matcher.group( 3 );
+         final String scientific_name = desc_matcher.group( 4 );
+         // An accession is only stored when both its value and its source were captured.
+         final boolean has_accession = !ForesterUtil.isEmpty( accession_source )
+                 && !ForesterUtil.isEmpty( accession_value );
+         if ( has_accession ) {
+             ForesterUtil.ensurePresenceOfSequence( ext_node );
+             ext_node.getNodeData().getSequence( 0 )
+                     .setAccession( new Accession( accession_value, accession_source ) );
+         }
+         if ( !ForesterUtil.isEmpty( sequence_name ) ) {
+             ForesterUtil.ensurePresenceOfSequence( ext_node );
+             ext_node.getNodeData().getSequence( 0 ).setName( sequence_name );
+         }
+         if ( !ForesterUtil.isEmpty( scientific_name ) ) {
+             ForesterUtil.ensurePresenceOfTaxonomy( ext_node );
+             ext_node.getNodeData().getTaxonomy( 0 ).setScientificName( scientific_name );
+         }
+     }
+     return any_matched;
+ }
+
public static DescriptiveStatistics calculatBranchLengthStatistics( final Phylogeny phy ) {
final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
}
}
- final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy ) {
+ /**
+ * Checks whether the names of the internal nodes of the given phylogeny
+ * could all be confidence values. Every non-root internal node with a
+ * non-empty name must have a name that parses as a double in the range
+ * 0 to 100 (inclusive). Nodes without a name are ignored, so a phylogeny
+ * with no named internal nodes yields true.
+ *
+ * @param phy the phylogeny to inspect (not modified)
+ * @return false as soon as one named non-root internal node has a name
+ *         that is not a number, is NaN, or lies outside [0, 100];
+ *         true otherwise
+ */
+ final static public boolean isInternalNamesLookLikeConfidences( final Phylogeny phy ) {
final PhylogenyNodeIterator it = phy.iteratorPostorder();
while ( it.hasNext() ) {
final PhylogenyNode n = it.next();
- if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) {
+ if ( !n.isExternal() && !n.isRoot() ) {
if ( !ForesterUtil.isEmpty( n.getName() ) ) {
- double d = -1.0;
+ double value = -1;
try {
- d = Double.parseDouble( n.getName() );
+ value = Double.parseDouble( n.getName() );
}
- catch ( final Exception e ) {
- d = -1.0;
+ catch ( final NumberFormatException e ) {
+ return false;
}
- if ( d >= 0.0 ) {
- n.getBranchData().addConfidence( new Confidence( d, "" ) );
- n.setName( "" );
+ // Double.parseDouble accepts "NaN", and NaN fails both range
+ // comparisons, so it has to be rejected explicitly.
+ if ( Double.isNaN( value ) || ( value < 0.0 ) || ( value > 100 ) ) {
+ return false;
}
}
}
}
+ return true;
+ }
+
+ /**
+  * Visits every node of the given phylogeny in post-order and hands each one
+  * to {@code transferInternalNodeNameToConfidence}, which decides per node
+  * whether its name is converted into a confidence value.
+  *
+  * @param phy             the phylogeny whose nodes are visited
+  * @param confidence_type the type passed on for every confidence that is created
+  */
+ final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy, final String confidence_type ) {
+     for( final PhylogenyNodeIterator nodes = phy.iteratorPostorder(); nodes.hasNext(); ) {
+         final PhylogenyNode current = nodes.next();
+         transferInternalNodeNameToConfidence( confidence_type, current );
+     }
+ }
+
+ /**
+  * Converts the name of a single node into a confidence value when possible.
+  * Nothing happens if the node is external, if its branch data already has
+  * confidences, if its name is empty, or if the name does not parse as a
+  * double that is greater than or equal to zero. Otherwise a confidence with
+  * the parsed value and the given type is added to the branch data and the
+  * node name is reset to the empty string.
+  *
+  * @param confidence_type the type given to the newly created confidence
+  * @param n               the node to inspect and possibly update
+  */
+ private static void transferInternalNodeNameToConfidence( final String confidence_type, final PhylogenyNode n ) {
+     if ( n.isExternal() || n.getBranchData().isHasConfidences() ) {
+         return;
+     }
+     if ( ForesterUtil.isEmpty( n.getName() ) ) {
+         return;
+     }
+     final double parsed;
+     try {
+         parsed = Double.parseDouble( n.getName() );
+     }
+     catch ( final Exception e ) {
+         // A name that cannot be parsed is left untouched.
+         return;
+     }
+     if ( parsed >= 0.0 ) {
+         n.getBranchData().addConfidence( new Confidence( parsed, confidence_type ) );
+         n.setName( "" );
+     }
}
final static public void transferNodeNameToField( final Phylogeny phy,