import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
+import java.util.Map;
import java.util.Set;
-import java.util.SortedMap;
-import java.util.TreeMap;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.forester.io.parsers.FastaParser;
import org.forester.io.parsers.PhylogenyParser;
import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
throw new CloneNotSupportedException();
}
+ /**
+  * Populates sequence and taxonomy data of external nodes from FASTA-style node names.
+  * <p>
+  * For every external node with a non-empty name, the name is matched from its start
+  * (via {@code lookingAt()}) against {@link FastaParser#FASTA_DESC_LINE}. On a match,
+  * capture groups 1 to 4 are read as accession source, accession value, sequence name
+  * and taxonomy scientific name; each non-empty value is written to the node's first
+  * sequence or first taxonomy entry. Nodes with an empty or non-matching name are
+  * left untouched.
+  *
+  * @param phy the phylogeny whose external nodes are updated in place
+  */
+ public static void extractFastaInformation( final Phylogeny phy ) {
+     for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
+         final PhylogenyNode node = iter.next();
+         if ( !ForesterUtil.isEmpty( node.getName() ) ) {
+             final Matcher name_m = FastaParser.FASTA_DESC_LINE.matcher( node.getName() );
+             if ( name_m.lookingAt() ) {
+                 final String acc_source = name_m.group( 1 );
+                 final String acc = name_m.group( 2 );
+                 final String seq_name = name_m.group( 3 );
+                 final String tax_sn = name_m.group( 4 );
+                 // An accession is only recorded when both its source and its value are known.
+                 if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) {
+                     ForesterUtil.ensurePresenceOfSequence( node );
+                     node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) );
+                 }
+                 if ( !ForesterUtil.isEmpty( seq_name ) ) {
+                     ForesterUtil.ensurePresenceOfSequence( node );
+                     node.getNodeData().getSequence( 0 ).setName( seq_name );
+                 }
+                 if ( !ForesterUtil.isEmpty( tax_sn ) ) {
+                     ForesterUtil.ensurePresenceOfTaxonomy( node );
+                     node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn );
+                 }
+             }
+         }
+     }
+ }
+
public static DescriptiveStatistics calculatBranchLengthStatistics( final Phylogeny phy ) {
final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {
phy.externalNodesHaveChanged();
}
+ /**
+  * Partitions the external nodes of a phylogeny into groups.
+  * <p>
+  * All node indicators are first reset to zero. The external nodes are then visited in
+  * forward order; each node whose indicator is still zero is handed to
+  * {@code divideIntoSubTreesHelper}, which returns one group of external nodes and
+  * marks them with a non-zero indicator so that they are skipped afterwards.
+  *
+  * @param phy the phylogeny to divide; its node indicators are overwritten
+  * @param min_distance_to_root distance-to-root threshold passed to the helper, must be &gt; 0
+  * @return the groups of external nodes, in the order in which they were found
+  * @throws IllegalArgumentException if {@code min_distance_to_root} is not greater than zero
+  * @throws RuntimeException if the helper produces an empty group
+  */
+ public final static List<List<PhylogenyNode>> divideIntoSubTrees( final Phylogeny phy,
+         final double min_distance_to_root ) {
+     if ( min_distance_to_root <= 0 ) {
+         throw new IllegalArgumentException( "attempt to use min distance to root of: " + min_distance_to_root );
+     }
+     final List<List<PhylogenyNode>> l = new ArrayList<List<PhylogenyNode>>();
+     setAllIndicatorsToZero( phy );
+     for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
+         final PhylogenyNode n = it.next();
+         if ( n.getIndicator() != 0 ) {
+             // Already assigned to a group by an earlier helper call.
+             continue;
+         }
+         final List<PhylogenyNode> sub = divideIntoSubTreesHelper( n, min_distance_to_root );
+         // Sanity check on the new group itself: testing the outer list right after
+         // adding to it could never fail.
+         if ( sub.isEmpty() ) {
+             throw new RuntimeException( "this should not have happened" );
+         }
+         l.add( sub );
+     }
+     return l;
+ }
+
public static List<PhylogenyNode> getAllDescendants( final PhylogenyNode node ) {
final List<PhylogenyNode> descs = new ArrayList<PhylogenyNode>();
final Set<Long> encountered = new HashSet<Long>();
* null is returned.
*
*/
- public static SortedMap<Taxonomy, Integer> obtainDistinctTaxonomyCounts( final PhylogenyNode node ) {
+ public static Map<Taxonomy, Integer> obtainDistinctTaxonomyCounts( final PhylogenyNode node ) {
final List<PhylogenyNode> descs = node.getAllExternalDescendants();
- final SortedMap<Taxonomy, Integer> tax_map = new TreeMap<Taxonomy, Integer>();
+ final Map<Taxonomy, Integer> tax_map = new HashMap<Taxonomy, Integer>();
for( final PhylogenyNode n : descs ) {
if ( !n.getNodeData().isHasTaxonomy() || n.getNodeData().getTaxonomy().isEmpty() ) {
return null;
return nodes;
}
+ /**
+  * Resets the indicator of every node visited by a postorder traversal to zero.
+  *
+  * @param phy the phylogeny whose node indicators are cleared
+  */
+ public static void setAllIndicatorsToZero( final Phylogeny phy ) {
+     final byte cleared = 0;
+     final PhylogenyNodeIterator nodes = phy.iteratorPostorder();
+     while ( nodes.hasNext() ) {
+         final PhylogenyNode current = nodes.next();
+         current.setIndicator( cleared );
+     }
+ }
+
/**
* Convenience method.
* Sets value for the first confidence value (created if not present, values overwritten otherwise).
}
}
+ /**
+  * Collects one group of external nodes for {@code divideIntoSubTrees}.
+  * <p>
+  * Starting at {@code node}, {@code moveTowardsRoot} selects an ancestor (or the node
+  * itself); all external descendants of that selected node form the group. Each member
+  * has its indicator set to 1.
+  *
+  * @param node the external node to start from
+  * @param min_distance_to_root distance-to-root threshold forwarded to {@code moveTowardsRoot}
+  * @return the external descendants of the selected node
+  * @throws RuntimeException if a descendant already carries a non-zero indicator
+  */
+ private final static List<PhylogenyNode> divideIntoSubTreesHelper( final PhylogenyNode node,
+         final double min_distance_to_root ) {
+     final PhylogenyNode subtree_root = moveTowardsRoot( node, min_distance_to_root );
+     final List<PhylogenyNode> members = new ArrayList<PhylogenyNode>();
+     for( final PhylogenyNode leaf : subtree_root.getAllExternalDescendants() ) {
+         if ( leaf.getIndicator() == 0 ) {
+             // Mark the leaf as assigned before recording it.
+             leaf.setIndicator( ( byte ) 1 );
+             members.add( leaf );
+         }
+         else {
+             throw new RuntimeException( "this should not have happened" );
+         }
+     }
+     return members;
+ }
+
/**
* Calculates the distance between PhylogenyNodes n1 and n2.
* PRECONDITION: n1 is a descendant of n2.
return my_s.indexOf( my_query ) >= 0;
}
else {
- return my_s.equals( my_query );
+ return Pattern.compile( "(\\b|_)" + Pattern.quote( my_query ) + "(\\b|_)" ).matcher( my_s ).find();
+ }
+ }
+
+ /**
+  * Walks from {@code node} up the parent chain for as long as the current node's
+  * distance to the root is greater than {@code min_distance_to_root}.
+  *
+  * @param node the node to start from
+  * @param min_distance_to_root the distance-to-root threshold
+  * @return the last node visited whose distance to the root exceeded the threshold,
+  *         or {@code node} itself if its own distance does not exceed it
+  */
+ private final static PhylogenyNode moveTowardsRoot( final PhylogenyNode node, final double min_distance_to_root ) {
+ PhylogenyNode n = node;
+ PhylogenyNode prev = node;
+ while ( min_distance_to_root < n.calculateDistanceToRoot() ) {
+ // Remember the current node before stepping up to its parent.
+ prev = n;
+ n = n.getParent();
}
+ return prev;
}
+ /**
+  * Selects between node name, sequence and taxonomy; presumably the property given
+  * priority when sorting descendants (inferred from the name, confirm against callers).
+  * Constants are declared in alphabetical order.
+  * NOTE(review): declaration order determines ordinal() and values() order; confirm
+  * that no caller depends on a different ordering.
+  */
public static enum DESCENDANT_SORT_PRIORITY {
- TAXONOMY, SEQUENCE, NODE_NAME;
+ NODE_NAME, SEQUENCE, TAXONOMY;
}
+ /**
+  * Identifies a single data field of a phylogeny node (clade name, sequence name or
+  * symbol, or one of several taxonomy fields). Constants are declared in alphabetical order.
+  * NOTE(review): declaration order determines ordinal() and values() order; confirm
+  * that no caller depends on a different ordering.
+  */
public static enum PhylogenyNodeField {
CLADE_NAME,
+ SEQUENCE_NAME,
+ SEQUENCE_SYMBOL,
TAXONOMY_CODE,
- TAXONOMY_SCIENTIFIC_NAME,
TAXONOMY_COMMON_NAME,
- SEQUENCE_SYMBOL,
- SEQUENCE_NAME,
+ TAXONOMY_ID,
TAXONOMY_ID_UNIPROT_1,
TAXONOMY_ID_UNIPROT_2,
- TAXONOMY_ID;
+ TAXONOMY_SCIENTIFIC_NAME;
}
}