X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fclade_analysis%2FAnalysisMulti.java;h=3970e5d16310521b0fd7c34381aeb9d16fa3bd47;hb=1706deea223bc1a30d170596192d726b6847b7eb;hp=219588c579fe1a6736aa458e92e82e8a5736822e;hpb=31c6a5701649241d2f8a493b09d2871acd146d43;p=jalview.git diff --git a/forester/java/src/org/forester/clade_analysis/AnalysisMulti.java b/forester/java/src/org/forester/clade_analysis/AnalysisMulti.java index 219588c..3970e5d 100644 --- a/forester/java/src/org/forester/clade_analysis/AnalysisMulti.java +++ b/forester/java/src/org/forester/clade_analysis/AnalysisMulti.java @@ -31,54 +31,62 @@ package org.forester.clade_analysis; import java.util.ArrayList; -import java.util.Collections; import java.util.List; +import java.util.SortedMap; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; -import org.forester.phylogeny.data.Confidence; +import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.util.ForesterUtil; +import org.forester.util.UserException; public final class AnalysisMulti { - private final static String UNKNOWN = "?"; - public final static double DEFAULT_CUTOFF_FOR_SPECIFICS = 0.5; - public final static String DEFAULT_SEPARATOR = "."; - public final static Pattern DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE = Pattern.compile( ".+#\\d+_M=(.+)" ); - + private final static String UNKNOWN = "?"; + public final static double DEFAULT_CUTOFF_FOR_SPECIFICS = 0.5; + public final static String DEFAULT_SEPARATOR = "."; + public final static Pattern DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE = Pattern.compile( "_#\\d+_M=(.+)" ); - public static ResultMulti execute( final Phylogeny p ) { + public static ResultMulti execute( final Phylogeny p ) throws UserException { return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, DEFAULT_SEPARATOR, DEFAULT_CUTOFF_FOR_SPECIFICS ); } - - public static ResultMulti execute( final Phylogeny p, final String separator ) { - return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, separator , DEFAULT_CUTOFF_FOR_SPECIFICS ); + + public static ResultMulti execute( final Phylogeny p, final String separator ) throws UserException { + return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, separator, DEFAULT_CUTOFF_FOR_SPECIFICS ); } - - public static ResultMulti execute( final Phylogeny p, final String separator, final double cutoff_for_specifics ) { - return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, separator , cutoff_for_specifics ); + + public static ResultMulti execute( final Phylogeny p, final String separator, final double cutoff_for_specifics ) + throws UserException { + return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, separator, cutoff_for_specifics ); } - - public static ResultMulti execute( final Phylogeny p, final double cutoff_for_specifics ) { - return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, DEFAULT_SEPARATOR , cutoff_for_specifics ); + + public static ResultMulti execute( final Phylogeny p, final double cutoff_for_specifics ) throws UserException { + return execute( p, DEFAULT_QUERY_PATTERN_FOR_PPLACER_TYPE, DEFAULT_SEPARATOR, cutoff_for_specifics ); } public static ResultMulti execute( final Phylogeny p, - final Pattern query, - final String separator, - final double cutoff_for_specifics ) { + final Pattern query, + final String separator, + final double cutoff_for_specifics ) + throws UserException { + if ( ForesterUtil.isEmpty( separator ) ) { + throw new IllegalArgumentException( "separator must not be null or empty" ); + } + cleanUpExternalNames( p, separator ); final List qnodes = p.getNodes( query ); final ResultMulti res = new ResultMulti(); + res.setQueryNamePrefix( obtainQueryPrefix( query, qnodes ) ); + res.setTotalNumberOfMatches( qnodes.size() ); + res.setReferenceTreeNumberOfExternalNodes( p.getNumberOfExternalNodes() - qnodes.size() ); for( int i = 0; i < qnodes.size(); ++i ) { final PhylogenyNode qnode = qnodes.get( i ); - System.out.println( ">>" + qnode.getName() ); if ( qnode.isRoot() ) { - throw new IllegalArgumentException( "Query " + query + " is root." ); + throw new UserException( "query " + query + " is root" ); } if ( qnode.getParent().isRoot() ) { - throw new IllegalArgumentException( "Parent of query " + query + " is root." ); + throw new UserException( "parent of query " + query + " is root" ); } PhylogenyNode qnode_p = qnode.getParent(); PhylogenyNode qnode_pp = qnode.getParent().getParent(); @@ -89,26 +97,22 @@ public final class AnalysisMulti { while ( qnode_pp.getNumberOfDescendants() == 1 ) { qnode_pp = qnode_pp.getParent(); } - final List qnode_ext_nodes_names = new ArrayList<>(); + final List qnode_ext_nodes_names = new ArrayList(); for( final PhylogenyNode qnode_ext_node : qnode_pp.getAllExternalDescendants() ) { final String name = qnode_ext_node.getName(); - if ( ForesterUtil.isEmptyTrimmed( name ) ) { - throw new IllegalArgumentException( "external node(s) with empty names found" ); - } final Matcher m = query.matcher( name ); if ( !m.find() ) { qnode_ext_nodes_names.add( name ); } } final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( qnode_ext_nodes_names, separator ); - // System.out.println( greatest_common_prefix ); - Matcher matcher = query.matcher( qnode.getName() ); + final Matcher matcher = query.matcher( qnode.getName() ); String conf_str = null; if ( matcher.find() ) { conf_str = matcher.group( 1 ); } else { - throw new IllegalStateException( "pattern did not match -- this should have never happened!" ); + throw new IllegalStateException( "query pattern does not match [this should have never happened!]" ); } final double conf = Double.parseDouble( conf_str ); if ( !ForesterUtil.isEmpty( greatest_common_prefix ) ) { @@ -117,9 +121,7 @@ public final class AnalysisMulti { else { res.addGreatestCommonPrefix( UNKNOWN, conf ); } - //final String greatest_common_prefix_up[] = analyzeSiblings( qnode_p, qnode_pp, separator, query, res ); final String greatest_common_prefix_up = analyzeSiblings( qnode_p, qnode_pp, separator, query ); - System.out.println( "greatest_common_prefix_up=" + greatest_common_prefix_up + " " + conf ); if ( !ForesterUtil.isEmpty( greatest_common_prefix_up ) ) { res.addGreatestCommonPrefixUp( greatest_common_prefix_up, conf ); } @@ -127,7 +129,6 @@ public final class AnalysisMulti { res.addGreatestCommonPrefixUp( UNKNOWN, conf ); } final String greatest_common_prefix_down = analyzeSiblings( qnode, qnode_p, separator, query ); - System.out.println( "greatest_common_prefix_down=" + greatest_common_prefix_down + " " + conf ); if ( !ForesterUtil.isEmpty( greatest_common_prefix_down ) ) { res.addGreatestCommonPrefixDown( greatest_common_prefix_down, conf ); } @@ -139,57 +140,215 @@ public final class AnalysisMulti { return res; } + private final static String obtainQueryPrefix( final Pattern query, final List qnodes ) + throws UserException { + String query_name_prefix = null; + for( final PhylogenyNode n : qnodes ) { + final String name = n.getName(); + final Matcher matcher = query.matcher( name ); + if ( matcher.find() ) { + final String prefix = name.substring( 0, matcher.start() ); + if ( ForesterUtil.isEmpty( prefix ) ) { + throw new UserException( "query nodes with empty label prefix found: \"" + prefix + "\"" ); + } + if ( query_name_prefix == null ) { + query_name_prefix = prefix; + } + else if ( !query_name_prefix.equals( prefix ) ) { + throw new UserException( "query nodes with different label prefixes found: \"" + query_name_prefix + + "\" and \"" + prefix + "\"" ); + } + } + } + return query_name_prefix; + } + + private final static void cleanUpExternalNames( final Phylogeny p, final String separator ) throws UserException { + final Pattern pattern1 = Pattern.compile( "\\Q" + separator + "\\E" + "\\s+" ); + final Pattern pattern2 = Pattern.compile( "\\s+" + "\\Q" + separator + "\\E" ); + final Pattern pattern3 = Pattern.compile( "\\Q" + separator + separator + "\\E" ); + final PhylogenyNodeIterator it = p.iteratorExternalForward(); + while ( it.hasNext() ) { + final PhylogenyNode node = it.next(); + final String name = node.getName().trim(); + if ( ForesterUtil.isEmpty( name ) ) { + throw new UserException( "external node(s) with empty annotation found" ); + } + if ( name.endsWith( separator ) ) { + throw new UserException( "illegally formatted annotation found: annotations cannot end with separator: " + + name ); + } + if ( name.startsWith( separator ) ) { + throw new UserException( "illegally formatted annotation found: annotations cannot start with separator: " + + name ); + } + if ( pattern1.matcher( name ).find() ) { + throw new UserException( "illegally formatted annotation found: separator followed by whitespace: " + + name ); + } + if ( pattern2.matcher( name ).find() ) { + throw new UserException( "illegally formatted annotation found: whitespace followed by separator: " + + name ); + } + if ( pattern3.matcher( name ).find() ) { + throw new UserException( "illegally formatted annotation found: empty annotation level: " + name ); + } + node.setName( name.replaceAll( "\\s+", " " ) ); + } + } + private final static String analyzeSiblings( final PhylogenyNode child, final PhylogenyNode parent, final String separator, final Pattern query ) { final int child_index = child.getChildNodeIndex(); - final List ext_nodes_names = new ArrayList<>(); + final List ext_nodes_names = new ArrayList(); final List descs = parent.getDescendants(); - // String conf = null; for( int i = 0; i < descs.size(); ++i ) { if ( i != child_index ) { final PhylogenyNode d = descs.get( i ); for( final PhylogenyNode n : d.getAllExternalDescendants() ) { final String name = n.getName(); - if ( ForesterUtil.isEmptyTrimmed( name ) ) { - throw new IllegalArgumentException( "external node(s) with empty names found" ); - } final Matcher m = query.matcher( name ); if ( !m.find() ) { ext_nodes_names.add( name ); } } - // if ( descs.size() == 2 ) { - // conf = obtainConfidence( d ); - // } } } final String greatest_common_prefix = ForesterUtil.greatestCommonPrefix( ext_nodes_names, separator ); return greatest_common_prefix; } - private final static String obtainConfidence( final PhylogenyNode n ) { - if ( n.getBranchData().getConfidences() != null && n.getBranchData().getConfidences().size() > 0 ) { - final List confidences = n.getBranchData().getConfidences(); - boolean not_first = false; - Collections.sort( confidences ); - final StringBuilder sb = new StringBuilder(); - for( final Confidence confidence : confidences ) { - final double value = confidence.getValue(); - if ( value != Confidence.CONFIDENCE_DEFAULT_VALUE ) { - if ( not_first ) { - sb.append( " / " ); + public final static void performMapping( final Pattern pattern, + final SortedMap map, + final Phylogeny p, + final boolean verbose ) + throws UserException { + if ( verbose ) { + System.out.println(); + System.out.println( "Id to annotation mapping:" ); + } + final PhylogenyNodeIterator it = p.iteratorExternalForward(); + while ( it.hasNext() ) { + final PhylogenyNode node = it.next(); + final String name = node.getName().trim(); + if ( ForesterUtil.isEmpty( name ) ) { + throw new UserException( "external node with empty name found" ); + } + final Matcher m = pattern.matcher( name ); + if ( !m.find() ) { + if ( !map.containsKey( name ) ) { + throw new UserException( "no mapping for \"" + name + "\" found" ); + } + node.setName( map.get( name ).trim() ); + if ( verbose ) { + System.out.println( name + " -> " + node.getName() ); + } + } + } + if ( verbose ) { + System.out.println(); + } + } + + public final static void performExtraProcessing1( final Pattern query_pattern, + final Phylogeny p, + final String extra_sep, + final boolean keep, + final String annotation_sep, + final boolean verbose ) + throws UserException { + if ( verbose ) { + System.out.println(); + System.out.println( "Extra annotation processing:" ); + } + final PhylogenyNodeIterator it = p.iteratorExternalForward(); + while ( it.hasNext() ) { + final PhylogenyNode node = it.next(); + final String name = node.getName().trim(); + if ( ForesterUtil.isEmpty( name ) ) { + throw new UserException( "external node with empty name found" ); + } + if ( !query_pattern.matcher( name ).find() ) { + final StringBuilder sb = new StringBuilder(); + final int last_index = name.lastIndexOf( extra_sep ); + if ( last_index >= 0 ) { + final String annotation = name.substring( last_index + 1 ).trim(); + if ( ForesterUtil.isEmptyTrimmed( annotation ) ) { + throw new UserException( "llegally formatted annotation: " + name ); + } + if ( keep ) { + final String extra = name.substring( 0, last_index ).trim(); + sb.append( annotation ); + if ( !ForesterUtil.isEmpty( extra ) ) { + sb.append( annotation_sep ); + sb.append( extra ); + } } else { - not_first = true; + sb.append( annotation ); + } + node.setName( sb.toString() ); + if ( verbose ) { + System.out.println( name + " -> " + node.getName() ); + } + } + } + } + if ( verbose ) { + System.out.println(); + } + } + + public final static void performSpecialProcessing1( final Pattern query_pattern, + final Phylogeny p, + final String annotation_sep, + final Pattern special_pattern, + final boolean verbose ) + throws UserException { + if ( verbose ) { + System.out.println(); + System.out.println( "Special annotation processing:" ); + } + final PhylogenyNodeIterator it = p.iteratorExternalForward(); + while ( it.hasNext() ) { + final PhylogenyNode node = it.next(); + final String name = node.getName().trim(); + if ( ForesterUtil.isEmpty( name ) ) { + throw new UserException( "external node with empty name found" ); + } + if ( !query_pattern.matcher( name ).find() ) { + final Matcher special_m = special_pattern.matcher( name ); + if ( special_m.matches() ) { + final int c = special_m.groupCount(); + if ( c < 1 ) { + throw new UserException( "illegal special pattern: " + special_pattern + + " (need at least one capturing group)" ); + } + final StringBuilder sb = new StringBuilder(); + for( int i = 1; i <= c; ++i ) { + final String g = special_m.group( i ); + if ( !ForesterUtil.isEmpty( g ) ) { + if ( i > 1 ) { + sb.append( annotation_sep ); + } + sb.append( special_m.group( i ) ); + } } - sb.append( ( ForesterUtil.isEmpty( confidence.getType() ) ? "confidence: " - : confidence.getType() + ": " ) + value ); + node.setName( sb.toString() ); + if ( verbose ) { + System.out.println( name + " -> " + node.getName() ); + } + } + else { + throw new UserException( "illegally formatted annotation for special processing: " + name + + " (expected pattern: " + special_pattern + ")" ); } } - return sb.toString(); } - return null; + if ( verbose ) { + System.out.println(); + } } }