}
else {
// res.setGreatestCommonPrefix( greatest_common_prefix );
- res.addGreatestCommonPrefix( prefix, confidence );
+ res.addGreatestCommonPrefix( prefix, confidence, separator );
}
if ( qnode_pp.isRoot() ) {
res.addWarning( "Least Encompassing Clade is entire tree" );
// System.out.println( x );
- res1.analyzeGreatestCommonPrefixes( "." );
+ res1.analyzeGreatestCommonPrefixes( );
- final Result2 res2 = new Result2();
+ final Result2 res2 = new Result2(".");
res2.addGreatestCommonPrefix( "A.1.1.1", 0.1 );
res2.addGreatestCommonPrefix( "A.1", 0.7 );
res2.addGreatestCommonPrefix( "A.1.2", 0.1 );
res2.addGreatestCommonPrefix( "B.1", 0.1 );
- res2.analyzeGreatestCommonPrefixes( "." );
+ res2.analyzeGreatestCommonPrefixes( );
- final Result2 res3 = new Result2();
+ final Result2 res3 = new Result2(".");
res3.addGreatestCommonPrefix( "A.1.1.1", 0.7 );
res3.addGreatestCommonPrefix( "A.1", 0.1 );
res3.addGreatestCommonPrefix( "A.1.2", 0.1 );
res3.addGreatestCommonPrefix( "B.1", 0.1 );
- res3.analyzeGreatestCommonPrefixes( "." );
-
-
+ res3.analyzeGreatestCommonPrefixes( );
+
final Result2 res4 = new Result2();
res4.addGreatestCommonPrefix( "A.1.1.1.1", 0.35);
res4.addGreatestCommonPrefix( "A.1.1.1.2", 0.35 );
res4.addGreatestCommonPrefix( "A.1", 0.1 );
res4.addGreatestCommonPrefix( "A.1.2", 0.1 );
res4.addGreatestCommonPrefix( "B.1", 0.1 );
- res4.analyzeGreatestCommonPrefixes( "." );
+ res4.analyzeGreatestCommonPrefixes( );
final Result2 res5 = new Result2();
res5.addGreatestCommonPrefix( "A.1.1.1.1", 0.2);
res5.addGreatestCommonPrefix( "B.1.1", 0.2 );
res5.addGreatestCommonPrefix( "B.1.2", 0.09 );
res5.addGreatestCommonPrefix( "D.1.1.1.1", 0.01 );
- res5.analyzeGreatestCommonPrefixes( "." );
+ res5.analyzeGreatestCommonPrefixes( );
+
+ final Result2 res6 = new Result2();
+ res6.addGreatestCommonPrefix( "A.1.1.1", 0.05 );
+ res6.addGreatestCommonPrefix( "A.1.1.1.1", 0.65 );
+ res6.addGreatestCommonPrefix( "A.1", 0.1 );
+ res6.addGreatestCommonPrefix( "A.1.2", 0.1 );
+ res6.addGreatestCommonPrefix( "B.1", 0.1 );
+ res6.analyzeGreatestCommonPrefixes( );
+
+ final Result2 res7 = new Result2();
+ res7.addGreatestCommonPrefix( "A.1.1.1", 0.07 );
+ res7.addGreatestCommonPrefix( "A.1.1.1.1", 0.9 );
+ res7.addGreatestCommonPrefix( "A.1", 0.01 );
+ res7.addGreatestCommonPrefix( "A.1.2", 0.01 );
+ res7.addGreatestCommonPrefix( "B.1", 0.01 );
+ res7.analyzeGreatestCommonPrefixes( );
+
+ final Result2 res8 = new Result2("_/_");
+ res8.addGreatestCommonPrefix( "AA_/_abc_/_def", 0.07 );
+ res8.addGreatestCommonPrefix( "AA_/_abc_/_sfc", 0.9 );
+ res8.addGreatestCommonPrefix( "AA_/_abc_/_xcd", 0.01 );
+ res8.addGreatestCommonPrefix( "AA_/_abc_/_memr", 0.01 );
+ res8.addGreatestCommonPrefix( "AA_/_abc_/_fkem_/_odem", 0.01 );
+ res8.analyzeGreatestCommonPrefixes( );
+
+ final Result2 res9 = new Result2("_/_");
+ res9.addGreatestCommonPrefix( "AA_/_abc_/_def", 0.07 );
+ res9.addGreatestCommonPrefix( "AA_/_abc_/_sfc", 0.6 );
+ res9.addGreatestCommonPrefix( "AA_/_abc_/_xcd", 0.01 );
+ res9.addGreatestCommonPrefix( "AA_/_abc_/_memr", 0.01 );
+ res9.addGreatestCommonPrefix( "AA_/_abc_/_fkem_/_odem", 0.01 );
+ res9.addGreatestCommonPrefix( "BB_/_fke_/_dme_/_nx2", 0.3 );
+ res9.analyzeGreatestCommonPrefixes( );
}
catch ( final Exception e ) {
e.printStackTrace( System.out );
+
package org.forester.clade_analysis;
+import java.math.BigDecimal;
final class Prefix {
- final String _prefix;
- final double _confidence;
+
+ private final String _prefix; // full prefix text as passed to the constructor
+ private final BigDecimal _confidence; // confidence value; exposed as a double by getConfidence()
+ private final String _separator; // separator used to locate the first element of _prefix
+ private final String _first; // text of _prefix before the first separator, or all of _prefix if it contains none
+
+ Prefix( final String prefix, final String confidence, final String separator ) { // confidence is given as decimal text and parsed by BigDecimal
+ _prefix = prefix;
+ _confidence = new BigDecimal( confidence);
+ _separator = separator ;
+ if ( _prefix.indexOf( _separator ) < 0) { // no separator present: the whole prefix is its own first element
+ _first = _prefix;
+ }
+ else {
+ _first = _prefix.substring( 0, _prefix.indexOf(_separator ) ); // keep only the text before the first separator
+ }
+ }
- Prefix( final String prefix, final double confidence ) {
+ Prefix( final String prefix, final double confidence , final String separator) { // same as the String variant, but confidence is given as a double
_prefix = prefix;
- _confidence = confidence;
+ _confidence = new BigDecimal( confidence); // NOTE(review): BigDecimal(double) stores the exact binary value of the double, not its decimal literal - confirm this is intended
+ _separator = separator ;
+ if ( _prefix.indexOf( _separator ) < 0) { // no separator present: the whole prefix is its own first element
+ _first = _prefix;
+ }
+ else {
+ _first = _prefix.substring( 0, _prefix.indexOf(_separator ) ); // keep only the text before the first separator
+ }
}
-
- String getPrefix() {
+ String getPrefix() { // returns the full prefix text
return _prefix;
}
-
+ String getPrefixFirstElement() { // returns the element computed in the constructor (text before the first separator)
+ return _first;
+ }
double getConfidence() {
- return _confidence;
+ return _confidence.doubleValue(); // converts the stored BigDecimal back to a double
+ }
+
+ @Override
+ public String toString() {
+ return getPrefix() + ": " + getConfidence(); // formatted as "<prefix>: <confidence>"
}
-
-
-
}
package org.forester.clade_analysis;
import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
import java.util.List;
import java.util.Map.Entry;
+import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
public final class Result2 {
- private List<Prefix> _greatest_common_prefix = new ArrayList<Prefix>();
- private String _greatest_common_prefix_up = "";
- private String _greatest_common_prefix_down = "";
- private final List<String> _warnings = new ArrayList<>();
- private int _lec_ext_nodes = 0;
- private int _p_ext_nodes = 0;
+ private final String _separator;
+ private final List<Prefix> _greatest_common_prefixes = new ArrayList<>();
+ private String _greatest_common_prefix_up = "";
+ private String _greatest_common_prefix_down = "";
+ private final List<String> _warnings = new ArrayList<>();
+ private int _lec_ext_nodes = 0;
+ private int _p_ext_nodes = 0;
private String _greatest_common_clade_subtree_confidence = "";
private String _greatest_common_clade_subtree_confidence_up = "";
private String _greatest_common_clade_subtree_confidence_down = "";
+
+ public Result2(final String separator) { // separator is later handed to every Prefix created by addGreatestCommonPrefix and to the analysis
+ _separator = separator;
+ }
+
+ public Result2() { // no-argument variant: uses "." as the separator
+ _separator = ".";//TODO make const somewhere
+ }
void addWarning( final String warning ) {
_warnings.add( warning );
}
void addGreatestCommonPrefix( final String prefix, final double confidence ) {
- _greatest_common_prefix.add( new Prefix(prefix, confidence) );
+ _greatest_common_prefixes.add( new Prefix( prefix, confidence, _separator ) ); // appended to the list, tagged with this result's separator
}
+
+
void setGreatestCommonPrefixUp( final String greatest_common_prefix_up ) {
_greatest_common_prefix_up = greatest_common_prefix_up;
}
_greatest_common_clade_subtree_confidence_down = greatest_common_clade_confidence_down;
}
- // public String getGreatestCommonPrefix() {
- // return _greatest_common_prefix;
- // }
-
+ // public String getGreatestCommonPrefix() {
+ // return _greatest_common_prefix;
+ // }
public String getGreatestCommonPrefixUp() {
return _greatest_common_prefix_up;
}
return _p_ext_nodes;
}
- public void analyzeGreatestCommonPrefixes(final String separator ) {
- final SortedMap<String,Double> map = new TreeMap<String,Double>();
- for( final Prefix prefix : _greatest_common_prefix ) {
- List<String> prefixes = ForesterUtil.spliIntoPrefixes( prefix.getPrefix(), separator );
+ public void analyzeGreatestCommonPrefixes( ) { // instance entry point: analyzes the prefixes collected so far
+ analyzeGreatestCommonPrefixes( _greatest_common_prefixes, _separator ); // delegates to the static variant with this instance's list and separator
+ }
+
+ /**
+  * Summarizes a list of prefixes with confidences and prints the result to standard output.
+  * <p>
+  * Steps, in order:
+  * <ol>
+  * <li>Every sub-prefix returned by ForesterUtil.spliIntoPrefixes for each input prefix becomes a
+  * key of a sorted map; its value is the sum of the confidences of all input prefixes whose text
+  * starts with that key.</li>
+  * <li>The map entries are turned into Prefix objects and sorted by descending confidence.</li>
+  * <li>"cleaned": an entry is dropped when a different entry starts with it and has an equal
+  * confidence (according to ForesterUtil.isEqual).</li>
+  * <li>"collapsed": of the cleaned entries, only the first one (in sorted order) per distinct
+  * first element is kept.</li>
+  * </ol>
+  * The cleaned and the collapsed lists are printed to System.out.
+  *
+  * @param greatest_common_prefixes the prefixes (with confidences) to analyze
+  * @param separator the separator between the elements of a prefix
+  * @throws IllegalArgumentException if the confidences of the collapsed entries do not add up to 1.0
+  */
+ public final static void analyzeGreatestCommonPrefixes( List<Prefix> greatest_common_prefixes, final String separator ) {
+ final SortedMap<String, Double> map = new TreeMap<>();
+ for( final Prefix prefix : greatest_common_prefixes ) {
+ final List<String> prefixes = ForesterUtil.spliIntoPrefixes( prefix.getPrefix(), separator );
for( final String p : prefixes ) {
map.put( p, 0.0 );
}
}
- // System.out.println( map );
- for (final String key : map.keySet()) {
+ // System.out.println( map );
+ // Accumulate, per key, the confidence of every input prefix that starts with that key.
+ // NOTE(review): a plain startsWith also matches across element boundaries (key "A.1" matches
+ // "A.10"); confirm whether matching should require the separator after the key.
+ for( final String key : map.keySet() ) {
//System.out.println(key);
- for( final Prefix prefix : _greatest_common_prefix ) {
+ for( final Prefix prefix : greatest_common_prefixes ) {
if ( prefix.getPrefix().startsWith( key ) ) {
- map.put( key, map.get( key ) + prefix.getConfidence() );
+ map.put( key, map.get( key ) + prefix.getConfidence() );
+ }
+ }
+ }
+ //System.out.println( map );
+ final List<Prefix> l = new ArrayList<>();
+ for( final Entry<String, Double> entry : map.entrySet() ) {
+ // System.out.println( entry.getKey() + "->" + entry.getValue() );
+ l.add( new Prefix( entry.getKey(), entry.getValue(), separator ) );
+ }
+ // Highest confidence first. The sort is stable, so entries with equal confidence keep
+ // the key order of the sorted map.
+ Collections.sort( l, new Comparator<Prefix>() {
+
+ @Override
+ public int compare( final Prefix x, final Prefix y ) {
+ // Arguments are swapped to obtain descending order. Double.compare yields a
+ // negative, zero or positive result as the Comparator contract requires (the
+ // previous hand-written ternary could never return a positive value).
+ return Double.compare( y.getConfidence(), x.getConfidence() );
+ }
+ } );
+ System.out.println();
+ for( final Prefix prefix : l ) {
+ // System.out.println( prefix );
+ }
+ // Drop an entry when a longer entry extending it carries the same confidence.
+ final List<Prefix> cleaned = new ArrayList<>();
+ for( final Prefix o : l ) {
+ boolean ok = true;
+ for( final Prefix i : l ) {
+ if ( ( !o.getPrefix().equals( i.getPrefix() ) ) && ( i.getPrefix().startsWith( o.getPrefix() ) )
+ && ForesterUtil.isEqual( i.getConfidence(),
+ o.getConfidence() ) ) {
+ ok = false;
+ break;
}
}
+ if ( ok ) {
+ cleaned.add( o );
+ }
+ }
+ System.out.println();
+ for( final Prefix prefix : cleaned ) {
+ System.out.println( prefix );
+ }
+ // Keep only the first (highest-ranked) entry for each distinct first element.
+ final List<Prefix> collapsed = new ArrayList<>();
+ final Set<String> firsts = new HashSet<>();
+ double confidence_sum = 0;
+ for( final Prefix prefix : cleaned ) {
+ final String f = prefix.getPrefixFirstElement();
+ if ( !firsts.contains( f ) ) {
+ firsts.add( f );
+ collapsed.add( prefix );
+ confidence_sum += prefix.getConfidence();
+ }
+ }
+ if ( !ForesterUtil.isEqual( confidence_sum, 1.0 ) ) {
+ throw new IllegalArgumentException( "Confidences add up to " + confidence_sum + " instead of 1.0" );
+ }
+ System.out.println();
+ for( final Prefix prefix : collapsed ) {
+ System.out.println( prefix );
}
- System.out.println( map );
}
-
-
}
public final static String OS_VERSION = System.getProperty( "os.version" );
public static final String PDB = "http://www.pdb.org/pdb/explore/explore.do?pdbId=";
public final static String UNIPROT_KB = "http://www.uniprot.org/uniprot/";
- public final static double ZERO_DIFF = 1.0E-9;
+ public final static double ZERO_DIFF = 1.0E-12;
private static final Pattern PARANTHESESABLE_NH_CHARS_PATTERN = Pattern.compile( "[(),;\\s:\\[\\]]" );
static {
final DecimalFormatSymbols dfs = new DecimalFormatSymbols();