import java.io.File;
import java.io.FileWriter;
-import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
-import java.util.HashMap;
import java.util.Vector;
import org.forester.io.parsers.phyloxml.PhyloXmlParser;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.factories.PhylogenyFactory;
import org.forester.phylogeny.iterators.PreorderTreeIterator;
-import org.forester.sdi.DistanceCalculator;
import org.forester.sdi.RIO;
-import org.forester.sdi.SDIException;
-import org.forester.sdi.SDIR;
import org.forester.util.ForesterUtil;
public class rio {
System.exit( -1 );
}
- // Uses DistanceCalculator to calculate distances.
- private final static StringBuffer getDistances( final File tree_file_for_dist_val,
- final File outfile,
- final Phylogeny species_tree,
- final String seq_name,
- final ArrayList<String> al_ortholog_names_for_dc,
- final HashMap<String, Integer> ortholog_hashmap,
- final HashMap<String, Integer> super_ortholog_hashmap,
- final int warn_more_than_one_ortho,
- final int warn_no_orthos,
- final double warn_one_ortho,
- final int bootstraps,
- final double t_orthologs_dc ) throws IOException, SDIException {
- Phylogeny consensus_tree = null;
- Phylogeny
- // to be a consensus tree.
- assigned_cons_tree = null;
- final SDIR sdiunrooted = new SDIR();
- final ArrayList<PhylogenyNode> al_ortholog_nodes = new ArrayList<PhylogenyNode>();
- double m = 0.0;
- double sd = 0.0;
- double d = 0.0;
- int n = 0;
- final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
- consensus_tree = factory.create( tree_file_for_dist_val, new PhyloXmlParser() )[ 0 ];
- PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, consensus_tree );
- assigned_cons_tree = sdiunrooted.infer( consensus_tree,
- species_tree,
- rio.MINIMIZE_COST,
- rio.MINIMIZE_DUPS,
- rio.MINIMIZE_HEIGHT,
- true,
- 1 )[ 0 ];
- final DistanceCalculator dc = new DistanceCalculator();
- final StringBuffer sb = new StringBuffer();
- sb.append( "Given the threshold for distance calculations (" + ForesterUtil.roundToInt( t_orthologs_dc )
- + "): " );
- // No orthologs.
- if ( al_ortholog_names_for_dc.size() == 0 ) {
- dc.setTree( assigned_cons_tree );
- // Remark. Calculation of mean and sd _does_ include the node
- // with seq_name.
- m = dc.getMean();
- sd = dc.getStandardDeviation();
- d = dc.getDistanceToRoot( seq_name );
- n = dc.getN();
- sb.append( "No sequence is considered orthologous to query."
- + "\ndistance of query to root = " + ForesterUtil.FORMATTER_06.format( d )
- + "\nmean of distances (for all sequences) to root = " + ForesterUtil.FORMATTER_06.format( m )
- + "\nsd of distances (for all sequences) to root = " + ForesterUtil.FORMATTER_06.format( sd )
- + "\nn (sum of sequences in alignment plus query) = " + n );
- if ( !( ( ( m - ( warn_no_orthos * sd ) ) < d ) && ( ( m + ( warn_no_orthos * sd ) ) > d ) ) ) {
- sb.append( "\nWARNING: distance of query to root is outside of mean+/-" + warn_no_orthos + "*sd!" );
- }
- }
- // One ortholog.
- else if ( al_ortholog_names_for_dc.size() == 1 ) {
- final String name_of_ortholog = al_ortholog_names_for_dc.get( 0 );
- al_ortholog_nodes.add( assigned_cons_tree.getNode( name_of_ortholog ) );
- al_ortholog_nodes.add( assigned_cons_tree.getNode( seq_name ) );
- dc.setTreeAndExtNodes( assigned_cons_tree, al_ortholog_nodes );
- // Remark. Calculation of mean _does_ include the node
- // with seq_name.
- d = dc.getDistanceToLCA( seq_name );
- final double d_o = dc.getDistanceToLCA( name_of_ortholog );
- sb.append( "One sequence is considered orthologous to query." + "\nLCA is LCA of query and its ortholog."
- + "\ndistance of query to LCA = " + ForesterUtil.FORMATTER_06.format( d )
- + "\ndistance of ortholog to LCA = " + ForesterUtil.FORMATTER_06.format( d_o ) );
- if ( ( d_o > 0.0 )
- && ( d > 0.0 )
- && ( ( ( d_o >= d ) && ( ( d_o / d ) > warn_one_ortho ) ) || ( ( d_o < d ) && ( ( d / d_o ) > warn_one_ortho ) ) ) ) {
- sb.append( "\nWARNING: Ratio of distances to LCA is greater than " + warn_one_ortho + "!" );
- }
- else if ( ( ( d_o == 0.0 ) || ( d == 0.0 ) ) && ( ( d_o != 0.0 ) || ( d != 0.0 ) ) ) {
- sb.append( "\nWARNING: Ratio could not be calculated, " + " one distance is 0.0!" );
- }
- }
- // More than one ortholog.
- else {
- for( int i = 0; i < al_ortholog_names_for_dc.size(); ++i ) {
- al_ortholog_nodes.add( assigned_cons_tree.getNodeViaSequenceName( al_ortholog_names_for_dc.get( i ) ) );
- }
- al_ortholog_nodes.add( assigned_cons_tree.getNodesViaSequenceName( seq_name ).get( 0 ) );
- dc.setTreeAndExtNodes( assigned_cons_tree, al_ortholog_nodes );
- // Remark. Calculation of mean and sd _does_ include the node
- // with seq_name.
- m = dc.getMean();
- sd = dc.getStandardDeviation();
- d = dc.getDistanceToLCA( seq_name );
- n = dc.getN();
- sb.append( "More than one sequence is considered orthologous to query."
- + "\nLCA is LCA of query and its orthologs."
- + "\ndistance of query to LCA = "
- + ForesterUtil.FORMATTER_06.format( d )
- + "\nmean of distances (for query and its orthologs) to LCA = "
- + ForesterUtil.FORMATTER_06.format( m )
- + "\nsd of distances (for query and its orthologs) to LCA = "
- + ForesterUtil.FORMATTER_06.format( sd )
- + "\nn (sum of orthologs plus query) = " + n );
- if ( !( ( ( m - ( warn_more_than_one_ortho * sd ) ) < d ) && ( ( m + ( warn_more_than_one_ortho * sd ) ) > d ) ) ) {
- sb.append( "\n!WARNING: distance of query to LCA is outside of mean+/-" + warn_more_than_one_ortho
- + "*sd!" );
- }
- }
- return sb;
- }
-
public static void main( final String[] args ) {
ForesterUtil.printProgramInformation( PRG_NAME, PRG_VERSION, PRG_DATE, E_MAIL, WWW );
File species_tree_file = null;
File multiple_trees_file = null;
File outfile = null;
- File distance_matrix_file = null;
- File tree_file_for_dist_val = null;
- File tree_file_for_avg_bs = null;
String seq_name = "";
String arg = "";
boolean output_ultraparalogs = false;
- ArrayList<String> orthologs_al_for_dc = null;
double t_orthologs = 0.0;
- double t_sn = 0.0;
- double t_orthologs_dc = 0.0;
+ double threshold_ultra_paralogs = 0.0;
- double[] bs_mean_sd = null;
- int sort = 13;
+ int sort = 1;
Phylogeny species_tree = null;
RIO rio_instance = null;
PrintWriter out = null;
long time = 0;
- int warn_no_orthos = WARN_NO_ORTHOS_DEFAULT;
- int warn_more_than_one_ortho = WARN_MORE_THAN_ONE_ORTHO_DEFAULT;
- double warn_one_ortho = WARN_ONE_ORTHO_DEFAULT;
- double threshold_ultra_paralogs = THRESHOLD_ULTRA_PARALOGS_DEFAULT;
if ( args.length < 2 ) {
printHelp();
System.exit( 0 );
case 'O':
outfile = new File( arg );
break;
- case 'D':
- distance_matrix_file = new File( arg );
- break;
- case 'T':
- tree_file_for_dist_val = new File( arg );
- break;
- case 't':
- tree_file_for_avg_bs = new File( arg );
- break;
case 'p':
output_ultraparalogs = true;
break;
case 'L':
t_orthologs = Double.parseDouble( arg );
break;
- case 'B':
- t_sn = Double.parseDouble( arg );
- break;
- case 'U':
- t_orthologs_dc = Double.parseDouble( arg );
- break;
case 'v':
threshold_ultra_paralogs = Double.parseDouble( arg );
break;
- case 'X':
- warn_more_than_one_ortho = Integer.parseInt( arg );
- break;
- case 'Y':
- warn_no_orthos = Integer.parseInt( arg );
- break;
- case 'Z':
- warn_one_ortho = Double.parseDouble( arg );
- break;
default:
errorInCommandLine();
}
|| ( outfile == null ) ) {
errorInCommandLine();
}
- if ( ( sort < 0 ) || ( sort > 17 ) ) {
- errorInCommandLine();
- }
- if ( ( sort > 2 ) && ( distance_matrix_file == null ) ) {
+ if ( ( sort < 0 ) || ( sort > 2 ) ) {
errorInCommandLine();
}
if ( VERBOSE ) {
System.out.println( "Seq name: " + seq_name );
System.out.println( "Species tree file: " + species_tree_file );
System.out.println( "Outfile: " + outfile );
- if ( distance_matrix_file != null ) {
- System.out.println( "Distance matrix file: " + distance_matrix_file );
- }
- if ( tree_file_for_dist_val != null ) {
- if ( tree_file_for_avg_bs == null ) {
- System.out.println( "Phy to read dists and calc mean support from: " + tree_file_for_dist_val );
- }
- else {
- System.out.println( "Phylogeny to read dist values from: " + tree_file_for_dist_val );
- }
- }
- if ( tree_file_for_avg_bs != null ) {
- System.out.println( "Phylogeny to calc mean bootstrap from: " + tree_file_for_avg_bs );
- }
System.out.println( "Sort: " + sort );
System.out.println( "Threshold orthologs: " + t_orthologs );
- System.out.println( "Threshold subtree neighborings: " + t_sn );
System.out.println( "Threshold orthologs for distance calc.: " + t_orthologs_dc );
if ( output_ultraparalogs ) {
System.out.println( "Threshold ultra paralogs: " + threshold_ultra_paralogs );
}
- System.out.println( "More than one ortholog sd diff: " + warn_more_than_one_ortho );
- System.out.println( "No orthologs sd diff: " + warn_no_orthos );
- System.out.println( "One ortholog factor : " + warn_one_ortho + "\n" );
}
if ( TIME && VERBOSE ) {
time = System.currentTimeMillis();
rio_instance = new RIO();
final StringBuffer output = new StringBuffer();
try {
- if ( distance_matrix_file != null ) {
- rio_instance.readDistanceMatrix( distance_matrix_file );
- }
rio_instance.inferOrthologs( multiple_trees_file, species_tree.copy(), seq_name );
- output.append( rio_instance.inferredOrthologsToString( seq_name, sort, t_orthologs, t_sn ) );
- if ( tree_file_for_dist_val != null ) {
- orthologs_al_for_dc = rio_instance.inferredOrthologsToArrayList( seq_name, t_orthologs_dc );
- final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
- if ( tree_file_for_avg_bs != null ) {
- final Phylogeny p = factory.create( tree_file_for_avg_bs, new PhyloXmlParser() )[ 0 ];
- bs_mean_sd = calculateMeanBoostrapValue( p );
- }
- else {
- final Phylogeny p = factory.create( tree_file_for_dist_val, new PhyloXmlParser() )[ 0 ];
- bs_mean_sd = calculateMeanBoostrapValue( p );
- }
- if ( ( bs_mean_sd != null ) && ( bs_mean_sd.length == 2 ) ) {
- final double bs_mean = bs_mean_sd[ 0 ];
- final double bs_sd = bs_mean_sd[ 1 ];
- output.append( "\n\nMean bootstrap value of consensus tree (sd): "
- + ForesterUtil.roundToInt( ( bs_mean * 100.0 ) / rio_instance.getBootstraps() ) + "% (+/-"
- + ForesterUtil.roundToInt( ( bs_sd * 100.0 ) / rio_instance.getBootstraps() ) + "%)\n" );
- }
- output.append( "\n\nDistance values:\n" );
- output.append( getDistances( tree_file_for_dist_val,
- outfile,
- species_tree,
- seq_name,
- orthologs_al_for_dc,
- rio_instance.getInferredOrthologs( seq_name ),
- rio_instance.getInferredSuperOrthologs( seq_name ),
- warn_more_than_one_ortho,
- warn_no_orthos,
- warn_one_ortho,
- rio_instance.getBootstraps(),
- t_orthologs_dc ) );
- }
+ output.append( rio_instance.inferredOrthologsToString( seq_name, sort, t_orthologs ) );
if ( output_ultraparalogs ) {
output.append( "\n\nUltra paralogs:\n" );
- output.append( rio_instance
- .inferredUltraParalogsToString( seq_name, sort > 2, threshold_ultra_paralogs ) );
+ output.append( rio_instance.inferredUltraParalogsToString( seq_name, threshold_ultra_paralogs ) );
}
output.append( "\n\nSort priority: " + RIO.getOrder( sort ) );
output.append( "\nExt nodes : " + rio_instance.getExtNodesOfAnalyzedGeneTrees() );
- output.append( "\nSamples : " + rio_instance.getBootstraps() + "\n" );
+ output.append( "\nSamples : " + rio_instance.getNumberOfSamples() + "\n" );
out = new PrintWriter( new FileWriter( outfile ), true );
}
catch ( final Exception e ) {
System.out.println( "N= (String) Query sequence name (mandatory)" );
System.out.println( "S= (String) Species tree file (mandatory)" );
System.out.println( "O= (String) Output file name -- overwritten without warning! (mandatory)" );
- System.out.println( "D= (String) Distance matrix file for pairwise distances" );
- System.out.println( "T= (String) Phylogeny file for distances of query to LCA" );
- System.out.println( " of orthologs and for mean bootstrap value (if t= is not used)," );
- System.out.println( " must be binary )" );
- System.out.println( "t= (String) Phylogeny file for mean bootstrap value (if this option is used," );
- System.out.println( " the mean bootstrap value is not calculated from the tree read in" );
- System.out.println( " with T=), not necessary binary" );
- System.out.println( "p To output ultra paralogs" );
System.out.println( "P= (int) Sort priority" );
System.out.println( "L= (double) Threshold orthologs for output" );
- System.out.println( "U= (double) Threshold orthologs for distance calculation" );
- System.out.println( "X= (int) More than one ortholog: " );
- System.out.println( " numbers of sd the dist. to LCA has to differ from mean to generate a warning" );
- System.out.println( "Y= (int) No orthologs:" );
- System.out.println( " Numbers of sd the dist to root has to differ from mean to generate a warning" );
- System.out.println( "Z= (double) One ortholog:" );
- System.out.println( " threshold for factor between the two distances to their LCA (larger/smaller)" );
- System.out.println( " to generate a warning" );
- System.out.println();
System.out.println( " Sort priority (\"P=\"):" );
System.out.println( RIO.getOrderHelp().toString() );
System.out.println();
import java.util.Set;
import org.forester.datastructures.IntMatrix;
-import org.forester.evoinference.matrix.distance.DistanceMatrix;
import org.forester.io.parsers.PhylogenyParser;
-import org.forester.io.parsers.SymmetricalDistanceMatrixParser;
import org.forester.io.parsers.nhx.NHXParser;
import org.forester.io.parsers.util.ParserUtils;
import org.forester.phylogeny.Phylogeny;
private HashMap<String, HashMap<String, Integer>> _o_hash_maps;
private HashMap<String, HashMap<String, Integer>> _so_hash_maps;
private HashMap<String, HashMap<String, Integer>> _up_hash_maps;
- private HashMap<String, HashMap<String, Integer>> _sn_hash_maps; // HashMap of HashMaps
- private DistanceMatrix _m;
- private HashMap<String, Double> _l;
private List<String> _seq_names;
- private int _bootstraps;
+ private int _samples;
private int _ext_nodes_;
private long _time;
return m;
}
- /**
- * Returns the numbers of trees analyzed.
- *
- * @return the numbers of trees analyzed
- */
- public final int getBootstraps() {
- return _bootstraps;
+
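+ /**
+ * Returns the number of gene tree samples analyzed.
+ *
+ * @return the number of samples
+ */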
+ public final int getNumberOfSamples() {
+ return _samples;
}
// Helper method for inferredOrthologsToString.
return 0.0;
}
final int i = h.get( name );
- return ( ( i * 100.0 ) / getBootstraps() );
- }
-
- /**
- * Returns the distance to a sequences/taxa after a distance list file has
- * been read in with readDistanceList(File). Throws an exception if name is
- * not found or if no list has been read in.
- *
- * @param name
- * a sequence name
- */
- public final double getDistance( String name ) {
- double distance = 0.0;
- name = name.trim();
- if ( _l == null ) {
- throw new RuntimeException( "Distance list has probably not been read in (successfully)." );
- }
- if ( _l.get( name ) == null ) {
- throw new IllegalArgumentException( name + " not found." );
- }
- distance = ( _l.get( name ) ).doubleValue();
- return distance;
- }
-
- public final double getDistance( final String name1, final String name2 ) {
- try {
- return _m.getValue( _m.getIndex( name1 ), _m.getIndex( name2 ) );
- }
- catch ( final Exception e ) {
- return 1;
- }
+ return ( ( i * 100.0 ) / getNumberOfSamples() );
}
/**
return _o_hash_maps.get( seq_name );
}
- private final HashMap<String, Integer> getInferredSubtreeNeighbors( final String seq_name ) {
- if ( _sn_hash_maps == null ) {
- return null;
- }
- return _sn_hash_maps.get( seq_name );
- }
-
/**
* Returns a HashMap containing the inferred "super orthologs" of the
* external gene tree node with the sequence name seq_name. Sequence names
_o_hash_maps = new HashMap<String, HashMap<String, Integer>>();
_so_hash_maps = new HashMap<String, HashMap<String, Integer>>();
_up_hash_maps = new HashMap<String, HashMap<String, Integer>>();
- _sn_hash_maps = new HashMap<String, HashMap<String, Integer>>();
_o_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.size() ) );
_so_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.size() ) );
_up_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.size() ) );
- _sn_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.size() ) );
// Go through all gene trees in the file.
final Phylogeny[] gene_trees = factory.create( gene_trees_file, p );
final Phylogeny[] assigned_trees = new Phylogeny[ gene_trees.length ];
}
final IntMatrix m = calculateOrthologTable( assigned_trees );
System.out.println( m.toString() );
- setBootstraps( bs );
+ setNumberOfSamples( bs );
if ( RIO.TIME ) {
_time = ( System.currentTimeMillis() - _time );
}
List<PhylogenyNode> orthologs = null;
List<PhylogenyNode> super_orthologs = null;
List<PhylogenyNode> ultra_paralogs = null;
- List<PhylogenyNode> subtree_neighbors = null;
assigned_tree = sdiunrooted.infer( gene_tree,
species_tree,
RIO.ROOT_BY_MINIMIZING_MAPPING_COST,
updateHash( _o_hash_maps, query, orthologs );
super_orthologs = PhylogenyMethods.getSuperOrthologousNodes( query_node );
updateHash( _so_hash_maps, query, super_orthologs );
- subtree_neighbors = getSubtreeNeighbors( query_node, 2 );
- updateHash( _sn_hash_maps, query, subtree_neighbors );
ultra_paralogs = PhylogenyMethods.getUltraParalogousNodes( query_node );
updateHash( _up_hash_maps, query, ultra_paralogs );
return assigned_tree;
* @see #inferOrthologs(File,Phylogeny)
* @see #getOrder(int)
*/
- public StringBuffer inferredOrthologsToString( final String query_name,
- int sort,
- double threshold_orthologs,
- double threshold_subtreeneighborings ) {
+ public StringBuffer inferredOrthologsToString( final String query_name, int sort, double threshold_orthologs ) {
HashMap<String, Integer> o_hashmap = null;
HashMap<String, Integer> s_hashmap = null;
- HashMap<String, Integer> n_hashmap = null;
String name = "";
- double o = 0.0, // Orthologs.
- s = 0.0, // Super orthologs.
- sn = 0.0, // Subtree neighbors.
- value1 = 0.0, value2 = 0.0, value3 = 0.0, value4 = 0.0, d = 0.0;
- final ArrayList<Tuplet> nv = new ArrayList<Tuplet>();
- if ( ( _o_hash_maps == null ) || ( _so_hash_maps == null ) || ( _sn_hash_maps == null ) ) {
- throw new RuntimeException( "Orthologs have not been calculated (successfully)" );
+ double o = 0.0; // Orthologs.
+ double s = 0.0; // Super orthologs.
+ double value1 = 0.0;
+ double value2 = 0.0;
+ final ArrayList<ResultLine> nv = new ArrayList<ResultLine>();
+ if ( ( _o_hash_maps == null ) || ( _so_hash_maps == null ) ) {
+ throw new RuntimeException( "orthologs have not been calculated (successfully)" );
}
- if ( ( sort < 0 ) || ( sort > 17 ) ) {
- sort = 12;
- }
- if ( ( sort > 2 ) && ( _m == null ) && ( _l == null ) ) {
- throw new RuntimeException( "Distance list or matrix have not been read in (successfully)" );
+ if ( ( sort < 0 ) || ( sort > 2 ) ) {
+ sort = 1;
}
if ( threshold_orthologs < 0.0 ) {
threshold_orthologs = 0.0;
else if ( threshold_orthologs > 100.0 ) {
threshold_orthologs = 100.0;
}
- if ( threshold_subtreeneighborings < 0.0 ) {
- threshold_subtreeneighborings = 0.0;
- }
- else if ( threshold_subtreeneighborings > 100.0 ) {
- threshold_subtreeneighborings = 100.0;
- }
o_hashmap = getInferredOrthologs( query_name );
s_hashmap = getInferredSuperOrthologs( query_name );
- n_hashmap = getInferredSubtreeNeighbors( query_name );
- if ( ( o_hashmap == null ) || ( s_hashmap == null ) || ( n_hashmap == null ) ) {
+ if ( ( o_hashmap == null ) || ( s_hashmap == null ) ) {
throw new RuntimeException( "Orthologs for " + query_name + " were not established" );
}
final StringBuffer orthologs = new StringBuffer();
if ( o < threshold_orthologs ) {
continue I;
}
- sn = getBootstrapValueFromHash( n_hashmap, name );
- if ( sn < threshold_subtreeneighborings ) {
- continue I;
- }
s = getBootstrapValueFromHash( s_hashmap, name );
- if ( sort >= 3 ) {
- if ( _m != null ) {
- d = getDistance( query_name, name );
- }
- else {
- d = getDistance( name );
- }
- }
switch ( sort ) {
case 0:
- nv.add( new Tuplet( name, o, 5 ) );
+ nv.add( new ResultLine( name, o, 5 ) );
break;
case 1:
- nv.add( new Tuplet( name, o, s, 5 ) );
+ nv.add( new ResultLine( name, o, s, 5 ) );
break;
case 2:
- nv.add( new Tuplet( name, s, o, 5 ) );
- break;
- case 3:
- nv.add( new Tuplet( name, o, d, 1 ) );
- break;
- case 4:
- nv.add( new Tuplet( name, d, o, 0 ) );
- break;
- case 5:
- nv.add( new Tuplet( name, o, s, d, 2 ) );
- break;
- case 6:
- nv.add( new Tuplet( name, o, d, s, 1 ) );
- break;
- case 7:
- nv.add( new Tuplet( name, s, o, d, 2 ) );
- break;
- case 8:
- nv.add( new Tuplet( name, s, d, o, 1 ) );
- break;
- case 9:
- nv.add( new Tuplet( name, d, o, s, 0 ) );
- break;
- case 10:
- nv.add( new Tuplet( name, d, s, o, 0 ) );
- break;
- case 11:
- nv.add( new Tuplet( name, o, sn, d, 2 ) );
- break;
- case 12:
- nv.add( new Tuplet( name, o, sn, s, d, 3 ) );
- break;
- case 13:
- nv.add( new Tuplet( name, o, s, sn, d, 3 ) );
- break;
- case 14:
- nv.add( new Tuplet( name, sn, o, s, d, 3 ) );
- break;
- case 15:
- nv.add( new Tuplet( name, sn, d, o, s, 1 ) );
- break;
- case 16:
- nv.add( new Tuplet( name, o, d, sn, s, 1 ) );
- break;
- case 17:
- nv.add( new Tuplet( name, o, sn, d, s, 2 ) );
+ nv.add( new ResultLine( name, s, o, 5 ) );
break;
default:
- nv.add( new Tuplet( name, o, 5 ) );
+ nv.add( new ResultLine( name, o, 5 ) );
}
} // End of I for loop.
if ( ( nv != null ) && ( nv.size() > 0 ) ) {
orthologs.append( "[seq name]\t\t[ortho]\t[st-n]\t[sup-o]\t[dist]" + ForesterUtil.LINE_SEPARATOR );
- final Tuplet[] nv_array = new Tuplet[ nv.size() ];
+ final ResultLine[] nv_array = new ResultLine[ nv.size() ];
for( int j = 0; j < nv.size(); ++j ) {
nv_array[ j ] = nv.get( j );
}
Arrays.sort( nv_array );
- for( final Tuplet element : nv_array ) {
+ for( final ResultLine element : nv_array ) {
name = element.getKey();
value1 = element.getValue1();
value2 = element.getValue2();
- value3 = element.getValue3();
- value4 = element.getValue4();
- orthologs.append( addNameAndValues( name, value1, value2, value3, value4, sort ) );
+ orthologs.append( addNameAndValues( name, value1, value2, sort ) );
}
}
}
* @return String containing the inferred ultra paralogs, "-" if no ultra
*         paralogs have been found, or null in case of error
*/
- public String inferredUltraParalogsToString( final String query_name,
- final boolean return_dists,
- double threshold_ultra_paralogs ) {
+ public String inferredUltraParalogsToString( final String query_name, double threshold_ultra_paralogs ) {
HashMap<String, Integer> sp_hashmap = null;
String name = "", ultra_paralogs = "";
int sort = 0;
- double sp = 0.0, value1 = 0.0, value2 = 0.0, d = 0.0;
- final List<Tuplet> nv = new ArrayList<Tuplet>();
+ double sp = 0.0;
+ double value1 = 0.0;
+ double value2 = 0.0;
+ final List<ResultLine> nv = new ArrayList<ResultLine>();
if ( threshold_ultra_paralogs < 1.0 ) {
threshold_ultra_paralogs = 1.0;
}
if ( _up_hash_maps == null ) {
throw new RuntimeException( "Ultra paralogs have not been calculated (successfully)." );
}
- if ( return_dists && ( _m == null ) && ( _l == null ) ) {
- throw new RuntimeException( "Distance list or matrix have not been read in (successfully)." );
- }
sp_hashmap = getInferredUltraParalogs( query_name );
if ( sp_hashmap == null ) {
throw new RuntimeException( "Ultra paralogs for " + query_name + " were not established" );
if ( sp < threshold_ultra_paralogs ) {
continue I;
}
- if ( return_dists ) {
- if ( _m != null ) {
- d = getDistance( query_name, name );
- }
- else {
- d = getDistance( name );
- }
- nv.add( new Tuplet( name, sp, d, 1 ) );
- }
- else {
- nv.add( new Tuplet( name, sp, 5 ) );
- }
+ nv.add( new ResultLine( name, sp, 5 ) );
} // End of I for loop.
if ( ( nv != null ) && ( nv.size() > 0 ) ) {
- final Tuplet[] nv_array = new Tuplet[ nv.size() ];
+ final ResultLine[] nv_array = new ResultLine[ nv.size() ];
for( int j = 0; j < nv.size(); ++j ) {
nv_array[ j ] = nv.get( j );
}
Arrays.sort( nv_array );
- if ( return_dists ) {
- sort = 91;
- }
- else {
- sort = 90;
- }
- for( final Tuplet element : nv_array ) {
+ sort = 90;
+ for( final ResultLine element : nv_array ) {
name = element.getKey();
value1 = element.getValue1();
value2 = element.getValue2();
- ultra_paralogs += addNameAndValues( name, value1, value2, 0.0, 0.0, sort );
+ ultra_paralogs += addNameAndValues( name, value1, value2, sort );
}
}
}
return ultra_paralogs;
}
- public final void readDistanceMatrix( final File matrix_file ) throws IOException {
- DistanceMatrix[] matrices = null;
- final SymmetricalDistanceMatrixParser parser = SymmetricalDistanceMatrixParser.createInstance();
- matrices = parser.parse( matrix_file );
- if ( ( matrices == null ) || ( matrices.length == 0 ) ) {
- throw new IOException( "failed to parse distance matrix from [" + matrix_file + "]" );
- }
- if ( matrices.length > 1 ) {
- throw new IOException( "[" + matrix_file + "] contains more than once distance matrix" );
- }
- _m = matrices[ 0 ];
- }
-
/**
* Brings this into the same state as immediately after construction.
*/
_so_hash_maps = null;
_up_hash_maps = null;
_seq_names = null;
- _m = null;
- _l = null;
- _bootstraps = 1;
+ _samples = 1;
_ext_nodes_ = 0;
_time = 0;
}
- /**
- * Sets the numbers of trees analyzed.
- * @param the
- * numbers of trees analyzed
- */
- private void setBootstraps( int i ) {
+
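+ /**
+ * Sets the number of gene tree samples analyzed (values smaller than 1
+ * are treated as 1).
+ */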
+ private void setNumberOfSamples( int i ) {
if ( i < 1 ) {
i = 1;
}
- _bootstraps = i;
+ _samples = i;
}
/**
private final static String addNameAndValues( final String name,
final double value1,
final double value2,
- final double value3,
- final double value4,
final int sort ) {
final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.#####" );
df.setDecimalSeparatorAlwaysShown( false );
case 0:
line += addToLine( value1, df );
line += "-\t";
- line += "-\t";
- line += "-\t";
break;
case 1:
line += addToLine( value1, df );
- line += "-\t";
line += addToLine( value2, df );
- line += "-\t";
break;
case 2:
line += addToLine( value2, df );
- line += "-\t";
line += addToLine( value1, df );
- line += "-\t";
- break;
- case 3:
- line += addToLine( value1, df );
- line += "-\t";
- line += "-\t";
- line += addToLine( value2, df );
- break;
- case 4:
- line += addToLine( value2, df );
- line += "-\t";
- line += "-\t";
- line += addToLine( value1, df );
- break;
- case 5:
- line += addToLine( value1, df );
- line += "-\t";
- line += addToLine( value2, df );
- line += addToLine( value3, df );
- break;
- case 6:
- line += addToLine( value1, df );
- line += "-\t";
- line += addToLine( value3, df );
- line += addToLine( value2, df );
- break;
- case 7:
- line += addToLine( value2, df );
- line += "-\t";
- line += addToLine( value1, df );
- line += addToLine( value3, df );
- break;
- case 8:
- line += addToLine( value3, df );
- line += "-\t";
- line += addToLine( value1, df );
- line += addToLine( value2, df );
- break;
- case 9:
- line += addToLine( value2, df );
- line += "-\t";
- line += addToLine( value3, df );
- line += addToLine( value1, df );
- break;
- case 10:
- line += addToLine( value3, df );
- line += "-\t";
- line += addToLine( value2, df );
- line += addToLine( value1, df );
- break;
- case 11:
- line += addToLine( value1, df );
- line += addToLine( value2, df );
- line += "-\t";
- line += addToLine( value3, df );
- break;
- case 12:
- line += addToLine( value1, df );
- line += addToLine( value2, df );
- line += addToLine( value3, df );
- line += addToLine( value4, df );
- break;
- case 13:
- line += addToLine( value1, df );
- line += addToLine( value3, df );
- line += addToLine( value2, df );
- line += addToLine( value4, df );
- break;
- case 14:
- line += addToLine( value2, df );
- line += addToLine( value1, df );
- line += addToLine( value3, df );
- line += addToLine( value4, df );
- break;
- case 15:
- line += addToLine( value3, df );
- line += addToLine( value1, df );
- line += addToLine( value4, df );
- line += addToLine( value2, df );
- break;
- case 16:
- line += addToLine( value1, df );
- line += addToLine( value3, df );
- line += addToLine( value4, df );
- line += addToLine( value2, df );
- break;
- case 17:
- line += addToLine( value1, df );
- line += addToLine( value2, df );
- line += addToLine( value4, df );
- line += addToLine( value3, df );
break;
case 90:
line += addToLine( value1, df );
// Helper for addNameAndValues.
private final static String addToLine( final double value, final java.text.DecimalFormat df ) {
String s = "";
- if ( value != Tuplet.DEFAULT ) {
+ if ( value != ResultLine.DEFAULT ) {
s = df.format( value ) + "\t";
}
else {
case 2:
order = "super orthologies > orthologies";
break;
- case 3:
- order = "orthologies > distance to query";
- break;
- case 4:
- order = "distance to query > orthologies";
- break;
- case 5:
- order = "orthologies > super orthologies > distance to query";
- break;
- case 6:
- order = "orthologies > distance to query > super orthologies";
- break;
- case 7:
- order = "super orthologies > orthologies > distance to query";
- break;
- case 8:
- order = "super orthologies > distance to query > orthologies";
- break;
- case 9:
- order = "distance to query > orthologies > super orthologies";
- break;
- case 10:
- order = "distance to query > super orthologies > orthologies";
- break;
- case 11:
- order = "orthologies > subtree neighbors > distance to query";
- break;
- case 12:
- order = "orthologies > subtree neighbors > super orthologies > distance to query";
- break;
- case 13:
- order = "orthologies > super orthologies > subtree neighbors > distance to query";
- break;
- case 14:
- order = "subtree neighbors > orthologies > super orthologies > distance to query";
- break;
- case 15:
- order = "subtree neighbors > distance to query > orthologies > super orthologies";
- break;
- case 16:
- order = "orthologies > distance to query > subtree neighbors > super orthologies";
- break;
- case 17:
- order = "orthologies > subtree neighbors > distance to query > super orthologies";
- break;
default:
order = "orthologies";
break;
sb.append( " 0: orthologies" + ForesterUtil.LINE_SEPARATOR );
sb.append( " 1: orthologies > super orthologies" + ForesterUtil.LINE_SEPARATOR );
sb.append( " 2: super orthologies > orthologies" + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 3: orthologies > distance to query" + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 4: distance to query > orthologies" + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 5: orthologies > super orthologies > distance to query" + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 6: orthologies > distance to query > super orthologies" + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 7: super orthologies > orthologies > distance to query" + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 8: super orthologies > distance to query > orthologies" + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 9: distance to query > orthologies > super orthologies" + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 10: distance to query > super orthologies > orthologies" + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 11: orthologies > subtree neighbors > distance to query" + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 12: orthologies > subtree neighbors > super orthologies > distance to query"
- + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 13: orthologies > super orthologies > subtree neighbors > distance to query"
- + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 14: subtree neighbors > orthologies > super orthologies > distance to query"
- + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 15: subtree neighbors > distance to query > orthologies > super orthologies"
- + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 16: orthologies > distance to query > subtree neighbors > super orthologies"
- + ForesterUtil.LINE_SEPARATOR );
- sb.append( " 17: orthologies > subtree neighbors > distance to query > super orthologies"
- + ForesterUtil.LINE_SEPARATOR );
return sb;
}
- private final static List<PhylogenyNode> getSubtreeNeighbors( final PhylogenyNode query, final int level ) {
- PhylogenyNode node = query;
- if ( !node.isExternal() ) {
- return null;
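+ // Sortable result row used by inferredOrthologsToString() and
+ // inferredUltraParalogsToString(): a sequence name plus up to two support
+ // values (replaces the former Tuplet class).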
+ class ResultLine implements Comparable<ResultLine> {
+
+ public static final int DEFAULT = -999;
+ private final String _key;
+ private final double _value1;
+ private final double _value2;
+ private int[] _p;
+
+ ResultLine() {
+ setSigns();
+ _key = "";
+ _value1 = ResultLine.DEFAULT;
+ _value2 = ResultLine.DEFAULT;
+ }
+
+ ResultLine( final String name, final double value1, final double value2, final int c ) {
+ setSigns();
+ _key = name;
+ _value1 = value1;
+ _value2 = value2;
+ if ( ( c >= 0 ) && ( c <= 1 ) ) {
+ _p[ c ] = -1;
+ }
}
- if ( !node.isRoot() ) {
- node = node.getParent();
+
+ ResultLine( final String name, final double value1, final int c ) {
+ setSigns();
+ _key = name;
+ _value1 = value1;
+ _value2 = ResultLine.DEFAULT;
+ if ( c == 0 ) {
+ _p[ 0 ] = -1;
+ }
}
- if ( level == 2 ) {
- if ( !node.isRoot() ) {
- node = node.getParent();
+
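+ // Compares by value1, then value2, then key. The default sign (+1) in _p
+ // yields descending order (highest support first); a -1 set by the
+ // constructors flips the corresponding value to ascending order.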
+ @Override
+ public int compareTo( final ResultLine n ) {
+ if ( ( getValue1() != ResultLine.DEFAULT ) && ( n.getValue1() != ResultLine.DEFAULT ) ) {
+ if ( getValue1() < n.getValue1() ) {
+ return _p[ 0 ];
+ }
+ if ( getValue1() > n.getValue1() ) {
+ return ( -_p[ 0 ] );
+ }
+ }
+ if ( ( getValue2() != ResultLine.DEFAULT ) && ( n.getValue2() != ResultLine.DEFAULT ) ) {
+ if ( getValue2() < n.getValue2() ) {
+ return _p[ 1 ];
+ }
+ if ( getValue2() > n.getValue2() ) {
+ return ( -_p[ 1 ] );
+ }
}
+ return ( getKey().compareTo( n.getKey() ) );
}
- else {
- throw new IllegalArgumentException( "currently only supporting level 2 subtree neighbors " );
+
+ String getKey() {
+ return _key;
}
- final List<PhylogenyNode> sn = node.getAllExternalDescendants();
- sn.remove( query );
- return sn;
- }
+
+ double getValue1() {
+ return _value1;
+ }
+
+ double getValue2() {
+ return _value2;
+ }
+
+ private void setSigns() {
+ _p = new int[ 2 ];
+ _p[ 0 ] = _p[ 1 ] = +1;
+ }
+ } // ResultLine
}