- da[ 0 ] = mean;
- da[ 1 ] = java.lang.Math.sqrt( sum / ( n - 1.0 ) );
- return da;
- }
-
- private final static void errorInCommandLine() {
- System.out.println( "\nrio: Error in command line.\n" );
- printHelp();
- System.exit( -1 );
- }
-
- /**
-  * Uses a DistanceCalculator to build a textual report that relates the
-  * distance of the query sequence to the distances of its orthologs.
-  * <p>
-  * The first phylogeny read from tree_file_for_dist_val is pruned with
-  * PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes against the species
-  * tree, passed through SDIR.infer, and the first tree returned by that call
-  * is the one the distances are taken from. The report then depends on how
-  * many names al_ortholog_names_for_dc holds:
-  * <ul>
-  * <li>none: distance of the query to the root versus mean and sd of the
-  * distances to the root reported by the calculator;</li>
-  * <li>one: distance of the query and of the ortholog to their LCA, with a
-  * warning when the larger/smaller ratio exceeds warn_one_ortho;</li>
-  * <li>more than one: distance of the query to the LCA versus mean and sd
-  * of the distances to the LCA for the query and its orthologs.</li>
-  * </ul>
-  *
-  * @param tree_file_for_dist_val file parsed with PhyloXmlParser; only the
-  *        first phylogeny in it is used
-  * @param outfile not read by this method
-  * @param species_tree species tree handed to the pruning step and to
-  *        SDIR.infer
-  * @param seq_name name of the query sequence
-  * @param al_ortholog_names_for_dc names of the sequences considered
-  *        orthologous to the query; its size selects the kind of report
-  * @param ortholog_hashmap not read by this method
-  * @param super_ortholog_hashmap not read by this method
-  * @param warn_more_than_one_ortho multiplier k of the mean+/-k*sd band
-  *        used in the more-than-one-ortholog case
-  * @param warn_no_orthos multiplier k of the mean+/-k*sd band used in the
-  *        no-ortholog case
-  * @param warn_one_ortho ratio above which a warning is added in the
-  *        one-ortholog case
-  * @param bootstraps not read by this method
-  * @param t_orthologs_dc threshold echoed (rounded to an int) in the first
-  *        line of the report
-  * @return the report text
-  * @throws IOException declared by the signature; presumably raised while
-  *         reading or parsing tree_file_for_dist_val (confirm against
-  *         PhylogenyFactory.create)
-  */
- private final static StringBuffer getDistances( final File tree_file_for_dist_val,
-                                                 final File outfile,
-                                                 final Phylogeny species_tree,
-                                                 final String seq_name,
-                                                 final ArrayList<String> al_ortholog_names_for_dc,
-                                                 final HashMap<String, Integer> ortholog_hashmap,
-                                                 final HashMap<String, Integer> super_ortholog_hashmap,
-                                                 final int warn_more_than_one_ortho,
-                                                 final int warn_no_orthos,
-                                                 final double warn_one_ortho,
-                                                 final int bootstraps,
-                                                 final double t_orthologs_dc ) throws IOException {
-     final SDIR sdiunrooted = new SDIR();
-     final ArrayList<PhylogenyNode> al_ortholog_nodes = new ArrayList<PhylogenyNode>();
-     final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
-     final Phylogeny consensus_tree = factory.create( tree_file_for_dist_val, new PhyloXmlParser() )[ 0 ];
-     PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, consensus_tree );
-     // First tree returned by SDIR.infer for the pruned consensus tree;
-     // all distances below are measured on this tree.
-     final Phylogeny assigned_cons_tree = sdiunrooted.infer( consensus_tree,
-                                                             species_tree,
-                                                             rio.MINIMIZE_COST,
-                                                             rio.MINIMIZE_DUPS,
-                                                             rio.MINIMIZE_HEIGHT,
-                                                             true,
-                                                             1 )[ 0 ];
-     final DistanceCalculator dc = new DistanceCalculator();
-     final StringBuffer sb = new StringBuffer();
-     sb.append( "Given the threshold for distance calculations (" + ForesterUtil.roundToInt( t_orthologs_dc )
-             + "): " );
-     final int ortholog_count = al_ortholog_names_for_dc.size();
-     if ( ortholog_count == 0 ) {
-         // No orthologs: compare the query with all sequences, relative to the root.
-         dc.setTree( assigned_cons_tree );
-         // Remark. Calculation of mean and sd _does_ include the node
-         // with seq_name.
-         final double m = dc.getMean();
-         final double sd = dc.getStandardDeviation();
-         final double d = dc.getDistanceToRoot( seq_name );
-         final int n = dc.getN();
-         sb.append( "No sequence is considered orthologous to query."
-                 + "\ndistance of query to root                     = " + ForesterUtil.FORMATTER_06.format( d )
-                 + "\nmean of distances (for all sequences) to root = " + ForesterUtil.FORMATTER_06.format( m )
-                 + "\nsd of distances (for all sequences) to root   = " + ForesterUtil.FORMATTER_06.format( sd )
-                 + "\nn (sum of sequences in alignment plus query)  = " + n );
-         final double band = warn_no_orthos * sd;
-         // Inclusive bounds: a distance exactly on a bound (e.g. sd == 0 and
-         // d == mean) is not "outside". The negated form is kept on purpose so
-         // that a NaN mean or sd still produces the warning.
-         if ( !( ( d >= ( m - band ) ) && ( d <= ( m + band ) ) ) ) {
-             sb.append( "\nWARNING: distance of query to root is outside of mean+/-" + warn_no_orthos + "*sd!" );
-         }
-     }
-     else if ( ortholog_count == 1 ) {
-         // One ortholog: compare query and ortholog via their distances to the LCA.
-         final String name_of_ortholog = al_ortholog_names_for_dc.get( 0 );
-         al_ortholog_nodes.add( assigned_cons_tree.getNode( name_of_ortholog ) );
-         // The query node is part of the node set handed to the calculator.
-         al_ortholog_nodes.add( assigned_cons_tree.getNode( seq_name ) );
-         dc.setTreeAndExtNodes( assigned_cons_tree, al_ortholog_nodes );
-         final double d = dc.getDistanceToLCA( seq_name );
-         final double d_o = dc.getDistanceToLCA( name_of_ortholog );
-         sb.append( "One sequence is considered orthologous to query." + "\nLCA is LCA of query and its ortholog."
-                 + "\ndistance of query to LCA    = " + ForesterUtil.FORMATTER_06.format( d )
-                 + "\ndistance of ortholog to LCA = " + ForesterUtil.FORMATTER_06.format( d_o ) );
-         if ( ( d_o > 0.0 ) && ( d > 0.0 ) ) {
-             // Always divide the larger distance by the smaller one.
-             final double ratio = ( d_o >= d ) ? ( d_o / d ) : ( d / d_o );
-             if ( ratio > warn_one_ortho ) {
-                 sb.append( "\nWARNING: Ratio of distances to LCA is greater than " + warn_one_ortho + "!" );
-             }
-         }
-         else if ( ( d_o == 0.0 ) != ( d == 0.0 ) ) {
-             // Exactly one of the two distances is zero.
-             sb.append( "\nWARNING: Ratio could not be calculated, " + " one distance is 0.0!" );
-         }
-     }
-     else {
-         // More than one ortholog: compare the query with the distances of
-         // query and orthologs to their LCA.
-         for( final String ortholog_name : al_ortholog_names_for_dc ) {
-             al_ortholog_nodes.add( assigned_cons_tree.getNodeViaSequenceName( ortholog_name ) );
-         }
-         al_ortholog_nodes.add( assigned_cons_tree.getNodesViaSequenceName( seq_name ).get( 0 ) );
-         dc.setTreeAndExtNodes( assigned_cons_tree, al_ortholog_nodes );
-         // Remark. Calculation of mean and sd _does_ include the node
-         // with seq_name.
-         final double m = dc.getMean();
-         final double sd = dc.getStandardDeviation();
-         final double d = dc.getDistanceToLCA( seq_name );
-         final int n = dc.getN();
-         sb.append( "More than one sequence is considered orthologous to query."
-                 + "\nLCA is LCA of query and its orthologs."
-                 + "\ndistance of query to LCA                               = "
-                 + ForesterUtil.FORMATTER_06.format( d )
-                 + "\nmean of distances (for query and its orthologs) to LCA = "
-                 + ForesterUtil.FORMATTER_06.format( m )
-                 + "\nsd of distances (for query and its orthologs) to LCA   = "
-                 + ForesterUtil.FORMATTER_06.format( sd )
-                 + "\nn (sum of orthologs plus query)                        = " + n );
-         final double band = warn_more_than_one_ortho * sd;
-         // Inclusive bounds, negated form: see the no-ortholog case. A distance
-         // on the bound is not outside; NaN statistics still warn.
-         if ( !( ( d >= ( m - band ) ) && ( d <= ( m + band ) ) ) ) {
-             sb.append( "\n!WARNING: distance of query to LCA is outside of mean+/-" + warn_more_than_one_ortho
-                     + "*sd!" );
-         }
-     }
-     return sb;
- }
-
- public static void main( final String[] args ) {
- ForesterUtil.printProgramInformation( PRG_NAME, PRG_VERSION, PRG_DATE, E_MAIL, WWW );
- File species_tree_file = null;
- File multiple_trees_file = null;
- File outfile = null;
- File distance_matrix_file = null;
- File tree_file_for_dist_val = null;
- File tree_file_for_avg_bs = null;
- String seq_name = "";
- String arg = "";
- boolean output_ultraparalogs = false;
- ArrayList<String> orthologs_al_for_dc = null;
- double t_orthologs = 0.0;
- double t_sn = 0.0;
- double t_orthologs_dc = 0.0;
- double[] bs_mean_sd = null;
- int sort = 13;
- Phylogeny species_tree = null;
- RIO rio_instance = null;
- PrintWriter out = null;
- long time = 0;
- int warn_no_orthos = WARN_NO_ORTHOS_DEFAULT;
- int warn_more_than_one_ortho = WARN_MORE_THAN_ONE_ORTHO_DEFAULT;
- double warn_one_ortho = WARN_ONE_ORTHO_DEFAULT;
- double threshold_ultra_paralogs = THRESHOLD_ULTRA_PARALOGS_DEFAULT;
- if ( args.length < 2 ) {