2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
7 // Copyright (C) 2000-2001 Washington University School of Medicine
8 // and Howard Hughes Medical Institute
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
25 // Contact: phylosoft @ gmail . com
26 // WWW: www.phylosoft.org/forester
28 package org.forester.sdi;
31 import java.io.FileWriter;
32 import java.io.IOException;
33 import java.io.PrintWriter;
34 import java.util.ArrayList;
35 import java.util.Arrays;
36 import java.util.HashMap;
37 import java.util.List;
39 import org.forester.evoinference.matrix.distance.DistanceMatrix;
40 import org.forester.io.parsers.SymmetricalDistanceMatrixParser;
41 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
42 import org.forester.phylogeny.Phylogeny;
43 import org.forester.phylogeny.PhylogenyMethods;
44 import org.forester.phylogeny.PhylogenyNode;
45 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
46 import org.forester.phylogeny.factories.PhylogenyFactory;
47 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
48 import org.forester.util.ForesterUtil;
51 * @author Christian M. Zmasek
53 public final class RIO {
55 private final static boolean ROOT_BY_MINIMIZING_MAPPING_COST = false;
56 private final static boolean ROOT_BY_MINIMIZING_SUM_OF_DUPS = true;
57 private final static boolean ROOT_BY_MINIMIZING_TREE_HEIGHT = true;
58 private final static boolean TIME = false;
59 private HashMap<String, HashMap<String, Integer>> _o_hash_maps;
60 private HashMap<String, HashMap<String, Integer>> _so_hash_maps;
61 private HashMap<String, HashMap<String, Integer>> _up_hash_maps;
62 private HashMap<String, HashMap<String, Integer>> _sn_hash_maps; // HashMap of HashMaps
63 private DistanceMatrix _m;
64 private HashMap<String, Double> _l;
65 private String[] _seq_names;
66 private int _bootstraps;
67 private int _ext_nodes_;
71 * Default constructor.
78 * Returns the numbers of trees analyzed.
80 * @return the numbers of trees analyzed
82 public final int getBootstraps() {
86 // Helper method for inferredOrthologsToString.
87 // inferredOrthologsToArrayList,
88 // and inferredUltraParalogsToString.
89 private final double getBootstrapValueFromHash( final HashMap<String, Integer> h, final String name ) {
90 if ( !h.containsKey( name ) ) {
93 final int i = h.get( name );
94 return ( i * 100.0 / getBootstraps() );
98 * Returns the distance to a sequences/taxa after a distance list file has
99 * been read in with readDistanceList(File). Throws an exception if name is
100 * not found or if no list has been read in.
105 public final double getDistance( String name ) {
106 double distance = 0.0;
109 throw new RuntimeException( "Distance list has probably not been read in (successfully)." );
111 if ( _l.get( name ) == null ) {
112 throw new IllegalArgumentException( name + " not found." );
114 distance = ( _l.get( name ) ).doubleValue();
118 public final double getDistance( final String name1, final String name2 ) {
120 return _m.getValue( _m.getIndex( name1 ), _m.getIndex( name2 ) );
122 catch ( final Exception e ) {
128 * Returns the numbers of number of ext nodes in gene trees analyzed (after
131 * @return number of ext nodes in gene trees analyzed (after stripping)
133 public final int getExtNodesOfAnalyzedGeneTrees() {
138 * Returns a HashMap containing the inferred orthologs of the external gene
139 * tree node with the sequence name seq_name. Sequence names are the keys
140 * (String), numbers of observations are the values (Int). Orthologs are to
141 * be inferred by method "inferOrthologs". Throws an exception if seq_name
145 * sequence name of a external node of the gene trees
146 * @return HashMap containing the inferred orthologs
147 * (name(String)->value(Int))
149 public final HashMap<String, Integer> getInferredOrthologs( final String seq_name ) {
150 if ( _o_hash_maps == null ) {
153 return _o_hash_maps.get( seq_name );
156 private final HashMap<String, Integer> getInferredSubtreeNeighbors( final String seq_name ) {
157 if ( _sn_hash_maps == null ) {
160 return _sn_hash_maps.get( seq_name );
164 * Returns a HashMap containing the inferred "super orthologs" of the
165 * external gene tree node with the sequence name seq_name. Sequence names
166 * are the keys (String), numbers of observations are the values (Int).
167 * Super orthologs are to be inferred by method "inferOrthologs". Throws an
168 * exception if seq_name is not found.
171 * sequence name of a external node of the gene trees
172 * @return HashMap containing the inferred super orthologs
173 * (name(String)->value(Int))
175 public final HashMap<String, Integer> getInferredSuperOrthologs( final String seq_name ) {
176 if ( _so_hash_maps == null ) {
179 return _so_hash_maps.get( seq_name );
183 * Returns a HashMap containing the inferred "ultra paralogs" of the
184 * external gene tree node with the sequence name seq_name. Sequence names
185 * are the keys (String), numbers of observations are the values (Int).
186 * "ultra paralogs" are to be inferred by method "inferOrthologs". Throws an
187 * exception if seq_name is not found.
190 * sequence name of a external node of the gene trees
191 * @return HashMap containing the inferred ultra paralogs
192 * (name(String)->value(Int))
194 public final HashMap<String, Integer> getInferredUltraParalogs( final String seq_name ) {
195 if ( _up_hash_maps == null ) {
198 return _up_hash_maps.get( seq_name );
202 * Returns the time (in ms) needed to run "inferOrthologs". Final variable
203 * TIME needs to be set to true.
205 * @return time (in ms) needed to run method "inferOrthologs"
207 public long getTime() {
212 * Infers the orthologs (as well the "super orthologs", the "subtree
213 * neighbors", and the "ultra paralogs") for each external node of the gene
214 * Trees in multiple tree File gene_trees_file (=output of PHYLIP NEIGHBOR,
215 * for example). Tallies how many times each sequence is (super-)
216 * orthologous towards the query. Tallies how many times each sequence is
217 * ultra paralogous towards the query. Tallies how many times each sequence
218 * is a subtree neighbor of the query. Gene duplications are inferred using
219 * SDI. Modifies its argument species_tree. Is a little faster than
220 * "inferOrthologs(File,Phylogeny)" since orthologs are only inferred for
223 * To obtain the results use the methods listed below.
225 * @param gene_trees_file
226 * a File containing gene Trees in NH format, which is the result
227 * of performing a bootstrap analysis in PHYLIP
228 * @param species_tree
229 * a species Phylogeny, which has species names in its species
232 * the sequence name of the squence whose orthologs are to be
234 * @throws SDIException
236 public void inferOrthologs( final File gene_trees_file, final Phylogeny species_tree, final String query )
237 throws IOException, SDIException {
240 _time = System.currentTimeMillis();
242 if ( !gene_trees_file.exists() ) {
243 throw new IllegalArgumentException( gene_trees_file.getAbsolutePath() + " does not exist." );
245 else if ( !gene_trees_file.isFile() ) {
246 throw new IllegalArgumentException( gene_trees_file.getAbsolutePath() + " is not a file." );
248 // Read in first tree to get its sequence names
249 // and strip species_tree.
250 final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
251 final Phylogeny gene_tree = factory.create( gene_trees_file, new PhyloXmlParser() )[ 0 ];
252 // Removes from species_tree all species not found in gene_tree.
253 PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( gene_tree, species_tree );
254 // Removes from gene_tree all species not found in species_tree.
255 PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gene_tree );
256 _seq_names = getAllExternalSequenceNames( gene_tree );
257 if ( ( _seq_names == null ) || ( _seq_names.length < 1 ) ) {
260 _o_hash_maps = new HashMap<String, HashMap<String, Integer>>();
261 _so_hash_maps = new HashMap<String, HashMap<String, Integer>>();
262 _up_hash_maps = new HashMap<String, HashMap<String, Integer>>();
263 _sn_hash_maps = new HashMap<String, HashMap<String, Integer>>();
264 _o_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.length ) );
265 _so_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.length ) );
266 _up_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.length ) );
267 _sn_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.length ) );
268 // Go through all gene trees in the file.
269 final Phylogeny[] gene_trees = factory.create( gene_trees_file, new PhyloXmlParser() );
270 for( final Phylogeny gt : gene_trees ) {
272 // Removes from gene_tree all species not found in species_tree.
273 PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gt );
274 inferOrthologsHelper( gt, species_tree, query );
275 // System.out.println( bs );
279 _time = ( System.currentTimeMillis() - _time );
283 // Helper method which performs the actual ortholog inference for
284 // the external node with seqname query.
285 private void inferOrthologsHelper( final Phylogeny gene_tree, final Phylogeny species_tree, final String query )
286 throws SDIException {
287 Phylogeny assigned_tree = null;
288 List<PhylogenyNode> nodes = null;
289 final SDIR sdiunrooted = new SDIR();
290 List<PhylogenyNode> orthologs = null;
291 List<PhylogenyNode> super_orthologs = null;
292 List<PhylogenyNode> ultra_paralogs = null;
293 List<PhylogenyNode> subtree_neighbors = null;
294 assigned_tree = sdiunrooted.infer( gene_tree,
296 RIO.ROOT_BY_MINIMIZING_MAPPING_COST,
297 RIO.ROOT_BY_MINIMIZING_SUM_OF_DUPS,
298 RIO.ROOT_BY_MINIMIZING_TREE_HEIGHT,
301 setExtNodesOfAnalyzedGeneTrees( assigned_tree.getNumberOfExternalNodes() );
302 nodes = assigned_tree.getNodesViaSequenceName( query );
303 if ( nodes.size() > 1 ) {
304 throw new IllegalArgumentException( "node named [" + query + "] not unique" );
306 else if ( nodes.isEmpty() ) {
307 throw new IllegalArgumentException( "no node containing a sequence named [" + query + "] found" );
309 final PhylogenyNode query_node = nodes.get( 0 );
310 final PhylogenyMethods methods = PhylogenyMethods.getInstance();
311 orthologs = methods.getOrthologousNodes( assigned_tree, query_node );
312 updateHash( _o_hash_maps, query, orthologs );
313 super_orthologs = PhylogenyMethods.getSuperOrthologousNodes( query_node );
314 updateHash( _so_hash_maps, query, super_orthologs );
315 subtree_neighbors = getSubtreeNeighbors( query_node, 2 );
316 updateHash( _sn_hash_maps, query, subtree_neighbors );
317 ultra_paralogs = PhylogenyMethods.getUltraParalogousNodes( query_node );
318 updateHash( _up_hash_maps, query, ultra_paralogs );
322 * Returns an ArrayList containg the names of orthologs of the PhylogenyNode
323 * with seq name seq_name.
326 * sequence name of a external node of the gene trees
327 * @param threshold_orthologs
328 * the minimal number of observations for a a sequence to be
329 * reported as orthologous as percentage (0.0-100.0%)
330 * @return ArrayList containg the names of orthologs of the PhylogenyNode
331 * with seq name seq_name
333 public ArrayList<String> inferredOrthologsToArrayList( final String seq_name, double threshold_orthologs ) {
334 HashMap<String, Integer> o_hashmap = null;
337 final ArrayList<String> arraylist = new ArrayList<String>();
338 if ( _o_hash_maps == null ) {
339 throw new RuntimeException( "Orthologs have not been calculated (successfully)." );
341 if ( threshold_orthologs < 0.0 ) {
342 threshold_orthologs = 0.0;
344 else if ( threshold_orthologs > 100.0 ) {
345 threshold_orthologs = 100.0;
347 o_hashmap = getInferredOrthologs( seq_name );
348 if ( o_hashmap == null ) {
349 throw new RuntimeException( "Orthologs for " + seq_name + " were not established." );
351 if ( _seq_names.length > 0 ) {
352 I: for( int i = 0; i < _seq_names.length; ++i ) {
353 name = _seq_names[ i ];
354 if ( name.equals( seq_name ) ) {
357 o = getBootstrapValueFromHash( o_hashmap, name );
358 if ( o < threshold_orthologs ) {
361 arraylist.add( name );
368 * Returns a String containg the names of orthologs of the PhylogenyNode
369 * with seq name query_name. The String also contains how many times a
370 * particular ortholog has been observed.
373 * The output order is (per line): Name, Ortholog, Subtree neighbor, Super
377 * The sort priority of this is determined by sort in the following manner:
380 * <li>1 : Ortholog, Super ortholog
381 * <li>2 : Super ortholog, Ortholog
382 * <li>3 : Ortholog, Distance
383 * <li>4 : Distance, Ortholog
384 * <li>5 : Ortholog, Super ortholog, Distance
385 * <li>6 : Ortholog, Distance, Super ortholog
386 * <li>7 : Super ortholog, Ortholog, Distance
387 * <li>8 : Super ortholog, Distance, Ortholog
388 * <li>9 : Distance, Ortholog, Super ortholog
389 * <li>10 : Distance, Super ortholog, Ortholog
390 * <li>11 : Ortholog, Subtree neighbor, Distance
391 * <li>12 : Ortholog, Subtree neighbor, Super ortholog, Distance (default)
392 * <li>13 : Ortholog, Super ortholog, Subtree neighbor, Distance
393 * <li>14 : Subtree neighbor, Ortholog, Super ortholog, Distance
394 * <li>15 : Subtree neighbor, Distance, Ortholog, Super ortholog
395 * <li>16 : Ortholog, Distance, Subtree neighbor, Super ortholog
396 * <li>17 : Ortholog, Subtree neighbor, Distance, Super ortholog
399 * Returns "-" if no putative orthologs have been found (given
400 * threshold_orthologs).
402 * Orthologs are to be inferred by method "inferOrthologs".
404 * (Last modified: 05/08/01)
407 * sequence name of a external node of the gene trees
409 * order and sort priority
410 * @param threshold_orthologs
411 * the minimal number of observations for a a sequence to be
412 * reported as orthologous, in percents (0.0-100.0%)
413 * @param threshold_subtreeneighborings
414 * the minimal number of observations for a a sequence to be
415 * reported as orthologous, in percents (0.0-100.0%)
416 * @return String containing the inferred orthologs, String containing "-"
417 * if no orthologs have been found null in case of error
418 * @see #inferOrthologs(File,Phylogeny,String)
419 * @see #inferOrthologs(Phylogeny[],Phylogeny)
420 * @see #inferOrthologs(File,Phylogeny)
421 * @see #getOrder(int)
423 public StringBuffer inferredOrthologsToString( final String query_name,
425 double threshold_orthologs,
426 double threshold_subtreeneighborings ) {
427 HashMap<String, Integer> o_hashmap = null;
428 HashMap<String, Integer> s_hashmap = null;
429 HashMap<String, Integer> n_hashmap = null;
431 double o = 0.0, // Orthologs.
432 s = 0.0, // Super orthologs.
433 sn = 0.0, // Subtree neighbors.
434 value1 = 0.0, value2 = 0.0, value3 = 0.0, value4 = 0.0, d = 0.0;
435 final ArrayList<Tuplet> nv = new ArrayList<Tuplet>();
436 if ( ( _o_hash_maps == null ) || ( _so_hash_maps == null ) || ( _sn_hash_maps == null ) ) {
437 throw new RuntimeException( "Orthologs have not been calculated (successfully)" );
439 if ( ( sort < 0 ) || ( sort > 17 ) ) {
442 if ( ( sort > 2 ) && ( _m == null ) && ( _l == null ) ) {
443 throw new RuntimeException( "Distance list or matrix have not been read in (successfully)" );
445 if ( threshold_orthologs < 0.0 ) {
446 threshold_orthologs = 0.0;
448 else if ( threshold_orthologs > 100.0 ) {
449 threshold_orthologs = 100.0;
451 if ( threshold_subtreeneighborings < 0.0 ) {
452 threshold_subtreeneighborings = 0.0;
454 else if ( threshold_subtreeneighborings > 100.0 ) {
455 threshold_subtreeneighborings = 100.0;
457 o_hashmap = getInferredOrthologs( query_name );
458 s_hashmap = getInferredSuperOrthologs( query_name );
459 n_hashmap = getInferredSubtreeNeighbors( query_name );
460 if ( ( o_hashmap == null ) || ( s_hashmap == null ) || ( n_hashmap == null ) ) {
461 throw new RuntimeException( "Orthologs for " + query_name + " were not established" );
463 final StringBuffer orthologs = new StringBuffer();
464 if ( _seq_names.length > 0 ) {
465 I: for( int i = 0; i < _seq_names.length; ++i ) {
466 name = _seq_names[ i ];
467 if ( name.equals( query_name ) ) {
470 o = getBootstrapValueFromHash( o_hashmap, name );
471 if ( o < threshold_orthologs ) {
474 sn = getBootstrapValueFromHash( n_hashmap, name );
475 if ( sn < threshold_subtreeneighborings ) {
478 s = getBootstrapValueFromHash( s_hashmap, name );
481 d = getDistance( query_name, name );
484 d = getDistance( name );
489 nv.add( new Tuplet( name, o, 5 ) );
492 nv.add( new Tuplet( name, o, s, 5 ) );
495 nv.add( new Tuplet( name, s, o, 5 ) );
498 nv.add( new Tuplet( name, o, d, 1 ) );
501 nv.add( new Tuplet( name, d, o, 0 ) );
504 nv.add( new Tuplet( name, o, s, d, 2 ) );
507 nv.add( new Tuplet( name, o, d, s, 1 ) );
510 nv.add( new Tuplet( name, s, o, d, 2 ) );
513 nv.add( new Tuplet( name, s, d, o, 1 ) );
516 nv.add( new Tuplet( name, d, o, s, 0 ) );
519 nv.add( new Tuplet( name, d, s, o, 0 ) );
522 nv.add( new Tuplet( name, o, sn, d, 2 ) );
525 nv.add( new Tuplet( name, o, sn, s, d, 3 ) );
528 nv.add( new Tuplet( name, o, s, sn, d, 3 ) );
531 nv.add( new Tuplet( name, sn, o, s, d, 3 ) );
534 nv.add( new Tuplet( name, sn, d, o, s, 1 ) );
537 nv.add( new Tuplet( name, o, d, sn, s, 1 ) );
540 nv.add( new Tuplet( name, o, sn, d, s, 2 ) );
543 nv.add( new Tuplet( name, o, 5 ) );
545 } // End of I for loop.
546 if ( ( nv != null ) && ( nv.size() > 0 ) ) {
547 orthologs.append( "[seq name]\t\t[ortho]\t[st-n]\t[sup-o]\t[dist]" + ForesterUtil.LINE_SEPARATOR );
548 final Tuplet[] nv_array = new Tuplet[ nv.size() ];
549 for( int j = 0; j < nv.size(); ++j ) {
550 nv_array[ j ] = nv.get( j );
552 Arrays.sort( nv_array );
553 for( int i = 0; i < nv_array.length; ++i ) {
554 name = nv_array[ i ].getKey();
555 value1 = nv_array[ i ].getValue1();
556 value2 = nv_array[ i ].getValue2();
557 value3 = nv_array[ i ].getValue3();
558 value4 = nv_array[ i ].getValue4();
559 orthologs.append( addNameAndValues( name, value1, value2, value3, value4, sort ) );
563 // No orthologs found.
564 if ( ( orthologs == null ) || ( orthologs.length() < 1 ) ) {
565 orthologs.append( "-" );
568 } // inferredOrthologsToString( String, int, double )
570 // Helper method for inferredOrthologTableToFile.
571 // Returns individual rows for the table as String.
572 private String inferredOrthologsToTableHelper( final String name2,
573 final String[] names,
575 final boolean super_orthologs ) {
576 HashMap<String, Integer> hashmap = null;
577 String name = null, orthologs = new String( "" );
579 if ( !super_orthologs ) {
580 hashmap = getInferredOrthologs( name2 );
583 hashmap = getInferredSuperOrthologs( name2 );
585 if ( hashmap == null ) {
586 throw new RuntimeException( "Unexpected failure in method inferredOrthologsToTableHelper" );
588 for( int i = 0; i < names.length; ++i ) {
590 if ( !hashmap.containsKey( name ) ) {
594 value = hashmap.get( name );
599 throw new RuntimeException( "Failed sanity check in method inferredOrthologsToTableHelper: value not 0." );
601 orthologs += ( " " + "\t" );
604 orthologs += ( value + "\t" );
611 * Writes the orthologs for each external node of the gene trees to outfile
612 * in the form of a table. Orthologs are to be inferred by method
613 * "inferOrthologs". Overwrites without asking! (Last modified: 12/07/00)
616 * the File to write to
618 public void inferredOrthologTableToFile( final File outfile ) throws IOException {
619 if ( _o_hash_maps == null ) {
622 inferredOrthologTableToFile( outfile, false );
625 // Helper for inferredOrthologTableToFile(File).
626 // (Last modified: 11/28/00)
627 private void inferredOrthologTableToFile( final File outfile, final boolean super_orthologs ) throws IOException {
628 String name = "", line = "";
629 PrintWriter out = null;
630 if ( _seq_names == null ) {
631 throw new RuntimeException( "inferredOrthologTableToFile: seq_names_ is null." );
633 Arrays.sort( _seq_names );
634 out = new PrintWriter( new FileWriter( outfile ), true );
636 throw new RuntimeException( "inferredOrthologTableToFile: failure to create PrintWriter." );
639 for( int i = 0; i < _seq_names.length; ++i ) {
640 line += ( i + ")\t" );
644 for( int i = 0; i < _seq_names.length; ++i ) {
645 name = _seq_names[ i ];
646 if ( name.length() < 8 ) {
647 line = i + ")\t" + name + "\t\t\t";
649 else if ( name.length() < 16 ) {
650 line = i + ")\t" + name + "\t\t";
653 line = i + ")\t" + name + "\t";
655 line += inferredOrthologsToTableHelper( name, _seq_names, i, super_orthologs );
662 * Writes the "super orthologs" for each external nodes of the gene trees to
663 * outfile in the form of a table. Super orthologs are to be inferred by
664 * method "inferOrthologs". Overwrites without asking!
667 * the File to write to
669 public void inferredSuperOrthologTableToFile( final File outfile ) throws IOException {
670 if ( _so_hash_maps == null ) {
673 inferredOrthologTableToFile( outfile, true );
677 * Returns a String containg the names of orthologs of the PhylogenyNode
678 * with seq name query_name. The String also contains how many times a
679 * particular ortholog has been observed. Returns "-" if no putative
680 * orthologs have been found (given threshold_orthologs).
682 * Orthologs are to be inferred by method "inferOrthologs".
685 * sequence name of a external node of the gene trees
686 * @param return_dists
687 * @param threshold_ultra_paralogs
689 * @return String containing the inferred orthologs, String containing "-"
690 * if no orthologs have been found null in case of error
692 public String inferredUltraParalogsToString( final String query_name,
693 final boolean return_dists,
694 double threshold_ultra_paralogs ) {
695 HashMap<String, Integer> sp_hashmap = null;
696 String name = "", ultra_paralogs = "";
698 double sp = 0.0, value1 = 0.0, value2 = 0.0, d = 0.0;
699 final List<Tuplet> nv = new ArrayList<Tuplet>();
700 if ( threshold_ultra_paralogs < 1.0 ) {
701 threshold_ultra_paralogs = 1.0;
703 else if ( threshold_ultra_paralogs > 100.0 ) {
704 threshold_ultra_paralogs = 100.0;
706 if ( _up_hash_maps == null ) {
707 throw new RuntimeException( "Ultra paralogs have not been calculated (successfully)." );
709 if ( return_dists && ( _m == null ) && ( _l == null ) ) {
710 throw new RuntimeException( "Distance list or matrix have not been read in (successfully)." );
712 sp_hashmap = getInferredUltraParalogs( query_name );
713 if ( sp_hashmap == null ) {
714 throw new RuntimeException( "Ultra paralogs for " + query_name + " were not established" );
716 if ( _seq_names.length > 0 ) {
717 I: for( int i = 0; i < _seq_names.length; ++i ) {
718 name = _seq_names[ i ];
719 if ( name.equals( query_name ) ) {
722 sp = getBootstrapValueFromHash( sp_hashmap, name );
723 if ( sp < threshold_ultra_paralogs ) {
726 if ( return_dists ) {
728 d = getDistance( query_name, name );
731 d = getDistance( name );
733 nv.add( new Tuplet( name, sp, d, 1 ) );
736 nv.add( new Tuplet( name, sp, 5 ) );
738 } // End of I for loop.
739 if ( ( nv != null ) && ( nv.size() > 0 ) ) {
740 final Tuplet[] nv_array = new Tuplet[ nv.size() ];
741 for( int j = 0; j < nv.size(); ++j ) {
742 nv_array[ j ] = nv.get( j );
744 Arrays.sort( nv_array );
745 if ( return_dists ) {
751 for( int i = 0; i < nv_array.length; ++i ) {
752 name = nv_array[ i ].getKey();
753 value1 = nv_array[ i ].getValue1();
754 value2 = nv_array[ i ].getValue2();
755 ultra_paralogs += addNameAndValues( name, value1, value2, 0.0, 0.0, sort );
759 // No ultra paralogs found.
760 if ( ( ultra_paralogs == null ) || ( ultra_paralogs.length() < 1 ) ) {
761 ultra_paralogs = "-";
763 return ultra_paralogs;
766 public final void readDistanceMatrix( final File matrix_file ) throws IOException {
767 DistanceMatrix[] matrices = null;
768 final SymmetricalDistanceMatrixParser parser = SymmetricalDistanceMatrixParser.createInstance();
769 matrices = parser.parse( matrix_file );
770 if ( ( matrices == null ) || ( matrices.length == 0 ) ) {
771 throw new IOException( "failed to parse distance matrix from [" + matrix_file + "]" );
773 if ( matrices.length > 1 ) {
774 throw new IOException( "[" + matrix_file + "] contains more than once distance matrix" );
780 * Brings this into the same state as immediately after construction.
782 private final void reset() {
784 _so_hash_maps = null;
785 _up_hash_maps = null;
795 * Sets the numbers of trees analyzed.
797 * numbers of trees analyzed
799 private void setBootstraps( int i ) {
807 * Sets number of ext nodes in gene trees analyzed (after stripping).
809 * number of ext nodes in gene trees analyzed (after stripping)
811 private void setExtNodesOfAnalyzedGeneTrees( int i ) {
818 // Helper for doInferOrthologs( Phylogeny, Phylogeny, String )
819 // and doInferOrthologs( Phylogeny, Phylogeny ).
820 private void updateHash( final HashMap<String, HashMap<String, Integer>> counter_map,
821 final String query_seq_name,
822 final List<PhylogenyNode> nodes ) {
823 final HashMap<String, Integer> hash_map = counter_map.get( query_seq_name );
824 if ( hash_map == null ) {
825 throw new RuntimeException( "Unexpected failure in method updateHash." );
827 for( int j = 0; j < nodes.size(); ++j ) {
828 final String seq_name = ( nodes.get( j ) ).getNodeData().getSequence().getName();
829 if ( hash_map.containsKey( seq_name ) ) {
830 hash_map.put( seq_name, hash_map.get( seq_name ) + 1 );
833 hash_map.put( seq_name, 1 );
838 // Helper method for inferredOrthologsToString
839 // and inferredUltraParalogsToString.
840 private final static String addNameAndValues( final String name,
846 final java.text.DecimalFormat df = new java.text.DecimalFormat( "0.#####" );
847 df.setDecimalSeparatorAlwaysShown( false );
849 if ( name.length() < 8 ) {
850 line += ( name + "\t\t\t" );
852 else if ( name.length() < 16 ) {
853 line += ( name + "\t\t" );
856 line += ( name + "\t" );
860 line += addToLine( value1, df );
866 line += addToLine( value1, df );
868 line += addToLine( value2, df );
872 line += addToLine( value2, df );
874 line += addToLine( value1, df );
878 line += addToLine( value1, df );
881 line += addToLine( value2, df );
884 line += addToLine( value2, df );
887 line += addToLine( value1, df );
890 line += addToLine( value1, df );
892 line += addToLine( value2, df );
893 line += addToLine( value3, df );
896 line += addToLine( value1, df );
898 line += addToLine( value3, df );
899 line += addToLine( value2, df );
902 line += addToLine( value2, df );
904 line += addToLine( value1, df );
905 line += addToLine( value3, df );
908 line += addToLine( value3, df );
910 line += addToLine( value1, df );
911 line += addToLine( value2, df );
914 line += addToLine( value2, df );
916 line += addToLine( value3, df );
917 line += addToLine( value1, df );
920 line += addToLine( value3, df );
922 line += addToLine( value2, df );
923 line += addToLine( value1, df );
926 line += addToLine( value1, df );
927 line += addToLine( value2, df );
929 line += addToLine( value3, df );
932 line += addToLine( value1, df );
933 line += addToLine( value2, df );
934 line += addToLine( value3, df );
935 line += addToLine( value4, df );
938 line += addToLine( value1, df );
939 line += addToLine( value3, df );
940 line += addToLine( value2, df );
941 line += addToLine( value4, df );
944 line += addToLine( value2, df );
945 line += addToLine( value1, df );
946 line += addToLine( value3, df );
947 line += addToLine( value4, df );
950 line += addToLine( value3, df );
951 line += addToLine( value1, df );
952 line += addToLine( value4, df );
953 line += addToLine( value2, df );
956 line += addToLine( value1, df );
957 line += addToLine( value3, df );
958 line += addToLine( value4, df );
959 line += addToLine( value2, df );
962 line += addToLine( value1, df );
963 line += addToLine( value2, df );
964 line += addToLine( value4, df );
965 line += addToLine( value3, df );
968 line += addToLine( value1, df );
972 line += addToLine( value1, df );
973 line += addToLine( value2, df );
976 line += ForesterUtil.LINE_SEPARATOR;
980 // Helper for addNameAndValues.
981 private final static String addToLine( final double value, final java.text.DecimalFormat df ) {
983 if ( value != Tuplet.DEFAULT ) {
984 s = df.format( value ) + "\t";
992 private static String[] getAllExternalSequenceNames( final Phylogeny phy ) {
993 if ( phy.isEmpty() ) {
997 final String[] names = new String[ phy.getNumberOfExternalNodes() ];
998 for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
999 names[ i++ ] = iter.next().getNodeData().getSequence().getName();
1005 * Returns the order in which ortholog (o), "super ortholog" (s) and
1006 * distance (d) are returned and sorted (priority of sort always goes from
1007 * left to right), given sort. For the meaning of sort
1009 * @see #inferredOrthologsToString(String,int,double,double)
1012 * determines order and sort priority
1013 * @return String indicating the order
1015 public final static String getOrder( final int sort ) {
1019 order = "orthologies";
1022 order = "orthologies > super orthologies";
1025 order = "super orthologies > orthologies";
1028 order = "orthologies > distance to query";
1031 order = "distance to query > orthologies";
1034 order = "orthologies > super orthologies > distance to query";
1037 order = "orthologies > distance to query > super orthologies";
1040 order = "super orthologies > orthologies > distance to query";
1043 order = "super orthologies > distance to query > orthologies";
1046 order = "distance to query > orthologies > super orthologies";
1049 order = "distance to query > super orthologies > orthologies";
1052 order = "orthologies > subtree neighbors > distance to query";
1055 order = "orthologies > subtree neighbors > super orthologies > distance to query";
1058 order = "orthologies > super orthologies > subtree neighbors > distance to query";
1061 order = "subtree neighbors > orthologies > super orthologies > distance to query";
1064 order = "subtree neighbors > distance to query > orthologies > super orthologies";
1067 order = "orthologies > distance to query > subtree neighbors > super orthologies";
1070 order = "orthologies > subtree neighbors > distance to query > super orthologies";
1073 order = "orthologies";
1079 public final static StringBuffer getOrderHelp() {
1080 final StringBuffer sb = new StringBuffer();
1081 sb.append( " 0: orthologies" + ForesterUtil.LINE_SEPARATOR );
1082 sb.append( " 1: orthologies > super orthologies" + ForesterUtil.LINE_SEPARATOR );
1083 sb.append( " 2: super orthologies > orthologies" + ForesterUtil.LINE_SEPARATOR );
1084 sb.append( " 3: orthologies > distance to query" + ForesterUtil.LINE_SEPARATOR );
1085 sb.append( " 4: distance to query > orthologies" + ForesterUtil.LINE_SEPARATOR );
1086 sb.append( " 5: orthologies > super orthologies > distance to query" + ForesterUtil.LINE_SEPARATOR );
1087 sb.append( " 6: orthologies > distance to query > super orthologies" + ForesterUtil.LINE_SEPARATOR );
1088 sb.append( " 7: super orthologies > orthologies > distance to query" + ForesterUtil.LINE_SEPARATOR );
1089 sb.append( " 8: super orthologies > distance to query > orthologies" + ForesterUtil.LINE_SEPARATOR );
1090 sb.append( " 9: distance to query > orthologies > super orthologies" + ForesterUtil.LINE_SEPARATOR );
1091 sb.append( " 10: distance to query > super orthologies > orthologies" + ForesterUtil.LINE_SEPARATOR );
1092 sb.append( " 11: orthologies > subtree neighbors > distance to query" + ForesterUtil.LINE_SEPARATOR );
1093 sb.append( " 12: orthologies > subtree neighbors > super orthologies > distance to query"
1094 + ForesterUtil.LINE_SEPARATOR );
1095 sb.append( " 13: orthologies > super orthologies > subtree neighbors > distance to query"
1096 + ForesterUtil.LINE_SEPARATOR );
1097 sb.append( " 14: subtree neighbors > orthologies > super orthologies > distance to query"
1098 + ForesterUtil.LINE_SEPARATOR );
1099 sb.append( " 15: subtree neighbors > distance to query > orthologies > super orthologies"
1100 + ForesterUtil.LINE_SEPARATOR );
1101 sb.append( " 16: orthologies > distance to query > subtree neighbors > super orthologies"
1102 + ForesterUtil.LINE_SEPARATOR );
1103 sb.append( " 17: orthologies > subtree neighbors > distance to query > super orthologies"
1104 + ForesterUtil.LINE_SEPARATOR );
1108 private final static List<PhylogenyNode> getSubtreeNeighbors( final PhylogenyNode query, final int level ) {
1109 PhylogenyNode node = query;
1110 if ( !node.isExternal() ) {
1113 if ( !node.isRoot() ) {
1114 node = node.getParent();
1117 if ( !node.isRoot() ) {
1118 node = node.getParent();
1122 throw new IllegalArgumentException( "currently only supporting level 2 subtree neighbors " );
1124 final List<PhylogenyNode> sn = node.getAllExternalDescendants();