- return _up_hash_maps.get( seq_name );
- }
-
- /**
-  * Reports the timing value recorded by "inferOrthologs". The value is
-  * only recorded when the constant RIO.TIME is true.
-  *
-  * @return the recorded run time of "inferOrthologs", in milliseconds
-  */
- public long getTime() {
-     return this._time;
- }
-
- /**
-  * Infers the orthologs (as well as the "super orthologs", the "subtree
-  * neighbors", and the "ultra paralogs") of the sequence named query in
-  * every gene tree found in gene_trees_file (e.g. the output of PHYLIP
-  * NEIGHBOR). For each of these four relations, tallies how many times
-  * each sequence stands in that relation to the query. Gene duplications
-  * are inferred using SDI.
-  * <p>
-  * Modifies its argument species_tree: species that do not occur in the
-  * first gene tree are removed from it.
-  * <p>
-  * Is a little faster than "inferOrthologs(File,Phylogeny)" since orthologs
-  * are only inferred for query.
-  * <p>
-  * If RIO.TIME is true, the elapsed time is recorded and can be read with
-  * getTime().
-  *
-  * @param gene_trees_file
-  *            a File containing gene trees in NH format, which is the result
-  *            of performing a bootstrap analysis in PHYLIP
-  * @param species_tree
-  *            a species Phylogeny, which has species names in its species
-  *            fields
-  * @param query
-  *            the sequence name of the sequence whose orthologs are to be
-  *            inferred
-  * @throws IOException
-  *             if the file contains no gene trees, or if no sequence names
-  *             could be obtained from the first gene tree
-  * @throws SDIException
-  *             propagated from the per-tree inference helper
-  */
- public void inferOrthologs( final File gene_trees_file, final Phylogeny species_tree, final String query )
-         throws IOException, SDIException {
-     if ( RIO.TIME ) {
-         _time = System.currentTimeMillis();
-     }
-     final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
-     final PhylogenyParser p = ParserUtils.createParserDependingOnFileType( gene_trees_file, true );
-     if ( p instanceof NHXParser ) {
-         final NHXParser nhx = ( NHXParser ) p;
-         nhx.setReplaceUnderscores( false );
-         nhx.setIgnoreQuotes( true );
-         nhx.setTaxonomyExtraction( PhylogenyMethods.TAXONOMY_EXTRACTION.YES );
-     }
-     // Parse the file once; the first tree doubles as the template used to
-     // strip species_tree and to collect the sequence names.
-     final Phylogeny[] gene_trees = factory.create( gene_trees_file, p );
-     if ( ( gene_trees == null ) || ( gene_trees.length < 1 ) ) {
-         throw new IOException( "no gene trees found in [" + gene_trees_file + "]" );
-     }
-     final Phylogeny gene_tree = gene_trees[ 0 ];
-     // Removes from species_tree all species not found in gene_tree.
-     PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( gene_tree, species_tree );
-     // Removes from gene_tree all species not found in species_tree.
-     PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gene_tree );
-     _seq_names = getAllExternalSequenceNames( gene_tree );
-     if ( ( _seq_names == null ) || ( _seq_names.size() < 1 ) ) {
-         throw new IOException( "could not get sequence names" );
-     }
-     // Fresh tally tables, each holding a single entry keyed by the query.
-     _o_hash_maps = new HashMap<String, HashMap<String, Integer>>();
-     _so_hash_maps = new HashMap<String, HashMap<String, Integer>>();
-     _up_hash_maps = new HashMap<String, HashMap<String, Integer>>();
-     _sn_hash_maps = new HashMap<String, HashMap<String, Integer>>();
-     _o_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.size() ) );
-     _so_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.size() ) );
-     _up_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.size() ) );
-     _sn_hash_maps.put( query, new HashMap<String, Integer>( _seq_names.size() ) );
-     // Go through all gene trees in the file.
-     for( final Phylogeny gt : gene_trees ) {
-         // Removes from gt all species not found in species_tree.
-         PhylogenyMethods.taxonomyBasedDeletionOfExternalNodes( species_tree, gt );
-         inferOrthologsHelper( gt, species_tree, query );
-     }
-     // One bootstrap replicate per gene tree analyzed.
-     setBootstraps( gene_trees.length );
-     if ( RIO.TIME ) {
-         _time = ( System.currentTimeMillis() - _time );
-     }
- }
-
- /**
-  * Collects all nodes of phy, visited in preorder, whose label equals
-  * seq_name. For a node that has sequence data the label is the name of
-  * that sequence; for any other node it is the node's own name.
-  *
-  * @param phy
-  *            the Phylogeny to search
-  * @param seq_name
-  *            the name to look for
-  * @return the matching nodes in preorder; empty if there is no match
-  */
- public List<PhylogenyNode> getNodesViaSequenceName( final Phylogeny phy, final String seq_name ) {
-     final List<PhylogenyNode> matches = new ArrayList<PhylogenyNode>();
-     final PhylogenyNodeIterator it = phy.iteratorPreorder();
-     while ( it.hasNext() ) {
-         final PhylogenyNode node = it.next();
-         // Pick the label to compare: sequence name if present, node name otherwise.
-         final String label = node.getNodeData().isHasSequence() ? node.getNodeData().getSequence().getName()
-                 : node.getName();
-         if ( label.equals( seq_name ) ) {
-             matches.add( node );
-         }
-     }
-     return matches;
- }
-
- // Helper method which performs the actual ortholog inference for
- // the external node with seqname query.
- private void inferOrthologsHelper( final Phylogeny gene_tree, final Phylogeny species_tree, final String query )
- throws SDIException {
- Phylogeny assigned_tree = null;
- List<PhylogenyNode> nodes = null;
- final SDIR sdiunrooted = new SDIR();
- List<PhylogenyNode> orthologs = null;
- List<PhylogenyNode> super_orthologs = null;
- List<PhylogenyNode> ultra_paralogs = null;
- List<PhylogenyNode> subtree_neighbors = null;
- assigned_tree = sdiunrooted.infer( gene_tree,
- species_tree,
- RIO.ROOT_BY_MINIMIZING_MAPPING_COST,
- RIO.ROOT_BY_MINIMIZING_SUM_OF_DUPS,
- RIO.ROOT_BY_MINIMIZING_TREE_HEIGHT,
- true,
- 1 )[ 0 ];
- setExtNodesOfAnalyzedGeneTrees( assigned_tree.getNumberOfExternalNodes() );
- nodes = getNodesViaSequenceName( assigned_tree, query );
- if ( nodes.size() > 1 ) {
- throw new IllegalArgumentException( "node named [" + query + "] not unique" );
- }
- else if ( nodes.isEmpty() ) {
- throw new IllegalArgumentException( "no node containing a sequence named [" + query + "] found" );
- }
- final PhylogenyNode query_node = nodes.get( 0 );
- final PhylogenyMethods methods = PhylogenyMethods.getInstance();
- orthologs = methods.getOrthologousNodes( assigned_tree, query_node );
- updateHash( _o_hash_maps, query, orthologs );
- super_orthologs = PhylogenyMethods.getSuperOrthologousNodes( query_node );
- updateHash( _so_hash_maps, query, super_orthologs );
- subtree_neighbors = getSubtreeNeighbors( query_node, 2 );
- updateHash( _sn_hash_maps, query, subtree_neighbors );
- ultra_paralogs = PhylogenyMethods.getUltraParalogousNodes( query_node );
- updateHash( _up_hash_maps, query, ultra_paralogs );