clean up
[jalview.git] / forester / java / src / org / forester / sdi / SDIx.java
1 // $Id:
2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
4 //
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
7 // All rights reserved
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
22 //
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org/forester
25
26 package org.forester.sdi;
27
28 import java.io.File;
29 import java.io.IOException;
30 import java.util.ArrayList;
31 import java.util.HashSet;
32 import java.util.List;
33 import java.util.Set;
34
35 import org.forester.io.parsers.phyloxml.PhyloXmlParser;
36 import org.forester.io.writers.PhylogenyWriter;
37 import org.forester.phylogeny.Phylogeny;
38 import org.forester.phylogeny.PhylogenyMethods;
39 import org.forester.phylogeny.PhylogenyNode;
40 import org.forester.phylogeny.data.Taxonomy;
41 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
42 import org.forester.phylogeny.factories.PhylogenyFactory;
43 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
44 import org.forester.util.ForesterUtil;
45
46 public class SDIx {
47
48     public SDIx() {
49     }
50
51     private void analyze( final Phylogeny gene_tree,
52                           final String gene_tree_file_name,
53                           final Phylogeny[] species_trees,
54                           final File out_dir ) throws IOException {
55         final boolean minimize_cost = true;
56         final boolean minimize_sum_of_dup = true;
57         final boolean minimize_height = true;
58         final int trees_to_return = 1;
59         System.out.println( gene_tree_file_name + ": " + gene_tree.getName() );
60         final Set<Taxonomy> species_tree_species = getAllExternalSpecies( species_trees[ 0 ] );
61         final PhylogenyWriter w = new PhylogenyWriter();
62         for( final Phylogeny species_tree : species_trees ) {
63             PhylogenyMethods.deleteExternalNodesPositiveSelection( species_tree_species, gene_tree );
64             if ( gene_tree.isEmpty() ) {
65                 System.out.println( " >> empty: " + gene_tree_file_name + ": " + gene_tree.getName() );
66                 continue;
67             }
68             final File outfile = new File( out_dir + ForesterUtil.FILE_SEPARATOR + gene_tree_file_name );
69             if ( outfile.exists() ) {
70                 System.out
71                         .println( " >> already exists, skipping: " + gene_tree_file_name + ": " + gene_tree.getName() );
72             }
73             final SDIR sdir = new SDIR();
74             final Phylogeny[] analyzed_gene_trees = sdir.infer( gene_tree,
75                                                                 species_tree,
76                                                                 minimize_cost,
77                                                                 minimize_sum_of_dup,
78                                                                 minimize_height,
79                                                                 true,
80                                                                 trees_to_return );
81             final int duplications = sdir.getMinimalDuplications();
82             final int mapping_cost = sdir.getMinimalMappingCost();
83             final List<Phylogeny> phys = new ArrayList<Phylogeny>();
84             for( final Phylogeny phy : analyzed_gene_trees ) {
85                 phys.add( phy );
86             }
87             w.toPhyloXML( outfile, phys, 0, ForesterUtil.LINE_SEPARATOR );
88         }
89     }
90
91     private void checkSpeciesTreesForEqualNumberOfExtNodes( final Phylogeny[] species_trees ) {
92         int ext_nodes = -1;
93         for( final Phylogeny phylogeny : species_trees ) {
94             if ( ext_nodes < 0 ) {
95                 ext_nodes = phylogeny.getNumberOfExternalNodes();
96             }
97             else if ( ext_nodes != phylogeny.getNumberOfExternalNodes() ) {
98                 throw new IllegalArgumentException( "species trees must have all the same number of external nodes" );
99             }
100         }
101     }
102
103     public void method1( final List<File> gene_tree_files, final Phylogeny[] species_trees, final File out_dir )
104             throws IOException {
105         checkSpeciesTreesForEqualNumberOfExtNodes( species_trees );
106         final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
107         for( final File gene_tree_file : gene_tree_files ) {
108             if ( ForesterUtil.isReadableFile( gene_tree_file ) != null ) {
109                 throw new IOException( "[" + gene_tree_file + "] is not readable" );
110             }
111             Phylogeny[] gene_trees = null;
112             gene_trees = factory.create( gene_tree_file, new PhyloXmlParser() );
113             if ( gene_trees.length != 1 ) {
114                 throw new IOException( "[" + gene_tree_file + "] contains " + gene_trees.length
115                         + " gene trees, expecting precisely one" );
116             }
117             analyze( gene_trees[ 0 ], gene_tree_file.getName(), species_trees, out_dir );
118         }
119     }
120
121     private static Set<Taxonomy> getAllExternalSpecies( final Phylogeny phy ) {
122         final Set<Taxonomy> specs = new HashSet<Taxonomy>();
123         for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
124             final PhylogenyNode n = it.next();
125             if ( n.getNodeData().isHasTaxonomy() ) {
126                 specs.add( n.getNodeData().getTaxonomy() );
127             }
128             else {
129                 throw new IllegalArgumentException( "node " + n.getId() + " has no taxonomic data" );
130             }
131         }
132         return specs;
133     }
134 }