removed clustalo
[jalview.git] / forester / java / src / org / forester / application / surf_paup.java
1 // $Id:
2 //
3 // FORESTER -- software libraries and applications
4 // for evolutionary biology research and applications.
5 //
6 // Copyright (C) 2008-2009 Christian M. Zmasek
7 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
8 // All rights reserved
9 //
10 // This library is free software; you can redistribute it and/or
11 // modify it under the terms of the GNU Lesser General Public
12 // License as published by the Free Software Foundation; either
13 // version 2.1 of the License, or (at your option) any later version.
14 //
15 // This library is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 // Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public
21 // License along with this library; if not, write to the Free Software
22 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 //
24 // Contact: phylosoft @ gmail . com
25 // WWW: www.phylosoft.org
26
27 package org.forester.application;
28
29 import java.io.File;
30 import java.io.IOException;
31 import java.util.ArrayList;
32 import java.util.List;
33
34 import org.forester.evoinference.matrix.character.CharacterStateMatrix;
35 import org.forester.evoinference.matrix.character.CharacterStateMatrix.BinaryStates;
36 import org.forester.evoinference.matrix.character.CharacterStateMatrix.Format;
37 import org.forester.io.parsers.nexus.NexusCharactersParser;
38 import org.forester.io.parsers.nexus.NexusPhylogeniesParser;
39 import org.forester.io.parsers.nexus.PaupLogParser;
40 import org.forester.phylogeny.Phylogeny;
41 import org.forester.phylogeny.PhylogenyNode;
42 import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
43 import org.forester.phylogeny.factories.PhylogenyFactory;
44 import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
45 import org.forester.surfacing.DomainParsimonyCalculator;
46 import org.forester.surfacing.SurfacingUtil;
47 import org.forester.util.CommandLineArguments;
48 import org.forester.util.ForesterUtil;
49
50 public class surf_paup {
51
52     final static private String PRG_VERSION   = "0.90";
53     final static private String PRG_DATE      = "2008.03.28";
54     final static private String E_MAIL        = "czmasek@burnham.org";
55     final static private String WWW           = "www.phylosoft.org/forester/applications/surfacing";
56     final static private String HELP_OPTION_1 = "help";
57     final static private String HELP_OPTION_2 = "h";
58     private static final String PRG_NAME      = "surf_paup";
59
60     public static void main( final String args[] ) {
61         ForesterUtil.printProgramInformation( PRG_NAME, PRG_VERSION, PRG_DATE, E_MAIL, WWW );
62         final List<String> allowed_options = new ArrayList<String>();
63         allowed_options.add( HELP_OPTION_1 );
64         allowed_options.add( HELP_OPTION_2 );
65         if ( ( args.length < 2 ) ) {
66             printHelp();
67             System.exit( -1 );
68         }
69         CommandLineArguments cla = null;
70         try {
71             cla = new CommandLineArguments( args );
72         }
73         catch ( final Exception e ) {
74             ForesterUtil.fatalError( PRG_NAME, e.getMessage() );
75         }
76         if ( cla.isOptionSet( HELP_OPTION_1 ) || cla.isOptionSet( HELP_OPTION_2 ) ) {
77             printHelp();
78             System.exit( 0 );
79         }
80         if ( cla.getNumberOfNames() != 3 ) {
81             printHelp();
82             System.exit( -1 );
83         }
84         final File surfacing_nexus_outfile = cla.getFile( 0 );
85         final File paup_log_file = cla.getFile( 1 );
86         final String outfile_name = cla.getFile( 2 ).toString();
87         final NexusCharactersParser nex_char_parser = new NexusCharactersParser();
88         try {
89             nex_char_parser.setSource( surfacing_nexus_outfile );
90             nex_char_parser.parse();
91         }
92         catch ( final IOException e ) {
93             ForesterUtil.fatalError( PRG_NAME, "problem with parsing character labels from  ["
94                     + surfacing_nexus_outfile + "]: " + e.getMessage() );
95             e.printStackTrace();
96         }
97         final String[] labels = nex_char_parser.getCharStateLabels();
98         ForesterUtil.programMessage( PRG_NAME, "read in " + labels.length + " character labels" );
99         final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
100         final NexusPhylogeniesParser phylogeny_parser = new NexusPhylogeniesParser();
101         Phylogeny[] phylogenies = null;
102         try {
103             phylogenies = factory.create( surfacing_nexus_outfile, phylogeny_parser );
104         }
105         catch ( final IOException e ) {
106             ForesterUtil.fatalError( PRG_NAME,
107                                      "problem with parsing phylogeny [" + surfacing_nexus_outfile + "]: "
108                                              + e.getMessage() );
109             e.printStackTrace();
110         }
111         if ( phylogenies.length != 1 ) {
112             ForesterUtil.fatalError( PRG_NAME, "failed to parse one phylogeny from [" + surfacing_nexus_outfile
113                     + "], got " + phylogenies.length + " instead" );
114         }
115         final Phylogeny phylogeny = phylogenies[ 0 ];
116         if ( !phylogeny.isRooted() ) {
117             ForesterUtil.fatalError( PRG_NAME, "phylogeny from [" + surfacing_nexus_outfile + "] is not rooted" );
118         }
119         postOrderRelabelInternalNodes( phylogeny, phylogeny.getNumberOfExternalNodes() + 1 );
120         CharacterStateMatrix<BinaryStates> matrix = null;
121         final PaupLogParser paup_log_parser = new PaupLogParser();
122         try {
123             paup_log_parser.setSource( paup_log_file );
124             matrix = paup_log_parser.parse();
125         }
126         catch ( final IOException e ) {
127             ForesterUtil.fatalError( PRG_NAME,
128                                      "failed to parse matrix from  [" + paup_log_file + "]: " + e.getMessage() );
129         }
130         ForesterUtil.programMessage( PRG_NAME,
131                                      "read in character state matrix of size " + matrix.getNumberOfIdentifiers() + "x"
132                                              + matrix.getNumberOfCharacters() );
133         final DomainParsimonyCalculator domain_parsimony = DomainParsimonyCalculator.createInstance( phylogeny );
134         domain_parsimony.executeOnGivenBinaryStatesMatrix( matrix, labels );
135         final String sep = ForesterUtil.LINE_SEPARATOR + "###################" + ForesterUtil.LINE_SEPARATOR;
136         SurfacingUtil.writeMatrixToFile( domain_parsimony.getGainLossMatrix(),
137                                          outfile_name + "_paup_gl",
138                                          Format.FORESTER );
139         SurfacingUtil.writeMatrixToFile( domain_parsimony.getGainLossCountsMatrix(),
140                                          outfile_name + "_paup_glc",
141                                          Format.FORESTER );
142         SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(),
143                                                            CharacterStateMatrix.GainLossStates.GAIN,
144                                                            outfile_name + "_paup_gains",
145                                                            sep,
146                                                            ForesterUtil.LINE_SEPARATOR,
147                                                            null );
148         SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(),
149                                                            CharacterStateMatrix.GainLossStates.LOSS,
150                                                            outfile_name + "_paup_losses",
151                                                            sep,
152                                                            ForesterUtil.LINE_SEPARATOR,
153                                                            null );
154         SurfacingUtil.writeBinaryStatesMatrixAsListToFile( domain_parsimony.getGainLossMatrix(), null, outfile_name
155                 + "_paup_present", sep, ForesterUtil.LINE_SEPARATOR, null );
156         final String date_time = ForesterUtil.getCurrentDateTime();
157         SurfacingUtil.preparePhylogeny( phylogeny, domain_parsimony, date_time, "parsimony (paup)", "paup_"
158                 + outfile_name, "" );
159         SurfacingUtil.writePhylogenyToFile( phylogeny, outfile_name + "_paup.xml" );
160         ForesterUtil.programMessage( PRG_NAME, "OK" );
161     }
162
163     final private static void postOrderRelabelInternalNodes( final Phylogeny phylogeny, final int starting_number ) {
164         int i = starting_number;
165         for( final PhylogenyNodeIterator it = phylogeny.iteratorPostorder(); it.hasNext(); ) {
166             final PhylogenyNode node = it.next();
167             if ( !node.isExternal() ) {
168                 node.setName( String.valueOf( i++ ) );
169             }
170         }
171     }
172
173     private static void printHelp() {
174         System.out.println();
175         System.out.println( "Usage:" );
176         System.out.println();
177         System.out
178                 .println( "% java  -cp forester.jar org.forester.applications."
179                         + PRG_NAME
180                         + " <surfacing nexus outfile with character labels and tree> <paup log file with reconstructed states matrix> <outfile name base>" );
181         System.out.println();
182     }
183 }