inprogress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 2 May 2014 20:20:05 +0000 (20:20 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 2 May 2014 20:20:05 +0000 (20:20 +0000)
forester/java/src/org/forester/application/msa_compactor.java
forester/java/src/org/forester/archaeopteryx/Archaeopteryx.java
forester/java/src/org/forester/msa_compactor/MsaCompactor.java
forester/java/src/org/forester/phylogeny/PhylogenyMethods.java

index 7f65269..7539b0e 100644 (file)
@@ -67,6 +67,7 @@ public class msa_compactor {
     final static private String       OUTPUT_FORMAT_PHYLIP_OPTION            = "p";
     final static private String       OUTPUT_REMOVED_SEQS_OPTION             = "ro";
     final static private String       MAFFT_OPTIONS                          = "mo";
+    final static private String       PERFORM_PHYLOGENETIC_INFERENCE         = "t";
     //        
     final static private String       PATH_TO_MAFFT_OPTION                   = "mafft";
     final static private String       DO_NOT_NORMALIZE_FOR_EFF_LENGTH_OPTION = "nn";
@@ -104,6 +105,7 @@ public class msa_compactor {
             MSA_FORMAT output_format = MSA_FORMAT.FASTA;
             File removed_seqs_out_base = null;
             String mafft_options = "--auto";
+            boolean perform_phylogenetic_inference = false;
             final List<String> allowed_options = new ArrayList<String>();
             allowed_options.add( REMOVE_WORST_OFFENDERS_OPTION );
             allowed_options.add( AV_GAPINESS_OPTION );
@@ -119,6 +121,7 @@ public class msa_compactor {
             allowed_options.add( OUTPUT_FORMAT_PHYLIP_OPTION );
             allowed_options.add( OUTPUT_REMOVED_SEQS_OPTION );
             allowed_options.add( MAFFT_OPTIONS );
+            allowed_options.add( PERFORM_PHYLOGENETIC_INFERENCE );
             final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
             if ( dissallowed_options.length() > 0 ) {
                 ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
@@ -237,6 +240,9 @@ public class msa_compactor {
             else if ( cla.isOptionSet( MAFFT_OPTIONS ) ) {
                 ForesterUtil.fatalError( PRG_NAME, "no need to indicate MAFFT options without realigning" );
             }
+            if ( cla.isOptionSet( PERFORM_PHYLOGENETIC_INFERENCE ) ) {
+                perform_phylogenetic_inference = true;
+            }
             if ( chart_only ) {
                 if ( ( out != null ) || ( removed_seqs_out_base != null ) ) {
                     ForesterUtil
@@ -312,10 +318,12 @@ public class msa_compactor {
             if ( realign ) {
                 System.out.println( "MAFFT options                        : " + mafft_options );
             }
+            System.out.println( "Simple tree (Kimura distances, NJ)   : " + perform_phylogenetic_inference );
             System.out.println();
             final int initial_number_of_seqs = msa.getNumberOfSequences();
             List<MsaProperties> msa_props = null;
             final MsaCompactor mc = new MsaCompactor( msa );
+            mc.setInfileName( in.getName() );
             mc.setNorm( norm );
             mc.setRealign( realign );
             if ( realign ) {
@@ -325,6 +333,7 @@ public class msa_compactor {
             mc.setStep( step );
             mc.setStepForDiagnostics( step_for_diagnostics );
             mc.setReportAlnMeanIdentity( report_aln_mean_identity );
+            mc.setPeformPhylogenticInference( perform_phylogenetic_inference );
             if ( ( worst_remove > 0 ) || ( av_gap > 0 ) || ( length > 0 ) ) {
                 mc.setOutputFormat( output_format );
                 mc.setOutFileBase( out );
@@ -348,7 +357,7 @@ public class msa_compactor {
             else {
                 msa_props = mc.chart( step, realign, norm );
             }
-            Chart.display( msa_props, initial_number_of_seqs, report_aln_mean_identity, in.toString() );
+            Chart.display( msa_props, initial_number_of_seqs, report_aln_mean_identity, in.getName() );
         }
         catch ( final IllegalArgumentException iae ) {
             //  iae.printStackTrace(); //TODO remove me
@@ -419,6 +428,8 @@ public class msa_compactor {
                 + "=<integer>  minimal effecive sequence length (for deleting of shorter sequences)" );
         System.out.println( "   -" + GAP_RATIO_LENGTH_OPTION
                 + "=<decimal>  maximal allowed gap ratio per column (for deleting of columms) (0.0-1.0)" );
+        System.out.println( "   -" + PERFORM_PHYLOGENETIC_INFERENCE
+                + "             to calculate a simple phylogenetic tree (Kimura distances, NJ)" );
         System.out.println();
         System.out.println();
         System.out.println();
index a15ef51..648fd65 100644 (file)
@@ -54,6 +54,12 @@ public final class Archaeopteryx {
         return MainFrameApplication.createInstance( phylogenies, config_file_name, title );
     }
 
+    public static MainFrame createApplication( final Phylogeny phylogeny, final Configuration config, final String title ) {
+        // Single-tree overload: wrap the tree in a one-element array and delegate to createInstance.
+        final Phylogeny[] single_tree = { phylogeny };
+        return MainFrameApplication.createInstance( single_tree, config, title );
+    }
+
     public static void main( final String args[] ) {
         Phylogeny[] phylogenies = null;
         String config_filename = null;
index 366d5bd..ef15161 100644 (file)
@@ -36,11 +36,16 @@ import java.util.List;
 import java.util.SortedSet;
 import java.util.TreeSet;
 
+import org.forester.archaeopteryx.Archaeopteryx;
+import org.forester.archaeopteryx.Configuration;
 import org.forester.evoinference.distance.NeighborJoiningF;
 import org.forester.evoinference.distance.PairwiseDistanceCalculator;
 import org.forester.evoinference.distance.PairwiseDistanceCalculator.PWD_DISTANCE_METHOD;
 import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
 import org.forester.evoinference.tools.BootstrapResampler;
+import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
+import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
+import org.forester.io.parsers.util.ParserUtils;
 import org.forester.io.writers.SequenceWriter;
 import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
 import org.forester.msa.DeleteableMsa;
@@ -52,6 +57,8 @@ import org.forester.msa.MsaMethods;
 import org.forester.msa.ResampleableMsa;
 import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyMethods;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
 import org.forester.sequence.Sequence;
 import org.forester.tools.ConfidenceAssessor;
 import org.forester.util.ForesterUtil;
@@ -66,6 +73,7 @@ public class MsaCompactor {
     private String                    _maffts_opts              = "--auto";
     private int                       _min_length               = -1;
     //
+    private String                    _infile_name              = null;
     private DeleteableMsa             _msa                      = null;
     private boolean                   _norm                     = true;
     private File                      _out_file_base            = null;
@@ -79,6 +87,7 @@ public class MsaCompactor {
     private boolean                   _report_aln_mean_identity = false;
     private int                       _step                     = -1;
     private int                       _step_for_diagnostics     = -1;
+    private boolean                   _phylogentic_inference    = false;
     static {
         NF_4.setRoundingMode( RoundingMode.HALF_UP );
         NF_3.setRoundingMode( RoundingMode.HALF_UP );
@@ -99,6 +108,11 @@ public class MsaCompactor {
         for( final GapContribution gap_gontribution : stats ) {
             to_remove_ids.add( gap_gontribution.getId() );
         }
+        if ( _phylogentic_inference ) {
+            System.out.println( "calculating phylegentic tree..." );
+            System.out.println();
+            pi();
+        }
         if ( !_realign ) {
             _step = -1;
         }
@@ -466,7 +480,7 @@ public class MsaCompactor {
         return "";
     }
 
-    private final Phylogeny pi( final String matrix ) {
+    private final Phylogeny pi( final String matrix, final int boostrap ) {
         final Phylogeny master_phy = inferNJphylogeny( PWD_DISTANCE_METHOD.KIMURA_DISTANCE, _msa, true, matrix );
         final int seed = 15;
         final int n = 100;
@@ -484,6 +498,31 @@ public class MsaCompactor {
         return master_phy;
     }
 
+    private final Phylogeny pi() {
+        final Phylogeny tree = inferNJphylogeny( PWD_DISTANCE_METHOD.KIMURA_DISTANCE, _msa, false, "" );
+        PhylogenyMethods.midpointRoot( tree );
+        // Fall back to name-based taxonomy extraction only if no FASTA description line matched.
+        if ( !PhylogenyMethods.extractFastaInformation( tree ) ) {
+            for( final PhylogenyNodeIterator ext = tree.iteratorExternalForward(); ext.hasNext(); ) {
+                final PhylogenyNode leaf = ext.next();
+                if ( ForesterUtil.isEmpty( leaf.getName().trim() ) ) {
+                    continue;
+                }
+                try {
+                    ParserUtils.extractTaxonomyDataFromNodeName( leaf, TAXONOMY_EXTRACTION.AGGRESSIVE );
+                }
+                catch ( final PhyloXmlDataFormatException ignored ) {
+                    // Best effort: a format exception for one leaf is deliberately ignored.
+                }
+            }
+        }
+        // Hand the tree to Archaeopteryx with phylogram display enabled; the input file name is the title.
+        final Configuration display_config = new Configuration();
+        display_config.setDisplayAsPhylogram( true );
+        Archaeopteryx.createApplication( tree, display_config, _infile_name );
+        return tree;
+    }
+
     private final void printMsaProperties( final String id, final MsaProperties msa_properties ) {
         if ( ( _step == 1 ) || ( _step_for_diagnostics == 1 ) ) {
             System.out.print( ForesterUtil.pad( id, _longest_id_length, ' ', false ) );
@@ -579,4 +618,12 @@ public class MsaCompactor {
         msa.write( w, format );
         w.close();
     }
+
+    public void setPeformPhylogenticInference( final boolean phylogentic_inference ) {
+        this._phylogentic_inference = phylogentic_inference; // when true, pi() is invoked to calculate a phylogenetic tree
+    }
+
+    public void setInfileName( final String infile_name ) {
+        this._infile_name = infile_name; // passed by pi() as the title argument of Archaeopteryx.createApplication
+    }
 }
index c085575..01c84c3 100644 (file)
@@ -75,17 +75,14 @@ public class PhylogenyMethods {
         throw new CloneNotSupportedException();
     }
 
-    public static void extractFastaInformation( final Phylogeny phy ) {
+    public static boolean extractFastaInformation( final Phylogeny phy ) {
+        boolean could_extract = false;
         for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
             final PhylogenyNode node = iter.next();
             if ( !ForesterUtil.isEmpty( node.getName() ) ) {
                 final Matcher name_m = FastaParser.FASTA_DESC_LINE.matcher( node.getName() );
                 if ( name_m.lookingAt() ) {
-                    System.out.println();
-                    // System.out.println( name_m.group( 1 ) );
-                    // System.out.println( name_m.group( 2 ) );
-                    // System.out.println( name_m.group( 3 ) );
-                    // System.out.println( name_m.group( 4 ) );
+                    could_extract = true;
                     final String acc_source = name_m.group( 1 );
                     final String acc = name_m.group( 2 );
                     final String seq_name = name_m.group( 3 );
@@ -105,6 +102,7 @@ public class PhylogenyMethods {
                 }
             }
         }
+        return could_extract;
     }
 
     public static DescriptiveStatistics calculatBranchLengthStatistics( final Phylogeny phy ) {