inprogress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 28 Feb 2014 02:24:30 +0000 (02:24 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 28 Feb 2014 02:24:30 +0000 (02:24 +0000)
forester/java/src/org/forester/archaeopteryx/tools/PhylogeneticInferrer.java
forester/java/src/org/forester/msa_compactor/MsaCompactor.java
forester/java/src/org/forester/phylogeny/PhylogenyMethods.java

index 1144fcb..e6fe009 100644 (file)
@@ -30,7 +30,6 @@ import java.io.FileWriter;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
-import java.util.regex.Matcher;
 
 import javax.swing.JOptionPane;
 
@@ -39,7 +38,6 @@ import org.forester.evoinference.distance.NeighborJoining;
 import org.forester.evoinference.distance.PairwiseDistanceCalculator;
 import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
 import org.forester.evoinference.tools.BootstrapResampler;
-import org.forester.io.parsers.FastaParser;
 import org.forester.msa.BasicMsa;
 import org.forester.msa.Mafft;
 import org.forester.msa.Msa;
@@ -48,9 +46,7 @@ import org.forester.msa.MsaInferrer;
 import org.forester.msa.MsaMethods;
 import org.forester.msa.ResampleableMsa;
 import org.forester.phylogeny.Phylogeny;
-import org.forester.phylogeny.PhylogenyNode;
-import org.forester.phylogeny.data.Accession;
-import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.phylogeny.PhylogenyMethods;
 import org.forester.sequence.Sequence;
 import org.forester.tools.ConfidenceAssessor;
 import org.forester.util.ForesterUtil;
@@ -148,7 +144,7 @@ public class PhylogeneticInferrer extends RunnableProcess {
         }
         final NeighborJoining nj = NeighborJoining.createInstance();
         final Phylogeny phy = nj.execute( m );
-        PhylogeneticInferrer.extractFastaInformation( phy );
+        PhylogenyMethods.extractFastaInformation( phy );
         return phy;
     }
 
@@ -294,38 +290,6 @@ public class PhylogeneticInferrer extends RunnableProcess {
         }
     }
 
-    public static void extractFastaInformation( final Phylogeny phy ) {
-        for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
-            final PhylogenyNode node = iter.next();
-            if ( !ForesterUtil.isEmpty( node.getName() ) ) {
-                final Matcher name_m = FastaParser.FASTA_DESC_LINE.matcher( node.getName() );
-                if ( name_m.lookingAt() ) {
-                    System.out.println();
-                    // System.out.println( name_m.group( 1 ) );
-                    // System.out.println( name_m.group( 2 ) );
-                    // System.out.println( name_m.group( 3 ) );
-                    // System.out.println( name_m.group( 4 ) );
-                    final String acc_source = name_m.group( 1 );
-                    final String acc = name_m.group( 2 );
-                    final String seq_name = name_m.group( 3 );
-                    final String tax_sn = name_m.group( 4 );
-                    if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) {
-                        ForesterUtil.ensurePresenceOfSequence( node );
-                        node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) );
-                    }
-                    if ( !ForesterUtil.isEmpty( seq_name ) ) {
-                        ForesterUtil.ensurePresenceOfSequence( node );
-                        node.getNodeData().getSequence( 0 ).setName( seq_name );
-                    }
-                    if ( !ForesterUtil.isEmpty( tax_sn ) ) {
-                        ForesterUtil.ensurePresenceOfTaxonomy( node );
-                        node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn );
-                    }
-                }
-            }
-        }
-    }
-
     public enum MSA_PRG {
         MAFFT;
     }
index eead263..5f30b5a 100644 (file)
@@ -13,12 +13,23 @@ import java.util.List;
 import java.util.SortedSet;
 import java.util.TreeSet;
 
+import org.forester.archaeopteryx.Archaeopteryx;
+import org.forester.evoinference.distance.NeighborJoining;
+import org.forester.evoinference.distance.PairwiseDistanceCalculator;
+import org.forester.evoinference.distance.PairwiseDistanceCalculator.PWD_DISTANCE_METHOD;
+import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
+import org.forester.evoinference.tools.BootstrapResampler;
+import org.forester.msa.BasicMsa;
 import org.forester.msa.Mafft;
 import org.forester.msa.Msa;
 import org.forester.msa.Msa.MSA_FORMAT;
 import org.forester.msa.MsaInferrer;
 import org.forester.msa.MsaMethods;
+import org.forester.msa.ResampleableMsa;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyMethods;
 import org.forester.sequence.Sequence;
+import org.forester.tools.ConfidenceAssessor;
 import org.forester.util.ForesterUtil;
 
 public class MsaCompactor {
@@ -202,10 +213,50 @@ public class MsaCompactor {
         }
     }
 
+    Phylogeny pi() {
+        final Phylogeny master_phy = inferNJphylogeny( PWD_DISTANCE_METHOD.KIMURA_DISTANCE, _msa );
+        final int seed = 15;
+        final int n = 100;
+        final ResampleableMsa resampleable_msa = new ResampleableMsa( ( BasicMsa ) _msa );
+        final int[][] resampled_column_positions = BootstrapResampler.createResampledColumnPositions( _msa.getLength(),
+                                                                                                      n,
+                                                                                                      seed );
+        final Phylogeny[] eval_phys = new Phylogeny[ n ];
+        for( int i = 0; i < n; ++i ) {
+            resampleable_msa.resample( resampled_column_positions[ i ] );
+            eval_phys[ i ] = inferNJphylogeny( PWD_DISTANCE_METHOD.KIMURA_DISTANCE, resampleable_msa );
+        }
+        ConfidenceAssessor.evaluate( "bootstrap", eval_phys, master_phy, true, 1 );
+        PhylogenyMethods.extractFastaInformation( master_phy );
+        return master_phy;
+    }
+
+    private Phylogeny inferNJphylogeny( PWD_DISTANCE_METHOD pwd_distance_method, final Msa msa ) {
+        BasicSymmetricalDistanceMatrix m = null;
+        switch ( pwd_distance_method ) {
+            case KIMURA_DISTANCE:
+                m = PairwiseDistanceCalculator.calcKimuraDistances( msa );
+                break;
+            case POISSON_DISTANCE:
+                m = PairwiseDistanceCalculator.calcPoissonDistances( msa );
+                break;
+            case FRACTIONAL_DISSIMILARITY:
+                m = PairwiseDistanceCalculator.calcFractionalDissimilarities( msa );
+                break;
+            default:
+                throw new IllegalArgumentException( "invalid pwd method" );
+        }
+        final NeighborJoining nj = NeighborJoining.createInstance();
+        final Phylogeny phy = nj.execute( m );
+        return phy;
+    }
+
     final private void removeWorstOffenders( final int to_remove,
                                              final int step,
                                              final boolean realign,
                                              final boolean norm ) throws IOException, InterruptedException {
+        final Phylogeny a = pi();
+        Archaeopteryx.createApplication( a );
         final GapContribution stats[] = calcGapContribtionsStats( norm );
         final List<String> to_remove_ids = new ArrayList<String>();
         for( int j = 0; j < to_remove; ++j ) {
@@ -227,6 +278,8 @@ public class MsaCompactor {
         if ( realign ) {
             mafft();
         }
+        final Phylogeny b = pi();
+        Archaeopteryx.createApplication( b );
     }
 
     final private void writeMsa( final String outfile, final MSA_FORMAT format ) throws IOException {
index a916d89..806f031 100644 (file)
@@ -38,8 +38,10 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
+import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import org.forester.io.parsers.FastaParser;
 import org.forester.io.parsers.PhylogenyParser;
 import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
 import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
@@ -73,6 +75,38 @@ public class PhylogenyMethods {
         throw new CloneNotSupportedException();
     }
 
+    public static void extractFastaInformation( final Phylogeny phy ) {
+        for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
+            final PhylogenyNode node = iter.next();
+            if ( !ForesterUtil.isEmpty( node.getName() ) ) {
+                final Matcher name_m = FastaParser.FASTA_DESC_LINE.matcher( node.getName() );
+                if ( name_m.lookingAt() ) {
+                    System.out.println();
+                    // System.out.println( name_m.group( 1 ) );
+                    // System.out.println( name_m.group( 2 ) );
+                    // System.out.println( name_m.group( 3 ) );
+                    // System.out.println( name_m.group( 4 ) );
+                    final String acc_source = name_m.group( 1 );
+                    final String acc = name_m.group( 2 );
+                    final String seq_name = name_m.group( 3 );
+                    final String tax_sn = name_m.group( 4 );
+                    if ( !ForesterUtil.isEmpty( acc_source ) && !ForesterUtil.isEmpty( acc ) ) {
+                        ForesterUtil.ensurePresenceOfSequence( node );
+                        node.getNodeData().getSequence( 0 ).setAccession( new Accession( acc, acc_source ) );
+                    }
+                    if ( !ForesterUtil.isEmpty( seq_name ) ) {
+                        ForesterUtil.ensurePresenceOfSequence( node );
+                        node.getNodeData().getSequence( 0 ).setName( seq_name );
+                    }
+                    if ( !ForesterUtil.isEmpty( tax_sn ) ) {
+                        ForesterUtil.ensurePresenceOfTaxonomy( node );
+                        node.getNodeData().getTaxonomy( 0 ).setScientificName( tax_sn );
+                    }
+                }
+            }
+        }
+    }
+
     public static DescriptiveStatistics calculatBranchLengthStatistics( final Phylogeny phy ) {
         final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
         for( final PhylogenyNodeIterator iter = phy.iteratorPreorder(); iter.hasNext(); ) {