inprogress
authorcmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Sat, 1 Mar 2014 01:14:08 +0000 (01:14 +0000)
committercmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Sat, 1 Mar 2014 01:14:08 +0000 (01:14 +0000)
forester/java/src/org/forester/application/surfacing.java
forester/java/src/org/forester/evoinference/TestPhylogenyReconstruction.java
forester/java/src/org/forester/evoinference/distance/NeighborJoining.java
forester/java/src/org/forester/evoinference/distance/PairwiseDistanceCalculator.java
forester/java/src/org/forester/evoinference/matrix/distance/BasicSymmetricalDistanceMatrix.java
forester/java/src/org/forester/msa/ResampleableMsa.java
forester/java/src/org/forester/msa_compactor/MsaCompactor.java
forester/java/src/org/forester/surfacing/BasicDomainSimilarityCalculator.java
forester/java/src/org/forester/surfacing/PairwiseGenomeComparator.java

index 802b1d4..847a8db 100644 (file)
@@ -1817,7 +1817,8 @@ public class surfacing {
         final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( domain_similarity_sort_field,
                                                                                      sort_by_species_count_first,
                                                                                      number_of_genomes == 2,
-                                                                                     CALC_SIMILARITY_SCORES );
+                                                                                     CALC_SIMILARITY_SCORES,
+                                                                                     true );
         switch ( scoring ) {
             case COMBINATIONS:
                 pw_calc = new CombinationsBasedPairwiseDomainSimilarityCalculator();
index 3529d1e..6073985 100644 (file)
@@ -33,6 +33,7 @@ import java.io.StringWriter;
 import java.util.Date;
 import java.util.List;
 
+import org.forester.archaeopteryx.Archaeopteryx;
 import org.forester.evoinference.distance.NeighborJoining;
 import org.forester.evoinference.distance.PairwiseDistanceCalculator;
 import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
@@ -1941,7 +1942,13 @@ public class TestPhylogenyReconstruction {
             m.setRow( "1.52430 1.44650 0.59580 0.46310 0.00000 0.34840 0.30830", 4 );
             m.setRow( "1.60430 1.43890 0.61790 0.50610 0.34840 0.00000 0.26920", 5 );
             m.setRow( "1.59050 1.46290 0.55830 0.47100 0.30830 0.26920 0.00000", 6 );
-            nj.execute( m );
+            System.out.println( m.toString() );
+            final Phylogeny p2 = nj.execute( m );
+            p2.reRoot( p2.getNode( "Bovine" ) );
+            System.out.println( p2.toString() );
+            // from phylip Neighbor-Joining/UPGMA method version 3.69:
+            // ((((((Chimp:0.15167,Human:0.11753):0.03982,Gorilla:0.15393):0.02696,Orang:0.28469):0.04648,Gibbon:0.35793):0.42027,Mouse:0.76891):0.458845,Bovine:0.458845);
+            Archaeopteryx.createApplication( p2 );
             m = new BasicSymmetricalDistanceMatrix( 4 );
             m.setIdentifier( 0, "A" );
             m.setIdentifier( 1, "B" );
index c624bff..bcb6df1 100644 (file)
@@ -58,9 +58,9 @@ public final class NeighborJoining {
             if ( ( i == otu1 ) || ( i == otu2 ) ) {
                 continue;
             }
-            //  _d_values[ _mappings[ otu1 ] ][ _mappings[ i ] ] = ( getValueFromD( otu1, i ) + getValueFromD( i, otu2 ) - d ) / 2;
-            i_m = _mappings[ i ];
-            _d_values[ otu1_m ][ i_m ] = ( ( _d_values[ otu1_m ][ i_m ] + _d_values[ i_m ][ otu2_m ] ) - 2 ) / 2;
+            _d_values[ _mappings[ otu1 ] ][ _mappings[ i ] ] = ( getValueFromD( otu1, i ) + getValueFromD( i, otu2 ) - d ) / 2;
+            //i_m = _mappings[ i ];
+            //_d_values[ otu1_m ][ i_m ] = ( ( _d_values[ otu1_m ][ i_m ] + _d_values[ i_m ][ otu2_m ] ) - 2 ) / 2;
         }
     }
 
@@ -71,8 +71,8 @@ public final class NeighborJoining {
             d = 0;
             i_m = _mappings[ i ];
             for( int n = 0; n < _n; ++n ) {
-                d += _d_values[ i_m ][ _mappings[ n ] ];
-                //d += getValueFromD( i, n );
+                //d += _d_values[ i_m ][ _mappings[ n ] ];
+                d += getValueFromD( i, n );
             }
             _r[ i ] = d;
         }
@@ -101,7 +101,7 @@ public final class NeighborJoining {
             // It is a condition that otu1 < otu2.
             if ( DEBUG ) {
                 if ( otu1 > otu2 ) {
-                    throw new RuntimeException( "NJ code is faulty: otu1 > otu2" );
+                    throw new RuntimeException( "faulty NJ code: otu1 > otu2" );
                 }
             }
             final PhylogenyNode node = new PhylogenyNode();
@@ -194,12 +194,15 @@ public final class NeighborJoining {
             r_j = _r[ j ];
             j_m = _mappings[ j ];
             for( int i = 0; i < j; ++i ) {
-                //  _m_values[ i ][ j ] = getValueFromD( i, j ) - ( _r[ i ] + r_j ) / ( _n - 2 );
-                _m_values[ i ][ j ] = _d_values[ _mappings[ i ] ][ j_m ] - ( ( _r[ i ] + r_j ) / ( _n_2 ) );
+                _m_values[ i ][ j ] = getValueFromD( i, j ) - ( _r[ i ] + r_j ) / ( _n - 2 );
+                //_m_values[ i ][ j ] = _d_values[ _mappings[ i ] ][ j_m ] - ( ( _r[ i ] + r_j ) / ( _n_2 ) );
             }
         }
     }
 
+    //  private final double getValueFromD( final int otu1, final int otu2 ) {
+    //      return _d_values[ _mappings[ otu1 ] ][ _mappings[ otu2 ] ];
+    // }
     // otu2 will, in effect, be "deleted" from the matrix.
     private final void updateMappings( final int otu2 ) {
         for( int i = otu2; i < ( _mappings.length - 1 ); ++i ) {
index 0dc6c6e..67d4b53 100644 (file)
@@ -27,12 +27,10 @@ package org.forester.evoinference.distance;
 
 import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
 import org.forester.msa.Msa;
-import org.forester.sequence.Sequence;
 
 public final class PairwiseDistanceCalculator {
 
-    public static final double DEFAULT_VALUE_FOR_TOO_LARGE_DISTANCE_FOR_KIMURA_FORMULA = 10;          // Felsenstein uses -1
-    private static final char  GAP                                                     = Sequence.GAP;
+    public static final double DEFAULT_VALUE_FOR_TOO_LARGE_DISTANCE_FOR_KIMURA_FORMULA = 10; // Felsenstein uses -1
     private final Msa          _msa;
     private final double       _value_for_too_large_distance_for_kimura_formula;
 
@@ -44,23 +42,12 @@ public final class PairwiseDistanceCalculator {
     private double calcFractionalDissimilarity( final int row_1, final int row_2 ) {
         final int length = _msa.getLength();
         int nd = 0;
-        int n = 0;
-        char aa_1;
-        char aa_2;
         for( int col = 0; col < length; ++col ) {
-            aa_1 = _msa.getResidueAt( row_1, col );
-            aa_2 = _msa.getResidueAt( row_2, col );
-            if ( ( aa_1 != GAP ) && ( aa_2 != GAP ) ) {
-                if ( aa_1 != aa_2 ) {
-                    nd++;
-                }
-                n++;
+            if ( _msa.getResidueAt( row_1, col ) != _msa.getResidueAt( row_2, col ) ) {
+                ++nd;
             }
         }
-        if ( n == 0 ) {
-            return 1;
-        }
-        return ( double ) nd / n;
+        return ( double ) nd / length;
     }
 
     /**
index 616a91c..fdf2ec4 100644 (file)
@@ -110,6 +110,9 @@ public final class BasicSymmetricalDistanceMatrix implements DistanceMatrix {
 
     @Override
     public final void setValue( final int col, final int row, final double d ) {
+        if ( d < 0 ) {
+            throw new IllegalArgumentException( "negative distance value" );
+        }
         if ( ( col == row ) && ( d != 0.0 ) ) {
             throw new IllegalArgumentException( "attempt to set a non-zero value on the diagonal of a symmetrical distance matrix" );
         }
index b7c8a7d..3823241 100644 (file)
@@ -35,7 +35,6 @@ public final class ResampleableMsa extends BasicMsa {
 
     public void resample( final int[] resampled_column_positions ) {
         if ( resampled_column_positions.length != getLength() ) {
-            _resampled_column_positions = null;
             throw new IllegalArgumentException( "illegal attempt to use " + resampled_column_positions.length
                     + " resampled column positions on msa of length " + getLength() );
         }
index 5f30b5a..cc0875d 100644 (file)
@@ -128,9 +128,9 @@ public class MsaCompactor {
         final MsaInferrer mafft = Mafft
                 .createInstance( "/home/czmasek/SOFTWARE/MSA/MAFFT/mafft-7.130-without-extensions/scripts/mafft" );
         final List<String> opts = new ArrayList<String>();
-        // opts.add( "--maxiterate" );
-        // opts.add( "1000" );
-        // opts.add( "--localpair" );
+        opts.add( "--maxiterate" );
+        opts.add( "1000" );
+        opts.add( "--localpair" );
         opts.add( "--quiet" );
         _msa = mafft.infer( _msa.asSequenceList(), opts );
     }
@@ -213,8 +213,8 @@ public class MsaCompactor {
         }
     }
 
-    Phylogeny pi() {
-        final Phylogeny master_phy = inferNJphylogeny( PWD_DISTANCE_METHOD.KIMURA_DISTANCE, _msa );
+    Phylogeny pi( String matrix ) {
+        final Phylogeny master_phy = inferNJphylogeny( PWD_DISTANCE_METHOD.KIMURA_DISTANCE, _msa, true, matrix );
         final int seed = 15;
         final int n = 100;
         final ResampleableMsa resampleable_msa = new ResampleableMsa( ( BasicMsa ) _msa );
@@ -224,14 +224,17 @@ public class MsaCompactor {
         final Phylogeny[] eval_phys = new Phylogeny[ n ];
         for( int i = 0; i < n; ++i ) {
             resampleable_msa.resample( resampled_column_positions[ i ] );
-            eval_phys[ i ] = inferNJphylogeny( PWD_DISTANCE_METHOD.KIMURA_DISTANCE, resampleable_msa );
+            eval_phys[ i ] = inferNJphylogeny( PWD_DISTANCE_METHOD.KIMURA_DISTANCE, resampleable_msa, false, null );
         }
         ConfidenceAssessor.evaluate( "bootstrap", eval_phys, master_phy, true, 1 );
         PhylogenyMethods.extractFastaInformation( master_phy );
         return master_phy;
     }
 
-    private Phylogeny inferNJphylogeny( PWD_DISTANCE_METHOD pwd_distance_method, final Msa msa ) {
+    private Phylogeny inferNJphylogeny( PWD_DISTANCE_METHOD pwd_distance_method,
+                                        final Msa msa,
+                                        boolean write_matrix,
+                                        String matrix_name ) {
         BasicSymmetricalDistanceMatrix m = null;
         switch ( pwd_distance_method ) {
             case KIMURA_DISTANCE:
@@ -246,6 +249,15 @@ public class MsaCompactor {
             default:
                 throw new IllegalArgumentException( "invalid pwd method" );
         }
+        if ( write_matrix ) {
+            try {
+                m.write( ForesterUtil.createBufferedWriter( matrix_name ) );
+            }
+            catch ( IOException e ) {
+                // TODO Auto-generated catch block
+                e.printStackTrace();
+            }
+        }
         final NeighborJoining nj = NeighborJoining.createInstance();
         final Phylogeny phy = nj.execute( m );
         return phy;
@@ -255,7 +267,7 @@ public class MsaCompactor {
                                              final int step,
                                              final boolean realign,
                                              final boolean norm ) throws IOException, InterruptedException {
-        final Phylogeny a = pi();
+        final Phylogeny a = pi( "a.pwd" );
         Archaeopteryx.createApplication( a );
         final GapContribution stats[] = calcGapContribtionsStats( norm );
         final List<String> to_remove_ids = new ArrayList<String>();
@@ -278,7 +290,7 @@ public class MsaCompactor {
         if ( realign ) {
             mafft();
         }
-        final Phylogeny b = pi();
+        final Phylogeny b = pi( "b.pwd" );
         Archaeopteryx.createApplication( b );
     }
 
index b245cbc..8682fd1 100644 (file)
@@ -46,6 +46,19 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat
     private final boolean                            _calc_similarity_score;
     private final boolean                            _sort_by_species_count_first;
     private final boolean                            _treat_as_binary_comparison;
+    private final boolean                            _verbose;
+
+    public BasicDomainSimilarityCalculator( final DomainSimilarity.DomainSimilaritySortField sort,
+                                            final boolean sort_by_species_count_first,
+                                            final boolean treat_as_binary_comparison,
+                                            final boolean calc_similarity_score,
+                                            final boolean verbose ) {
+        _sort = sort;
+        _sort_by_species_count_first = sort_by_species_count_first;
+        _treat_as_binary_comparison = treat_as_binary_comparison;
+        _calc_similarity_score = calc_similarity_score;
+        _verbose = verbose;
+    }
 
     public BasicDomainSimilarityCalculator( final DomainSimilarity.DomainSimilaritySortField sort,
                                             final boolean sort_by_species_count_first,
@@ -55,6 +68,7 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat
         _sort_by_species_count_first = sort_by_species_count_first;
         _treat_as_binary_comparison = treat_as_binary_comparison;
         _calc_similarity_score = calc_similarity_score;
+        _verbose = false;
     }
 
     @Override
@@ -72,9 +86,13 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat
         }
         final DecimalFormat pf = new java.text.DecimalFormat( "000000" );
         int counter = 1;
-        System.out.println( keys.size() );
+        if ( _verbose ) {
+            System.out.println( keys.size() );
+        }
         for( final String key : keys ) {
-            ForesterUtil.updateProgress( counter, pf );
+            if ( _verbose ) {
+                ForesterUtil.updateProgress( counter, pf );
+            }
             counter++;
             final List<CombinableDomains> same_id_cd_list = new ArrayList<CombinableDomains>( cdc_list.size() );
             final List<Species> species_with_key_id_domain = new ArrayList<Species>();
@@ -111,7 +129,9 @@ public class BasicDomainSimilarityCalculator implements DomainSimilarityCalculat
                 throw new RuntimeException( "this should not have happened" );
             }
         }
-        System.out.println();
+        if ( _verbose ) {
+            System.out.println();
+        }
         return similarities;
     }
 
index a3b0e66..d0d2590 100644 (file)
@@ -139,7 +139,8 @@ public class PairwiseGenomeComparator {
                 final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( domain_similarity_sort_field,
                                                                                              sort_by_species_count_first,
                                                                                              true,
-                                                                                             calc_similarity_scores );
+                                                                                             calc_similarity_scores,
+                                                                                             true );
                 final SortedSet<DomainSimilarity> similarities = calc
                         .calculateSimilarities( pw_calc,
                                                 genome_pair,