final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( domain_similarity_sort_field,
sort_by_species_count_first,
number_of_genomes == 2,
- CALC_SIMILARITY_SCORES );
+ CALC_SIMILARITY_SCORES,
+ true );
switch ( scoring ) {
case COMBINATIONS:
pw_calc = new CombinationsBasedPairwiseDomainSimilarityCalculator();
import java.util.Date;
import java.util.List;
+import org.forester.archaeopteryx.Archaeopteryx;
import org.forester.evoinference.distance.NeighborJoining;
import org.forester.evoinference.distance.PairwiseDistanceCalculator;
import org.forester.evoinference.matrix.character.BasicCharacterStateMatrix;
m.setRow( "1.52430 1.44650 0.59580 0.46310 0.00000 0.34840 0.30830", 4 );
m.setRow( "1.60430 1.43890 0.61790 0.50610 0.34840 0.00000 0.26920", 5 );
m.setRow( "1.59050 1.46290 0.55830 0.47100 0.30830 0.26920 0.00000", 6 );
- nj.execute( m );
+ System.out.println( m.toString() );
+ final Phylogeny p2 = nj.execute( m );
+ p2.reRoot( p2.getNode( "Bovine" ) );
+ System.out.println( p2.toString() );
+ // from phylip Neighbor-Joining/UPGMA method version 3.69:
+ // ((((((Chimp:0.15167,Human:0.11753):0.03982,Gorilla:0.15393):0.02696,Orang:0.28469):0.04648,Gibbon:0.35793):0.42027,Mouse:0.76891):0.458845,Bovine:0.458845);
+ Archaeopteryx.createApplication( p2 );
m = new BasicSymmetricalDistanceMatrix( 4 );
m.setIdentifier( 0, "A" );
m.setIdentifier( 1, "B" );
if ( ( i == otu1 ) || ( i == otu2 ) ) {
continue;
}
- // _d_values[ _mappings[ otu1 ] ][ _mappings[ i ] ] = ( getValueFromD( otu1, i ) + getValueFromD( i, otu2 ) - d ) / 2;
- i_m = _mappings[ i ];
- _d_values[ otu1_m ][ i_m ] = ( ( _d_values[ otu1_m ][ i_m ] + _d_values[ i_m ][ otu2_m ] ) - 2 ) / 2;
+ _d_values[ _mappings[ otu1 ] ][ _mappings[ i ] ] = ( getValueFromD( otu1, i ) + getValueFromD( i, otu2 ) - d ) / 2;
+ //i_m = _mappings[ i ];
+ //_d_values[ otu1_m ][ i_m ] = ( ( _d_values[ otu1_m ][ i_m ] + _d_values[ i_m ][ otu2_m ] ) - 2 ) / 2;
}
}
d = 0;
i_m = _mappings[ i ];
for( int n = 0; n < _n; ++n ) {
- d += _d_values[ i_m ][ _mappings[ n ] ];
- //d += getValueFromD( i, n );
+ //d += _d_values[ i_m ][ _mappings[ n ] ];
+ d += getValueFromD( i, n );
}
_r[ i ] = d;
}
// It is a condition that otu1 < otu2.
if ( DEBUG ) {
if ( otu1 > otu2 ) {
- throw new RuntimeException( "NJ code is faulty: otu1 > otu2" );
+ throw new RuntimeException( "faulty NJ code: otu1 > otu2" );
}
}
final PhylogenyNode node = new PhylogenyNode();
r_j = _r[ j ];
j_m = _mappings[ j ];
for( int i = 0; i < j; ++i ) {
- // _m_values[ i ][ j ] = getValueFromD( i, j ) - ( _r[ i ] + r_j ) / ( _n - 2 );
- _m_values[ i ][ j ] = _d_values[ _mappings[ i ] ][ j_m ] - ( ( _r[ i ] + r_j ) / ( _n_2 ) );
+ _m_values[ i ][ j ] = getValueFromD( i, j ) - ( _r[ i ] + r_j ) / ( _n - 2 );
+ //_m_values[ i ][ j ] = _d_values[ _mappings[ i ] ][ j_m ] - ( ( _r[ i ] + r_j ) / ( _n_2 ) );
}
}
}
+ // private final double getValueFromD( final int otu1, final int otu2 ) {
+ // return _d_values[ _mappings[ otu1 ] ][ _mappings[ otu2 ] ];
+ // }
// otu2 will, in effect, be "deleted" from the matrix.
private final void updateMappings( final int otu2 ) {
for( int i = otu2; i < ( _mappings.length - 1 ); ++i ) {
import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
import org.forester.msa.Msa;
-import org.forester.sequence.Sequence;
public final class PairwiseDistanceCalculator {
- public static final double DEFAULT_VALUE_FOR_TOO_LARGE_DISTANCE_FOR_KIMURA_FORMULA = 10; // Felsenstein uses -1
- private static final char GAP = Sequence.GAP;
+ public static final double DEFAULT_VALUE_FOR_TOO_LARGE_DISTANCE_FOR_KIMURA_FORMULA = 10; // Felsenstein uses -1
private final Msa _msa;
private final double _value_for_too_large_distance_for_kimura_formula;
private double calcFractionalDissimilarity( final int row_1, final int row_2 ) {
final int length = _msa.getLength();
int nd = 0;
- int n = 0;
- char aa_1;
- char aa_2;
for( int col = 0; col < length; ++col ) {
- aa_1 = _msa.getResidueAt( row_1, col );
- aa_2 = _msa.getResidueAt( row_2, col );
- if ( ( aa_1 != GAP ) && ( aa_2 != GAP ) ) {
- if ( aa_1 != aa_2 ) {
- nd++;
- }
- n++;
+ if ( _msa.getResidueAt( row_1, col ) != _msa.getResidueAt( row_2, col ) ) {
+ ++nd;
}
}
- if ( n == 0 ) {
- return 1;
- }
- return ( double ) nd / n;
+ return ( double ) nd / length;
}
/**
@Override
public final void setValue( final int col, final int row, final double d ) {
+ if ( d < 0 ) {
+ throw new IllegalArgumentException( "negative distance value" );
+ }
if ( ( col == row ) && ( d != 0.0 ) ) {
throw new IllegalArgumentException( "attempt to set a non-zero value on the diagonal of a symmetrical distance matrix" );
}
public void resample( final int[] resampled_column_positions ) {
if ( resampled_column_positions.length != getLength() ) {
- _resampled_column_positions = null;
throw new IllegalArgumentException( "illegal attempt to use " + resampled_column_positions.length
+ " resampled column positions on msa of length " + getLength() );
}
final MsaInferrer mafft = Mafft
.createInstance( "/home/czmasek/SOFTWARE/MSA/MAFFT/mafft-7.130-without-extensions/scripts/mafft" );
final List<String> opts = new ArrayList<String>();
- // opts.add( "--maxiterate" );
- // opts.add( "1000" );
- // opts.add( "--localpair" );
+ opts.add( "--maxiterate" );
+ opts.add( "1000" );
+ opts.add( "--localpair" );
opts.add( "--quiet" );
_msa = mafft.infer( _msa.asSequenceList(), opts );
}
}
}
- Phylogeny pi() {
- final Phylogeny master_phy = inferNJphylogeny( PWD_DISTANCE_METHOD.KIMURA_DISTANCE, _msa );
+ Phylogeny pi( String matrix ) {
+ final Phylogeny master_phy = inferNJphylogeny( PWD_DISTANCE_METHOD.KIMURA_DISTANCE, _msa, true, matrix );
final int seed = 15;
final int n = 100;
final ResampleableMsa resampleable_msa = new ResampleableMsa( ( BasicMsa ) _msa );
final Phylogeny[] eval_phys = new Phylogeny[ n ];
for( int i = 0; i < n; ++i ) {
resampleable_msa.resample( resampled_column_positions[ i ] );
- eval_phys[ i ] = inferNJphylogeny( PWD_DISTANCE_METHOD.KIMURA_DISTANCE, resampleable_msa );
+ eval_phys[ i ] = inferNJphylogeny( PWD_DISTANCE_METHOD.KIMURA_DISTANCE, resampleable_msa, false, null );
}
ConfidenceAssessor.evaluate( "bootstrap", eval_phys, master_phy, true, 1 );
PhylogenyMethods.extractFastaInformation( master_phy );
return master_phy;
}
- private Phylogeny inferNJphylogeny( PWD_DISTANCE_METHOD pwd_distance_method, final Msa msa ) {
+ private Phylogeny inferNJphylogeny( PWD_DISTANCE_METHOD pwd_distance_method,
+ final Msa msa,
+ boolean write_matrix,
+ String matrix_name ) {
BasicSymmetricalDistanceMatrix m = null;
switch ( pwd_distance_method ) {
case KIMURA_DISTANCE:
default:
throw new IllegalArgumentException( "invalid pwd method" );
}
+ if ( write_matrix ) {
+ try {
+ m.write( ForesterUtil.createBufferedWriter( matrix_name ) );
+ }
+ catch ( IOException e ) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+ }
final NeighborJoining nj = NeighborJoining.createInstance();
final Phylogeny phy = nj.execute( m );
return phy;
final int step,
final boolean realign,
final boolean norm ) throws IOException, InterruptedException {
- final Phylogeny a = pi();
+ final Phylogeny a = pi( "a.pwd" );
Archaeopteryx.createApplication( a );
final GapContribution stats[] = calcGapContribtionsStats( norm );
final List<String> to_remove_ids = new ArrayList<String>();
if ( realign ) {
mafft();
}
- final Phylogeny b = pi();
+ final Phylogeny b = pi( "b.pwd" );
Archaeopteryx.createApplication( b );
}
private final boolean _calc_similarity_score;
private final boolean _sort_by_species_count_first;
private final boolean _treat_as_binary_comparison;
+ private final boolean _verbose;
+
+ public BasicDomainSimilarityCalculator( final DomainSimilarity.DomainSimilaritySortField sort,
+ final boolean sort_by_species_count_first,
+ final boolean treat_as_binary_comparison,
+ final boolean calc_similarity_score,
+ final boolean verbose ) {
+ _sort = sort;
+ _sort_by_species_count_first = sort_by_species_count_first;
+ _treat_as_binary_comparison = treat_as_binary_comparison;
+ _calc_similarity_score = calc_similarity_score;
+ _verbose = verbose;
+ }
public BasicDomainSimilarityCalculator( final DomainSimilarity.DomainSimilaritySortField sort,
final boolean sort_by_species_count_first,
_sort_by_species_count_first = sort_by_species_count_first;
_treat_as_binary_comparison = treat_as_binary_comparison;
_calc_similarity_score = calc_similarity_score;
+ _verbose = false;
}
@Override
}
final DecimalFormat pf = new java.text.DecimalFormat( "000000" );
int counter = 1;
- System.out.println( keys.size() );
+ if ( _verbose ) {
+ System.out.println( keys.size() );
+ }
for( final String key : keys ) {
- ForesterUtil.updateProgress( counter, pf );
+ if ( _verbose ) {
+ ForesterUtil.updateProgress( counter, pf );
+ }
counter++;
final List<CombinableDomains> same_id_cd_list = new ArrayList<CombinableDomains>( cdc_list.size() );
final List<Species> species_with_key_id_domain = new ArrayList<Species>();
throw new RuntimeException( "this should not have happened" );
}
}
- System.out.println();
+ if ( _verbose ) {
+ System.out.println();
+ }
return similarities;
}
final DomainSimilarityCalculator calc = new BasicDomainSimilarityCalculator( domain_similarity_sort_field,
sort_by_species_count_first,
true,
- calc_similarity_scores );
+ calc_similarity_scores,
+ true );
final SortedSet<DomainSimilarity> similarities = calc
.calculateSimilarities( pw_calc,
genome_pair,