public static int calcGapSumPerColumn( final Msa msa, final int col ) {
int gap_rows = 0;
for( int j = 0; j < msa.getNumberOfSequences(); ++j ) {
- if ( msa.getResidueAt( j, col ) == Sequence.GAP ) {
+ if ( msa.isGapAt( j, col ) ) {
gap_rows++;
}
}
final List<Sequence> seqs = new ArrayList<Sequence>();
for( int row = 0; row < msa.getNumberOfSequences(); ++row ) {
if ( !to_remove_id.equals( msa.getIdentifier( row ) ) ) {
- seqs.add( BasicSequence.copySequence( msa.getSequence( row ) ) );
+ seqs.add( msa.getSequence( row ) );
}
}
if ( seqs.size() < 1 ) {
final List<Sequence> seqs = new ArrayList<Sequence>();
for( int row = 0; row < msa.getNumberOfSequences(); ++row ) {
if ( !to_remove_ids.contains( msa.getIdentifier( row ) ) ) {
- seqs.add( BasicSequence.copySequence( msa.getSequence( row ) ) );
+ seqs.add( msa.getSequence( row ) );
}
}
if ( seqs.size() < 1 ) {
final List<Sequence> seqs = new ArrayList<Sequence>();
for( int row = 0; row < msa.getNumberOfSequences(); ++row ) {
if ( !to_remove_rows.contains( row ) ) {
- seqs.add( BasicSequence.copySequence( msa.getSequence( row ) ) );
+ seqs.add( msa.getSequence( row ) );
}
}
if ( seqs.size() < 1 ) {
return BasicMsa.createInstance( seqs );
}
- synchronized final public Msa removeGapColumns( final double max_allowed_gap_ratio,
+ synchronized final public Msa deleteGapColumns( final double max_allowed_gap_ratio,
final int min_allowed_length,
final Msa msa ) {
init();
final boolean[] delete_cols = new boolean[ msa.getLength() ];
int new_length = 0;
for( int col = 0; col < msa.getLength(); ++col ) {
- delete_cols[ col ] = ( ( double ) calcGapSumPerColumn( msa, col ) / msa.getNumberOfSequences() ) >= max_allowed_gap_ratio;
+ delete_cols[ col ] = ( ( double ) calcGapSumPerColumn( msa, col ) / msa.getNumberOfSequences() ) > max_allowed_gap_ratio;
if ( !delete_cols[ col ] ) {
++new_length;
}
return BasicMsa.createInstance( seqs );
}
- public static double calculateIdentityRatio( final Msa msa, final int column ) {
/**
 * Collects the per-column identity ratio over a range of alignment columns.
 * <p>
 * For every column index from {@code from} through {@code to}, the value
 * returned by {@code calculateIdentityRatio( msa, column )} is added to a
 * freshly created {@code BasicDescriptiveStatistics}.
 * No range checking is done here; if {@code from > to} the loop body never
 * runs and the returned statistics object has had no values added.
 *
 * @param from index of the first column to include
 * @param to   index of the last column to include (inclusive)
 * @param msa  the alignment whose columns are evaluated
 * @return statistics over the identity ratios of the selected columns
 */
+ final public static DescriptiveStatistics calculateIdentityRatio( final int from, final int to, final Msa msa ) {
+ final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
+ for( int c = from; c <= to; ++c ) { // '<=' makes the upper bound inclusive
+ stats.addValue( calculateIdentityRatio( msa, c ) ); // delegates to the single-column overload
+ }
+ return stats;
+ }
+
/**
 * Collects, for every sequence (row) of an alignment, its length minus its
 * number of gap residues, and returns the resulting statistics.
 * <p>
 * One value per row is added to a freshly created
 * {@code BasicDescriptiveStatistics}; rows are visited in index order from
 * 0 to {@code msa.getNumberOfSequences() - 1}.
 *
 * @param msa the alignment whose sequences are measured
 * @return statistics over the gap-free lengths of all sequences in {@code msa}
 */
+ final public static DescriptiveStatistics calculateEffectiveLengthStatistics( final Msa msa ) {
+ final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
+ for( int row = 0; row < msa.getNumberOfSequences(); ++row ) {
+ final Sequence s = msa.getSequence( row );
+ stats.addValue( s.getLength() - s.getNumberOfGapResidues() ); // length with gap residues subtracted
+ }
+ return stats;
+ }
+
+ final public static double calculateIdentityRatio( final Msa msa, final int column ) {
final SortedMap<Character, Integer> dist = calculateResidueDestributionPerColumn( msa, column );
int majority_count = 0;
final Iterator<Map.Entry<Character, Integer>> it = dist.entrySet().iterator();
public static SortedMap<Character, Integer> calculateResidueDestributionPerColumn( final Msa msa, final int column ) {
final SortedMap<Character, Integer> map = new TreeMap<Character, Integer>();
for( final Character r : msa.getColumnAt( column ) ) {
- if ( !map.containsKey( r ) ) {
- map.put( r, 1 );
- }
- else {
- map.put( r, map.get( r ) + 1 );
+ if ( r != Sequence.GAP ) {
+ if ( !map.containsKey( r ) ) {
+ map.put( r, 1 );
+ }
+ else {
+ map.put( r, map.get( r ) + 1 );
+ }
}
}
return map;