X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fmsa%2FMsaMethods.java;h=2b470203d611294f1de19114e5226e0102d17208;hb=0b49b8e750b34d28a5989facdd8a7959870de996;hp=edca9b7cd9ebf9985280f815d821d32a6cd236b0;hpb=6b3ec0cd978bd692dfae1de3e076963283fdf322;p=jalview.git diff --git a/forester/java/src/org/forester/msa/MsaMethods.java b/forester/java/src/org/forester/msa/MsaMethods.java index edca9b7..2b47020 100644 --- a/forester/java/src/org/forester/msa/MsaMethods.java +++ b/forester/java/src/org/forester/msa/MsaMethods.java @@ -107,6 +107,43 @@ public final class MsaMethods { _ignored_seqs_ids = new ArrayList(); } + public static final DescriptiveStatistics calcNumberOfGapsStats( final Msa msa ) { + final int[] gaps = calcNumberOfGapsInMsa( msa ); + final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); + for( final int gap : gaps ) { + stats.addValue( gap ); + } + return stats; + } + + public static final int[] calcNumberOfGapsInMsa( final Msa msa ) { + final int seqs = msa.getNumberOfSequences(); + final int[] gaps= new int[ seqs ]; + for( int i = 0; i < seqs; ++i ) { + gaps[ i ] = calcNumberOfGaps( msa.getSequence( i ) ); + } + return gaps; + } + + + + public final static int calcNumberOfGaps( final MolecularSequence seq ) { + int gaps = 0; + boolean was_gap = false; + for( int i = 0; i < seq.getLength(); ++i ) { + if ( seq.isGapAt( i ) ) { + if ( !was_gap ) { + ++gaps; + was_gap = true; + } + } + else { + was_gap = false; + } + } + return gaps; + } + public static DescriptiveStatistics calcBasicGapinessStatistics( final Msa msa ) { final DescriptiveStatistics stats = new BasicDescriptiveStatistics(); for( int i = 0; i < msa.getLength(); ++i ) { @@ -324,8 +361,8 @@ public final class MsaMethods { final private static HashMap calcResidueDistribution6( final Msa msa, final int col ) { // Residues are classified into one of tex2html_wrap199 types: - // aliphatic [AVLIMC], aromatic [FWYH], polar [STNQ], positive [KR], negative [DE], - // special conformations [GP] and gaps. This convention follows that + // aliphatic [AVLIMC], aromatic [FWYH], polar [STNQ], positive [KR], negative [DE], + // special conformations [GP] and gaps. This convention follows that // of Mirny & Shakhnovich (1999, J Mol Biol 291:177-196). final HashMap counts = new HashMap(); for( int row = 0; row < msa.getNumberOfSequences(); ++row ) { @@ -370,8 +407,8 @@ public final class MsaMethods { final private static HashMap calcResidueDistribution7( final Msa msa, final int col ) { // Residues are classified into one of tex2html_wrap199 types: - // aliphatic [AVLIMC], aromatic [FWYH], polar [STNQ], positive [KR], negative [DE], - // special conformations [GP] and gaps. This convention follows that + // aliphatic [AVLIMC], aromatic [FWYH], polar [STNQ], positive [KR], negative [DE], + // special conformations [GP] and gaps. This convention follows that // of Mirny & Shakhnovich (1999, J Mol Biol 291:177-196). final HashMap counts = new HashMap(); for( int row = 0; row < msa.getNumberOfSequences(); ++row ) {