X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=forester%2Fjava%2Fsrc%2Forg%2Fforester%2Fsequence%2FBasicSequence.java;h=de3084e12c1aaec9bec93ff8eb0899ca56abd6fd;hb=b5fe992f707cdad84255eb85040effbee97d61b3;hp=60828bd21b46cccad0ddd85d26f97748e2516ada;hpb=72c535142a5e6b0da9c7edb2f605eb835b43e6fb;p=jalview.git diff --git a/forester/java/src/org/forester/sequence/BasicSequence.java b/forester/java/src/org/forester/sequence/BasicSequence.java index 60828bd..de3084e 100644 --- a/forester/java/src/org/forester/sequence/BasicSequence.java +++ b/forester/java/src/org/forester/sequence/BasicSequence.java @@ -22,31 +22,42 @@ // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA // // Contact: phylosoft @ gmail . com -// WWW: www.phylosoft.org/forester +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester package org.forester.sequence; import org.forester.util.ForesterUtil; -public class BasicSequence implements Sequence { +public class BasicSequence implements MolecularSequence { private final char[] _mol_sequence; - private final String _identifier; + private String _identifier; private final TYPE _type; - private BasicSequence( final String identifier, final String mol_sequence, final TYPE type ) { + /** + * Only use if you know what you are doing! + * + */ + public BasicSequence( final String identifier, final String mol_sequence, final TYPE type ) { + check( identifier, mol_sequence ); + _mol_sequence = mol_sequence.toCharArray(); + _identifier = identifier; + _type = type; + } + + private static final void check( final String identifier, final String mol_sequence ) { if ( ForesterUtil.isEmpty( identifier ) ) { throw new IllegalArgumentException( "identifier of sequence cannot be empty" ); } if ( ForesterUtil.isEmpty( mol_sequence ) ) { throw new IllegalArgumentException( "molecular sequence cannot be empty" ); } - _mol_sequence = mol_sequence.toCharArray(); - _identifier = identifier; - _type = type; } - // Only use if you know what you are doing! + /** + * Only use if you know what you are doing! + * + */ public BasicSequence( final String identifier, final char[] mol_sequence, final TYPE type ) { if ( ForesterUtil.isEmpty( identifier ) ) { throw new IllegalArgumentException( "identifier of sequence cannot be empty" ); @@ -59,6 +70,10 @@ public class BasicSequence implements Sequence { _type = type; } + public void setIdentifier( final String id ) { + _identifier = id; + } + @Override public String getIdentifier() { return _identifier; @@ -87,8 +102,8 @@ public class BasicSequence implements Sequence { @Override public int getNumberOfGapResidues() { int gaps = 0; - for( int i = 0; i < _mol_sequence.length; ++i ) { - if ( _mol_sequence[ i ] == GAP ) { + for( final char element : _mol_sequence ) { + if ( element == GAP ) { ++gaps; } } @@ -103,7 +118,7 @@ public class BasicSequence implements Sequence { if ( obj.getClass() != getClass() ) { return false; } - final Sequence other = ( Sequence ) obj; + final MolecularSequence other = ( MolecularSequence ) obj; if ( getMolecularSequenceAsString().equals( other.getMolecularSequenceAsString() ) ) { return true; } @@ -124,7 +139,7 @@ public class BasicSequence implements Sequence { return sb.toString(); } - public static Sequence copySequence( final Sequence seq ) { + public static MolecularSequence copySequence( final MolecularSequence seq ) { final char[] s = new char[ seq.getMolecularSequence().length ]; for( int i = 0; i < seq.getMolecularSequence().length; i++ ) { s[ i ] = seq.getMolecularSequence()[ i ]; @@ -132,23 +147,61 @@ public class BasicSequence implements Sequence { return new BasicSequence( new String( seq.getIdentifier() ), s, seq.getType() ); } - public static Sequence createAaSequence( final String identifier, final String mol_sequence ) { + public static MolecularSequence createSequence( final String identifier, final String mol_sequence ) { + check( identifier, mol_sequence ); + final TYPE type = ForesterUtil.guessMolecularSequenceType( mol_sequence ); + final String re; + final char repl; + if ( type == TYPE.AA ) { + re = AA_REGEXP; + repl = UNSPECIFIED_AA; + } + else if ( type == TYPE.DNA ) { + re = DNA_REGEXP; + repl = UNSPECIFIED_NUC; + } + else if ( type == TYPE.RNA ) { + re = RNA_REGEXP; + repl = UNSPECIFIED_NUC; + } + else { + throw new IllegalArgumentException( "could not determine sequence type for: " + mol_sequence); + } + return new BasicSequence( identifier, mol_sequence.toUpperCase().replaceAll( "\\.", GAP_STR ) + .replaceAll( re, Character.toString( repl ) ), type ); + } + + public static MolecularSequence createGeneralSequence( final String identifier, final String mol_sequence ) { + check( identifier, mol_sequence ); + return new BasicSequence( identifier, mol_sequence.toUpperCase().replaceAll( "\\.", GAP_STR + ), TYPE.GENERAL ); + } + + public static MolecularSequence createAaSequence( final String identifier, final String mol_sequence ) { + check( identifier, mol_sequence ); return new BasicSequence( identifier, mol_sequence.toUpperCase().replaceAll( "\\.", GAP_STR ) - .replaceAll( AA_REGEXP, Character.toString( UNSPECIFIED_AA ) ), TYPE.AA ); + .replaceAll( AA_REGEXP, Character.toString( UNSPECIFIED_AA ) ), TYPE.AA ); } - public static Sequence createDnaSequence( final String identifier, final String mol_sequence ) { + public static MolecularSequence createDnaSequence( final String identifier, final String mol_sequence ) { + check( identifier, mol_sequence ); return new BasicSequence( identifier, mol_sequence.toUpperCase().replaceAll( "\\.", GAP_STR ) - .replaceAll( DNA_REGEXP, Character.toString( UNSPECIFIED_NUC ) ), TYPE.DNA ); + .replaceAll( DNA_REGEXP, Character.toString( UNSPECIFIED_NUC ) ), TYPE.DNA ); } - public static Sequence createRnaSequence( final String identifier, final String mol_sequence ) { + public static MolecularSequence createRnaSequence( final String identifier, final String mol_sequence ) { + check( identifier, mol_sequence ); return new BasicSequence( identifier, mol_sequence.toUpperCase().replaceAll( "\\.", GAP_STR ) - .replaceAll( RNA_REGEXP, Character.toString( UNSPECIFIED_NUC ) ), TYPE.RNA ); + .replaceAll( RNA_REGEXP, Character.toString( UNSPECIFIED_NUC ) ), TYPE.RNA ); } @Override public String getMolecularSequenceAsString() { return new String( getMolecularSequence() ); } + + @Override + public boolean isGapAt( final int position ) { + return getResidueAt( position ) == GAP; + } }