From cfda6a6b1a23304bdcdac75f629e9ce4beb9cd9a Mon Sep 17 00:00:00 2001 From: cmzmasek Date: Wed, 8 Aug 2012 13:23:55 +0000 Subject: [PATCH] work for rrm project (ComPhy 2012 Moscow) --- forester/java/src/org/forester/application/aa.java | 4 ++ .../src/org/forester/sequence/BasicSequence.java | 45 +++++++++++++++++++- .../java/src/org/forester/sequence/Sequence.java | 2 + 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/forester/java/src/org/forester/application/aa.java b/forester/java/src/org/forester/application/aa.java index b9e88a7..6d7be4f 100644 --- a/forester/java/src/org/forester/application/aa.java +++ b/forester/java/src/org/forester/application/aa.java @@ -3,7 +3,10 @@ package org.forester.application; import java.io.FileInputStream; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; +import java.util.TreeSet; import org.forester.io.parsers.FastaParser; import org.forester.msa.Msa; @@ -18,6 +21,7 @@ public class aa { final List orig = FastaParser .parse( new FileInputStream( "C:\\Users\\zma\\Desktop\\RRMa_domains_ext_20.fasta" ) ); final Msa msa = FastaParser.parseMsa( new FileInputStream( "C:\\Users\\zma\\Desktop\\test3_sorted.fasta" ) ); + final Set all_found_seqs = new HashSet(); for( int i = 0; i < msa.getNumberOfSequences(); ++i ) { final String id = msa.getIdentifier( i ); final String id_ = id.substring( 0, id.indexOf( "_" ) ); diff --git a/forester/java/src/org/forester/sequence/BasicSequence.java b/forester/java/src/org/forester/sequence/BasicSequence.java index c14277c..e180481 100644 --- a/forester/java/src/org/forester/sequence/BasicSequence.java +++ b/forester/java/src/org/forester/sequence/BasicSequence.java @@ -26,6 +26,8 @@ package org.forester.sequence; +import org.forester.util.ForesterUtil; + public class BasicSequence implements Sequence { private final char[] _mol_sequence; @@ -33,6 +35,12 @@ public class BasicSequence implements Sequence { private final TYPE _type; private BasicSequence( final String identifier, final String mol_sequence, final TYPE type ) { + if ( ForesterUtil.isEmpty( identifier ) ) { + throw new IllegalArgumentException( "identifier of sequence cannot be empty"); + } + if ( ForesterUtil.isEmpty( mol_sequence ) ) { + throw new IllegalArgumentException( "molecular sequence cannot be empty"); + } _mol_sequence = mol_sequence.toCharArray(); _identifier = identifier; _type = type; @@ -40,6 +48,12 @@ public class BasicSequence implements Sequence { // Only use if you know what you are doing! public BasicSequence( final String identifier, final char[] mol_sequence, final TYPE type ) { + if ( ForesterUtil.isEmpty( identifier ) ) { + throw new IllegalArgumentException( "identifier of sequence cannot be empty"); + } + if ( mol_sequence == null || mol_sequence.length < 1 ) { + throw new IllegalArgumentException( "molecular sequence cannot be empty"); + } _mol_sequence = mol_sequence; _identifier = identifier; _type = type; @@ -82,11 +96,32 @@ public class BasicSequence implements Sequence { } @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (obj.getClass() != getClass()) { + return false; + } + Sequence other = ( Sequence) obj; + if ( getMolecularSequenceAsString() .equals( other.getMolecularSequenceAsString() )) { + return true; + } + return false; + } + + @Override + public int hashCode() { + return getMolecularSequenceAsString().hashCode(); + } + + + @Override public String toString() { final StringBuffer sb = new StringBuffer(); sb.append( _identifier.toString() ); - sb.append( " " ); - sb.append( new String( _mol_sequence ) ); + sb.append( ": " ); + sb.append( getMolecularSequenceAsString() ); return sb.toString(); } @@ -112,4 +147,10 @@ public class BasicSequence implements Sequence { return new BasicSequence( identifier, mol_sequence.toUpperCase().replaceAll( "\\.", GAP_STR ) .replaceAll( RNA_REGEXP, Character.toString( UNSPECIFIED_NUC ) ), TYPE.RNA ); } + + @Override + public String getMolecularSequenceAsString() { + + return new String( getMolecularSequence() ); + } } diff --git a/forester/java/src/org/forester/sequence/Sequence.java b/forester/java/src/org/forester/sequence/Sequence.java index 9a92fbe..f4c17cf 100644 --- a/forester/java/src/org/forester/sequence/Sequence.java +++ b/forester/java/src/org/forester/sequence/Sequence.java @@ -44,6 +44,8 @@ public interface Sequence { public abstract int getNumberOfGapResidues(); public abstract char[] getMolecularSequence(); + + public abstract String getMolecularSequenceAsString(); public abstract char getResidueAt( final int position ); -- 1.7.10.2