import java.io.FileInputStream;
import java.util.ArrayList;
+import java.util.HashSet;
import java.util.List;
+import java.util.Set;
+import java.util.TreeSet;
import org.forester.io.parsers.FastaParser;
import org.forester.msa.Msa;
final List<Sequence> orig = FastaParser
.parse( new FileInputStream( "C:\\Users\\zma\\Desktop\\RRMa_domains_ext_20.fasta" ) );
final Msa msa = FastaParser.parseMsa( new FileInputStream( "C:\\Users\\zma\\Desktop\\test3_sorted.fasta" ) );
+ final Set<Sequence> all_found_seqs = new HashSet<Sequence>();
for( int i = 0; i < msa.getNumberOfSequences(); ++i ) {
final String id = msa.getIdentifier( i );
final String id_ = id.substring( 0, id.indexOf( "_" ) );
package org.forester.sequence;
+import org.forester.util.ForesterUtil;
+
public class BasicSequence implements Sequence {
private final char[] _mol_sequence;
private final TYPE _type;
private BasicSequence( final String identifier, final String mol_sequence, final TYPE type ) {
+ if ( ForesterUtil.isEmpty( identifier ) ) {
+ throw new IllegalArgumentException( "identifier of sequence cannot be empty");
+ }
+ if ( ForesterUtil.isEmpty( mol_sequence ) ) {
+ throw new IllegalArgumentException( "molecular sequence cannot be empty");
+ }
_mol_sequence = mol_sequence.toCharArray();
_identifier = identifier;
_type = type;
// Only use if you know what you are doing!
public BasicSequence( final String identifier, final char[] mol_sequence, final TYPE type ) {
+ if ( ForesterUtil.isEmpty( identifier ) ) {
+ throw new IllegalArgumentException( "identifier of sequence cannot be empty");
+ }
+ if ( mol_sequence == null || mol_sequence.length < 1 ) {
+ throw new IllegalArgumentException( "molecular sequence cannot be empty");
+ }
_mol_sequence = mol_sequence;
_identifier = identifier;
_type = type;
}
+ /**
+  * Compares this sequence with another object by molecular sequence only.
+  * The argument must be non-null and of exactly this runtime class; the
+  * identifier and the type of the two sequences are not compared.
+  *
+  * @param obj the object to compare with, may be null
+  * @return true if obj has the same class as this object and an equal molecular sequence string
+  */
@Override
+ public boolean equals( final Object obj ) {
+ // Reject null and any object whose runtime class differs from this one.
+ if ( ( obj == null ) || ( obj.getClass() != getClass() ) ) {
+ return false;
+ }
+ final Sequence that = ( Sequence ) obj;
+ return getMolecularSequenceAsString().equals( that.getMolecularSequenceAsString() );
+ }
+
+ /**
+  * Returns a hash code computed from the molecular sequence string alone,
+  * the same value that equals compares.
+  *
+  * @return hash code of getMolecularSequenceAsString()
+  */
+ @Override
+ public int hashCode() {
+ final String residues = getMolecularSequenceAsString();
+ return residues.hashCode();
+ }
+
+
+ @Override // toString() yields the identifier, then ": ", then the molecular sequence as text
public String toString() {
final StringBuffer sb = new StringBuffer();
sb.append( _identifier.toString() );
- sb.append( " " );
- sb.append( new String( _mol_sequence ) );
+ sb.append( ": " ); // separator placed between the identifier and the residues
+ sb.append( getMolecularSequenceAsString() ); // residues obtained through the accessor rather than by wrapping _mol_sequence directly
return sb.toString();
}
return new BasicSequence( identifier, mol_sequence.toUpperCase().replaceAll( "\\.", GAP_STR )
.replaceAll( RNA_REGEXP, Character.toString( UNSPECIFIED_NUC ) ), TYPE.RNA );
}
+
+ @Override
+ public String getMolecularSequenceAsString() { // returns the residues of this sequence as a String
+ // A new String is built from getMolecularSequence() on every call; nothing is cached in this method.
+ return new String( getMolecularSequence() );
+ }
}