3 package org.forester.applications;
5 import java.io.FileInputStream;
6 import java.util.ArrayList;
7 import java.util.Arrays;
8 import java.util.HashSet;
12 import org.forester.io.parsers.FastaParser;
13 import org.forester.msa.Msa;
14 import org.forester.sequence.BasicSequence;
15 import org.forester.sequence.MolecularSequence;
16 import org.forester.util.ForesterUtil;
20 public static void main( final String args[] ) {
22 System.out.println( "STARTING..." );
23 final List<MolecularSequence> orig = FastaParser
24 .parse( new FileInputStream( "C:\\Users\\zma\\Desktop\\RRMa_domains_ext_20.fasta" ) );
25 final Msa msa = FastaParser.parseMsa( new FileInputStream( "C:\\Users\\zma\\Desktop\\test3_sorted.fasta" ) );
26 final Set<MolecularSequence> all_found_seqs = new HashSet<MolecularSequence>();
27 for( int i = 0; i < msa.getNumberOfSequences(); ++i ) {
28 final String id = msa.getIdentifier( i );
29 final String id_ = id.substring( 0, id.indexOf( "_" ) );
30 final String range = id.substring( id.indexOf( "[" ) + 1, id.indexOf( "]" ) );
31 //System.out.println( i + ": " + id + "=>" + id_ + " " + range );
32 if ( ForesterUtil.isEmpty( id_ ) ) {
33 System.out.println( "ERROR: id is empty for: " + id );
36 if ( ForesterUtil.isEmpty( range ) ) {
37 System.out.println( "ERROR: range is empty for: " + id );
41 final List<MolecularSequence> found_seqs = new ArrayList<MolecularSequence>();
42 for( final MolecularSequence orig_seq : orig ) {
43 final String orig_seq_id = orig_seq.getIdentifier();
44 if ( ( orig_seq_id.indexOf( id_ ) >= 0 ) && ( orig_seq_id.indexOf( "[" + range + "]" ) >= 0 ) ) {
46 found_seqs.add( orig_seq );
50 for( final MolecularSequence found_seq : found_seqs ) {
51 if ( found_seq.getLength() >= 85 ) {
52 all_found_seqs.add( BasicSequence.createAaSequence( id, found_seq
53 .getMolecularSequenceAsString() ) );
57 System.out.println( i + ": " + id + "=>" + id_ + " " + range );
58 System.out.println( " found: " + found );
59 for( final MolecularSequence found_seq : found_seqs ) {
60 System.out.println( found_seq.toString() );
65 System.out.println( "ERROR: not found: " + id );
69 final String fasta_ary[] = new String[ all_found_seqs.size() ];
71 for( final MolecularSequence sequence : all_found_seqs ) {
72 fasta_ary[ i ] = ">" + sequence.getIdentifier() + "\n" + sequence.getMolecularSequenceAsString();
73 System.out.println( sequence );
76 Arrays.sort( fasta_ary );
77 for( final String element : fasta_ary ) {
78 System.out.println( element );
80 System.out.println( "DONE." );
82 catch ( final Exception e ) {