moved
[jalview.git] / forester / java / src / org / forester / application / aa.java
1 //
2
3 package org.forester.application;
4
5 import java.io.FileInputStream;
6 import java.util.ArrayList;
7 import java.util.Arrays;
8 import java.util.HashSet;
9 import java.util.List;
10 import java.util.Set;
11
12 import org.forester.io.parsers.FastaParser;
13 import org.forester.msa.Msa;
14 import org.forester.sequence.BasicSequence;
15 import org.forester.sequence.Sequence;
16 import org.forester.util.ForesterUtil;
17
18 public class aa {
19
20     public static void main( final String args[] ) {
21         try {
22             System.out.println( "STARTING..." );
23             final List<Sequence> orig = FastaParser
24                     .parse( new FileInputStream( "C:\\Users\\zma\\Desktop\\RRMa_domains_ext_20.fasta" ) );
25             final Msa msa = FastaParser.parseMsa( new FileInputStream( "C:\\Users\\zma\\Desktop\\test3_sorted.fasta" ) );
26             final Set<Sequence> all_found_seqs = new HashSet<Sequence>();
27             for( int i = 0; i < msa.getNumberOfSequences(); ++i ) {
28                 final String id = msa.getIdentifier( i );
29                 final String id_ = id.substring( 0, id.indexOf( "_" ) );
30                 final String range = id.substring( id.indexOf( "[" ) + 1, id.indexOf( "]" ) );
31                 //System.out.println( i + ": " + id + "=>" + id_ + " " + range );
32                 if ( ForesterUtil.isEmpty( id_ ) ) {
33                     System.out.println( "ERROR: id is empty for: " + id );
34                     System.exit( -1 );
35                 }
36                 if ( ForesterUtil.isEmpty( range ) ) {
37                     System.out.println( "ERROR: range is empty for: " + id );
38                     System.exit( -1 );
39                 }
40                 int found = 0;
41                 final List<Sequence> found_seqs = new ArrayList<Sequence>();
42                 for( final Sequence orig_seq : orig ) {
43                     final String orig_seq_id = orig_seq.getIdentifier();
44                     if ( ( orig_seq_id.indexOf( id_ ) >= 0 ) && ( orig_seq_id.indexOf( "[" + range + "]" ) >= 0 ) ) {
45                         found++;
46                         found_seqs.add( orig_seq );
47                     }
48                 }
49                 if ( found > 0 ) {
50                     for( final Sequence found_seq : found_seqs ) {
51                         if ( found_seq.getLength() >= 85 ) {
52                             all_found_seqs.add( BasicSequence.createAaSequence( id, found_seq
53                                     .getMolecularSequenceAsString() ) );
54                         }
55                     }
56                     if ( found > 1 ) {
57                         System.out.println( i + ": " + id + "=>" + id_ + " " + range );
58                         System.out.println( "  found: " + found );
59                         for( final Sequence found_seq : found_seqs ) {
60                             System.out.println( found_seq.toString() );
61                         }
62                     }
63                 }
64                 else {
65                     System.out.println( "ERROR: not found: " + id );
66                     System.exit( -1 );
67                 }
68             }
69             final String fasta_ary[] = new String[ all_found_seqs.size() ];
70             int i = 0;
71             for( final Sequence sequence : all_found_seqs ) {
72                 fasta_ary[ i ] = ">" + sequence.getIdentifier() + "\n" + sequence.getMolecularSequenceAsString();
73                 System.out.println( sequence );
74                 i++;
75             }
76             Arrays.sort( fasta_ary );
77             for( final String element : fasta_ary ) {
78                 System.out.println( element );
79             }
80             System.out.println( "DONE." );
81         }
82         catch ( final Exception e ) {
83             e.printStackTrace();
84         }
85     }
86 }