work for rrm project (ComPhy 2012 Moscow)
[jalview.git] / forester / java / src / org / forester / application / aa.java
1
2 package org.forester.application;
3
4 import java.io.FileInputStream;
5 import java.util.ArrayList;
6 import java.util.HashSet;
7 import java.util.List;
8 import java.util.Set;
9 import java.util.TreeSet;
10
11 import org.forester.io.parsers.FastaParser;
12 import org.forester.msa.Msa;
13 import org.forester.sequence.Sequence;
14 import org.forester.util.ForesterUtil;
15
16 public class aa {
17
18     public static void main( final String args[] ) {
19         try {
20             System.out.println( "STARTING..." );
21             final List<Sequence> orig = FastaParser
22                     .parse( new FileInputStream( "C:\\Users\\zma\\Desktop\\RRMa_domains_ext_20.fasta" ) );
23             final Msa msa = FastaParser.parseMsa( new FileInputStream( "C:\\Users\\zma\\Desktop\\test3_sorted.fasta" ) );
24             final Set<Sequence> all_found_seqs = new HashSet<Sequence>();
25             for( int i = 0; i < msa.getNumberOfSequences(); ++i ) {
26                 final String id = msa.getIdentifier( i );
27                 final String id_ = id.substring( 0, id.indexOf( "_" ) );
28                 final String range = id.substring( id.indexOf( "[" ) + 1, id.indexOf( "]" ) );
29                 //System.out.println( i + ": " + id + "=>" + id_ + " " + range );
30                 if ( ForesterUtil.isEmpty( id_ ) ) {
31                     System.out.println( "ERROR: id is empty for: " + id );
32                     System.exit( -1 );
33                 }
34                 if ( ForesterUtil.isEmpty( range ) ) {
35                     System.out.println( "ERROR: range is empty for: " + id );
36                     System.exit( -1 );
37                 }
38                 int found = 0;
39                 final List<Sequence> found_seqs = new ArrayList<Sequence>();
40                 for( final Sequence orig_seq : orig ) {
41                     final String orig_seq_id = orig_seq.getIdentifier();
42                     if ( orig_seq_id.indexOf( id_ ) >= 0 && orig_seq_id.indexOf( "[" + range + "]" ) >= 0 ) {
43                         found++;
44                         found_seqs.add( orig_seq );
45                     }
46                 }
47                 if ( found > 0 ) {
48                     for( final Sequence found_seq : found_seqs ) {
49                         if ( found_seq.getLength() >= 85 ) {
50                             all_found_seqs.add( found_seq );
51                         }
52                     }
53                     if ( found > 1 ) {
54                         System.out.println( i + ": " + id + "=>" + id_ + " " + range );
55                         System.out.println( "  found: " + found );
56                         for( final Sequence found_seq : found_seqs ) {
57                             System.out.println( found_seq.toString() );
58                         }
59                     }
60                 }
61                 else {
62                     System.out.println( "ERROR: not found: " + id );
63                     System.exit( -1 );
64                 }
65             }
66             int i = 0;
67             for( Sequence sequence : all_found_seqs ) {
68                 System.out.println( i++ + ": " + sequence );
69             }
70             System.out.println( "DONE." );
71         }
72         catch ( final Exception e ) {
73             e.printStackTrace();
74         }
75     }
76 }