From 1f9eafcd1707bc0e05e16ddc1c3389d69a085e35 Mon Sep 17 00:00:00 2001 From: cmzmasek Date: Thu, 9 Aug 2012 13:22:51 +0000 Subject: [PATCH] work for rrm project (ComPhy 2012 Moscow) --- .../java/src/org/forester/application/aaa.java | 53 +++++++++++++------- 1 file changed, 34 insertions(+), 19 deletions(-) diff --git a/forester/java/src/org/forester/application/aaa.java b/forester/java/src/org/forester/application/aaa.java index b42ba7b..e4b9492 100644 --- a/forester/java/src/org/forester/application/aaa.java +++ b/forester/java/src/org/forester/application/aaa.java @@ -12,7 +12,6 @@ import java.util.regex.Pattern; import org.forester.io.parsers.FastaParser; import org.forester.sequence.Sequence; -import org.forester.util.ForesterUtil; public class aaa { @@ -43,7 +42,7 @@ public class aaa { int unique_counter = 0; int duplicate_counter_gn_ra = 0; int duplicate_counter_mol_seq = 0; - String prev_gn = "____"; + final List new_seqs_unique = new ArrayList(); for( final String seq : new_seqs ) { final Matcher matcher_ra = RANGE_PATTERN.matcher( seq ); final Matcher matcher_gn = GN_PATTERN.matcher( seq ); @@ -63,23 +62,7 @@ public class aaa { final String mol_seq = seq.split( "\n" )[ 1 ]; if ( !mol_seq_set.contains( mol_seq ) ) { mol_seq_set.add( mol_seq ); - if ( prev_gn.equals( gn ) ) { - int count = same_protein_seqs.size(); - if ( count == 1 ) { - System.out.println( seq ); - } - else { - int c = 1; - for( final String s : same_protein_seqs ) { - System.out.println( new StringBuffer( s ).insert( s.indexOf( "|" ), - "__" + c + "_OF_" + count ) - .toString() ); - c++; - } - } - } - prev_gn = gn; - System.out.println( seq ); + new_seqs_unique.add( seq ); unique_counter++; } else { @@ -90,6 +73,23 @@ public class aaa { duplicate_counter_gn_ra++; } } + String prev_gn = "___"; + boolean is_first = true; + List same_protein_seqs = new ArrayList(); + for( final String seq : new_seqs_unique ) { + + final Matcher matcher_gn = GN_PATTERN.matcher( seq ); + matcher_gn.find(); + final String gn = matcher_gn.group( 1 ); + if ( !prev_gn.equals( gn ) && !is_first ) { + doit( same_protein_seqs ); + same_protein_seqs = new ArrayList(); + } + prev_gn = gn; + is_first = false; + same_protein_seqs.add( seq ); + } + doit( same_protein_seqs ); System.out.println( "unique : " + unique_counter ); System.out.println( "duplicate because gn and range same: " + duplicate_counter_gn_ra ); System.out.println( "duplicate because mol seq same : " + duplicate_counter_mol_seq ); @@ -98,4 +98,19 @@ public class aaa { e.printStackTrace(); } } + + private static void doit( List same_protein_seqs ) { + final int count = same_protein_seqs.size(); + if ( count == 1 ) { + System.out.println( same_protein_seqs.get( 0 ) ); + } + else { + int c = 1; + for( final String s : same_protein_seqs ) { + System.out.println( new StringBuffer( s ).insert( s.indexOf( "|" ), + "__" + c + "_OF_" + count ).toString() ); + c++; + } + } + } } -- 1.7.10.2