cleanup
author: cmzmasek <cmzmasek@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 10 Aug 2012 07:20:42 +0000 (07:20 +0000)
committer: cmzmasek <cmzmasek@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 10 Aug 2012 07:20:42 +0000 (07:20 +0000)
14 files changed:
forester/java/src/org/forester/application/aa.java
forester/java/src/org/forester/application/aaa.java
forester/java/src/org/forester/application/decorator.java
forester/java/src/org/forester/application/surfacing.java
forester/java/src/org/forester/archaeopteryx/Configuration.java
forester/java/src/org/forester/archaeopteryx/tools/InferenceManager.java
forester/java/src/org/forester/archaeopteryx/tools/PhyloInferenceDialog.java
forester/java/src/org/forester/archaeopteryx/tools/PhylogeneticInferrer.java
forester/java/src/org/forester/msa/ClustalOmega.java
forester/java/src/org/forester/phylogeny/PhylogenyMethods.java
forester/java/src/org/forester/sequence/BasicSequence.java
forester/java/src/org/forester/sequence/Sequence.java
forester/java/src/org/forester/surfacing/SurfacingUtil.java
forester/java/src/org/forester/util/BasicTableParser.java

index 899120c..a71d3fe 100644 (file)
@@ -7,7 +7,6 @@ import java.util.Arrays;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
-import java.util.TreeSet;
 
 import org.forester.io.parsers.FastaParser;
 import org.forester.msa.Msa;
@@ -41,7 +40,7 @@ public class aa {
                 final List<Sequence> found_seqs = new ArrayList<Sequence>();
                 for( final Sequence orig_seq : orig ) {
                     final String orig_seq_id = orig_seq.getIdentifier();
-                    if ( orig_seq_id.indexOf( id_ ) >= 0 && orig_seq_id.indexOf( "[" + range + "]" ) >= 0 ) {
+                    if ( ( orig_seq_id.indexOf( id_ ) >= 0 ) && ( orig_seq_id.indexOf( "[" + range + "]" ) >= 0 ) ) {
                         found++;
                         found_seqs.add( orig_seq );
                     }
@@ -49,8 +48,8 @@ public class aa {
                 if ( found > 0 ) {
                     for( final Sequence found_seq : found_seqs ) {
                         if ( found_seq.getLength() >= 85 ) {
-                            
-                            all_found_seqs.add( BasicSequence.createAaSequence( id, found_seq.getMolecularSequenceAsString() ) );
+                            all_found_seqs.add( BasicSequence.createAaSequence( id, found_seq
+                                    .getMolecularSequenceAsString() ) );
                         }
                     }
                     if ( found > 1 ) {
@@ -66,19 +65,17 @@ public class aa {
                     System.exit( -1 );
                 }
             }
-            String fasta_ary[] = new String[ all_found_seqs.size() ];
+            final String fasta_ary[] = new String[ all_found_seqs.size() ];
             int i = 0;
-            for( Sequence sequence : all_found_seqs ) {
+            for( final Sequence sequence : all_found_seqs ) {
                 fasta_ary[ i ] = ">" + sequence.getIdentifier() + "\n" + sequence.getMolecularSequenceAsString();
                 System.out.println( sequence );
                 i++;
             }
             Arrays.sort( fasta_ary );
-            
             for( int j = 0; j < fasta_ary.length; ++j ) {
-                System.out.println(  fasta_ary[ j ] );
+                System.out.println( fasta_ary[ j ] );
             }
-            
             System.out.println( "DONE." );
         }
         catch ( final Exception e ) {
index e4b9492..ac643c4 100644 (file)
@@ -2,6 +2,7 @@
 package org.forester.application;
 
 import java.io.FileInputStream;
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashSet;
@@ -12,19 +13,29 @@ import java.util.regex.Pattern;
 
 import org.forester.io.parsers.FastaParser;
 import org.forester.sequence.Sequence;
+import org.forester.util.EasyWriter;
+import org.forester.util.ForesterUtil;
 
 public class aaa {
 
     public final static Pattern GN_PATTERN    = Pattern.compile( "GN=(\\S+)\\s" );     //use w+ instead of S+ for more stringent setting.
     public final static Pattern RANGE_PATTERN = Pattern.compile( "\\[(\\d+-\\d+)\\]" ); //use w+ instead of S+ for more stringent setting.
+    public final static int     MIN_LENGTH    = 85;
 
     public static void main( final String args[] ) {
         try {
+            final EasyWriter out = ( EasyWriter ) ForesterUtil.createEasyWriter( "aaa_out" );
             System.out.println( "STARTING..." );
+            final List<Sequence> too_short = new ArrayList<Sequence>();
             final List<Sequence> orig = FastaParser
                     .parse( new FileInputStream( "C:\\Users\\zma\\Desktop\\RRMa_domains_ext_20_2.fasta" ) );
+            final int initial_number = orig.size();
             final List<String> new_seqs = new ArrayList<String>();
             for( final Sequence seq : orig ) {
+                if ( seq.getLength() < MIN_LENGTH ) {
+                    too_short.add( seq );
+                    continue;
+                }
                 final Matcher matcher = GN_PATTERN.matcher( seq.getIdentifier() );
                 String gn = "";
                 if ( matcher.find() ) {
@@ -40,8 +51,8 @@ public class aaa {
             final Set<String> mol_seq_set = new HashSet<String>();
             Collections.sort( new_seqs );
             int unique_counter = 0;
-            int duplicate_counter_gn_ra = 0;
-            int duplicate_counter_mol_seq = 0;
+            final List<String> duplicate_gn_ra = new ArrayList<String>();
+            final List<String> duplicate_mol_seq = new ArrayList<String>();
             final List<String> new_seqs_unique = new ArrayList<String>();
             for( final String seq : new_seqs ) {
                 final Matcher matcher_ra = RANGE_PATTERN.matcher( seq );
@@ -66,49 +77,73 @@ public class aaa {
                         unique_counter++;
                     }
                     else {
-                        duplicate_counter_mol_seq++;
+                        duplicate_mol_seq.add( seq );
                     }
                 }
                 else {
-                    duplicate_counter_gn_ra++;
+                    duplicate_gn_ra.add( seq );
                 }
             }
             String prev_gn = "___";
             boolean is_first = true;
-            List<String> same_protein_seqs = new ArrayList<String>();
+            List<String> seqs_from_same_protein = new ArrayList<String>();
             for( final String seq : new_seqs_unique ) {
-             
                 final Matcher matcher_gn = GN_PATTERN.matcher( seq );
                 matcher_gn.find();
                 final String gn = matcher_gn.group( 1 );
                 if ( !prev_gn.equals( gn ) && !is_first ) {
-                    doit( same_protein_seqs );
-                    same_protein_seqs = new ArrayList<String>();
+                    doit( seqs_from_same_protein, out );
+                    seqs_from_same_protein = new ArrayList<String>();
                 }
                 prev_gn = gn;
                 is_first = false;
-                same_protein_seqs.add( seq );
+                seqs_from_same_protein.add( seq );
+            }
+            doit( seqs_from_same_protein, out );
+            out.println( "" );
+            out.println( "" );
+            out.println( "Removed because same GN and region:" );
+            for( final String s : duplicate_gn_ra ) {
+                out.println( s );
+            }
+            out.println( "" );
+            out.println( "" );
+            out.println( "Removed because identical mol sequence:" );
+            for( final String s : duplicate_mol_seq ) {
+                out.println( s );
+            }
+            out.println( "" );
+            out.println( "" );
+            out.println( "Removed because too short:" );
+            for( final Sequence s : too_short ) {
+                out.println( s.toString() );
             }
-            doit( same_protein_seqs );
-            System.out.println( "unique   : " + unique_counter );
-            System.out.println( "duplicate because gn and range same: " + duplicate_counter_gn_ra );
-            System.out.println( "duplicate because mol seq same     : " + duplicate_counter_mol_seq );
+            out.println( "" );
+            out.println( "" );
+            out.println( "initial:" + initial_number );
+            out.println( "ignored because shorter than " + MIN_LENGTH + "aa: " + too_short.size() );
+            out.println( "unique   : " + unique_counter );
+            out.println( "unique   : " + new_seqs_unique.size() );
+            out.println( "duplicate because gn and range same: " + duplicate_gn_ra.size() );
+            out.println( "duplicate because mol seq same     : " + duplicate_mol_seq.size() );
+            out.flush();
+            out.close();
+            System.out.println( "DONE " );
         }
         catch ( final Exception e ) {
             e.printStackTrace();
         }
     }
 
-    private static void doit( List<String> same_protein_seqs ) {
+    private static void doit( final List<String> same_protein_seqs, final EasyWriter out ) throws IOException {
         final int count = same_protein_seqs.size();
         if ( count == 1 ) {
-            System.out.println( same_protein_seqs.get( 0 ) );
+            out.println( same_protein_seqs.get( 0 ) );
         }
         else {
             int c = 1;
             for( final String s : same_protein_seqs ) {
-                System.out.println( new StringBuffer( s ).insert( s.indexOf( "|" ),
-                                                                  "__" + c + "_OF_" + count ).toString() );
+                out.println( new StringBuffer( s ).insert( s.indexOf( "|" ), "__" + c + "_OF_" + count ).toString() );
                 c++;
             }
         }
index be44f67..6f37ff3 100644 (file)
@@ -369,7 +369,7 @@ public final class decorator {
             ForesterUtil.unexpectedFatalError( decorator.PRG_NAME, e );
         }
         catch ( final Exception e ) {
-            ForesterUtil.fatalError( decorator.PRG_NAME, e.getLocalizedMessage()  );
+            ForesterUtil.fatalError( decorator.PRG_NAME, e.getLocalizedMessage() );
         }
         try {
             final PhylogenyWriter w = new PhylogenyWriter();
index 328fd6c..7f4bf60 100644 (file)
@@ -44,7 +44,6 @@ import java.util.SortedSet;
 import java.util.TreeMap;
 import java.util.TreeSet;
 
-
 import org.forester.evoinference.matrix.character.CharacterStateMatrix.Format;
 import org.forester.go.GoId;
 import org.forester.go.GoNameSpace;
@@ -224,7 +223,7 @@ public class surfacing {
     final static private long                                 JACKNIFE_RANDOM_SEED_DEFAULT                                                  = 19;
     final static private double                               JACKNIFE_RATIO_DEFAULT                                                        = 0.5;
     //final static private String  INFER_SPECIES_TREES_OPTION                                             = "species_tree_inference";
-     final static private String                               FILTER_POSITIVE_OPTION                                                        = "pos_filter";
+    final static private String                               FILTER_POSITIVE_OPTION                                                        = "pos_filter";
     final static private String                               FILTER_NEGATIVE_OPTION                                                        = "neg_filter";
     final static private String                               FILTER_NEGATIVE_DOMAINS_OPTION                                                = "neg_dom_filter";
     final static private String                               INPUT_FILES_FROM_FILE_OPTION                                                  = "input";
@@ -252,9 +251,7 @@ public class surfacing {
     private static final String                               PLUS_MINUS_ALL_GO_IDS_DOM_SUFFIX                                              = "_plus_minus_go_ids_all.txt";
     private static final String                               PLUS_MINUS_PASSING_GO_IDS_DOM_SUFFIX                                          = "_plus_minus_go_ids_passing.txt";
     private static final String                               OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS                                           = "all_prot";
-    final static private String                               OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION = "all_prot_e";
-    
-    
+    final static private String                               OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION                         = "all_prot_e";
     private static final boolean                              VERBOSE                                                                       = false;
     private static final String                               OUTPUT_DOMAIN_COMBINATIONS_GAINED_MORE_THAN_ONCE_ANALYSIS_SUFFIX              = "_fitch_dc_gains_counts";
     private static final String                               OUTPUT_DOMAIN_COMBINATIONS_LOST_MORE_THAN_ONCE_ANALYSIS_SUFFIX                = "_fitch_dc_losses_counts";
@@ -545,8 +542,6 @@ public class surfacing {
         return intrees;
     }
 
-   
-
     private static void log( final String msg, final Writer w ) {
         try {
             w.write( msg );
@@ -861,7 +856,8 @@ public class surfacing {
             //
             if ( cla.isOptionSet( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION ) ) {
                 try {
-                    output_list_of_all_proteins_per_domain_e_value_max = cla.getOptionValueAsDouble( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION );
+                    output_list_of_all_proteins_per_domain_e_value_max = cla
+                            .getOptionValueAsDouble( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION );
                 }
                 catch ( final Exception e ) {
                     ForesterUtil.fatalError( surfacing.PRG_NAME, "no acceptable value for per domain E-value maximum" );
@@ -916,7 +912,6 @@ public class surfacing {
                                              + "=<ordered domain sequences, domain ids separated by '~', sequences separated by '#'>" );
             }
             query_domain_ids = cla.getOptionValue( surfacing.SEQ_EXTRACT_OPTION );
-           
         }
         DomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field = DOMAIN_SORT_FILD_DEFAULT;
         DomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field_for_automated_pwc = DOMAIN_SORT_FILD_DEFAULT;
@@ -1389,9 +1384,10 @@ public class surfacing {
             System.out.println( "E-value maximum (inclusive) : " + e_value_max );
             html_desc.append( "<tr><td>E-value maximum (inclusive):</td><td>" + e_value_max + "</td></tr>" + nl );
         }
-        if ( output_protein_lists_for_all_domains  ) {
+        if ( output_protein_lists_for_all_domains ) {
             System.out.println( "Domain E-value max          : " + output_list_of_all_proteins_per_domain_e_value_max );
-            html_desc.append( "<tr><td>Protein lists: E-value maximum per domain (inclusive):</td><td>" + output_list_of_all_proteins_per_domain_e_value_max + "</td></tr>" + nl );
+            html_desc.append( "<tr><td>Protein lists: E-value maximum per domain (inclusive):</td><td>"
+                    + output_list_of_all_proteins_per_domain_e_value_max + "</td></tr>" + nl );
         }
         System.out.println( "Ignore DUFs                 : " + ignore_dufs );
         if ( ignore_virus_like_ids ) {
@@ -2331,13 +2327,14 @@ public class surfacing {
                                       protein_lists_per_species,
                                       domain_id_to_go_ids_map,
                                       go_id_to_term_map,
-                                      plus_minus_analysis_numbers
-                                       );
+                                      plus_minus_analysis_numbers );
         }
         if ( output_protein_lists_for_all_domains ) {
-            writeProteinListsForAllSpecies( out_dir, protein_lists_per_species, gwcd_list, output_list_of_all_proteins_per_domain_e_value_max );
+            writeProteinListsForAllSpecies( out_dir,
+                                            protein_lists_per_species,
+                                            gwcd_list,
+                                            output_list_of_all_proteins_per_domain_e_value_max );
         }
-       
         if ( all_bin_domain_combinations_gained_fitch != null ) {
             try {
                 executeFitchGainsAnalysis( new File( output_file
@@ -2502,7 +2499,6 @@ public class surfacing {
         }
     }
 
-  
     private static void printHelp() {
         System.out.println();
         System.out.println( "Usage:" );
@@ -2590,8 +2586,8 @@ public class surfacing {
         System.out.println( surfacing.DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS
                 + ": to output binary domain combinations for (downstream) graph analysis" );
         System.out.println( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS + ": to output all proteins per domain" );
-        System.out.println( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION + ": e value max per domain for output of all proteins per domain" );
-        
+        System.out.println( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION
+                + ": e value max per domain for output of all proteins per domain" );
         System.out.println();
         System.out.println( "Example 1: java -Xms128m -Xmx512m -cp path/to/forester.jar"
                 + " org.forester.application.surfacing p2g=pfam2go_2012_02_07.txt -dufs -cos=Pfam_260_NC1"
@@ -2787,7 +2783,7 @@ public class surfacing {
                                                    proteins_file_writer,
                                                    "\t",
                                                    LIMIT_SPEC_FOR_PROT_EX,
-                                                    domain_e_cutoff );
+                                                   domain_e_cutoff );
                 proteins_file_writer.close();
             }
             catch ( final IOException e ) {
index e45dfc8..2420efe 100644 (file)
@@ -439,8 +439,6 @@ public final class Configuration {
         return _path_to_local_mafft;
     }
 
-  
-
     public File getPathToLocalFastme() {
         return _path_to_local_fastme;
     }
@@ -823,8 +821,6 @@ public final class Configuration {
         _path_to_local_mafft = path_to_local_mafft;
     }
 
-   
-
     private void setPathToLocalFastme( final File path_to_local_fastme ) {
         _path_to_local_fastme = path_to_local_fastme;
     }
@@ -1063,7 +1059,6 @@ public final class Configuration {
                 setPathToLocalMafft( new File( str ) );
             }
         }
-       
         else if ( key.equals( "fastme_local" ) ) {
             final String str = ( ( String ) st.nextElement() ).trim();
             if ( !ForesterUtil.isEmpty( str ) ) {
index 2c416b9..24ec184 100644 (file)
@@ -22,16 +22,13 @@ public final class InferenceManager {
     }
 
     public boolean canDoMsa() {
-        return ( getPathToLocalMafft() != null ) 
-                || ( getPathToLocalClustalo() != null );
+        return ( getPathToLocalMafft() != null ) || ( getPathToLocalClustalo() != null );
     }
 
     public File getPathToLocalMafft() {
         return _path_to_local_mafft;
     }
 
-    
-
     public File getPathToLocalFastme() {
         return _path_to_local_fastme;
     }
@@ -78,7 +75,6 @@ public final class InferenceManager {
                               final File path_to_local_raxml,
                               final File path_to_local_clustalo ) {
         _path_to_local_mafft = createLocalPath( path_to_local_mafft, "mafft" );
-      
         _path_to_local_fastme = createLocalPath( path_to_local_fastme, "fastme" );
         _path_to_local_raxml = createLocalPath( path_to_local_raxml, "raxml" );
         _path_to_local_clustalo = createLocalPath( path_to_local_clustalo, "clustalo" );
index 399c521..32e21c8 100644 (file)
@@ -78,7 +78,6 @@ public class PhyloInferenceDialog extends JDialog implements ActionListener {
     private JTextField                         _input_seqs_min_length_tf;
     private JTextField                         _input_seqs_max_length_tf;
     private JTextField                         _input_seqs_type_tf;
-  
     private JTextField                         _mafft_paramenters_tf;
     private JTextField                         _clustalo_paramenters_tf;
     private JTextField                         _msa_processing_max_allowed_gap_ratio_tf;
@@ -130,13 +129,10 @@ public class PhyloInferenceDialog extends JDialog implements ActionListener {
             inputfile_pnl_2.add( _input_seqs_max_length_tf = new JTextField() );
             inputfile_pnl_2.add( new JLabel( "Type:" ) );
             inputfile_pnl_2.add( _input_seqs_type_tf = new JTextField() );
-           
-            
-            inputfile_pnl_3.add( _mafft_cb = new JCheckBox( "MAFFT") );
+            inputfile_pnl_3.add( _mafft_cb = new JCheckBox( "MAFFT" ) );
             inputfile_pnl_3.add( new JLabel( "Parameters: " ) );
             inputfile_pnl_3.add( _mafft_paramenters_tf = new JTextField() );
-           
-            inputfile_pnl_4.add( _clustalo_cb = new  JCheckBox( "ClustalO") );
+            inputfile_pnl_4.add( _clustalo_cb = new JCheckBox( "ClustalO" ) );
             inputfile_pnl_4.add( new JLabel( "Parameters: " ) );
             inputfile_pnl_4.add( _clustalo_paramenters_tf = new JTextField() );
             _input_seqs_median_length_tf.setColumns( 4 );
@@ -151,7 +147,6 @@ public class PhyloInferenceDialog extends JDialog implements ActionListener {
             _input_seqs_max_length_tf.setEditable( false );
             _input_seqs_number_tf.setEditable( false );
             _input_seqs_type_tf.setEditable( false );
-           
             _mafft_paramenters_tf.setColumns( 26 );
             _mafft_paramenters_tf.setText( "--maxiterate 1000 --localpair" );
             _clustalo_paramenters_tf.setColumns( 26 );
@@ -286,7 +281,7 @@ public class PhyloInferenceDialog extends JDialog implements ActionListener {
         _value = JOptionPane.CANCEL_OPTION;
         if ( from_unaligned_seqs ) {
             updateSeqsItems();
-              }
+        }
         else {
             updateMsaItems();
         }
index 66fbdbc..9e651fb 100644 (file)
@@ -26,7 +26,6 @@
 package org.forester.archaeopteryx.tools;
 
 import java.io.BufferedWriter;
-import java.io.File;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.util.ArrayList;
@@ -98,16 +97,13 @@ public class PhylogeneticInferrer extends RunnableProcess {
         //        SequenceWriter.writeSeqs( _seqs, writer, SEQ_FORMAT.FASTA, 100 );
         //        writer.close();
         switch ( msa_prg ) {
-            case MAFFT: 
+            case MAFFT:
                 return runMAFFT( _seqs, processMafftOptions() );
-                
             case CLUSTAL_O:
                 return runClustalOmega( _seqs, processMafftOptions() );
             default:
                 return null;
         }
-        
-       
     }
 
     private List<String> processMafftOptions() {
@@ -347,7 +343,7 @@ public class PhylogeneticInferrer extends RunnableProcess {
             }
         }
     }
-    
+
     public enum MSA_PRG {
         MAFFT, CLUSTAL_O;
     }
index f6d4f65..938fc78 100644 (file)
@@ -71,7 +71,7 @@ public final class ClustalOmega extends MsaInferrer {
         return _exit_code;
     }
 
-    
+    @Override
     public Msa infer( final List<Sequence> seqs, final List<String> opts ) throws IOException, InterruptedException {
         final File file = File.createTempFile( "__clustalo_input_", ".fasta" );
         file.deleteOnExit();
@@ -82,8 +82,7 @@ public final class ClustalOmega extends MsaInferrer {
         file.delete();
         return msa;
     }
-    
-    
+
     @Override
     public Msa infer( final File path_to_input_seqs, final List<String> opts ) throws IOException, InterruptedException {
         init();
@@ -114,6 +113,4 @@ public final class ClustalOmega extends MsaInferrer {
         _error = null;
         _exit_code = -100;
     }
-
-  
 }
index 313848b..4c86166 100644 (file)
@@ -62,19 +62,14 @@ import org.forester.util.ForesterUtil;
 
 public class PhylogenyMethods {
 
-    private static PhylogenyMethods _instance      = null;
-
-    private PhylogenyNode           _farthest_1    = null;
-    private PhylogenyNode           _farthest_2    = null;
+    private static PhylogenyMethods _instance   = null;
+    private PhylogenyNode           _farthest_1 = null;
+    private PhylogenyNode           _farthest_2 = null;
 
     private PhylogenyMethods() {
         // Hidden constructor.
     }
 
-    
-    
-    
-    
     /**
      * Calculates the distance between PhylogenyNodes node1 and node2.
      * 
@@ -119,13 +114,10 @@ public class PhylogenyMethods {
         return farthest_d;
     }
 
-    final public static Event getEventAtLCA( PhylogenyNode n1,
-                                             PhylogenyNode n2 ) {
+    final public static Event getEventAtLCA( final PhylogenyNode n1, final PhylogenyNode n2 ) {
         return obtainLCA( n1, n2 ).getNodeData().getEvent();
     }
-    
-    
-    
+
     @Override
     public Object clone() throws CloneNotSupportedException {
         throw new CloneNotSupportedException();
@@ -139,29 +131,24 @@ public class PhylogenyMethods {
         return _farthest_2;
     }
 
-    final public static void deleteNonOrthologousExternalNodes( final Phylogeny phy,
-                                                                final PhylogenyNode n) {
+    final public static void deleteNonOrthologousExternalNodes( final Phylogeny phy, final PhylogenyNode n ) {
         if ( n.isInternal() ) {
             throw new IllegalArgumentException( "node is not external" );
         }
-        
         final ArrayList<PhylogenyNode> to_delete = new ArrayList<PhylogenyNode>();
-        for ( PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
+        for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) {
             final PhylogenyNode i = it.next();
             if ( !PhylogenyMethods.getEventAtLCA( n, i ).isSpeciation() ) {
-                to_delete.add( i ); 
+                to_delete.add( i );
             }
         }
-        for( PhylogenyNode d : to_delete ) {
+        for( final PhylogenyNode d : to_delete ) {
             phy.deleteSubtree( d, true );
         }
         phy.clearHashIdToNodeMap();
         phy.externalNodesHaveChanged();
-        
     }
-    
-    
-    
+
     /**
      * Returns the LCA of PhylogenyNodes node1 and node2.
      * 
index e180481..60828bd 100644 (file)
@@ -36,10 +36,10 @@ public class BasicSequence implements Sequence {
 
     private BasicSequence( final String identifier, final String mol_sequence, final TYPE type ) {
         if ( ForesterUtil.isEmpty( identifier ) ) {
-            throw new IllegalArgumentException( "identifier of sequence cannot be empty");
+            throw new IllegalArgumentException( "identifier of sequence cannot be empty" );
         }
         if ( ForesterUtil.isEmpty( mol_sequence ) ) {
-            throw new IllegalArgumentException( "molecular sequence cannot be empty");
+            throw new IllegalArgumentException( "molecular sequence cannot be empty" );
         }
         _mol_sequence = mol_sequence.toCharArray();
         _identifier = identifier;
@@ -49,10 +49,10 @@ public class BasicSequence implements Sequence {
     // Only use if you know what you are doing!
     public BasicSequence( final String identifier, final char[] mol_sequence, final TYPE type ) {
         if ( ForesterUtil.isEmpty( identifier ) ) {
-            throw new IllegalArgumentException( "identifier of sequence cannot be empty");
+            throw new IllegalArgumentException( "identifier of sequence cannot be empty" );
         }
-        if ( mol_sequence == null || mol_sequence.length < 1 ) {
-            throw new IllegalArgumentException( "molecular sequence cannot be empty");
+        if ( ( mol_sequence == null ) || ( mol_sequence.length < 1 ) ) {
+            throw new IllegalArgumentException( "molecular sequence cannot be empty" );
         }
         _mol_sequence = mol_sequence;
         _identifier = identifier;
@@ -96,26 +96,25 @@ public class BasicSequence implements Sequence {
     }
 
     @Override
-    public boolean equals(Object obj) {
-        if (obj == null) {
+    public boolean equals( final Object obj ) {
+        if ( obj == null ) {
             return false;
         }
-        if (obj.getClass() != getClass()) {
+        if ( obj.getClass() != getClass() ) {
             return false;
         }
-        Sequence other = ( Sequence) obj;
-        if ( getMolecularSequenceAsString() .equals( other.getMolecularSequenceAsString() )) {
+        final Sequence other = ( Sequence ) obj;
+        if ( getMolecularSequenceAsString().equals( other.getMolecularSequenceAsString() ) ) {
             return true;
         }
         return false;
     }
+
     @Override
     public int hashCode() {
         return getMolecularSequenceAsString().hashCode();
     }
-    
-    
+
     @Override
     public String toString() {
         final StringBuffer sb = new StringBuffer();
@@ -150,7 +149,6 @@ public class BasicSequence implements Sequence {
 
     @Override
     public String getMolecularSequenceAsString() {
-       
         return new String( getMolecularSequence() );
     }
 }
index f4c17cf..90f3b04 100644 (file)
@@ -44,7 +44,7 @@ public interface Sequence {
     public abstract int getNumberOfGapResidues();
 
     public abstract char[] getMolecularSequence();
-    
+
     public abstract String getMolecularSequenceAsString();
 
     public abstract char getResidueAt( final int position );
index 0c1d60e..8daa322 100644 (file)
@@ -66,7 +66,6 @@ import org.forester.go.GoTerm;
 import org.forester.go.PfamToGoMapping;
 import org.forester.io.parsers.nexus.NexusConstants;
 import org.forester.io.writers.PhylogenyWriter;
-import org.forester.msa.MsaCompactor.SORT_BY;
 import org.forester.phylogeny.Phylogeny;
 import org.forester.phylogeny.PhylogenyMethods;
 import org.forester.phylogeny.PhylogenyNode;
@@ -375,8 +374,7 @@ public final class SurfacingUtil {
                 }
                 for( int i = 0; i < nodes.size() - 1; ++i ) {
                     for( int j = i + 1; j < nodes.size(); ++j ) {
-                        final PhylogenyNode lca = PhylogenyMethods.obtainLCA( nodes.get( i ),
-                                                                                            nodes.get( j ) );
+                        final PhylogenyNode lca = PhylogenyMethods.obtainLCA( nodes.get( i ), nodes.get( j ) );
                         String rank = "unknown";
                         if ( lca.getNodeData().isHasTaxonomy()
                                 && !ForesterUtil.isEmpty( lca.getNodeData().getTaxonomy().getRank() ) ) {
@@ -1189,14 +1187,12 @@ public final class SurfacingUtil {
                                             final String limit_to_species,
                                             final double domain_e_cutoff ) throws IOException {
         System.out.println( "Per domain E-value: " + domain_e_cutoff );
-        
         for( final Species species : protein_lists_per_species.keySet() ) {
             System.out.println( species + ":" );
             for( final Protein protein : protein_lists_per_species.get( species ) ) {
                 if ( ForesterUtil.isEmpty( limit_to_species )
                         || protein.getSpecies().getSpeciesId().equalsIgnoreCase( limit_to_species ) ) {
                     final List<Domain> domains = protein.getProteinDomains( domain_id );
-                   
                     if ( domains.size() > 0 ) {
                         out.write( protein.getSpecies().getSpeciesId() );
                         out.write( separator );
@@ -1205,43 +1201,32 @@ public final class SurfacingUtil {
                         out.write( domain_id.toString() );
                         out.write( separator );
                         int prev_to = -1;
-                       
                         for( final Domain domain : domains ) {
-                            
-                            if ( domain_e_cutoff < 0 || domain.getPerDomainEvalue() <= domain_e_cutoff ) {
-                                 out.write( "/" );
-                                 out.write( domain.getFrom() + "-" + domain.getTo() );
-                                 if ( prev_to >= 0) {
-                                     final int l = domain.getFrom() - prev_to;
-                                     System.out.println( l );
-                                 }
-                                 prev_to = domain.getTo();
+                            if ( ( domain_e_cutoff < 0 ) || ( domain.getPerDomainEvalue() <= domain_e_cutoff ) ) {
+                                out.write( "/" );
+                                out.write( domain.getFrom() + "-" + domain.getTo() );
+                                if ( prev_to >= 0 ) {
+                                    final int l = domain.getFrom() - prev_to;
+                                    System.out.println( l );
+                                }
+                                prev_to = domain.getTo();
                             }
                         }
-                        
                         out.write( "/" );
                         out.write( separator );
-                       
-                        
                         final List<Domain> domain_list = new ArrayList<Domain>();
-                        
                         for( final Domain domain : protein.getProteinDomains() ) {
-                            if ( domain_e_cutoff < 0 || domain.getPerDomainEvalue() <= domain_e_cutoff ) {
+                            if ( ( domain_e_cutoff < 0 ) || ( domain.getPerDomainEvalue() <= domain_e_cutoff ) ) {
                                 domain_list.add( domain );
                             }
                         }
-                        
-                        Domain domain_ary[] = new Domain[ domain_list.size() ];
-                        
+                        final Domain domain_ary[] = new Domain[ domain_list.size() ];
                         for( int i = 0; i < domain_list.size(); ++i ) {
                             domain_ary[ i ] = domain_list.get( i );
                         }
-                        
                         Arrays.sort( domain_ary, new DomainComparator( true ) );
-                       
                         out.write( "{" );
                         boolean first = true;
-                        
                         for( final Domain domain : domain_ary ) {
                             if ( first ) {
                                 first = false;
@@ -1250,7 +1235,7 @@ public final class SurfacingUtil {
                                 out.write( "," );
                             }
                             out.write( domain.getDomainId().toString() );
-                            out.write( ":" + domain.getFrom() +  "-" + domain.getTo() );
+                            out.write( ":" + domain.getFrom() + "-" + domain.getTo() );
                             out.write( ":" + domain.getPerDomainEvalue() );
                         }
                         out.write( "}" );
@@ -1265,7 +1250,6 @@ public final class SurfacingUtil {
                         }
                         out.write( SurfacingConstants.NL );
                     }
-                    
                 }
             }
         }
@@ -2656,7 +2640,6 @@ public final class SurfacingUtil {
 
         @Override
         public final int compare( final Domain d0, final Domain d1 ) {
-
             if ( d0.getFrom() < d1.getFrom() ) {
                 return _ascending ? -1 : 1;
             }
@@ -2664,13 +2647,6 @@ public final class SurfacingUtil {
                 return _ascending ? 1 : -1;
             }
             return 0;
-
         }
-
     }
 }
-
-
-
-
-
index 59e5cd2..74dd567 100644 (file)
@@ -67,18 +67,12 @@ public class BasicTableParser {
         final boolean use_start_of_comment_line = !( ForesterUtil.isEmpty( start_of_comment_line ) );
         while ( ( line = reader.readLine() ) != null ) {
             line = line.trim();
-            if ( !ForesterUtil.isEmpty( line) && 
-                    
-                    
-                     (( line.charAt( 0 ) == '"' && line.charAt( line.length() -1 ) == '"' && ForesterUtil.countChars( line, '"' ) == 2 ) 
-                             
-                             ||
-                             
-                             
-                      ( line.charAt( 0 ) == '\'' && line.charAt( line.length() -1 ) == '\'' && ForesterUtil.countChars( line, '\'' ) == 2  ) ) ) {
-                line = line.substring( 1, line.length() -1  ).trim();
+            if ( !ForesterUtil.isEmpty( line )
+                    && ( ( ( line.charAt( 0 ) == '"' ) && ( line.charAt( line.length() - 1 ) == '"' ) && ( ForesterUtil
+                            .countChars( line, '"' ) == 2 ) ) || ( ( line.charAt( 0 ) == '\'' )
+                            && ( line.charAt( line.length() - 1 ) == '\'' ) && ( ForesterUtil.countChars( line, '\'' ) == 2 ) ) ) ) {
+                line = line.substring( 1, line.length() - 1 ).trim();
             }
-            
             if ( saw_first_table
                     && ( ForesterUtil.isEmpty( line ) || ( tables_separated_by_single_string_line && ( line
                             .indexOf( column_delimiter ) < 0 ) ) ) ) {