From 72c535142a5e6b0da9c7edb2f605eb835b43e6fb Mon Sep 17 00:00:00 2001 From: cmzmasek Date: Fri, 10 Aug 2012 07:20:42 +0000 Subject: [PATCH] cleanup --- forester/java/src/org/forester/application/aa.java | 15 ++--- .../java/src/org/forester/application/aaa.java | 69 +++++++++++++++----- .../src/org/forester/application/decorator.java | 2 +- .../src/org/forester/application/surfacing.java | 34 +++++----- .../org/forester/archaeopteryx/Configuration.java | 5 -- .../archaeopteryx/tools/InferenceManager.java | 6 +- .../archaeopteryx/tools/PhyloInferenceDialog.java | 11 +--- .../archaeopteryx/tools/PhylogeneticInferrer.java | 8 +-- .../java/src/org/forester/msa/ClustalOmega.java | 7 +- .../org/forester/phylogeny/PhylogenyMethods.java | 33 +++------- .../src/org/forester/sequence/BasicSequence.java | 26 ++++---- .../java/src/org/forester/sequence/Sequence.java | 2 +- .../src/org/forester/surfacing/SurfacingUtil.java | 48 ++++---------- .../src/org/forester/util/BasicTableParser.java | 16 ++--- 14 files changed, 122 insertions(+), 160 deletions(-) diff --git a/forester/java/src/org/forester/application/aa.java b/forester/java/src/org/forester/application/aa.java index 899120c..a71d3fe 100644 --- a/forester/java/src/org/forester/application/aa.java +++ b/forester/java/src/org/forester/application/aa.java @@ -7,7 +7,6 @@ import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.Set; -import java.util.TreeSet; import org.forester.io.parsers.FastaParser; import org.forester.msa.Msa; @@ -41,7 +40,7 @@ public class aa { final List found_seqs = new ArrayList(); for( final Sequence orig_seq : orig ) { final String orig_seq_id = orig_seq.getIdentifier(); - if ( orig_seq_id.indexOf( id_ ) >= 0 && orig_seq_id.indexOf( "[" + range + "]" ) >= 0 ) { + if ( ( orig_seq_id.indexOf( id_ ) >= 0 ) && ( orig_seq_id.indexOf( "[" + range + "]" ) >= 0 ) ) { found++; found_seqs.add( orig_seq ); } @@ -49,8 +48,8 @@ public class aa { if ( found > 0 ) { for( final Sequence found_seq : found_seqs ) { if ( found_seq.getLength() >= 85 ) { - - all_found_seqs.add( BasicSequence.createAaSequence( id, found_seq.getMolecularSequenceAsString() ) ); + all_found_seqs.add( BasicSequence.createAaSequence( id, found_seq + .getMolecularSequenceAsString() ) ); } } if ( found > 1 ) { @@ -66,19 +65,17 @@ public class aa { System.exit( -1 ); } } - String fasta_ary[] = new String[ all_found_seqs.size() ]; + final String fasta_ary[] = new String[ all_found_seqs.size() ]; int i = 0; - for( Sequence sequence : all_found_seqs ) { + for( final Sequence sequence : all_found_seqs ) { fasta_ary[ i ] = ">" + sequence.getIdentifier() + "\n" + sequence.getMolecularSequenceAsString(); System.out.println( sequence ); i++; } Arrays.sort( fasta_ary ); - for( int j = 0; j < fasta_ary.length; ++j ) { - System.out.println( fasta_ary[ j ] ); + System.out.println( fasta_ary[ j ] ); } - System.out.println( "DONE." ); } catch ( final Exception e ) { diff --git a/forester/java/src/org/forester/application/aaa.java b/forester/java/src/org/forester/application/aaa.java index e4b9492..ac643c4 100644 --- a/forester/java/src/org/forester/application/aaa.java +++ b/forester/java/src/org/forester/application/aaa.java @@ -2,6 +2,7 @@ package org.forester.application; import java.io.FileInputStream; +import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; @@ -12,19 +13,29 @@ import java.util.regex.Pattern; import org.forester.io.parsers.FastaParser; import org.forester.sequence.Sequence; +import org.forester.util.EasyWriter; +import org.forester.util.ForesterUtil; public class aaa { public final static Pattern GN_PATTERN = Pattern.compile( "GN=(\\S+)\\s" ); //use w+ instead of S+ for more stringent setting. public final static Pattern RANGE_PATTERN = Pattern.compile( "\\[(\\d+-\\d+)\\]" ); //use w+ instead of S+ for more stringent setting. + public final static int MIN_LENGTH = 85; public static void main( final String args[] ) { try { + final EasyWriter out = ( EasyWriter ) ForesterUtil.createEasyWriter( "aaa_out" ); System.out.println( "STARTING..." ); + final List too_short = new ArrayList(); final List orig = FastaParser .parse( new FileInputStream( "C:\\Users\\zma\\Desktop\\RRMa_domains_ext_20_2.fasta" ) ); + final int initial_number = orig.size(); final List new_seqs = new ArrayList(); for( final Sequence seq : orig ) { + if ( seq.getLength() < MIN_LENGTH ) { + too_short.add( seq ); + continue; + } final Matcher matcher = GN_PATTERN.matcher( seq.getIdentifier() ); String gn = ""; if ( matcher.find() ) { @@ -40,8 +51,8 @@ public class aaa { final Set mol_seq_set = new HashSet(); Collections.sort( new_seqs ); int unique_counter = 0; - int duplicate_counter_gn_ra = 0; - int duplicate_counter_mol_seq = 0; + final List duplicate_gn_ra = new ArrayList(); + final List duplicate_mol_seq = new ArrayList(); final List new_seqs_unique = new ArrayList(); for( final String seq : new_seqs ) { final Matcher matcher_ra = RANGE_PATTERN.matcher( seq ); @@ -66,49 +77,73 @@ public class aaa { unique_counter++; } else { - duplicate_counter_mol_seq++; + duplicate_mol_seq.add( seq ); } } else { - duplicate_counter_gn_ra++; + duplicate_gn_ra.add( seq ); } } String prev_gn = "___"; boolean is_first = true; - List same_protein_seqs = new ArrayList(); + List seqs_from_same_protein = new ArrayList(); for( final String seq : new_seqs_unique ) { - final Matcher matcher_gn = GN_PATTERN.matcher( seq ); matcher_gn.find(); final String gn = matcher_gn.group( 1 ); if ( !prev_gn.equals( gn ) && !is_first ) { - doit( same_protein_seqs ); - same_protein_seqs = new ArrayList(); + doit( seqs_from_same_protein, out ); + seqs_from_same_protein = new ArrayList(); } prev_gn = gn; is_first = false; - same_protein_seqs.add( seq ); + seqs_from_same_protein.add( seq ); + } + doit( seqs_from_same_protein, out ); + out.println( "" ); + out.println( "" ); + out.println( "Removed because same GN and region:" ); + for( final String s : duplicate_gn_ra ) { + out.println( s ); + } + out.println( "" ); + out.println( "" ); + out.println( "Removed because identical mol sequence:" ); + for( final String s : duplicate_mol_seq ) { + out.println( s ); + } + out.println( "" ); + out.println( "" ); + out.println( "Removed because too short:" ); + for( final Sequence s : too_short ) { + out.println( s.toString() ); } - doit( same_protein_seqs ); - System.out.println( "unique : " + unique_counter ); - System.out.println( "duplicate because gn and range same: " + duplicate_counter_gn_ra ); - System.out.println( "duplicate because mol seq same : " + duplicate_counter_mol_seq ); + out.println( "" ); + out.println( "" ); + out.println( "initial:" + initial_number ); + out.println( "ignored because shorter than " + MIN_LENGTH + "aa: " + too_short.size() ); + out.println( "unique : " + unique_counter ); + out.println( "unique : " + new_seqs_unique.size() ); + out.println( "duplicate because gn and range same: " + duplicate_gn_ra.size() ); + out.println( "duplicate because mol seq same : " + duplicate_mol_seq.size() ); + out.flush(); + out.close(); + System.out.println( "DONE " ); } catch ( final Exception e ) { e.printStackTrace(); } } - private static void doit( List same_protein_seqs ) { + private static void doit( final List same_protein_seqs, final EasyWriter out ) throws IOException { final int count = same_protein_seqs.size(); if ( count == 1 ) { - System.out.println( same_protein_seqs.get( 0 ) ); + out.println( same_protein_seqs.get( 0 ) ); } else { int c = 1; for( final String s : same_protein_seqs ) { - System.out.println( new StringBuffer( s ).insert( s.indexOf( "|" ), - "__" + c + "_OF_" + count ).toString() ); + out.println( new StringBuffer( s ).insert( s.indexOf( "|" ), "__" + c + "_OF_" + count ).toString() ); c++; } } diff --git a/forester/java/src/org/forester/application/decorator.java b/forester/java/src/org/forester/application/decorator.java index be44f67..6f37ff3 100644 --- a/forester/java/src/org/forester/application/decorator.java +++ b/forester/java/src/org/forester/application/decorator.java @@ -369,7 +369,7 @@ public final class decorator { ForesterUtil.unexpectedFatalError( decorator.PRG_NAME, e ); } catch ( final Exception e ) { - ForesterUtil.fatalError( decorator.PRG_NAME, e.getLocalizedMessage() ); + ForesterUtil.fatalError( decorator.PRG_NAME, e.getLocalizedMessage() ); } try { final PhylogenyWriter w = new PhylogenyWriter(); diff --git a/forester/java/src/org/forester/application/surfacing.java b/forester/java/src/org/forester/application/surfacing.java index 328fd6c..7f4bf60 100644 --- a/forester/java/src/org/forester/application/surfacing.java +++ b/forester/java/src/org/forester/application/surfacing.java @@ -44,7 +44,6 @@ import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; - import org.forester.evoinference.matrix.character.CharacterStateMatrix.Format; import org.forester.go.GoId; import org.forester.go.GoNameSpace; @@ -224,7 +223,7 @@ public class surfacing { final static private long JACKNIFE_RANDOM_SEED_DEFAULT = 19; final static private double JACKNIFE_RATIO_DEFAULT = 0.5; //final static private String INFER_SPECIES_TREES_OPTION = "species_tree_inference"; - final static private String FILTER_POSITIVE_OPTION = "pos_filter"; + final static private String FILTER_POSITIVE_OPTION = "pos_filter"; final static private String FILTER_NEGATIVE_OPTION = "neg_filter"; final static private String FILTER_NEGATIVE_DOMAINS_OPTION = "neg_dom_filter"; final static private String INPUT_FILES_FROM_FILE_OPTION = "input"; @@ -252,9 +251,7 @@ public class surfacing { private static final String PLUS_MINUS_ALL_GO_IDS_DOM_SUFFIX = "_plus_minus_go_ids_all.txt"; private static final String PLUS_MINUS_PASSING_GO_IDS_DOM_SUFFIX = "_plus_minus_go_ids_passing.txt"; private static final String OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS = "all_prot"; - final static private String OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION = "all_prot_e"; - - + final static private String OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION = "all_prot_e"; private static final boolean VERBOSE = false; private static final String OUTPUT_DOMAIN_COMBINATIONS_GAINED_MORE_THAN_ONCE_ANALYSIS_SUFFIX = "_fitch_dc_gains_counts"; private static final String OUTPUT_DOMAIN_COMBINATIONS_LOST_MORE_THAN_ONCE_ANALYSIS_SUFFIX = "_fitch_dc_losses_counts"; @@ -545,8 +542,6 @@ public class surfacing { return intrees; } - - private static void log( final String msg, final Writer w ) { try { w.write( msg ); @@ -861,7 +856,8 @@ public class surfacing { // if ( cla.isOptionSet( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION ) ) { try { - output_list_of_all_proteins_per_domain_e_value_max = cla.getOptionValueAsDouble( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION ); + output_list_of_all_proteins_per_domain_e_value_max = cla + .getOptionValueAsDouble( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION ); } catch ( final Exception e ) { ForesterUtil.fatalError( surfacing.PRG_NAME, "no acceptable value for per domain E-value maximum" ); @@ -916,7 +912,6 @@ public class surfacing { + "=" ); } query_domain_ids = cla.getOptionValue( surfacing.SEQ_EXTRACT_OPTION ); - } DomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field = DOMAIN_SORT_FILD_DEFAULT; DomainSimilarity.DomainSimilaritySortField domain_similarity_sort_field_for_automated_pwc = DOMAIN_SORT_FILD_DEFAULT; @@ -1389,9 +1384,10 @@ public class surfacing { System.out.println( "E-value maximum (inclusive) : " + e_value_max ); html_desc.append( "E-value maximum (inclusive):" + e_value_max + "" + nl ); } - if ( output_protein_lists_for_all_domains ) { + if ( output_protein_lists_for_all_domains ) { System.out.println( "Domain E-value max : " + output_list_of_all_proteins_per_domain_e_value_max ); - html_desc.append( "Protein lists: E-value maximum per domain (inclusive):" + output_list_of_all_proteins_per_domain_e_value_max + "" + nl ); + html_desc.append( "Protein lists: E-value maximum per domain (inclusive):" + + output_list_of_all_proteins_per_domain_e_value_max + "" + nl ); } System.out.println( "Ignore DUFs : " + ignore_dufs ); if ( ignore_virus_like_ids ) { @@ -2331,13 +2327,14 @@ public class surfacing { protein_lists_per_species, domain_id_to_go_ids_map, go_id_to_term_map, - plus_minus_analysis_numbers - ); + plus_minus_analysis_numbers ); } if ( output_protein_lists_for_all_domains ) { - writeProteinListsForAllSpecies( out_dir, protein_lists_per_species, gwcd_list, output_list_of_all_proteins_per_domain_e_value_max ); + writeProteinListsForAllSpecies( out_dir, + protein_lists_per_species, + gwcd_list, + output_list_of_all_proteins_per_domain_e_value_max ); } - if ( all_bin_domain_combinations_gained_fitch != null ) { try { executeFitchGainsAnalysis( new File( output_file @@ -2502,7 +2499,6 @@ public class surfacing { } } - private static void printHelp() { System.out.println(); System.out.println( "Usage:" ); @@ -2590,8 +2586,8 @@ public class surfacing { System.out.println( surfacing.DOMAIN_COMBINITONS_OUTPUT_OPTION_FOR_GRAPH_ANALYSIS + ": to output binary domain combinations for (downstream) graph analysis" ); System.out.println( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS + ": to output all proteins per domain" ); - System.out.println( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION + ": e value max per domain for output of all proteins per domain" ); - + System.out.println( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION + + ": e value max per domain for output of all proteins per domain" ); System.out.println(); System.out.println( "Example 1: java -Xms128m -Xmx512m -cp path/to/forester.jar" + " org.forester.application.surfacing p2g=pfam2go_2012_02_07.txt -dufs -cos=Pfam_260_NC1" @@ -2787,7 +2783,7 @@ public class surfacing { proteins_file_writer, "\t", LIMIT_SPEC_FOR_PROT_EX, - domain_e_cutoff ); + domain_e_cutoff ); proteins_file_writer.close(); } catch ( final IOException e ) { diff --git a/forester/java/src/org/forester/archaeopteryx/Configuration.java b/forester/java/src/org/forester/archaeopteryx/Configuration.java index e45dfc8..2420efe 100644 --- a/forester/java/src/org/forester/archaeopteryx/Configuration.java +++ b/forester/java/src/org/forester/archaeopteryx/Configuration.java @@ -439,8 +439,6 @@ public final class Configuration { return _path_to_local_mafft; } - - public File getPathToLocalFastme() { return _path_to_local_fastme; } @@ -823,8 +821,6 @@ public final class Configuration { _path_to_local_mafft = path_to_local_mafft; } - - private void setPathToLocalFastme( final File path_to_local_fastme ) { _path_to_local_fastme = path_to_local_fastme; } @@ -1063,7 +1059,6 @@ public final class Configuration { setPathToLocalMafft( new File( str ) ); } } - else if ( key.equals( "fastme_local" ) ) { final String str = ( ( String ) st.nextElement() ).trim(); if ( !ForesterUtil.isEmpty( str ) ) { diff --git a/forester/java/src/org/forester/archaeopteryx/tools/InferenceManager.java b/forester/java/src/org/forester/archaeopteryx/tools/InferenceManager.java index 2c416b9..24ec184 100644 --- a/forester/java/src/org/forester/archaeopteryx/tools/InferenceManager.java +++ b/forester/java/src/org/forester/archaeopteryx/tools/InferenceManager.java @@ -22,16 +22,13 @@ public final class InferenceManager { } public boolean canDoMsa() { - return ( getPathToLocalMafft() != null ) - || ( getPathToLocalClustalo() != null ); + return ( getPathToLocalMafft() != null ) || ( getPathToLocalClustalo() != null ); } public File getPathToLocalMafft() { return _path_to_local_mafft; } - - public File getPathToLocalFastme() { return _path_to_local_fastme; } @@ -78,7 +75,6 @@ public final class InferenceManager { final File path_to_local_raxml, final File path_to_local_clustalo ) { _path_to_local_mafft = createLocalPath( path_to_local_mafft, "mafft" ); - _path_to_local_fastme = createLocalPath( path_to_local_fastme, "fastme" ); _path_to_local_raxml = createLocalPath( path_to_local_raxml, "raxml" ); _path_to_local_clustalo = createLocalPath( path_to_local_clustalo, "clustalo" ); diff --git a/forester/java/src/org/forester/archaeopteryx/tools/PhyloInferenceDialog.java b/forester/java/src/org/forester/archaeopteryx/tools/PhyloInferenceDialog.java index 399c521..32e21c8 100644 --- a/forester/java/src/org/forester/archaeopteryx/tools/PhyloInferenceDialog.java +++ b/forester/java/src/org/forester/archaeopteryx/tools/PhyloInferenceDialog.java @@ -78,7 +78,6 @@ public class PhyloInferenceDialog extends JDialog implements ActionListener { private JTextField _input_seqs_min_length_tf; private JTextField _input_seqs_max_length_tf; private JTextField _input_seqs_type_tf; - private JTextField _mafft_paramenters_tf; private JTextField _clustalo_paramenters_tf; private JTextField _msa_processing_max_allowed_gap_ratio_tf; @@ -130,13 +129,10 @@ public class PhyloInferenceDialog extends JDialog implements ActionListener { inputfile_pnl_2.add( _input_seqs_max_length_tf = new JTextField() ); inputfile_pnl_2.add( new JLabel( "Type:" ) ); inputfile_pnl_2.add( _input_seqs_type_tf = new JTextField() ); - - - inputfile_pnl_3.add( _mafft_cb = new JCheckBox( "MAFFT") ); + inputfile_pnl_3.add( _mafft_cb = new JCheckBox( "MAFFT" ) ); inputfile_pnl_3.add( new JLabel( "Parameters: " ) ); inputfile_pnl_3.add( _mafft_paramenters_tf = new JTextField() ); - - inputfile_pnl_4.add( _clustalo_cb = new JCheckBox( "ClustalO") ); + inputfile_pnl_4.add( _clustalo_cb = new JCheckBox( "ClustalO" ) ); inputfile_pnl_4.add( new JLabel( "Parameters: " ) ); inputfile_pnl_4.add( _clustalo_paramenters_tf = new JTextField() ); _input_seqs_median_length_tf.setColumns( 4 ); @@ -151,7 +147,6 @@ public class PhyloInferenceDialog extends JDialog implements ActionListener { _input_seqs_max_length_tf.setEditable( false ); _input_seqs_number_tf.setEditable( false ); _input_seqs_type_tf.setEditable( false ); - _mafft_paramenters_tf.setColumns( 26 ); _mafft_paramenters_tf.setText( "--maxiterate 1000 --localpair" ); _clustalo_paramenters_tf.setColumns( 26 ); @@ -286,7 +281,7 @@ public class PhyloInferenceDialog extends JDialog implements ActionListener { _value = JOptionPane.CANCEL_OPTION; if ( from_unaligned_seqs ) { updateSeqsItems(); - } + } else { updateMsaItems(); } diff --git a/forester/java/src/org/forester/archaeopteryx/tools/PhylogeneticInferrer.java b/forester/java/src/org/forester/archaeopteryx/tools/PhylogeneticInferrer.java index 66fbdbc..9e651fb2 100644 --- a/forester/java/src/org/forester/archaeopteryx/tools/PhylogeneticInferrer.java +++ b/forester/java/src/org/forester/archaeopteryx/tools/PhylogeneticInferrer.java @@ -26,7 +26,6 @@ package org.forester.archaeopteryx.tools; import java.io.BufferedWriter; -import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; @@ -98,16 +97,13 @@ public class PhylogeneticInferrer extends RunnableProcess { // SequenceWriter.writeSeqs( _seqs, writer, SEQ_FORMAT.FASTA, 100 ); // writer.close(); switch ( msa_prg ) { - case MAFFT: + case MAFFT: return runMAFFT( _seqs, processMafftOptions() ); - case CLUSTAL_O: return runClustalOmega( _seqs, processMafftOptions() ); default: return null; } - - } private List processMafftOptions() { @@ -347,7 +343,7 @@ public class PhylogeneticInferrer extends RunnableProcess { } } } - + public enum MSA_PRG { MAFFT, CLUSTAL_O; } diff --git a/forester/java/src/org/forester/msa/ClustalOmega.java b/forester/java/src/org/forester/msa/ClustalOmega.java index f6d4f65..938fc78 100644 --- a/forester/java/src/org/forester/msa/ClustalOmega.java +++ b/forester/java/src/org/forester/msa/ClustalOmega.java @@ -71,7 +71,7 @@ public final class ClustalOmega extends MsaInferrer { return _exit_code; } - + @Override public Msa infer( final List seqs, final List opts ) throws IOException, InterruptedException { final File file = File.createTempFile( "__clustalo_input_", ".fasta" ); file.deleteOnExit(); @@ -82,8 +82,7 @@ public final class ClustalOmega extends MsaInferrer { file.delete(); return msa; } - - + @Override public Msa infer( final File path_to_input_seqs, final List opts ) throws IOException, InterruptedException { init(); @@ -114,6 +113,4 @@ public final class ClustalOmega extends MsaInferrer { _error = null; _exit_code = -100; } - - } diff --git a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java index 313848b..4c86166 100644 --- a/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java +++ b/forester/java/src/org/forester/phylogeny/PhylogenyMethods.java @@ -62,19 +62,14 @@ import org.forester.util.ForesterUtil; public class PhylogenyMethods { - private static PhylogenyMethods _instance = null; - - private PhylogenyNode _farthest_1 = null; - private PhylogenyNode _farthest_2 = null; + private static PhylogenyMethods _instance = null; + private PhylogenyNode _farthest_1 = null; + private PhylogenyNode _farthest_2 = null; private PhylogenyMethods() { // Hidden constructor. } - - - - /** * Calculates the distance between PhylogenyNodes node1 and node2. * @@ -119,13 +114,10 @@ public class PhylogenyMethods { return farthest_d; } - final public static Event getEventAtLCA( PhylogenyNode n1, - PhylogenyNode n2 ) { + final public static Event getEventAtLCA( final PhylogenyNode n1, final PhylogenyNode n2 ) { return obtainLCA( n1, n2 ).getNodeData().getEvent(); } - - - + @Override public Object clone() throws CloneNotSupportedException { throw new CloneNotSupportedException(); @@ -139,29 +131,24 @@ public class PhylogenyMethods { return _farthest_2; } - final public static void deleteNonOrthologousExternalNodes( final Phylogeny phy, - final PhylogenyNode n) { + final public static void deleteNonOrthologousExternalNodes( final Phylogeny phy, final PhylogenyNode n ) { if ( n.isInternal() ) { throw new IllegalArgumentException( "node is not external" ); } - final ArrayList to_delete = new ArrayList(); - for ( PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { + for( final PhylogenyNodeIterator it = phy.iteratorExternalForward(); it.hasNext(); ) { final PhylogenyNode i = it.next(); if ( !PhylogenyMethods.getEventAtLCA( n, i ).isSpeciation() ) { - to_delete.add( i ); + to_delete.add( i ); } } - for( PhylogenyNode d : to_delete ) { + for( final PhylogenyNode d : to_delete ) { phy.deleteSubtree( d, true ); } phy.clearHashIdToNodeMap(); phy.externalNodesHaveChanged(); - } - - - + /** * Returns the LCA of PhylogenyNodes node1 and node2. * diff --git a/forester/java/src/org/forester/sequence/BasicSequence.java b/forester/java/src/org/forester/sequence/BasicSequence.java index e180481..60828bd 100644 --- a/forester/java/src/org/forester/sequence/BasicSequence.java +++ b/forester/java/src/org/forester/sequence/BasicSequence.java @@ -36,10 +36,10 @@ public class BasicSequence implements Sequence { private BasicSequence( final String identifier, final String mol_sequence, final TYPE type ) { if ( ForesterUtil.isEmpty( identifier ) ) { - throw new IllegalArgumentException( "identifier of sequence cannot be empty"); + throw new IllegalArgumentException( "identifier of sequence cannot be empty" ); } if ( ForesterUtil.isEmpty( mol_sequence ) ) { - throw new IllegalArgumentException( "molecular sequence cannot be empty"); + throw new IllegalArgumentException( "molecular sequence cannot be empty" ); } _mol_sequence = mol_sequence.toCharArray(); _identifier = identifier; @@ -49,10 +49,10 @@ public class BasicSequence implements Sequence { // Only use if you know what you are doing! public BasicSequence( final String identifier, final char[] mol_sequence, final TYPE type ) { if ( ForesterUtil.isEmpty( identifier ) ) { - throw new IllegalArgumentException( "identifier of sequence cannot be empty"); + throw new IllegalArgumentException( "identifier of sequence cannot be empty" ); } - if ( mol_sequence == null || mol_sequence.length < 1 ) { - throw new IllegalArgumentException( "molecular sequence cannot be empty"); + if ( ( mol_sequence == null ) || ( mol_sequence.length < 1 ) ) { + throw new IllegalArgumentException( "molecular sequence cannot be empty" ); } _mol_sequence = mol_sequence; _identifier = identifier; @@ -96,26 +96,25 @@ public class BasicSequence implements Sequence { } @Override - public boolean equals(Object obj) { - if (obj == null) { + public boolean equals( final Object obj ) { + if ( obj == null ) { return false; } - if (obj.getClass() != getClass()) { + if ( obj.getClass() != getClass() ) { return false; } - Sequence other = ( Sequence) obj; - if ( getMolecularSequenceAsString() .equals( other.getMolecularSequenceAsString() )) { + final Sequence other = ( Sequence ) obj; + if ( getMolecularSequenceAsString().equals( other.getMolecularSequenceAsString() ) ) { return true; } return false; } - + @Override public int hashCode() { return getMolecularSequenceAsString().hashCode(); } - - + @Override public String toString() { final StringBuffer sb = new StringBuffer(); @@ -150,7 +149,6 @@ public class BasicSequence implements Sequence { @Override public String getMolecularSequenceAsString() { - return new String( getMolecularSequence() ); } } diff --git a/forester/java/src/org/forester/sequence/Sequence.java b/forester/java/src/org/forester/sequence/Sequence.java index f4c17cf..90f3b04 100644 --- a/forester/java/src/org/forester/sequence/Sequence.java +++ b/forester/java/src/org/forester/sequence/Sequence.java @@ -44,7 +44,7 @@ public interface Sequence { public abstract int getNumberOfGapResidues(); public abstract char[] getMolecularSequence(); - + public abstract String getMolecularSequenceAsString(); public abstract char getResidueAt( final int position ); diff --git a/forester/java/src/org/forester/surfacing/SurfacingUtil.java b/forester/java/src/org/forester/surfacing/SurfacingUtil.java index 0c1d60e..8daa322 100644 --- a/forester/java/src/org/forester/surfacing/SurfacingUtil.java +++ b/forester/java/src/org/forester/surfacing/SurfacingUtil.java @@ -66,7 +66,6 @@ import org.forester.go.GoTerm; import org.forester.go.PfamToGoMapping; import org.forester.io.parsers.nexus.NexusConstants; import org.forester.io.writers.PhylogenyWriter; -import org.forester.msa.MsaCompactor.SORT_BY; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyMethods; import org.forester.phylogeny.PhylogenyNode; @@ -375,8 +374,7 @@ public final class SurfacingUtil { } for( int i = 0; i < nodes.size() - 1; ++i ) { for( int j = i + 1; j < nodes.size(); ++j ) { - final PhylogenyNode lca = PhylogenyMethods.obtainLCA( nodes.get( i ), - nodes.get( j ) ); + final PhylogenyNode lca = PhylogenyMethods.obtainLCA( nodes.get( i ), nodes.get( j ) ); String rank = "unknown"; if ( lca.getNodeData().isHasTaxonomy() && !ForesterUtil.isEmpty( lca.getNodeData().getTaxonomy().getRank() ) ) { @@ -1189,14 +1187,12 @@ public final class SurfacingUtil { final String limit_to_species, final double domain_e_cutoff ) throws IOException { System.out.println( "Per domain E-value: " + domain_e_cutoff ); - for( final Species species : protein_lists_per_species.keySet() ) { System.out.println( species + ":" ); for( final Protein protein : protein_lists_per_species.get( species ) ) { if ( ForesterUtil.isEmpty( limit_to_species ) || protein.getSpecies().getSpeciesId().equalsIgnoreCase( limit_to_species ) ) { final List domains = protein.getProteinDomains( domain_id ); - if ( domains.size() > 0 ) { out.write( protein.getSpecies().getSpeciesId() ); out.write( separator ); @@ -1205,43 +1201,32 @@ public final class SurfacingUtil { out.write( domain_id.toString() ); out.write( separator ); int prev_to = -1; - for( final Domain domain : domains ) { - - if ( domain_e_cutoff < 0 || domain.getPerDomainEvalue() <= domain_e_cutoff ) { - out.write( "/" ); - out.write( domain.getFrom() + "-" + domain.getTo() ); - if ( prev_to >= 0) { - final int l = domain.getFrom() - prev_to; - System.out.println( l ); - } - prev_to = domain.getTo(); + if ( ( domain_e_cutoff < 0 ) || ( domain.getPerDomainEvalue() <= domain_e_cutoff ) ) { + out.write( "/" ); + out.write( domain.getFrom() + "-" + domain.getTo() ); + if ( prev_to >= 0 ) { + final int l = domain.getFrom() - prev_to; + System.out.println( l ); + } + prev_to = domain.getTo(); } } - out.write( "/" ); out.write( separator ); - - final List domain_list = new ArrayList(); - for( final Domain domain : protein.getProteinDomains() ) { - if ( domain_e_cutoff < 0 || domain.getPerDomainEvalue() <= domain_e_cutoff ) { + if ( ( domain_e_cutoff < 0 ) || ( domain.getPerDomainEvalue() <= domain_e_cutoff ) ) { domain_list.add( domain ); } } - - Domain domain_ary[] = new Domain[ domain_list.size() ]; - + final Domain domain_ary[] = new Domain[ domain_list.size() ]; for( int i = 0; i < domain_list.size(); ++i ) { domain_ary[ i ] = domain_list.get( i ); } - Arrays.sort( domain_ary, new DomainComparator( true ) ); - out.write( "{" ); boolean first = true; - for( final Domain domain : domain_ary ) { if ( first ) { first = false; @@ -1250,7 +1235,7 @@ public final class SurfacingUtil { out.write( "," ); } out.write( domain.getDomainId().toString() ); - out.write( ":" + domain.getFrom() + "-" + domain.getTo() ); + out.write( ":" + domain.getFrom() + "-" + domain.getTo() ); out.write( ":" + domain.getPerDomainEvalue() ); } out.write( "}" ); @@ -1265,7 +1250,6 @@ public final class SurfacingUtil { } out.write( SurfacingConstants.NL ); } - } } } @@ -2656,7 +2640,6 @@ public final class SurfacingUtil { @Override public final int compare( final Domain d0, final Domain d1 ) { - if ( d0.getFrom() < d1.getFrom() ) { return _ascending ? -1 : 1; } @@ -2664,13 +2647,6 @@ public final class SurfacingUtil { return _ascending ? 1 : -1; } return 0; - } - } } - - - - - diff --git a/forester/java/src/org/forester/util/BasicTableParser.java b/forester/java/src/org/forester/util/BasicTableParser.java index 59e5cd2..74dd567 100644 --- a/forester/java/src/org/forester/util/BasicTableParser.java +++ b/forester/java/src/org/forester/util/BasicTableParser.java @@ -67,18 +67,12 @@ public class BasicTableParser { final boolean use_start_of_comment_line = !( ForesterUtil.isEmpty( start_of_comment_line ) ); while ( ( line = reader.readLine() ) != null ) { line = line.trim(); - if ( !ForesterUtil.isEmpty( line) && - - - (( line.charAt( 0 ) == '"' && line.charAt( line.length() -1 ) == '"' && ForesterUtil.countChars( line, '"' ) == 2 ) - - || - - - ( line.charAt( 0 ) == '\'' && line.charAt( line.length() -1 ) == '\'' && ForesterUtil.countChars( line, '\'' ) == 2 ) ) ) { - line = line.substring( 1, line.length() -1 ).trim(); + if ( !ForesterUtil.isEmpty( line ) + && ( ( ( line.charAt( 0 ) == '"' ) && ( line.charAt( line.length() - 1 ) == '"' ) && ( ForesterUtil + .countChars( line, '"' ) == 2 ) ) || ( ( line.charAt( 0 ) == '\'' ) + && ( line.charAt( line.length() - 1 ) == '\'' ) && ( ForesterUtil.countChars( line, '\'' ) == 2 ) ) ) ) { + line = line.substring( 1, line.length() - 1 ).trim(); } - if ( saw_first_table && ( ForesterUtil.isEmpty( line ) || ( tables_separated_by_single_string_line && ( line .indexOf( column_delimiter ) < 0 ) ) ) ) { -- 1.7.10.2