From: cmzmasek@gmail.com Date: Fri, 21 Jun 2013 21:59:34 +0000 (+0000) Subject: inprogress X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=663daba455e534e015bd56bae070e9248e3a4533;p=jalview.git inprogress --- diff --git a/forester/java/src/org/forester/application/surfacing.java b/forester/java/src/org/forester/application/surfacing.java index f2019d4..ce716fe 100644 --- a/forester/java/src/org/forester/application/surfacing.java +++ b/forester/java/src/org/forester/application/surfacing.java @@ -222,16 +222,14 @@ public class surfacing { private static final int JACKNIFE_NUMBER_OF_RESAMPLINGS_DEFAULT = 100; final static private long JACKNIFE_RANDOM_SEED_DEFAULT = 19; final static private double JACKNIFE_RATIO_DEFAULT = 0.5; - //final static private String INFER_SPECIES_TREES_OPTION = "species_tree_inference"; final static private String FILTER_POSITIVE_OPTION = "pos_filter"; final static private String FILTER_NEGATIVE_OPTION = "neg_filter"; final static private String FILTER_NEGATIVE_DOMAINS_OPTION = "neg_dom_filter"; - final static private String INPUT_FILES_FROM_FILE_OPTION = "input"; + final static private String INPUT_GENOMES_FILE_OPTION = "genomes"; final static private String INPUT_SPECIES_TREE_OPTION = "species_tree"; final static private String SEQ_EXTRACT_OPTION = "prot_extract"; - final static private char SEPARATOR_FOR_INPUT_VALUES = '#'; - final static private String PRG_VERSION = "2.252"; - final static private String PRG_DATE = "2012.08.01"; + final static private String PRG_VERSION = "2.260"; + final static private String PRG_DATE = "130721"; final static private String E_MAIL = "czmasek@burnham.org"; final static private String WWW = "www.phylosoft.org/forester/applications/surfacing"; final static private boolean IGNORE_DUFS_DEFAULT = true; @@ -283,7 +281,7 @@ public class surfacing { public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_MAPPED.txt"; public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_MAPPED_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique_MAPPED.txt"; private static final boolean PERFORM_DC_REGAIN_PROTEINS_STATS = true; - private static final boolean DA_ANALYSIS = true; + private static final boolean DA_ANALYSIS = false; private static void checkWriteabilityForPairwiseComparisons( final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option, final String[][] input_file_properties, @@ -610,10 +608,9 @@ public class surfacing { allowed_options.add( JACKNIFE_RANDOM_SEED_OPTION ); allowed_options.add( JACKNIFE_RATIO_OPTION ); allowed_options.add( INPUT_SPECIES_TREE_OPTION ); - //allowed_options.add( INFER_SPECIES_TREES_OPTION ); allowed_options.add( FILTER_POSITIVE_OPTION ); allowed_options.add( FILTER_NEGATIVE_OPTION ); - allowed_options.add( INPUT_FILES_FROM_FILE_OPTION ); + allowed_options.add( INPUT_GENOMES_FILE_OPTION ); allowed_options.add( RANDOM_SEED_FOR_FITCH_PARSIMONY_OPTION ); allowed_options.add( FILTER_NEGATIVE_DOMAINS_OPTION ); allowed_options.add( IGNORE_VIRAL_IDS ); @@ -790,30 +787,22 @@ public class surfacing { plus_minus_analysis_high_copy_target_species, plus_minus_analysis_high_low_copy_species, plus_minus_analysis_numbers ); - File input_files_file = null; - String[] input_file_names_from_file = null; - if ( cla.isOptionSet( surfacing.INPUT_FILES_FROM_FILE_OPTION ) ) { - if ( !cla.isOptionValueSet( surfacing.INPUT_FILES_FROM_FILE_OPTION ) ) { - ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for input files file: -" - + surfacing.INPUT_FILES_FROM_FILE_OPTION + "=" ); - } - input_files_file = new File( cla.getOptionValue( surfacing.INPUT_FILES_FROM_FILE_OPTION ) ); - final String msg = ForesterUtil.isReadableFile( input_files_file ); + File input_genomes_file = null; + if ( cla.isOptionSet( surfacing.INPUT_GENOMES_FILE_OPTION ) ) { + if ( !cla.isOptionValueSet( surfacing.INPUT_GENOMES_FILE_OPTION ) ) { + ForesterUtil.fatalError( surfacing.PRG_NAME, "no value for input genomes file: -" + + surfacing.INPUT_GENOMES_FILE_OPTION + "=" ); + } + input_genomes_file = new File( cla.getOptionValue( surfacing.INPUT_GENOMES_FILE_OPTION ) ); + final String msg = ForesterUtil.isReadableFile( input_genomes_file ); if ( !ForesterUtil.isEmpty( msg ) ) { - ForesterUtil.fatalError( surfacing.PRG_NAME, "can not read from \"" + input_files_file + "\": " + msg ); - } - try { - input_file_names_from_file = ForesterUtil.file2array( input_files_file ); - } - catch ( final IOException e ) { - ForesterUtil.fatalError( surfacing.PRG_NAME, "failed to read from \"" + input_files_file + "\": " + e ); + ForesterUtil + .fatalError( surfacing.PRG_NAME, "can not read from \"" + input_genomes_file + "\": " + msg ); } } - if ( ( cla.getNumberOfNames() < 1 ) - && ( ( input_file_names_from_file == null ) || ( input_file_names_from_file.length < 1 ) ) ) { - ForesterUtil.fatalError( surfacing.PRG_NAME, - "No hmmpfam output file indicated is input: use comand line directly or " - + surfacing.INPUT_FILES_FROM_FILE_OPTION + "=" ); + else { + ForesterUtil.fatalError( surfacing.PRG_NAME, "no input genomes file given: " + + surfacing.INPUT_GENOMES_FILE_OPTION + "=" ); } DomainSimilarity.DomainSimilarityScoring scoring = SCORING_DEFAULT; if ( cla.isOptionSet( surfacing.SCORING_OPTION ) ) { @@ -854,7 +843,6 @@ public class surfacing { double output_list_of_all_proteins_per_domain_e_value_max = -1; if ( cla.isOptionSet( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_OPTIONS ) ) { output_protein_lists_for_all_domains = true; - // if ( cla.isOptionSet( surfacing.OUTPUT_LIST_OF_ALL_PROTEINS_PER_DOMAIN_E_VALUE_OPTION ) ) { try { output_list_of_all_proteins_per_domain_e_value_max = cla @@ -864,7 +852,6 @@ public class surfacing { ForesterUtil.fatalError( surfacing.PRG_NAME, "no acceptable value for per domain E-value maximum" ); } } - // } Detailedness detailedness = DETAILEDNESS_DEFAULT; if ( cla.isOptionSet( surfacing.DETAILEDNESS_OPTION ) ) { @@ -989,8 +976,6 @@ public class surfacing { domain_similarity_print_option = PrintableDomainSimilarity.PRINT_OPTION.HTML; } else if ( sort.equals( surfacing.DOMAIN_SIMILARITY_PRINT_OPTION_SIMPLE_HTML ) ) { - // domain_similarity_print_option = - // DomainSimilarity.PRINT_OPTION.SIMPLE_HTML; ForesterUtil.fatalError( surfacing.PRG_NAME, "simple HTML output not implemented yet :(" ); } else if ( sort.equals( surfacing.DOMAIN_SIMILARITY_PRINT_OPTION_SIMPLE_TAB_DELIMITED ) ) { @@ -1033,12 +1018,12 @@ public class surfacing { + surfacing.DOMAIN_COUNT_SORT_COMBINATIONS_COUNT + ">\"" ); } } - String[][] input_file_properties = null; - if ( input_file_names_from_file != null ) { - input_file_properties = surfacing.processInputFileNames( input_file_names_from_file ); - } - else { - input_file_properties = surfacing.processInputFileNames( cla.getNames() ); + final String[][] input_file_properties = processInputGenomesFile( input_genomes_file ); + for( final String[] input_file_propertie : input_file_properties ) { + for( int j = 0; j < input_file_propertie.length; j++ ) { + System.out.print( input_file_propertie[ j ] + " " ); + } + System.out.println(); } final int number_of_genomes = input_file_properties.length; if ( number_of_genomes < 2 ) { @@ -1054,7 +1039,7 @@ public class surfacing { automated_pairwise_comparison_suffix, out_dir ); for( int i = 0; i < number_of_genomes; i++ ) { - File dcc_outfile = new File( input_file_properties[ i ][ 0 ] + File dcc_outfile = new File( input_file_properties[ i ][ 1 ] + surfacing.DOMAIN_COMBINITON_COUNTS_OUTPUTFILE_SUFFIX ); if ( out_dir != null ) { dcc_outfile = new File( out_dir + ForesterUtil.FILE_SEPARATOR + dcc_outfile ); @@ -1206,17 +1191,6 @@ public class surfacing { } } } - // boolean infer_species_trees = false; - // if ( cla.isOptionSet( surfacing.INFER_SPECIES_TREES_OPTION ) ) { - // if ( ( output_file == null ) || ( number_of_genomes < 3 ) - // || ForesterUtil.isEmpty( automated_pairwise_comparison_suffix ) ) { - // ForesterUtil.fatalError( surfacing.PRG_NAME, "cannot infer species trees (-" - // + surfacing.INFER_SPECIES_TREES_OPTION + " without pairwise analyses (" - // + surfacing.PAIRWISE_DOMAIN_COMPARISONS_OPTION - // + "=)" ); - // } - // infer_species_trees = true; - // } File[] intree_files = null; Phylogeny[] intrees = null; if ( cla.isOptionSet( surfacing.INPUT_SPECIES_TREE_OPTION ) ) { @@ -1333,12 +1307,8 @@ public class surfacing { + surfacing.GO_OBO_FILE_USE_OPTION + "=)" ); } System.out.println( "Output directory : " + out_dir ); - if ( input_file_names_from_file != null ) { - System.out.println( "Input files names from : " + input_files_file + " [" - + input_file_names_from_file.length + " input files]" ); - html_desc.append( "Input files names from:" + input_files_file + " [" - + input_file_names_from_file.length + " input files]" + nl ); - } + System.out.println( "Input genomes from : " + input_genomes_file ); + html_desc.append( "Input genomes from:" + input_genomes_file + "" + nl ); if ( positive_filter_file != null ) { final int filter_size = filter.size(); System.out.println( "Positive protein filter : " + positive_filter_file + " [" + filter_size @@ -1613,10 +1583,6 @@ public class surfacing { System.out.println( " Ratio : " + ForesterUtil.round( jacknife_ratio, 2 ) ); System.out.println( " Random number seed : " + random_seed ); } - // if ( infer_species_trees ) { - // html_desc.append( "Infer species trees:true" + nl ); - // System.out.println( " Infer species trees : true" ); - // } if ( ( intrees != null ) && ( intrees.length > 0 ) ) { for( final File intree_file : intree_files ) { html_desc.append( "Intree for gain/loss parsimony analysis:" + intree_file @@ -1777,8 +1743,10 @@ public class surfacing { System.out.println(); System.out.println( ( i + 1 ) + "/" + number_of_genomes ); log( ( i + 1 ) + "/" + number_of_genomes, log_writer ); - System.out.println( "Processing : " + input_file_properties[ i ][ 0 ] ); - log( "Genome : " + input_file_properties[ i ][ 0 ], log_writer ); + System.out.println( "Processing : " + input_file_properties[ i ][ 1 ] + + " [" + input_file_properties[ i ][ 0 ] + "]" ); + log( "Genome : " + input_file_properties[ i ][ 1 ] + " [" + + input_file_properties[ i ][ 0 ] + "]", log_writer ); HmmscanPerDomainTableParser parser = null; INDIVIDUAL_SCORE_CUTOFF ind_score_cutoff = INDIVIDUAL_SCORE_CUTOFF.NONE; if ( individual_score_cutoffs != null ) { @@ -1924,10 +1892,6 @@ public class surfacing { + parser.getProteinsIgnoredDueToFilter() ); } html_desc.append( "" + nl ); - // domain_partner_counts_array[ i ] = - // Methods.getDomainPartnerCounts( protein_domain_collections_array[ - // i ], - // false, input_file_properties[ i ][ 1 ] ); try { int count = 0; for( final Protein protein : protein_list ) { @@ -2599,7 +2563,7 @@ public class surfacing { + "=: to filter out proteins containing at least one domain listed in " ); System.out.println( surfacing.FILTER_NEGATIVE_DOMAINS_OPTION + "=: to filter out (ignore) domains listed in " ); - System.out.println( surfacing.INPUT_FILES_FROM_FILE_OPTION + "=: to read input files from " ); + System.out.println( surfacing.INPUT_GENOMES_FILE_OPTION + "=: to read input files from " ); System.out .println( surfacing.RANDOM_SEED_FOR_FITCH_PARSIMONY_OPTION + "=: seed for random number generator for Fitch Parsimony analysis (type: long, default: no randomization - given a choice, prefer absence" ); @@ -2621,14 +2585,14 @@ public class surfacing { System.out.println(); System.out.println( "Example 1: java -Xms128m -Xmx512m -cp path/to/forester.jar" + " org.forester.application.surfacing p2g=pfam2go_2012_02_07.txt -dufs -cos=Pfam_260_NC1" - + " -no_eo -mo=0 -input=genomes_limited.txt -out_dir=out -o=o " + + " -no_eo -mo=0 -genomes=eukaryotes.txt -out_dir=out -o=o " + " -species_tree=tol.xml -obo=gene_ontology_2012_02_07.obo -pos_filter=f.txt -all_prot" ); System.out.println(); System.out.println( "Example 2: java -Xms128m -Xmx512m -cp path/to/forester.jar" + " org.forester.application.surfacing -detail=punctilious -o=TEST.html -pwc=TEST" + " -cos=Pfam_ls_22_TC2 -p2g=pfam2go -obo=gene_ontology_edit.obo " - + "-dc_sort=dom -ignore_with_self -no_singles -e=0.001 -mo=1 -no_eo " - + "-ds_output=detailed_html -scoring=domains -sort=alpha human mouse brafl strpu" ); + + "-dc_sort=dom -ignore_with_self -no_singles -e=0.001 -mo=1 -no_eo -genomes=eukaryotes.txt " + + "-ds_output=detailed_html -scoring=domains -sort=alpha " ); System.out.println(); } @@ -2653,24 +2617,17 @@ public class surfacing { } } - private static String[][] processInputFileNames( final String[] names ) { - final String[][] input_file_properties = new String[ names.length ][]; - for( int i = 0; i < names.length; ++i ) { - if ( names[ i ].indexOf( SEPARATOR_FOR_INPUT_VALUES ) < 0 ) { - input_file_properties[ i ] = new String[ 2 ]; - input_file_properties[ i ][ 0 ] = names[ i ]; - input_file_properties[ i ][ 1 ] = names[ i ]; - } - else { - input_file_properties[ i ] = names[ i ].split( surfacing.SEPARATOR_FOR_INPUT_VALUES + "" ); - if ( input_file_properties[ i ].length != 3 ) { - ForesterUtil - .fatalError( surfacing.PRG_NAME, - "properties for the input files (hmmpfam output) are expected " - + "to be in the following format \"#\" (or just one word, which is both the filename and the species id), instead received \"" - + names[ i ] + "\"" ); - } - } + private static String[][] processInputGenomesFile( final File input_genomes ) { + String[][] input_file_properties = null; + try { + input_file_properties = ForesterUtil.file22dArray( input_genomes ); + } + catch ( final IOException e ) { + ForesterUtil.fatalError( surfacing.PRG_NAME, + "genomes files is to be in the following format \" \": " + + e.getLocalizedMessage() ); + } + for( int i = 0; i < input_file_properties.length; ++i ) { final String error = ForesterUtil.isReadableFile( new File( input_file_properties[ i ][ 0 ] ) ); if ( !ForesterUtil.isEmpty( error ) ) { ForesterUtil.fatalError( surfacing.PRG_NAME, error ); diff --git a/forester/java/src/org/forester/go/BasicGoSubset.java b/forester/java/src/org/forester/go/BasicGoSubset.java index f1658d7..420f01b 100644 --- a/forester/java/src/org/forester/go/BasicGoSubset.java +++ b/forester/java/src/org/forester/go/BasicGoSubset.java @@ -58,17 +58,8 @@ public class BasicGoSubset implements GoSubset { else if ( my_s.equals( GOSLIM_POMBE_STR ) ) { _type = Type.GOSLIM_POMBE; } - else if ( my_s.equals( HIGH_LEVEL_ANNOTATION_QC_STR ) ) { - _type = Type.HIGH_LEVEL_ANNOTATION_QC; - } - else if ( my_s.equals( UNVETTED_STR ) ) { - _type = Type.UNVETTED; - } - else if ( my_s.equals( MF_NEEDS_REVIEW_STR ) ) { - _type = Type.MF_NEEDS_REVIEW; - } else { - throw new IllegalArgumentException( "unknown GO subset type: " + my_s ); + _type = Type.OTHER; } } @@ -134,17 +125,9 @@ public class BasicGoSubset implements GoSubset { case GOSLIM_POMBE: sb.append( GOSLIM_POMBE_STR ); break; - case MF_NEEDS_REVIEW: - sb.append( MF_NEEDS_REVIEW_STR ); - break; - case HIGH_LEVEL_ANNOTATION_QC: - sb.append( HIGH_LEVEL_ANNOTATION_QC_STR ); - break; - case UNVETTED: - sb.append( UNVETTED_STR ); + case OTHER: + sb.append( "other" ); break; - default: - new AssertionError( "unknown type: " + getType() ); } return sb.toString(); } diff --git a/forester/java/src/org/forester/go/BasicGoXRef.java b/forester/java/src/org/forester/go/BasicGoXRef.java index c523102..548a7a6 100644 --- a/forester/java/src/org/forester/go/BasicGoXRef.java +++ b/forester/java/src/org/forester/go/BasicGoXRef.java @@ -100,7 +100,7 @@ public class BasicGoXRef implements GoXRef { _type = Type.SABIO_RK; } else { - throw new IllegalArgumentException( "unknown GO xref type: " + type ); + _type = Type.OTHER; } _xref = sa[ 1 ].trim(); } @@ -192,6 +192,9 @@ public class BasicGoXRef implements GoXRef { case WIKIPEDIA: sb.append( WIKIPEDIA_STR ); break; + case OTHER: + sb.append( "other" ); + break; default: new AssertionError( "unknown type: " + getType() ); } diff --git a/forester/java/src/org/forester/go/GoSubset.java b/forester/java/src/org/forester/go/GoSubset.java index 02a2f03..2b7d2b5 100644 --- a/forester/java/src/org/forester/go/GoSubset.java +++ b/forester/java/src/org/forester/go/GoSubset.java @@ -27,18 +27,15 @@ package org.forester.go; public interface GoSubset extends Comparable { - public static final String GOSLIM_GENERIC_STR = "goslim_generic"; - public static final String GOSLIM_GOA_STR = "goslim_goa"; - public static final String GOSLIM_PIR_STR = "goslim_pir"; - public static final String GOSUBSET_PROK_STR = "gosubset_prok"; - public static final String GOSLIM_CANDIDA_STR = "goslim_candida"; - public static final String GOSLIM_ASPERGILLUS_STR = "goslim_aspergillus"; - public static final String GOSLIM_PLANT_STR = "goslim_plant"; - public static final String GOSLIM_YEAST_STR = "goslim_yeast"; - public static final String GOSLIM_POMBE_STR = "goslim_pombe"; - public static final String HIGH_LEVEL_ANNOTATION_QC_STR = "high_level_annotation_qc"; - public static final String UNVETTED_STR = "unvetted"; - public static final String MF_NEEDS_REVIEW_STR = "mf_needs_review"; + public static final String GOSLIM_GENERIC_STR = "goslim_generic"; + public static final String GOSLIM_GOA_STR = "goslim_goa"; + public static final String GOSLIM_PIR_STR = "goslim_pir"; + public static final String GOSUBSET_PROK_STR = "gosubset_prok"; + public static final String GOSLIM_CANDIDA_STR = "goslim_candida"; + public static final String GOSLIM_ASPERGILLUS_STR = "goslim_aspergillus"; + public static final String GOSLIM_PLANT_STR = "goslim_plant"; + public static final String GOSLIM_YEAST_STR = "goslim_yeast"; + public static final String GOSLIM_POMBE_STR = "goslim_pombe"; public Type getType(); @@ -52,8 +49,6 @@ public interface GoSubset extends Comparable { GOSLIM_PLANT, GOSLIM_YEAST, GOSLIM_POMBE, - HIGH_LEVEL_ANNOTATION_QC, - UNVETTED, - MF_NEEDS_REVIEW; + OTHER; } } diff --git a/forester/java/src/org/forester/go/GoXRef.java b/forester/java/src/org/forester/go/GoXRef.java index e0a4ca8..794a5be 100644 --- a/forester/java/src/org/forester/go/GoXRef.java +++ b/forester/java/src/org/forester/go/GoXRef.java @@ -74,6 +74,7 @@ public interface GoXRef extends Comparable { CORUM, UNIPATHWAY, PO, - SABIO_RK; + SABIO_RK, + OTHER; } } diff --git a/forester/java/src/org/forester/surfacing/AdjactantDirectedBinaryDomainCombination.java b/forester/java/src/org/forester/surfacing/AdjactantDirectedBinaryDomainCombination.java index 9d9a441..81cdf08 100644 --- a/forester/java/src/org/forester/surfacing/AdjactantDirectedBinaryDomainCombination.java +++ b/forester/java/src/org/forester/surfacing/AdjactantDirectedBinaryDomainCombination.java @@ -36,13 +36,15 @@ public class AdjactantDirectedBinaryDomainCombination extends BasicBinaryDomainC if ( ( n_terminal == null ) || ( c_terminal == null ) ) { throw new IllegalArgumentException( "attempt to create binary domain combination using null" ); } - // _id_0 = n_terminal.getId(); - // _id_1 = c_terminal.getId(); _data = n_terminal.getId() + BinaryDomainCombination.SEPARATOR + c_terminal.getId(); } public AdjactantDirectedBinaryDomainCombination( final String n_terminal, final String c_terminal ) { - this( new DomainId( n_terminal ), new DomainId( c_terminal ) ); + super(); + if ( ( n_terminal == null ) || ( c_terminal == null ) ) { + throw new IllegalArgumentException( "attempt to create binary domain combination using null" ); + } + _data = n_terminal + BinaryDomainCombination.SEPARATOR + c_terminal; } public static AdjactantDirectedBinaryDomainCombination createInstance( final String ids ) { diff --git a/forester/java/src/org/forester/surfacing/BasicBinaryDomainCombination.java b/forester/java/src/org/forester/surfacing/BasicBinaryDomainCombination.java index 39c7450..da009b5 100644 --- a/forester/java/src/org/forester/surfacing/BasicBinaryDomainCombination.java +++ b/forester/java/src/org/forester/surfacing/BasicBinaryDomainCombination.java @@ -34,13 +34,7 @@ public class BasicBinaryDomainCombination implements BinaryDomainCombination { String _data; - //String _id_0; - // String _id_1; - //DomainId _id_0; - //DomainId _id_1; BasicBinaryDomainCombination() { - //_id_0 = null; - // _id_1 = null; _data = null; } @@ -48,17 +42,11 @@ public class BasicBinaryDomainCombination implements BinaryDomainCombination { if ( ( id_0 == null ) || ( id_1 == null ) ) { throw new IllegalArgumentException( "attempt to create binary domain combination using null" ); } - final String my_id_0 = id_0.trim(); - final String my_id_1 = id_1.trim(); - if ( my_id_0.toLowerCase().compareTo( my_id_1.toLowerCase() ) < 0 ) { - //_id_0 = my_id_0; - //_id_1 = my_id_1; - _data = my_id_0 + BinaryDomainCombination.SEPARATOR + my_id_1; + if ( id_0.toLowerCase().compareTo( id_1.toLowerCase() ) < 0 ) { + _data = id_0 + BinaryDomainCombination.SEPARATOR + id_1; } else { - //_id_0 = my_id_1; - // _id_1 = my_id_0; - _data = my_id_1 + BinaryDomainCombination.SEPARATOR + my_id_0; + _data = id_1 + BinaryDomainCombination.SEPARATOR + id_0; } } @@ -109,14 +97,11 @@ public class BasicBinaryDomainCombination implements BinaryDomainCombination { @Override public DomainId getId1() { - // return new DomainId( _id_1 ); return new DomainId( _data.split( BinaryDomainCombination.SEPARATOR )[ 1 ] ); } @Override public int hashCode() { - // return getId0().hashCode() + ( 19 * getId1().hashCode() ); - // return ( _id_0 + _id_1 ).hashCode(); return _data.hashCode(); } @@ -167,11 +152,6 @@ public class BasicBinaryDomainCombination implements BinaryDomainCombination { @Override public String toString() { return _data; - // final StringBuffer sb = new StringBuffer(); - // sb.append( _id_0 ); - // sb.append( BinaryDomainCombination.SEPARATOR ); - // sb.append( _id_1 ); - // return sb.toString(); } public static BinaryDomainCombination createInstance( final String ids ) { diff --git a/forester/java/src/org/forester/surfacing/DirectedBinaryDomainCombination.java b/forester/java/src/org/forester/surfacing/DirectedBinaryDomainCombination.java index 2e5d797..bfd7b2e 100644 --- a/forester/java/src/org/forester/surfacing/DirectedBinaryDomainCombination.java +++ b/forester/java/src/org/forester/surfacing/DirectedBinaryDomainCombination.java @@ -36,13 +36,15 @@ public class DirectedBinaryDomainCombination extends BasicBinaryDomainCombinatio if ( ( n_terminal == null ) || ( c_terminal == null ) ) { throw new IllegalArgumentException( "attempt to create binary domain combination using null" ); } - //_id_0 = n_terminal.getId(); - //_id_1 = c_terminal.getId(); _data = n_terminal.getId() + BinaryDomainCombination.SEPARATOR + c_terminal.getId(); } public DirectedBinaryDomainCombination( final String n_terminal, final String c_terminal ) { - this( new DomainId( n_terminal ), new DomainId( c_terminal ) ); + super(); + if ( ( n_terminal == null ) || ( c_terminal == null ) ) { + throw new IllegalArgumentException( "attempt to create binary domain combination using null" ); + } + _data = n_terminal + BinaryDomainCombination.SEPARATOR + c_terminal; } public static BinaryDomainCombination createInstance( final String ids ) { diff --git a/forester/java/src/org/forester/surfacing/SurfacingUtil.java b/forester/java/src/org/forester/surfacing/SurfacingUtil.java index abcd31f..9497ea9 100644 --- a/forester/java/src/org/forester/surfacing/SurfacingUtil.java +++ b/forester/java/src/org/forester/surfacing/SurfacingUtil.java @@ -1748,7 +1748,7 @@ public final class SurfacingUtil { final GenomeWideCombinableDomains gwcd, final int i, final GenomeWideCombinableDomainsSortOrder dc_sort_order ) { - File dc_outfile_dot = new File( input_file_properties[ i ][ 0 ] + File dc_outfile_dot = new File( input_file_properties[ i ][ 1 ] + surfacing.DOMAIN_COMBINITONS_OUTPUTFILE_SUFFIX_FOR_GRAPH_ANALYSIS ); if ( output_dir != null ) { dc_outfile_dot = new File( output_dir + ForesterUtil.FILE_SEPARATOR + dc_outfile_dot ); @@ -2046,7 +2046,7 @@ public final class SurfacingUtil { final GenomeWideCombinableDomains gwcd, final int i, final GenomeWideCombinableDomains.GenomeWideCombinableDomainsSortOrder dc_sort_order ) { - File dc_outfile = new File( input_file_properties[ i ][ 0 ] + File dc_outfile = new File( input_file_properties[ i ][ 1 ] + surfacing.DOMAIN_COMBINITON_COUNTS_OUTPUTFILE_SUFFIX ); if ( output_dir != null ) { dc_outfile = new File( output_dir + ForesterUtil.FILE_SEPARATOR + dc_outfile ); @@ -2062,7 +2062,7 @@ public final class SurfacingUtil { } final DescriptiveStatistics stats = gwcd.getPerGenomeDomainPromiscuityStatistics(); try { - per_genome_domain_promiscuity_statistics_writer.write( input_file_properties[ i ][ 0 ] + "\t" ); + per_genome_domain_promiscuity_statistics_writer.write( input_file_properties[ i ][ 1 ] + "\t" ); per_genome_domain_promiscuity_statistics_writer.write( FORMATTER_3.format( stats.arithmeticMean() ) + "\t" ); if ( stats.getN() < 2 ) { per_genome_domain_promiscuity_statistics_writer.write( "n/a" + "\t" ); diff --git a/forester/java/src/org/forester/surfacing/TestSurfacing.java b/forester/java/src/org/forester/surfacing/TestSurfacing.java index bb4a722..3efaccb 100644 --- a/forester/java/src/org/forester/surfacing/TestSurfacing.java +++ b/forester/java/src/org/forester/surfacing/TestSurfacing.java @@ -1245,7 +1245,7 @@ public class TestSurfacing { final BasicBinaryDomainCombination s6 = new BasicBinaryDomainCombination( "b", "c" ); final BasicBinaryDomainCombination s7 = new BasicBinaryDomainCombination( "d", "a" ); final BasicBinaryDomainCombination s8 = new BasicBinaryDomainCombination( "b", "d" ); - final BinaryDomainCombination s9 = BasicBinaryDomainCombination.createInstance( " z-z=a-aa " ); + final BinaryDomainCombination s9 = BasicBinaryDomainCombination.createInstance( "z-z=a-aa" ); if ( !s9.toString().equals( "a-aa=z-z" ) ) { System.out.println( s9.toString() ); return false; diff --git a/forester/java/src/org/forester/util/ForesterUtil.java b/forester/java/src/org/forester/util/ForesterUtil.java index c98edc0..47e6aa7 100644 --- a/forester/java/src/org/forester/util/ForesterUtil.java +++ b/forester/java/src/org/forester/util/ForesterUtil.java @@ -477,6 +477,34 @@ public final class ForesterUtil { return ary; } + public static String[][] file22dArray( final File file ) throws IOException { + final List list = new ArrayList(); + final BufferedReader in = new BufferedReader( new FileReader( file ) ); + String str; + while ( ( str = in.readLine() ) != null ) { + str = str.trim(); + if ( ( str.length() > 0 ) && !str.startsWith( "#" ) ) { + list.add( str ); + } + } + in.close(); + final String[][] ary = new String[ list.size() ][ 2 ]; + final Pattern pa = Pattern.compile( "(\\S+)\\s+(\\S+)" ); + int i = 0; + for( final String s : list ) { + final Matcher m = pa.matcher( s ); + if ( m.matches() ) { + ary[ i ][ 0 ] = m.group( 1 ); + ary[ i ][ 1 ] = m.group( 2 ); + ++i; + } + else { + throw new IOException( "unexpcted format: " + s ); + } + } + return ary; + } + final public static List file2list( final File file ) throws IOException { final List list = new ArrayList(); final BufferedReader in = new BufferedReader( new FileReader( file ) );