public final static String PARSIMONY_OUTPUT_FITCH_PRESENT_BC = "_fitch_present_dc";
public final static String PARSIMONY_OUTPUT_FITCH_PRESENT_HTML_BC = "_fitch_present_dc.html";
public final static String PARSIMONY_OUTPUT_DOLLO_GAINS_D = "_dollo_gains_d";
- public final static String PARSIMONY_OUTPUT_DOLLO_GAINS_GOID_D = "_dollo_gains_goid_d";
public final static String PARSIMONY_OUTPUT_DOLLO_GAINS_HTML_D = "_dollo_gains_d.html";
public final static String PARSIMONY_OUTPUT_DOLLO_LOSSES_D = "_dollo_losses_d";
public final static String PARSIMONY_OUTPUT_DOLLO_LOSSES_HTML_D = "_dollo_losses_d.html";
public final static String PARSIMONY_OUTPUT_DOLLO_PRESENT_D = "_dollo_present_d";
- public final static String PARSIMONY_OUTPUT_DOLLO_PRESENT_GOID_D = "_dollo_present_goid_d";
public final static String PARSIMONY_OUTPUT_DOLLO_PRESENT_HTML_D = "_dollo_present_d.html";
public final static String DOMAINS_PRESENT_NEXUS = "_dom.nex";
public final static String BDC_PRESENT_NEXUS = "_dc.nex";
public static final String PARSIMONY_OUTPUT_DOLLO_PRESENT_SECONDARY_FEATURES = "_dollo_present_secondary_features";
public static final String SECONDARY_FEATURES_PARSIMONY_TREE_OUTPUT_SUFFIX_DOLLO = "_secondary_features_dollo"
+ ForesterConstants.PHYLO_XML_SUFFIX;
- public static final String PARSIMONY_OUTPUT_DOLLO_ALL_GOID_D_BIOLOGICAL_PROCESS = "_dollo_biol_proc_goid_d";
- public static final String PARSIMONY_OUTPUT_DOLLO_ALL_GOID_D_CELLULAR_COMPONENT = "_dollo_cell_comp_goid_d";
- public static final String PARSIMONY_OUTPUT_DOLLO_ALL_GOID_D_MOLECULAR_FUNCTION = "_dollo_mol_funct_goid_d";
public static final String PARSIMONY_OUTPUT_DOLLO_ALL_GOID_D_ALL_NAMESPACES = "_dollo_goid_d";
- public static final String PARSIMONY_OUTPUT_FITCH_ALL_GOID_BC_BIOLOGICAL_PROCESS = "_fitch_biol_proc_goid_dc";
- public static final String PARSIMONY_OUTPUT_FITCH_ALL_GOID_BC_CELLULAR_COMPONENT = "_fitch_cell_comp_goid_dc";
- public static final String PARSIMONY_OUTPUT_FITCH_ALL_GOID_BC_MOLECULAR_FUNCTION = "_fitch_mol_funct_goid_dc";
public static final String PARSIMONY_OUTPUT_FITCH_ALL_GOID_BC_ALL_NAMESPACES = "_fitch_goid_dc";
final static private String HELP_OPTION_1 = "help";
final static private String HELP_OPTION_2 = "h";
+ ForesterConstants.PHYLO_XML_SUFFIX;
final static private String NJ_TREE_SHARED_BIN_COMBINATIONS_BASED_GENOME_DISTANCE_SUFFIX = "_bin_combinations_NJ"
+ ForesterConstants.PHYLO_XML_SUFFIX;
- final static private String DISPLAY_M_HISTOGRAMS_OPTION = "mhisto";
- // final static private boolean DISPLAY_M_HISTOGRAMS_OPTION_DEFAULT = false;
final static private String JACKNIFE_OPTION = "jack";
final static private String JACKNIFE_RANDOM_SEED_OPTION = "seed";
final static private String JACKNIFE_RATIO_OPTION = "jack_ratio";
final static private String INPUT_SPECIES_TREE_OPTION = "species_tree";
final static private String SEQ_EXTRACT_OPTION = "prot_extract";
final static private char SEPARATOR_FOR_INPUT_VALUES = '#';
- final static private String PRG_VERSION = "2.003";
- final static private String PRG_DATE = "2010.12.03";
+ final static private String PRG_VERSION = "2.100";
+ final static private String PRG_DATE = "2011.06.17";
final static private String E_MAIL = "czmasek@burnham.org";
final static private String WWW = "www.phylosoft.org/forester/applications/surfacing";
final static private boolean IGNORE_DUFS_DEFAULT = true;
final static private boolean IGNORE_COMBINATION_WITH_SAME_DEFAULLT = false;
final static private double MAX_E_VALUE_DEFAULT = -1;
final static private int MAX_ALLOWED_OVERLAP_DEFAULT = -1;
- final static private String DEFAULT_SEARCH_PARAMETER = "ls";
- final private static boolean VERBOSE_DEFAULT = true;
private static final String RANDOM_SEED_FOR_FITCH_PARSIMONY_OPTION = "random_seed";
private static final String CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS = "consider_bdc_direction";
private static final String CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS_AND_ADJACENCY = "consider_bdc_adj";
private static final INDIVIDUAL_SCORE_CUTOFF INDIVIDUAL_SCORE_CUTOFF_DEFAULT = INDIVIDUAL_SCORE_CUTOFF.FULL_SEQUENCE;
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_counts.txt";
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists.txt";
+ public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping.txt";
- // final String error = ForesterUtil.isReadableFile( new File(
- // input_file_properties[ i ][ 0 ] ) );
- // if ( !ForesterUtil.isEmpty( error ) ) {
- // ForesterUtil.fatalError( surfacing.PRG_NAME, error );
- // }
private static void checkWriteabilityForPairwiseComparisons( final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option,
final String[][] input_file_properties,
final String automated_pairwise_comparison_suffix,
}
/**
- * Warning: This sideeffects 'all_bin_domain_combinations_encountered'!
+ * Warning: This side-effects 'all_bin_domain_combinations_encountered'!
*
*
* @param output_file
allowed_options.add( surfacing.GO_NAMESPACE_LIMIT_OPTION );
allowed_options.add( surfacing.PAIRWISE_DOMAIN_COMPARISONS_OPTION );
allowed_options.add( surfacing.IGNORE_DOMAINS_WITHOUT_COMBINATIONS_IN_ALL_SPECIES_OPTION );
- allowed_options.add( surfacing.DISPLAY_M_HISTOGRAMS_OPTION );
allowed_options.add( surfacing.CONSIDER_DOMAIN_COMBINATION_DIRECTEDNESS );
allowed_options.add( JACKNIFE_OPTION );
allowed_options.add( JACKNIFE_RANDOM_SEED_OPTION );
"no (acceptable) go id to term mapping file provided ('GO OBO file') (-"
+ surfacing.GO_OBO_FILE_USE_OPTION + "=<file>)" );
}
- boolean display_histograms = false;
- if ( cla.isOptionSet( surfacing.DISPLAY_M_HISTOGRAMS_OPTION ) ) {
- display_histograms = true;
- }
System.out.println( "Output directory : " + out_dir );
if ( input_file_names_from_file != null ) {
System.out.println( "Input files names from : " + input_files_file + " ["
true,
surfacing.PAIRWISE_DOMAIN_COMPARISONS_PREFIX,
surfacing.PRG_NAME,
- display_histograms,
out_dir,
write_pwc_files );
String matrix_output_file = new String( output_file.toString() );
inferred_trees.add( nj_gd );
inferred_trees.add( nj_bc );
inferred_trees.add( nj_d );
- // final List<HistogramData> histogram_datas = pwgc.getHistogramDatas();
- // if ( infer_species_trees ) {
- // inferred_trees = new ArrayList<Phylogeny>();
- // final List<Phylogeny> inferred_trees_bc = inferSpeciesTrees( new File( output_file + INFERRED_SBC_BASED_NJ_SPECIES_TREE_SUFFIX ), pwgc
- // .getSharedBinaryCombinationsBasedDistances() );
- // final List<Phylogeny> inferred_trees_d = inferSpeciesTrees( new File( output_file + INFERRED_SD_BASED_NJ_SPECIES_TREE_SUFFIX ), pwgc
- // .getSharedDomainsBasedDistances() );
- // inferred_trees.addAll( inferred_trees_bc );
- // inferred_trees.addAll( inferred_trees_d );
- // }
if ( jacknifed_distances ) {
pwgc.performPairwiseComparisonsJacknifed( species,
number_of_genomes,
// + INFERRED_SD_BASED_NJ_SPECIES_TREE_SUFFIX ), pwgc.getSharedDomainsBasedDistances() );
// }
}
- if ( display_histograms ) {
- // final List<HistogramData> histogram_datas_all = new ArrayList<HistogramData>();
- // histogram_datas_all.add( new HistogramData( "all",
- // values_for_all_scores_histogram,
- // null,
- // 20 ) );
- // final HistogramsFrame hf_all = new HistogramsFrame( histogram_datas_all );
- // final HistogramsFrame hf = new HistogramsFrame( histogram_datas );
- // hf_all.setVisible( true );
- // hf.setVisible( true );
- }
} // if ( ( output_file != null ) && ( number_of_genomes > 2 ) && !isEmpty( automated_pairwise_comparison_suffix ) )
if ( ( out_dir != null ) && ( !perform_pwc ) ) {
output_file = new File( out_dir + ForesterUtil.FILE_SEPARATOR + output_file );
System.out.println( surfacing.INPUT_SPECIES_TREE_OPTION
+ ": species tree, to perform (Dollo, Fitch) parismony analyses" );
System.out
- .println( surfacing.DISPLAY_M_HISTOGRAMS_OPTION + ": to display multiple histograms (using fluorite)" );
- System.out
.println( JACKNIFE_OPTION
+ ": perform jacknife resampling for domain and binary domain combination based distance matrices [default resamplings: "
+ JACKNIFE_NUMBER_OF_RESAMPLINGS_DEFAULT + "]" );
import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.PriorityQueue;
import java.util.Set;
import java.util.SortedMap;
import java.util.SortedSet;
import org.forester.go.GoId;
import org.forester.go.GoNameSpace;
import org.forester.go.GoTerm;
-import org.forester.go.GoUtils;
import org.forester.go.PfamToGoMapping;
import org.forester.io.parsers.nexus.NexusConstants;
import org.forester.io.writers.PhylogenyWriter;
return stats;
}
+ /**
+  * Counts, for every gained character (domain combination) found on the
+  * nodes of the given phylogeny, on how many nodes it is listed as gained,
+  * and writes two tab-separated reports keyed by that gain count:
+  * <ul>
+  * <li><code>outfilename_for_counts</code>: gain count and the number of
+  * domain combinations having that gain count;</li>
+  * <li><code>outfilename_for_dc</code>: gain count and the ", "-joined
+  * list of domain combinations having that gain count.</li>
+  * </ul>
+  * A third file, <code>outfilename_for_dc_for_go_mapping</code>, is
+  * created as well.
+  * <p>
+  * An I/O failure is reported as a warning (not as a fatal error); in that
+  * case no "Wrote ..." messages are printed. All writers are closed on
+  * every path.
+  *
+  * @param local_phylogeny_l phylogeny whose nodes carry binary characters
+  * @param outfilename_for_counts path of the gain-count histogram file
+  * @param outfilename_for_dc path of the per-gain-count list file
+  * @param outfilename_for_dc_for_go_mapping path of the file intended for GO mapping
+  */
+ private static void calculateIndependentDomainCombinationGains( final Phylogeny local_phylogeny_l,
+                                                                 final String outfilename_for_counts,
+                                                                 final String outfilename_for_dc,
+                                                                 final String outfilename_for_dc_for_go_mapping ) {
+     BufferedWriter out_counts = null;
+     BufferedWriter out_dc = null;
+     BufferedWriter out_dc_for_go_mapping = null;
+     try {
+         out_counts = new BufferedWriter( new FileWriter( outfilename_for_counts ) );
+         out_dc = new BufferedWriter( new FileWriter( outfilename_for_dc ) );
+         out_dc_for_go_mapping = new BufferedWriter( new FileWriter( outfilename_for_dc_for_go_mapping ) );
+         // Domain combination -> number of nodes on which it is listed as gained.
+         final SortedMap<String, Integer> dc_gain_counts = new TreeMap<String, Integer>();
+         for( final PhylogenyNodeIterator it = local_phylogeny_l.iteratorPostorder(); it.hasNext(); ) {
+             final PhylogenyNode n = it.next();
+             final Set<String> gained_dc = n.getNodeData().getBinaryCharacters().getGainedCharacters();
+             for( final String dc : gained_dc ) {
+                 final Integer previous = dc_gain_counts.get( dc );
+                 dc_gain_counts.put( dc, previous == null ? 1 : previous + 1 );
+             }
+         }
+         // Gain count -> number of domain combinations with that gain count.
+         final SortedMap<Integer, Integer> histogram = new TreeMap<Integer, Integer>();
+         // Gain count -> ", "-joined domain combinations (in sorted key order).
+         final SortedMap<Integer, StringBuilder> domain_lists = new TreeMap<Integer, StringBuilder>();
+         // Gain count -> queue of domain combinations.
+         // NOTE(review): these queues are filled but never written to
+         // out_dc_for_go_mapping, so that file is created empty; the intended
+         // output format is not visible here -- confirm and complete.
+         final SortedMap<Integer, PriorityQueue<String>> domain_lists_go = new TreeMap<Integer, PriorityQueue<String>>();
+         for( final String dc : dc_gain_counts.keySet() ) {
+             final int count = dc_gain_counts.get( dc );
+             if ( histogram.containsKey( count ) ) {
+                 histogram.put( count, histogram.get( count ) + 1 );
+                 domain_lists.get( count ).append( ", " + dc );
+                 domain_lists_go.get( count ).add( dc );
+             }
+             else {
+                 histogram.put( count, 1 );
+                 domain_lists.put( count, new StringBuilder( dc ) );
+                 final PriorityQueue<String> q = new PriorityQueue<String>();
+                 q.add( dc );
+                 domain_lists_go.put( count, q );
+             }
+         }
+         for( final Integer histogram_key : histogram.keySet() ) {
+             final int count = histogram.get( histogram_key );
+             final StringBuilder dc = domain_lists.get( histogram_key );
+             out_counts.write( histogram_key + "\t" + count + ForesterUtil.LINE_SEPARATOR );
+             out_dc.write( histogram_key + "\t" + dc + ForesterUtil.LINE_SEPARATOR );
+         }
+         // Close explicitly inside the try so that a failing flush is reported
+         // below instead of being silently discarded in the finally block.
+         out_counts.close();
+         out_dc.close();
+         out_dc_for_go_mapping.close();
+     }
+     catch ( final IOException e ) {
+         ForesterUtil.printWarningMessage( surfacing.PRG_NAME, "Failure to write: " + e );
+         // Do not claim success for files that could not be written.
+         return;
+     }
+     finally {
+         // Closing an already closed writer has no effect, so this is safe
+         // after the explicit close() calls above.
+         closeWriterQuietly( out_counts );
+         closeWriterQuietly( out_dc );
+         closeWriterQuietly( out_dc_for_go_mapping );
+     }
+     ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote independent domain combination gains fitch counts to ["
+             + outfilename_for_counts + "]" );
+     ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote independent domain combination gains fitch lists to ["
+             + outfilename_for_dc + "]" );
+     ForesterUtil.programMessage( surfacing.PRG_NAME,
+                                  "Wrote independent domain combination gains fitch lists to (for GO mapping) ["
+                                          + outfilename_for_dc_for_go_mapping + "]" );
+ }
+
+ /**
+  * Closes the given writer if it is not null, ignoring any failure.
+  * Used only for cleanup; write errors are reported by the caller.
+  */
+ private static void closeWriterQuietly( final Writer writer ) {
+     if ( writer != null ) {
+         try {
+             writer.close();
+         }
+         catch ( final IOException ignored ) {
+             // Best-effort cleanup: the primary error, if any, has already
+             // been reported by the caller.
+         }
+     }
+ }
+
public static int calculateOverlap( final Domain domain, final List<Boolean> covered_positions ) {
int overlap_count = 0;
for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) {
+ surfacing.BINARY_DOMAIN_COMBINATIONS_PARSIMONY_TREE_OUTPUT_SUFFIX_FITCH );
calculateIndependentDomainCombinationGains( local_phylogeny_l, outfile_name
+ surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_COUNTS_OUTPUT_SUFFIX, outfile_name
- + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_OUTPUT_SUFFIX );
+ + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_OUTPUT_SUFFIX, outfile_name
+ + surfacing.INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_SUFFIX );
}
}
- private static void calculateIndependentDomainCombinationGains( final Phylogeny local_phylogeny_l,
- final String outfilename_for_counts,
- final String outfilename_for_dc ) {
- try {
- final BufferedWriter out_counts = new BufferedWriter( new FileWriter( outfilename_for_counts ) );
- final BufferedWriter out_dc = new BufferedWriter( new FileWriter( outfilename_for_dc ) );
- final SortedMap<String, Integer> dc_gain_counts = new TreeMap<String, Integer>();
- for( final PhylogenyNodeIterator it = local_phylogeny_l.iteratorPostorder(); it.hasNext(); ) {
- final PhylogenyNode n = it.next();
- final Set<String> gained_dc = n.getNodeData().getBinaryCharacters().getGainedCharacters();
- for( final String dc : gained_dc ) {
- if ( dc_gain_counts.containsKey( dc ) ) {
- dc_gain_counts.put( dc, dc_gain_counts.get( dc ) + 1 );
- }
- else {
- dc_gain_counts.put( dc, 1 );
- }
- }
- }
- final SortedMap<Integer, Integer> histogram = new TreeMap<Integer, Integer>();
- final SortedMap<Integer, StringBuilder> domain_lists = new TreeMap<Integer, StringBuilder>();
- final Set<String> dcs = dc_gain_counts.keySet();
- for( final String dc : dcs ) {
- final int count = dc_gain_counts.get( dc );
- if ( histogram.containsKey( count ) ) {
- histogram.put( count, histogram.get( count ) + 1 );
- domain_lists.put( count, domain_lists.get( count ).append( ", " + dc ) );
- }
- else {
- histogram.put( count, 1 );
- domain_lists.put( count, new StringBuilder( dc ) );
- }
- }
- final Set<Integer> histogram_keys = histogram.keySet();
- for( final Integer histogram_key : histogram_keys ) {
- final int count = histogram.get( histogram_key );
- final StringBuilder dc = domain_lists.get( histogram_key );
- out_counts.write( histogram_key + "\t" + count + ForesterUtil.LINE_SEPARATOR );
- out_dc.write( histogram_key + "\t" + dc + ForesterUtil.LINE_SEPARATOR );
- }
- out_counts.close();
- out_dc.close();
- }
- catch ( final IOException e ) {
- ForesterUtil.printWarningMessage( surfacing.PRG_NAME, "Failure to write: " + e );
- }
- ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote independent domain combination gains fitch counts to ["
- + outfilename_for_counts + "]" );
- ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote independent domain combination gains fitch lists to ["
- + outfilename_for_dc + "]" );
- }
-
public static void executeParsimonyAnalysisForSecondaryFeatures( final String outfile_name,
final DomainParsimonyCalculator secondary_features_parsimony,
final Phylogeny phylogeny,
p.setRooted( true );
}
+ /**
+  * Renders the pairwise domain combinations of a protein as
+  * separator-delimited text, one line per distinct pair of domain ids.
+  * <p>
+  * Every line has eight columns:
+  * species | protein id | n-terminal domain | c-terminal domain |
+  * n-terminal domain per domain E-value | c-terminal domain per domain E-value |
+  * number of n-terminal-id domains in the protein | number of c-terminal-id domains in the protein
+  * <p>
+  * Within a pair, the domain with the smaller start position is treated as
+  * n-terminal. If the same ordered id pair occurs more than once, only the
+  * first pair encountered is written (its E-values are the ones reported).
+  * A protein with exactly one domain yields a single line with the
+  * c-terminal columns left empty; a protein without domains yields a
+  * single line with only species and protein id filled in.
+  *
+  * @param protein the protein to render; its species and species id must be set
+  * @param protein_id the identifier written to the second column
+  * @param separator the column separator
+  * @return the rendered lines, each terminated by ForesterUtil.LINE_SEPARATOR
+  * @throws IllegalArgumentException if the species is null or its id is empty
+  */
+ static public StringBuffer proteinToDomainCombinations( final Protein protein,
+                                                         final String protein_id,
+                                                         final String separator ) {
+     final StringBuffer sb = new StringBuffer();
+     if ( protein.getSpecies() == null ) {
+         throw new IllegalArgumentException( "species must not be null" );
+     }
+     if ( ForesterUtil.isEmpty( protein.getSpecies().getSpeciesId() ) ) {
+         throw new IllegalArgumentException( "species id must not be empty" );
+     }
+     final List<Domain> domains = protein.getProteinDomains();
+     if ( domains.size() > 1 ) {
+         // Domain id -> number of domains with that id in this protein.
+         final Map<String, Integer> counts = new HashMap<String, Integer>();
+         for( final Domain domain : domains ) {
+             final String id = domain.getDomainId().getId();
+             final Integer previous = counts.get( id );
+             counts.put( id, previous == null ? 1 : previous + 1 );
+         }
+         // N-terminal id -> c-terminal ids already written. Keeping the two ids
+         // separate (instead of concatenating them into one key) guarantees that
+         // distinct pairs whose concatenations happen to coincide are not
+         // mistaken for duplicates.
+         final Map<String, Set<String>> written = new HashMap<String, Set<String>>();
+         for( int i = 1; i < domains.size(); ++i ) {
+             for( int j = 0; j < i; ++j ) {
+                 Domain domain_n = domains.get( i );
+                 Domain domain_c = domains.get( j );
+                 if ( domain_n.getFrom() > domain_c.getFrom() ) {
+                     domain_n = domains.get( j );
+                     domain_c = domains.get( i );
+                 }
+                 final String id_n = domain_n.getDomainId().getId();
+                 final String id_c = domain_c.getDomainId().getId();
+                 Set<String> written_c = written.get( id_n );
+                 if ( written_c == null ) {
+                     written_c = new HashSet<String>();
+                     written.put( id_n, written_c );
+                 }
+                 // add() returns false if this ordered pair was written before.
+                 if ( written_c.add( id_c ) ) {
+                     sb.append( protein.getSpecies() );
+                     sb.append( separator );
+                     sb.append( protein_id );
+                     sb.append( separator );
+                     sb.append( id_n );
+                     sb.append( separator );
+                     sb.append( id_c );
+                     sb.append( separator );
+                     sb.append( domain_n.getPerDomainEvalue() );
+                     sb.append( separator );
+                     sb.append( domain_c.getPerDomainEvalue() );
+                     sb.append( separator );
+                     sb.append( counts.get( id_n ) );
+                     sb.append( separator );
+                     sb.append( counts.get( id_c ) );
+                     sb.append( ForesterUtil.LINE_SEPARATOR );
+                 }
+             }
+         }
+     }
+     else if ( domains.size() == 1 ) {
+         // Single domain: c-terminal id, E-value and count columns stay empty.
+         sb.append( protein.getSpecies() );
+         sb.append( separator );
+         sb.append( protein_id );
+         sb.append( separator );
+         sb.append( domains.get( 0 ).getDomainId().getId() );
+         sb.append( separator );
+         sb.append( separator );
+         sb.append( domains.get( 0 ).getPerDomainEvalue() );
+         sb.append( separator );
+         sb.append( separator );
+         sb.append( 1 );
+         sb.append( separator );
+         sb.append( ForesterUtil.LINE_SEPARATOR );
+     }
+     else {
+         // No domains: only species and protein id, followed by six empty columns.
+         sb.append( protein.getSpecies() );
+         sb.append( separator );
+         sb.append( protein_id );
+         sb.append( separator );
+         sb.append( separator );
+         sb.append( separator );
+         sb.append( separator );
+         sb.append( separator );
+         sb.append( separator );
+         sb.append( ForesterUtil.LINE_SEPARATOR );
+     }
+     return sb;
+ }
+
/**
*
* Example regarding engulfment: ------------0.1 ----------0.2 --0.3 =>
return pruned_protein;
}
- static List<Domain> sortDomainsWithAscendingConfidenceValues( final Protein protein ) {
+ public static List<Domain> sortDomainsWithAscendingConfidenceValues( final Protein protein ) {
final List<Domain> domains = new ArrayList<Domain>();
for( final Domain d : protein.getProteinDomains() ) {
domains.add( d );
+ input_file_properties[ i ][ 2 ] + ") to: \"" + dc_outfile_dot + "\"" );
}
- /*
- * species | protein id | n-terminal domain | c-terminal domain | n-terminal domain per domain E-value | c-terminal domain per domain E-value
- *
- *
- */
- static public StringBuffer proteinToDomainCombinations( final Protein protein,
- final String protein_id,
- final String separator ) {
- final StringBuffer sb = new StringBuffer();
- if ( protein.getSpecies() == null ) {
- throw new IllegalArgumentException( "species must not be null" );
- }
- if ( ForesterUtil.isEmpty( protein.getSpecies().getSpeciesId() ) ) {
- throw new IllegalArgumentException( "species id must not be empty" );
- }
- final List<Domain> domains = protein.getProteinDomains();
- if ( domains.size() > 1 ) {
- final Map<String, Integer> counts = new HashMap<String, Integer>();
- for( final Domain domain : domains ) {
- final String id = domain.getDomainId().getId();
- if ( counts.containsKey( id ) ) {
- counts.put( id, counts.get( id ) + 1 );
- }
- else {
- counts.put( id, 1 );
- }
- }
- final Set<String> dcs = new HashSet<String>();
- for( int i = 1; i < domains.size(); ++i ) {
- for( int j = 0; j < i; ++j ) {
- Domain domain_n = domains.get( i );
- Domain domain_c = domains.get( j );
- if ( domain_n.getFrom() > domain_c.getFrom() ) {
- domain_n = domains.get( j );
- domain_c = domains.get( i );
- }
- final String dc = domain_n.getDomainId().getId() + domain_c.getDomainId().getId();
- if ( !dcs.contains( dc ) ) {
- dcs.add( dc );
- sb.append( protein.getSpecies() );
- sb.append( separator );
- sb.append( protein_id );
- sb.append( separator );
- sb.append( domain_n.getDomainId().getId() );
- sb.append( separator );
- sb.append( domain_c.getDomainId().getId() );
- sb.append( separator );
- sb.append( domain_n.getPerDomainEvalue() );
- sb.append( separator );
- sb.append( domain_c.getPerDomainEvalue() );
- sb.append( separator );
- sb.append( counts.get( domain_n.getDomainId().getId() ) );
- sb.append( separator );
- sb.append( counts.get( domain_c.getDomainId().getId() ) );
- sb.append( ForesterUtil.LINE_SEPARATOR );
- }
- }
- }
- }
- else if ( domains.size() == 1 ) {
- sb.append( protein.getSpecies() );
- sb.append( separator );
- sb.append( protein_id );
- sb.append( separator );
- sb.append( domains.get( 0 ).getDomainId().getId() );
- sb.append( separator );
- sb.append( separator );
- sb.append( domains.get( 0 ).getPerDomainEvalue() );
- sb.append( separator );
- sb.append( separator );
- sb.append( 1 );
- sb.append( separator );
- sb.append( ForesterUtil.LINE_SEPARATOR );
- }
- else {
- sb.append( protein.getSpecies() );
- sb.append( separator );
- sb.append( protein_id );
- sb.append( separator );
- sb.append( separator );
- sb.append( separator );
- sb.append( separator );
- sb.append( separator );
- sb.append( separator );
- sb.append( ForesterUtil.LINE_SEPARATOR );
- }
- return sb;
- }
-
public static void writeBinaryStatesMatrixAsListToFile( final CharacterStateMatrix<CharacterStateMatrix.GainLossStates> matrix,
final CharacterStateMatrix.GainLossStates state,
final String filename,
}
}
- private static void writeDomainDataORIG( final Map<DomainId, List<GoId>> domain_id_to_go_ids_map,
- final Map<GoId, GoTerm> go_id_to_term_map,
- final GoNameSpace go_namespace_limit,
- final Writer out,
- final String domain_0,
- final String domain_1,
- final String prefix_for_html,
- final String character_separator_for_non_html_output,
- final Map<DomainId, Set<String>>[] domain_id_to_secondary_features_maps,
- final Set<GoId> all_go_ids ) throws IOException {
- boolean any_go_annotation_present = false;
- boolean first_has_no_go = false;
- int domain_count = 2; // To distinguish between domains and binary domain combinations.
- if ( ForesterUtil.isEmpty( domain_1 ) ) {
- domain_count = 1;
- }
- // The following has a difficult to understand logic.
- for( int d = 0; d < domain_count; ++d ) {
- List<GoId> go_ids = null;
- boolean go_annotation_present = false;
- if ( d == 0 ) {
- final DomainId domain_id = new DomainId( domain_0 );
- if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) {
- go_annotation_present = true;
- any_go_annotation_present = true;
- go_ids = domain_id_to_go_ids_map.get( domain_id );
- }
- else {
- first_has_no_go = true;
- }
- }
- else {
- final DomainId domain_id = new DomainId( domain_1 );
- if ( domain_id_to_go_ids_map.containsKey( domain_id ) ) {
- go_annotation_present = true;
- any_go_annotation_present = true;
- go_ids = domain_id_to_go_ids_map.get( domain_id );
- }
- }
- if ( go_annotation_present ) {
- boolean first = ( ( d == 0 ) || ( ( d == 1 ) && first_has_no_go ) );
- for( final GoId go_id : go_ids ) {
- out.write( "<tr>" );
- if ( first ) {
- first = false;
- writeDomainIdsToHtml( out,
- domain_0,
- domain_1,
- prefix_for_html,
- domain_id_to_secondary_features_maps );
- }
- else {
- out.write( "<td></td>" );
- }
- if ( !go_id_to_term_map.containsKey( go_id ) ) {
- throw new IllegalArgumentException( "GO-id [" + go_id + "] not found in GO-id to GO-term map" );
- }
- final GoTerm go_term = go_id_to_term_map.get( go_id );
- if ( ( go_namespace_limit == null ) || go_namespace_limit.equals( go_term.getGoNameSpace() ) ) {
- final String top = GoUtils.getPenultimateGoTerm( go_term, go_id_to_term_map ).getName();
- final String go_id_str = go_id.getId();
- out.write( "<td>" );
- out.write( "<a href=\"" + SurfacingConstants.AMIGO_LINK + go_id_str
- + "\" target=\"amigo_window\">" + go_id_str + "</a>" );
- out.write( "</td><td>" );
- out.write( go_term.getName() );
- if ( domain_count == 2 ) {
- out.write( " (" + d + ")" );
- }
- out.write( "</td><td>" );
- out.write( top );
- out.write( "</td><td>" );
- out.write( "[" );
- out.write( go_term.getGoNameSpace().toShortString() );
- out.write( "]" );
- out.write( "</td>" );
- if ( all_go_ids != null ) {
- all_go_ids.add( go_id );
- }
- }
- else {
- out.write( "<td>" );
- out.write( "</td><td>" );
- out.write( "</td><td>" );
- out.write( "</td><td>" );
- out.write( "</td>" );
- }
- out.write( "</tr>" );
- out.write( SurfacingConstants.NL );
- }
- }
- } // for( int d = 0; d < domain_count; ++d )
- if ( !any_go_annotation_present ) {
- out.write( "<tr>" );
- writeDomainIdsToHtml( out, domain_0, domain_1, prefix_for_html, domain_id_to_secondary_features_maps );
- out.write( "<td>" );
- out.write( "</td><td>" );
- out.write( "</td><td>" );
- out.write( "</td><td>" );
- out.write( "</td>" );
- out.write( "</tr>" );
- out.write( SurfacingConstants.NL );
- }
- }
-
private static void writeDomainIdsToHtml( final Writer out,
final String domain_0,
final String domain_1,
out.write( " " );
}
out.write( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_0 + "\">" + domain_0 + "</a>" );
- //if ( ForesterUtil.isEmpty( domain_1 ) ) {
- // out.write( " <a href=\"" + SurfacingConstants.GOOGLE_SCHOLAR_LINK + domain_0
- // + SurfacingConstants.GOOGLE_SCHOLAR_LIMITS + "\">[gs]</a>" );
- //}
- // if ( !ForesterUtil.isEmpty( domain_1 ) ) {
- // out.write( "=" );
- // out.write( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_1 + "\">" + domain_1 + "</a>" );
- //}
- // else if ( ( domain_id_to_secondary_features_maps != null )
- // && ( domain_id_to_secondary_features_maps.length > 0 ) ) {
- // out.write( " [" );
- // boolean first = true;
- // for( final Map<DomainId, Set<String>> domain_id_to_secondary_features_map : domain_id_to_secondary_features_maps ) {
- // final Set<String> sec_features = domain_id_to_secondary_features_map.get( new DomainId( domain_0 ) );
- // if ( ( sec_features != null ) && ( sec_features.size() > 0 ) ) {
- // for( final String sec_feature : sec_features ) {
- // if ( first ) {
- // first = false;
- // }
- // else {
- // out.write( ", " );
- // }
- // if ( SurfacingConstants.SECONDARY_FEATURES_ARE_SCOP
- // && ( SurfacingConstants.SECONDARY_FEATURES_SCOP_LINK != null ) ) {
- // out.write( "<a href=\"" + SurfacingConstants.SECONDARY_FEATURES_SCOP_LINK + sec_feature
- // + "\" target=\"scop_window\">" + sec_feature + "</a>" );
- // }
- // else {
- // out.write( sec_feature );
- // }
- // }
- // }
- // }
- // out.write( "]" );
- // }
- out.write( "</td>" );
- }
-
- private static void writeDomainIdsToHtmlORIG( final Writer out,
- final String domain_0,
- final String domain_1,
- final String prefix_for_detailed_html,
- final Map<DomainId, Set<String>>[] domain_id_to_secondary_features_maps )
- throws IOException {
- out.write( "<td>" );
- if ( !ForesterUtil.isEmpty( prefix_for_detailed_html ) ) {
- out.write( prefix_for_detailed_html );
- out.write( " " );
- }
- out.write( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_0 + "\">" + domain_0 + "</a>" );
- if ( ForesterUtil.isEmpty( domain_1 ) ) {
- out.write( " <a href=\"" + SurfacingConstants.GOOGLE_SCHOLAR_LINK + domain_0
- + SurfacingConstants.GOOGLE_SCHOLAR_LIMITS + "\">[gs]</a>" );
- }
- if ( !ForesterUtil.isEmpty( domain_1 ) ) {
- out.write( "=" );
- out.write( "<a href=\"" + SurfacingConstants.PFAM_FAMILY_ID_LINK + domain_1 + "\">" + domain_1 + "</a>" );
- }
- else if ( ( domain_id_to_secondary_features_maps != null )
- && ( domain_id_to_secondary_features_maps.length > 0 ) ) {
- out.write( " [" );
- boolean first = true;
- for( final Map<DomainId, Set<String>> domain_id_to_secondary_features_map : domain_id_to_secondary_features_maps ) {
- final Set<String> sec_features = domain_id_to_secondary_features_map.get( new DomainId( domain_0 ) );
- if ( ( sec_features != null ) && ( sec_features.size() > 0 ) ) {
- for( final String sec_feature : sec_features ) {
- if ( first ) {
- first = false;
- }
- else {
- out.write( ", " );
- }
- if ( SurfacingConstants.SECONDARY_FEATURES_ARE_SCOP
- && ( SurfacingConstants.SECONDARY_FEATURES_SCOP_LINK != null ) ) {
- out.write( "<a href=\"" + SurfacingConstants.SECONDARY_FEATURES_SCOP_LINK + sec_feature
- + "\" target=\"scop_window\">" + sec_feature + "</a>" );
- }
- else {
- out.write( sec_feature );
- }
- }
- }
- }
- out.write( "]" );
- }
out.write( "</td>" );
}
}
}
- public static void writeTaxonomyLinksORIG( final Writer writer, final String species ) throws IOException {
- if ( ( species.length() > 1 ) && ( species.indexOf( '_' ) < 1 ) ) {
- final Matcher matcher = PATTERN_SP_STYLE_TAXONOMY.matcher( species );
- writer.write( " [" );
- if ( matcher.matches() ) {
- writer.write( "<a href=\"" + SurfacingConstants.UNIPROT_LINK + species
- + "\" target=\"taxonomy_window\">uniprot</a>" );
- }
- else {
- writer.write( "<a href=\"" + SurfacingConstants.EOL_LINK + species
- + "\" target=\"taxonomy_window\">eol</a>" );
- writer.write( "|" );
- writer.write( "<a href=\"" + SurfacingConstants.TOL_LINK + species
- + "\" target=\"taxonomy_window\">tol</a>" );
- writer.write( "|" );
- writer.write( "<a href=\"" + SurfacingConstants.WIKIPEDIA_LINK + species
- + "\" target=\"taxonomy_window\">wikipedia</a>" );
- writer.write( "|" );
- writer.write( "<a href=\"" + SurfacingConstants.GOOGLE_SCHOLAR_LINK + species
- + "\" target=\"taxonomy_window\">gs</a>" );
- }
- writer.write( "]" );
- }
- }
-
- private static void writeToNexus( final String outfile_name, final CharacterStateMatrix<BinaryStates> matrix ) {
- if ( !( matrix instanceof BasicCharacterStateMatrix ) ) {
- throw new IllegalArgumentException( "can only write matrices of type [" + BasicCharacterStateMatrix.class
- + "] to nexus" );
- }
- final BasicCharacterStateMatrix<BinaryStates> my_matrix = ( org.forester.evoinference.matrix.character.BasicCharacterStateMatrix<BinaryStates> ) matrix;
- try {
- final BufferedWriter w = new BufferedWriter( new FileWriter( outfile_name ) );
- w.write( NexusConstants.NEXUS );
- w.write( ForesterUtil.LINE_SEPARATOR );
- my_matrix.writeNexusTaxaBlock( w );
- my_matrix.writeNexusBinaryChractersBlock( w );
- w.flush();
- w.close();
- ForesterUtil.programMessage( surfacing.PRG_NAME, "Wrote Nexus file: \"" + outfile_name + "\"" );
- }
- catch ( final IOException e ) {
- ForesterUtil.fatalError( surfacing.PRG_NAME, e.getMessage() );
- }
- }
-
private static void writeToNexus( final String outfile_name,
final CharacterStateMatrix<BinaryStates> matrix,
final Phylogeny phylogeny ) {
}
}
- private static void writeToNexus( final String outfile_name, final DomainParsimonyCalculator domain_parsimony ) {
- writeToNexus( outfile_name + surfacing.NEXUS_EXTERNAL_DOMAINS,
- domain_parsimony.createMatrixOfDomainPresenceOrAbsence() );
- writeToNexus( outfile_name + surfacing.NEXUS_EXTERNAL_DOMAIN_COMBINATIONS,
- domain_parsimony.createMatrixOfBinaryDomainCombinationPresenceOrAbsence() );
- }
-
private static void writeToNexus( final String outfile_name,
final DomainParsimonyCalculator domain_parsimony,
final Phylogeny phylogeny ) {