final static private String SEQ_EXTRACT_OPTION = "prot_extract";
final static private char SEPARATOR_FOR_INPUT_VALUES = '#';
final static private String PRG_VERSION = "2.210";
- final static private String PRG_DATE = "2011.12.08";
+ final static private String PRG_DATE = "2012.02.21";
final static private String E_MAIL = "czmasek@burnham.org";
final static private String WWW = "www.phylosoft.org/forester/applications/surfacing";
final static private boolean IGNORE_DUFS_DEFAULT = true;
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists.txt";
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping.txt";
public static final String INDEPENDENT_DC_GAINS_FITCH_PARS_DC_FOR_GO_MAPPING_OUTPUT_UNIQUE_SUFFIX = "_indep_dc_gains_fitch_lists_for_go_mapping_unique.txt";
+ public static final String LIMIT_SPEC_FOR_PROT_EX = null; // Species id (e.g. "HUMAN") passed as the last argument to SurfacingUtil.extractProteinNames to restrict protein extraction to that species; set to null to disable the restriction (default).
private static void checkWriteabilityForPairwiseComparisons( final PrintableDomainSimilarity.PRINT_OPTION domain_similarity_print_option,
final String[][] input_file_properties,
SurfacingUtil.extractProteinNames( protein_list,
query_domain_ids_array[ j ],
query_domains_writer_ary[ j ],
- "\t" );
+ "\t",
+ LIMIT_SPEC_FOR_PROT_EX );
query_domains_writer_ary[ j ].flush();
}
catch ( final IOException e ) {
SurfacingUtil.checkForOutputFileWriteability( out );
try {
final Writer proteins_file_writer = new BufferedWriter( new FileWriter( out ) );
- SurfacingUtil.extractProteinNames( protein_lists_per_species, domain, proteins_file_writer, "\t" );
+ SurfacingUtil.extractProteinNames( protein_lists_per_species,
+ domain,
+ proteins_file_writer,
+ "\t",
+ LIMIT_SPEC_FOR_PROT_EX );
proteins_file_writer.close();
}
catch ( final IOException e ) {
public static void extractProteinNames( final List<Protein> proteins,
final List<DomainId> query_domain_ids_nc_order,
final Writer out,
- final String separator ) throws IOException {
+ final String separator,
+ final String limit_to_species ) throws IOException {
for( final Protein protein : proteins ) {
- if ( protein.contains( query_domain_ids_nc_order, true ) ) {
- out.write( protein.getSpecies().getSpeciesId() );
- out.write( separator );
- out.write( protein.getProteinId().getId() );
- out.write( separator );
- out.write( "[" );
- final Set<DomainId> visited_domain_ids = new HashSet<DomainId>();
- boolean first = true;
- for( final Domain domain : protein.getProteinDomains() ) {
- if ( !visited_domain_ids.contains( domain.getDomainId() ) ) {
- visited_domain_ids.add( domain.getDomainId() );
- if ( first ) {
- first = false;
- }
- else {
- out.write( " " );
- }
- out.write( domain.getDomainId().getId() );
- out.write( " {" );
- out.write( "" + domain.getTotalCount() );
- out.write( "}" );
- }
- }
- out.write( "]" );
- out.write( separator );
- if ( !( ForesterUtil.isEmpty( protein.getDescription() ) || protein.getDescription()
- .equals( SurfacingConstants.NONE ) ) ) {
- out.write( protein.getDescription() );
- }
- out.write( separator );
- if ( !( ForesterUtil.isEmpty( protein.getAccession() ) || protein.getAccession()
- .equals( SurfacingConstants.NONE ) ) ) {
- out.write( protein.getAccession() );
- }
- out.write( SurfacingConstants.NL );
- }
- }
- out.flush();
- }
-
- public static void extractProteinNames( final SortedMap<Species, List<Protein>> protein_lists_per_species,
- final DomainId domain_id,
- final Writer out,
- final String separator ) throws IOException {
- for( final Species species : protein_lists_per_species.keySet() ) {
- for( final Protein protein : protein_lists_per_species.get( species ) ) {
- final List<Domain> domains = protein.getProteinDomains( domain_id );
- if ( domains.size() > 0 ) {
- final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
- for( final Domain domain : domains ) {
- stats.addValue( domain.getPerSequenceEvalue() );
- }
+ if ( ForesterUtil.isEmpty( limit_to_species )
+ || protein.getSpecies().getSpeciesId().equalsIgnoreCase( limit_to_species ) ) {
+ if ( protein.contains( query_domain_ids_nc_order, true ) ) {
out.write( protein.getSpecies().getSpeciesId() );
out.write( separator );
out.write( protein.getProteinId().getId() );
out.write( separator );
- out.write( "[" + FORMATTER.format( stats.median() ) + "]" );
+ out.write( "[" );
+ final Set<DomainId> visited_domain_ids = new HashSet<DomainId>();
+ boolean first = true;
+ for( final Domain domain : protein.getProteinDomains() ) {
+ if ( !visited_domain_ids.contains( domain.getDomainId() ) ) {
+ visited_domain_ids.add( domain.getDomainId() );
+ if ( first ) {
+ first = false;
+ }
+ else {
+ out.write( " " );
+ }
+ out.write( domain.getDomainId().getId() );
+ out.write( " {" );
+ out.write( "" + domain.getTotalCount() );
+ out.write( "}" );
+ }
+ }
+ out.write( "]" );
out.write( separator );
if ( !( ForesterUtil.isEmpty( protein.getDescription() ) || protein.getDescription()
.equals( SurfacingConstants.NONE ) ) ) {
out.flush();
}
+ public static void extractProteinNames( final SortedMap<Species, List<Protein>> protein_lists_per_species, // writes one record per protein for which getProteinDomains( domain_id ) is non-empty
+ final DomainId domain_id,
+ final Writer out,
+ final String separator, // written between the columns of each record
+ final String limit_to_species ) throws IOException { // when non-empty, only proteins whose species id equals it (ignoring case) are written
+ for( final Species species : protein_lists_per_species.keySet() ) {
+ for( final Protein protein : protein_lists_per_species.get( species ) ) {
+ if ( ForesterUtil.isEmpty( limit_to_species ) // no species limit given ...
+ || protein.getSpecies().getSpeciesId().equalsIgnoreCase( limit_to_species ) ) { // ... or this protein's own species id matches the limit
+ final List<Domain> domains = protein.getProteinDomains( domain_id );
+ if ( domains.size() > 0 ) { // proteins with no domains returned for domain_id are skipped
+ final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
+ for( final Domain domain : domains ) {
+ stats.addValue( domain.getPerSequenceEvalue() ); // collect the value of getPerSequenceEvalue() of every returned domain
+ }
+ out.write( protein.getSpecies().getSpeciesId() ); // column 1: species id
+ out.write( separator );
+ out.write( protein.getProteinId().getId() ); // column 2: protein id
+ out.write( separator );
+ out.write( "[" + FORMATTER.format( stats.median() ) + "]" ); // column 3: median of the collected values, formatted and bracketed
+ out.write( separator );
+ if ( !( ForesterUtil.isEmpty( protein.getDescription() ) || protein.getDescription() // column 4: description, left blank when empty or equal to SurfacingConstants.NONE
+ .equals( SurfacingConstants.NONE ) ) ) {
+ out.write( protein.getDescription() );
+ }
+ out.write( separator );
+ if ( !( ForesterUtil.isEmpty( protein.getAccession() ) || protein.getAccession() // column 5: accession, left blank when empty or equal to SurfacingConstants.NONE
+ .equals( SurfacingConstants.NONE ) ) ) {
+ out.write( protein.getAccession() );
+ }
+ out.write( SurfacingConstants.NL ); // terminates the record
+ }
+ }
+ }
+ }
+ out.flush(); // flushed once after all records; the writer is not closed here
+ }
+
public static SortedSet<DomainId> getAllDomainIds( final List<GenomeWideCombinableDomains> gwcd_list ) {
final SortedSet<DomainId> all_domains_ids = new TreeSet<DomainId>();
for( final GenomeWideCombinableDomains gwcd : gwcd_list ) {