4 // FORESTER -- software libraries and applications
5 // for evolutionary biology research and applications.
7 // Copyright (C) 2008-2009 Christian M. Zmasek
8 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
25 // Contact: phylosoft @ gmail . com
26 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
28 package org.forester.io.parsers;
30 import java.io.BufferedReader;
32 import java.io.FileReader;
33 import java.io.IOException;
34 import java.util.ArrayList;
35 import java.util.Date;
36 import java.util.HashSet;
37 import java.util.List;
40 import java.util.SortedSet;
41 import java.util.TreeMap;
42 import java.util.TreeSet;
44 import org.forester.protein.BasicDomain;
45 import org.forester.protein.BasicProtein;
46 import org.forester.protein.Domain;
47 import org.forester.protein.Protein;
48 import org.forester.util.ForesterUtil;
/**
 * Parser for the whitespace-separated per-domain hit table whose column
 * layout is shown in the comment inside parse(). The result is a list of
 * Protein objects carrying the Domain hits that survive the configured
 * filters; counters describing what was kept or dropped are exposed through
 * the getters of this class.
 */
50 public final class HmmscanPerDomainTableParser {
// Markers tested against the upper-cased target id in parse() when
// virus-like ids are to be ignored: VIR, PHAGE, RETRO and TRANSPOS are
// matched with contains(), the remaining ones with startsWith().
52 private static final String RETRO = "RETRO";
53 private static final String PHAGE = "PHAGE";
54 private static final String VIR = "VIR";
55 private static final String TRANSPOS = "TRANSPOS";
56 private static final String RV = "RV";
57 private static final String GAG = "GAG_";
58 private static final String HCV = "HCV_";
59 private static final String HERPES = "HERPES_";
60 private static final String BACULO = "BACULO_";
// Sentinel: parse() applies an E-value maximum only when the configured
// value differs from this default.
61 private static final int E_VALUE_MAXIMUM_DEFAULT = -1;
// Defaults applied by init().
62 private static final ReturnType RETURN_TYPE_DEFAULT = ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN;
63 private static final boolean IGNORE_DUFS_DEFAULT = false;
// Sentinel: addProtein() runs overlap removal only when the configured
// overlap differs from this default (or engulfed domains are ignored).
64 private static final int MAX_ALLOWED_OVERLAP_DEFAULT = -1;
// Compile-time switches consulted in the filter chain of parse().
65 private static final boolean IGNORE_REPLACED_RRMS = false;
66 private static final boolean IGNORE_hGDE_amylase = true; //TODO eventually remove me, added 10/22/13
// Fixed configuration. NOTE(review): the constructor assignments of _filter
// and _species, and the imports for File, Set and Map, are not visible in
// the reviewed listing -- confirm against the full source.
67 private final Set<String> _filter;
68 private final FilterType _filter_type;
69 private final File _input_file;
70 private final String _species;
// Adjustable settings; init() gives them their defaults.
71 private double _fs_e_value_maximum;
72 private double _i_e_value_maximum;
73 private Map<String, Double> _individual_score_cutoffs;
74 private boolean _ignore_dufs;
75 private boolean _ignore_virus_like_ids;
76 private int _max_allowed_overlap;
77 private boolean _ignore_engulfed_domains;
78 private ReturnType _return_type;
// Statistics; intitCounts() resets them.
79 private int _proteins_encountered;
80 private int _proteins_ignored_due_to_filter;
81 private int _proteins_stored;
82 private int _domains_encountered;
83 private int _domains_ignored_due_to_duf;
84 private int _domains_ignored_due_to_overlap;
85 private int _domains_ignored_due_to_fs_e_value;
86 private int _domains_ignored_due_to_i_e_value;
87 private int _domains_ignored_due_to_individual_score_cutoff;
88 private int _domains_stored;
// Ids of all domains of stored proteins (filled by actuallyAddProtein()).
89 private SortedSet<String> _domains_stored_set;
91 private int _domains_ignored_due_to_negative_domain_filter;
// Keyed by target id; maintained through ForesterUtil.increaseCountingMap().
92 private Map<String, Integer> _domains_ignored_due_to_negative_domain_filter_counts_map;
93 private int _domains_ignored_due_to_virus_like_id;
// Keyed by target id; maintained through ForesterUtil.increaseCountingMap().
94 private Map<String, Integer> _domains_ignored_due_to_virus_like_id_counts_map;
// Selects which score the individual cutoffs are compared with in parse().
95 private final INDIVIDUAL_SCORE_CUTOFF _ind_cutoff;
// When false, parse() throws on a repeated protein (query) name.
96 private final boolean _allow_proteins_with_same_name;
/**
 * Creates a parser without a filter (FilterType.NONE) that does not allow
 * two proteins with the same name.
 * <p>
 * NOTE(review): the reviewed listing skips lines inside this constructor
 * (one in the parameter list, two after the first assignment, one before
 * the end). The final fields _species and _filter are presumably assigned
 * there -- confirm against the full source.
 *
 * @param input_file the table file to parse
 * @param individual_cutoff_applies_to which score the individual cutoffs apply to
 */
98 public HmmscanPerDomainTableParser( final File input_file,
100 final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to ) {
101 _input_file = input_file;
104 _filter_type = FilterType.NONE;
105 _ind_cutoff = individual_cutoff_applies_to;
// Repeated protein names make parse() throw.
106 _allow_proteins_with_same_name = false;
/**
 * Creates a parser without a filter (FilterType.NONE), letting the caller
 * decide whether repeated protein names are accepted.
 * <p>
 * NOTE(review): the reviewed listing skips two lines after the first
 * assignment and one before the end of this constructor; the final fields
 * _species and _filter are presumably assigned there -- confirm.
 *
 * @param input_file the table file to parse
 * @param species species label; its use is not visible in this listing
 * @param individual_cutoff_applies_to which score the individual cutoffs apply to
 * @param allow_proteins_with_same_name whether repeated protein names are accepted
 */
110 public HmmscanPerDomainTableParser( final File input_file,
111 final String species,
112 final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to,
113 final boolean allow_proteins_with_same_name ) {
114 _input_file = input_file;
117 _filter_type = FilterType.NONE;
118 _ind_cutoff = individual_cutoff_applies_to;
119 _allow_proteins_with_same_name = allow_proteins_with_same_name;
/**
 * Creates a filtering parser that does not allow two proteins with the
 * same name.
 * <p>
 * NOTE(review): the reviewed listing skips two lines after the first
 * assignment and one before the end of this constructor; the final fields
 * _species and _filter are presumably assigned there -- confirm.
 *
 * @param input_file the table file to parse
 * @param species species label; its use is not visible in this listing
 * @param filter domain ids used by the filter; its assignment is not visible in this listing
 * @param filter_type how the filter is applied
 * @param individual_cutoff_applies_to which score the individual cutoffs apply to
 */
123 public HmmscanPerDomainTableParser( final File input_file,
124 final String species,
125 final Set<String> filter,
126 final FilterType filter_type,
127 final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to ) {
128 _input_file = input_file;
131 _filter_type = filter_type;
132 _ind_cutoff = individual_cutoff_applies_to;
// Repeated protein names make parse() throw.
133 _allow_proteins_with_same_name = false;
/**
 * Creates a filtering parser, letting the caller decide whether repeated
 * protein names are accepted. This is the most general constructor visible
 * in the listing.
 * <p>
 * NOTE(review): the reviewed listing skips two lines after the first
 * assignment and one before the end of this constructor; the final fields
 * _species and _filter are presumably assigned there -- confirm.
 *
 * @param input_file the table file to parse
 * @param species species label; its use is not visible in this listing
 * @param filter domain ids used by the filter; its assignment is not visible in this listing
 * @param filter_type how the filter is applied
 * @param individual_cutoff_applies_to which score the individual cutoffs apply to
 * @param allow_proteins_with_same_name whether repeated protein names are accepted
 */
137 public HmmscanPerDomainTableParser( final File input_file,
138 final String species,
139 final Set<String> filter,
140 final FilterType filter_type,
141 final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to,
142 final boolean allow_proteins_with_same_name ) {
143 _input_file = input_file;
146 _filter_type = filter_type;
147 _ind_cutoff = individual_cutoff_applies_to;
148 _allow_proteins_with_same_name = allow_proteins_with_same_name;
152 public boolean isAllowProteinsWithSameName() {
153 return _allow_proteins_with_same_name;
/**
 * Stores a protein unconditionally: records the id of each of its domains
 * in the stored-domain set and appends the protein to the result list.
 * <p>
 * NOTE(review): the reviewed listing skips one non-structural line after
 * the proteins.add() call (possibly a counter update) -- confirm against
 * the full source.
 *
 * @param proteins the result list to append to
 * @param current_protein the protein to store
 */
156 private void actuallyAddProtein( final List<Protein> proteins, final Protein current_protein ) {
157 final List<Domain> l = current_protein.getProteinDomains();
158 for( final Domain d : l ) {
// A set is used, so every domain id is recorded once however often it occurs.
159 getDomainsStoredSet().add( d.getDomainId() );
161 proteins.add( current_protein );
165 private void addProtein( final List<Protein> proteins, Protein current_protein ) {
166 if ( ( getMaxAllowedOverlap() != HmmscanPerDomainTableParser.MAX_ALLOWED_OVERLAP_DEFAULT )
167 || isIgnoreEngulfedDomains() ) {
168 final int domains_count = current_protein.getNumberOfProteinDomains();
169 current_protein = ForesterUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
170 isIgnoreEngulfedDomains(),
172 final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains();
173 _domains_stored -= domains_removed;
174 _domains_ignored_due_to_overlap += domains_removed;
176 if ( ( getFilterType() == FilterType.POSITIVE_PROTEIN ) || ( getFilterType() == FilterType.NEGATIVE_PROTEIN ) ) {
177 final Set<String> domain_ids_in_protein = new HashSet<String>();
178 for( final Domain d : current_protein.getProteinDomains() ) {
179 domain_ids_in_protein.add( d.getDomainId() );
181 domain_ids_in_protein.retainAll( getFilter() );
182 if ( getFilterType() == FilterType.POSITIVE_PROTEIN ) {
183 if ( domain_ids_in_protein.size() > 0 ) {
184 actuallyAddProtein( proteins, current_protein );
187 ++_proteins_ignored_due_to_filter;
191 if ( domain_ids_in_protein.size() < 1 ) {
192 actuallyAddProtein( proteins, current_protein );
195 ++_proteins_ignored_due_to_filter;
200 actuallyAddProtein( proteins, current_protein );
204 public int getDomainsEncountered() {
205 return _domains_encountered;
208 public int getDomainsIgnoredDueToDuf() {
209 return _domains_ignored_due_to_duf;
212 public int getDomainsIgnoredDueToIEval() {
213 return _domains_ignored_due_to_i_e_value;
216 public int getDomainsIgnoredDueToFsEval() {
217 return _domains_ignored_due_to_fs_e_value;
220 public int getDomainsIgnoredDueToIndividualScoreCutoff() {
221 return _domains_ignored_due_to_individual_score_cutoff;
224 public int getDomainsIgnoredDueToNegativeDomainFilter() {
225 return _domains_ignored_due_to_negative_domain_filter;
228 public Map<String, Integer> getDomainsIgnoredDueToNegativeDomainFilterCountsMap() {
229 return _domains_ignored_due_to_negative_domain_filter_counts_map;
232 public int getDomainsIgnoredDueToOverlap() {
233 return _domains_ignored_due_to_overlap;
236 public Map<String, Integer> getDomainsIgnoredDueToVirusLikeIdCountsMap() {
237 return _domains_ignored_due_to_virus_like_id_counts_map;
240 public int getDomainsIgnoredDueToVirusLikeIds() {
241 return _domains_ignored_due_to_virus_like_id;
244 public int getDomainsStored() {
245 return _domains_stored;
248 public SortedSet<String> getDomainsStoredSet() {
249 return _domains_stored_set;
252 private double getFsEValueMaximum() {
253 return _fs_e_value_maximum;
256 private double getIEValueMaximum() {
257 return _i_e_value_maximum;
260 private Set<String> getFilter() {
264 private FilterType getFilterType() {
268 public INDIVIDUAL_SCORE_CUTOFF getIndividualCutoffAppliesTo() {
272 private Map<String, Double> getIndividualScoreCutoffs() {
273 return _individual_score_cutoffs;
276 private File getInputFile() {
280 private int getMaxAllowedOverlap() {
281 return _max_allowed_overlap;
284 public int getProteinsEncountered() {
285 return _proteins_encountered;
288 public int getProteinsIgnoredDueToFilter() {
289 return _proteins_ignored_due_to_filter;
292 public int getProteinsStored() {
293 return _proteins_stored;
296 private ReturnType getReturnType() {
300 private String getSpecies() {
304 public long getTime() {
/**
 * Puts every adjustable setting back to its default: no E-value maxima, no
 * overlap limit, the default return type, no individual score cutoffs, and
 * DUFs, engulfed domains and virus-like ids all kept.
 * <p>
 * NOTE(review): the reviewed listing skips one non-structural line at the
 * end of this method (possibly a call that resets the counters) -- confirm
 * against the full source.
 */
308 private void init() {
// Assigned directly: the public setters reject negative values such as the
// -1 sentinel used here.
309 _fs_e_value_maximum = HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT;
310 _i_e_value_maximum = HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT;
311 setIgnoreDufs( HmmscanPerDomainTableParser.IGNORE_DUFS_DEFAULT );
312 setReturnType( HmmscanPerDomainTableParser.RETURN_TYPE_DEFAULT );
// Assigned directly for the same reason: setMaxAllowedOverlap() rejects
// values below zero.
313 _max_allowed_overlap = HmmscanPerDomainTableParser.MAX_ALLOWED_OVERLAP_DEFAULT;
314 setIndividualScoreCutoffs( null );
315 setIgnoreEngulfedDomains( false );
316 setIgnoreVirusLikeIds( false );
/**
 * Resets all statistics: every counter goes to zero and the stored-domain
 * set and both counting maps are replaced by fresh, empty sorted
 * collections.
 * <p>
 * The method name is misspelled ("intit"); it is kept because code outside
 * this listing may call it by that name.
 * <p>
 * NOTE(review): the reviewed listing skips one line between
 * setProteinsStored( 0 ) and the map setters (possibly another reset) --
 * confirm against the full source.
 */
320 private void intitCounts() {
321 setDomainsStoredSet( new TreeSet<String>() );
322 setDomainsEncountered( 0 );
323 setProteinsEncountered( 0 );
324 setProteinsIgnoredDueToFilter( 0 );
325 setDomainsIgnoredDueToNegativeFilter( 0 );
326 setDomainsIgnoredDueToDuf( 0 );
327 setDomainsIgnoredDueToFsEval( 0 );
328 setDomainsIgnoredDueToIEval( 0 );
329 setDomainsIgnoredDueToIndividualScoreCutoff( 0 );
330 setDomainsIgnoredDueToVirusLikeId( 0 );
331 setDomainsIgnoredDueToOverlap( 0 );
332 setDomainsStored( 0 );
333 setProteinsStored( 0 );
// TreeMap keeps the per-id counts sorted by target id.
335 setDomainsIgnoredDueToVirusLikeIdCountsMap( new TreeMap<String, Integer>() );
336 setDomainsIgnoredDueToNegativeDomainFilterCountsMap( new TreeMap<String, Integer>() );
339 private boolean isIgnoreDufs() {
343 private boolean isIgnoreEngulfedDomains() {
344 return _ignore_engulfed_domains;
347 private boolean isIgnoreVirusLikeIds() {
348 return _ignore_virus_like_ids;
351 public List<Protein> parse() throws IOException {
352 if ( ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.NONE )
353 && ( ( getIndividualScoreCutoffs() == null ) || ( getIndividualScoreCutoffs().size() < 1 ) ) ) {
354 throw new RuntimeException( "attempt to use individual cuttoffs with having set them" );
357 final Set<String> prev_queries = new HashSet<String>();
358 final String error = ForesterUtil.isReadableFile( getInputFile() );
359 if ( !ForesterUtil.isEmpty( error ) ) {
360 throw new IOException( error );
362 final BufferedReader br = new BufferedReader( new FileReader( getInputFile() ) );
364 final List<Protein> proteins = new ArrayList<Protein>();
365 Protein current_protein = null;
367 final long start_time = new Date().getTime();
368 String prev_query = "";
370 while ( ( line = br.readLine() ) != null ) {
372 if ( ForesterUtil.isEmpty( line ) || line.startsWith( "#" ) ) {
375 // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
376 // # --- full sequence --- -------------- this domain ------------- hmm coord ali coord env coord
377 // # target name accession tlen query name accession qlen E-value score bias # of c-Evalue i-Evalue score bias from to from to from to acc description of target
378 // #------------------- ---------- ----- -------------------- ---------- ----- --------- ------ ----- --- --- --------- --------- ------ ----- ----- ----- ----- ----- ----- ----- ---- ---------------------
379 // Ion_trans PF00520.24 201 jgi|Nemve1|7|gw.28.1.1 - 1604 6.3e-169 557.4 95.3 1 4 1.5e-41 3e-38 130.8 11.1 3 171 140 307 139 346 0.81 Ion transport protein
380 // Ion_trans PF00520.24 201 jgi|Nemve1|7|gw.28.1.1 - 1604 6.3e-169 557.4 95.3 2 4 9.1e-45 1.8e-41 141.3 13.1 4 200 479 664 476 665 0.97 Ion transport protein
381 // Ion_trans PF00520.24 201 jgi|Nemve1|7|gw.28.1.1 - 1604 6.3e-169 557.4 95.3 3 4 5.2e-45 1e-41 142.1 14.0 1 201 900 1117 900 1117 0.96 Ion transport protein
382 // Ion_trans PF00520.24 201 jgi|Nemve1|7|gw.28.1.1 - 1604 6.3e-169 557.4 95.3 4 4 9.2e-51 1.8e-47 160.9 11.3 1 201 1217 1423 1217 1423 0.97 Ion transport protein
383 // PKD_channel PF08016.5 426 jgi|Nemve1|7|gw.28.1.1 - 1604 5.9e-19 67.4 70.5 1 8 0.00053 1.1 7.3 0.4 220 264 142 191 134 200 0.73 Polycystin cation channel
384 final String tokens[] = line.split( "\\s+" );
385 final String target_id = tokens[ 0 ];
386 final String target_acc = tokens[ 1 ];
387 final int tlen = parseInt( tokens[ 2 ], line_number, "tlen" );
388 final String query = tokens[ 3 ];
389 final String query_acc = tokens[ 4 ];
390 final int qlen = parseInt( tokens[ 5 ], line_number, "qlen" );
391 final double fs_e_value = parseDouble( tokens[ 6 ], line_number, "E-value" );
392 final double fs_score = parseDouble( tokens[ 7 ], line_number, "score" );
393 final int domain_number = parseInt( tokens[ 9 ], line_number, "count" );
394 final int total_domains = parseInt( tokens[ 10 ], line_number, "total" );
395 final double c_e_value = parseDouble( tokens[ 11 ], line_number, "c-Evalue" );
396 final double i_e_value = parseDouble( tokens[ 12 ], line_number, "i-Evalue" );
397 final double domain_score = parseDouble( tokens[ 13 ], line_number, "score" );
398 final int hmm_from = parseInt( tokens[ 15 ], line_number, "hmm from" );
399 final int hmm_to = parseInt( tokens[ 16 ], line_number, "hmm to" );
400 final int ali_from = parseInt( tokens[ 17 ], line_number, "ali from" );
401 final int ali_to = parseInt( tokens[ 18 ], line_number, "ali to" );
402 final int env_from = parseInt( tokens[ 19 ], line_number, "env from" );
403 final int env_to = parseInt( tokens[ 20 ], line_number, "env to" );
404 ++_domains_encountered;
405 if ( !query.equals( prev_query ) || ( qlen != prev_qlen ) ) {
406 if ( !isAllowProteinsWithSameName() ) {
407 if ( query.equals( prev_query ) ) {
408 throw new IOException( "more than one protein named [" + query + "]" + " lengths: " + qlen
409 + ", " + prev_qlen );
411 if ( prev_queries.contains( query ) ) {
412 throw new IOException( "more than one protein named [" + query + "]" );
417 prev_queries.add( query );
418 if ( ( current_protein != null ) && ( current_protein.getProteinDomains().size() > 0 ) ) {
419 addProtein( proteins, current_protein );
421 if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) {
422 current_protein = new BasicProtein( query, getSpecies(), qlen );
425 throw new IllegalArgumentException( "unknown return type" );
428 boolean failed_cutoff = false;
429 if ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.NONE ) {
430 if ( getIndividualScoreCutoffs().containsKey( target_id ) ) {
431 final double cutoff = getIndividualScoreCutoffs().get( target_id );
432 if ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.FULL_SEQUENCE ) {
433 if ( fs_score < cutoff ) {
434 failed_cutoff = true;
437 else if ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.DOMAIN ) {
438 if ( domain_score < cutoff ) {
439 failed_cutoff = true;
444 throw new IOException( "could not find a score cutoff value for domain id \"" + target_id
445 + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
448 final String uc_id = target_id.toUpperCase();
449 if ( failed_cutoff ) {
450 ++_domains_ignored_due_to_individual_score_cutoff;
452 else if ( ali_from == ali_to ) {
455 else if ( ( getFsEValueMaximum() != HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT )
456 && ( fs_e_value > getFsEValueMaximum() ) ) {
457 ++_domains_ignored_due_to_fs_e_value;
459 else if ( ( getIEValueMaximum() != HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT )
460 && ( i_e_value > getIEValueMaximum() ) ) {
461 ++_domains_ignored_due_to_i_e_value;
463 else if ( isIgnoreDufs() && uc_id.startsWith( "DUF" ) ) {
464 ++_domains_ignored_due_to_duf;
466 else if ( IGNORE_REPLACED_RRMS
467 && ( uc_id.contains( "RRM_1" ) || uc_id.contains( "RRM_3" ) || uc_id.contains( "RRM_5" ) || uc_id
468 .contains( "RRM_6" ) ) ) {
470 else if ( IGNORE_hGDE_amylase && ( uc_id.equals( "hGDE_amylase" ) ) ) {
472 else if ( isIgnoreVirusLikeIds()
473 && ( uc_id.contains( VIR ) || uc_id.contains( PHAGE ) || uc_id.contains( RETRO )
474 || uc_id.contains( TRANSPOS ) || uc_id.startsWith( RV ) || uc_id.startsWith( GAG )
475 || uc_id.startsWith( HCV ) || uc_id.startsWith( HERPES ) || uc_id.startsWith( BACULO ) ) ) {
476 ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToVirusLikeIdCountsMap(), target_id );
477 ++_domains_ignored_due_to_virus_like_id;
479 else if ( ( getFilterType() == FilterType.NEGATIVE_DOMAIN ) && getFilter().contains( target_id ) ) {
480 ++_domains_ignored_due_to_negative_domain_filter;
481 ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), target_id );
485 final Domain pd = new BasicDomain( target_id,
488 ( short ) domain_number,
489 ( short ) total_domains,
492 current_protein.addProteinDomain( pd );
494 catch ( final IllegalArgumentException e ) {
495 throw new IOException( "problem with domain parsing at line " + line_number + "[" + line + "]: "
500 } // while ( ( line = br.readLine() ) != null )
501 if ( ( current_protein != null ) && ( current_protein.getProteinDomains().size() > 0 ) ) {
502 addProtein( proteins, current_protein );
504 setProteinsEncountered( prev_queries.size() );
505 setTime( new Date().getTime() - start_time );
509 private double parseDouble( final String double_str, final int line_number, final String label ) throws IOException {
512 d = Double.valueOf( double_str ).doubleValue();
514 catch ( final NumberFormatException e ) {
515 throw new IOException( "could not parse \" +label + \" from \"" + double_str + "\" [line " + line_number
516 + "] in [" + getInputFile().getCanonicalPath() + "]" );
521 private int parseInt( final String double_str, final int line_number, final String label ) throws IOException {
524 i = Integer.valueOf( double_str ).intValue();
526 catch ( final NumberFormatException e ) {
527 throw new IOException( "could not parse \"" + label + "\" from \"" + double_str + "\" [line " + line_number
528 + "] in [" + getInputFile().getCanonicalPath() + "]" );
533 private void setDomainsEncountered( final int domains_encountered ) {
534 _domains_encountered = domains_encountered;
537 private void setDomainsIgnoredDueToDuf( final int domains_ignored_due_to_duf ) {
538 _domains_ignored_due_to_duf = domains_ignored_due_to_duf;
541 private void setDomainsIgnoredDueToFsEval( final int domains_ignored_due_to_fs_e_value ) {
542 _domains_ignored_due_to_fs_e_value = domains_ignored_due_to_fs_e_value;
545 private void setDomainsIgnoredDueToIEval( final int domains_ignored_due_to_i_e_value ) {
546 _domains_ignored_due_to_i_e_value = domains_ignored_due_to_i_e_value;
549 private void setDomainsIgnoredDueToIndividualScoreCutoff( final int domains_ignored_due_to_individual_score_cutoff ) {
550 _domains_ignored_due_to_individual_score_cutoff = domains_ignored_due_to_individual_score_cutoff;
553 private void setDomainsIgnoredDueToNegativeDomainFilterCountsMap( final Map<String, Integer> domains_ignored_due_to_negative_domain_filter_counts_map ) {
554 _domains_ignored_due_to_negative_domain_filter_counts_map = domains_ignored_due_to_negative_domain_filter_counts_map;
557 private void setDomainsIgnoredDueToNegativeFilter( final int domains_ignored_due_to_negative_domain_filter ) {
558 _domains_ignored_due_to_negative_domain_filter = domains_ignored_due_to_negative_domain_filter;
561 private void setDomainsIgnoredDueToOverlap( final int domains_ignored_due_to_overlap ) {
562 _domains_ignored_due_to_overlap = domains_ignored_due_to_overlap;
565 private void setDomainsIgnoredDueToVirusLikeId( final int i ) {
566 _domains_ignored_due_to_virus_like_id = i;
569 private void setDomainsIgnoredDueToVirusLikeIdCountsMap( final Map<String, Integer> domains_ignored_due_to_virus_like_id_counts_map ) {
570 _domains_ignored_due_to_virus_like_id_counts_map = domains_ignored_due_to_virus_like_id_counts_map;
573 private void setDomainsStored( final int domains_stored ) {
574 _domains_stored = domains_stored;
577 private void setDomainsStoredSet( final SortedSet<String> _storeddomains_stored ) {
578 _domains_stored_set = _storeddomains_stored;
581 public void setFsEValueMaximum( final double fs_e_value_maximum ) {
582 if ( fs_e_value_maximum < 0.0 ) {
583 throw new IllegalArgumentException( "attempt to set the maximum E-value to a negative value" );
585 _fs_e_value_maximum = fs_e_value_maximum;
588 public void setIEValueMaximum( final double i_e_value_maximum ) {
589 if ( i_e_value_maximum < 0.0 ) {
590 throw new IllegalArgumentException( "attempt to set the maximum E-value to a negative value" );
592 _i_e_value_maximum = i_e_value_maximum;
595 public void setIgnoreDufs( final boolean ignore_dufs ) {
596 _ignore_dufs = ignore_dufs;
600 * To ignore domains which are completely engulfed by domains (individual
601 * ones or stretches of overlapping ones) with better support values.
604 * @param ignored_engulfed_domains
606 public void setIgnoreEngulfedDomains( final boolean ignore_engulfed_domains ) {
607 _ignore_engulfed_domains = ignore_engulfed_domains;
610 public void setIgnoreVirusLikeIds( final boolean ignore_virus_like_ids ) {
611 _ignore_virus_like_ids = ignore_virus_like_ids;
615 * Sets the individual score cutoff values (for example, gathering
616 * thresholds from Pfam). Domain ids are the keys, cutoffs the values.
618 * @param individual_score_cutoffs
620 public void setIndividualScoreCutoffs( final Map<String, Double> individual_score_cutoffs ) {
621 _individual_score_cutoffs = individual_score_cutoffs;
624 public void setMaxAllowedOverlap( final int max_allowed_overlap ) {
625 if ( max_allowed_overlap < 0 ) {
626 throw new IllegalArgumentException( "Attempt to set max allowed overlap to less than zero." );
628 _max_allowed_overlap = max_allowed_overlap;
631 private void setProteinsEncountered( final int proteins_encountered ) {
632 _proteins_encountered = proteins_encountered;
635 private void setProteinsIgnoredDueToFilter( final int proteins_ignored_due_to_filter ) {
636 _proteins_ignored_due_to_filter = proteins_ignored_due_to_filter;
639 private void setProteinsStored( final int proteins_stored ) {
640 _proteins_stored = proteins_stored;
643 public void setReturnType( final ReturnType return_type ) {
644 _return_type = return_type;
647 private void setTime( final long time ) {
/**
 * How the filter set of domain ids is applied.
 * NONE applies no filter. POSITIVE_PROTEIN keeps a protein only if it has at
 * least one domain in the filter. NEGATIVE_PROTEIN keeps a protein only if
 * it has none. NEGATIVE_DOMAIN drops the individual domains whose id is in
 * the filter.
 */
public enum FilterType {
    NONE,
    POSITIVE_PROTEIN,
    NEGATIVE_PROTEIN,
    NEGATIVE_DOMAIN
}
/**
 * Selects which score of a hit the individual score cutoffs are compared
 * with in parse(); NONE switches the individual cutoffs off.
 */
public enum INDIVIDUAL_SCORE_CUTOFF {
    FULL_SEQUENCE,
    DOMAIN,
    NONE
}
/**
 * The kinds of result parse() can build; currently only one exists, for
 * which parse() creates a BasicProtein per query.
 */
public enum ReturnType {
    UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN
}