4 // FORESTER -- software libraries and applications
5 // for evolutionary biology research and applications.
7 // Copyright (C) 2008-2009 Christian M. Zmasek
8 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
25 // Contact: phylosoft @ gmail . com
26 // WWW: https://sites.google.com/site/cmzmasek/home/software/forester
28 package org.forester.io.parsers;
30 import java.io.BufferedReader;
32 import java.io.FileReader;
33 import java.io.IOException;
34 import java.util.ArrayList;
35 import java.util.Date;
36 import java.util.HashSet;
37 import java.util.List;
40 import java.util.SortedSet;
41 import java.util.TreeMap;
42 import java.util.TreeSet;
44 import org.forester.protein.BasicDomain;
45 import org.forester.protein.BasicProtein;
46 import org.forester.protein.Domain;
47 import org.forester.protein.Protein;
48 import org.forester.util.ForesterUtil;
50 public final class HmmscanPerDomainTableParser {
52 private static final String RETRO = "RETRO";
53 private static final String PHAGE = "PHAGE";
54 private static final String VIR = "VIR";
55 private static final String TRANSPOS = "TRANSPOS";
56 private static final String RV = "RV";
57 private static final String GAG = "GAG_";
58 private static final String HCV = "HCV_";
59 private static final String HERPES = "HERPES_";
60 private static final String BACULO = "BACULO_";
61 private static final int E_VALUE_MAXIMUM_DEFAULT = -1;
62 private static final int LENGTH_RATIO_CUTOFF_DEFAULT = -1;
63 private static final ReturnType RETURN_TYPE_DEFAULT = ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN;
64 private static final boolean IGNORE_DUFS_DEFAULT = false;
65 private static final int MAX_ALLOWED_OVERLAP_DEFAULT = -1;
66 private final Set<String> _filter;
67 private final FilterType _filter_type;
68 private final File _input_file;
69 private final String _species;
70 private double _fs_e_value_maximum;
71 private double _i_e_value_maximum;
72 private double _rel_env_length_ratio_cutoff;
73 private Map<String, Double> _individual_score_cutoffs;
74 private boolean _ignore_dufs;
75 private boolean _ignore_virus_like_ids;
76 private int _max_allowed_overlap;
77 private boolean _ignore_engulfed_domains;
78 private ReturnType _return_type;
79 private int _proteins_encountered;
80 private int _proteins_ignored_due_to_filter;
81 private int _proteins_stored;
82 private int _domains_encountered;
83 private int _domains_ignored_due_to_duf;
84 private int _domains_ignored_due_to_overlap;
85 private int _domains_ignored_due_to_fs_e_value;
86 private int _domains_ignored_due_to_i_e_value;
87 private int _domains_ignored_due_to_rel_env_length_ratio_cutoff;
88 private int _domains_ignored_due_to_individual_score_cutoff;
89 private int _domains_stored;
90 private SortedSet<String> _domains_stored_set;
92 private int _domains_ignored_due_to_negative_domain_filter;
93 private Map<String, Integer> _domains_ignored_due_to_negative_domain_filter_counts_map;
94 private int _domains_ignored_due_to_virus_like_id;
95 private Map<String, Integer> _domains_ignored_due_to_virus_like_id_counts_map;
96 private final INDIVIDUAL_SCORE_CUTOFF _ind_cutoff;
97 private final boolean _allow_proteins_with_same_name;
99 public HmmscanPerDomainTableParser( final File input_file,
100 final String species,
101 final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to ) {
102 _input_file = input_file;
105 _filter_type = FilterType.NONE;
106 _ind_cutoff = individual_cutoff_applies_to;
107 _allow_proteins_with_same_name = false;
111 public HmmscanPerDomainTableParser( final File input_file,
112 final String species,
113 final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to,
114 final boolean allow_proteins_with_same_name ) {
115 _input_file = input_file;
118 _filter_type = FilterType.NONE;
119 _ind_cutoff = individual_cutoff_applies_to;
120 _allow_proteins_with_same_name = allow_proteins_with_same_name;
124 public HmmscanPerDomainTableParser( final File input_file,
125 final String species,
126 final Set<String> filter,
127 final FilterType filter_type,
128 final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to ) {
129 _input_file = input_file;
132 _filter_type = filter_type;
133 _ind_cutoff = individual_cutoff_applies_to;
134 _allow_proteins_with_same_name = false;
138 public HmmscanPerDomainTableParser( final File input_file,
139 final String species,
140 final Set<String> filter,
141 final FilterType filter_type,
142 final INDIVIDUAL_SCORE_CUTOFF individual_cutoff_applies_to,
143 final boolean allow_proteins_with_same_name ) {
144 _input_file = input_file;
147 _filter_type = filter_type;
148 _ind_cutoff = individual_cutoff_applies_to;
149 _allow_proteins_with_same_name = allow_proteins_with_same_name;
153 public boolean isAllowProteinsWithSameName() {
154 return _allow_proteins_with_same_name;
157 private void actuallyAddProtein( final List<Protein> proteins, final Protein current_protein ) {
158 final List<Domain> l = current_protein.getProteinDomains();
159 for( final Domain d : l ) {
160 getDomainsStoredSet().add( d.getDomainId() );
162 proteins.add( current_protein );
166 private void addProtein( final List<Protein> proteins, Protein current_protein ) {
167 if ( ( getMaxAllowedOverlap() != HmmscanPerDomainTableParser.MAX_ALLOWED_OVERLAP_DEFAULT )
168 || isIgnoreEngulfedDomains() ) {
169 final int domains_count = current_protein.getNumberOfProteinDomains();
170 current_protein = ForesterUtil.removeOverlappingDomains( getMaxAllowedOverlap(),
171 isIgnoreEngulfedDomains(),
173 final int domains_removed = domains_count - current_protein.getNumberOfProteinDomains();
174 _domains_stored -= domains_removed;
175 _domains_ignored_due_to_overlap += domains_removed;
177 if ( ( getFilterType() == FilterType.POSITIVE_PROTEIN )
178 || ( getFilterType() == FilterType.NEGATIVE_PROTEIN ) ) {
179 final Set<String> domain_ids_in_protein = new HashSet<String>();
180 for( final Domain d : current_protein.getProteinDomains() ) {
181 domain_ids_in_protein.add( d.getDomainId() );
183 domain_ids_in_protein.retainAll( getFilter() );
184 if ( getFilterType() == FilterType.POSITIVE_PROTEIN ) {
185 if ( domain_ids_in_protein.size() > 0 ) {
186 actuallyAddProtein( proteins, current_protein );
189 ++_proteins_ignored_due_to_filter;
193 if ( domain_ids_in_protein.size() < 1 ) {
194 actuallyAddProtein( proteins, current_protein );
197 ++_proteins_ignored_due_to_filter;
202 actuallyAddProtein( proteins, current_protein );
206 public int getDomainsEncountered() {
207 return _domains_encountered;
210 public int getDomainsIgnoredDueToDuf() {
211 return _domains_ignored_due_to_duf;
214 public int getDomainsIgnoredDueToIEval() {
215 return _domains_ignored_due_to_i_e_value;
218 public int getDomainsIgnoredDueToRelEnvLengthRatioCutoff() {
219 return _domains_ignored_due_to_rel_env_length_ratio_cutoff;
224 public int getDomainsIgnoredDueToFsEval() {
225 return _domains_ignored_due_to_fs_e_value;
228 public int getDomainsIgnoredDueToIndividualScoreCutoff() {
229 return _domains_ignored_due_to_individual_score_cutoff;
232 public int getDomainsIgnoredDueToNegativeDomainFilter() {
233 return _domains_ignored_due_to_negative_domain_filter;
236 public Map<String, Integer> getDomainsIgnoredDueToNegativeDomainFilterCountsMap() {
237 return _domains_ignored_due_to_negative_domain_filter_counts_map;
240 public int getDomainsIgnoredDueToOverlap() {
241 return _domains_ignored_due_to_overlap;
244 public Map<String, Integer> getDomainsIgnoredDueToVirusLikeIdCountsMap() {
245 return _domains_ignored_due_to_virus_like_id_counts_map;
248 public int getDomainsIgnoredDueToVirusLikeIds() {
249 return _domains_ignored_due_to_virus_like_id;
252 public int getDomainsStored() {
253 return _domains_stored;
256 public SortedSet<String> getDomainsStoredSet() {
257 return _domains_stored_set;
260 private double getFsEValueMaximum() {
261 return _fs_e_value_maximum;
264 private double getIEValueMaximum() {
265 return _i_e_value_maximum;
268 private double getRelEnvLengthRatioCutoff() {
269 return _rel_env_length_ratio_cutoff;
272 private Set<String> getFilter() {
276 private FilterType getFilterType() {
280 public INDIVIDUAL_SCORE_CUTOFF getIndividualCutoffAppliesTo() {
284 private Map<String, Double> getIndividualScoreCutoffs() {
285 return _individual_score_cutoffs;
288 private File getInputFile() {
292 private int getMaxAllowedOverlap() {
293 return _max_allowed_overlap;
296 public int getProteinsEncountered() {
297 return _proteins_encountered;
300 public int getProteinsIgnoredDueToFilter() {
301 return _proteins_ignored_due_to_filter;
304 public int getProteinsStored() {
305 return _proteins_stored;
308 private ReturnType getReturnType() {
312 private String getSpecies() {
316 public long getTime() {
320 private void init() {
321 _fs_e_value_maximum = E_VALUE_MAXIMUM_DEFAULT;
322 _i_e_value_maximum = E_VALUE_MAXIMUM_DEFAULT;
323 _rel_env_length_ratio_cutoff = LENGTH_RATIO_CUTOFF_DEFAULT;
324 setIgnoreDufs( HmmscanPerDomainTableParser.IGNORE_DUFS_DEFAULT );
325 setReturnType( HmmscanPerDomainTableParser.RETURN_TYPE_DEFAULT );
326 _max_allowed_overlap = HmmscanPerDomainTableParser.MAX_ALLOWED_OVERLAP_DEFAULT;
327 setIndividualScoreCutoffs( null );
328 setIgnoreEngulfedDomains( false );
329 setIgnoreVirusLikeIds( false );
333 private void intitCounts() {
334 setDomainsStoredSet( new TreeSet<String>() );
335 setDomainsEncountered( 0 );
336 setProteinsEncountered( 0 );
337 setProteinsIgnoredDueToFilter( 0 );
338 setDomainsIgnoredDueToNegativeFilter( 0 );
339 setDomainsIgnoredDueToDuf( 0 );
340 setDomainsIgnoredDueToFsEval( 0 );
341 setDomainsIgnoredDueToIEval( 0 );
342 setDomainsIgnoredDueToRelEnvLengthRatioCutoff( 0 );
343 setDomainsIgnoredDueToIndividualScoreCutoff( 0 );
344 setDomainsIgnoredDueToVirusLikeId( 0 );
345 setDomainsIgnoredDueToOverlap( 0 );
346 setDomainsStored( 0 );
347 setProteinsStored( 0 );
349 setDomainsIgnoredDueToVirusLikeIdCountsMap( new TreeMap<String, Integer>() );
350 setDomainsIgnoredDueToNegativeDomainFilterCountsMap( new TreeMap<String, Integer>() );
353 private boolean isIgnoreDufs() {
357 private boolean isIgnoreEngulfedDomains() {
358 return _ignore_engulfed_domains;
361 private boolean isIgnoreVirusLikeIds() {
362 return _ignore_virus_like_ids;
365 public List<Protein> parse() throws IOException {
366 if ( ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.NONE )
367 && ( ( getIndividualScoreCutoffs() == null ) || ( getIndividualScoreCutoffs().size() < 1 ) ) ) {
368 throw new RuntimeException( "attempt to use individual cuttoffs with having set them" );
371 final Set<String> prev_queries = new HashSet<String>();
372 final String error = ForesterUtil.isReadableFile( getInputFile() );
373 if ( !ForesterUtil.isEmpty( error ) ) {
374 throw new IOException( error );
376 final BufferedReader br = new BufferedReader( new FileReader( getInputFile() ) );
378 final List<Protein> proteins = new ArrayList<Protein>();
379 Protein current_protein = null;
381 final long start_time = new Date().getTime();
382 String prev_query = "";
384 while ( ( line = br.readLine() ) != null ) {
386 if ( ForesterUtil.isEmpty( line ) || line.startsWith( "#" ) ) {
389 // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
390 // # --- full sequence --- -------------- this domain ------------- hmm coord ali coord env coord
391 // # target name accession tlen query name accession qlen E-value score bias # of c-Evalue i-Evalue score bias from to from to from to acc description of target
392 // #------------------- ---------- ----- -------------------- ---------- ----- --------- ------ ----- --- --- --------- --------- ------ ----- ----- ----- ----- ----- ----- ----- ---- ---------------------
393 // Ion_trans PF00520.24 201 jgi|Nemve1|7|gw.28.1.1 - 1604 6.3e-169 557.4 95.3 1 4 1.5e-41 3e-38 130.8 11.1 3 171 140 307 139 346 0.81 Ion transport protein
394 // Ion_trans PF00520.24 201 jgi|Nemve1|7|gw.28.1.1 - 1604 6.3e-169 557.4 95.3 2 4 9.1e-45 1.8e-41 141.3 13.1 4 200 479 664 476 665 0.97 Ion transport protein
395 // Ion_trans PF00520.24 201 jgi|Nemve1|7|gw.28.1.1 - 1604 6.3e-169 557.4 95.3 3 4 5.2e-45 1e-41 142.1 14.0 1 201 900 1117 900 1117 0.96 Ion transport protein
396 // Ion_trans PF00520.24 201 jgi|Nemve1|7|gw.28.1.1 - 1604 6.3e-169 557.4 95.3 4 4 9.2e-51 1.8e-47 160.9 11.3 1 201 1217 1423 1217 1423 0.97 Ion transport protein
397 // PKD_channel PF08016.5 426 jgi|Nemve1|7|gw.28.1.1 - 1604 5.9e-19 67.4 70.5 1 8 0.00053 1.1 7.3 0.4 220 264 142 191 134 200 0.73 Polycystin cation channel
398 final String tokens[] = line.split( "\\s+" );
399 final String target_id = tokens[ 0 ];
400 final String target_acc = tokens[ 1 ];
401 final int tlen = parseInt( tokens[ 2 ], line_number, "tlen" );
402 final String query = tokens[ 3 ];
403 final String query_acc = tokens[ 4 ];
404 final int qlen = parseInt( tokens[ 5 ], line_number, "qlen" );
405 final double fs_e_value = parseDouble( tokens[ 6 ], line_number, "E-value" );
406 final double fs_score = parseDouble( tokens[ 7 ], line_number, "score" );
407 final int domain_number = parseInt( tokens[ 9 ], line_number, "count" );
408 final int total_domains = parseInt( tokens[ 10 ], line_number, "total" );
409 final double c_e_value = parseDouble( tokens[ 11 ], line_number, "c-Evalue" );
410 final double i_e_value = parseDouble( tokens[ 12 ], line_number, "i-Evalue" );
411 final double domain_score = parseDouble( tokens[ 13 ], line_number, "score" );
412 final int hmm_from = parseInt( tokens[ 15 ], line_number, "hmm from" );
413 final int hmm_to = parseInt( tokens[ 16 ], line_number, "hmm to" );
414 final int ali_from = parseInt( tokens[ 17 ], line_number, "ali from" );
415 final int ali_to = parseInt( tokens[ 18 ], line_number, "ali to" );
416 final int env_from = parseInt( tokens[ 19 ], line_number, "env from" );
417 final int env_to = parseInt( tokens[ 20 ], line_number, "env to" );
418 ++_domains_encountered;
419 if ( !query.equals( prev_query ) || ( qlen != prev_qlen ) ) {
420 if ( !isAllowProteinsWithSameName() ) {
421 if ( query.equals( prev_query ) ) {
422 throw new IOException( "more than one protein named [" + query + "]" + " lengths: " + qlen
423 + ", " + prev_qlen );
425 if ( prev_queries.contains( query ) ) {
426 throw new IOException( "more than one protein named [" + query + "]" );
431 prev_queries.add( query );
432 if ( ( current_protein != null ) && ( current_protein.getProteinDomains().size() > 0 ) ) {
433 addProtein( proteins, current_protein );
435 if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) {
436 current_protein = new BasicProtein( query, getSpecies(), qlen );
439 throw new IllegalArgumentException( "unknown return type" );
442 boolean failed_cutoff = false;
443 if ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.NONE ) {
444 if ( getIndividualScoreCutoffs().containsKey( target_id ) ) {
445 final double cutoff = getIndividualScoreCutoffs().get( target_id );
446 if ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.FULL_SEQUENCE ) {
447 if ( fs_score < cutoff ) {
448 failed_cutoff = true;
451 else if ( getIndividualCutoffAppliesTo() != INDIVIDUAL_SCORE_CUTOFF.DOMAIN ) {
452 if ( domain_score < cutoff ) {
453 failed_cutoff = true;
458 throw new IOException( "could not find a score cutoff value for domain id \"" + target_id
459 + "\" [line " + line_number + "] in [" + getInputFile().getCanonicalPath() + "]" );
462 final String uc_id = target_id.toUpperCase();
463 final int env_length = 1 + env_to - env_from;
464 if ( failed_cutoff ) {
465 ++_domains_ignored_due_to_individual_score_cutoff;
467 else if ( ali_from == ali_to ) {
470 else if ( ( getFsEValueMaximum() != HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT )
471 && ( fs_e_value > getFsEValueMaximum() ) ) {
472 ++_domains_ignored_due_to_fs_e_value;
474 else if ( ( getIEValueMaximum() != HmmscanPerDomainTableParser.E_VALUE_MAXIMUM_DEFAULT )
475 && ( i_e_value > getIEValueMaximum() ) ) {
476 ++_domains_ignored_due_to_i_e_value;
479 else if ( ( getRelEnvLengthRatioCutoff() > 0.0 )
480 && ( env_length < ( getRelEnvLengthRatioCutoff() * tlen) ) ) {
481 ++_domains_ignored_due_to_rel_env_length_ratio_cutoff;
484 else if ( isIgnoreDufs() && uc_id.startsWith( "DUF" ) ) {
485 ++_domains_ignored_due_to_duf;
487 else if ( isIgnoreVirusLikeIds()
488 && ( uc_id.contains( VIR ) || uc_id.contains( PHAGE ) || uc_id.contains( RETRO )
489 || uc_id.contains( TRANSPOS ) || uc_id.startsWith( RV ) || uc_id.startsWith( GAG )
490 || uc_id.startsWith( HCV ) || uc_id.startsWith( HERPES ) || uc_id.startsWith( BACULO ) ) ) {
491 ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToVirusLikeIdCountsMap(), target_id );
492 ++_domains_ignored_due_to_virus_like_id;
494 else if ( ( getFilterType() == FilterType.NEGATIVE_DOMAIN ) && getFilter().contains( target_id ) ) {
495 ++_domains_ignored_due_to_negative_domain_filter;
496 ForesterUtil.increaseCountingMap( getDomainsIgnoredDueToNegativeDomainFilterCountsMap(), target_id );
500 final Domain pd = new BasicDomain( target_id,
503 ( short ) domain_number,
504 ( short ) total_domains,
510 current_protein.addProteinDomain( pd );
512 catch ( final IllegalArgumentException e ) {
513 throw new IOException( "problem with domain parsing at line " + line_number + "[" + line + "]: "
518 } // while ( ( line = br.readLine() ) != null )
519 if ( ( current_protein != null ) && ( current_protein.getProteinDomains().size() > 0 ) ) {
520 addProtein( proteins, current_protein );
522 setProteinsEncountered( prev_queries.size() );
523 setTime( new Date().getTime() - start_time );
527 private double parseDouble( final String double_str, final int line_number, final String label )
531 d = Double.valueOf( double_str ).doubleValue();
533 catch ( final NumberFormatException e ) {
534 throw new IOException( "could not parse \" +label + \" from \"" + double_str + "\" [line " + line_number
535 + "] in [" + getInputFile().getCanonicalPath() + "]" );
540 private int parseInt( final String double_str, final int line_number, final String label ) throws IOException {
543 i = Integer.valueOf( double_str ).intValue();
545 catch ( final NumberFormatException e ) {
546 throw new IOException( "could not parse \"" + label + "\" from \"" + double_str + "\" [line " + line_number
547 + "] in [" + getInputFile().getCanonicalPath() + "]" );
552 private void setDomainsEncountered( final int domains_encountered ) {
553 _domains_encountered = domains_encountered;
556 private void setDomainsIgnoredDueToDuf( final int domains_ignored_due_to_duf ) {
557 _domains_ignored_due_to_duf = domains_ignored_due_to_duf;
560 private void setDomainsIgnoredDueToFsEval( final int domains_ignored_due_to_fs_e_value ) {
561 _domains_ignored_due_to_fs_e_value = domains_ignored_due_to_fs_e_value;
564 private void setDomainsIgnoredDueToIEval( final int domains_ignored_due_to_i_e_value ) {
565 _domains_ignored_due_to_i_e_value = domains_ignored_due_to_i_e_value;
568 private void setDomainsIgnoredDueToRelEnvLengthRatioCutoff( final int domains_ignored_due_to_rel_env_length_ratio_cutoff ) {
569 _domains_ignored_due_to_rel_env_length_ratio_cutoff = domains_ignored_due_to_rel_env_length_ratio_cutoff;
574 private void setDomainsIgnoredDueToIndividualScoreCutoff( final int domains_ignored_due_to_individual_score_cutoff ) {
575 _domains_ignored_due_to_individual_score_cutoff = domains_ignored_due_to_individual_score_cutoff;
578 private void setDomainsIgnoredDueToNegativeDomainFilterCountsMap( final Map<String, Integer> domains_ignored_due_to_negative_domain_filter_counts_map ) {
579 _domains_ignored_due_to_negative_domain_filter_counts_map = domains_ignored_due_to_negative_domain_filter_counts_map;
582 private void setDomainsIgnoredDueToNegativeFilter( final int domains_ignored_due_to_negative_domain_filter ) {
583 _domains_ignored_due_to_negative_domain_filter = domains_ignored_due_to_negative_domain_filter;
586 private void setDomainsIgnoredDueToOverlap( final int domains_ignored_due_to_overlap ) {
587 _domains_ignored_due_to_overlap = domains_ignored_due_to_overlap;
590 private void setDomainsIgnoredDueToVirusLikeId( final int i ) {
591 _domains_ignored_due_to_virus_like_id = i;
594 private void setDomainsIgnoredDueToVirusLikeIdCountsMap( final Map<String, Integer> domains_ignored_due_to_virus_like_id_counts_map ) {
595 _domains_ignored_due_to_virus_like_id_counts_map = domains_ignored_due_to_virus_like_id_counts_map;
598 private void setDomainsStored( final int domains_stored ) {
599 _domains_stored = domains_stored;
602 private void setDomainsStoredSet( final SortedSet<String> _storeddomains_stored ) {
603 _domains_stored_set = _storeddomains_stored;
606 public void setFsEValueMaximum( final double fs_e_value_maximum ) {
607 if ( fs_e_value_maximum < 0.0 ) {
608 throw new IllegalArgumentException( "attempt to set the maximum E-value to a negative value" );
610 _fs_e_value_maximum = fs_e_value_maximum;
613 public void setIEValueMaximum( final double i_e_value_maximum ) {
614 if ( i_e_value_maximum < 0.0 ) {
615 throw new IllegalArgumentException( "attempt to set the maximum E-value to a negative value" );
617 _i_e_value_maximum = i_e_value_maximum;
620 public void setRelEnvLengthRatioCutoff( final double rel_env_length_ratio_cutoff ) {
621 if ( rel_env_length_ratio_cutoff <= 0.0 ) {
622 throw new IllegalArgumentException( "attempt to set rel env length ratio cutoff to zero or a negative value" );
624 _rel_env_length_ratio_cutoff = rel_env_length_ratio_cutoff;
627 public void setIgnoreDufs( final boolean ignore_dufs ) {
628 _ignore_dufs = ignore_dufs;
632 * To ignore domains which are completely engulfed by domains (individual
633 * ones or stretches of overlapping ones) with better support values.
636 * @param ignored_engulfed_domains
638 public void setIgnoreEngulfedDomains( final boolean ignore_engulfed_domains ) {
639 _ignore_engulfed_domains = ignore_engulfed_domains;
642 public void setIgnoreVirusLikeIds( final boolean ignore_virus_like_ids ) {
643 _ignore_virus_like_ids = ignore_virus_like_ids;
647 * Sets the individual score cutoff values (for example, gathering
648 * thresholds from Pfam). Domain ids are the keys, cutoffs the values.
650 * @param individual_score_cutoffs
652 public void setIndividualScoreCutoffs( final Map<String, Double> individual_score_cutoffs ) {
653 _individual_score_cutoffs = individual_score_cutoffs;
656 public void setMaxAllowedOverlap( final int max_allowed_overlap ) {
657 if ( max_allowed_overlap < 0 ) {
658 throw new IllegalArgumentException( "Attempt to set max allowed overlap to less than zero." );
660 _max_allowed_overlap = max_allowed_overlap;
663 private void setProteinsEncountered( final int proteins_encountered ) {
664 _proteins_encountered = proteins_encountered;
667 private void setProteinsIgnoredDueToFilter( final int proteins_ignored_due_to_filter ) {
668 _proteins_ignored_due_to_filter = proteins_ignored_due_to_filter;
671 private void setProteinsStored( final int proteins_stored ) {
672 _proteins_stored = proteins_stored;
675 public void setReturnType( final ReturnType return_type ) {
676 _return_type = return_type;
679 private void setTime( final long time ) {
683 public static enum FilterType {
690 static public enum INDIVIDUAL_SCORE_CUTOFF {
696 public static enum ReturnType {
697 UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN