import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import org.forester.io.parsers.IteratingPhylogenyParser;
import org.forester.io.parsers.PhylogenyParser;
import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.util.ParserUtils;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.Accession;
import org.forester.phylogeny.data.Confidence;
-import org.forester.phylogeny.data.DomainArchitecture;
import org.forester.phylogeny.data.Event;
import org.forester.phylogeny.data.Identifier;
import org.forester.phylogeny.data.PhylogenyDataUtil;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.util.ForesterUtil;
-public final class NHXParser implements PhylogenyParser {
+public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParser {
public static final TAXONOMY_EXTRACTION TAXONOMY_EXTRACTION_DEFAULT = TAXONOMY_EXTRACTION.NO;
- public final static Pattern UC_LETTERS_NUMBERS_PATTERN = Pattern.compile( "^[A-Z0-9]+$" );
// Matches a string made up entirely of digits and '.' characters. The pattern
// does not check that the result is a well-formed number: "1.2.3" and "..."
// match as well.
public final static Pattern NUMBERS_ONLY_PATTERN = Pattern.compile( "^[0-9\\.]+$" );
// Finds "prob=" and captures everything after it up to (not including) the
// next comma in group 1.
// NOTE(review): "MB" presumably stands for MrBayes-style annotations -- confirm.
public final static Pattern MB_PROB_PATTERN = Pattern.compile( "prob=([^,]+)" );
// Finds "prob_stddev=" and captures everything after it up to (not including)
// the next comma in group 1.
public final static Pattern MB_PROB_SD_PATTERN = Pattern.compile( "prob_stddev=([^,]+)" );
return _taxonomy_extraction;
}
+ @Override
// Reports whether a phylogeny is currently held in _next, i.e. whether next()
// has something other than null to hand out.
public final boolean hasNext() {
    if ( _next == null ) {
        return false;
    }
    return true;
}
+ @Override
// Returns the phylogeny that was held in _next when this method was called
// (null if hasNext() was false at that point), after asking getNext() to
// advance the parser.
//
// Throws NHXFormatException or IOException if getNext() fails while advancing.
public final Phylogeny next() throws NHXFormatException, IOException {
    // Save the current value first: getNext() is invoked before returning,
    // and presumably replaces _next with the following phylogeny.
    final Phylogeny phy = _next;
    getNext();
    // Return the saved local. The previous code returned the undeclared
    // name "p", leaving "phy" unused.
    return phy;
}
+ @Override
// Resets the iteration state visible here: sets _i back to 0 and discards the
// phylogeny held in _next, so hasNext() reports false until _next is
// assigned again.
// NOTE(review): _i looks like the read position within the source -- confirm.
// NOTE(review): this excerpt may not show every statement of the method;
// verify against the full file before relying on this description.
public final void reset() throws NHXFormatException, IOException {
_i = 0;
_next = null;
}
private final void getNext() throws IOException, NHXFormatException {
+ if ( _source == null ) {
+ throw new IOException( "source is not set" );
+ }
while ( true ) {
char c = '\b';
if ( _input_type == BUFFERED_READER ) {
}
node_to_annotate.getNodeData().getTaxonomy().setIdentifier( new Identifier( s.substring( 2 ) ) );
}
- else if ( s.startsWith( NHXtags.DOMAIN_STRUCTURE ) ) {
- if ( !node_to_annotate.getNodeData().isHasSequence() ) {
- node_to_annotate.getNodeData().setSequence( new Sequence() );
- }
- node_to_annotate.getNodeData().getSequence()
- .setDomainArchitecture( new DomainArchitecture( s.substring( 3 ) ) );
- }
else if ( s.startsWith( NHXtags.SEQUENCE_ACCESSION ) ) {
if ( !node_to_annotate.getNodeData().isHasSequence() ) {
node_to_annotate.getNodeData().setSequence( new Sequence() );
}
// Modes for taxonomy extraction; the default declared in this class
// (TAXONOMY_EXTRACTION_DEFAULT) is NO.
// NOTE(review): what each mode other than NO does is decided by code not
// shown here -- confirm before relying on the constant names alone.
public static enum TAXONOMY_EXTRACTION {
- NO, YES, PFAM_STYLE_ONLY;
+ NO, PFAM_STYLE_RELAXED, PFAM_STYLE_STRICT, AGGRESSIVE;
}
}