// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.io.parsers.nhx;
import org.forester.phylogeny.PhylogenyMethods;
import org.forester.phylogeny.PhylogenyNode;
import org.forester.phylogeny.data.Accession;
-import org.forester.phylogeny.data.Annotation;
import org.forester.phylogeny.data.Confidence;
import org.forester.phylogeny.data.DomainArchitecture;
import org.forester.phylogeny.data.Event;
import org.forester.phylogeny.data.Identifier;
import org.forester.phylogeny.data.PhylogenyDataUtil;
-import org.forester.phylogeny.data.PropertiesMap;
-import org.forester.phylogeny.data.Property;
import org.forester.phylogeny.data.Sequence;
import org.forester.phylogeny.data.Taxonomy;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
// Input-source type tags: which concrete object backs getNhxSource()
// (dispatched on in parse() and set in setSource()).
final static private byte STRING_BUFFER = 1;
final static private byte CHAR_ARRAY = 2;
final static private byte BUFFERED_READER = 3;
// Added by this patch: StringBuilder is now accepted as an NHX source.
+ final static private byte STRING_BUILDER = 4;
private boolean _guess_rootedness;
// Removed by this patch, together with hasNext()/setHasNext() further down.
- private boolean _has_next;
private boolean _ignore_quotes;
private byte _input_type;
private int _source_length;
return _taxonomy_extraction;
}
// Removed by this patch: the iterator-style hasNext() accessor is dropped;
// its only backing state was the _has_next flag, which this patch also removes.
- public boolean hasNext() {
- return _has_next;
- }
-
/**
 * Parses the source set with setSource( final Object nhx_source ). Returns
 * the Phylogenies found in the source as Phylogeny[].
 */
@Override
public Phylogeny[] parse() throws IOException, NHXFormatException {
// Removed by this patch: parse() no longer maintains the hasNext() flag.
- setHasNext( false );
boolean in_comment = false;
boolean saw_colon = false;
boolean saw_open_bracket = false;
setCurrentPhylogeny( null );
setCurrentNode( null );
int i = 0;
// Added by this patch: the source is cast to its concrete type ONCE here,
// instead of re-casting getNhxSource() on every character read inside the
// loop below. Exactly one of these locals becomes non-null.
+ String my_source_str = null;
+ StringBuffer my_source_sbuff = null;
+ StringBuilder my_source_sbuil = null;
+ char[] my_source_charary = null;
+ BufferedReader my_source_br = null;
+ switch ( getInputType() ) {
+ case STRING:
+ my_source_str = ( String ) getNhxSource();
+ break;
+ case STRING_BUFFER:
+ my_source_sbuff = ( StringBuffer ) getNhxSource();
+ break;
+ case STRING_BUILDER:
+ my_source_sbuil = ( StringBuilder ) getNhxSource();
+ break;
+ case CHAR_ARRAY:
+ my_source_charary = ( char[] ) getNhxSource();
+ break;
+ case BUFFERED_READER:
+ my_source_br = ( BufferedReader ) getNhxSource();
+ break;
+ default:
+ throw new RuntimeException( "unknown input type" );
+ }
// Main per-character read loop. NOTE(review): this hunk is cut off before
// the loop body/termination — confirm loop-exit handling in the full file.
while ( true ) {
char c = '\b';
if ( getInputType() == NHXParser.BUFFERED_READER ) {
- final int ci = ( ( BufferedReader ) getNhxSource() ).read();
+ final int ci = my_source_br.read();
// BufferedReader.read() returns -1 at end of stream; ci >= 0 is a real char.
if ( ci >= 0 ) {
c = ( char ) ci;
}
// NOTE(review): the diff context appears trimmed here — the reader's EOF
// path and the start of the non-reader 'else' branch are not both visible;
// verify this 'else' pairs with the outer BUFFERED_READER check in the full file.
else {
switch ( getInputType() ) {
case STRING:
- c = ( ( String ) getNhxSource() ).charAt( i );
+ c = my_source_str.charAt( i );
break;
case STRING_BUFFER:
- c = ( ( StringBuffer ) getNhxSource() ).charAt( i );
+ c = my_source_sbuff.charAt( i );
+ break;
+ case STRING_BUILDER:
+ c = my_source_sbuil.charAt( i );
break;
case CHAR_ARRAY:
- c = ( ( char[] ) getNhxSource() )[ i ];
+ c = my_source_charary[ i ];
break;
}
}
// NOTE(review): fragment begins mid-method (source-type detection in setSource);
// the String branch's opening 'if' is above this hunk.
setSourceLength( ( ( String ) nhx_source ).length() );
setNhxSource( nhx_source );
}
// Added by this patch: accept StringBuilder sources.
+ else if ( nhx_source instanceof StringBuilder ) {
+ setInputType( NHXParser.STRING_BUILDER );
+ setSourceLength( ( ( StringBuilder ) nhx_source ).length() );
+ setNhxSource( nhx_source );
+ }
else if ( nhx_source instanceof StringBuffer ) {
setInputType( NHXParser.STRING_BUFFER );
setSourceLength( ( ( StringBuffer ) nhx_source ).length() );
setNhxSource( nhx_source );
}
// BUG(review): this is an exact duplicate of the StringBuilder branch added
// above — the same hunk appears to have been applied twice. Since the first
// 'instanceof StringBuilder' test already matches, this second branch is
// unreachable dead code and should be deleted from the patch.
+ else if ( nhx_source instanceof StringBuilder ) {
+ setInputType( NHXParser.STRING_BUILDER );
+ setSourceLength( ( ( StringBuilder ) nhx_source ).length() );
+ setNhxSource( nhx_source );
+ }
else if ( nhx_source instanceof char[] ) {
setInputType( NHXParser.CHAR_ARRAY );
setSourceLength( ( ( char[] ) nhx_source ).length );
// NOTE(review): the leading '+' on the next two lines is Java string
// concatenation inside a thrown exception message whose 'throw new ...(' line
// is outside this hunk — TODO confirm; the message presumably should now also
// mention StringBuilder as an accepted source type.
+ " StringBuffer, char[], File," + " or InputStream " + " [attempt to parse object of "
+ nhx_source.getClass() + "]." );
}
// Removed by this patch along with the _has_next flag and hasNext().
- setHasNext( true );
}
public void setTaxonomyExtraction( final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction ) {
// NOTE(review): hunk splice — the body shown here resets parser defaults and
// never assigns taxonomy_extraction, so these lines almost certainly belong to
// a different method (a reset/init routine) whose header was trimmed from the
// diff context; verify against the full file.
setReplaceUnderscores( REPLACE_UNDERSCORES_DEFAULT );
setGuessRootedness( GUESS_ROOTEDNESS_DEFAULT );
setIgnoreQuotes( IGNORE_QUOTES_DEFAULT );
// Removed by this patch along with the _has_next flag.
- setHasNext( false );
}
private boolean isGuessRootedness() {
_current_phylogeny = current_phylogeny;
}
// Removed by this patch: private mutator for the (also removed) _has_next
// flag; with hasNext() gone it had no remaining callers.
- private void setHasNext( final boolean has_next ) {
- _has_next = has_next;
- }
-
// Records which kind of backing object (STRING, STRING_BUFFER, CHAR_ARRAY,
// BUFFERED_READER, ...) the parser source was recognized as; read back via
// getInputType() when dispatching character reads in parse().
private void setInputType( final byte input_type ) {
    this._input_type = input_type;
}
// NOTE(review): fragment cut at both ends — this is the interior of the
// per-token NHX-tag dispatch (annotating node_to_annotate from tokens of s).
if ( !s.startsWith( ":" ) ) {
node_to_annotate.setName( t.nextToken() );
if ( !replace_underscores && ( !is_nhx && ( taxonomy_extraction != TAXONOMY_EXTRACTION.NO ) ) ) {
// This patch replaces the inline taxonomy-code extraction (extract code,
// create Taxonomy if absent, set code) with a single delegated call.
- final String tax = ParserUtils.extractTaxonomyCodeFromNodeName( node_to_annotate.getName(),
- taxonomy_extraction );
- if ( !ForesterUtil.isEmpty( tax ) ) {
- if ( !node_to_annotate.getNodeData().isHasTaxonomy() ) {
- node_to_annotate.getNodeData().setTaxonomy( new Taxonomy() );
- }
- node_to_annotate.getNodeData().getTaxonomy().setTaxonomyCode( tax );
- }
+ ParserUtils.extractTaxonomyDataFromNodeName( node_to_annotate, taxonomy_extraction );
}
}
// NOTE(review): the loop body below is trimmed by the diff — the dangling
// taxonomy/scientific-name lines belong to branches whose headers are not visible.
while ( t.hasMoreTokens() ) {
}
node_to_annotate.getNodeData().getTaxonomy().setScientificName( s.substring( 2 ) );
}
// Removed by this patch: handling of the NHX ANNOTATION tag (sequence
// annotations are no longer created from NHX input).
- else if ( s.startsWith( org.forester.io.parsers.nhx.NHXtags.ANNOTATION ) ) {
- if ( !node_to_annotate.getNodeData().isHasSequence() ) {
- node_to_annotate.getNodeData().setSequence( new Sequence() );
- }
- final Annotation annotation = new Annotation( "_:_" );
- annotation.setDesc( s.substring( 3 ) );
- node_to_annotate.getNodeData().getSequence().addAnnotation( annotation );
- }
else if ( s.startsWith( org.forester.io.parsers.nhx.NHXtags.IS_DUPLICATION ) ) {
if ( ( s.charAt( 2 ) == 'Y' ) || ( s.charAt( 2 ) == 'T' ) ) {
node_to_annotate.getNodeData().setEvent( Event.createSingleDuplicationEvent() );
PhylogenyMethods.setBranchColorValue( node_to_annotate, c );
}
}
// Removed by this patch: CUSTOM_DATA_ON_NODE support (PropertiesMap/Property
// imports are removed at the top of the file to match).
- else if ( s.startsWith( NHXtags.CUSTOM_DATA_ON_NODE ) ) {
- if ( !node_to_annotate.getNodeData().isHasProperties() ) {
- node_to_annotate.getNodeData().setProperties( new PropertiesMap() );
- }
- node_to_annotate.getNodeData().getProperties().addProperty( Property.createFromNhxString( s ) );
- }
else if ( s.startsWith( NHXtags.DOMAIN_STRUCTURE ) ) {
if ( !node_to_annotate.getNodeData().isHasSequence() ) {
node_to_annotate.getNodeData().setSequence( new Sequence() );
node_to_annotate.getNodeData().getSequence()
.setDomainArchitecture( new DomainArchitecture( s.substring( 3 ) ) );
}
// Removed by this patch: NODE_IDENTIFIER tag support.
- else if ( s.startsWith( NHXtags.NODE_IDENTIFIER ) ) {
- node_to_annotate.getNodeData().setNodeIdentifier( new Identifier( s.substring( 3 ) ) );
- }
else if ( s.startsWith( NHXtags.SEQUENCE_ACCESSION ) ) {
if ( !node_to_annotate.getNodeData().isHasSequence() ) {
node_to_annotate.getNodeData().setSequence( new Sequence() );
}
// NOTE(review): a SEQUENCE_ACCESSION tag setting the sequence NAME looks like
// diff-context splicing (the Accession import is retained above) — presumably
// the accession branch constructs an Accession in the full file; TODO confirm.
node_to_annotate.getNodeData().getSequence().setName( s.substring( 3 ) );
}
// Removed by this patch: GENE_NAME_SYNONYM tag support.
- else if ( s.startsWith( NHXtags.GENE_NAME_SYNONYM ) ) {
- if ( !node_to_annotate.getNodeData().isHasSequence() ) {
- node_to_annotate.getNodeData().setSequence( new Sequence() );
- }
- node_to_annotate.getNodeData().getSequence().setName( s.substring( 2 ) );
- }
// Fallback: a token with no '=' is interpreted as a branch length.
else if ( s.indexOf( '=' ) < 0 ) {
if ( node_to_annotate.getDistanceToParent() != PhylogenyDataUtil.BRANCH_LENGTH_DEFAULT ) {
throw new NHXFormatException( "error in NHX formatted data: more than one distance to parent:"