final NHXParser nhx = ( NHXParser ) p;
nhx.setReplaceUnderscores( false );
nhx.setIgnoreQuotes( true );
- nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGRESSIVE );
+ nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
}
else if ( p instanceof NexusPhylogeniesParser ) {
final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p;
nex.setReplaceUnderscores( false );
nex.setIgnoreQuotes( true );
- nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGRESSIVE );
+ nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
}
else {
throw new RuntimeException( "unknown parser type: " + p );
}
public static enum TAXONOMY_EXTRACTION {
- NO, AGRESSIVE, PFAM_STYLE_RELAXED, PFAM_STYLE_STRICT;
+ NO, PFAM_STYLE_RELAXED, PFAM_STYLE_STRICT; // AGRESSIVE dropped; the call sites touched by this change now pass PFAM_STYLE_RELAXED
}
}
}
}
}
- if ( ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
- || ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGRESSIVE ) ) {
+ if ( ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) ) {
final Matcher m1 = TAXOMONY_CODE_PATTERN_1.matcher( name );
if ( m1.matches() ) {
return name;
public final static String extractTaxonomyDataFromNodeName( final PhylogenyNode node,
final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction )
throws PhyloXmlDataFormatException {
+ if ( taxonomy_extraction == TAXONOMY_EXTRACTION.NO ) {
+ throw new IllegalArgumentException();
+ }
final String id = extractUniprotTaxonomyIdFromNodeName( node.getName(), taxonomy_extraction );
if ( !ForesterUtil.isEmpty( id ) ) {
if ( !node.getNodeData().isHasTaxonomy() ) {
return code;
}
}
- else if ( ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
- || ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGRESSIVE ) ) {
+ else if ( ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) ) {
final String sn = extractScientificNameFromNodeName( node.getName() );
if ( !ForesterUtil.isEmpty( sn ) ) {
if ( !node.getNodeData().isHasTaxonomy() ) {
public final static String extractUniprotTaxonomyIdFromNodeName( final String name,
final TAXONOMY_EXTRACTION taxonomy_extraction ) {
if ( ( name.indexOf( "_" ) > 0 )
- && ( ( taxonomy_extraction != TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ) || ( name.indexOf( "/" ) > 4 ) ) ) {
+ && ( ( taxonomy_extraction == TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) || ( name.indexOf( "/" ) > 4 ) ) ) {
final String[] s = name.split( "[_\\s]" );
if ( s.length > 1 ) {
final String str = s[ 1 ];
}
}
}
- if ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGRESSIVE ) {
- final Matcher m1 = TAXOMONY_UNIPROT_ID_PATTERN_1.matcher( name );
- if ( m1.matches() ) {
- return name;
- }
- }
return null;
}
}
public static StringBuilder toFasta( final Sequence seq, final int width ) {
- final StringBuilder sb = new StringBuilder();
- sb.append( ">" );
- sb.append( seq.getIdentifier().toString() );
- sb.append( ForesterUtil.LINE_SEPARATOR );
- if ( ( width < 1 ) || ( width >= seq.getLength() ) ) {
- sb.append( seq.getMolecularSequence() );
- }
- else {
- final int lines = seq.getLength() / width;
- final int rest = seq.getLength() - ( lines * width );
- for( int i = 0; i < lines; ++i ) {
- sb.append( seq.getMolecularSequence(), i * width, width );
- if ( i < ( lines - 1 ) ) {
- sb.append( ForesterUtil.LINE_SEPARATOR );
- }
- }
- if ( rest > 0 ) {
- sb.append( ForesterUtil.LINE_SEPARATOR );
- sb.append( seq.getMolecularSequence(), lines * width, rest );
- }
- }
- return sb;
+ return toFasta( seq.getIdentifier(), seq.getMolecularSequenceAsString(), width ); // delegate to the String-based overload rather than keep a second copy of the line-wrapping loop
}
public static StringBuilder toFasta( final String name, final String mol_seq, final int width ) {
final int lines = mol_seq.length() / width;
final int rest = mol_seq.length() - ( lines * width );
for( int i = 0; i < lines; ++i ) {
- sb.append( mol_seq, i * width, width );
+ sb.append( mol_seq, i * width, ( i + 1 ) * width );
if ( i < ( lines - 1 ) ) {
sb.append( ForesterUtil.LINE_SEPARATOR );
}
}
if ( rest > 0 ) {
sb.append( ForesterUtil.LINE_SEPARATOR );
- sb.append( mol_seq, lines * width, rest );
+ sb.append( mol_seq, lines * width, mol_seq.length() );
}
}
return sb;
final NHXParser nhx = ( NHXParser ) p;
nhx.setReplaceUnderscores( false );
nhx.setIgnoreQuotes( true );
- nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGRESSIVE );
+ nhx.setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
}
else if ( p instanceof NexusPhylogeniesParser ) {
final NexusPhylogeniesParser nex = ( NexusPhylogeniesParser ) p;
nex.setReplaceUnderscores( false );
nex.setIgnoreQuotes( true );
- nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.AGRESSIVE );
+ nex.setTaxonomyExtraction( TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
}
return factory.create( gene_trees_file, p );
}
final NHXParser nhx = new NHXParser();
nhx.setReplaceUnderscores( false );
nhx.setIgnoreQuotes( true );
- nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.AGRESSIVE );
+ nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
//
final String gene_trees_00_str = "(MOUSE,RAT);(MOUSE,RAT);(MOUSE,RAT);(RAT,MOUSE);";
final Phylogeny[] gene_trees_00 = factory.create( gene_trees_00_str, nhx );
final NHXParser nhx = new NHXParser();
nhx.setReplaceUnderscores( false );
nhx.setIgnoreQuotes( true );
- nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.AGRESSIVE );
+ nhx.setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
final String gene_trees_1_str = "(((((MOUSE,RAT),HUMAN),CAEEL),YEAST),ARATH);"
+ "((((MOUSE,RAT),HUMAN),(ARATH,YEAST)),CAEEL);" + "((MOUSE,RAT),(((ARATH,YEAST),CAEEL),HUMAN));"
+ "(((((MOUSE,HUMAN),RAT),CAEEL),YEAST),ARATH);" + "((((HUMAN,MOUSE),RAT),(ARATH,YEAST)),CAEEL);";
import org.forester.io.parsers.tol.TolParser;
import org.forester.io.parsers.util.ParserUtils;
import org.forester.io.writers.PhylogenyWriter;
+import org.forester.io.writers.SequenceWriter;
import org.forester.msa.BasicMsa;
import org.forester.msa.Mafft;
import org.forester.msa.Msa;
System.exit( -1 );
}
final long start_time = new Date().getTime();
+ System.out.print( "Sequence writer: " );
+ if ( testSequenceWriter() ) {
+ System.out.println( "OK." );
+ succeeded++;
+ }
+ else {
+ System.out.println( "failed." );
+ failed++;
+ }
System.out.print( "Sequence id parsing: " );
if ( testSequenceIdParsing() ) {
System.out.println( "OK." );
.equals( "MOUSE" ) ) {
return false;
}
- if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE_", TAXONOMY_EXTRACTION.AGRESSIVE )
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE_", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
.equals( "MOUSE" ) ) {
return false;
}
if ( ParserUtils.extractTaxonomyCodeFromNodeName( "_MOUSE_", TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT ) != null ) {
return false;
}
- if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "x_MOUSE_x", TAXONOMY_EXTRACTION.AGRESSIVE )
+ if ( !ParserUtils.extractTaxonomyCodeFromNodeName( "x_MOUSE_x", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED )
.equals( "MOUSE" ) ) {
return false;
}
System.out.println( n1.toString() );
return false;
}
- final PhylogenyNode n2 = PhylogenyNode
- .createInstanceFromNhxString( "12345", NHXParser.TAXONOMY_EXTRACTION.AGRESSIVE );
- if ( !n2.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) {
- System.out.println( n2.toString() );
- return false;
- }
final PhylogenyNode n2x = PhylogenyNode
.createInstanceFromNhxString( "12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
if ( n2x.getNodeData().isHasTaxonomy() ) {
return false;
}
final PhylogenyNode n13 = PhylogenyNode
- .createInstanceFromNhxString( "blah_12345/1-2", NHXParser.TAXONOMY_EXTRACTION.AGRESSIVE );
+ .createInstanceFromNhxString( "blah_12345/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
if ( !n13.getName().equals( "blah_12345/1-2" ) ) {
return false;
}
if ( n32.getNodeData().isHasTaxonomy() ) {
return false;
}
+ final PhylogenyNode n40 = PhylogenyNode
+ .createInstanceFromNhxString( "bcl2_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ if ( !n40.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) {
+ return false;
+ }
+ final PhylogenyNode n41 = PhylogenyNode
+ .createInstanceFromNhxString( "12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ if ( n41.getNodeData().isHasTaxonomy() ) {
+ return false;
+ }
+ final PhylogenyNode n42 = PhylogenyNode
+ .createInstanceFromNhxString( "12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_STRICT );
+ if ( n42.getNodeData().isHasTaxonomy() ) {
+ return false;
+ }
+ final PhylogenyNode n43 = PhylogenyNode.createInstanceFromNhxString( "12345",
+ NHXParser.TAXONOMY_EXTRACTION.NO );
+ if ( n43.getNodeData().isHasTaxonomy() ) {
+ return false;
+ }
+ final PhylogenyNode n44 = PhylogenyNode
+ .createInstanceFromNhxString( "12345~1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED );
+ if ( n44.getNodeData().isHasTaxonomy() ) {
+ return false;
+ }
}
catch ( final Exception e ) {
e.printStackTrace( System.out );
return true;
}
+ private static boolean testSequenceWriter() { // exercises SequenceWriter.toFasta( name, seq, width ) wrapping; false on the first mismatch or on any exception
+ try {
+ final String n = ForesterUtil.LINE_SEPARATOR; // shorthand for the separator expected between output lines
+ if ( !SequenceWriter.toFasta( "name", "awes", 5 ).toString().equals( ">name" + n + "awes" ) ) { // width larger than the sequence: one sequence line
+ return false;
+ }
+ if ( !SequenceWriter.toFasta( "name", "awes", 4 ).toString().equals( ">name" + n + "awes" ) ) { // width equal to the sequence length: still one line, no trailing separator
+ return false;
+ }
+ if ( !SequenceWriter.toFasta( "name", "awes", 3 ).toString().equals( ">name" + n + "awe" + n + "s" ) ) { // one full line plus a one-character remainder
+ return false;
+ }
+ if ( !SequenceWriter.toFasta( "name", "awes", 2 ).toString().equals( ">name" + n + "aw" + n + "es" ) ) { // length is an exact multiple of width: no empty last line
+ return false;
+ }
+ if ( !SequenceWriter.toFasta( "name", "awes", 1 ).toString()
+ .equals( ">name" + n + "a" + n + "w" + n + "e" + n + "s" ) ) { // width 1: every character on its own line
+ return false;
+ }
+ if ( !SequenceWriter.toFasta( "name", "abcdefghij", 3 ).toString()
+ .equals( ">name" + n + "abc" + n + "def" + n + "ghi" + n + "j" ) ) { // several full lines followed by a remainder
+ return false;
+ }
+ }
+ catch ( final Exception e ) {
+ e.printStackTrace(); // any exception counts as a test failure
+ return false;
+ }
+ return true; // all expected outputs matched
+ }
+
private static boolean testCreateBalancedPhylogeny() {
try {
final Phylogeny p0 = DevelopmentTools.createBalancedPhylogeny( 6, 5 );