import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
import org.forester.phylogeny.PhylogenyMethods;
import org.forester.phylogeny.PhylogenyMethods.DESCENDANT_SORT_PRIORITY;
import org.forester.phylogeny.PhylogenyNode;
-import org.forester.phylogeny.data.Accession;
import org.forester.phylogeny.data.Taxonomy;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.factories.PhylogenyFactory;
private final static String[] AVAILABLE_FONT_FAMILIES_SORTED = GraphicsEnvironment.getLocalGraphicsEnvironment()
.getAvailableFontFamilyNames();
- private final static Pattern seq_identifier_pattern_1 = Pattern
- .compile( "^([A-Za-z]{2,5})[|=:]([0-9A-Za-z_\\.]{5,40})\\s*$" );
- private final static Pattern seq_identifier_pattern_2 = Pattern
- .compile( "^([A-Za-z]{2,5})[|=:]([0-9A-Za-z_\\.]{5,40})[|,; ].*$" );
static {
Arrays.sort( AVAILABLE_FONT_FAMILIES_SORTED );
}
return tax_set;
}
- public final static Accession obtainSequenceAccessionFromName( final String sequence_name ) {
- final String n = sequence_name.trim();
- final Matcher matcher1 = seq_identifier_pattern_1.matcher( n );
- String group1 = "";
- String group2 = "";
- if ( matcher1.matches() ) {
- group1 = matcher1.group( 1 );
- group2 = matcher1.group( 2 );
- }
- else {
- final Matcher matcher2 = seq_identifier_pattern_2.matcher( n );
- if ( matcher2.matches() ) {
- group1 = matcher2.group( 1 );
- group2 = matcher2.group( 2 );
- }
- }
- if ( ForesterUtil.isEmpty( group1 ) || ForesterUtil.isEmpty( group2 ) ) {
- return null;
- }
- return new Accession( group2, group1 );
- }
-
public final static void printWarningMessage( final String name, final String message ) {
System.out.println( "[" + name + "] > " + message );
}
parser = new NexusPhylogeniesParser();
( ( NexusPhylogeniesParser ) parser ).setReplaceUnderscores( true );
break;
- case TREEBASE:
+ case TREEBASE_TREE:
+ parser = new NexusPhylogeniesParser();
+ ( ( NexusPhylogeniesParser ) parser ).setReplaceUnderscores( true );
+ ( ( NexusPhylogeniesParser ) parser ).setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.NO );
+ break;
+ case TREEBASE_STUDY:
parser = new NexusPhylogeniesParser();
( ( NexusPhylogeniesParser ) parser ).setReplaceUnderscores( true );
( ( NexusPhylogeniesParser ) parser ).setTaxonomyExtraction( NHXParser.TAXONOMY_EXTRACTION.NO );
import org.forester.phylogeny.data.Accession;
import org.forester.phylogeny.data.Identifier;
import org.forester.phylogeny.data.Sequence;
+import org.forester.phylogeny.data.Taxonomy;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.phylogeny.iterators.PreorderTreeIterator;
import org.forester.util.ForesterUtil;
+import org.forester.util.SequenceAccessionTools;
public final class WebserviceUtil {
- public static final String TREE_FAM_INST = "tree_fam";
public static final String PFAM_INST = "pfam";
+ public static final String PFAM_NAME = "Pfam";
+ public static final String PFAM_SERVER = "http://pfam.janelia.org";
+ public static final String TOL_NAME = "Tree of Life";
public static final String TOL_WEBSERVER = "http://tolweb.org/onlinecontributors/app?service=external&page=xml/TreeStructureService&node_id="
+ PhylogeniesWebserviceClient.QUERY_PLACEHOLDER;
- public static final String TOL_NAME = "Tree of Life";
+ public static final String TREE_BASE_DESC = "This data set was downloaded from TreeBASE, a relational database of phylogenetic knowledge. TreeBASE has been supported by the NSF, Harvard University, Yale University, SDSC and UC Davis. Please do not remove this acknowledgment.";
+ public static final String TREE_BASE_INST = "treebase";
public static final String TREE_BASE_NAME = "TreeBASE";
+ public static final String TREE_FAM_INST = "tree_fam";
public static final String TREE_FAM_NAME = "TreeFam";
- public static final String PFAM_NAME = "Pfam";
- public static final String PFAM_SERVER = "http://pfam.janelia.org";
public static List<PhylogeniesWebserviceClient> createDefaultClients() {
final List<PhylogeniesWebserviceClient> clients = new ArrayList<PhylogeniesWebserviceClient>();
"http://tolweb.org",
null ) );
clients.add( new BasicPhylogeniesWebserviceClient( TREE_BASE_NAME,
+ "Read Tree(s) from TreeBASE Study...",
+ "Use TreeBASE to obtain evolutionary tree(s) from a study",
+ "Please enter a TreeBASE study (\"S\") identifier (without the \"S\")\n(Examples: 15613, 15632, 14525, 14909)",
+ WsPhylogenyFormat.TREEBASE_STUDY,
+ null,
+ "http://purl.org/phylo/treebase/phylows/study/TB2:S"
+ + PhylogeniesWebserviceClient.QUERY_PLACEHOLDER
+ + "?format=nexus",
+ true,
+ "http://www.treebase.org",
+ TREE_BASE_INST ) );
+ clients.add( new BasicPhylogeniesWebserviceClient( TREE_BASE_NAME,
"Read Tree from TreeBASE...",
"Use TreeBASE to obtain a evolutionary tree",
- "Please enter a TreeBASE tree identifier\n(Examples: 2654, 825, 4931, 2518, 2406, 4934)",
- WsPhylogenyFormat.TREEBASE,
+ "Please enter a TreeBASE tree (\"Tr\") identifier (without the \"Tr\")\n(Examples: 422, 2654, 825, 4931, 2518, 2406, 4934)",
+ WsPhylogenyFormat.TREEBASE_TREE,
null,
"http://purl.org/phylo/treebase/phylows/tree/TB2:Tr"
+ PhylogeniesWebserviceClient.QUERY_PLACEHOLDER
+ "?format=nexus",
true,
"http://www.treebase.org",
- null ) );
+ TREE_BASE_INST ) );
clients.add( new BasicPhylogeniesWebserviceClient( PFAM_NAME,
"Read Gene Tree from Pfam...",
"Use Pfam to obtain gene trees for seed alignments",
return clients;
}
+ /**
+  * Applies the post-processing selected by a client's processing
+  * instructions to a phylogeny obtained through that client.
+  * <ul>
+  * <li>{@link #TREE_FAM_INST}: runs processTreeFamTrees.</li>
+  * <li>{@link #PFAM_INST}: runs extractSpTremblAccFromNodeName with source
+  * "sptrembl", then transfers internal node names to "bootstrap"
+  * confidences.</li>
+  * <li>{@link #TREE_BASE_INST}: transfers internal node names to
+  * confidences (empty type) if they look like confidences, then runs
+  * processTreeBaseTrees.</li>
+  * </ul>
+  * Any other value, including null, leaves the phylogeny untouched.
+  *
+  * @param client the client whose processing instructions are evaluated
+  * @param phylogeny the phylogeny to post-process in place
+  * @throws PhyloXmlDataFormatException propagated from the processing steps
+  */
+ public static void processInstructions( final PhylogeniesWebserviceClient client, final Phylogeny phylogeny )
+         throws PhyloXmlDataFormatException {
+     // The constant is the receiver of equals() on purpose: some clients appear
+     // to be registered without processing instructions (null), which must not
+     // cause a NullPointerException here.
+     if ( WebserviceUtil.TREE_FAM_INST.equals( client.getProcessingInstructions() ) ) {
+         WebserviceUtil.processTreeFamTrees( phylogeny );
+     }
+     else if ( WebserviceUtil.PFAM_INST.equals( client.getProcessingInstructions() ) ) {
+         WebserviceUtil.extractSpTremblAccFromNodeName( phylogeny, "sptrembl" );
+         PhylogenyMethods.transferInternalNodeNamesToConfidence( phylogeny, "bootstrap" );
+     }
+     else if ( WebserviceUtil.TREE_BASE_INST.equals( client.getProcessingInstructions() ) ) {
+         if ( PhylogenyMethods.isInternalNamesLookLikeConfidences( phylogeny ) ) {
+             PhylogenyMethods.transferInternalNodeNamesToConfidence( phylogeny, "" );
+         }
+         WebserviceUtil.processTreeBaseTrees( phylogeny );
+     }
+ }
+
static void extractSpTremblAccFromNodeName( final Phylogeny phy, final String source ) {
final PreorderTreeIterator it = new PreorderTreeIterator( phy );
while ( it.hasNext() ) {
}
}
- public static void processInstructions( final PhylogeniesWebserviceClient client, final Phylogeny phylogeny )
- throws PhyloXmlDataFormatException {
- if ( client.getProcessingInstructions().equals( WebserviceUtil.TREE_FAM_INST ) ) {
-
- WebserviceUtil.processTreeFamTrees( phylogeny );
+ /**
+  * Post-processes a phylogeny downloaded from TreeBASE: sets the TreeBASE
+  * acknowledgment as its description and, for each node returned by the
+  * external-node iterator, tries to derive a sequence accession from the
+  * node name.
+  *
+  * @param phy the phylogeny to modify in place
+  */
+ static void processTreeBaseTrees( final Phylogeny phy ) {
+ phy.setDescription( TREE_BASE_DESC );
+ final PhylogenyNodeIterator it = phy.iteratorExternalForward();
+ while ( it.hasNext() ) {
+ final PhylogenyNode n = it.next();
+ if ( !ForesterUtil.isEmpty( n.getName() ) ) {
+ final Accession acc = SequenceAccessionTools.parseAccessorFromString( n.getName() );
+ if ( acc != null ) {
+ // Make sure there is a Sequence object to attach the accession to.
+ if ( !n.getNodeData().isHasSequence() ) {
+ n.getNodeData().addSequence( new Sequence() );
+ }
+ final Sequence s = n.getNodeData().getSequence();
+ // Never overwrite an accession that is already present.
+ if ( s.getAccession() == null ) {
+ s.setAccession( acc );
+ }
+ }
+ }
}
- else if ( client.getProcessingInstructions().equals( WebserviceUtil.PFAM_INST ) ) {
- WebserviceUtil.extractSpTremblAccFromNodeName( phylogeny, "sptrembl" );
- PhylogenyMethods.transferInternalNodeNamesToConfidence( phylogeny, "bootstrap" ); }
}
static void processTreeFamTrees( final Phylogeny phy ) {
final PhylogenyNode n = it.next();
if ( n.isExternal() ) {
n.getNodeData().setEvent( null );
+ if ( !ForesterUtil.isEmpty( n.getName() ) ) {
+ final Accession acc = SequenceAccessionTools.parseAccessorFromString( n.getName() );
+ if ( acc != null ) {
+ if ( !n.getNodeData().isHasSequence() ) {
+ n.getNodeData().addSequence( new Sequence() );
+ }
+ final Sequence s = n.getNodeData().getSequence();
+ if ( s.getAccession() == null ) {
+ s.setAccession( acc );
+ }
+ }
+ }
+ }
+ else {
+ if ( ( n.getBranchData() != null ) && n.getBranchData().isHasConfidences()
+ && ( n.getBranchData().getConfidence( 0 ) != null ) ) {
+ n.getBranchData().getConfidence( 0 ).setType( "bootstrap" );
+ }
+ if ( !ForesterUtil.isEmpty( n.getName() ) ) {
+ if ( !n.getNodeData().isHasTaxonomy() ) {
+ n.getNodeData().addTaxonomy( new Taxonomy() );
+ }
+ final Taxonomy t = n.getNodeData().getTaxonomy();
+ if ( ForesterUtil.isEmpty( t.getScientificName() ) ) {
+ t.setScientificName( n.getName() );
+ n.setName( "" );
+ }
+ }
}
-
if ( n.getNodeData().isHasTaxonomy() && ( n.getNodeData().getTaxonomy().getIdentifier() != null ) ) {
n.getNodeData()
.getTaxonomy()
- .setIdentifier( new Identifier( n.getNodeData().getTaxonomy().getIdentifier().getValue(), "ncbi" ) );
+ .setIdentifier( new Identifier( n.getNodeData().getTaxonomy().getIdentifier().getValue(),
+ "ncbi" ) );
}
}
}
-
-
}
}
public enum WsPhylogenyFormat {
- NH, NHX, NEXUS, TOL_XML_RESPONSE, PHYLOXML, NH_EXTRACT_TAXONOMY, PFAM, TREEBASE
+ // Constants are kept in alphabetical order; TREEBASE_STUDY and TREEBASE_TREE are separate values.
+ // NOTE(review): this ordering changes ordinal() of existing constants - confirm nothing relies on ordinals.
+ NEXUS, NH, NH_EXTRACT_TAXONOMY, NHX, PFAM, PHYLOXML, TOL_XML_RESPONSE, TREEBASE_STUDY, TREEBASE_TREE
}
}
public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, PhylogenyParser {
final private static String begin_trees = NexusConstants.BEGIN_TREES.toLowerCase();
+ final private static String end = NexusConstants.END.toLowerCase();
+ final private static String endblock = "endblock";
+ final private static Pattern ROOTEDNESS_PATTERN = Pattern.compile( ".+=\\s*\\[&([R|U])\\].*" );
final private static String taxlabels = NexusConstants.TAXLABELS.toLowerCase();
+ final private static Pattern TITLE_PATTERN = Pattern.compile( "TITLE.?\\s+([^;]+)",
+ Pattern.CASE_INSENSITIVE );
final private static String translate = NexusConstants.TRANSLATE.toLowerCase();
final private static String tree = NexusConstants.TREE.toLowerCase();
- final private static String utree = NexusConstants.UTREE.toLowerCase();
- final private static String end = NexusConstants.END.toLowerCase();
- final private static String endblock = "endblock";
final private static Pattern TREE_NAME_PATTERN = Pattern.compile( "\\s*.?Tree\\s+(.+?)\\s*=.+",
Pattern.CASE_INSENSITIVE );
- final private static Pattern ROOTEDNESS_PATTERN = Pattern.compile( ".+=\\s*\\[&([R|U])\\].*" );
- private Object _nexus_source;
- private List<String> _taxlabels;
- private Map<String, String> _translate_map;
- private boolean _replace_underscores = NHXParser.REPLACE_UNDERSCORES_DEFAULT;
- private boolean _ignore_quotes_in_nh_data = Constants.NH_PARSING_IGNORE_QUOTES_DEFAULT;
- private TAXONOMY_EXTRACTION _taxonomy_extraction = TAXONOMY_EXTRACTION.NO;
- private Phylogeny _next;
+ final private static String utree = NexusConstants.UTREE.toLowerCase();
private BufferedReader _br;
- private boolean _in_trees_block;
- private StringBuilder _nh;
- private String _name;
- private StringBuilder _translate_sb;
+ private boolean _ignore_quotes_in_nh_data = Constants.NH_PARSING_IGNORE_QUOTES_DEFAULT;
private boolean _in_taxalabels;
private boolean _in_translate;
+ private boolean _in_tree;
+ private boolean _in_trees_block;
private boolean _is_rooted;
+ private String _name;
+ private Phylogeny _next;
+ private Object _nexus_source;
+ private StringBuilder _nh;
+ private boolean _replace_underscores = NHXParser.REPLACE_UNDERSCORES_DEFAULT;
private boolean _rooted_info_present;
- private boolean _in_tree;
+ private List<String> _taxlabels;
+ private TAXONOMY_EXTRACTION _taxonomy_extraction = TAXONOMY_EXTRACTION.NO;
+ private String _title;
+ private Map<String, String> _translate_map;
+ private StringBuilder _translate_sb;
+
+ /**
+  * Returns the fixed, human-readable name of this parser.
+  *
+  * @return the string "Nexus Phylogenies Parser"
+  */
+ @Override
+ public String getName() {
+ return "Nexus Phylogenies Parser";
+ }
@Override
public final boolean hasNext() {
_translate_map = new HashMap<String, String>();
_nh = new StringBuilder();
_name = "";
- _translate_sb = new StringBuilder();
+ _title = "";
+ _translate_sb = null;
_next = null;
_in_trees_block = false;
_in_taxalabels = false;
_taxonomy_extraction = taxonomy_extraction;
}
- private final void createPhylogeny( final String name,
+ private final void createPhylogeny( final String title,
+ final String name,
final StringBuilder nhx,
final boolean rooted_info_present,
final boolean is_rooted ) throws IOException {
if ( p == null ) {
throw new PhylogenyParserException( "failed to create phylogeny" );
}
- p.setName( name );
+ String myname = null;
+ if ( !ForesterUtil.isEmpty( title ) && !ForesterUtil.isEmpty( name ) ) {
+ myname = title.replace( '_', ' ' ).trim() + " (" + name.trim() + ")";
+ }
+ else if ( !ForesterUtil.isEmpty( title ) ) {
+ myname = title.replace( '_', ' ' ).trim();
+ }
+ else if ( !ForesterUtil.isEmpty( name ) ) {
+ myname = name.trim();
+ }
+ if ( !ForesterUtil.isEmpty( myname ) ) {
+ p.setName( myname );
+ }
if ( rooted_info_present ) {
p.setRooted( is_rooted );
}
if ( !_replace_underscores && ( ( _taxonomy_extraction != TAXONOMY_EXTRACTION.NO ) ) ) {
ParserUtils.extractTaxonomyDataFromNodeName( node, _taxonomy_extraction );
}
+ else if ( _replace_underscores ) {
+ if ( !ForesterUtil.isEmpty( node.getName() ) ) {
+ node.setName( node.getName().replace( '_', ' ' ).trim() );
+ }
+ }
}
}
_next = p;
_in_trees_block = true;
_in_taxalabels = false;
_in_translate = false;
+ _title = "";
}
else if ( line_lc.startsWith( taxlabels ) ) {
_in_trees_block = false;
_in_translate = false;
}
else if ( line_lc.startsWith( translate ) ) {
+ _translate_sb = new StringBuilder();
_in_taxalabels = false;
_in_translate = true;
}
else if ( _in_trees_block ) {
- //FIXME TODO need to work on this "title" and "link"
- if ( line_lc.startsWith( "title" ) || line_lc.startsWith( "link" ) ) {
- // Do nothing.
+ if ( line_lc.startsWith( "title" ) ) {
+ final Matcher title_m = TITLE_PATTERN.matcher( line );
+ if ( title_m.lookingAt() ) {
+ _title = title_m.group( 1 );
+ }
+ }
+ else if ( line_lc.startsWith( "link" ) ) {
}
else if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) {
_in_trees_block = false;
_in_tree = false;
_in_translate = false;
if ( _nh.length() > 0 ) {
- createPhylogeny( _name, _nh, _rooted_info_present, _is_rooted );
+ createPhylogeny( _title, _name, _nh, _rooted_info_present, _is_rooted );
_nh = new StringBuilder();
_name = "";
_rooted_info_present = false;
boolean might = false;
if ( _nh.length() > 0 ) {
might = true;
- createPhylogeny( _name, _nh, _rooted_info_present, _is_rooted );
+ createPhylogeny( _title, _name, _nh, _rooted_info_present, _is_rooted );
_nh = new StringBuilder();
_name = "";
_rooted_info_present = false;
&& !line_lc.startsWith( end ) && !line_lc.startsWith( endblock ) && line_lc.endsWith( ";" ) ) {
_in_tree = false;
_in_translate = false;
- createPhylogeny( _name, _nh, _rooted_info_present, _is_rooted );
+ createPhylogeny( _title, _name, _nh, _rooted_info_present, _is_rooted );
_nh = new StringBuilder();
_name = "";
_rooted_info_present = false;
}
}
if ( _nh.length() > 0 ) {
- createPhylogeny( _name, _nh, _rooted_info_present, _is_rooted );
+ createPhylogeny( _title, _name, _nh, _rooted_info_present, _is_rooted );
if ( _next != null ) {
return;
}
for( final String pair : s.split( "," ) ) {
final String[] kv = pair.trim().split( "\\s+" );
if ( ( kv.length < 2 ) || ( kv.length > 3 ) ) {
- throw new IOException( "ill-formatted translate values: " + translate_sb );
+ throw new IOException( "ill-formatted translate values: " + pair );
}
if ( ( kv.length == 3 ) && !kv[ 0 ].toLowerCase().trim().equals( translate ) ) {
- throw new IOException( "ill-formatted translate values: " + translate_sb );
+ throw new IOException( "ill-formatted translate values: " + pair );
}
String key = "";
String value = "";
private final static String removeWhiteSpaceBeforeSemicolon( final String s ) {
return s.replaceAll( "\\s+;", ";" );
}
-
- @Override
- public String getName() {
- return "Nexus Phylogenies Parser";
- }
}
}
}
- final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy, final String confidence_type ) {
+ /**
+  * Checks whether the names of the internal nodes (neither external nor
+  * root) could all be confidence values.
+  * <p>
+  * Returns false as soon as such a node has a non-empty name that cannot be
+  * parsed as a double, or that parses to a value below 0 or above 100.
+  * Nodes with empty names are ignored, so a phylogeny without any named
+  * internal node yields true.
+  *
+  * @param phy the phylogeny to inspect (not modified)
+  * @return true if no named internal node contradicts the confidence
+  *         interpretation, false otherwise
+  */
+ final static public boolean isInternalNamesLookLikeConfidences( final Phylogeny phy ) {
final PhylogenyNodeIterator it = phy.iteratorPostorder();
while ( it.hasNext() ) {
final PhylogenyNode n = it.next();
- if ( !n.isExternal() && !n.getBranchData().isHasConfidences() ) {
+ if ( !n.isExternal() && !n.isRoot() ) {
if ( !ForesterUtil.isEmpty( n.getName() ) ) {
- double d = -1.0;
+ double value = -1;
try {
- d = Double.parseDouble( n.getName() );
+ value = Double.parseDouble( n.getName() );
}
- catch ( final Exception e ) {
- d = -1.0;
+ catch ( final NumberFormatException e ) {
+ return false;
}
- if ( d >= 0.0 ) {
- n.getBranchData().addConfidence( new Confidence( d, confidence_type ) );
- n.setName( "" );
+ if ( ( value < 0.0 ) || ( value > 100 ) ) {
+ return false;
}
}
}
}
+ // No named internal node was non-numeric or out of the 0..100 range.
+ return true;
+ }
+
+ /**
+  * Walks the phylogeny with its post-order iterator and passes every node
+  * to transferInternalNodeNameToConfidence, which decides per node whether
+  * the name is turned into a confidence value.
+  *
+  * @param phy the phylogeny whose nodes are processed in place
+  * @param confidence_type the type given to each created confidence
+  */
+ final static public void transferInternalNodeNamesToConfidence( final Phylogeny phy, final String confidence_type ) {
+     for( final PhylogenyNodeIterator nodes = phy.iteratorPostorder(); nodes.hasNext(); ) {
+         final PhylogenyNode node = nodes.next();
+         transferInternalNodeNameToConfidence( confidence_type, node );
+     }
+ }
+
+ /**
+  * Turns the name of a single node into a confidence value if the node is
+  * internal, has no confidences yet, and its name parses as a number that
+  * is greater than or equal to zero. On success the confidence is added to
+  * the node's branch data and the node name is cleared; in every other
+  * case the node is left unchanged.
+  *
+  * @param confidence_type the type given to the created confidence
+  * @param n the node to process in place
+  */
+ private static void transferInternalNodeNameToConfidence( final String confidence_type, final PhylogenyNode n ) {
+     if ( n.isExternal() || n.getBranchData().isHasConfidences() || ForesterUtil.isEmpty( n.getName() ) ) {
+         return;
+     }
+     final double d;
+     try {
+         d = Double.parseDouble( n.getName() );
+     }
+     catch ( final NumberFormatException e ) {
+         // A non-numeric name is not a confidence value; keep the name as it is.
+         // Only NumberFormatException is caught (as in isInternalNamesLookLikeConfidences)
+         // so that unrelated failures are not silently swallowed.
+         return;
+     }
+     if ( d >= 0.0 ) {
+         n.getBranchData().addConfidence( new Confidence( d, confidence_type ) );
+         n.setName( "" );
+     }
}
final static public void transferNodeNameToField( final Phylogeny phy,
final private String _value;
public enum Source {
- NCBI, REFSEQ, UNIPROT, GI, EMBL, UNKNOWN;
+ NCBI, REFSEQ, UNIPROT, GI, EMBL, ENSEMBL, UNKNOWN;
@Override
public String toString() {
return "gi";
case EMBL:
return "embl";
+ case ENSEMBL:
+ return "ensembl";
case UNKNOWN:
return "unknown";
default:
if ( phylogenies[ 17 ].getNumberOfExternalNodes() != 10 ) {
return false;
}
+ final NexusPhylogeniesParser p2 = new NexusPhylogeniesParser();
+ phylogenies = null;
+ phylogenies = factory.create( Test.PATH_TO_TEST_DATA + "S15613.nex", p2 );
+ if ( phylogenies.length != 9 ) {
+ return false;
+ }
+ if ( !isEqual( 0.48039661496919533, phylogenies[ 0 ].getNode( "Diadocidia_spinosula" )
+ .getDistanceToParent() ) ) {
+ return false;
+ }
+ if ( !isEqual( 0.3959796191512233, phylogenies[ 0 ].getNode( "Diadocidia_stanfordensis" )
+ .getDistanceToParent() ) ) {
+ return false;
+ }
+ if ( !phylogenies[ 0 ].getName().equals( "Family Diadocidiidae MLT (Imported_tree_0)" ) ) {
+ return false;
+ }
+ if ( !phylogenies[ 1 ].getName().equals( "Family Diadocidiidae BAT (con_50_majrule)" ) ) {
+ return false;
+ }
+ if ( !phylogenies[ 2 ].getName().equals( "Family Diadocidiidae BAT (con_50_majrule)" ) ) {
+ return false;
+ }
+ if ( !isEqual( 0.065284, phylogenies[ 7 ].getNode( "Bradysia_amoena" ).getDistanceToParent() ) ) {
+ return false;
+ }
+ if ( !isEqual( 0.065284, phylogenies[ 8 ].getNode( "Bradysia_amoena" ).getDistanceToParent() ) ) {
+ return false;
+ }
}
catch ( final Exception e ) {
e.printStackTrace( System.out );
if ( phy.isRooted() ) {
return false;
}
+ //
+ final NexusPhylogeniesParser p2 = new NexusPhylogeniesParser();
+ p2.setSource( Test.PATH_TO_TEST_DATA + "S15613.nex" );
+ // 0
+ if ( !p2.hasNext() ) {
+ return false;
+ }
+ phy = p2.next();
+ if ( !isEqual( 0.48039661496919533, phy.getNode( "Diadocidia_spinosula" ).getDistanceToParent() ) ) {
+ return false;
+ }
+ if ( !isEqual( 0.3959796191512233, phy.getNode( "Diadocidia_stanfordensis" ).getDistanceToParent() ) ) {
+ return false;
+ }
+ // 1
+ if ( !p2.hasNext() ) {
+ return false;
+ }
+ phy = p2.next();
+ // 2
+ if ( !p2.hasNext() ) {
+ return false;
+ }
+ phy = p2.next();
+ // 3
+ if ( !p2.hasNext() ) {
+ return false;
+ }
+ phy = p2.next();
+ // 4
+ if ( !p2.hasNext() ) {
+ return false;
+ }
+ phy = p2.next();
+ // 5
+ if ( !p2.hasNext() ) {
+ return false;
+ }
+ phy = p2.next();
+ // 6
+ if ( !p2.hasNext() ) {
+ return false;
+ }
+ phy = p2.next();
+ // 7
+ if ( !p2.hasNext() ) {
+ return false;
+ }
+ phy = p2.next();
+ // 8
+ if ( !p2.hasNext() ) {
+ return false;
+ }
+ phy = p2.next();
+ if ( !isEqual( 0.065284, phy.getNode( "Bradysia_amoena" ).getDistanceToParent() ) ) {
+ return false;
+ }
+ if ( p2.hasNext() ) {
+ return false;
+ }
+ phy = p2.next();
+ if ( phy != null ) {
+ return false;
+ }
+ // 0
+ p2.reset();
+ if ( !p2.hasNext() ) {
+ return false;
+ }
+ phy = p2.next();
+ if ( !isEqual( 0.48039661496919533, phy.getNode( "Diadocidia_spinosula" ).getDistanceToParent() ) ) {
+ return false;
+ }
+ if ( !isEqual( 0.3959796191512233, phy.getNode( "Diadocidia_stanfordensis" ).getDistanceToParent() ) ) {
+ return false;
+ }
}
catch ( final Exception e ) {
e.printStackTrace( System.out );
.compile( "(?:\\b|_)(?:sp|tr)[\\.|\\-_=/\\\\]([A-Z][0-9][A-Z0-9]{3}[0-9])(?:\\b|_)" );\r
public final static Pattern UNIPROT_KB_PATTERN_2 = Pattern\r
.compile( "(?:\\b|_)(?:[A-Z0-9]{2,5}|(?:[A-Z][0-9][A-Z0-9]{3}[0-9]))_(([A-Z9][A-Z]{2}[A-Z0-9]{2})|RAT|PIG|PEA)(?:\\b|_)" );\r
+ public final static Pattern ENSEMBL_PATTERN = Pattern.compile( "(?:\\b|_)(ENS[A-Z]*[0-9]+)(?:\\b|_)" );\r
// RefSeq accession numbers can be distinguished from GenBank accessions \r
// by their distinct prefix format of 2 characters followed by an\r
// underscore character ('_'). For example, a RefSeq protein accession is NP_015325. \r
if ( !ForesterUtil.isEmpty( v ) ) {\r
return new Accession( v, Source.GI );\r
}\r
+ v = parseEnsemlAccessorFromString( s );\r
+ if ( !ForesterUtil.isEmpty( v ) ) {\r
+ return new Accession( v, Source.ENSEMBL );\r
+ }\r
}\r
return null;\r
}\r
return null;\r
}\r
\r
+ /**\r
+  * Searches the given string for the first occurrence of ENSEMBL_PATTERN\r
+  * and returns the text captured by the pattern's first group.\r
+  *\r
+  * @param s the string to search\r
+  * @return the captured identifier, or null if the pattern is not found\r
+  */\r
+ public final static String parseEnsemlAccessorFromString( final String s ) {\r
+     final Matcher ensembl_matcher = ENSEMBL_PATTERN.matcher( s );\r
+     return ensembl_matcher.find() ? ensembl_matcher.group( 1 ) : null;\r
+ }\r
+\r
public final static String parseRefSeqAccessorFromString( final String s ) {\r
final Matcher m = REFSEQ_PATTERN.matcher( s );\r
if ( m.lookingAt() ) {\r