.setToolTipText( "To collapse branches with confidence values below a threshold into multifurcations (in the case of multiple confidences per branch: without at least one confidence value above a threshold)" );
_tools_menu.addSeparator();
_tools_menu
+ .add( _extract_tax_code_from_node_names_jmi = new JMenuItem( "Extract Taxonomic Codes or Ids from Node Names" ) );
+ customizeJMenuItem( _extract_tax_code_from_node_names_jmi );
+ _extract_tax_code_from_node_names_jmi
+ .setToolTipText( "To extract SwissProt/Uniprot taxonomic codes (mnemonics) from nodes names in the form of 'xyz_CAEEL', or Uniprot/NCBI identifiers from nodes names in the form of 'xyz_6239'" );
+ _tools_menu
.add( _move_node_names_to_tax_sn_jmi = new JMenuItem( "Transfer Node Names to Taxonomic Scientific Names" ) );
customizeJMenuItem( _move_node_names_to_tax_sn_jmi );
_move_node_names_to_tax_sn_jmi.setToolTipText( "To interpret node names as taxonomic scientific names" );
_tools_menu.add( _move_node_names_to_seq_names_jmi = new JMenuItem( "Transfer Node Names to Sequence Names" ) );
customizeJMenuItem( _move_node_names_to_seq_names_jmi );
_move_node_names_to_seq_names_jmi.setToolTipText( "To interpret node names as sequence (protein, gene) names" );
- _tools_menu
- .add( _extract_tax_code_from_node_names_jmi = new JMenuItem( "Extract Taxonomic Codes or Ids from Node Names" ) );
- customizeJMenuItem( _extract_tax_code_from_node_names_jmi );
- _extract_tax_code_from_node_names_jmi
- .setToolTipText( "To extract taxonomic codes (mnemonics) from nodes names in the form of 'xyz_ECOLI', or Uniprot identifiers from nodes names in the form of 'xyz_1234567'" );
_tools_menu.addSeparator();
_tools_menu
.add( _obtain_detailed_taxonomic_information_jmi = new JMenuItem( OBTAIN_DETAILED_TAXONOMIC_INFORMATION ) );
if ( counter_failed < 15 ) {
sb_failed.append( name + "\n" );
}
- else if ( counter_failed == 15 ) {
+ else if ( counter_failed == 15 ) {
sb_failed.append( "...\n" );
}
counter_failed++;
}
if ( counter > 0 ) {
String failed = "";
+ String all = "all ";
if ( counter_failed > 0 ) {
- failed = "\nDid not extract taxonomic data for " + counter_failed + " (named) external nodes:\n" + sb_failed;
-
+ all = "";
+ failed = "\nCould not extract taxonomic data for " + counter_failed
+ + " named external nodes:\n" + sb_failed;
}
JOptionPane.showMessageDialog( this,
- "Successfully extracted taxonomic data from " + counter
- + " external nodes:\n" + sb.toString() + failed,
- "Taxonomic Data Extraction Successfully Completed",
- JOptionPane.INFORMATION_MESSAGE );
+ "Extracted taxonomic data from " + all + counter
+ + " named external nodes:\n" + sb.toString() + failed,
+ "Taxonomic Data Extraction Completed",
+ counter_failed > 0 ? JOptionPane.WARNING_MESSAGE
+ : JOptionPane.INFORMATION_MESSAGE );
}
else {
JOptionPane
.showMessageDialog( this,
- "Could not extract any taxonomic data, maybe node names are empty\nor not in the form \"XYZ_CAEEL\", \"XYZ_CAEEL/12-394\", or \"XYZ_1234567\"?",
+ "Could not extract any taxonomic data. Maybe node names are empty\n"
+ + "or not in the forms \"XYZ_CAEEL\", \"XYZ_CAEEL/12-394\", or \"XYZ_6239\",\n"
+ + "or nodes already have taxonomic data?\n",
"No Taxonomic Data Extracted",
- JOptionPane.WARNING_MESSAGE );
+ JOptionPane.ERROR_MESSAGE );
}
}
}
initNodeData();
if ( recalc_longest_ext_node_info ) {
calculateLongestExtNodeInfo();
- while ( getLongestExtNodeInfo() > y / 2 && getTreeFontSet().getLargeFont().getSize() > 2 ) {
+ while ( ( getLongestExtNodeInfo() > y / 2 ) && ( getTreeFontSet().getLargeFont().getSize() > 2 ) ) {
getMainPanel().getTreeFontSet().decreaseFontSize();
calculateLongestExtNodeInfo();
}
-
}
int ext_nodes = _phylogeny.getRoot().getNumberOfExternalNodes();
final int max_depth = PhylogenyMethods.calculateMaxDepth( _phylogeny );
errorMessageNoCutCopyPasteInUnrootedDisplay();
return;
}
- if ( node.isRoot() && node.getNumberOfDescendants() != 1 ) {
+ if ( node.isRoot() && ( node.getNumberOfDescendants() != 1 ) ) {
JOptionPane.showMessageDialog( this,
"Cannot delete entire tree",
"Attempt to delete entire tree",
import org.forester.io.parsers.util.PhylogenyParserException;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
-import org.forester.phylogeny.data.Taxonomy;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.factories.PhylogenyFactory;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
final BufferedReader reader = ParserUtils.createReader( getNexusSource() );
String line;
String name = "";
- StringBuffer nhx = new StringBuffer();
- final StringBuffer translate_sb = new StringBuffer();
+ StringBuilder nhx = new StringBuilder();
+ final StringBuilder translate_sb = new StringBuilder();
boolean in_trees_block = false;
boolean in_taxalabels = false;
boolean in_translate = false;
in_translate = false;
if ( nhx.length() > 0 ) {
createPhylogeny( name, nhx, rooted_info_present, is_rooted );
- nhx = new StringBuffer();
+ nhx = new StringBuilder();
name = "";
rooted_info_present = false;
is_rooted = false;
else if ( line_lc.startsWith( tree ) || ( line_lc.startsWith( utree ) ) ) {
if ( nhx.length() > 0 ) {
createPhylogeny( name, nhx, rooted_info_present, is_rooted );
- nhx = new StringBuffer();
+ nhx = new StringBuilder();
name = "";
rooted_info_present = false;
is_rooted = false;
in_tree = false;
in_translate = false;
createPhylogeny( name, nhx, rooted_info_present, is_rooted );
- nhx = new StringBuffer();
+ nhx = new StringBuilder();
name = "";
rooted_info_present = false;
is_rooted = false;
}
private void createPhylogeny( final String name,
- final StringBuffer nhx,
+ final StringBuilder nhx,
final boolean rooted_info_present,
final boolean is_rooted ) throws IOException {
final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
final NHXParser pars = new NHXParser();
- pars.setTaxonomyExtraction( getTaxonomyExtraction() );
- pars.setReplaceUnderscores( isReplaceUnderscores() );
- pars.setIgnoreQuotes( isIgnoreQuotes() );
+ if ( ( getTaxlabels().size() < 1 ) && ( getTranslateMap().size() < 1 ) ) {
+ pars.setTaxonomyExtraction( getTaxonomyExtraction() );
+ pars.setReplaceUnderscores( isReplaceUnderscores() );
+ pars.setIgnoreQuotes( isIgnoreQuotes() );
+ }
+ else {
+ pars.setTaxonomyExtraction( TAXONOMY_EXTRACTION.NO );
+ pars.setReplaceUnderscores( false );
+ pars.setIgnoreQuotes( false );
+ }
if ( rooted_info_present ) {
pars.setGuessRootedness( false );
}
}
}
if ( !isReplaceUnderscores() && ( ( getTaxonomyExtraction() != TAXONOMY_EXTRACTION.NO ) ) ) {
-
- ParserUtils.extractTaxonomyDataFromNodeName( node, getTaxonomyExtraction() );
-
-// final String tax = ParserUtils.extractTaxonomyCodeFromNodeName( node.getName(),
-// getTaxonomyExtraction() );
-// if ( !ForesterUtil.isEmpty( tax ) ) {
-// if ( !node.getNodeData().isHasTaxonomy() ) {
-// node.getNodeData().setTaxonomy( new Taxonomy() );
-// }
-// node.getNodeData().getTaxonomy().setTaxonomyCode( tax );
-// }
+ ParserUtils.extractTaxonomyDataFromNodeName( node, getTaxonomyExtraction() );
+ // final String tax = ParserUtils.extractTaxonomyCodeFromNodeName( node.getName(),
+ // getTaxonomyExtraction() );
+ // if ( !ForesterUtil.isEmpty( tax ) ) {
+ // if ( !node.getNodeData().isHasTaxonomy() ) {
+ // node.getNodeData().setTaxonomy( new Taxonomy() );
+ // }
+ // node.getNodeData().getTaxonomy().setTaxonomyCode( tax );
+ // }
}
}
}
_taxlabels = taxlabels;
}
- private void setTranslateKeyValuePairs( final StringBuffer translate_sb ) throws IOException {
+ private void setTranslateKeyValuePairs( final StringBuilder translate_sb ) throws IOException {
String s = translate_sb.toString().trim();
if ( s.endsWith( ";" ) ) {
s = s.substring( 0, s.length() - 1 ).trim();
final static private byte STRING_BUFFER = 1;
final static private byte CHAR_ARRAY = 2;
final static private byte BUFFERED_READER = 3;
+ final static private byte STRING_BUILDER = 4;
private boolean _guess_rootedness;
private boolean _has_next;
private boolean _ignore_quotes;
setCurrentPhylogeny( null );
setCurrentNode( null );
int i = 0;
+ String my_source_str = null;
+ StringBuffer my_source_sbuff = null;
+ StringBuilder my_source_sbuil = null;
+ char[] my_source_charary = null;
+ BufferedReader my_source_br = null;
+ switch ( getInputType() ) {
+ case STRING:
+ my_source_str = ( String ) getNhxSource();
+ break;
+ case STRING_BUFFER:
+ my_source_sbuff = ( StringBuffer ) getNhxSource();
+ break;
+ case STRING_BUILDER:
+ my_source_sbuil = ( StringBuilder ) getNhxSource();
+ break;
+ case CHAR_ARRAY:
+ my_source_charary = ( char[] ) getNhxSource();
+ break;
+ case BUFFERED_READER:
+ my_source_br = ( BufferedReader ) getNhxSource();
+ break;
+ default:
+ throw new RuntimeException( "unknown input type" );
+ }
while ( true ) {
char c = '\b';
if ( getInputType() == NHXParser.BUFFERED_READER ) {
- final int ci = ( ( BufferedReader ) getNhxSource() ).read();
+ final int ci = my_source_br.read();
if ( ci >= 0 ) {
c = ( char ) ci;
}
else {
switch ( getInputType() ) {
case STRING:
- c = ( ( String ) getNhxSource() ).charAt( i );
+ c = my_source_str.charAt( i );
break;
case STRING_BUFFER:
- c = ( ( StringBuffer ) getNhxSource() ).charAt( i );
+ c = my_source_sbuff.charAt( i );
+ break;
+ case STRING_BUILDER:
+ c = my_source_sbuil.charAt( i );
break;
case CHAR_ARRAY:
- c = ( ( char[] ) getNhxSource() )[ i ];
+ c = my_source_charary[ i ];
break;
}
}
setSourceLength( ( ( String ) nhx_source ).length() );
setNhxSource( nhx_source );
}
+ else if ( nhx_source instanceof StringBuilder ) {
+     // StringBuilder and StringBuffer are unrelated classes, so a single check for each
+     // is sufficient; a second StringBuilder arm further down the chain would be unreachable.
+     setInputType( NHXParser.STRING_BUILDER );
+     setSourceLength( ( ( StringBuilder ) nhx_source ).length() );
+     setNhxSource( nhx_source );
+ }
else if ( nhx_source instanceof StringBuffer ) {
setInputType( NHXParser.STRING_BUFFER );
setSourceLength( ( ( StringBuffer ) nhx_source ).length() );
setNhxSource( nhx_source );
}
else if ( nhx_source instanceof char[] ) {
setInputType( NHXParser.CHAR_ARRAY );
setSourceLength( ( ( char[] ) nhx_source ).length );
public static final String OTHER = "other";
public static final String UNKNOWN = "unknown";
public final static Pattern SEQUENCE_SYMBOL_PATTERN = Pattern.compile( "\\S{1,20}" );
- public final static Pattern TAXOMONY_CODE_PATTERN = ParserUtils.TAXOMONY_CODE_PATTERN_1;
+ public final static Pattern TAXOMONY_CODE_PATTERN = ParserUtils.TAXOMONY_CODE_PATTERN_1;
public final static Pattern LIT_REF_DOI_PATTERN = Pattern
.compile( "[a-zA-Z0-9_\\.]+\\S+" );
public final static Set<String> SEQUENCE_TYPES = new HashSet<String>();
return null;
}
+ /**
+  * Attempts to derive taxonomic data from the name of the given node and to store it
+  * in the node's taxonomy.
+  * <p>
+  * A taxonomy identifier (stored with provider "uniprot") is tried first; only when no
+  * identifier can be extracted from the name is a taxonomy code tried instead. A Taxonomy
+  * object is created on the node if it has none yet. Data already present on the node is
+  * never overwritten: an existing non-empty identifier value (or taxonomy code) is kept
+  * and null is returned.
+  *
+  * @param node the node whose name is inspected and whose taxonomy may be updated
+  * @param taxonomy_extraction the extraction mode handed on to the name-parsing helpers
+  * @return the identifier or code that was newly stored on the node, or null if nothing
+  *         was stored
+  * @throws PhyloXmlDataFormatException declared by this method; may be raised by the
+  *         calls that store the extracted data on the taxonomy
+  */
+ public final static String extractTaxonomyDataFromNodeName( final PhylogenyNode node,
+                                                             final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction )
+         throws PhyloXmlDataFormatException {
+     final String id = extractUniprotTaxonomyIdFromNodeName( node.getName(), taxonomy_extraction );
+     if ( ForesterUtil.isEmpty( id ) ) {
+         // No identifier in the name: fall back to a taxonomy code.
+         final String code = extractTaxonomyCodeFromNodeName( node.getName(), taxonomy_extraction );
+         if ( ForesterUtil.isEmpty( code ) ) {
+             return null;
+         }
+         if ( !node.getNodeData().isHasTaxonomy() ) {
+             node.getNodeData().setTaxonomy( new Taxonomy() );
+         }
+         if ( !ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
+             // A code is already present; leave it untouched.
+             return null;
+         }
+         node.getNodeData().getTaxonomy().setTaxonomyCode( code );
+         return code;
+     }
+     if ( !node.getNodeData().isHasTaxonomy() ) {
+         node.getNodeData().setTaxonomy( new Taxonomy() );
+     }
+     final boolean identifier_missing = ( node.getNodeData().getTaxonomy().getIdentifier() == null )
+             || ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getIdentifier().getValue() );
+     if ( !identifier_missing ) {
+         // An identifier is already present; leave it untouched.
+         return null;
+     }
+     node.getNodeData().getTaxonomy().setIdentifier( new Identifier( id, "uniprot" ) );
+     return id;
+ }
+
public final static String extractUniprotTaxonomyIdFromNodeName( final String name,
final TAXONOMY_EXTRACTION taxonomy_extraction ) {
if ( ( name.indexOf( "_" ) > 0 )
public final static Phylogeny[] readPhylogenies( final String file_name ) throws FileNotFoundException, IOException {
return readPhylogenies( new File( file_name ) );
}
-
- public final static String extractTaxonomyDataFromNodeName( final PhylogenyNode node,
- final NHXParser.TAXONOMY_EXTRACTION taxonomy_extraction )
- throws PhyloXmlDataFormatException {
- final String id = extractUniprotTaxonomyIdFromNodeName( node.getName(), taxonomy_extraction );
- if ( !ForesterUtil.isEmpty( id ) ) {
- if ( !node.getNodeData().isHasTaxonomy() ) {
- node.getNodeData().setTaxonomy( new Taxonomy() );
- }
- if ( node.getNodeData().getTaxonomy().getIdentifier() == null || ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getIdentifier().getValue() ) ) {
- node.getNodeData().getTaxonomy().setIdentifier( new Identifier( id, "uniprot" ) );
- return id;
- }
- }
- else {
- final String code = extractTaxonomyCodeFromNodeName( node.getName(), taxonomy_extraction );
- if ( !ForesterUtil.isEmpty( code ) ) {
- if ( !node.getNodeData().isHasTaxonomy() ) {
- node.getNodeData().setTaxonomy( new Taxonomy() );
- }
- if ( ForesterUtil.isEmpty( node.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
- node.getNodeData().getTaxonomy().setTaxonomyCode( code );
- return code;
- }
- }
- }
- return null;
- }
}
public Phylogeny copy() {
return copy( _root );
}
-
-
/**
* Returns a deep copy of this Phylogeny.
public void setRoot( final PhylogenyNode n ) {
_root = n;
- }
+ }
/**
* Sets whether this Phylogeny is rooted or not.
import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
-import org.forester.util.ForesterConstants;
import org.forester.util.ForesterUtil;
public class Taxonomy implements PhylogenyData, MultipleUris, Comparable<Taxonomy> {
-
-
private String _scientific_name;
private String _common_name;
private List<String> _synonyms;
}
public void setTaxonomyCode( final String taxonomy_code ) throws PhyloXmlDataFormatException {
-
- if ( !ForesterUtil.isEmpty( taxonomy_code )
- && !PhyloXmlUtil.TAXOMONY_CODE_PATTERN.matcher( taxonomy_code ).matches() ) {
- throw new PhyloXmlDataFormatException( "illegal taxonomy code: [" + taxonomy_code + "]" );
- }
-
+ if ( !ForesterUtil.isEmpty( taxonomy_code )
+ && !PhyloXmlUtil.TAXOMONY_CODE_PATTERN.matcher( taxonomy_code ).matches() ) {
+ throw new PhyloXmlDataFormatException( "illegal taxonomy code: [" + taxonomy_code + "]" );
+ }
_taxonomy_code = taxonomy_code;
}
}
final NodesLinkingResult nodes_linking_result = linkNodesOfG( gene_tree,
species_tree,
- null,
strip_gene_tree,
strip_species_tree );
_stripped_gene_tree_nodes = nodes_linking_result.getStrippedGeneTreeNodes();
return res;
}
+ /**
+  * Convenience overload that determines the taxonomy comparison base from the gene tree
+  * (via SDIutil.determineTaxonomyComparisonBase) and then delegates to the overload that
+  * takes the base explicitly.
+  *
+  * @param gene_tree the gene tree; also the input for determining the comparison base
+  * @param species_tree the species tree, passed through to the delegate
+  * @param strip_gene_tree passed through to the delegate
+  * @param strip_species_tree passed through to the delegate
+  * @return the result produced by the delegate overload
+  * @throws SDIException declared by this method; may be raised by the calls made here
+  * @throws RuntimeException if no taxonomy comparison base could be determined
+  */
+ final static NodesLinkingResult linkNodesOfG( final Phylogeny gene_tree,
+                                               final Phylogeny species_tree,
+                                               final boolean strip_gene_tree,
+                                               final boolean strip_species_tree ) throws SDIException {
+     final TaxonomyComparisonBase base = SDIutil.determineTaxonomyComparisonBase( gene_tree );
+     if ( base != null ) {
+         return linkNodesOfG( gene_tree, species_tree, base, strip_gene_tree, strip_species_tree );
+     }
+     throw new RuntimeException( "failed to establish taxonomy linking base (taxonomy linking base is null)" );
+ }
+
/**
* This allows for linking of internal nodes of the species tree (as opposed
* to just external nodes, as in the method it overrides.
final TaxonomyComparisonBase tax_comp_base,
final boolean strip_gene_tree,
final boolean strip_species_tree ) throws SDIException {
+ if ( tax_comp_base == null ) {
+ throw new IllegalArgumentException( "taxonomy linking base is null" );
+ }
final Map<String, PhylogenyNode> species_to_node_map = new HashMap<String, PhylogenyNode>();
final List<PhylogenyNode> species_tree_ext_nodes = new ArrayList<PhylogenyNode>();
final NodesLinkingResult res = new NodesLinkingResult();
- if ( tax_comp_base == null ) {
- res.setTaxCompBase( SDIutil.determineTaxonomyComparisonBase( gene_tree ) );
- }
- else {
- res.setTaxCompBase( tax_comp_base );
- }
+ res.setTaxCompBase( tax_comp_base );
// Stringyfied taxonomy is the key, node is the value.
for( final PhylogenyNodeIterator iter = species_tree.iteratorExternalForward(); iter.hasNext(); ) {
final PhylogenyNode s = iter.next();
if ( strip_gene_tree ) {
stripTree( gene_tree, res.getStrippedGeneTreeNodes() );
if ( gene_tree.isEmpty() || ( gene_tree.getNumberOfExternalNodes() < 2 ) ) {
- throw new SDIException( "species could not be mapped between gene tree and species tree" );
+ throw new SDIException( "species could not be mapped between gene tree and species tree (based on "
+ + res.getTaxCompBase() + ")" );
}
}
if ( strip_species_tree ) {
final boolean strip_species_tree ) throws SDIException {\r
final NodesLinkingResult nodes_linking_result = GSDI.linkNodesOfG( gene_tree,\r
species_tree,\r
- null,\r
strip_gene_tree,\r
strip_species_tree );\r
_stripped_gene_tree_nodes = nodes_linking_result.getStrippedGeneTreeNodes();\r
System.out.println( "failed." );
failed++;
}
-
System.out.print( "Conversion to NHX (node level): " );
if ( Test.testNHXconversion() ) {
System.out.println( "OK." );
return false;
}
final PhylogenyNode n10 = PhylogenyNode.createInstanceFromNhxString( "blag_12X45-blag",
- NHXParser.TAXONOMY_EXTRACTION.YES );
+ NHXParser.TAXONOMY_EXTRACTION.YES );
if ( !n10.getNodeData().getTaxonomy().getTaxonomyCode().equals( "12X45" ) ) {
System.out.println( n10.toString() );
return false;
public final static String UTF8 = "UTF-8";
public final static String PHYLO_XML_REFERENCE = "Han MV and Zmasek CM (2009): \"phyloXML: XML for evolutionary biology and comparative genomics\", BMC Bioinformatics 10:356";
public final static boolean RELEASE = false;
-
-
+
public enum PhylogeneticTreeFormats {
NH, NHX, NEXUS, PHYLOXML
}