From fbb463a0dc277aa6efa5231f6db554de3696b194 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Tue, 8 Apr 2014 21:29:49 +0000 Subject: [PATCH] inprogress --- .../org/forester/analysis/TaxonomyDataManager.java | 44 ++++++++-- .../src/org/forester/archaeopteryx/Constants.java | 4 +- .../src/org/forester/archaeopteryx/MainFrame.java | 11 +-- .../forester/archaeopteryx/MainFrameApplet.java | 5 -- .../archaeopteryx/MainFrameApplication.java | 88 -------------------- .../io/parsers/nexus/NexusPhylogeniesParser.java | 24 +++--- .../org/forester/io/parsers/util/ParserUtils.java | 2 +- forester/java/src/org/forester/test/Test.java | 55 ++++++++++++ 8 files changed, 106 insertions(+), 127 deletions(-) diff --git a/forester/java/src/org/forester/analysis/TaxonomyDataManager.java b/forester/java/src/org/forester/analysis/TaxonomyDataManager.java index 1652603..ad9ee25 100644 --- a/forester/java/src/org/forester/analysis/TaxonomyDataManager.java +++ b/forester/java/src/org/forester/analysis/TaxonomyDataManager.java @@ -40,7 +40,9 @@ import org.forester.archaeopteryx.MainFrameApplication; import org.forester.archaeopteryx.TreePanel; import org.forester.archaeopteryx.tools.AncestralTaxonomyInferrer; import org.forester.archaeopteryx.tools.RunnableProcess; +import org.forester.io.parsers.nhx.NHXParser; import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException; +import org.forester.io.parsers.util.ParserUtils; import org.forester.phylogeny.Phylogeny; import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.data.Identifier; @@ -245,7 +247,10 @@ public final class TaxonomyDataManager extends RunnableProcess { if ( ( ( tax != null ) && ( isHasAppropriateId( tax ) || !ForesterUtil.isEmpty( tax.getScientificName() ) || !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) || !ForesterUtil.isEmpty( tax.getCommonName() ) ) ) || ( allow_to_use_basic_node_names && !ForesterUtil.isEmpty( node.getName() ) ) ) { - if ( tax != null ) { + if ( ( ( tax != null ) && ( isHasAppropriateId( tax ) + || !ForesterUtil.isEmpty( tax.getScientificName() ) + || !ForesterUtil.isEmpty( tax.getTaxonomyCode() ) || !ForesterUtil + .isEmpty( tax.getCommonName() ) ) ) ) { uniprot_tax = obtainUniProtTaxonomy( tax, null, qt ); } else { @@ -255,7 +260,6 @@ public final class TaxonomyDataManager extends RunnableProcess { if ( tax == null ) { tax = new Taxonomy(); node.getNodeData().addTaxonomy( tax ); - node.setName( "" ); } updateTaxonomy( qt, node, tax, uniprot_tax ); } @@ -322,16 +326,40 @@ public final class TaxonomyDataManager extends RunnableProcess { if ( ForesterUtil.isEmpty( simple_name ) ) { throw new IllegalArgumentException( "illegal attempt to use empty simple name" ); } - qt = QUERY_TYPE.SN; - UniProtTaxonomy ut = obtainTaxonomy( TaxonomyDataManager.getSnTaxCacheMap(), simple_name, qt ); - if ( ut == null ) { + UniProtTaxonomy ut = null; + final String code = ParserUtils.extractTaxonomyCodeFromNodeName( simple_name, + NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !ForesterUtil.isEmpty( code ) ) { qt = QUERY_TYPE.CODE; - ut = obtainTaxonomy( TaxonomyDataManager.getCodeTaxCacheMap(), simple_name, qt ); + ut = obtainTaxonomy( TaxonomyDataManager.getCodeTaxCacheMap(), code, qt ); } if ( ut == null ) { - qt = QUERY_TYPE.CN; - ut = obtainTaxonomy( TaxonomyDataManager.getCnTaxCacheMap(), simple_name, qt ); + final String sn = ParserUtils.extractScientificNameFromNodeName( simple_name ); + if ( !ForesterUtil.isEmpty( sn ) ) { + qt = QUERY_TYPE.SN; + ut = obtainTaxonomy( TaxonomyDataManager.getSnTaxCacheMap(), sn, qt ); + } + } + if ( ut == null ) { + final String id = ParserUtils + .extractUniprotTaxonomyIdFromNodeName( simple_name, + NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( !ForesterUtil.isEmpty( id ) ) { + qt = QUERY_TYPE.ID; + ut = obtainTaxonomy( TaxonomyDataManager.getIdTaxCacheMap(), id, qt ); + } } + // + // qt = QUERY_TYPE.SN; + // UniProtTaxonomy ut = obtainTaxonomy( TaxonomyDataManager.getSnTaxCacheMap(), simple_name, qt ); + // if ( ut == null ) { + // qt = QUERY_TYPE.CODE; + // ut = obtainTaxonomy( TaxonomyDataManager.getCodeTaxCacheMap(), simple_name, qt ); + // } + // if ( ut == null ) { + // qt = QUERY_TYPE.CN; + // ut = obtainTaxonomy( TaxonomyDataManager.getCnTaxCacheMap(), simple_name, qt ); + // } return ut; } diff --git a/forester/java/src/org/forester/archaeopteryx/Constants.java b/forester/java/src/org/forester/archaeopteryx/Constants.java index a41e5a5..be02fd7 100644 --- a/forester/java/src/org/forester/archaeopteryx/Constants.java +++ b/forester/java/src/org/forester/archaeopteryx/Constants.java @@ -42,8 +42,8 @@ public final class Constants { public final static boolean __SYNTH_LF = false; // TODO remove me public final static boolean ALLOW_DDBJ_BLAST = false; public final static String PRG_NAME = "Archaeopteryx"; - final static String VERSION = "0.987 J"; - final static String PRG_DATE = "140319"; + final static String VERSION = "0.988 SR"; + final static String PRG_DATE = "140418"; final static String DEFAULT_CONFIGURATION_FILE_NAME = "_aptx_configuration_file"; final static String[] DEFAULT_FONT_CHOICES = { "Arial", "Helvetica", "Verdana", "Tahoma", "Dialog", "Lucida Sans", "SansSerif", "Sans-serif", "Sans" }; diff --git a/forester/java/src/org/forester/archaeopteryx/MainFrame.java b/forester/java/src/org/forester/archaeopteryx/MainFrame.java index 7ef6dd6..0866f1c 100644 --- a/forester/java/src/org/forester/archaeopteryx/MainFrame.java +++ b/forester/java/src/org/forester/archaeopteryx/MainFrame.java @@ -140,7 +140,6 @@ public abstract class MainFrame extends JFrame implements ActionListener { JMenuItem _lineage_inference; // file menu: JMenuItem _open_item; - JMenuItem _open_url_item; JMenuItem _save_item; JMenuItem _save_all_item; JMenuItem _close_item; @@ -269,10 +268,7 @@ public abstract class MainFrame extends JFrame implements ActionListener { applet = getCurrentTreePanel().obtainApplet(); } } - if ( o == _open_url_item ) { - readPhylogeniesFromURL(); - } - else if ( o == _exit_item ) { + if ( o == _exit_item ) { close(); } else if ( o == _gsdi_item ) { @@ -594,10 +590,7 @@ public abstract class MainFrame extends JFrame implements ActionListener { void buildFileMenu() { _file_jmenu = createMenu( "File", getConfiguration() ); - _file_jmenu.add( _open_url_item = new JMenuItem( "Read tree from URL/webservice..." ) ); - _file_jmenu.addSeparator(); _file_jmenu.add( _exit_item = new JMenuItem( "Exit" ) ); - customizeJMenuItem( _open_url_item ); customizeJMenuItem( _exit_item ); _jmenubar.add( _file_jmenu ); } @@ -1147,8 +1140,6 @@ public abstract class MainFrame extends JFrame implements ActionListener { } } - abstract void readPhylogeniesFromURL(); - void readPhylogeniesFromWebservice( final int i ) { final UrlTreeReader reader = new UrlTreeReader( this, i ); new Thread( reader ).start(); diff --git a/forester/java/src/org/forester/archaeopteryx/MainFrameApplet.java b/forester/java/src/org/forester/archaeopteryx/MainFrameApplet.java index 1fd72c4..b03b1f6 100644 --- a/forester/java/src/org/forester/archaeopteryx/MainFrameApplet.java +++ b/forester/java/src/org/forester/archaeopteryx/MainFrameApplet.java @@ -304,9 +304,4 @@ public final class MainFrameApplet extends MainFrame { JApplet getApplet() { return _applet; } - - @Override - void readPhylogeniesFromURL() { - throw new NoSuchMethodError( "not implemented" ); - } } diff --git a/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java b/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java index 6ae6a95..0744fdd 100644 --- a/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java +++ b/forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java @@ -37,8 +37,6 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; -import java.net.MalformedURLException; -import java.net.URL; import java.util.ArrayList; import java.util.HashSet; import java.util.List; @@ -94,8 +92,6 @@ import org.forester.phylogeny.PhylogenyNode; import org.forester.phylogeny.PhylogenyNode.NH_CONVERSION_SUPPORT_VALUE_STYLE; import org.forester.phylogeny.data.Confidence; import org.forester.phylogeny.data.Taxonomy; -import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; -import org.forester.phylogeny.factories.PhylogenyFactory; import org.forester.phylogeny.iterators.PhylogenyNodeIterator; import org.forester.sequence.Sequence; import org.forester.util.BasicDescriptiveStatistics; @@ -786,8 +782,6 @@ public final class MainFrameApplication extends MainFrame { _file_jmenu = MainFrame.createMenu( "File", getConfiguration() ); _file_jmenu.add( _open_item = new JMenuItem( "Read Tree from File..." ) ); _file_jmenu.addSeparator(); - _file_jmenu.add( _open_url_item = new JMenuItem( "Read Tree from URL/Webservice..." ) ); - _file_jmenu.addSeparator(); final WebservicesManager webservices_manager = WebservicesManager.getInstance(); _load_phylogeny_from_webservice_menu_items = new JMenuItem[ webservices_manager .getAvailablePhylogeniesWebserviceClients().size() ]; @@ -831,7 +825,6 @@ public final class MainFrameApplication extends MainFrame { customizeJMenuItem( _open_item ); _open_item .setFont( new Font( _open_item.getFont().getFontName(), Font.BOLD, _open_item.getFont().getSize() + 4 ) ); - customizeJMenuItem( _open_url_item ); for( int i = 0; i < webservices_manager.getAvailablePhylogeniesWebserviceClients().size(); ++i ) { customizeJMenuItem( _load_phylogeny_from_webservice_menu_items[ i ] ); } @@ -1157,87 +1150,6 @@ public final class MainFrameApplication extends MainFrame { System.exit( 0 ); } - @Override - void readPhylogeniesFromURL() { - URL url = null; - Phylogeny[] phys = null; - final String message = "Please enter a complete URL, for example \"http://www.phyloxml.org/examples/apaf.xml\""; - final String url_string = JOptionPane.showInputDialog( this, - message, - "Use URL/webservice to obtain a phylogeny", - JOptionPane.QUESTION_MESSAGE ); - boolean nhx_or_nexus = false; - if ( ( url_string != null ) && ( url_string.length() > 0 ) ) { - try { - url = new URL( url_string ); - PhylogenyParser parser = null; - if ( url.getHost().toLowerCase().indexOf( "tolweb" ) >= 0 ) { - parser = new TolParser(); - } - else { - parser = ParserUtils.createParserDependingOnUrlContents( url, getConfiguration() - .isValidatePhyloXmlAgainstSchema() ); - } - if ( parser instanceof NexusPhylogeniesParser ) { - nhx_or_nexus = true; - } - else if ( parser instanceof NHXParser ) { - nhx_or_nexus = true; - } - if ( _mainpanel.getCurrentTreePanel() != null ) { - _mainpanel.getCurrentTreePanel().setWaitCursor(); - } - else { - _mainpanel.setWaitCursor(); - } - final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); - phys = factory.create( url.openStream(), parser ); - } - catch ( final MalformedURLException e ) { - JOptionPane.showMessageDialog( this, - "Malformed URL: " + url + "\n" + e.getLocalizedMessage(), - "Malformed URL", - JOptionPane.ERROR_MESSAGE ); - } - catch ( final IOException e ) { - JOptionPane.showMessageDialog( this, - "Could not read from " + url + "\n" - + ForesterUtil.wordWrap( e.getLocalizedMessage(), 80 ), - "Failed to read URL", - JOptionPane.ERROR_MESSAGE ); - } - catch ( final Exception e ) { - JOptionPane.showMessageDialog( this, - ForesterUtil.wordWrap( e.getLocalizedMessage(), 80 ), - "Unexpected Exception", - JOptionPane.ERROR_MESSAGE ); - } - finally { - if ( _mainpanel.getCurrentTreePanel() != null ) { - _mainpanel.getCurrentTreePanel().setArrowCursor(); - } - else { - _mainpanel.setArrowCursor(); - } - } - if ( ( phys != null ) && ( phys.length > 0 ) ) { - if ( nhx_or_nexus && getOptions().isInternalNumberAreConfidenceForNhParsing() ) { - for( final Phylogeny phy : phys ) { - PhylogenyMethods.transferInternalNodeNamesToConfidence( phy, "" ); - } - } - AptxUtil.addPhylogeniesToTabs( phys, - new File( url.getFile() ).getName(), - new File( url.getFile() ).toString(), - getConfiguration(), - getMainPanel() ); - _mainpanel.getControlPanel().showWhole(); - } - } - activateSaveAllIfNeeded(); - System.gc(); - } - void setMsa( final Msa msa ) { _msa = msa; } diff --git a/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java b/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java index 4a25f4d..86ffaef 100644 --- a/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java +++ b/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java @@ -61,6 +61,7 @@ public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, P final private static String tree = NexusConstants.TREE.toLowerCase(); final private static Pattern TREE_NAME_PATTERN = Pattern.compile( "\\s*.?Tree\\s+(.+?)\\s*=.+", Pattern.CASE_INSENSITIVE ); + final private static Pattern TRANSLATE_PATTERN = Pattern.compile( "([0-9A-Za-z]+)\\s+(.+)" ); final private static String utree = NexusConstants.UTREE.toLowerCase(); private BufferedReader _br; private boolean _ignore_quotes_in_nh_data = Constants.NH_PARSING_IGNORE_QUOTES_DEFAULT; @@ -361,23 +362,20 @@ public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, P if ( s.endsWith( ";" ) ) { s = s.substring( 0, s.length() - 1 ).trim(); } - for( final String pair : s.split( "," ) ) { - final String[] kv = pair.trim().split( "\\s+" ); - if ( ( kv.length < 2 ) || ( kv.length > 3 ) ) { - throw new IOException( "ill-formatted translate values: " + pair ); - } - if ( ( kv.length == 3 ) && !kv[ 0 ].toLowerCase().trim().equals( translate ) ) { - throw new IOException( "ill-formatted translate values: " + pair ); - } + for( String pair : s.split( "," ) ) { String key = ""; String value = ""; - if ( kv.length == 3 ) { - key = kv[ 1 ]; - value = kv[ 2 ]; + final int ti = pair.toLowerCase().indexOf( "translate" ); + if ( ti > -1 ) { + pair = pair.substring( ti + 9 ); + } + final Matcher m = TRANSLATE_PATTERN.matcher( pair ); + if ( m.find() ) { + key = m.group( 1 ); + value = m.group( 2 ).replaceAll( "\'", "" ).replaceAll( "\"", "" ).trim(); } else { - key = kv[ 0 ]; - value = kv[ 1 ]; + throw new IOException( "ill-formatted translate values: " + pair ); } if ( value.endsWith( ";" ) ) { value = value.substring( 0, value.length() - 1 ); diff --git a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java index 1b0655c..6ad78db 100644 --- a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java +++ b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java @@ -63,7 +63,7 @@ public final class ParserUtils { final public static Pattern TAXOMONY_SN_PATTERN = Pattern .compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_([A-Z][a-z]+_[a-z]{2,}(?:_[a-z][a-z0-9_]+)?)\\b" ); final public static Pattern TAXOMONY_SN_PATTERN_SN = Pattern - .compile( "\\b([A-Z][a-z]+_[a-z]{2,}(?:_[a-z][a-z0-9_]+)?)(?:\\b|_)" ); + .compile( "\\b([A-Z][a-z]+[_ ][a-z]{2,}(?:[_ ][a-z]+)?)(?:\\b|_)" ); final private static Pattern TAXOMONY_CODE_PATTERN_PFS = Pattern.compile( "(?:\\b|_)[A-Z0-9]{4,}_(" + TAX_CODE + ")/\\d+-\\d+\\b" ); final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFR = Pattern diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index 8f32df5..c7c8641 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -7577,6 +7577,10 @@ public final class Test { .equals( "Aranaeus" ) ) { return false; } + phylogenies = factory.create( Test.PATH_TO_TEST_DATA + "S14117.nex", parser ); + if ( phylogenies.length != 3 ) { + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -11892,6 +11896,57 @@ public final class Test { System.out.println( n13.toString() ); return false; } + final PhylogenyNode n14 = PhylogenyNode + .createInstanceFromNhxString( "Mus_musculus_392", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n14.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus" ) ) { + System.out.println( n14.toString() ); + return false; + } + final PhylogenyNode n15 = PhylogenyNode + .createInstanceFromNhxString( "Mus_musculus_K392", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n15.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus" ) ) { + System.out.println( n15.toString() ); + return false; + } + final PhylogenyNode n16 = PhylogenyNode + .createInstanceFromNhxString( "Mus musculus 392", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n16.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus" ) ) { + System.out.println( n16.toString() ); + return false; + } + final PhylogenyNode n17 = PhylogenyNode + .createInstanceFromNhxString( "Mus musculus K392", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n17.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus" ) ) { + System.out.println( n17.toString() ); + return false; + } + // + final PhylogenyNode n18 = PhylogenyNode + .createInstanceFromNhxString( "Mus_musculus_musculus_392", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n18.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus musculus" ) ) { + System.out.println( n18.toString() ); + return false; + } + final PhylogenyNode n19 = PhylogenyNode + .createInstanceFromNhxString( "Mus_musculus_musculus_K392", + NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n19.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus musculus" ) ) { + System.out.println( n19.toString() ); + return false; + } + final PhylogenyNode n20 = PhylogenyNode + .createInstanceFromNhxString( "Mus musculus musculus 392", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n20.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus musculus" ) ) { + System.out.println( n20.toString() ); + return false; + } + final PhylogenyNode n21 = PhylogenyNode + .createInstanceFromNhxString( "Mus musculus musculus K392", + NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE ); + if ( !n21.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus musculus" ) ) { + System.out.println( n21.toString() ); + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); -- 1.7.10.2