From de830ea1bec9c9e224a53c92c9e5a886ee9642f5 Mon Sep 17 00:00:00 2001 From: "cmzmasek@gmail.com" Date: Fri, 4 Apr 2014 21:59:42 +0000 Subject: [PATCH] inprogress --- .../org/forester/archaeopteryx/UrlTreeReader.java | 6 +++- .../io/parsers/nexus/NexusPhylogeniesParser.java | 20 +++++------ .../org/forester/io/parsers/util/ParserUtils.java | 22 +++++++----- forester/java/src/org/forester/test/Test.java | 38 ++++++++++++-------- 4 files changed, 53 insertions(+), 33 deletions(-) diff --git a/forester/java/src/org/forester/archaeopteryx/UrlTreeReader.java b/forester/java/src/org/forester/archaeopteryx/UrlTreeReader.java index 184e35e..b08fa24 100644 --- a/forester/java/src/org/forester/archaeopteryx/UrlTreeReader.java +++ b/forester/java/src/org/forester/archaeopteryx/UrlTreeReader.java @@ -245,7 +245,7 @@ public class UrlTreeReader implements Runnable { try { JOptionPane.showMessageDialog( null, ForesterUtil.wordWrap( "Successfully read in " + trees.length - + " evolutionry tree(s) from [" + url + "]", 80 ), + + " tree(s) from [" + url + "]", 80 ), "Success", JOptionPane.INFORMATION_MESSAGE ); } @@ -254,6 +254,10 @@ public class UrlTreeReader implements Runnable { } _main_frame.getContentPane().repaint(); } + else { + JOptionPane.showMessageDialog( null, ForesterUtil.wordWrap( "Failed to read in tree(s) from [" + url + + "]", 80 ), "Error", JOptionPane.ERROR_MESSAGE ); + } } _main_frame.activateSaveAllIfNeeded(); System.gc(); diff --git a/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java b/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java index fbbce85..0f21364 100644 --- a/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java +++ b/forester/java/src/org/forester/io/parsers/nexus/NexusPhylogeniesParser.java @@ -149,16 +149,16 @@ public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, P final boolean is_rooted ) throws IOException { _next = null; final NHXParser pars = new NHXParser(); - if ( ( _taxlabels.size() < 1 ) && ( _translate_map.size() < 1 ) ) { - pars.setTaxonomyExtraction( _taxonomy_extraction ); - pars.setReplaceUnderscores( _replace_underscores ); - pars.setIgnoreQuotes( _ignore_quotes_in_nh_data ); - } - else { - pars.setTaxonomyExtraction( TAXONOMY_EXTRACTION.NO ); - pars.setReplaceUnderscores( false ); - pars.setIgnoreQuotes( false ); - } + // if ( ( _taxlabels.size() < 1 ) && ( _translate_map.size() < 1 ) ) { + pars.setTaxonomyExtraction( _taxonomy_extraction ); + pars.setReplaceUnderscores( _replace_underscores ); + pars.setIgnoreQuotes( _ignore_quotes_in_nh_data ); + //} + //else { + // pars.setTaxonomyExtraction( TAXONOMY_EXTRACTION.NO ); + // pars.setReplaceUnderscores( false ); + // pars.setIgnoreQuotes( false ); + //} if ( rooted_info_present ) { pars.setGuessRootedness( false ); } diff --git a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java index a465d4a..76f2a18 100644 --- a/forester/java/src/org/forester/io/parsers/util/ParserUtils.java +++ b/forester/java/src/org/forester/io/parsers/util/ParserUtils.java @@ -62,11 +62,13 @@ public final class ParserUtils { + TAX_CODE + ")\\b" ); final public static Pattern TAXOMONY_SN_PATTERN = Pattern .compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_([A-Z][a-z]+_[a-z]{2,}(?:_[a-z][a-z0-9_]+)?)\\b" ); + final public static Pattern TAXOMONY_SN_PATTERN_SN = Pattern + .compile( "\\b([A-Z][a-z]+_[a-z]{2,}(?:_[a-z][a-z0-9_]+)?)(?:\\b|_)" ); final private static Pattern TAXOMONY_CODE_PATTERN_PFS = Pattern.compile( "(?:\\b|_)[A-Z0-9]{4,}_(" + TAX_CODE + ")/\\d+-\\d+\\b" ); - final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_A = Pattern.compile( "(?:\\b|_)(\\d{1,7})\\b" ); + // final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_A = Pattern.compile( "(?:\\b|(?:[A-Z]_))(\\d{1,7})\\b" ); final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFR = Pattern - .compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_(\\d{1,7})\\b" ); + .compile( "(?:\\b|_)[A-Z0-9]{1,}_(\\d{1,7})\\b" ); final private static Pattern TAXOMONY_UNIPROT_ID_PATTERN_PFS = Pattern .compile( "(?:\\b|_)[A-Z0-9]{4,}_(\\d{1,7})/\\d+-\\d+\\b" ); @@ -199,6 +201,10 @@ public final class ParserUtils { if ( m.find() ) { return m.group( 1 ).replace( '_', ' ' ); } + final Matcher m_sn = TAXOMONY_SN_PATTERN_SN.matcher( name ); + if ( m_sn.find() ) { + return m_sn.group( 1 ).replace( '_', ' ' ); + } return null; } @@ -273,12 +279,12 @@ public final class ParserUtils { if ( m.find() ) { return m.group( 1 ); } - else if ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGGRESSIVE ) { - m = TAXOMONY_UNIPROT_ID_PATTERN_A.matcher( name ); - if ( m.find() ) { - return m.group( 1 ); - } - } + //else if ( taxonomy_extraction == TAXONOMY_EXTRACTION.AGGRESSIVE ) { + // m = TAXOMONY_UNIPROT_ID_PATTERN_A.matcher( name ); + // if ( m.find() ) { + // return m.group( 1 ); + // } + //} } return null; } diff --git a/forester/java/src/org/forester/test/Test.java b/forester/java/src/org/forester/test/Test.java index 80a20b6..4023539 100644 --- a/forester/java/src/org/forester/test/Test.java +++ b/forester/java/src/org/forester/test/Test.java @@ -4066,6 +4066,16 @@ public final class Test { .equals( "Mus musculus" ) ) { return false; } + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus" ).equals( "Mus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_musculus" ) + .equals( "Mus musculus musculus" ) ) { + return false; + } + if ( !ParserUtils.extractScientificNameFromNodeName( "Mus_musculus_123" ).equals( "Mus musculus" ) ) { + return false; + } } catch ( final Exception e ) { e.printStackTrace( System.out ); @@ -8421,8 +8431,8 @@ public final class Test { return false; } final PhylogenyNode n13 = PhylogenyNode - .createInstanceFromNhxString( "blah_12345/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); - if ( !n13.getName().equals( "blah_12345/1-2" ) ) { + .createInstanceFromNhxString( "BLAH_12345/1-2", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + if ( !n13.getName().equals( "BLAH_12345/1-2" ) ) { return false; } if ( PhylogenyMethods.getSpecies( n13 ).equals( "12345" ) ) { @@ -8487,7 +8497,7 @@ public final class Test { return false; } final PhylogenyNode n19 = PhylogenyNode - .createInstanceFromNhxString( "blah_1-roejojoej", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BLAH_1-roejojoej", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n19.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "1" ) ) { return false; } @@ -8495,7 +8505,7 @@ public final class Test { return false; } final PhylogenyNode n30 = PhylogenyNode - .createInstanceFromNhxString( "blah_1234567-roejojoej", + .createInstanceFromNhxString( "BLAH_1234567-roejojoej", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n30.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "1234567" ) ) { return false; @@ -8504,7 +8514,7 @@ public final class Test { return false; } final PhylogenyNode n31 = PhylogenyNode - .createInstanceFromNhxString( "blah_12345678-roejojoej", + .createInstanceFromNhxString( "BLAH_12345678-roejojoej", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n31.getNodeData().isHasTaxonomy() ) { return false; @@ -8515,7 +8525,7 @@ public final class Test { return false; } final PhylogenyNode n40 = PhylogenyNode - .createInstanceFromNhxString( "bcl2_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BCL2_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n40.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { return false; } @@ -11576,7 +11586,7 @@ public final class Test { return false; } final PhylogenyNode n3 = PhylogenyNode - .createInstanceFromNhxString( "blag_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BLAG_12345", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n3.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { System.out.println( n3.toString() ); return false; @@ -11594,43 +11604,43 @@ public final class Test { return false; } final PhylogenyNode n6 = PhylogenyNode - .createInstanceFromNhxString( "blag-12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BLAG-12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n6.getNodeData().isHasTaxonomy() ) { System.out.println( n6.toString() ); return false; } final PhylogenyNode n7 = PhylogenyNode - .createInstanceFromNhxString( "blag-12345_blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BLAG-12345_blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n7.getNodeData().isHasTaxonomy() ) { System.out.println( n7.toString() ); return false; } final PhylogenyNode n8 = PhylogenyNode - .createInstanceFromNhxString( "blag_12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BLAG_12345-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n8.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { System.out.println( n8.toString() ); return false; } final PhylogenyNode n9 = PhylogenyNode - .createInstanceFromNhxString( "blag_12345/blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BLAG_12345/blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n9.getNodeData().getTaxonomy().getIdentifier().getValue().equals( "12345" ) ) { System.out.println( n9.toString() ); return false; } final PhylogenyNode n10x = PhylogenyNode - .createInstanceFromNhxString( "blag_12X45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BLAG_12X45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n10x.getNodeData().isHasTaxonomy() ) { System.out.println( n10x.toString() ); return false; } final PhylogenyNode n10xx = PhylogenyNode - .createInstanceFromNhxString( "blag_1YX45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BLAG_1YX45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( n10xx.getNodeData().isHasTaxonomy() ) { System.out.println( n10xx.toString() ); return false; } final PhylogenyNode n10 = PhylogenyNode - .createInstanceFromNhxString( "blag_9YX45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); + .createInstanceFromNhxString( "BLAG_9YX45-blag", NHXParser.TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ); if ( !n10.getNodeData().getTaxonomy().getTaxonomyCode().equals( "9YX45" ) ) { System.out.println( n10.toString() ); return false; -- 1.7.10.2