in progress
author cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 19 Sep 2014 03:00:00 +0000 (03:00 +0000)
committer cmzmasek@gmail.com <cmzmasek@gmail.com@ca865154-3058-d1c3-3e42-d8f55a55bdbd>
Fri, 19 Sep 2014 03:00:00 +0000 (03:00 +0000)
15 files changed:
forester/java/src/org/forester/analysis/TaxonomyDataManager.java
forester/java/src/org/forester/archaeopteryx/ArchaeopteryxE.java
forester/java/src/org/forester/archaeopteryx/Constants.java
forester/java/src/org/forester/archaeopteryx/MainFrame.java
forester/java/src/org/forester/archaeopteryx/MainFrameApplet.java
forester/java/src/org/forester/archaeopteryx/MainFrameApplication.java
forester/java/src/org/forester/archaeopteryx/TreePanel.java
forester/java/src/org/forester/io/parsers/nhx/NHXParser.java
forester/java/src/org/forester/io/parsers/util/ParserUtils.java
forester/java/src/org/forester/phylogeny/PhylogenyNode.java
forester/java/src/org/forester/test/Test.java
forester/java/src/org/forester/util/ForesterConstants.java
forester/java/src/org/forester/util/ForesterUtil.java
forester/java/src/org/forester/util/TaxonomyUtil.java
forester/java/src/org/forester/ws/seqdb/SequenceDbWsTools.java

index fbc8d0c..d78720c 100644 (file)
@@ -199,7 +199,10 @@ public final class TaxonomyDataManager extends RunnableProcess {
     }
 
     private final static List<UniProtTaxonomy> getTaxonomiesFromTaxonomyCode( final String query ) throws IOException {
-        if ( ( query.indexOf( "XX" ) == 3 ) && TaxonomyUtil.isHasTaxIdFromFakeTaxCode( query ) ) {
+        //FIXME fix "SPHAR" issue
+        if ( ( ( query.indexOf( "XX" ) == 3 ) && TaxonomyUtil.isHasTaxIdFromFakeTaxCode( query ) )
+                || query.equals( "SPHAR" ) /* TODO remove me, is same as Sphingomonas aromaticivorans */
+        ) {
             final int id = TaxonomyUtil.getTaxIdFromFakeTaxCode( query );
             return SequenceDbWsTools.getTaxonomiesFromId( String.valueOf( id ), MAX_TAXONOMIES_TO_RETURN );
         }
index 251a43c..7ea1610 100644 (file)
@@ -758,11 +758,11 @@ public class ArchaeopteryxE extends JApplet implements ActionListener {
                 .add( _show_default_node_shapes_internal_cbmi = new JCheckBoxMenuItem( MainFrame.DISPLAY_NODE_BOXES_LABEL_INT ) );
         _options_jmenu
                 .add( _show_default_node_shapes_external_cbmi = new JCheckBoxMenuItem( MainFrame.DISPLAY_NODE_BOXES_LABEL_EXT ) );
+        _options_jmenu.add( _line_up_renderable_data_cbmi = new JCheckBoxMenuItem( MainFrame.LINE_UP_RENDERABLE_DATA ) );
         if ( getConfiguration().doDisplayOption( Configuration.show_domain_architectures ) ) {
-            _options_jmenu.add( _show_domain_labels = new JCheckBoxMenuItem( MainFrame.SHOW_DOMAIN_LABELS_LABEL ) );
             _options_jmenu.add( _right_line_up_domains_cbmi = new JCheckBoxMenuItem( MainFrame.RIGHT_LINE_UP_DOMAINS ) );
+            _options_jmenu.add( _show_domain_labels = new JCheckBoxMenuItem( MainFrame.SHOW_DOMAIN_LABELS_LABEL ) );
         }
-        _options_jmenu.add( _line_up_renderable_data_cbmi = new JCheckBoxMenuItem( MainFrame.LINE_UP_RENDERABLE_DATA ) );
         _options_jmenu.add( _show_annotation_ref_source = new JCheckBoxMenuItem( MainFrame.SHOW_ANN_REF_SOURCE_LABEL ) );
         _options_jmenu.add( _show_confidence_stddev_cbmi = new JCheckBoxMenuItem( MainFrame.SHOW_CONF_STDDEV_LABEL ) );
         _options_jmenu
@@ -932,7 +932,8 @@ public class ArchaeopteryxE extends JApplet implements ActionListener {
     }
 
     void displayBasicInformation() {
-        if ( ( getMainPanel().getCurrentPhylogeny() != null ) && !getMainPanel().getCurrentPhylogeny().isEmpty() ) {
+        if ( ( getMainPanel() != null ) && ( getMainPanel().getCurrentPhylogeny() != null )
+                && !getMainPanel().getCurrentPhylogeny().isEmpty() ) {
             String title = "Basic Information";
             if ( !ForesterUtil.isEmpty( getMainPanel().getCurrentPhylogeny().getName() ) ) {
                 title = title + " for \"" + _mainpanel.getCurrentPhylogeny().getName() + "\"";
index deff15d..16f5a7d 100644 (file)
@@ -42,8 +42,8 @@ public final class Constants {
     public final static boolean __SYNTH_LF                                                    = false;                                                                             // TODO remove me
     public final static boolean ALLOW_DDBJ_BLAST                                              = false;
     public final static String  PRG_NAME                                                      = "Archaeopteryx";
-    final static String         VERSION                                                       = "0.9897 beta";
-    final static String         PRG_DATE                                                      = "140828";
+    final static String         VERSION                                                       = "0.9898 beta";
+    final static String         PRG_DATE                                                      = "140918";
     final static String         DEFAULT_CONFIGURATION_FILE_NAME                               = "_aptx_configuration_file";
     final static String[]       DEFAULT_FONT_CHOICES                                          = { "Arial", "Helvetica",
             "Verdana", "Tahoma", "Dialog", "Lucida Sans", "SansSerif", "Sans-serif", "Sans"  };
index fb23501..2cc0189 100644 (file)
@@ -344,7 +344,9 @@ public abstract class MainFrame extends JFrame implements ActionListener {
             switchColors();\r
         }\r
         else if ( o == _display_basic_information_item ) {\r
-            displayBasicInformation( getCurrentTreePanel().getTreeFile() );\r
+            if ( getCurrentTreePanel() != null ) {\r
+                displayBasicInformation( getCurrentTreePanel().getTreeFile() );\r
+            }\r
         }\r
         else if ( o == _view_as_NH_item ) {\r
             viewAsNH();\r
index b804681..8314448 100644 (file)
@@ -215,11 +215,11 @@ public final class MainFrameApplet extends MainFrame {
                 .add( _show_default_node_shapes_internal_cbmi = new JCheckBoxMenuItem( MainFrame.DISPLAY_NODE_BOXES_LABEL_INT ) );
         _options_jmenu
                 .add( _show_default_node_shapes_external_cbmi = new JCheckBoxMenuItem( MainFrame.DISPLAY_NODE_BOXES_LABEL_EXT ) );
+        _options_jmenu.add( _line_up_renderable_data_cbmi = new JCheckBoxMenuItem( MainFrame.LINE_UP_RENDERABLE_DATA ) );
         if ( getConfiguration().doDisplayOption( Configuration.show_domain_architectures ) ) {
-            _options_jmenu.add( _show_domain_labels = new JCheckBoxMenuItem( MainFrame.SHOW_DOMAIN_LABELS_LABEL ) );
             _options_jmenu.add( _right_line_up_domains_cbmi = new JCheckBoxMenuItem( MainFrame.RIGHT_LINE_UP_DOMAINS ) );
+            _options_jmenu.add( _show_domain_labels = new JCheckBoxMenuItem( MainFrame.SHOW_DOMAIN_LABELS_LABEL ) );
         }
-        _options_jmenu.add( _line_up_renderable_data_cbmi = new JCheckBoxMenuItem( MainFrame.LINE_UP_RENDERABLE_DATA ) );
         _options_jmenu.add( _show_annotation_ref_source = new JCheckBoxMenuItem( MainFrame.SHOW_ANN_REF_SOURCE_LABEL ) );
         _options_jmenu.add( _show_confidence_stddev_cbmi = new JCheckBoxMenuItem( MainFrame.SHOW_CONF_STDDEV_LABEL ) );
         _options_jmenu
index 947ed2a..318be47 100644 (file)
@@ -896,11 +896,11 @@ public final class MainFrameApplication extends MainFrame {
                 .add( _show_default_node_shapes_internal_cbmi = new JCheckBoxMenuItem( DISPLAY_NODE_BOXES_LABEL_INT ) );\r
         _options_jmenu\r
                 .add( _show_default_node_shapes_external_cbmi = new JCheckBoxMenuItem( DISPLAY_NODE_BOXES_LABEL_EXT ) );\r
+        _options_jmenu.add( _line_up_renderable_data_cbmi = new JCheckBoxMenuItem( MainFrame.LINE_UP_RENDERABLE_DATA ) );\r
         if ( getConfiguration().doDisplayOption( Configuration.show_domain_architectures ) ) {\r
-            _options_jmenu.add( _show_domain_labels = new JCheckBoxMenuItem( SHOW_DOMAIN_LABELS_LABEL ) );\r
             _options_jmenu.add( _right_line_up_domains_cbmi = new JCheckBoxMenuItem( MainFrame.RIGHT_LINE_UP_DOMAINS ) );\r
+            _options_jmenu.add( _show_domain_labels = new JCheckBoxMenuItem( MainFrame.SHOW_DOMAIN_LABELS_LABEL ) );\r
         }\r
-        _options_jmenu.add( _line_up_renderable_data_cbmi = new JCheckBoxMenuItem( MainFrame.LINE_UP_RENDERABLE_DATA ) );\r
         _options_jmenu.add( _show_annotation_ref_source = new JCheckBoxMenuItem( SHOW_ANN_REF_SOURCE_LABEL ) );\r
         _options_jmenu.add( _show_confidence_stddev_cbmi = new JCheckBoxMenuItem( SHOW_CONF_STDDEV_LABEL ) );\r
         _options_jmenu.add( _color_by_taxonomic_group_cbmi = new JCheckBoxMenuItem( COLOR_BY_TAXONOMIC_GROUP ) );\r
index 314ee51..ce136b5 100644 (file)
@@ -173,6 +173,7 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee
     private static final BasicStroke     STROKE_2                                           = new BasicStroke( 2f );
     private static final double          TWO_PI                                             = 2 * Math.PI;
     private final static int             WIGGLE                                             = 2;
+    private static final String          SHOW_ONLY_THIS_CONF_TYPE                           = "posterior probability";                                  //TODO remove me
     HashMap<Long, Short>                 _nodeid_dist_to_leaf                               = new HashMap<Long, Short>();
     final private Arc2D                  _arc                                               = new Arc2D.Double();
     private AffineTransform              _at;
@@ -2854,13 +2855,19 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee
         final List<PhylogenyNode> additional_nodes = new ArrayList<PhylogenyNode>();
         if ( getFoundNodes0() != null ) {
             for( final Long id : getFoundNodes0() ) {
-                additional_nodes.add( _phylogeny.getNode( id ) );
+                final PhylogenyNode n = _phylogeny.getNode( id );
+                if ( n != null ) {
+                    additional_nodes.add( n );
+                }
             }
         }
         if ( getFoundNodes1() != null ) {
             for( final Long id : getFoundNodes1() ) {
                 if ( ( getFoundNodes0() == null ) || !getFoundNodes0().contains( id ) ) {
-                    additional_nodes.add( _phylogeny.getNode( id ) );
+                    final PhylogenyNode n = _phylogeny.getNode( id );
+                    if ( n != null ) {
+                        additional_nodes.add( n );
+                    }
                 }
             }
         }
@@ -4042,26 +4049,30 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee
         Collections.sort( confidences );
         final StringBuilder sb = new StringBuilder();
         for( final Confidence confidence : confidences ) {
-            final double value = confidence.getValue();
-            if ( value != Confidence.CONFIDENCE_DEFAULT_VALUE ) {
-                if ( value < getOptions().getMinConfidenceValue() ) {
-                    return;
-                }
-                if ( not_first ) {
-                    sb.append( "/" );
-                }
-                else {
-                    not_first = true;
-                }
-                sb.append( FORMATTER_CONFIDENCE.format( ForesterUtil.round( value, getOptions()
-                        .getNumberOfDigitsAfterCommaForConfidenceValues() ) ) );
-                if ( getOptions().isShowConfidenceStddev() ) {
-                    if ( confidence.getStandardDeviation() != Confidence.CONFIDENCE_DEFAULT_VALUE ) {
-                        sb.append( "(" );
-                        sb.append( FORMATTER_CONFIDENCE.format( ForesterUtil.round( confidence.getStandardDeviation(),
-                                                                                    getOptions()
-                                                                                            .getNumberOfDigitsAfterCommaForConfidenceValues() ) ) );
-                        sb.append( ")" );
+            if ( ForesterUtil.isEmpty( SHOW_ONLY_THIS_CONF_TYPE )
+                    || ( !ForesterUtil.isEmpty( confidence.getType() ) && confidence.getType()
+                            .equalsIgnoreCase( SHOW_ONLY_THIS_CONF_TYPE ) ) ) {
+                final double value = confidence.getValue();
+                if ( value != Confidence.CONFIDENCE_DEFAULT_VALUE ) {
+                    if ( value < getOptions().getMinConfidenceValue() ) {
+                        return;
+                    }
+                    if ( not_first ) {
+                        sb.append( "/" );
+                    }
+                    else {
+                        not_first = true;
+                    }
+                    sb.append( FORMATTER_CONFIDENCE.format( ForesterUtil.round( value, getOptions()
+                            .getNumberOfDigitsAfterCommaForConfidenceValues() ) ) );
+                    if ( getOptions().isShowConfidenceStddev() ) {
+                        if ( confidence.getStandardDeviation() != Confidence.CONFIDENCE_DEFAULT_VALUE ) {
+                            sb.append( "(" );
+                            sb.append( FORMATTER_CONFIDENCE.format( ForesterUtil.round( confidence
+                                    .getStandardDeviation(), getOptions()
+                                    .getNumberOfDigitsAfterCommaForConfidenceValues() ) ) );
+                            sb.append( ")" );
+                        }
                     }
                 }
             }
index 449721d..4bcfb08 100644 (file)
@@ -56,9 +56,9 @@ import org.forester.util.ForesterUtil;
 
 public final class NHXParser implements PhylogenyParser, IteratingPhylogenyParser {
 
-    public final static Pattern  MB_BL_PATTERN                              = Pattern.compile( "length_median=([^,]+)" );
+    public final static Pattern  MB_BL_PATTERN                              = Pattern.compile( "length.median=([^,]+)" );
     public final static Pattern  MB_PROB_PATTERN                            = Pattern.compile( "prob=([^,]+)" );
-    public final static Pattern  MB_PROB_SD_PATTERN                         = Pattern.compile( "prob_stddev=([^,]+)" );
+    public final static Pattern  MB_PROB_SD_PATTERN                         = Pattern.compile( "prob.stddev=([^,]+)" );
     public final static Pattern  NUMBERS_ONLY_PATTERN                       = Pattern.compile( "^[0-9\\.]+$" );
     final static public boolean  REPLACE_UNDERSCORES_DEFAULT                = false;
     private static final boolean ALLOW_ERRORS_IN_DISTANCE_TO_PARENT_DEFAULT = false;
index 9168c9d..6b70303 100644 (file)
@@ -60,7 +60,7 @@ public final class ParserUtils {
     final public static String   TAX_CODE_LO                          = "(?:[A-Z]{5})|RAT|PIG|PEA";
     final public static Pattern  TAXOMONY_CODE_PATTERN_A              = Pattern.compile( "(?:\\b|_)(" + TAX_CODE
                                                                               + ")(?:\\b|_)" );
-    final public static Pattern  TAXOMONY_CODE_PATTERN_A_LO           = Pattern.compile( "(?:\\b|_)(" + TAX_CODE_LO
+    final public static Pattern  TAXOMONY_CODE_PATTERN_A_LO           = Pattern.compile( "_(" + TAX_CODE_LO
                                                                               + ")(?:\\b|_)" );
     final public static Pattern  TAXOMONY_CODE_PATTERN_BRACKETED      = Pattern.compile( "\\[(" + TAX_CODE + ")\\]" );
     final public static Pattern  TAXOMONY_CODE_PATTERN_PFR            = Pattern.compile( "(?:\\b|_)[a-zA-Z0-9]{3,}_("
index 979fafe..e7ef81b 100644 (file)
@@ -971,12 +971,10 @@ public final class PhylogenyNode implements Comparable<PhylogenyNode> {
     @Override
     final public String toString() {
         final StringBuilder sb = new StringBuilder();
-        
         if ( !ForesterUtil.isEmpty( getName() ) ) {
             sb.append( getName() );
             sb.append( " " );
         }
-        
         if ( getNodeData().isHasTaxonomy() ) {
             if ( !ForesterUtil.isEmpty( getNodeData().getTaxonomy().getScientificName() ) ) {
                 sb.append( getNodeData().getTaxonomy().getScientificName() );
@@ -1009,7 +1007,6 @@ public final class PhylogenyNode implements Comparable<PhylogenyNode> {
                 sb.append( " " );
             }
         }
-       
         if ( sb.length() <= 1 ) {
             sb.append( "[" );
             sb.append( getId() );
index 35b8600..0dd12a5 100644 (file)
@@ -298,6 +298,15 @@ public final class Test {
             succeeded++;
         }
         System.out.println( "OK." );
+        System.out.print( "Taxonomy data extraction: " );
+        if ( Test.testExtractTaxonomyDataFromNodeName() ) {
+            System.out.println( "OK." );
+            succeeded++;
+        }
+        else {
+            System.out.println( "failed." );
+            failed++;
+        }
         System.out.print( "Taxonomy code extraction: " );
         if ( Test.testExtractTaxonomyCodeFromNodeName() ) {
             System.out.println( "OK." );
@@ -1566,7 +1575,7 @@ public final class Test {
                 return false;
             }
             final MolecularSequence aa2 = BasicSequence.createAaSequence( "aa3", "ARNDCQEGHILKMFPSTWYVX*-BZOJU" );
-            if ( !new String( aa2.getMolecularSequence() ).equals( "ARNDCQEGHILKMFPSTWYVX*-BZXXU" ) ) {
+            if ( !new String( aa2.getMolecularSequence() ).equals( "ARNDCQEGHILKMFPSTWYVX*-BZOXU" ) ) {
                 return false;
             }
             final MolecularSequence dna1 = BasicSequence.createDnaSequence( "dna1", "ACGTUX*-?RYMKWSN" );
@@ -2171,7 +2180,6 @@ public final class Test {
             if ( !t3_rt.getNode( "node b" ).getNodeData().getBinaryCharacters().getType().equals( "characters" ) ) {
                 return false;
             }
-            //
             if ( !t3_rt.getNode( "node ba" ).getNodeData().getDate().getDesc().equals( "Silurian" ) ) {
                 return false;
             }
@@ -3045,7 +3053,6 @@ public final class Test {
             if ( !isEqual( t_bx.getNode( "acd" ).getBranchData().getConfidence( 0 ).getValue(), 1 ) ) {
                 return false;
             }
-            //
             final Phylogeny[] t2 = factory
                     .create( "((((a,b),c),d),e);(((a,b),c),(d,e));(((((a,b),c),d),e),f);((((a,b),c),(d,e)),f);(((a,b),c),d,e);((a,b,c),d,e);",
                              new NHXParser() );
@@ -3055,7 +3062,6 @@ public final class Test {
             for( final Phylogeny target : t2 ) {
                 ConfidenceAssessor.evaluate( "bootstrap", ev2, target, false, 1 );
             }
-            //
             final Phylogeny t4 = factory.create( "((((((A,B)ab,C)abc,D)abcd,E)abcde,F)abcdef,G)abcdefg",
                                                  new NHXParser() )[ 0 ];
             final Phylogeny[] ev4 = factory.create( "(((A,B),C),(X,Y));((F,G),((A,B,C),(D,E)))", new NHXParser() );
@@ -3990,10 +3996,6 @@ public final class Test {
                 System.out.println( entry.getSequenceName() );
                 return false;
             }
-            // if ( !entry.getSequenceSymbol().equals( "" ) ) {
-            //     System.out.println( entry.getSequenceSymbol() );
-            //     return false;
-            // }
             if ( !entry.getGeneName().equals( "treX-like" ) ) {
                 System.out.println( entry.getGeneName() );
                 return false;
@@ -4013,7 +4015,6 @@ public final class Test {
             if ( entry.getCrossReferences().size() != 5 ) {
                 return false;
             }
-            //
             final SequenceDatabaseEntry entry1 = SequenceDbWsTools.obtainEntry( "ABJ16409" );
             if ( !entry1.getAccession().equals( "ABJ16409" ) ) {
                 return false;
@@ -4037,7 +4038,6 @@ public final class Test {
             if ( entry1.getCrossReferences().size() != 6 ) {
                 return false;
             }
-            //
             final SequenceDatabaseEntry entry2 = SequenceDbWsTools.obtainEntry( "NM_184234" );
             if ( !entry2.getAccession().equals( "NM_184234" ) ) {
                 return false;
@@ -4089,8 +4089,6 @@ public final class Test {
             if ( entry3.getCrossReferences().size() != 8 ) {
                 return false;
             }
-            //
-            //
             final SequenceDatabaseEntry entry4 = SequenceDbWsTools.obtainEntry( "AAA36557.1" );
             if ( !entry4.getAccession().equals( "AAA36557" ) ) {
                 return false;
@@ -4510,6 +4508,44 @@ public final class Test {
         return true;
     }
 
+    private static boolean testExtractTaxonomyDataFromNodeName() {
+        try {
+            PhylogenyNode n = new PhylogenyNode( "tr|B1AM49|B1AM49_HUMAN" );
+            if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) {
+                return false;
+            }
+            n = new PhylogenyNode( "tr|B1AM49|B1AM49_HUMAN~1-2" );
+            if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) {
+                return false;
+            }
+            n = new PhylogenyNode( "tr|B1AM49|HNRPR_HUMAN" );
+            if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) {
+                return false;
+            }
+            n = new PhylogenyNode( "tr|B1AM49|HNRPR_HUMAN|" );
+            if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) {
+                return false;
+            }
+            n = new PhylogenyNode( "tr|B1AM49|HNRPR_HUMAN~12" );
+            if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) {
+                return false;
+            }
+            n = new PhylogenyNode( "HNRPR_HUMAN" );
+            if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) {
+                return false;
+            }
+            n = new PhylogenyNode( "HNRPR_HUMAN_X" );
+            if ( !ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE ).equals( "HUMAN" ) ) {
+                return false;
+            }
+        }
+        catch ( final Exception e ) {
+            e.printStackTrace( System.out );
+            return false;
+        }
+        return true;
+    }
+
     private static boolean testExtractTaxonomyCodeFromNodeName() {
         try {
             if ( ParserUtils.extractTaxonomyCodeFromNodeName( "MOUSE", TAXONOMY_EXTRACTION.PFAM_STYLE_RELAXED ) != null ) {
@@ -4846,7 +4882,7 @@ public final class Test {
             if ( !msa_0.getSequenceAsString( 1 ).toString().equalsIgnoreCase( "DKXASDFXSFXFKFKSXDFKSLX" ) ) {
                 return false;
             }
-            if ( !msa_0.getSequenceAsString( 2 ).toString().equalsIgnoreCase( "SXDFKSXLFSFPWEXPRXWXERR" ) ) {
+            if ( !msa_0.getSequenceAsString( 2 ).toString().equalsIgnoreCase( "SXDFKSXLFSFPWEXPROWXERR" ) ) {
                 return false;
             }
             if ( !msa_0.getSequenceAsString( 3 ).toString().equalsIgnoreCase( "AAAAAAAAAAAAAAAAAAAAAAA" ) ) {
@@ -6326,7 +6362,6 @@ public final class Test {
             if ( !dmsa1.getSequenceAsString( 1 ).toString().equals( "EAAC" ) ) {
                 return false;
             }
-            //
             final MolecularSequence s__0 = BasicSequence.createAaSequence( "a", "A------" );
             final MolecularSequence s__1 = BasicSequence.createAaSequence( "b", "BB-----" );
             final MolecularSequence s__2 = BasicSequence.createAaSequence( "c", "CCC----" );
@@ -6454,8 +6489,6 @@ public final class Test {
             if ( !ext.get( 4 ).getName().equals( "h" ) ) {
                 return false;
             }
-            //
-            //
             ext.clear();
             final StringBuffer sb2 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" );
             final Phylogeny t2 = factory.create( sb2, new NHXParser() )[ 0 ];
@@ -6484,8 +6517,6 @@ public final class Test {
             if ( !ext.get( 3 ).getName().equals( "gh" ) ) {
                 return false;
             }
-            //
-            //
             ext.clear();
             final StringBuffer sb3 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" );
             final Phylogeny t3 = factory.create( sb3, new NHXParser() )[ 0 ];
@@ -6512,8 +6543,6 @@ public final class Test {
             if ( !ext.get( 2 ).getName().equals( "fgh" ) ) {
                 return false;
             }
-            //
-            //
             ext.clear();
             final StringBuffer sb4 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" );
             final Phylogeny t4 = factory.create( sb4, new NHXParser() )[ 0 ];
@@ -6530,8 +6559,6 @@ public final class Test {
             if ( n.getNextExternalNodeWhileTakingIntoAccountCollapsedNodes() != null ) {
                 return false;
             }
-            //
-            //
             final StringBuffer sb5 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" );
             final Phylogeny t5 = factory.create( sb5, new NHXParser() )[ 0 ];
             ext.clear();
@@ -6567,8 +6594,6 @@ public final class Test {
             if ( !ext.get( 7 ).getName().equals( "h" ) ) {
                 return false;
             }
-            //
-            //
             final StringBuffer sb6 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" );
             final Phylogeny t6 = factory.create( sb6, new NHXParser() )[ 0 ];
             ext.clear();
@@ -6602,8 +6627,6 @@ public final class Test {
             if ( !ext.get( 6 ).getName().equals( "h" ) ) {
                 return false;
             }
-            //
-            //
             final StringBuffer sb7 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" );
             final Phylogeny t7 = factory.create( sb7, new NHXParser() )[ 0 ];
             ext.clear();
@@ -6637,8 +6660,6 @@ public final class Test {
             if ( !ext.get( 6 ).getName().equals( "h" ) ) {
                 return false;
             }
-            //
-            //
             final StringBuffer sb8 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h))fgh)cdefgh)abcdefgh" );
             final Phylogeny t8 = factory.create( sb8, new NHXParser() )[ 0 ];
             ext.clear();
@@ -6675,8 +6696,6 @@ public final class Test {
             if ( !ext.get( 6 ).getName().equals( "h" ) ) {
                 return false;
             }
-            //
-            //
             final StringBuffer sb9 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" );
             final Phylogeny t9 = factory.create( sb9, new NHXParser() )[ 0 ];
             ext.clear();
@@ -6710,8 +6729,6 @@ public final class Test {
             if ( !ext.get( 6 ).getName().equals( "gh" ) ) {
                 return false;
             }
-            //
-            //
             final StringBuffer sb10 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" );
             final Phylogeny t10 = factory.create( sb10, new NHXParser() )[ 0 ];
             ext.clear();
@@ -6747,8 +6764,6 @@ public final class Test {
             if ( !ext.get( 6 ).getName().equals( "gh" ) ) {
                 return false;
             }
-            //
-            //
             final StringBuffer sb11 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" );
             final Phylogeny t11 = factory.create( sb11, new NHXParser() )[ 0 ];
             ext.clear();
@@ -6780,8 +6795,6 @@ public final class Test {
             if ( !ext.get( 5 ).getName().equals( "fgh" ) ) {
                 return false;
             }
-            //
-            //
             final StringBuffer sb12 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" );
             final Phylogeny t12 = factory.create( sb12, new NHXParser() )[ 0 ];
             ext.clear();
@@ -6816,8 +6829,6 @@ public final class Test {
             if ( !ext.get( 5 ).getName().equals( "fgh" ) ) {
                 return false;
             }
-            //
-            //
             final StringBuffer sb13 = new StringBuffer( "((a,b)ab,(((c,d)cd,e)cde,(f,(g,h)gh)fgh)cdefgh)abcdefgh" );
             final Phylogeny t13 = factory.create( sb13, new NHXParser() )[ 0 ];
             ext.clear();
@@ -6848,8 +6859,6 @@ public final class Test {
             if ( !ext.get( 4 ).getName().equals( "fgh" ) ) {
                 return false;
             }
-            //
-            //
             final StringBuffer sb14 = new StringBuffer( "((a,b,0)ab,(((c,d)cd,e)cde,(f,(g,h,1,2)gh,0)fgh)cdefgh)abcdefgh" );
             final Phylogeny t14 = factory.create( sb14, new NHXParser() )[ 0 ];
             ext.clear();
@@ -6880,8 +6889,6 @@ public final class Test {
             if ( !ext.get( 4 ).getName().equals( "fgh" ) ) {
                 return false;
             }
-            //
-            //
             final StringBuffer sb15 = new StringBuffer( "((a,b,0)ab,(((c,d)cd,e)cde,x,(f,(g,h,1,2)gh,0)fgh)cdefgh)abcdefgh" );
             final Phylogeny t15 = factory.create( sb15, new NHXParser() )[ 0 ];
             ext.clear();
@@ -7338,7 +7345,6 @@ public final class Test {
             if ( phy != null ) {
                 return false;
             }
-            //
             p.reset();
             if ( !p.hasNext() ) {
                 return false;
@@ -7360,7 +7366,6 @@ public final class Test {
             if ( phy != null ) {
                 return false;
             }
-            ////
             p.setSource( Test.PATH_TO_TEST_DATA + "nexus_test_2.nex" );
             if ( !p.hasNext() ) {
                 return false;
@@ -7382,7 +7387,6 @@ public final class Test {
             if ( phy != null ) {
                 return false;
             }
-            //
             p.reset();
             if ( !p.hasNext() ) {
                 return false;
@@ -7404,7 +7408,6 @@ public final class Test {
             if ( phy != null ) {
                 return false;
             }
-            //
             p.setSource( Test.PATH_TO_TEST_DATA + "nexus_test_3.nex" );
             if ( !p.hasNext() ) {
                 return false;
@@ -11271,8 +11274,7 @@ public final class Test {
             }
             final PhylogenyNode n2 = new PhylogenyNode( "NM_001030253" );
             SequenceDbWsTools.obtainSeqInformation( n2 );
-            if ( !n2.getNodeData().getSequence().getName()
-                    .equals( "Danio rerio B-cell leukemia/lymphoma 2 (bcl2), mRNA" ) ) {
+            if ( !n2.getNodeData().getSequence().getName().equals( "Danio rerio B-cell CLL/lymphoma 2a (bcl2a), mRNA" ) ) {
                 return false;
             }
             if ( !n2.getNodeData().getTaxonomy().getScientificName().equals( "Danio rerio" ) ) {
@@ -11411,7 +11413,6 @@ public final class Test {
                 System.out.println( "provider=" + id.getSource() );
                 return false;
             }
-            //
             id = SequenceAccessionTools.parseAccessorFromString( "N3B004Z009" );
             if ( ( id == null ) || ForesterUtil.isEmpty( id.getValue() ) || ForesterUtil.isEmpty( id.getSource() )
                     || !id.getValue().equals( "N3B004Z009" ) || !id.getSource().equals( "uniprot" ) ) {
@@ -11616,14 +11617,12 @@ public final class Test {
             if ( !s0.match( query_nodes ) ) {
                 return false;
             }
-            //
             query_nodes = new HashSet<PhylogenyNode>();
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) );
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) );
             if ( !s0.match( query_nodes ) ) {
                 return false;
             }
-            //
             query_nodes = new HashSet<PhylogenyNode>();
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) );
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) );
@@ -11633,7 +11632,6 @@ public final class Test {
             if ( !s0.match( query_nodes ) ) {
                 return false;
             }
-            //
             query_nodes = new HashSet<PhylogenyNode>();
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) );
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) );
@@ -11641,7 +11639,6 @@ public final class Test {
             if ( !s0.match( query_nodes ) ) {
                 return false;
             }
-            //
             query_nodes = new HashSet<PhylogenyNode>();
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) );
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) );
@@ -11650,14 +11647,12 @@ public final class Test {
             if ( !s0.match( query_nodes ) ) {
                 return false;
             }
-            //
             query_nodes = new HashSet<PhylogenyNode>();
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) );
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) );
             if ( s0.match( query_nodes ) ) {
                 return false;
             }
-            //
             query_nodes = new HashSet<PhylogenyNode>();
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) );
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) );
@@ -11666,7 +11661,6 @@ public final class Test {
             if ( s0.match( query_nodes ) ) {
                 return false;
             }
-            //
             query_nodes = new HashSet<PhylogenyNode>();
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) );
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) );
@@ -11676,7 +11670,6 @@ public final class Test {
             if ( s0.match( query_nodes ) ) {
                 return false;
             }
-            //
             query_nodes = new HashSet<PhylogenyNode>();
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) );
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) );
@@ -11684,49 +11677,42 @@ public final class Test {
             if ( s0.match( query_nodes ) ) {
                 return false;
             }
-            //
             query_nodes = new HashSet<PhylogenyNode>();
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) );
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) );
             if ( s0.match( query_nodes ) ) {
                 return false;
             }
-            //
             query_nodes = new HashSet<PhylogenyNode>();
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) );
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) );
             if ( s0.match( query_nodes ) ) {
                 return false;
             }
-            //
             query_nodes = new HashSet<PhylogenyNode>();
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) );
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "C" ) );
             if ( s0.match( query_nodes ) ) {
                 return false;
             }
-            //
             query_nodes = new HashSet<PhylogenyNode>();
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) );
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) );
             if ( s0.match( query_nodes ) ) {
                 return false;
             }
-            //
             query_nodes = new HashSet<PhylogenyNode>();
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) );
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) );
             if ( s0.match( query_nodes ) ) {
                 return false;
             }
-            //
             query_nodes = new HashSet<PhylogenyNode>();
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) );
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "G" ) );
             if ( s0.match( query_nodes ) ) {
                 return false;
             }
-            //
             query_nodes = new HashSet<PhylogenyNode>();
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) );
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "F" ) );
@@ -11734,7 +11720,6 @@ public final class Test {
             if ( s0.match( query_nodes ) ) {
                 return false;
             }
-            //
             query_nodes = new HashSet<PhylogenyNode>();
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "A" ) );
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "B" ) );
@@ -11742,7 +11727,6 @@ public final class Test {
             if ( s0.match( query_nodes ) ) {
                 return false;
             }
-            //
             query_nodes = new HashSet<PhylogenyNode>();
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) );
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) );
@@ -11750,7 +11734,6 @@ public final class Test {
             if ( s0.match( query_nodes ) ) {
                 return false;
             }
-            //
             query_nodes = new HashSet<PhylogenyNode>();
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "E" ) );
             query_nodes.add( PhylogenyNode.createInstanceFromNhxString( "D" ) );
@@ -12535,7 +12518,6 @@ public final class Test {
                 System.out.println( n17.toString() );
                 return false;
             }
-            //
             final PhylogenyNode n18 = PhylogenyNode
                     .createInstanceFromNhxString( "Mus_musculus_musculus_392", NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE );
             if ( !n18.getNodeData().getTaxonomy().getScientificName().equals( "Mus musculus musculus" ) ) {
@@ -12562,13 +12544,6 @@ public final class Test {
                 System.out.println( n21.toString() );
                 return false;
             }
-            final PhylogenyNode n22 = PhylogenyNode
-                    .createInstanceFromNhxString( "NEMVE_Nematostella_vectensis",
-                                                  NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE );
-            if ( !n22.getNodeData().getTaxonomy().getTaxonomyCode().equals( "NEMVE" ) ) {
-                System.out.println( n22.toString() );
-                return false;
-            }
             final PhylogenyNode n23 = PhylogenyNode
                     .createInstanceFromNhxString( "9EMVE_Nematostella_vectensis",
                                                   NHXParser.TAXONOMY_EXTRACTION.AGGRESSIVE );
index f8fc57b..a4f6bcd 100644 (file)
@@ -27,7 +27,7 @@ package org.forester.util;
 
 public final class ForesterConstants {
 
-    public final static String  FORESTER_VERSION            = "1.035";
+    public final static String  FORESTER_VERSION            = "1.036";
     public final static String  FORESTER_DATE               = "140811";
     public final static String  PHYLO_XML_VERSION           = "1.10";
     public final static String  PHYLO_XML_LOCATION          = "http://www.phyloxml.org";
index 58271c3..8d36a6e 100644 (file)
@@ -105,9 +105,6 @@ public final class ForesterUtil {
         FORMATTER_3 = new DecimalFormat( "#.###", dfs );
     }
 
-    private ForesterUtil() {
-    }
-
     final public static void appendSeparatorIfNotEmpty( final StringBuffer sb, final char separator ) {
         if ( sb.length() > 0 ) {
             sb.append( separator );
@@ -204,6 +201,39 @@ public final class ForesterUtil {
         }
     }
 
+    /**
+     * Helper method for calcColor methods.
+     * 
+     * @param smallercolor_component_x
+     *            color component the smaller color
+     * @param largercolor_component_x
+     *            color component the larger color
+     * @param x
+     *            factor
+     * @return an int representing a color component
+     */
+    final private static int calculateColorComponent( final double smallercolor_component_x,
+                                                      final double largercolor_component_x,
+                                                      final double x ) {
+        return ( int ) ( smallercolor_component_x + ( ( x * ( largercolor_component_x - smallercolor_component_x ) ) / 255.0 ) );
+    }
+
+    /**
+     * Helper method for calcColor methods.
+     * 
+     * 
+     * @param value
+     *            the value
+     * @param larger
+     *            the largest value
+     * @param smaller
+     *            the smallest value
+     * @return a normalized value between larger and smaller
+     */
+    final private static double calculateColorFactor( final double value, final double larger, final double smaller ) {
+        return ( 255.0 * ( value - smaller ) ) / ( larger - smaller );
+    }
+
     public static int calculateOverlap( final Domain domain, final List<Boolean> covered_positions ) {
         int overlap_count = 0;
         for( int i = domain.getFrom(); i <= domain.getTo(); ++i ) {
@@ -497,6 +527,22 @@ public final class ForesterUtil {
         return ForesterUtil.LINE_SEPARATOR;
     }
 
+    final public static MolecularSequence.TYPE guessMolecularSequenceType( final String mol_seq ) {
+        if ( mol_seq.contains( "L" ) || mol_seq.contains( "I" ) || mol_seq.contains( "E" ) || mol_seq.contains( "H" )
+                || mol_seq.contains( "D" ) || mol_seq.contains( "Q" ) ) {
+            return TYPE.AA;
+        }
+        else {
+            if ( mol_seq.contains( "T" ) ) {
+                return TYPE.DNA;
+            }
+            else if ( mol_seq.contains( "U" ) ) {
+                return TYPE.RNA;
+            }
+        }
+        return null;
+    }
+
     final public static void increaseCountingMap( final Map<String, Integer> counting_map, final String item_name ) {
         if ( !counting_map.containsKey( item_name ) ) {
             counting_map.put( item_name, 1 );
@@ -1052,6 +1098,20 @@ public final class ForesterUtil {
         System.out.println( "[" + prg_name + "] > " + message );
     }
 
+    public static List<String> readUrl( final String url_str ) throws IOException {
+        final URL url = new URL( url_str );
+        final URLConnection urlc = url.openConnection();
+        //urlc.setRequestProperty( "User-Agent", "" );
+        final BufferedReader in = new BufferedReader( new InputStreamReader( urlc.getInputStream() ) );
+        String line;
+        final List<String> result = new ArrayList<String>();
+        while ( ( line = in.readLine() ) != null ) {
+            result.add( line );
+        }
+        in.close();
+        return result;
+    }
+
     /**
      * 
      * Example regarding engulfment: ------------0.1 ----------0.2 --0.3 =>
@@ -1203,6 +1263,11 @@ public final class ForesterUtil {
         return false;
     }
 
+    final private static String[] splitString( final String str ) {
+        final String regex = "[\\s;,]+";
+        return str.split( regex );
+    }
+
     final public static String stringArrayToString( final String[] a ) {
         return stringArrayToString( a, ", " );
     }
@@ -1348,57 +1413,6 @@ public final class ForesterUtil {
         return sb.toString();
     }
 
-    /**
-     * Helper method for calcColor methods.
-     * 
-     * @param smallercolor_component_x
-     *            color component the smaller color
-     * @param largercolor_component_x
-     *            color component the larger color
-     * @param x
-     *            factor
-     * @return an int representing a color component
-     */
-    final private static int calculateColorComponent( final double smallercolor_component_x,
-                                                      final double largercolor_component_x,
-                                                      final double x ) {
-        return ( int ) ( smallercolor_component_x + ( ( x * ( largercolor_component_x - smallercolor_component_x ) ) / 255.0 ) );
-    }
-
-    final public static MolecularSequence.TYPE guessMolecularSequenceType( final String mol_seq ) {
-        if ( mol_seq.contains( "L" ) || mol_seq.contains( "I" ) || mol_seq.contains( "E" ) || mol_seq.contains( "H" )
-                || mol_seq.contains( "D" ) || mol_seq.contains( "Q" ) ) {
-            return TYPE.AA;
-        }
-        else {
-            if ( mol_seq.contains( "T" ) ) {
-                return TYPE.DNA;
-            }
-            else if ( mol_seq.contains( "U" ) ) {
-                return TYPE.RNA;
-            }
-        }
-        return null;
-    }
-
-    /**
-     * Helper method for calcColor methods.
-     * 
-     * 
-     * @param value
-     *            the value
-     * @param larger
-     *            the largest value
-     * @param smaller
-     *            the smallest value
-     * @return a normalized value between larger and smaller
-     */
-    final private static double calculateColorFactor( final double value, final double larger, final double smaller ) {
-        return ( 255.0 * ( value - smaller ) ) / ( larger - smaller );
-    }
-
-    final private static String[] splitString( final String str ) {
-        final String regex = "[\\s;,]+";
-        return str.split( regex );
+    private ForesterUtil() {
     }
 }
index ee5427a..ef38563 100644 (file)
@@ -182,7 +182,8 @@ public final class TaxonomyUtil {
         put( "MNELE", "ctenophora" );
         put( "AMPQE", "porifera" );
         put( "MONBE", "choanoflagellida" );
-        put( "SALS5", "choanoflagellida" );
+        put( "SALS5", "choanoflagellida" ); //TODO remove me
+        put( "SALR5", "choanoflagellida" );
         put( "AMOPA", "ichthyophonida & filasterea" );
         put( "SARXX", "ichthyophonida & filasterea" );
         put( "CAPO3", "ichthyophonida & filasterea" );
@@ -578,6 +579,7 @@ public final class TaxonomyUtil {
         FAKE_CODE_TO_ID_MAP.put( "CTEXX", 283909 );
         FAKE_CODE_TO_ID_MAP.put( "HMAXX", 6085 );
         FAKE_CODE_TO_ID_MAP.put( "SARXX", 72019 );
+        FAKE_CODE_TO_ID_MAP.put( "SPHAR", 72019 ); //TODO is same as Sphingomonas aromaticivorans
         FAKE_CODE_TO_ID_MAP.put( "AALXX", 398408 );
         FAKE_CODE_TO_ID_MAP.put( "PFIXX", 83344 );
         FAKE_CODE_TO_ID_MAP.put( "MPSXX", 692275 );
index fde5351..606d35a 100644 (file)
@@ -55,17 +55,17 @@ import org.forester.util.SequenceAccessionTools;
 
 public final class SequenceDbWsTools {
 
-    public final static String   BASE_UNIPROT_URL        = "http://www.uniprot.org/";
-    public final static int      DEFAULT_LINES_TO_RETURN = 4000;
-    public final static String   EMBL_DBS_REFSEQ_N       = "refseqn";
-    public final static String   EMBL_DBS_REFSEQ_P       = "refseqp";
-    public final static String   EMBL_GENBANK            = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=GENBANK&style=raw&id=";
-    public final static String   EMBL_REFSEQ             = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=REFSEQ&style=raw&id=";
-    public final static String   EMBL_EMBL               = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=EMBL&style=raw&id=";
-    private final static boolean DEBUG                   = true;
-    private final static String  URL_ENC                 = "UTF-8";
-    private final static int     SLEEP                   = 200;
-    private static final boolean ALLOW_TO_OVERWRITE_MOL_SEQ = true;
+    public final static String   BASE_UNIPROT_URL           = "http://www.uniprot.org/";
+    public final static int      DEFAULT_LINES_TO_RETURN    = 4000;
+    public final static String   EMBL_DBS_REFSEQ_N          = "refseqn";
+    public final static String   EMBL_DBS_REFSEQ_P          = "refseqp";
+    public final static String   EMBL_GENBANK               = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=GENBANK&style=raw&id=";
+    public final static String   EMBL_REFSEQ                = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=REFSEQ&style=raw&id=";
+    public final static String   EMBL_EMBL                  = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch?db=EMBL&style=raw&id=";
+    private final static boolean DEBUG                      = true;
+    private final static String  URL_ENC                    = "UTF-8";
+    private final static int     SLEEP                      = 200;
+    private static final boolean ALLOW_TO_OVERWRITE_MOL_SEQ = false;
 
     public static List<UniProtTaxonomy> getTaxonomiesFromCommonNameStrict( final String cn,
                                                                            final int max_taxonomies_return )