in progress...
authorcmzmasek <chris.zma@outlook.com>
Wed, 12 Jul 2017 17:15:28 +0000 (10:15 -0700)
committercmzmasek <chris.zma@outlook.com>
Wed, 12 Jul 2017 17:15:28 +0000 (10:15 -0700)
forester/java/src/org/forester/application/rename.java [new file with mode: 0644]
forester/java/src/org/forester/application/rename_fasta.java [new file with mode: 0644]
forester/java/src/org/forester/archaeopteryx/AptxConstants.java
forester/java/src/org/forester/archaeopteryx/TreePanel.java
forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java
forester/java/src/org/forester/tools/PhylogenyDecorator.java

diff --git a/forester/java/src/org/forester/application/rename.java b/forester/java/src/org/forester/application/rename.java
new file mode 100644 (file)
index 0000000..68bd95d
--- /dev/null
@@ -0,0 +1,51 @@
+
+package org.forester.application;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.forester.io.parsers.PhylogenyParser;
+import org.forester.io.parsers.util.ParserUtils;
+import org.forester.io.writers.PhylogenyWriter;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
+import org.forester.phylogeny.factories.PhylogenyFactory;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.util.ForesterUtil;
+
+/**
+ * Command line tool which shortens the node names of a phylogeny: for every
+ * node whose name contains an underscore at a position greater than zero,
+ * only the part following the last underscore is kept.
+ * <p>
+ * Usage: {@code rename <infile> <outfile>}
+ */
+public class rename {
+
+    /**
+     * Reads the first phylogeny found in the input file, renames its nodes
+     * and writes the result to the output file via
+     * {@code PhylogenyWriter.toNewHampshire}.
+     * <p>
+     * The process terminates with exit status -1 if fewer than two arguments
+     * are given, if the input cannot be read, or if the output cannot be
+     * written.
+     *
+     * @param args args[ 0 ] is the input tree file, args[ 1 ] is the output file
+     */
+    public static void main( final String args[] ) {
+        // Without this guard a missing argument surfaces as a raw
+        // ArrayIndexOutOfBoundsException instead of a usable message.
+        if ( args.length < 2 ) {
+            System.out.println( "\nUsage: rename <infile> <outfile>\n" );
+            System.exit( -1 );
+        }
+        final File infile = new File( args[ 0 ] );
+        final File outfile = new File( args[ 1 ] );
+        Phylogeny p = null;
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( infile, true );
+            // Only the first phylogeny of the input file is processed.
+            p = factory.create( infile, pp )[ 0 ];
+        }
+        catch ( final Exception e ) {
+            System.out.println( "\nCould not read \"" + infile + "\" [" + e.getMessage() + "]\n" );
+            System.exit( -1 );
+        }
+        for( final PhylogenyNodeIterator iter = p.iteratorPreorder(); iter.hasNext(); ) {
+            final PhylogenyNode node = iter.next();
+            final String node_name = node.getName();
+            if ( !ForesterUtil.isEmpty( node_name ) ) {
+                final int i = node_name.lastIndexOf( '_' );
+                // i > 0: names without an underscore (i == -1) and names whose
+                // only underscore is the first character (i == 0) stay unchanged.
+                if ( i > 0 ) {
+                    node.setName( node_name.substring( i + 1 ) );
+                }
+            }
+        }
+        try {
+            final PhylogenyWriter w = new PhylogenyWriter();
+            w.toNewHampshire( p, true, outfile );
+        }
+        catch ( final IOException e ) {
+            System.out.println( "\nFailure to write output [" + e.getMessage() + "]\n" );
+            System.exit( -1 );
+        }
+    }
+}
diff --git a/forester/java/src/org/forester/application/rename_fasta.java b/forester/java/src/org/forester/application/rename_fasta.java
new file mode 100644 (file)
index 0000000..c5c4bd6
--- /dev/null
@@ -0,0 +1,80 @@
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
+//
+//
+// "java -Xmx1024m -cp path\to\forester.jar org.forester.application.rename_fasta <infile> <outfile>"
+//
+//
+
+package org.forester.application;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.forester.io.parsers.FastaParser;
+import org.forester.io.writers.SequenceWriter;
+import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
+import org.forester.sequence.BasicSequence;
+import org.forester.sequence.MolecularSequence;
+import org.forester.util.CommandLineArguments;
+import org.forester.util.ForesterUtil;
+
+/**
+ * Command line tool which shortens the identifiers of the sequences in a
+ * FASTA file: for every identifier containing an underscore at a position
+ * greater than zero, only the part following the last underscore is kept.
+ * <p>
+ * Usage: {@code rename_fasta <infile> <outfile>}
+ */
+public final class rename_fasta {
+
+    /**
+     * Parses the input file with {@code FastaParser}, renames the sequence
+     * identifiers and writes all sequences to the output file in FASTA
+     * format (the value 60 is passed to {@code SequenceWriter.writeSeqs}).
+     * <p>
+     * The process terminates with exit status -1 if fewer than two arguments
+     * are given or if reading or writing fails.
+     *
+     * @param args args[ 0 ] is the input FASTA file, args[ 1 ] is the output file
+     */
+    public static void main( final String args[] ) {
+        // Without this guard a missing argument surfaces as a raw
+        // ArrayIndexOutOfBoundsException instead of a usable message.
+        if ( args.length < 2 ) {
+            System.out.println( "\nUsage: rename_fasta <infile> <outfile>\n" );
+            System.exit( -1 );
+        }
+        final File infile = new File( args[ 0 ] );
+        final File outfile = new File( args[ 1 ] );
+        // try-with-resources guarantees that the input stream is released,
+        // whether or not the parser closes it itself.
+        try ( final FileInputStream in = new FileInputStream( infile ) ) {
+            final List<MolecularSequence> seqs = FastaParser.parse( in );
+            for( final MolecularSequence seq : seqs ) {
+                // NOTE(review): assumes FastaParser only yields BasicSequence
+                // instances -- confirm against the parser.
+                final BasicSequence bseq = ( BasicSequence ) seq;
+                final String id = bseq.getIdentifier();
+                if ( !ForesterUtil.isEmpty( id ) ) {
+                    final int i = id.lastIndexOf( '_' );
+                    // i > 0: identifiers without an underscore (i == -1) and those
+                    // whose only underscore is the first character stay unchanged.
+                    if ( i > 0 ) {
+                        bseq.setIdentifier( id.substring( i + 1 ) );
+                    }
+                }
+            }
+            SequenceWriter.writeSeqs( seqs, outfile, SEQ_FORMAT.FASTA, 60 );
+        }
+        catch ( final IOException e ) {
+            // FileNotFoundException is an IOException, so one handler covers both.
+            // Report and exit non-zero so that callers and scripts notice the failure.
+            System.out.println( "\nFailure to rename \"" + infile + "\" [" + e.getMessage() + "]\n" );
+            System.exit( -1 );
+        }
+    }
+}
index cf20424..479d846 100644 (file)
@@ -39,7 +39,7 @@ public final class AptxConstants {
     final static boolean        __ALLOW_PHYLOGENETIC_INFERENCE                                = true;
     public final static String  PRG_NAME                                                      = "Archaeopteryx";
     final static String         VERSION                                                       = "0.9921 beta";
-    final static String         PRG_DATE                                                      = "170322";
+    final static String         PRG_DATE                                                      = "170712";
     final static String         DEFAULT_CONFIGURATION_FILE_NAME                               = "_aptx_configuration_file";
     final static String[]       DEFAULT_FONT_CHOICES                                          = { 
             "Arial Unicode MS", "Dialog", "SansSerif", "Sans", "Arial", "Helvetica" };
@@ -103,9 +103,9 @@ public final class AptxConstants {
     public final static Color   DOMAIN_BASE_COLOR_FOR_PDF                                     = new Color( 100,
                                                                                                            100,
                                                                                                            100 );
-    public final static Color   DOMAIN_LABEL_COLOR_FOR_PDF                                    = new Color( 150,
-                                                                                                           150,
-                                                                                                           150 );
+    public final static Color   DOMAIN_LABEL_COLOR_FOR_PDF                                    = new Color( 0,
+                                                                                                           0,
+                                                                                                           0 );
     final static short          DEFAULT_NODE_SHAPE_SIZE_DEFAULT                               = 7;
     static final int            MAX_LENGTH_FOR_COLLAPSED_NAME = 8;
 }
index af3e9ee..3a8ba12 100644 (file)
@@ -1425,7 +1425,7 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee
                 sortDescendants( node );
                 break;
             case GET_EXT_DESC_DATA:
-                showExtDescNodeData( node );
+                showExtDescNodeData( node, '_' );
                 break;
             case UNCOLLAPSE_ALL:
                 uncollapseAll( node );
@@ -2098,7 +2098,7 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee
                                           final boolean to_pdf,
                                           final boolean to_graphics_file ) {
         g.setFont( getTreeFontSet().getSmallFont() );
-        if ( ( to_pdf || to_graphics_file ) && getOptions().isPrintBlackAndWhite() ) {
+        if ( to_pdf || ( to_graphics_file && getOptions().isPrintBlackAndWhite() ) ) {
             g.setColor( Color.BLACK );
         }
         else {
@@ -2531,7 +2531,7 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee
             else if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.ROUNDED ) {
                 x += ROUNDED_D;
             }
-            if ( ( to_pdf || to_graphics_file ) && getOptions().isPrintBlackAndWhite() ) {
+            if ( to_pdf || ( to_graphics_file && getOptions().isPrintBlackAndWhite() ) ) {
                 g.setColor( Color.BLACK );
             }
             else {
@@ -3491,7 +3491,7 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee
                     if ( getOptions().isLineUpRendarableNodeData() ) {
                         if ( getOptions().isRightLineUpDomains() ) {
                             rds.render( ( float ) ( ( getMaxDistanceToRoot() * getXcorrectionFactor() )
-                                    + _length_of_longest_text
+                                    + _length_of_longest_text + 50 //TODO why plus 50?
                                     + ( ( _longest_domain - rds.getTotalLength() ) * rds.getRenderingFactorWidth() ) ),
                                         node.getYcoord() - ( h / 2.0f ),
                                         g,
@@ -3500,7 +3500,11 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee
                         }
                         else {
                             rds.render( ( float ) ( ( getMaxDistanceToRoot() * getXcorrectionFactor() )
-                                    + _length_of_longest_text ), node.getYcoord() - ( h / 2.0f ), g, this, to_pdf );
+                                    + _length_of_longest_text + 50 ),
+                                        node.getYcoord() - ( h / 2.0f ),
+                                        g,
+                                        this,
+                                        to_pdf );
                         }
                     }
                     else {
@@ -4073,7 +4077,7 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee
         _urt_factor_ov = urt_factor_ov;
     }
 
-    private void showExtDescNodeData( final PhylogenyNode node ) {
+    private void showExtDescNodeData( final PhylogenyNode node, final char separator ) {
         final List<String> data = new ArrayList<String>();
         final List<PhylogenyNode> nodes = node.getAllExternalDescendants();
         if ( ( getFoundNodes0() != null ) || ( getFoundNodes1() != null ) ) {
@@ -4113,44 +4117,49 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee
                     if ( n.getNodeData().isHasSequence()
                             && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getMolecularSequence() ) ) {
                         final StringBuilder ann = new StringBuilder();
-                        if ( !ForesterUtil.isEmpty( n.getName() ) ) {
+                        if ( getControlPanel().isShowNodeNames() && !ForesterUtil.isEmpty( n.getName() ) ) {
                             ann.append( n.getName() );
-                            ann.append( "|" );
+                            ann.append( separator );
                         }
-                        if ( !ForesterUtil.isEmpty( n.getNodeData().getSequence().getSymbol() ) ) {
-                            ann.append( "SYM=" );
+                        if ( getControlPanel().isShowSeqSymbols()
+                                && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getSymbol() ) ) {
                             ann.append( n.getNodeData().getSequence().getSymbol() );
-                            ann.append( "|" );
+                            ann.append( separator );
                         }
-                        if ( !ForesterUtil.isEmpty( n.getNodeData().getSequence().getName() ) ) {
-                            ann.append( "NAME=" );
+                        if ( getControlPanel().isShowSeqNames()
+                                && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getName() ) ) {
                             ann.append( n.getNodeData().getSequence().getName() );
-                            ann.append( "|" );
+                            ann.append( separator );
                         }
-                        if ( !ForesterUtil.isEmpty( n.getNodeData().getSequence().getGeneName() ) ) {
-                            ann.append( "GN=" );
+                        if ( getControlPanel().isShowGeneNames()
+                                && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getGeneName() ) ) {
                             ann.append( n.getNodeData().getSequence().getGeneName() );
-                            ann.append( "|" );
+                            ann.append( separator );
                         }
-                        if ( n.getNodeData().getSequence().getAccession() != null ) {
-                            ann.append( "ACC=" );
+                        if ( getControlPanel().isShowSequenceAcc()
+                                && n.getNodeData().getSequence().getAccession() != null ) {
                             ann.append( n.getNodeData().getSequence().getAccession().asText() );
-                            ann.append( "|" );
+                            ann.append( separator );
                         }
                         if ( n.getNodeData().isHasTaxonomy() ) {
-                            if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
-                                ann.append( "TAXID=" );
+                            if ( getControlPanel().isShowTaxonomyCode()
+                                    && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
                                 ann.append( n.getNodeData().getTaxonomy().getTaxonomyCode() );
-                                ann.append( "|" );
+                                ann.append( separator );
                             }
-                            if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
-                                ann.append( "SN=" );
+                            if ( getControlPanel().isShowTaxonomyScientificNames()
+                                    && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
                                 ann.append( n.getNodeData().getTaxonomy().getScientificName() );
-                                ann.append( "|" );
+                                ann.append( separator );
+                            }
+                            if ( getControlPanel().isShowTaxonomyCommonNames()
+                                    && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getCommonName() ) ) {
+                                ann.append( n.getNodeData().getTaxonomy().getCommonName() );
+                                ann.append( separator );
                             }
                         }
                         String ann_str;
-                        if ( ann.charAt( ann.length() - 1 ) == '|' ) {
+                        if ( ann.length() > 0 && ann.charAt( ann.length() - 1 ) == separator ) {
                             ann_str = ann.substring( 0, ann.length() - 1 );
                         }
                         else {
index c75e521..fbaf157 100644 (file)
@@ -426,12 +426,16 @@ public final class HmmscanPerDomainTableParser {
                         throw new IOException( "more than one protein named [" + query + "]" );
                     }
                 }
+                final String fail_query = prev_query; //TODO
                 prev_query = query;
                 prev_qlen = qlen;
                 prev_queries.add( query );
                 if ( ( current_protein != null ) && ( current_protein.getProteinDomains().size() > 0 ) ) {
                     addProtein( proteins, current_protein );
                 }
+                else  {
+                    System.out.println(fail_query ); //TODO
+                }
                 if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) {
                     current_protein = new BasicProtein( query, getSpecies(), qlen );
                 }
index 8aa59d8..3a5ba40 100644 (file)
@@ -71,7 +71,8 @@ public final class PhylogenyDecorator {
 
     public static void decorate( final Phylogeny phylogeny,
                                  final Map<String, Map<String, String>> map,
-                                 final boolean picky ) throws IllegalArgumentException, PhyloXmlDataFormatException {
+                                 final boolean picky )
+            throws IllegalArgumentException, PhyloXmlDataFormatException {
         for( final PhylogenyNodeIterator iter = phylogeny.iteratorPostorder(); iter.hasNext(); ) {
             final PhylogenyNode node = iter.next();
             final String name = node.getName();
@@ -86,15 +87,14 @@ public final class PhylogenyDecorator {
                         if ( new_values.containsKey( TP_TAXONOMY_ID )
                                 && new_values.containsKey( TP_TAXONOMY_ID_PROVIDER ) ) {
                             ForesterUtil.ensurePresenceOfTaxonomy( node );
-                            node.getNodeData()
-                            .getTaxonomy()
-                            .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ),
-                                                            new_values.get( TP_TAXONOMY_ID_PROVIDER ) ) );
+                            node.getNodeData().getTaxonomy()
+                                    .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ),
+                                                                    new_values.get( TP_TAXONOMY_ID_PROVIDER ) ) );
                         }
                         else if ( new_values.containsKey( TP_TAXONOMY_ID ) ) {
                             ForesterUtil.ensurePresenceOfTaxonomy( node );
                             node.getNodeData().getTaxonomy()
-                            .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ) ) );
+                                    .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ) ) );
                         }
                         if ( new_values.containsKey( TP_TAXONOMY_SN ) ) {
                             ForesterUtil.ensurePresenceOfTaxonomy( node );
@@ -111,10 +111,9 @@ public final class PhylogenyDecorator {
                         if ( new_values.containsKey( TP_SEQ_ACCESSION )
                                 && new_values.containsKey( TP_SEQ_ACCESSION_SOURCE ) ) {
                             ForesterUtil.ensurePresenceOfSequence( node );
-                            node.getNodeData()
-                            .getSequence()
-                            .setAccession( new Accession( new_values.get( TP_SEQ_ACCESSION ),
-                                                          new_values.get( TP_SEQ_ACCESSION_SOURCE ) ) );
+                            node.getNodeData().getSequence()
+                                    .setAccession( new Accession( new_values.get( TP_SEQ_ACCESSION ),
+                                                                  new_values.get( TP_SEQ_ACCESSION_SOURCE ) ) );
                         }
                         if ( new_values.containsKey( TP_SEQ_ANNOTATION_DESC ) ) {
                             ForesterUtil.ensurePresenceOfSequence( node );
@@ -159,8 +158,8 @@ public final class PhylogenyDecorator {
                                    final boolean picky,
                                    final boolean cut_name_after_space,
                                    final boolean trim_after_tilde,
-                                   final boolean verbose ) throws IllegalArgumentException, NHXFormatException,
-                                   PhyloXmlDataFormatException {
+                                   final boolean verbose )
+            throws IllegalArgumentException, NHXFormatException, PhyloXmlDataFormatException {
         return PhylogenyDecorator.decorate( phylogeny,
                                             map,
                                             field,
@@ -197,7 +196,8 @@ public final class PhylogenyDecorator {
                                    final Map<String, String> intermediate_map,
                                    final boolean cut_name_after_space,
                                    final boolean trim_after_tilde,
-                                   final boolean verbose ) throws IllegalArgumentException, PhyloXmlDataFormatException {
+                                   final boolean verbose )
+            throws IllegalArgumentException, PhyloXmlDataFormatException {
         if ( extract_bracketed_scientific_name && ( field == FIELD.TAXONOMY_SCIENTIFIC_NAME ) ) {
             throw new IllegalArgumentException( "attempt to extract bracketed scientific name together with data field pointing to scientific name" );
         }
@@ -255,7 +255,7 @@ public final class PhylogenyDecorator {
                             }
                             else if ( picky ) {
                                 throw new IllegalArgumentException( " could not get taxonomy from \"" + new_value
-                                                                    + "\"" );
+                                        + "\"" );
                             }
                         }
                         switch ( field ) {
@@ -299,7 +299,7 @@ public final class PhylogenyDecorator {
                                     node.getNodeData().setSequence( new Sequence() );
                                 }
                                 node.getNodeData().getSequence()
-                                .setDomainArchitecture( new DomainArchitecture( new_value ) );
+                                        .setDomainArchitecture( new DomainArchitecture( new_value ) );
                                 break;
                             case TAXONOMY_CODE:
                                 if ( verbose ) {
@@ -353,13 +353,13 @@ public final class PhylogenyDecorator {
                         throw new IllegalArgumentException( "node name \"" + name + "\" maps to empty value" );
                     }
                 }
-                else if ( picky ) {
+                else if ( picky && node.isExternal() ) {
                     throw new IllegalArgumentException( "node name \"" + name + "\" not found in map" );
                 }
             }
         }
         return "updated " + ext_nodes_updated + "/" + ext_nodes + " external nodes, updated " + int_nodes_updated + "/"
-        + int_nodes + " internal nodes";
+                + int_nodes + " internal nodes";
     }
 
     public static Map<String, Map<String, String>> parseMappingTable( final File mapping_table_file )
@@ -459,12 +459,12 @@ public final class PhylogenyDecorator {
     }
 
     public static enum FIELD {
-        DOMAIN_STRUCTURE,
-        MOL_SEQ,
-        NODE_NAME,
-        SEQUENCE_ANNOTATION_DESC,
-        SEQUENCE_NAME,
-        TAXONOMY_CODE,
-        TAXONOMY_SCIENTIFIC_NAME;
+                              DOMAIN_STRUCTURE,
+                              MOL_SEQ,
+                              NODE_NAME,
+                              SEQUENCE_ANNOTATION_DESC,
+                              SEQUENCE_NAME,
+                              TAXONOMY_CODE,
+                              TAXONOMY_SCIENTIFIC_NAME;
     }
 }