From c1fe7fa5cf07889a74cb6de92cd9a4f39e5687db Mon Sep 17 00:00:00 2001 From: cmzmasek Date: Wed, 12 Jul 2017 10:15:28 -0700 Subject: [PATCH] in progress... --- .../java/src/org/forester/application/rename.java | 51 +++++++++++++ .../src/org/forester/application/rename_fasta.java | 80 ++++++++++++++++++++ .../org/forester/archaeopteryx/AptxConstants.java | 8 +- .../src/org/forester/archaeopteryx/TreePanel.java | 63 ++++++++------- .../io/parsers/HmmscanPerDomainTableParser.java | 4 + .../src/org/forester/tools/PhylogenyDecorator.java | 48 ++++++------ 6 files changed, 199 insertions(+), 55 deletions(-) create mode 100644 forester/java/src/org/forester/application/rename.java create mode 100644 forester/java/src/org/forester/application/rename_fasta.java diff --git a/forester/java/src/org/forester/application/rename.java b/forester/java/src/org/forester/application/rename.java new file mode 100644 index 0000000..68bd95d --- /dev/null +++ b/forester/java/src/org/forester/application/rename.java @@ -0,0 +1,51 @@ + +package org.forester.application; + +import java.io.File; +import java.io.IOException; + +import org.forester.io.parsers.PhylogenyParser; +import org.forester.io.parsers.util.ParserUtils; +import org.forester.io.writers.PhylogenyWriter; +import org.forester.phylogeny.Phylogeny; +import org.forester.phylogeny.PhylogenyNode; +import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory; +import org.forester.phylogeny.factories.PhylogenyFactory; +import org.forester.phylogeny.iterators.PhylogenyNodeIterator; +import org.forester.util.ForesterUtil; + +public class rename { /* Command-line tool: reads a phylogeny from the file named by args[ 0 ], replaces every non-empty node name that has a '_' at index > 0 with the text after its last '_', and writes the tree to the file named by args[ 1 ] via PhylogenyWriter.toNewHampshire. */ + + public static void main( final String args[] ) { /* args[ 0 ] = input tree file, args[ 1 ] = output file; NOTE(review): args.length is not checked before indexing */ + final File infile = new File( args[ 0 ] ); + final File outfile = new File( args[ 1 ] ); + Phylogeny p = null; + try { + final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance(); + final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( infile, true ); + p = factory.create( infile, pp )[ 0 ]; /* only element 0 of the returned array is kept */ + } + 
catch ( final Exception e ) { /* any failure while reading is reported on stdout and ends the program with exit status -1 */ + System.out.println( "\nCould not read \"" + infile + "\" [" + e.getMessage() + "]\n" ); + System.exit( -1 ); + } + for( final PhylogenyNodeIterator iter = p.iteratorPreorder(); iter.hasNext(); ) { + final PhylogenyNode node = iter.next(); + final String node_name = node.getName(); + if ( !ForesterUtil.isEmpty( node_name ) ) { + final int i = node_name.lastIndexOf( '_' ); + if ( i > 0 ) { /* skipped when there is no '_' (i == -1) or the only '_' is the first character (i == 0) */ + node.setName( node_name.substring( i + 1 ) ); /* keep only what follows the last '_' */ + } + } + } + try { + final PhylogenyWriter w = new PhylogenyWriter(); + w.toNewHampshire( p, true, outfile ); + } + catch ( final IOException e ) { /* write failure: message on stdout, exit status -1 */ + System.out.println( "\nFailure to write output [" + e.getMessage() + "]\n" ); + System.exit( -1 ); + } + } +} diff --git a/forester/java/src/org/forester/application/rename_fasta.java b/forester/java/src/org/forester/application/rename_fasta.java new file mode 100644 index 0000000..c5c4bd6 --- /dev/null +++ b/forester/java/src/org/forester/application/rename_fasta.java @@ -0,0 +1,80 @@ +// $Id: +// FORESTER -- software libraries and applications +// for evolutionary biology research and applications. +// +// Copyright (C) 2008-2009 Christian M. Zmasek +// Copyright (C) 2008-2009 Burnham Institute for Medical Research +// All rights reserved +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details.
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +// +// Contact: phylosoft @ gmail . com +// WWW: https://sites.google.com/site/cmzmasek/home/software/forester +// +// +// "java -Xmx1024m -cp path\to\forester.jar org.forester.application.rename_fasta infile outfile" +// +// + +package org.forester.application; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.forester.io.parsers.FastaParser; +import org.forester.io.writers.SequenceWriter; +import org.forester.io.writers.SequenceWriter.SEQ_FORMAT; +import org.forester.sequence.BasicSequence; +import org.forester.sequence.MolecularSequence; +import org.forester.util.CommandLineArguments; +import org.forester.util.ForesterUtil; + +public final class rename_fasta { /* Command-line tool: parses the FASTA file named by args[ 0 ], replaces each sequence identifier that has a '_' at index > 0 with the text after its last '_', and writes all sequences to the file named by args[ 1 ] via SequenceWriter.writeSeqs( seqs, outfile, SEQ_FORMAT.FASTA, 60 ). */ + + + + public static void main( final String args[] ) { /* args[ 0 ] = input FASTA file, args[ 1 ] = output file; NOTE(review): args.length is not checked before indexing */ + try { + final File infile = new File( args[ 0 ] ); + final File outfile = new File( args[ 1 ] ); + List seqs; /* NOTE(review): raw List, yet the loop below iterates it as MolecularSequence - the element type parameter looks lost; confirm against the original file */ + seqs = FastaParser.parse( new FileInputStream( infile ) ); /* NOTE(review): the stream is not closed in this method - confirm whether FastaParser.parse closes it */ + for( MolecularSequence seq : seqs ) { + BasicSequence bseq = ( BasicSequence ) seq; /* throws ClassCastException if an element is not a BasicSequence */ + final int i = bseq.getIdentifier().lastIndexOf( '_' ); + if ( i > 0 ) { /* skipped when there is no '_' (i == -1) or the only '_' is the first character (i == 0) */ + bseq.setIdentifier( bseq.getIdentifier().substring( i + 1 ) ); /* keep only what follows the last '_' */ + } + } + SequenceWriter.writeSeqs( seqs, outfile, SEQ_FORMAT.FASTA, 60 ); + } + catch ( FileNotFoundException e ) { /* failures are only printed as a stack trace; nothing is rethrown and no exit status is set */ + e.printStackTrace(); + } + catch ( IOException e ) { + e.printStackTrace(); + } + } + + +} diff --git a/forester/java/src/org/forester/archaeopteryx/AptxConstants.java b/forester/java/src/org/forester/archaeopteryx/AptxConstants.java index cf20424..479d846 100644 ---
a/forester/java/src/org/forester/archaeopteryx/AptxConstants.java +++ b/forester/java/src/org/forester/archaeopteryx/AptxConstants.java @@ -39,7 +39,7 @@ public final class AptxConstants { final static boolean __ALLOW_PHYLOGENETIC_INFERENCE = true; public final static String PRG_NAME = "Archaeopteryx"; final static String VERSION = "0.9921 beta"; - final static String PRG_DATE = "170322"; + final static String PRG_DATE = "170712"; final static String DEFAULT_CONFIGURATION_FILE_NAME = "_aptx_configuration_file"; final static String[] DEFAULT_FONT_CHOICES = { "Arial Unicode MS", "Dialog", "SansSerif", "Sans", "Arial", "Helvetica" }; @@ -103,9 +103,9 @@ public final class AptxConstants { public final static Color DOMAIN_BASE_COLOR_FOR_PDF = new Color( 100, 100, 100 ); - public final static Color DOMAIN_LABEL_COLOR_FOR_PDF = new Color( 150, - 150, - 150 ); + public final static Color DOMAIN_LABEL_COLOR_FOR_PDF = new Color( 0, + 0, + 0 ); final static short DEFAULT_NODE_SHAPE_SIZE_DEFAULT = 7; static final int MAX_LENGTH_FOR_COLLAPSED_NAME = 8; } diff --git a/forester/java/src/org/forester/archaeopteryx/TreePanel.java b/forester/java/src/org/forester/archaeopteryx/TreePanel.java index af3e9ee..3a8ba12 100644 --- a/forester/java/src/org/forester/archaeopteryx/TreePanel.java +++ b/forester/java/src/org/forester/archaeopteryx/TreePanel.java @@ -1425,7 +1425,7 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee sortDescendants( node ); break; case GET_EXT_DESC_DATA: - showExtDescNodeData( node ); + showExtDescNodeData( node, '_' ); break; case UNCOLLAPSE_ALL: uncollapseAll( node ); @@ -2098,7 +2098,7 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee final boolean to_pdf, final boolean to_graphics_file ) { g.setFont( getTreeFontSet().getSmallFont() ); - if ( ( to_pdf || to_graphics_file ) && getOptions().isPrintBlackAndWhite() ) { + if ( to_pdf || ( to_graphics_file && getOptions().isPrintBlackAndWhite() ) ) { 
g.setColor( Color.BLACK ); } else { @@ -2531,7 +2531,7 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee else if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.ROUNDED ) { x += ROUNDED_D; } - if ( ( to_pdf || to_graphics_file ) && getOptions().isPrintBlackAndWhite() ) { + if ( to_pdf || ( to_graphics_file && getOptions().isPrintBlackAndWhite() ) ) { g.setColor( Color.BLACK ); } else { @@ -3491,7 +3491,7 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee if ( getOptions().isLineUpRendarableNodeData() ) { if ( getOptions().isRightLineUpDomains() ) { rds.render( ( float ) ( ( getMaxDistanceToRoot() * getXcorrectionFactor() ) - + _length_of_longest_text + + _length_of_longest_text + 50 //TODO why plus 50? + ( ( _longest_domain - rds.getTotalLength() ) * rds.getRenderingFactorWidth() ) ), node.getYcoord() - ( h / 2.0f ), g, @@ -3500,7 +3500,11 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee } else { rds.render( ( float ) ( ( getMaxDistanceToRoot() * getXcorrectionFactor() ) - + _length_of_longest_text ), node.getYcoord() - ( h / 2.0f ), g, this, to_pdf ); + + _length_of_longest_text + 50 ), + node.getYcoord() - ( h / 2.0f ), + g, + this, + to_pdf ); } } else { @@ -4073,7 +4077,7 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee _urt_factor_ov = urt_factor_ov; } - private void showExtDescNodeData( final PhylogenyNode node ) { + private void showExtDescNodeData( final PhylogenyNode node, final char separator ) { final List data = new ArrayList(); final List nodes = node.getAllExternalDescendants(); if ( ( getFoundNodes0() != null ) || ( getFoundNodes1() != null ) ) { @@ -4113,44 +4117,49 @@ public final class TreePanel extends JPanel implements ActionListener, MouseWhee if ( n.getNodeData().isHasSequence() && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getMolecularSequence() ) ) { final StringBuilder ann = new 
StringBuilder(); - if ( !ForesterUtil.isEmpty( n.getName() ) ) { + if ( getControlPanel().isShowNodeNames() && !ForesterUtil.isEmpty( n.getName() ) ) { ann.append( n.getName() ); - ann.append( "|" ); + ann.append( separator ); } - if ( !ForesterUtil.isEmpty( n.getNodeData().getSequence().getSymbol() ) ) { - ann.append( "SYM=" ); + if ( getControlPanel().isShowSeqSymbols() + && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getSymbol() ) ) { ann.append( n.getNodeData().getSequence().getSymbol() ); - ann.append( "|" ); + ann.append( separator ); } - if ( !ForesterUtil.isEmpty( n.getNodeData().getSequence().getName() ) ) { - ann.append( "NAME=" ); + if ( getControlPanel().isShowSeqNames() + && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getName() ) ) { ann.append( n.getNodeData().getSequence().getName() ); - ann.append( "|" ); + ann.append( separator ); } - if ( !ForesterUtil.isEmpty( n.getNodeData().getSequence().getGeneName() ) ) { - ann.append( "GN=" ); + if ( getControlPanel().isShowGeneNames() + && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getGeneName() ) ) { ann.append( n.getNodeData().getSequence().getGeneName() ); - ann.append( "|" ); + ann.append( separator ); } - if ( n.getNodeData().getSequence().getAccession() != null ) { - ann.append( "ACC=" ); + if ( getControlPanel().isShowSequenceAcc() + && n.getNodeData().getSequence().getAccession() != null ) { ann.append( n.getNodeData().getSequence().getAccession().asText() ); - ann.append( "|" ); + ann.append( separator ); } if ( n.getNodeData().isHasTaxonomy() ) { - if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) { - ann.append( "TAXID=" ); + if ( getControlPanel().isShowTaxonomyCode() + && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) { ann.append( n.getNodeData().getTaxonomy().getTaxonomyCode() ); - ann.append( "|" ); + ann.append( separator ); } - if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() 
) ) { - ann.append( "SN=" ); + if ( getControlPanel().isShowTaxonomyScientificNames() + && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) { ann.append( n.getNodeData().getTaxonomy().getScientificName() ); - ann.append( "|" ); + ann.append( separator ); + } + if ( getControlPanel().isShowTaxonomyCommonNames() + && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getCommonName() ) ) { + ann.append( n.getNodeData().getTaxonomy().getCommonName() ); + ann.append( separator ); } } String ann_str; - if ( ann.charAt( ann.length() - 1 ) == '|' ) { + if ( ann.length() > 0 && ann.charAt( ann.length() - 1 ) == separator ) { ann_str = ann.substring( 0, ann.length() - 1 ); } else { diff --git a/forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java b/forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java index c75e521..fbaf157 100644 --- a/forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java +++ b/forester/java/src/org/forester/io/parsers/HmmscanPerDomainTableParser.java @@ -426,12 +426,16 @@ public final class HmmscanPerDomainTableParser { throw new IOException( "more than one protein named [" + query + "]" ); } } + final String fail_query = prev_query; //TODO prev_query = query; prev_qlen = qlen; prev_queries.add( query ); if ( ( current_protein != null ) && ( current_protein.getProteinDomains().size() > 0 ) ) { addProtein( proteins, current_protein ); } + else { + System.out.println(fail_query ); //TODO + } if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) { current_protein = new BasicProtein( query, getSpecies(), qlen ); } diff --git a/forester/java/src/org/forester/tools/PhylogenyDecorator.java b/forester/java/src/org/forester/tools/PhylogenyDecorator.java index 8aa59d8..3a5ba40 100644 --- a/forester/java/src/org/forester/tools/PhylogenyDecorator.java +++ b/forester/java/src/org/forester/tools/PhylogenyDecorator.java @@ -71,7 +71,8 @@ public 
final class PhylogenyDecorator { public static void decorate( final Phylogeny phylogeny, final Map> map, - final boolean picky ) throws IllegalArgumentException, PhyloXmlDataFormatException { + final boolean picky ) + throws IllegalArgumentException, PhyloXmlDataFormatException { for( final PhylogenyNodeIterator iter = phylogeny.iteratorPostorder(); iter.hasNext(); ) { final PhylogenyNode node = iter.next(); final String name = node.getName(); @@ -86,15 +87,14 @@ public final class PhylogenyDecorator { if ( new_values.containsKey( TP_TAXONOMY_ID ) && new_values.containsKey( TP_TAXONOMY_ID_PROVIDER ) ) { ForesterUtil.ensurePresenceOfTaxonomy( node ); - node.getNodeData() - .getTaxonomy() - .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ), - new_values.get( TP_TAXONOMY_ID_PROVIDER ) ) ); + node.getNodeData().getTaxonomy() + .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ), + new_values.get( TP_TAXONOMY_ID_PROVIDER ) ) ); } else if ( new_values.containsKey( TP_TAXONOMY_ID ) ) { ForesterUtil.ensurePresenceOfTaxonomy( node ); node.getNodeData().getTaxonomy() - .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ) ) ); + .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ) ) ); } if ( new_values.containsKey( TP_TAXONOMY_SN ) ) { ForesterUtil.ensurePresenceOfTaxonomy( node ); @@ -111,10 +111,9 @@ public final class PhylogenyDecorator { if ( new_values.containsKey( TP_SEQ_ACCESSION ) && new_values.containsKey( TP_SEQ_ACCESSION_SOURCE ) ) { ForesterUtil.ensurePresenceOfSequence( node ); - node.getNodeData() - .getSequence() - .setAccession( new Accession( new_values.get( TP_SEQ_ACCESSION ), - new_values.get( TP_SEQ_ACCESSION_SOURCE ) ) ); + node.getNodeData().getSequence() + .setAccession( new Accession( new_values.get( TP_SEQ_ACCESSION ), + new_values.get( TP_SEQ_ACCESSION_SOURCE ) ) ); } if ( new_values.containsKey( TP_SEQ_ANNOTATION_DESC ) ) { ForesterUtil.ensurePresenceOfSequence( node ); @@ -159,8 +158,8 @@ public 
final class PhylogenyDecorator { final boolean picky, final boolean cut_name_after_space, final boolean trim_after_tilde, - final boolean verbose ) throws IllegalArgumentException, NHXFormatException, - PhyloXmlDataFormatException { + final boolean verbose ) + throws IllegalArgumentException, NHXFormatException, PhyloXmlDataFormatException { return PhylogenyDecorator.decorate( phylogeny, map, field, @@ -197,7 +196,8 @@ public final class PhylogenyDecorator { final Map intermediate_map, final boolean cut_name_after_space, final boolean trim_after_tilde, - final boolean verbose ) throws IllegalArgumentException, PhyloXmlDataFormatException { + final boolean verbose ) + throws IllegalArgumentException, PhyloXmlDataFormatException { if ( extract_bracketed_scientific_name && ( field == FIELD.TAXONOMY_SCIENTIFIC_NAME ) ) { throw new IllegalArgumentException( "attempt to extract bracketed scientific name together with data field pointing to scientific name" ); } @@ -255,7 +255,7 @@ public final class PhylogenyDecorator { } else if ( picky ) { throw new IllegalArgumentException( " could not get taxonomy from \"" + new_value - + "\"" ); + + "\"" ); } } switch ( field ) { @@ -299,7 +299,7 @@ public final class PhylogenyDecorator { node.getNodeData().setSequence( new Sequence() ); } node.getNodeData().getSequence() - .setDomainArchitecture( new DomainArchitecture( new_value ) ); + .setDomainArchitecture( new DomainArchitecture( new_value ) ); break; case TAXONOMY_CODE: if ( verbose ) { @@ -353,13 +353,13 @@ public final class PhylogenyDecorator { throw new IllegalArgumentException( "node name \"" + name + "\" maps to empty value" ); } } - else if ( picky ) { + else if ( picky && node.isExternal() ) { throw new IllegalArgumentException( "node name \"" + name + "\" not found in map" ); } } } return "updated " + ext_nodes_updated + "/" + ext_nodes + " external nodes, updated " + int_nodes_updated + "/" - + int_nodes + " internal nodes"; + + int_nodes + " internal nodes"; } 
public static Map> parseMappingTable( final File mapping_table_file ) @@ -459,12 +459,12 @@ public final class PhylogenyDecorator { } public static enum FIELD { - DOMAIN_STRUCTURE, - MOL_SEQ, - NODE_NAME, - SEQUENCE_ANNOTATION_DESC, - SEQUENCE_NAME, - TAXONOMY_CODE, - TAXONOMY_SCIENTIFIC_NAME; + DOMAIN_STRUCTURE, + MOL_SEQ, + NODE_NAME, + SEQUENCE_ANNOTATION_DESC, + SEQUENCE_NAME, + TAXONOMY_CODE, + TAXONOMY_SCIENTIFIC_NAME; } } -- 1.7.10.2