--- /dev/null
+
+package org.forester.application;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.forester.io.parsers.PhylogenyParser;
+import org.forester.io.parsers.util.ParserUtils;
+import org.forester.io.writers.PhylogenyWriter;
+import org.forester.phylogeny.Phylogeny;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
+import org.forester.phylogeny.factories.PhylogenyFactory;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.util.ForesterUtil;
+
+/**
+ * Command line application which shortens the node names of a phylogeny.
+ * <p>
+ * The first phylogeny returned for the input file is read, every node name
+ * containing an underscore is replaced by the text following its last
+ * underscore, and the result is written to the output file with
+ * {@code PhylogenyWriter.toNewHampshire}.
+ * <p>
+ * Usage: {@code rename <infile> <outfile>}
+ */
+public class rename {
+
+    /**
+     * Entry point. Terminates the JVM with exit status -1 on missing arguments
+     * or on any read/write failure.
+     *
+     * @param args {@code args[0]} is the input tree file, {@code args[1]} is the output file
+     */
+    public static void main( final String args[] ) {
+        // Report missing arguments with a usage message instead of an
+        // ArrayIndexOutOfBoundsException.
+        if ( args.length < 2 ) {
+            System.out.println( "\nUsage: rename <infile> <outfile>\n" );
+            System.exit( -1 );
+        }
+        final File infile = new File( args[ 0 ] );
+        final File outfile = new File( args[ 1 ] );
+        Phylogeny p = null;
+        try {
+            final PhylogenyFactory factory = ParserBasedPhylogenyFactory.getInstance();
+            final PhylogenyParser pp = ParserUtils.createParserDependingOnFileType( infile, true );
+            // Only the first phylogeny is processed; an empty result array ends up
+            // in the catch block below.
+            p = factory.create( infile, pp )[ 0 ];
+        }
+        catch ( final Exception e ) {
+            System.out.println( "\nCould not read \"" + infile + "\" [" + e.getMessage() + "]\n" );
+            System.exit( -1 );
+        }
+        for( final PhylogenyNodeIterator iter = p.iteratorPreorder(); iter.hasNext(); ) {
+            final PhylogenyNode node = iter.next();
+            final String node_name = node.getName();
+            if ( ForesterUtil.isEmpty( node_name ) ) {
+                continue;
+            }
+            final int i = node_name.lastIndexOf( '_' );
+            // i > 0: a name whose only underscore is its first character stays untouched.
+            // i < length - 1: a trailing underscore must not turn the name into "".
+            if ( ( i > 0 ) && ( i < ( node_name.length() - 1 ) ) ) {
+                node.setName( node_name.substring( i + 1 ) );
+            }
+        }
+        try {
+            final PhylogenyWriter w = new PhylogenyWriter();
+            w.toNewHampshire( p, true, outfile );
+        }
+        catch ( final IOException e ) {
+            System.out.println( "\nFailure to write output [" + e.getMessage() + "]\n" );
+            System.exit( -1 );
+        }
+    }
+}
--- /dev/null
+// $Id:
+// FORESTER -- software libraries and applications
+// for evolutionary biology research and applications.
+//
+// Copyright (C) 2008-2009 Christian M. Zmasek
+// Copyright (C) 2008-2009 Burnham Institute for Medical Research
+// All rights reserved
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+//
+// Contact: phylosoft @ gmail . com
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
+//
+//
+// "java -Xmx1024m -cp path\to\forester.jar org.forester.application.rename_fasta <infile> <outfile>"
+//
+//
+
+package org.forester.application;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.forester.io.parsers.FastaParser;
+import org.forester.io.writers.SequenceWriter;
+import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
+import org.forester.sequence.BasicSequence;
+import org.forester.sequence.MolecularSequence;
+import org.forester.util.CommandLineArguments;
+import org.forester.util.ForesterUtil;
+
+/**
+ * Command line application which shortens the identifiers of the sequences
+ * in a FASTA file.
+ * <p>
+ * Every identifier containing an underscore is replaced by the text following
+ * its last underscore; all sequences are then written to the output file with
+ * {@code SequenceWriter.writeSeqs} using {@code SEQ_FORMAT.FASTA}.
+ * <p>
+ * Usage: {@code rename_fasta <infile> <outfile>}
+ */
+public final class rename_fasta {
+
+    /**
+     * Entry point. Terminates the JVM with exit status -1 on missing arguments
+     * or on any read/write failure.
+     *
+     * @param args {@code args[0]} is the input FASTA file, {@code args[1]} is the output file
+     */
+    public static void main( final String args[] ) {
+        // Report missing arguments with a usage message instead of an
+        // ArrayIndexOutOfBoundsException.
+        if ( args.length < 2 ) {
+            System.out.println( "\nUsage: rename_fasta <infile> <outfile>\n" );
+            System.exit( -1 );
+        }
+        final File infile = new File( args[ 0 ] );
+        final File outfile = new File( args[ 1 ] );
+        List<MolecularSequence> seqs = null;
+        try {
+            final FileInputStream in = new FileInputStream( infile );
+            try {
+                seqs = FastaParser.parse( in );
+            }
+            finally {
+                // Always release the file handle, also when parsing fails.
+                in.close();
+            }
+        }
+        catch ( final IOException e ) {
+            System.out.println( "\nCould not read \"" + infile + "\" [" + e.getMessage() + "]\n" );
+            System.exit( -1 );
+        }
+        for( final MolecularSequence seq : seqs ) {
+            // The identifier setter is reached through BasicSequence; any other
+            // implementation is passed through unchanged instead of failing the cast.
+            if ( !( seq instanceof BasicSequence ) ) {
+                continue;
+            }
+            final BasicSequence bseq = ( BasicSequence ) seq;
+            final String id = bseq.getIdentifier();
+            if ( ForesterUtil.isEmpty( id ) ) {
+                continue;
+            }
+            final int i = id.lastIndexOf( '_' );
+            // i > 0: an identifier whose only underscore is its first character stays untouched.
+            // i < length - 1: a trailing underscore must not turn the identifier into "".
+            if ( ( i > 0 ) && ( i < ( id.length() - 1 ) ) ) {
+                bseq.setIdentifier( id.substring( i + 1 ) );
+            }
+        }
+        try {
+            // NOTE(review): 60 is presumably the residues-per-line width -- confirm against SequenceWriter.
+            SequenceWriter.writeSeqs( seqs, outfile, SEQ_FORMAT.FASTA, 60 );
+        }
+        catch ( final IOException e ) {
+            System.out.println( "\nFailure to write output [" + e.getMessage() + "]\n" );
+            System.exit( -1 );
+        }
+    }
+}
final static boolean __ALLOW_PHYLOGENETIC_INFERENCE = true;
public final static String PRG_NAME = "Archaeopteryx";
final static String VERSION = "0.9921 beta";
- final static String PRG_DATE = "170322";
+ final static String PRG_DATE = "170712";
final static String DEFAULT_CONFIGURATION_FILE_NAME = "_aptx_configuration_file";
final static String[] DEFAULT_FONT_CHOICES = {
"Arial Unicode MS", "Dialog", "SansSerif", "Sans", "Arial", "Helvetica" };
public final static Color DOMAIN_BASE_COLOR_FOR_PDF = new Color( 100,
100,
100 );
- public final static Color DOMAIN_LABEL_COLOR_FOR_PDF = new Color( 150,
- 150,
- 150 );
+ public final static Color DOMAIN_LABEL_COLOR_FOR_PDF = new Color( 0,
+ 0,
+ 0 );
final static short DEFAULT_NODE_SHAPE_SIZE_DEFAULT = 7;
static final int MAX_LENGTH_FOR_COLLAPSED_NAME = 8;
}
sortDescendants( node );
break;
case GET_EXT_DESC_DATA:
- showExtDescNodeData( node );
+ showExtDescNodeData( node, '_' );
break;
case UNCOLLAPSE_ALL:
uncollapseAll( node );
final boolean to_pdf,
final boolean to_graphics_file ) {
g.setFont( getTreeFontSet().getSmallFont() );
- if ( ( to_pdf || to_graphics_file ) && getOptions().isPrintBlackAndWhite() ) {
+ if ( to_pdf || ( to_graphics_file && getOptions().isPrintBlackAndWhite() ) ) {
g.setColor( Color.BLACK );
}
else {
else if ( getPhylogenyGraphicsType() == PHYLOGENY_GRAPHICS_TYPE.ROUNDED ) {
x += ROUNDED_D;
}
- if ( ( to_pdf || to_graphics_file ) && getOptions().isPrintBlackAndWhite() ) {
+ if ( to_pdf || ( to_graphics_file && getOptions().isPrintBlackAndWhite() ) ) {
g.setColor( Color.BLACK );
}
else {
if ( getOptions().isLineUpRendarableNodeData() ) {
if ( getOptions().isRightLineUpDomains() ) {
rds.render( ( float ) ( ( getMaxDistanceToRoot() * getXcorrectionFactor() )
- + _length_of_longest_text
+ + _length_of_longest_text + 50 //TODO why plus 50?
+ ( ( _longest_domain - rds.getTotalLength() ) * rds.getRenderingFactorWidth() ) ),
node.getYcoord() - ( h / 2.0f ),
g,
}
else {
rds.render( ( float ) ( ( getMaxDistanceToRoot() * getXcorrectionFactor() )
- + _length_of_longest_text ), node.getYcoord() - ( h / 2.0f ), g, this, to_pdf );
+ + _length_of_longest_text + 50 ),
+ node.getYcoord() - ( h / 2.0f ),
+ g,
+ this,
+ to_pdf );
}
}
else {
_urt_factor_ov = urt_factor_ov;
}
- private void showExtDescNodeData( final PhylogenyNode node ) {
+ private void showExtDescNodeData( final PhylogenyNode node, final char separator ) {
final List<String> data = new ArrayList<String>();
final List<PhylogenyNode> nodes = node.getAllExternalDescendants();
if ( ( getFoundNodes0() != null ) || ( getFoundNodes1() != null ) ) {
if ( n.getNodeData().isHasSequence()
&& !ForesterUtil.isEmpty( n.getNodeData().getSequence().getMolecularSequence() ) ) {
final StringBuilder ann = new StringBuilder();
- if ( !ForesterUtil.isEmpty( n.getName() ) ) {
+ if ( getControlPanel().isShowNodeNames() && !ForesterUtil.isEmpty( n.getName() ) ) {
ann.append( n.getName() );
- ann.append( "|" );
+ ann.append( separator );
}
- if ( !ForesterUtil.isEmpty( n.getNodeData().getSequence().getSymbol() ) ) {
- ann.append( "SYM=" );
+ if ( getControlPanel().isShowSeqSymbols()
+ && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getSymbol() ) ) {
ann.append( n.getNodeData().getSequence().getSymbol() );
- ann.append( "|" );
+ ann.append( separator );
}
- if ( !ForesterUtil.isEmpty( n.getNodeData().getSequence().getName() ) ) {
- ann.append( "NAME=" );
+ if ( getControlPanel().isShowSeqNames()
+ && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getName() ) ) {
ann.append( n.getNodeData().getSequence().getName() );
- ann.append( "|" );
+ ann.append( separator );
}
- if ( !ForesterUtil.isEmpty( n.getNodeData().getSequence().getGeneName() ) ) {
- ann.append( "GN=" );
+ if ( getControlPanel().isShowGeneNames()
+ && !ForesterUtil.isEmpty( n.getNodeData().getSequence().getGeneName() ) ) {
ann.append( n.getNodeData().getSequence().getGeneName() );
- ann.append( "|" );
+ ann.append( separator );
}
- if ( n.getNodeData().getSequence().getAccession() != null ) {
- ann.append( "ACC=" );
+ if ( getControlPanel().isShowSequenceAcc()
+ && n.getNodeData().getSequence().getAccession() != null ) {
ann.append( n.getNodeData().getSequence().getAccession().asText() );
- ann.append( "|" );
+ ann.append( separator );
}
if ( n.getNodeData().isHasTaxonomy() ) {
- if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
- ann.append( "TAXID=" );
+ if ( getControlPanel().isShowTaxonomyCode()
+ && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getTaxonomyCode() ) ) {
ann.append( n.getNodeData().getTaxonomy().getTaxonomyCode() );
- ann.append( "|" );
+ ann.append( separator );
}
- if ( !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
- ann.append( "SN=" );
+ if ( getControlPanel().isShowTaxonomyScientificNames()
+ && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getScientificName() ) ) {
ann.append( n.getNodeData().getTaxonomy().getScientificName() );
- ann.append( "|" );
+ ann.append( separator );
+ }
+ if ( getControlPanel().isShowTaxonomyCommonNames()
+ && !ForesterUtil.isEmpty( n.getNodeData().getTaxonomy().getCommonName() ) ) {
+ ann.append( n.getNodeData().getTaxonomy().getCommonName() );
+ ann.append( separator );
}
}
String ann_str;
- if ( ann.charAt( ann.length() - 1 ) == '|' ) {
+ if ( ann.length() > 0 && ann.charAt( ann.length() - 1 ) == separator ) {
ann_str = ann.substring( 0, ann.length() - 1 );
}
else {
throw new IOException( "more than one protein named [" + query + "]" );
}
}
+ final String fail_query = prev_query; //TODO
prev_query = query;
prev_qlen = qlen;
prev_queries.add( query );
if ( ( current_protein != null ) && ( current_protein.getProteinDomains().size() > 0 ) ) {
addProtein( proteins, current_protein );
}
+ else {
+ System.out.println(fail_query ); //TODO
+ }
if ( getReturnType() == ReturnType.UNORDERED_PROTEIN_DOMAIN_COLLECTION_PER_PROTEIN ) {
current_protein = new BasicProtein( query, getSpecies(), qlen );
}
public static void decorate( final Phylogeny phylogeny,
final Map<String, Map<String, String>> map,
- final boolean picky ) throws IllegalArgumentException, PhyloXmlDataFormatException {
+ final boolean picky )
+ throws IllegalArgumentException, PhyloXmlDataFormatException {
for( final PhylogenyNodeIterator iter = phylogeny.iteratorPostorder(); iter.hasNext(); ) {
final PhylogenyNode node = iter.next();
final String name = node.getName();
if ( new_values.containsKey( TP_TAXONOMY_ID )
&& new_values.containsKey( TP_TAXONOMY_ID_PROVIDER ) ) {
ForesterUtil.ensurePresenceOfTaxonomy( node );
- node.getNodeData()
- .getTaxonomy()
- .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ),
- new_values.get( TP_TAXONOMY_ID_PROVIDER ) ) );
+ node.getNodeData().getTaxonomy()
+ .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ),
+ new_values.get( TP_TAXONOMY_ID_PROVIDER ) ) );
}
else if ( new_values.containsKey( TP_TAXONOMY_ID ) ) {
ForesterUtil.ensurePresenceOfTaxonomy( node );
node.getNodeData().getTaxonomy()
- .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ) ) );
+ .setIdentifier( new Identifier( new_values.get( TP_TAXONOMY_ID ) ) );
}
if ( new_values.containsKey( TP_TAXONOMY_SN ) ) {
ForesterUtil.ensurePresenceOfTaxonomy( node );
if ( new_values.containsKey( TP_SEQ_ACCESSION )
&& new_values.containsKey( TP_SEQ_ACCESSION_SOURCE ) ) {
ForesterUtil.ensurePresenceOfSequence( node );
- node.getNodeData()
- .getSequence()
- .setAccession( new Accession( new_values.get( TP_SEQ_ACCESSION ),
- new_values.get( TP_SEQ_ACCESSION_SOURCE ) ) );
+ node.getNodeData().getSequence()
+ .setAccession( new Accession( new_values.get( TP_SEQ_ACCESSION ),
+ new_values.get( TP_SEQ_ACCESSION_SOURCE ) ) );
}
if ( new_values.containsKey( TP_SEQ_ANNOTATION_DESC ) ) {
ForesterUtil.ensurePresenceOfSequence( node );
final boolean picky,
final boolean cut_name_after_space,
final boolean trim_after_tilde,
- final boolean verbose ) throws IllegalArgumentException, NHXFormatException,
- PhyloXmlDataFormatException {
+ final boolean verbose )
+ throws IllegalArgumentException, NHXFormatException, PhyloXmlDataFormatException {
return PhylogenyDecorator.decorate( phylogeny,
map,
field,
final Map<String, String> intermediate_map,
final boolean cut_name_after_space,
final boolean trim_after_tilde,
- final boolean verbose ) throws IllegalArgumentException, PhyloXmlDataFormatException {
+ final boolean verbose )
+ throws IllegalArgumentException, PhyloXmlDataFormatException {
if ( extract_bracketed_scientific_name && ( field == FIELD.TAXONOMY_SCIENTIFIC_NAME ) ) {
throw new IllegalArgumentException( "attempt to extract bracketed scientific name together with data field pointing to scientific name" );
}
}
else if ( picky ) {
throw new IllegalArgumentException( " could not get taxonomy from \"" + new_value
- + "\"" );
+ + "\"" );
}
}
switch ( field ) {
node.getNodeData().setSequence( new Sequence() );
}
node.getNodeData().getSequence()
- .setDomainArchitecture( new DomainArchitecture( new_value ) );
+ .setDomainArchitecture( new DomainArchitecture( new_value ) );
break;
case TAXONOMY_CODE:
if ( verbose ) {
throw new IllegalArgumentException( "node name \"" + name + "\" maps to empty value" );
}
}
- else if ( picky ) {
+ else if ( picky && node.isExternal() ) {
throw new IllegalArgumentException( "node name \"" + name + "\" not found in map" );
}
}
}
return "updated " + ext_nodes_updated + "/" + ext_nodes + " external nodes, updated " + int_nodes_updated + "/"
- + int_nodes + " internal nodes";
+ + int_nodes + " internal nodes";
}
public static Map<String, Map<String, String>> parseMappingTable( final File mapping_table_file )
}
public static enum FIELD {
- DOMAIN_STRUCTURE,
- MOL_SEQ,
- NODE_NAME,
- SEQUENCE_ANNOTATION_DESC,
- SEQUENCE_NAME,
- TAXONOMY_CODE,
- TAXONOMY_SCIENTIFIC_NAME;
+ DOMAIN_STRUCTURE,
+ MOL_SEQ,
+ NODE_NAME,
+ SEQUENCE_ANNOTATION_DESC,
+ SEQUENCE_NAME,
+ TAXONOMY_CODE,
+ TAXONOMY_SCIENTIFIC_NAME;
}
}