final static private String OUTPUT_FORMAT_PHYLIP_OPTION = "p";
final static private String OUTPUT_REMOVED_SEQS_OPTION = "ro";
final static private String MAFFT_OPTIONS = "mo"; // rejected with a fatal error in one branch of option validation ("no need to indicate MAFFT options without realigning")
+ final static private String PERFORM_PHYLOGENETIC_INFERENCE = "t"; // flag: when set, a simple phylogenetic tree (Kimura distances, NJ) is calculated
//
final static private String PATH_TO_MAFFT_OPTION = "mafft";
final static private String DO_NOT_NORMALIZE_FOR_EFF_LENGTH_OPTION = "nn";
MSA_FORMAT output_format = MSA_FORMAT.FASTA;
File removed_seqs_out_base = null;
String mafft_options = "--auto";
+ boolean perform_phylogenetic_inference = false;
final List<String> allowed_options = new ArrayList<String>();
allowed_options.add( REMOVE_WORST_OFFENDERS_OPTION );
allowed_options.add( AV_GAPINESS_OPTION );
allowed_options.add( OUTPUT_FORMAT_PHYLIP_OPTION );
allowed_options.add( OUTPUT_REMOVED_SEQS_OPTION );
allowed_options.add( MAFFT_OPTIONS );
+ allowed_options.add( PERFORM_PHYLOGENETIC_INFERENCE );
final String dissallowed_options = cla.validateAllowedOptionsAsString( allowed_options );
if ( dissallowed_options.length() > 0 ) {
ForesterUtil.fatalError( PRG_NAME, "unknown option(s): " + dissallowed_options );
else if ( cla.isOptionSet( MAFFT_OPTIONS ) ) {
ForesterUtil.fatalError( PRG_NAME, "no need to indicate MAFFT options without realigning" );
}
+ if ( cla.isOptionSet( PERFORM_PHYLOGENETIC_INFERENCE ) ) {
+ perform_phylogenetic_inference = true;
+ }
if ( chart_only ) {
if ( ( out != null ) || ( removed_seqs_out_base != null ) ) {
ForesterUtil
if ( realign ) {
System.out.println( "MAFFT options : " + mafft_options );
}
+ System.out.println( "Simple tree (Kimura distances, NJ) : " + perform_phylogenetic_inference );
System.out.println();
final int initial_number_of_seqs = msa.getNumberOfSequences();
List<MsaProperties> msa_props = null;
final MsaCompactor mc = new MsaCompactor( msa );
+ mc.setInfileName( in.getName() );
mc.setNorm( norm );
mc.setRealign( realign );
if ( realign ) {
mc.setStep( step );
mc.setStepForDiagnostics( step_for_diagnostics );
mc.setReportAlnMeanIdentity( report_aln_mean_identity );
+ mc.setPeformPhylogenticInference( perform_phylogenetic_inference );
if ( ( worst_remove > 0 ) || ( av_gap > 0 ) || ( length > 0 ) ) {
mc.setOutputFormat( output_format );
mc.setOutFileBase( out );
else {
msa_props = mc.chart( step, realign, norm );
}
- Chart.display( msa_props, initial_number_of_seqs, report_aln_mean_identity, in.toString() );
+ Chart.display( msa_props, initial_number_of_seqs, report_aln_mean_identity, in.getName() );
}
catch ( final IllegalArgumentException iae ) {
// iae.printStackTrace(); //TODO remove me
+ "=<integer> minimal effecive sequence length (for deleting of shorter sequences)" );
System.out.println( " -" + GAP_RATIO_LENGTH_OPTION
+ "=<decimal> maximal allowed gap ratio per column (for deleting of columms) (0.0-1.0)" );
+ System.out.println( " -" + PERFORM_PHYLOGENETIC_INFERENCE
+ + " to calculate a simple phylogenetic tree (Kimura distances, NJ)" );
System.out.println();
System.out.println();
System.out.println();
return MainFrameApplication.createInstance( phylogenies, config_file_name, title );
}
+ public static MainFrame createApplication( final Phylogeny phylogeny, final Configuration config, final String title ) { // Single-tree entry point taking a Configuration object; delegates to MainFrameApplication.createInstance.
+ final Phylogeny[] phylogenies = new Phylogeny[ 1 ]; // createInstance is called with a Phylogeny[] below, so the single tree is wrapped in a one-element array
+ phylogenies[ 0 ] = phylogeny;
+ return MainFrameApplication.createInstance( phylogenies, config, title ); // config and title are forwarded unchanged
+ }
+
public static void main( final String args[] ) {
Phylogeny[] phylogenies = null;
String config_filename = null;
import java.util.SortedSet;
import java.util.TreeSet;
+import org.forester.archaeopteryx.Archaeopteryx;
+import org.forester.archaeopteryx.Configuration;
import org.forester.evoinference.distance.NeighborJoiningF;
import org.forester.evoinference.distance.PairwiseDistanceCalculator;
import org.forester.evoinference.distance.PairwiseDistanceCalculator.PWD_DISTANCE_METHOD;
import org.forester.evoinference.matrix.distance.BasicSymmetricalDistanceMatrix;
import org.forester.evoinference.tools.BootstrapResampler;
+import org.forester.io.parsers.nhx.NHXParser.TAXONOMY_EXTRACTION;
+import org.forester.io.parsers.phyloxml.PhyloXmlDataFormatException;
+import org.forester.io.parsers.util.ParserUtils;
import org.forester.io.writers.SequenceWriter;
import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
import org.forester.msa.DeleteableMsa;
import org.forester.msa.ResampleableMsa;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyMethods;
+import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
import org.forester.sequence.Sequence;
import org.forester.tools.ConfidenceAssessor;
import org.forester.util.ForesterUtil;
private String _maffts_opts = "--auto";
private int _min_length = -1;
//
+ private String _infile_name = null; // set via setInfileName(); pi() passes it as the title argument of Archaeopteryx.createApplication
private DeleteableMsa _msa = null;
private boolean _norm = true;
private File _out_file_base = null;
private boolean _report_aln_mean_identity = false;
private int _step = -1;
private int _step_for_diagnostics = -1;
+ private boolean _phylogentic_inference = false; // set via setPeformPhylogenticInference(); when true, pi() is called to calculate a tree
static {
NF_4.setRoundingMode( RoundingMode.HALF_UP );
NF_3.setRoundingMode( RoundingMode.HALF_UP );
for( final GapContribution gap_gontribution : stats ) {
to_remove_ids.add( gap_gontribution.getId() );
}
+ if ( _phylogentic_inference ) {
+ System.out.println( "calculating phylegentic tree..." );
+ System.out.println();
+ pi();
+ }
if ( !_realign ) {
_step = -1;
}
return "";
}
- private final Phylogeny pi( final String matrix ) {
+ private final Phylogeny pi( final String matrix, final int boostrap ) {
final Phylogeny master_phy = inferNJphylogeny( PWD_DISTANCE_METHOD.KIMURA_DISTANCE, _msa, true, matrix );
final int seed = 15;
final int n = 100;
return master_phy;
}
+ private final Phylogeny pi() { // Infers a tree from _msa (Kimura distances), roots and annotates it, hands it to Archaeopteryx, and returns it.
+ final Phylogeny phy = inferNJphylogeny( PWD_DISTANCE_METHOD.KIMURA_DISTANCE, _msa, false, "" ); // NOTE(review): false/"" presumably mean "no distance-matrix output" - confirm in inferNJphylogeny
+ PhylogenyMethods.midpointRoot( phy ); // midpoint rooting is applied to phy before annotation and display
+ final boolean x = PhylogenyMethods.extractFastaInformation( phy ); // true if at least one non-empty external node name matched FastaParser.FASTA_DESC_LINE
+ if ( !x ) { // no FASTA-style names were recognized: fall back to taxonomy extraction from the node names
+ final PhylogenyNodeIterator it = phy.iteratorExternalForward(); // visits external nodes only
+ while ( it.hasNext() ) {
+ final PhylogenyNode n = it.next();
+ final String name = n.getName().trim();
+ if ( !ForesterUtil.isEmpty( name ) ) { // nodes whose trimmed name is empty are skipped
+ try {
+ ParserUtils.extractTaxonomyDataFromNodeName( n, TAXONOMY_EXTRACTION.AGGRESSIVE );
+ }
+ catch ( final PhyloXmlDataFormatException e ) {
+ // Ignore: extraction is best-effort, so a failure on one node must not abort the loop.
+ }
+ }
+ }
+ }
+ final Configuration config = new Configuration();
+ config.setDisplayAsPhylogram( true );
+ Archaeopteryx.createApplication( phy, config, _infile_name ); // _infile_name is passed as the title argument; the returned MainFrame is not kept
+ return phy;
+ }
+
private final void printMsaProperties( final String id, final MsaProperties msa_properties ) {
if ( ( _step == 1 ) || ( _step_for_diagnostics == 1 ) ) {
System.out.print( ForesterUtil.pad( id, _longest_id_length, ' ', false ) );
msa.write( w, format );
w.close();
}
+
+ public void setPeformPhylogenticInference( final boolean phylogentic_inference ) { // Turns the tree calculation guarded by _phylogentic_inference on or off; the misspelled method name is the public API, so it is kept.
+ _phylogentic_inference = phylogentic_inference;
+ }
+
+ public void setInfileName( final String infile_name ) { // Stores the input file name; pi() later passes it to Archaeopteryx.createApplication as the title.
+ _infile_name = infile_name;
+ }
}
throw new CloneNotSupportedException();
}
- public static void extractFastaInformation( final Phylogeny phy ) {
+ public static boolean extractFastaInformation( final Phylogeny phy ) {
+ boolean could_extract = false;
for( final PhylogenyNodeIterator iter = phy.iteratorExternalForward(); iter.hasNext(); ) {
final PhylogenyNode node = iter.next();
if ( !ForesterUtil.isEmpty( node.getName() ) ) {
final Matcher name_m = FastaParser.FASTA_DESC_LINE.matcher( node.getName() );
if ( name_m.lookingAt() ) {
- System.out.println();
- // System.out.println( name_m.group( 1 ) );
- // System.out.println( name_m.group( 2 ) );
- // System.out.println( name_m.group( 3 ) );
- // System.out.println( name_m.group( 4 ) );
+ could_extract = true;
final String acc_source = name_m.group( 1 );
final String acc = name_m.group( 2 );
final String seq_name = name_m.group( 3 );
}
}
}
+ return could_extract;
}
public static DescriptiveStatistics calculatBranchLengthStatistics( final Phylogeny phy ) {