import java.util.ArrayList;
import java.util.List;
+import org.forester.clade_analysis.AnalysisMulti;
import org.forester.clade_analysis.AnalysisSingle;
+import org.forester.clade_analysis.Prefix;
+import org.forester.clade_analysis.ResultMulti;
import org.forester.clade_analysis.ResultSingle;
import org.forester.io.parsers.PhylogenyParser;
import org.forester.io.parsers.util.ParserUtils;
public final class cladinator {
final static private String PRG_NAME = "cladinator";
- final static private String PRG_VERSION = "0.101";
- final static private String PRG_DATE = "170810";
+ final static private String PRG_VERSION = "0.100";
+ final static private String PRG_DATE = "170823";
final static private String PRG_DESC = "clades within clades -- analysis of pplacer type outputs";
final static private String E_MAIL = "phyloxml@gmail.com";
final static private String WWW = "https://sites.google.com/site/cmzmasek/home/software/forester";
final static private String HELP_OPTION_1 = "help";
final static private String HELP_OPTION_2 = "h";
final static private String SEP_OPTION = "s";
- private final static DecimalFormat df2 = new DecimalFormat( ".##" );
+ private final static DecimalFormat df2 = new DecimalFormat( "0.0#" );
public static void main( final String args[] ) {
try {
System.out.println( "\nCould not read \"" + intreefile + "\" [" + e.getMessage() + "]\n" );
System.exit( -1 );
}
- final ResultSingle res = AnalysisSingle.execute( p, query, separator );
+
+ final ResultMulti res = AnalysisMulti.execute( p, query, separator, 0.5 );
+
System.out.println();
System.out.println( "Result:" );
System.out.println( "Query : " + query );
+
+ ///////////////////
+
+
+
+ System.out.println( "Collapsed:" );
+
+ for( final Prefix prefix : res.getCollapsedMultiHitPrefixes() ) {
+ System.out.println( prefix );
+ }
+ if ( _has_specifics ) {
+
+ System.out.println( "Specifics:" );
+
+ for( final Prefix prefix : _cleaned_spec ) {
+ System.out.println( prefix );
+
+ }
+
+ System.out.println( "Collapsed With Specifics:" );
+
+ for( final Prefix prefix : _collapsed ) {
+ System.out.println( prefix );
+
+ for( final Prefix spec : _cleaned_spec ) {
+ if ( spec.getPrefix().startsWith( prefix.getPrefix() ) ) {
+ System.out.println( " " + spec );
+
+ }
+ }
+ }
+ }
+ if ( !ForesterUtil.isEmpty( _all_down ) ) {
+
+ System.out.println( "Collapsed Down:" );
+
+ for( final Prefix prefix : _collapsed_down ) {
+ System.out.println( prefix );
+
+ }
+
+ }
+ if ( !ForesterUtil.isEmpty( _all_up ) ) {
+
+
+ System.out.println( "Collapsed Up:" );
+
+ for( final Prefix prefix : _collapsed_up ) {
+ System.out.println( prefix );
+
+ }
+
+ }
+
+ ///////////////////
+
+
System.out.print( "Greatest Common Prefix : " + res.getGreatestCommonPrefix() );
if ( !ForesterUtil.isEmpty( res.getGreatestCommonPrefix() )
&& !ForesterUtil.isEmpty( res.getGreatestCommonCladeSubtreeConfidence() ) ) {
msa = FastaParser.parseMsa( is );
}
else {
- msa = GeneralMsaParser.parse( is );
+ msa = GeneralMsaParser.parseMsa( is );
}
if ( cla.isOptionSet( FROM_OPTION ) ) {
singleCalc( in, from, to, msa );
msa = DeleteableMsa.createInstance( FastaParser.parseMsa( is ) );
}
else {
- msa = DeleteableMsa.createInstance( GeneralMsaParser.parse( is ) );
+ msa = DeleteableMsa.createInstance( GeneralMsaParser.parseMsa( is ) );
}
final DescriptiveStatistics initial_msa_stats = MsaMethods.calculateEffectiveLengthStatistics( msa );
if (cla.isOptionSet( INFO_ONLY_OPTION ) ) {
msa = FastaParser.parseMsa( is );
}
else {
- msa = GeneralMsaParser.parse( is );
+ msa = GeneralMsaParser.parseMsa( is );
}
}
catch ( final MsaFormatException e ) {
final Phylogeny p1 = factory.create( in, pp )[ 0 ];
ResultMulti res = AnalysisMulti.execute( p1, 0.5 );
- System.out.println( "DEMO 1:" );
+ System.out.println( "DEMO 2:" );
System.out.println( "+++++++" );
System.out.print( res.toString() );
System.out.println( "------------------------- " );
private static boolean testDistanceCalculationMethods( final File test_dir ) {
try {
- final Msa msa0 = GeneralMsaParser.parse( new FileInputStream( test_dir + ForesterUtil.FILE_SEPARATOR
+ final Msa msa0 = GeneralMsaParser.parseMsa( new FileInputStream( test_dir + ForesterUtil.FILE_SEPARATOR
+ "bcl.aln" ) );
final BasicSymmetricalDistanceMatrix pwd0 = PairwiseDistanceCalculator.calcKimuraDistances( msa0 );
if ( pwd0.getSize() != 120 ) {
.matcher( line ).lookingAt() );
}
- static public Msa parse( final InputStream is ) throws IOException {
+ /**
+  * Reads a multiple sequence alignment from the given stream.
+  * The sequences are obtained from {@code parseSeqs} and handed to
+  * {@code BasicMsa.createInstance}.
+  *
+  * @param is the stream to read the alignment from
+  * @return the alignment built from the parsed sequences
+  * @throws IOException declared for read failures raised while parsing
+  */
+ static final public Msa parseMsa( final InputStream is ) throws IOException {
+     return BasicMsa.createInstance( parseSeqs( is ) );
+ }
+
+ static final public List<MolecularSequence> parseSeqs( final InputStream is ) throws IOException {
int block = -1;
int current_seq_index_per_block = -1;
String current_name = null;
name = names_in_order.get( current_seq_index_per_block );
}
catch ( final IndexOutOfBoundsException e ) {
- throw new MsaFormatException( "illegalmsa format (line: " + line_counter + "):\n\""
+ throw new MsaFormatException( "illegal msa format (line: " + line_counter + "):\n\""
+ trim( line ) + "\"" );
}
if ( temp_msa.containsKey( name ) ) {
seqs.add( BasicSequence.createAaSequence( names_in_order.get( i ), temp_msa.get( names_in_order.get( i ) )
.toString() ) );
}
- final Msa msa = BasicMsa.createInstance( seqs );
- return msa;
+
+ return seqs;
}
private static String trim( final String line ) {
return new BasicSequence( new String( seq.getIdentifier() ), s, seq.getType() );
}
+ public static MolecularSequence createSequence( final String identifier, final String mol_sequence ) {
+ check( identifier, mol_sequence );
+ final TYPE type = ForesterUtil.guessMolecularSequenceType( mol_sequence );
+ final String re;
+ final char repl;
+ if ( type == TYPE.AA ) {
+ re = AA_REGEXP;
+ repl = UNSPECIFIED_AA;
+ }
+ else if ( type == TYPE.DNA ) {
+ re = DNA_REGEXP;
+ repl = UNSPECIFIED_NUC;
+ }
+ else if ( type == TYPE.RNA ) {
+ re = RNA_REGEXP;
+ repl = UNSPECIFIED_NUC;
+ }
+ else {
+ throw new IllegalArgumentException( "could not determine sequence type for: " + mol_sequence);
+ }
+ return new BasicSequence( identifier, mol_sequence.toUpperCase().replaceAll( "\\.", GAP_STR )
+ .replaceAll( re, Character.toString( repl ) ), type );
+ }
+
public static MolecularSequence createAaSequence( final String identifier, final String mol_sequence ) {
check( identifier, mol_sequence );
return new BasicSequence( identifier, mol_sequence.toUpperCase().replaceAll( "\\.", GAP_STR )
private static boolean testGeneralMsaParser() {
try {
final String msa_str_0 = "seq1 abcd\n\nseq2 efgh\n";
- final Msa msa_0 = GeneralMsaParser.parse( new ByteArrayInputStream( msa_str_0.getBytes() ) );
+ final Msa msa_0 = GeneralMsaParser.parseMsa( new ByteArrayInputStream( msa_str_0.getBytes() ) );
final String msa_str_1 = "seq1 abc\nseq2 ghi\nseq1 def\nseq2 jkm\n";
- final Msa msa_1 = GeneralMsaParser.parse( new ByteArrayInputStream( msa_str_1.getBytes() ) );
+ final Msa msa_1 = GeneralMsaParser.parseMsa( new ByteArrayInputStream( msa_str_1.getBytes() ) );
final String msa_str_2 = "seq1 abc\nseq2 ghi\n\ndef\njkm\n";
- final Msa msa_2 = GeneralMsaParser.parse( new ByteArrayInputStream( msa_str_2.getBytes() ) );
+ final Msa msa_2 = GeneralMsaParser.parseMsa( new ByteArrayInputStream( msa_str_2.getBytes() ) );
final String msa_str_3 = "seq1 abc\n def\nseq2 ghi\n jkm\n";
- final Msa msa_3 = GeneralMsaParser.parse( new ByteArrayInputStream( msa_str_3.getBytes() ) );
+ final Msa msa_3 = GeneralMsaParser.parseMsa( new ByteArrayInputStream( msa_str_3.getBytes() ) );
if ( !msa_1.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "abcdef" ) ) {
return false;
}
if ( !msa_3.getIdentifier( 1 ).toString().equals( "seq2" ) ) {
return false;
}
- final Msa msa_4 = GeneralMsaParser.parse( new FileInputStream( PATH_TO_TEST_DATA + "msa_1.txt" ) );
+ final Msa msa_4 = GeneralMsaParser.parseMsa( new FileInputStream( PATH_TO_TEST_DATA + "msa_1.txt" ) );
if ( !msa_4.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "abcdefeeeeeeeexx" ) ) {
return false;
}
if ( !msa_4.getSequenceAsString( 2 ).toString().equalsIgnoreCase( "klmnxphhhhhhhhzz" ) ) {
return false;
}
- final Msa msa_5 = GeneralMsaParser.parse( new FileInputStream( PATH_TO_TEST_DATA + "msa_2.txt" ) );
+ final Msa msa_5 = GeneralMsaParser.parseMsa( new FileInputStream( PATH_TO_TEST_DATA + "msa_2.txt" ) );
if ( !msa_5.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "abcdefxx" ) ) {
return false;
}
if ( !msa_5.getSequenceAsString( 2 ).toString().equalsIgnoreCase( "klmnxpzz" ) ) {
return false;
}
- final Msa msa_6 = GeneralMsaParser.parse( new FileInputStream( PATH_TO_TEST_DATA + "msa_3.txt" ) );
+ final Msa msa_6 = GeneralMsaParser.parseMsa( new FileInputStream( PATH_TO_TEST_DATA + "msa_3.txt" ) );
if ( !msa_6.getSequenceAsString( 0 ).toString().equalsIgnoreCase( "abcdefeeeeeeeexx" ) ) {
return false;
}
// Holder of project-wide constants: forester version/date strings, phyloXML-related
// names and locations, file suffixes, charset names, and the enum of tree formats.
public final class ForesterConstants {
- public final static String FORESTER_VERSION = "1.045";
- public final static String FORESTER_DATE = "161214";
- public final static String PHYLO_XML_VERSION = "1.20";
- public final static String PHYLO_XML_LOCATION = "http://www.phyloxml.org";
- public final static String PHYLO_XML_XSD = "phyloxml.xsd";
- public final static String XML_SCHEMA_INSTANCE = "http://www.w3.org/2001/XMLSchema-instance";
- public final static String LOCAL_PHYLOXML_XSD_RESOURCE = "resources/phyloxml.xsd";
- public final static String PHYLO_XML_SUFFIX = ".xml";
- public final static String UTF_8 = "UTF-8";
- public final static String ISO_8859_1 = "ISO-8859-1";
- public final static String PHYLO_XML_REFERENCE = "Han MV and Zmasek CM (2009): \"phyloXML: XML for evolutionary biology and comparative genomics\", BMC Bioinformatics 10:356";
- public final static boolean RELEASE = false;
+ public final static String FORESTER_VERSION = "1.045";
+ public final static String FORESTER_DATE = "161214";
+ public final static String PHYLO_XML_VERSION = "1.20";
+ public final static String PHYLO_XML_LOCATION = "http://www.phyloxml.org";
+ public final static String PHYLO_XML_XSD = "phyloxml.xsd";
+ public final static String XML_SCHEMA_INSTANCE = "http://www.w3.org/2001/XMLSchema-instance";
+ public final static String LOCAL_PHYLOXML_XSD_RESOURCE = "resources/phyloxml.xsd";
+ public final static String PHYLO_XML_SUFFIX = ".xml";
+ public final static String ID_NORMALIZED_FASTA_FILE_SUFFIX = "_ni.fasta"; // newly added; presumably the suffix of id-normalized FASTA output -- confirm against callers
+ public final static String ID_MAP_FILE_SUFFIX = ".nim"; // newly added; presumably the suffix of the matching id-map file -- confirm against callers
+ public final static String UTF_8 = "UTF-8";
+ public final static String ISO_8859_1 = "ISO-8859-1";
+ public final static String PHYLO_XML_REFERENCE = "Han MV and Zmasek CM (2009): \"phyloXML: XML for evolutionary biology and comparative genomics\", BMC Bioinformatics 10:356";
+ public final static boolean RELEASE = false;
// Tree file formats; the change below only moves each constant onto its own line.
public enum PhylogeneticTreeFormats {
- NH, NHX, NEXUS, PHYLOXML
+ NH,
+ NHX,
+ NEXUS,
+ PHYLOXML
}
-
-
}
/**
 * Compares two doubles for approximate equality using the class-wide
 * tolerance {@code ZERO_DIFF}.
 *
 * @param a the first value
 * @param b the second value
 * @return true if the absolute difference of a and b is strictly less than ZERO_DIFF
 */
final public static boolean isEqual( final double a, final double b ) {
    final double abs_diff = Math.abs( a - b );
    return abs_diff < ZERO_DIFF;
}
-
+
/**
 * Compares two doubles for approximate equality using a caller-supplied tolerance.
 *
 * @param a         the first value
 * @param b         the second value
 * @param tolerance the exclusive upper bound for the absolute difference
 * @return true if the absolute difference of a and b is strictly less than tolerance
 */
final public static boolean isEqual( final double a, final double b, final double tolerance ) {
    final double abs_diff = Math.abs( a - b );
    return abs_diff < tolerance;
}
// Private no-arg constructor with an empty body: code outside this class cannot call it.
private ForesterUtil() {
}
- public static List<String> spliIntoPrefixes(final String prefix, final String separator ) {
+ public static List<String> spliIntoPrefixes( final String prefix, final String separator ) {
final String[] a = prefix.split( Pattern.quote( separator ) );
- final List<String> l= new ArrayList<String>();
+ final List<String> l = new ArrayList<String>();
for( int i = 0; i < a.length; ++i ) {
final StringBuilder sb = new StringBuilder();
for( int j = 0; j <= i; ++j ) {
sb.append( separator );
}
}
- // System.out.println( sb.toString() );
- l.add( sb.toString());
+ // System.out.println( sb.toString() );
+ l.add( sb.toString() );
}
return l;
}
+
+ //
+ /**
+  * Tells whether a file looks like FASTA, judged solely by the line returned from
+  * {@code ForesterUtil.getFirstLine}: the file qualifies if that line, ignoring
+  * surrounding whitespace, starts with '>'.
+  *
+  * @param file the file to inspect
+  * @return true if the first line starts with '>' after trimming; false otherwise,
+  *         including when no first line is available
+  * @throws IOException declared for failures raised while reading the file
+  */
+ public static boolean isLooksLikeFasta( final File file ) throws IOException {
+     final String first_line = ForesterUtil.getFirstLine( file );
+     // NOTE(review): getFirstLine is not visible here; guard in case it returns null
+     // for a file without content, instead of failing with a NullPointerException.
+     if ( first_line == null ) {
+         return false;
+     }
+     // A line starting with '>' is necessarily non-empty, and lower-casing cannot
+     // affect '>', so a single trim followed by the prefix test is sufficient.
+     return first_line.trim().startsWith( ">" );
+ }
}