// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
//
// Contact: phylosoft @ gmail . com
-// WWW: www.phylosoft.org/forester
+// WWW: https://sites.google.com/site/cmzmasek/home/software/forester
package org.forester.io.parsers;
import org.forester.msa.Msa;
import org.forester.msa.MsaFormatException;
import org.forester.sequence.BasicSequence;
-import org.forester.sequence.Sequence;
+import org.forester.sequence.MolecularSequence;
public final class GeneralMsaParser {
// Line-shape patterns:
//   NAME_SEQ_PATTERN         - two whitespace-separated tokens, captured as groups 1 and 2.
//   INDENTED_SEQ_PATTERN     - leading whitespace followed by a single token (group 1).
//   NON_INDENTED_SEQ_PATTERN - a token at the very start of the line (group 1), anything after it.
private static final Pattern NAME_SEQ_PATTERN = Pattern.compile( "(\\S+)\\s+(\\S+)\\s*" );
private static final Pattern INDENTED_SEQ_PATTERN = Pattern.compile( "\\s+(\\S+)\\s*" );
private static final Pattern NON_INDENTED_SEQ_PATTERN = Pattern.compile( "(\\S+).*" );
// Program-banner patterns.
// NOTE(review): the constant names and the literals look crossed - PROBCONS_REGEX matches a line
// starting with "CLUSTAL", while CLUSTAL_REGEX matches a line starting with "PROBCONS" plus one
// whitespace character. This is presumably harmless if the three banner patterns are only ever
// OR-ed together - TODO confirm against the method that uses them before renaming.
- private static final Pattern PROBCONS_REGEX = Pattern.compile( "^CLUSTAL\\s" );
+ private static final Pattern PROBCONS_REGEX = Pattern.compile( "^CLUSTAL" );
private static final Pattern MUSCLE_REGEX = Pattern.compile( "^MUSCLE\\s\\(" );
private static final Pattern CLUSTAL_REGEX = Pattern.compile( "^PROBCONS\\s" );
// Despite its name, this pattern only describes a run of digits and/or whitespace characters.
private static final Pattern ANYTHING_REGEX = Pattern.compile( "[\\d\\s]+" );
.matcher( line ).lookingAt() );
}
- static public Msa parse( final InputStream is ) throws IOException {
/**
 * Reads an alignment from the given stream and returns it as an Msa.
 *
 * Thin wrapper: the stream is handed to parseSeqs( is ) and the resulting
 * list of sequences is passed to BasicMsa.createInstance. Nothing is caught
 * here, so any exception raised by parseSeqs (including the
 * MsaFormatException it throws for lines it cannot interpret) or by
 * BasicMsa.createInstance reaches the caller unchanged.
 *
 * @param is the stream to read the alignment from
 * @return the Msa built from the parsed sequences
 * @throws IOException as declared by parseSeqs
 */
+ static final public Msa parseMsa( final InputStream is ) throws IOException {
+ final Msa msa = BasicMsa.createInstance( parseSeqs( is ));
+ return msa;
+ }
+
+ static final public List<MolecularSequence> parseSeqs( final InputStream is ) throws IOException {
int block = -1;
int current_seq_index_per_block = -1;
String current_name = null;
name = names_in_order.get( current_seq_index_per_block );
}
catch ( final IndexOutOfBoundsException e ) {
- throw new MsaFormatException( "illegalmsa format (line: " + line_counter + "):\n\""
+ throw new MsaFormatException( "illegal msa format (line: " + line_counter + "):\n\""
+ trim( line ) + "\"" );
}
if ( temp_msa.containsKey( name ) ) {
}
else {
throw new MsaFormatException( "illegal msa format (line: " + line_counter + "):\n\"" + trim( line )
- + "\"" );
+ + "\"" );
}
if ( is_first ) {
is_first = false;
}
}
} // while ( ( line = reader.readLine() ) != null )
- final List<Sequence> seqs = new ArrayList<Sequence>();
+ final List<MolecularSequence> seqs = new ArrayList<MolecularSequence>();
for( int i = 0; i < names_in_order.size(); ++i ) {
seqs.add( BasicSequence.createAaSequence( names_in_order.get( i ), temp_msa.get( names_in_order.get( i ) )
- .toString() ) );
+ .toString() ) );
}
- final Msa msa = BasicMsa.createInstance( seqs );
- return msa;
+
+ return seqs;
}
private static String trim( final String line ) {