import org.forester.io.writers.SequenceWriter;
import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
import org.forester.sequence.BasicSequence;
-import org.forester.sequence.Sequence;
+import org.forester.sequence.MolecularSequence;
import org.forester.util.CommandLineArguments;
import org.forester.util.ForesterUtil;
}
else {
try {
- final List<Sequence> seqs = FastaParser.parse( new FileInputStream( infile ) );
+ final List<MolecularSequence> seqs = FastaParser.parse( new FileInputStream( infile ) );
final Map<String, Short> names = new HashMap<String, Short>();
int duplicates = 0;
- for( final Sequence seq : seqs ) {
+ for( final MolecularSequence seq : seqs ) {
if ( procSeq( infile.toString(), names, seq ) ) {
++duplicates;
}
}
}
- private static boolean procSeq( final String infile, final Map<String, Short> names, final Sequence seq ) {
+ private static boolean procSeq( final String infile, final Map<String, Short> names, final MolecularSequence seq ) {
boolean duplicate = false;
final String name = seq.getIdentifier();
if ( !names.containsKey( name ) ) {
import org.forester.phylogeny.data.Identifier;
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;
import org.forester.phylogeny.factories.PhylogenyFactory;
-import org.forester.sequence.Sequence;
+import org.forester.sequence.MolecularSequence;
import org.forester.tools.PhylogenyDecorator;
import org.forester.tools.PhylogenyDecorator.FIELD;
import org.forester.util.BasicTable;
}
private static Map<String, String> readFastaFileIntoMap( final File mapping_infile, final boolean verbose ) {
- List<Sequence> seqs = null;
+ List<MolecularSequence> seqs = null;
try {
seqs = FastaParser.parse( new FileInputStream( mapping_infile ) );
}
+ "] is devoid of fasta-formatted sequences" );
}
final Map<String, String> map = new HashMap<String, String>();
- for( final Sequence seq : seqs ) {
+ for( final MolecularSequence seq : seqs ) {
if ( ForesterUtil.isEmpty( seq.getIdentifier() ) ) {
ForesterUtil.fatalError( decorator.PRG_NAME, "fasta-file [" + mapping_infile
+ "] contains sequence with empty identifier" );
import org.forester.phylogeny.factories.ParserBasedPhylogenyFactory;\r
import org.forester.phylogeny.factories.PhylogenyFactory;\r
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;\r
-import org.forester.sequence.Sequence;\r
+import org.forester.sequence.MolecularSequence;\r
import org.forester.util.BasicDescriptiveStatistics;\r
import org.forester.util.BasicTable;\r
import org.forester.util.BasicTableParser;\r
private PhylogeneticInferenceOptions _phylogenetic_inference_options = null;\r
private Msa _msa = null;\r
private File _msa_file = null;\r
- private List<Sequence> _seqs = null;\r
+ private List<MolecularSequence> _seqs = null;\r
private File _seqs_file = null;\r
JMenuItem _read_values_jmi;\r
JMenuItem _read_seqs_jmi;\r
return _msa_file;\r
}\r
\r
- public List<Sequence> getSeqs() {\r
+ public List<MolecularSequence> getSeqs() {\r
return _seqs;\r
}\r
\r
if ( ( file != null ) && !file.isDirectory() && ( result == JFileChooser.APPROVE_OPTION ) ) {\r
setSeqsFile( null );\r
setSeqs( null );\r
- List<Sequence> seqs = null;\r
+ List<MolecularSequence> seqs = null;\r
try {\r
if ( FastaParser.isLikelyFasta( new FileInputStream( file ) ) ) {\r
seqs = FastaParser.parse( new FileInputStream( file ) );\r
- for( final Sequence seq : seqs ) {\r
+ for( final MolecularSequence seq : seqs ) {\r
System.out.println( SequenceWriter.toFasta( seq, 60 ) );\r
}\r
}\r
_msa_file = msa_file;\r
}\r
\r
- void setSeqs( final List<Sequence> seqs ) {\r
+ void setSeqs( final List<MolecularSequence> seqs ) {\r
_seqs = seqs;\r
}\r
\r
}\r
final int result = _sequences_filechooser.showOpenDialog( _contentpane );\r
final File file = _sequences_filechooser.getSelectedFile();\r
- List<Sequence> seqs = null;\r
+ List<MolecularSequence> seqs = null;\r
if ( ( file != null ) && !file.isDirectory() && ( result == JFileChooser.APPROVE_OPTION ) ) {\r
try {\r
if ( FastaParser.isLikelyFasta( new FileInputStream( file ) ) ) {\r
}\r
}\r
if ( seqs != null ) {\r
- for( final Sequence seq : seqs ) {\r
+ for( final MolecularSequence seq : seqs ) {\r
System.out.println( seq.getIdentifier() );\r
}\r
final Phylogeny phy = getCurrentTreePanel().getPhylogeny();\r
int total_counter = 0;\r
int attached_counter = 0;\r
- for( final Sequence seq : seqs ) {\r
+ for( final MolecularSequence seq : seqs ) {\r
++total_counter;\r
final String seq_name = seq.getIdentifier();\r
if ( !ForesterUtil.isEmpty( seq_name ) ) {\r
if ( getOptions().isRightLineUpDomains() ) {
rds.render( ( float ) ( ( getMaxDistanceToRoot() * getXcorrectionFactor() )
+ _length_of_longest_text + ( ( _longest_domain - rds.getTotalLength() ) * rds
- .getRenderingFactorWidth() ) ), node.getYcoord() - ( h / 2 ), g, this, to_pdf );
+ .getRenderingFactorWidth() ) ), node.getYcoord() - ( h / 2.0f ), g, this, to_pdf );
}
else {
rds.render( ( float ) ( ( getMaxDistanceToRoot() * getXcorrectionFactor() ) + _length_of_longest_text ),
- node.getYcoord() - ( h / 2 ),
+ node.getYcoord() - ( h / 2.0f ),
g,
this,
to_pdf );
}
}
else {
- rds.render( node.getXcoord() + x, node.getYcoord() - ( h / 2 ), g, this, to_pdf );
+ rds.render( node.getXcoord() + x, node.getYcoord() - ( h / 2.0f ), g, this, to_pdf );
}
}
else {
rds.render( ( ( getPhylogeny().getFirstExternalNode().getXcoord() + _length_of_longest_text ) - 20 )
+ ( ( _longest_domain - rds.getTotalLength() ) * rds
.getRenderingFactorWidth() ),
- node.getYcoord() - ( h / 2 ),
+ node.getYcoord() - ( h / 2.0f ),
g,
this,
to_pdf );
}
else {
rds.render( getPhylogeny().getFirstExternalNode().getXcoord() + _length_of_longest_text,
- node.getYcoord() - ( h / 2 ),
+ node.getYcoord() - ( h / 2.0f ),
g,
this,
to_pdf );
&& ( node.getNodeData().getSequence().isMolecularSequenceAligned() )
&& ( !ForesterUtil.isEmpty( node.getNodeData().getSequence().getMolecularSequence() ) ) ) {
final RenderableMsaSequence rs = RenderableMsaSequence.createInstance( node.getNodeData().getSequence()
- .getMolecularSequence(), getConfiguration() );
+ .getMolecularSequence(), node.getNodeData().getSequence().getType(), getConfiguration() );
if ( rs != null ) {
final int default_height = 7;
float y = getYdistance();
rs.setRenderingHeight( h > 1 ? h : 2 );
if ( getControlPanel().isDrawPhylogram() ) {
rs.render( ( float ) ( ( getMaxDistanceToRoot() * getXcorrectionFactor() ) + _length_of_longest_text ),
- node.getYcoord() - ( h / 2 ),
+ node.getYcoord() - ( h / 2.0f ),
g,
this,
to_pdf );
}
else {
rs.render( getPhylogeny().getFirstExternalNode().getXcoord() + _length_of_longest_text,
- node.getYcoord() - ( h / 2 ),
+ node.getYcoord() - ( h / 2.0f ),
g,
this,
to_pdf );
import org.forester.archaeopteryx.Configuration;
import org.forester.archaeopteryx.TreePanel;
import org.forester.phylogeny.data.PhylogenyData;
+import org.forester.sequence.MolecularSequence;
+import org.forester.sequence.MolecularSequence.TYPE;
public final class RenderableMsaSequence implements RenderablePhylogenyData {
private final Rectangle2D _rectangle = new Rectangle2D.Float();
private double _height = DEFAULT_HEIGHT;
private final float _width = DEFAULT_WIDTH;
+ private MolecularSequence.TYPE _type;
private static RenderableMsaSequence _instance = null;
private RenderableMsaSequence() {
}
private Color calculateColor( final char c ) {
- if ( ( c == 'G' ) || ( c == 'A' ) || ( c == 'S' ) || ( c == 'T' ) ) {
+ if ( _type == TYPE.AA ) {
+ return calculateAAColor( c );
+ }
+ return calculateNucleotideColor( c );
+ }
+
+    /**
+     * Maps a single nucleotide residue to the color used to draw it.
+     * The comparison is case-sensitive; only upper-case residue symbols
+     * are recognized.
+     *
+     * @param c the residue character
+     * @return yellow for 'A', orange for 'T' or 'U', blue for 'G', cyan for 'C',
+     *         and gray for the gap character '-' as well as any other symbol
+     */
+    private Color calculateNucleotideColor( final char c ) {
+        switch ( c ) {
+            case 'A':
+                return Color.YELLOW;
+            case 'T':
+            case 'U':
+                return Color.ORANGE;
+            case 'G':
+                return Color.BLUE;
+            case 'C':
+                return Color.CYAN;
+            default:
+                // Gaps ('-') and every unrecognized symbol share the same neutral color,
+                // so no separate gap branch is needed.
+                return Color.GRAY;
+        }
+    }
+
+ private Color calculateAAColor( final char c ) {
+ if ( ( c == 'G' ) || ( c == 'A' ) || ( c == 'S' ) || ( c == 'T' ) ) {
+ return Color.YELLOW;
+ }
else if ( ( c == 'N' ) || ( c == 'Q' ) || ( c == 'H' ) ) {
- return Color.MAGENTA;
+ return Color.PINK;
}
else if ( ( c == 'D' ) || ( c == 'E' ) ) {
return Color.RED;
else if ( c == '-' ) {
return Color.GRAY;
}
+ else if ( c == 'X' ) {
+ return Color.GRAY;
+ }
else {
return Color.GREEN;
}
return _height;
}
- public static RenderableMsaSequence createInstance( final String seq, final Configuration configuration ) {
+ public static RenderableMsaSequence createInstance( final String seq,
+ final String type,
+ final Configuration configuration ) {
if ( _instance == null ) {
_instance = new RenderableMsaSequence();
}
+ if ( type.equals( "protein" ) ) {
+ _instance._type = TYPE.AA;
+ }
+ else if ( type.equals( "dna" ) ) {
+ _instance._type = TYPE.DNA;
+ }
+ else {
+ _instance._type = TYPE.RNA;
+ }
_instance._seq = seq.toCharArray();
if ( configuration != null ) {
}
import org.forester.archaeopteryx.AptxUtil;
import org.forester.archaeopteryx.MainFrameApplication;
import org.forester.evoinference.distance.PairwiseDistanceCalculator.PWD_DISTANCE_METHOD;
-import org.forester.sequence.Sequence;
+import org.forester.sequence.MolecularSequence;
import org.forester.util.BasicDescriptiveStatistics;
import org.forester.util.DescriptiveStatistics;
}
}
- DescriptiveStatistics calcSequenceStats( final List<Sequence> seqs ) {
+ DescriptiveStatistics calcSequenceStats( final List<MolecularSequence> seqs ) {
final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
- for( final Sequence s : seqs ) {
+ for( final MolecularSequence s : seqs ) {
stats.addValue( s.getLength() );
}
return stats;
import org.forester.msa.ResampleableMsa;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyMethods;
-import org.forester.sequence.Sequence;
+import org.forester.sequence.MolecularSequence;
import org.forester.tools.ConfidenceAssessor;
import org.forester.util.ForesterUtil;
private Msa _msa;
private final MainFrameApplication _mf;
private final PhylogeneticInferenceOptions _options;
- private final List<Sequence> _seqs;
+ private final List<MolecularSequence> _seqs;
private final boolean DEBUG = true;
public final static String MSA_FILE_SUFFIX = ".aln";
public final static String PWD_FILE_SUFFIX = ".pwd";
- public PhylogeneticInferrer( final List<Sequence> seqs,
+ public PhylogeneticInferrer( final List<MolecularSequence> seqs,
final PhylogeneticInferenceOptions options,
final MainFrameApplication mf ) {
_msa = null;
}
}
- private Msa runMAFFT( final List<Sequence> seqs, final List<String> opts ) throws IOException, InterruptedException {
+ private Msa runMAFFT( final List<MolecularSequence> seqs, final List<String> opts ) throws IOException,
+ InterruptedException {
Msa msa = null;
final MsaInferrer mafft = Mafft.createInstance( _mf.getInferenceManager().getPathToLocalMafft()
.getCanonicalPath() );
import org.forester.msa.Msa;
import org.forester.msa.MsaFormatException;
import org.forester.sequence.BasicSequence;
-import org.forester.sequence.Sequence;
+import org.forester.sequence.MolecularSequence;
public class FastaParser {
return parseMsa( new ByteArrayInputStream( bytes ) );
}
- static public List<Sequence> parse( final File f ) throws IOException {
+ static public List<MolecularSequence> parse( final File f ) throws IOException {
return parse( new FileInputStream( f ) );
}
- static public List<Sequence> parse( final InputStream is ) throws IOException {
+ static public List<MolecularSequence> parse( final InputStream is ) throws IOException {
final BufferedReader reader = new BufferedReader( new InputStreamReader( is, "UTF-8" ) );
String line = null;
int line_counter = 0;
}
addSeq( name, current_seq, temp_msa );
reader.close();
- final List<Sequence> seqs = new ArrayList<Sequence>();
+ final List<MolecularSequence> seqs = new ArrayList<MolecularSequence>();
for( int i = 0; i < temp_msa.size(); ++i ) {
seqs.add( BasicSequence.createAaSequence( temp_msa.get( i )[ 0 ].toString(),
temp_msa.get( i )[ 1 ].toString() ) );
import org.forester.msa.Msa;
import org.forester.msa.MsaFormatException;
import org.forester.sequence.BasicSequence;
-import org.forester.sequence.Sequence;
+import org.forester.sequence.MolecularSequence;
public final class GeneralMsaParser {
}
}
} // while ( ( line = reader.readLine() ) != null )
- final List<Sequence> seqs = new ArrayList<Sequence>();
+ final List<MolecularSequence> seqs = new ArrayList<MolecularSequence>();
for( int i = 0; i < names_in_order.size(); ++i ) {
seqs.add( BasicSequence.createAaSequence( names_in_order.get( i ), temp_msa.get( names_in_order.get( i ) )
.toString() ) );
public final static String END = "End;";
public final static String MATRIX = "Matrix";
public final static String BEGIN_CHARACTERS = "Begin Characters;";
+ public final static String BEGIN_DATA = "Begin Data;";
public final static String FORMAT = "Format";
public final static String DATATYPE = "DataType";
public final static String STANDARD = "Standard";
import org.forester.io.parsers.util.PhylogenyParserException;
import org.forester.phylogeny.Phylogeny;
import org.forester.phylogeny.PhylogenyNode;
+import org.forester.phylogeny.data.Sequence;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
+import org.forester.sequence.BasicSequence;
+import org.forester.sequence.MolecularSequence;
import org.forester.util.ForesterUtil;
public final class NexusPhylogeniesParser implements IteratingPhylogenyParser, PhylogenyParser {
- final private static String begin_trees = NexusConstants.BEGIN_TREES.toLowerCase();
- final private static String end = NexusConstants.END.toLowerCase();
- final private static String endblock = "endblock";
- final private static Pattern ROOTEDNESS_PATTERN = Pattern.compile( ".+=\\s*\\[&([R|U])\\].*" );
- final private static String taxlabels = NexusConstants.TAXLABELS.toLowerCase();
- final private static Pattern TITLE_PATTERN = Pattern.compile( "TITLE.?\\s+([^;]+)",
- Pattern.CASE_INSENSITIVE );
- final private static String translate = NexusConstants.TRANSLATE.toLowerCase();
- final private static String tree = NexusConstants.TREE.toLowerCase();
- final private static Pattern TREE_NAME_PATTERN = Pattern.compile( "\\s*.?Tree\\s+(.+?)\\s*=.+",
- Pattern.CASE_INSENSITIVE );
- final private static Pattern TRANSLATE_PATTERN = Pattern.compile( "([0-9A-Za-z]+)\\s+(.+)" );
- final private static String utree = NexusConstants.UTREE.toLowerCase();
- private BufferedReader _br;
- private boolean _ignore_quotes_in_nh_data = Constants.NH_PARSING_IGNORE_QUOTES_DEFAULT;
- private boolean _in_taxalabels;
- private boolean _in_translate;
- private boolean _in_tree;
- private boolean _in_trees_block;
- private boolean _is_rooted;
- private String _name;
- private Phylogeny _next;
- private Object _nexus_source;
- private StringBuilder _nh;
- private boolean _replace_underscores = NHXParser.REPLACE_UNDERSCORES_DEFAULT;
- private boolean _rooted_info_present;
- private List<String> _taxlabels;
- private TAXONOMY_EXTRACTION _taxonomy_extraction = TAXONOMY_EXTRACTION.NO;
- private String _title;
- private Map<String, String> _translate_map;
- private StringBuilder _translate_sb;
+ final private static String begin_trees = NexusConstants.BEGIN_TREES.toLowerCase();
+ final private static String end = NexusConstants.END.toLowerCase();
+ final private static String endblock = "endblock";
+ final private static Pattern ROOTEDNESS_PATTERN = Pattern.compile( ".+=\\s*\\[&([R|U])\\].*" );
+ final private static String taxlabels = NexusConstants.TAXLABELS.toLowerCase();
+ final private static Pattern TITLE_PATTERN = Pattern.compile( "TITLE.?\\s+([^;]+)",
+ Pattern.CASE_INSENSITIVE );
+ final private static String translate = NexusConstants.TRANSLATE.toLowerCase();
+ // Lower-cased openers of the two block kinds that are treated as a sequence-data block;
+ // each name matches the NexusConstants entry it is derived from.
+ final private static String data = NexusConstants.BEGIN_DATA.toLowerCase();
+ final private static String characters = NexusConstants.BEGIN_CHARACTERS.toLowerCase();
+ final private static String tree = NexusConstants.TREE.toLowerCase();
+ final private static Pattern TREE_NAME_PATTERN = Pattern.compile( "\\s*.?Tree\\s+(.+?)\\s*=.+",
+ Pattern.CASE_INSENSITIVE );
+ final private static Pattern TRANSLATE_PATTERN = Pattern.compile( "([0-9A-Za-z]+)\\s+(.+)" );
+ final private static Pattern ALN_PATTERN = Pattern.compile( "(.+)\\s+([A-Za-z-_\\*\\?]+)" );
+ final private static Pattern DATATYPE_PATTERN = Pattern.compile( "datatype\\s?.\\s?([a-z]+)" );
+ final private static Pattern LINK_TAXA_PATTERN = Pattern.compile( "link\\s+taxa\\s?.\\s?([^;]+)",
+ Pattern.CASE_INSENSITIVE );
+ final private static String utree = NexusConstants.UTREE.toLowerCase();
+ private BufferedReader _br;
+ private boolean _ignore_quotes_in_nh_data = Constants.NH_PARSING_IGNORE_QUOTES_DEFAULT;
+ private boolean _in_taxalabels;
+ private boolean _in_translate;
+ private boolean _in_tree;
+ private boolean _in_trees_block;
+ private boolean _in_data_block;
+ private boolean _is_rooted;
+ private String _datatype;
+ private String _name;
+ private Phylogeny _next;
+ private Object _nexus_source;
+ private StringBuilder _nh;
+ private boolean _replace_underscores = NHXParser.REPLACE_UNDERSCORES_DEFAULT;
+ private boolean _rooted_info_present;
+ private List<String> _taxlabels;
+ private TAXONOMY_EXTRACTION _taxonomy_extraction = TAXONOMY_EXTRACTION.NO;
+ private String _title;
+ private Map<String, String> _translate_map;
+ private StringBuilder _translate_sb;
+ private Map<String, MolecularSequence> _seqs;
+ private final boolean _add_sequences = true;
@Override
public String getName() {
_in_tree = false;
_rooted_info_present = false;
_is_rooted = false;
+ _seqs = new HashMap<String, MolecularSequence>();
_br = ParserUtils.createReader( _nexus_source );
getNext();
}
node.setName( node.getName().replace( '_', ' ' ).trim() );
}
}
+ if ( _add_sequences ) {
+ if ( _seqs.containsKey( node.getName() ) ) {
+ final MolecularSequence s = _seqs.get( node.getName() );
+ //TODO need to check for uniqueness when adding seqs....
+ final Sequence ns = new Sequence( s );
+ ns.setMolecularSequenceAligned( true ); //TODO need to check if all same length
+ node.getNodeData().addSequence( ns );
+ }
+ }
}
}
_next = p;
_in_trees_block = true;
_in_taxalabels = false;
_in_translate = false;
+ _in_data_block = false;
+ _datatype = null;
_title = "";
}
else if ( line_lc.startsWith( taxlabels ) ) {
+ //TODO need to be taxa block instead
_in_trees_block = false;
_in_taxalabels = true;
_in_translate = false;
+ _in_data_block = false;
+ _datatype = null;
}
else if ( line_lc.startsWith( translate ) ) {
_translate_sb = new StringBuilder();
_in_taxalabels = false;
_in_translate = true;
+ _in_data_block = false;
+ _datatype = null;
+ }
+ else if ( line_lc.startsWith( characters ) || line_lc.startsWith( data ) ) {
+ _in_taxalabels = false;
+ _in_trees_block = false;
+ _in_translate = false;
+ _in_data_block = true;
+ _datatype = null;
}
else if ( _in_trees_block ) {
if ( line_lc.startsWith( "title" ) ) {
}
}
else if ( line_lc.startsWith( "link" ) ) {
+ final Matcher link_m = LINK_TAXA_PATTERN.matcher( line );
+ if ( link_m.lookingAt() ) {
+ final String link = link_m.group( 1 );
+ System.out.println( "link taxa:" + link );
+ }
}
else if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) {
_in_trees_block = false;
}
}
}
+ if ( _in_data_block ) {
+ if ( line_lc.startsWith( end ) || line_lc.startsWith( endblock ) ) {
+ _in_data_block = false;
+ _datatype = null;
+ }
+ else if ( line_lc.startsWith( "link" ) ) {
+ final Matcher link_m = LINK_TAXA_PATTERN.matcher( line );
+ if ( link_m.lookingAt() ) {
+ final String link = link_m.group( 1 );
+ System.out.println( "link taxa:" + link );
+ }
+ }
+ else {
+ final Matcher datatype_matcher = DATATYPE_PATTERN.matcher( line_lc );
+ if ( datatype_matcher.find() ) {
+ _datatype = datatype_matcher.group( 1 );
+ System.out.println( _datatype );
+ }
+ else {
+ if ( ( _datatype != null )
+ && ( _datatype.equals( "protein" ) || _datatype.equals( "dna" ) || _datatype
+ .equals( "rna" ) ) ) {
+ if ( line.endsWith( ";" ) ) {
+ _in_data_block = false;
+ line = line.substring( 0, line.length() - 1 );
+ }
+ final Matcher aln_matcher = ALN_PATTERN.matcher( line );
+ if ( aln_matcher.matches() ) {
+ final String id = aln_matcher.group( 1 );
+ final String seq = aln_matcher.group( 2 );
+ MolecularSequence s = null;
+ if ( _datatype.equals( "protein" ) ) {
+ s = BasicSequence.createAaSequence( id, seq );
+ }
+ else if ( _datatype.equals( "dna" ) ) {
+ s = BasicSequence.createDnaSequence( id, seq );
+ }
+ else {
+ s = BasicSequence.createRnaSequence( id, seq );
+ }
+ _seqs.put( id, s );
+ System.out.println( s );
+ }
+ }
+ }
+ }
+ }
}
}
if ( _nh.length() > 0 ) {
import java.io.Writer;
import java.util.List;
-import org.forester.sequence.Sequence;
+import org.forester.sequence.MolecularSequence;
import org.forester.util.ForesterUtil;
public class SequenceWriter {
FASTA;
}
- public static StringBuilder toFasta( final Sequence seq, final int width ) {
+ public static StringBuilder toFasta( final MolecularSequence seq, final int width ) {
return toFasta( seq.getIdentifier(), seq.getMolecularSequenceAsString(), width );
}
return sb;
}
- public static void toFasta( final Sequence seq, final Writer w, final int width ) throws IOException {
+ public static void toFasta( final MolecularSequence seq, final Writer w, final int width ) throws IOException {
w.write( ">" );
w.write( seq.getIdentifier() );
w.write( ForesterUtil.LINE_SEPARATOR );
}
}
- public static void writeSeqs( final List<Sequence> seqs, final File file, final SEQ_FORMAT format, final int width )
- throws IOException {
+ public static void writeSeqs( final List<MolecularSequence> seqs,
+ final File file,
+ final SEQ_FORMAT format,
+ final int width ) throws IOException {
final Writer w = ForesterUtil.createBufferedWriter( file );
SequenceWriter.writeSeqs( seqs, w, format, width );
w.close();
}
- public static void writeSeqs( final List<Sequence> seqs,
+ public static void writeSeqs( final List<MolecularSequence> seqs,
final Writer writer,
final SEQ_FORMAT format,
final int width ) throws IOException {
switch ( format ) {
case FASTA:
- for( final Sequence s : seqs ) {
+ for( final MolecularSequence s : seqs ) {
toFasta( s, writer, width );
writer.write( ForesterUtil.LINE_SEPARATOR );
}
import org.forester.io.writers.SequenceWriter;
import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
import org.forester.sequence.BasicSequence;
-import org.forester.sequence.Sequence;
-import org.forester.sequence.Sequence.TYPE;
+import org.forester.sequence.MolecularSequence;
+import org.forester.sequence.MolecularSequence.TYPE;
import org.forester.util.ForesterUtil;
public class BasicMsa implements Msa {
}
@Override
- public List<Sequence> asSequenceList() {
- final List<Sequence> seqs = new ArrayList<Sequence>();
+ public List<MolecularSequence> asSequenceList() {
+ final List<MolecularSequence> seqs = new ArrayList<MolecularSequence>();
for( int i = 0; i < getNumberOfSequences(); ++i ) {
seqs.add( getSequence( i ) );
}
}
@Override
- public Sequence getSequence( final int row ) {
+ public MolecularSequence getSequence( final int row ) {
return new BasicSequence( getIdentifier( row ), _data[ row ], getType() );
}
@Override
- public Sequence getSequence( final String id ) {
+ public MolecularSequence getSequence( final String id ) {
for( int i = 0; i < getNumberOfSequences(); ++i ) {
if ( getIdentifier( i ).equals( id ) ) {
return getSequence( i );
@Override
public boolean isGapAt( final int row, final int col ) {
- return getResidueAt( row, col ) == Sequence.GAP;
+ return getResidueAt( row, col ) == MolecularSequence.GAP;
}
@Override
w.write( " Matrix" );
w.write( ForesterUtil.LINE_SEPARATOR );
for( int row = 0; row < getNumberOfSequences(); ++row ) {
- final Sequence seq = getSequence( row );
+ final MolecularSequence seq = getSequence( row );
final String s = seq.getMolecularSequenceAsString();
w.write( " " );
w.write( ForesterUtil.pad( getIdentifier( row ).replace( ' ', '_' ), max, ' ', false ).toString() );
}
}
- public static Msa createInstance( final List<Sequence> seqs ) {
+ public static Msa createInstance( final List<MolecularSequence> seqs ) {
if ( seqs.size() < 1 ) {
throw new IllegalArgumentException( "cannot create msa from less than one sequence" );
}
final int length = seqs.get( 0 ).getLength();
final BasicMsa msa = new BasicMsa( seqs.size(), length, seqs.get( 0 ).getType() );
for( int row = 0; row < seqs.size(); ++row ) {
- final Sequence seq = seqs.get( row );
+ final MolecularSequence seq = seqs.get( row );
if ( seq.getLength() != length ) {
throw new IllegalArgumentException( "illegal attempt to build msa from sequences of unequal length ["
+ seq.getIdentifier() + "]" );
import org.forester.io.parsers.FastaParser;
import org.forester.io.writers.SequenceWriter;
import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
-import org.forester.sequence.Sequence;
+import org.forester.sequence.MolecularSequence;
import org.forester.util.SystemCommandExecutor;
public final class ClustalOmega extends MsaInferrer {
}
@Override
- public Msa infer( final List<Sequence> seqs, final List<String> opts ) throws IOException, InterruptedException {
+ public Msa infer( final List<MolecularSequence> seqs, final List<String> opts ) throws IOException,
+ InterruptedException {
final File file = File.createTempFile( "__clustalo_input_", ".fasta" );
file.deleteOnExit();
final BufferedWriter writer = new BufferedWriter( new FileWriter( file ) );
import java.util.List;
import org.forester.sequence.BasicSequence;
-import org.forester.sequence.Sequence;
+import org.forester.sequence.MolecularSequence;
public final class DeleteableMsa extends BasicMsa {
}
}
- final public Sequence deleteRow( final String id, final boolean return_removed_seq ) {
+ final public MolecularSequence deleteRow( final String id, final boolean return_removed_seq ) {
int row = -1;
for( int r = 0; r < getNumberOfSequences(); ++r ) {
if ( getIdentifier( r ).equals( id ) ) {
if ( row < 0 ) {
throw new IllegalArgumentException( "id [" + id + "] not found" );
}
- Sequence s = null;
+ MolecularSequence s = null;
StringBuilder sb = null;
if ( return_removed_seq ) {
s = getSequence( row );
final char[] x = s.getMolecularSequence();
sb = new StringBuilder( x.length );
for( final char element : x ) {
- if ( element != Sequence.GAP ) {
+ if ( element != MolecularSequence.GAP ) {
sb.append( element );
}
}
}
@Override
- public Sequence getSequence( final int row ) {
+ public MolecularSequence getSequence( final int row ) {
checkRow( row );
return new BasicSequence( getIdentifier( row ), getSequenceAsString( row ).toString(), getType() );
}
final public boolean isAllGap( final int col ) {
final int m_col = _mapped_col_positions[ col ];
for( int j = 0; j < getNumberOfSequences(); ++j ) {
- if ( super.getResidueAt( _mapped_row_positions[ j ], m_col ) != Sequence.GAP ) {
+ if ( super.getResidueAt( _mapped_row_positions[ j ], m_col ) != MolecularSequence.GAP ) {
return false;
}
}
--_seqs;
}
- public final static DeleteableMsa createInstance( final List<Sequence> seqs ) {
+ public final static DeleteableMsa createInstance( final List<MolecularSequence> seqs ) {
return new DeleteableMsa( ( BasicMsa ) BasicMsa.createInstance( seqs ) );
}
import org.forester.io.parsers.FastaParser;
import org.forester.io.writers.SequenceWriter;
import org.forester.io.writers.SequenceWriter.SEQ_FORMAT;
-import org.forester.sequence.Sequence;
+import org.forester.sequence.MolecularSequence;
import org.forester.util.SystemCommandExecutor;
public final class Mafft extends MsaInferrer {
}
@Override
- public Msa infer( final List<Sequence> seqs, final List<String> opts ) throws IOException, InterruptedException {
+ public Msa infer( final List<MolecularSequence> seqs, final List<String> opts ) throws IOException,
+ InterruptedException {
final File file = File.createTempFile( "__mafft_input_", ".fasta" );
file.deleteOnExit();
final BufferedWriter writer = new BufferedWriter( new FileWriter( file ) );
import java.io.Writer;
import java.util.List;
-import org.forester.sequence.Sequence;
-import org.forester.sequence.Sequence.TYPE;
+import org.forester.sequence.MolecularSequence;
+import org.forester.sequence.MolecularSequence.TYPE;
public interface Msa {
public List<Character> getColumnAt( int col );
- public Sequence getSequence( final String id );
+ public MolecularSequence getSequence( final String id );
- public Sequence getSequence( final int row );
+ public MolecularSequence getSequence( final int row );
- public List<Sequence> asSequenceList();
+ public List<MolecularSequence> asSequenceList();
public StringBuffer getSequenceAsString( int row );
import java.io.IOException;
import java.util.List;
-import org.forester.sequence.Sequence;
+import org.forester.sequence.MolecularSequence;
import org.forester.util.SystemCommandExecutor;
public abstract class MsaInferrer {
public abstract Msa infer( File path_to_input_seqs, List<String> opts ) throws IOException, InterruptedException;
- public abstract Msa infer( final List<Sequence> seqs, final List<String> opts ) throws IOException,
+ public abstract Msa infer( final List<MolecularSequence> seqs, final List<String> opts ) throws IOException,
InterruptedException;
}
import java.util.TreeMap;
import org.forester.sequence.BasicSequence;
-import org.forester.sequence.Sequence;
+import org.forester.sequence.MolecularSequence;
import org.forester.util.BasicDescriptiveStatistics;
import org.forester.util.DescriptiveStatistics;
++new_length;
}
}
- final List<Sequence> seqs = new ArrayList<Sequence>( msa.getNumberOfSequences() );
+ final List<MolecularSequence> seqs = new ArrayList<MolecularSequence>( msa.getNumberOfSequences() );
for( int row = 0; row < msa.getNumberOfSequences(); ++row ) {
final char[] mol_seq = new char[ new_length ];
int new_col = 0;
if ( !delete_cols[ col ] ) {
final char residue = msa.getResidueAt( row, col );
mol_seq[ new_col++ ] = ( residue );
- if ( residue != Sequence.GAP ) {
+ if ( residue != MolecularSequence.GAP ) {
++non_gap_cols_sum;
}
}
int gaps = 0;
for( int seq = 0; seq < msa.getNumberOfSequences(); ++seq ) {
for( int i = 0; i < msa.getLength(); ++i ) {
- if ( msa.getResidueAt( seq, i ) == Sequence.GAP ) {
+ if ( msa.getResidueAt( seq, i ) == MolecularSequence.GAP ) {
gaps++;
}
}
final public static DescriptiveStatistics calculateEffectiveLengthStatistics( final Msa msa ) {
final DescriptiveStatistics stats = new BasicDescriptiveStatistics();
for( int row = 0; row < msa.getNumberOfSequences(); ++row ) {
- final Sequence s = msa.getSequence( row );
+ final MolecularSequence s = msa.getSequence( row );
stats.addValue( s.getLength() - s.getNumberOfGapResidues() );
}
return stats;
public static SortedMap<Character, Integer> calculateResidueDestributionPerColumn( final Msa msa, final int column ) {
final SortedMap<Character, Integer> map = new TreeMap<Character, Integer>();
for( final Character r : msa.getColumnAt( column ) ) {
- if ( r != Sequence.GAP ) {
+ if ( r != MolecularSequence.GAP ) {
if ( !map.containsKey( r ) ) {
map.put( r, 1 );
}
}
final public static Msa removeSequence( final Msa msa, final String to_remove_id ) {
- final List<Sequence> seqs = new ArrayList<Sequence>();
+ final List<MolecularSequence> seqs = new ArrayList<MolecularSequence>();
for( int row = 0; row < msa.getNumberOfSequences(); ++row ) {
if ( !to_remove_id.equals( msa.getIdentifier( row ) ) ) {
seqs.add( msa.getSequence( row ) );
}
final public static Msa removeSequences( final Msa msa, final List<String> to_remove_ids ) {
- final List<Sequence> seqs = new ArrayList<Sequence>();
+ final List<MolecularSequence> seqs = new ArrayList<MolecularSequence>();
for( int row = 0; row < msa.getNumberOfSequences(); ++row ) {
if ( !to_remove_ids.contains( msa.getIdentifier( row ) ) ) {
seqs.add( msa.getSequence( row ) );
for( int seq = 0; seq < msa.getNumberOfSequences(); ++seq ) {
int eff_length = 0;
for( int i = 0; i < msa.getLength(); ++i ) {
- if ( msa.getResidueAt( seq, i ) != Sequence.GAP ) {
+ if ( msa.getResidueAt( seq, i ) != MolecularSequence.GAP ) {
eff_length++;
}
}
}
final public static Msa removeSequencesByRow( final Msa msa, final List<Integer> to_remove_rows ) {
- final List<Sequence> seqs = new ArrayList<Sequence>();
+ final List<MolecularSequence> seqs = new ArrayList<MolecularSequence>();
for( int row = 0; row < msa.getNumberOfSequences(); ++row ) {
if ( !to_remove_rows.contains( row ) ) {
seqs.add( msa.getSequence( row ) );
final HashMap<Character, Integer> counts = new HashMap<Character, Integer>();
for( int row = 0; row < msa.getNumberOfSequences(); ++row ) {
final char c = msa.getResidueAt( row, col );
- if ( c != Sequence.GAP ) {
+ if ( c != MolecularSequence.GAP ) {
if ( !counts.containsKey( c ) ) {
counts.put( c, 1 );
}
package org.forester.msa;
import org.forester.sequence.BasicSequence;
-import org.forester.sequence.Sequence;
+import org.forester.sequence.MolecularSequence;
public final class ResampleableMsa extends BasicMsa {
}
@Override
- public Sequence getSequence( final int row ) {
+ public MolecularSequence getSequence( final int row ) {
return new BasicSequence( getIdentifier( row ), getSequenceAsString( row ).toString(), getType() );
}
}
import org.forester.phylogeny.data.NodeVisualData.NodeFill;
import org.forester.phylogeny.data.NodeVisualData.NodeShape;
import org.forester.phylogeny.iterators.PhylogenyNodeIterator;
-import org.forester.sequence.Sequence;
+import org.forester.sequence.MolecularSequence;
import org.forester.tools.ConfidenceAssessor;
import org.forester.util.BasicDescriptiveStatistics;
import org.forester.util.DescriptiveStatistics;
public class MsaCompactor {
- final private static NumberFormat NF_1 = new DecimalFormat( "0.#" );
- final private static NumberFormat NF_3 = new DecimalFormat( "0.###" );
- final private static NumberFormat NF_4 = new DecimalFormat( "0.####" );
- private boolean _calculate_shannon_entropy = false;
+ final private static NumberFormat NF_1 = new DecimalFormat( "0.#" );
+ final private static NumberFormat NF_3 = new DecimalFormat( "0.###" );
+ final private static NumberFormat NF_4 = new DecimalFormat( "0.####" );
+ private boolean _calculate_shannon_entropy = false;
//
- private String _infile_name = null;
- private final short _longest_id_length;
+ private String _infile_name = null;
+ private final short _longest_id_length;
//
- private String _maffts_opts = "--auto";
- private DeleteableMsa _msa = null;
- private boolean _norm = true;
- private File _out_file_base = null;
- private MSA_FORMAT _output_format = MSA_FORMAT.FASTA;
- private String _path_to_mafft = null;
- private boolean _phylogentic_inference = false;
+ private String _maffts_opts = "--auto";
+ private DeleteableMsa _msa = null;
+ private boolean _norm = true;
+ private File _out_file_base = null;
+ private MSA_FORMAT _output_format = MSA_FORMAT.FASTA;
+ private String _path_to_mafft = null;
+ private boolean _phylogentic_inference = false;
//
- private boolean _realign = false;
- private final SortedSet<String> _removed_seq_ids;
- private final ArrayList<Sequence> _removed_seqs;
- private File _removed_seqs_out_base = null;
- private int _step = -1;
- private int _step_for_diagnostics = -1;
+ private boolean _realign = false;
+ private final SortedSet<String> _removed_seq_ids;
+ private final ArrayList<MolecularSequence> _removed_seqs;
+ private File _removed_seqs_out_base = null;
+ private int _step = -1;
+ private int _step_for_diagnostics = -1;
static {
NF_1.setRoundingMode( RoundingMode.HALF_UP );
NF_4.setRoundingMode( RoundingMode.HALF_UP );
_msa = msa;
_removed_seq_ids = new TreeSet<String>();
_longest_id_length = _msa.determineMaxIdLength();
- _removed_seqs = new ArrayList<Sequence>();
+ _removed_seqs = new ArrayList<MolecularSequence>();
}
public final Phylogeny calcTree() {
while ( MsaMethods.calcGapRatio( _msa ) > mean_gapiness ) {
final String id = to_remove_ids.get( i );
_removed_seq_ids.add( id );
- final Sequence deleted = _msa.deleteRow( id, true );
+ final MolecularSequence deleted = _msa.deleteRow( id, true );
_removed_seqs.add( deleted );
removeGapColumns();
if ( isPrintMsaStatsWriteOutfileAndRealign( i ) || ( MsaMethods.calcGapRatio( _msa ) <= mean_gapiness ) ) {
while ( _msa.getLength() > length ) {
final String id = to_remove_ids.get( i );
_removed_seq_ids.add( id );
- final Sequence deleted = _msa.deleteRow( id, true );
+ final MolecularSequence deleted = _msa.deleteRow( id, true );
_removed_seqs.add( deleted );
removeGapColumns();
if ( isPrintMsaStatsWriteOutfileAndRealign( i ) || ( _msa.getLength() <= length ) ) {
for( int i = 0; i < to_remove_ids.size(); ++i ) {
final String id = to_remove_ids.get( i );
_removed_seq_ids.add( id );
- final Sequence deleted = _msa.deleteRow( id, true );
+ final MolecularSequence deleted = _msa.deleteRow( id, true );
_removed_seqs.add( deleted );
removeGapColumns();
if ( isPrintMsaStatsWriteOutfileAndRealign( i ) || ( i == ( to_remove_ids.size() - 1 ) ) ) {
return s;
}
- final int calcNonGapResidues( final Sequence seq ) {
+ final int calcNonGapResidues( final MolecularSequence seq ) {
int ng = 0;
for( int i = 0; i < seq.getLength(); ++i ) {
if ( !seq.isGapAt( i ) ) {
\r
public static void addMolecularSeqsToTree( final Phylogeny phy, final Msa msa ) {\r
for( int s = 0; s < msa.getNumberOfSequences(); ++s ) {\r
- final org.forester.sequence.Sequence seq = msa.getSequence( s );\r
+ final org.forester.sequence.MolecularSequence seq = msa.getSequence( s );\r
final PhylogenyNode node = phy.getNode( seq.getIdentifier() );\r
final org.forester.phylogeny.data.Sequence new_seq = new Sequence();\r
new_seq.setMolecularSequenceAligned( true );\r
import org.forester.io.parsers.phyloxml.PhyloXmlMapping;
import org.forester.io.parsers.phyloxml.PhyloXmlUtil;
import org.forester.io.writers.PhylogenyWriter;
+import org.forester.sequence.MolecularSequence;
+import org.forester.sequence.MolecularSequence.TYPE;
import org.forester.util.ForesterUtil;
public class Sequence implements PhylogenyData, MultipleUris, Comparable<Sequence> {
init();
}
+ /**
+ * Creates a phylogeny-data Sequence from a molecular sequence.
+ * <p>
+ * After init(), the molecular sequence string of mol_seq is copied via
+ * setMolecularSequence() and its identifier is used as the name.
+ * The molecular type is translated to the string handed to setType():
+ * TYPE.AA to "protein", TYPE.DNA to "dna", TYPE.RNA to "rna".
+ *
+ * @param mol_seq the molecular sequence to copy sequence string, identifier and type from
+ * @throws IllegalArgumentException if the type of mol_seq is none of AA, DNA, RNA,
+ *         or if setType() rejects the translated type string (the
+ *         PhyloXmlDataFormatException is attached as the cause)
+ */
+ public Sequence( final MolecularSequence mol_seq ) {
+ init();
+ setMolecularSequence( mol_seq.getMolecularSequenceAsString() );
+ setName( mol_seq.getIdentifier() );
+ String type;
+ if ( mol_seq.getType() == TYPE.AA ) {
+ type = "protein";
+ }
+ else if ( mol_seq.getType() == TYPE.DNA ) {
+ type = "dna";
+ }
+ else if ( mol_seq.getType() == TYPE.RNA ) {
+ type = "rna";
+ }
+ else {
+ throw new IllegalArgumentException( "unknown sequence type " + mol_seq.getType() );
+ }
+ try {
+ setType( type );
+ }
+ catch ( final PhyloXmlDataFormatException e ) {
+ // Keep the original exception as the cause so the reason for the rejection is not lost.
+ throw new IllegalArgumentException( "don't know how to handle type " + mol_seq.getType(), e );
+ }
+ }
+
public void addAnnotation( final Annotation annotation ) {
getAnnotations().add( annotation );
}
import org.forester.util.ForesterUtil;
-public class BasicSequence implements Sequence {
+public class BasicSequence implements MolecularSequence {
private final char[] _mol_sequence;
private String _identifier;
if ( obj.getClass() != getClass() ) {
return false;
}
- final Sequence other = ( Sequence ) obj;
+ final MolecularSequence other = ( MolecularSequence ) obj;
if ( getMolecularSequenceAsString().equals( other.getMolecularSequenceAsString() ) ) {
return true;
}
return sb.toString();
}
- public static Sequence copySequence( final Sequence seq ) {
+ public static MolecularSequence copySequence( final MolecularSequence seq ) {
final char[] s = new char[ seq.getMolecularSequence().length ];
for( int i = 0; i < seq.getMolecularSequence().length; i++ ) {
s[ i ] = seq.getMolecularSequence()[ i ];
return new BasicSequence( new String( seq.getIdentifier() ), s, seq.getType() );
}
- public static Sequence createAaSequence( final String identifier, final String mol_sequence ) {
+ public static MolecularSequence createAaSequence( final String identifier, final String mol_sequence ) {
return new BasicSequence( identifier, mol_sequence.toUpperCase().replaceAll( "\\.", GAP_STR )
.replaceAll( AA_REGEXP, Character.toString( UNSPECIFIED_AA ) ), TYPE.AA );
}
- public static Sequence createDnaSequence( final String identifier, final String mol_sequence ) {
+ public static MolecularSequence createDnaSequence( final String identifier, final String mol_sequence ) {
return new BasicSequence( identifier, mol_sequence.toUpperCase().replaceAll( "\\.", GAP_STR )
.replaceAll( DNA_REGEXP, Character.toString( UNSPECIFIED_NUC ) ), TYPE.DNA );
}
- public static Sequence createRnaSequence( final String identifier, final String mol_sequence ) {
+ public static MolecularSequence createRnaSequence( final String identifier, final String mol_sequence ) {
return new BasicSequence( identifier, mol_sequence.toUpperCase().replaceAll( "\\.", GAP_STR )
.replaceAll( RNA_REGEXP, Character.toString( UNSPECIFIED_NUC ) ), TYPE.RNA );
}
package org.forester.sequence;
-public interface Sequence {
+public interface MolecularSequence {
public static final char UNSPECIFIED_AA = 'X';
public static final char UNSPECIFIED_NUC = 'N';
import org.forester.sdi.SDIR;
import org.forester.sdi.TestGSDI;
import org.forester.sequence.BasicSequence;
-import org.forester.sequence.Sequence;
+import org.forester.sequence.MolecularSequence;
import org.forester.species.BasicSpecies;
import org.forester.species.Species;
import org.forester.surfacing.TestSurfacing;
private static boolean testAminoAcidSequence() {
try {
- final Sequence aa1 = BasicSequence.createAaSequence( "aa1", "aAklm-?xX*z$#" );
+ final MolecularSequence aa1 = BasicSequence.createAaSequence( "aa1", "aAklm-?xX*z$#" );
if ( aa1.getLength() != 13 ) {
return false;
}
if ( !new String( aa1.getMolecularSequence() ).equals( "AAKLM-XXX*ZXX" ) ) {
return false;
}
- final Sequence aa2 = BasicSequence.createAaSequence( "aa3", "ARNDCQEGHILKMFPSTWYVX*-BZOJU" );
+ final MolecularSequence aa2 = BasicSequence.createAaSequence( "aa3", "ARNDCQEGHILKMFPSTWYVX*-BZOJU" );
if ( !new String( aa2.getMolecularSequence() ).equals( "ARNDCQEGHILKMFPSTWYVX*-BZXXU" ) ) {
return false;
}
- final Sequence dna1 = BasicSequence.createDnaSequence( "dna1", "ACGTUX*-?RYMKWSN" );
+ final MolecularSequence dna1 = BasicSequence.createDnaSequence( "dna1", "ACGTUX*-?RYMKWSN" );
if ( !new String( dna1.getMolecularSequence() ).equals( "ACGTNN*-NRYMKWSN" ) ) {
return false;
}
- final Sequence rna1 = BasicSequence.createRnaSequence( "rna1", "..ACGUTX*-?RYMKWSN" );
+ final MolecularSequence rna1 = BasicSequence.createRnaSequence( "rna1", "..ACGUTX*-?RYMKWSN" );
if ( !new String( rna1.getMolecularSequence() ).equals( "--ACGUNN*-NRYMKWSN" ) ) {
return false;
}
private static boolean testMsaQualityMethod() {
try {
- final Sequence s0 = BasicSequence.createAaSequence( "a", "ABAXEFGHIJJE-" );
- final Sequence s1 = BasicSequence.createAaSequence( "b", "ABBXEFGHIJJBB" );
- final Sequence s2 = BasicSequence.createAaSequence( "c", "AXCXEFGHIJJ--" );
- final Sequence s3 = BasicSequence.createAaSequence( "d", "AXDDEFGHIJ---" );
- final List<Sequence> l = new ArrayList<Sequence>();
+ final MolecularSequence s0 = BasicSequence.createAaSequence( "a", "ABAXEFGHIJJE-" );
+ final MolecularSequence s1 = BasicSequence.createAaSequence( "b", "ABBXEFGHIJJBB" );
+ final MolecularSequence s2 = BasicSequence.createAaSequence( "c", "AXCXEFGHIJJ--" );
+ final MolecularSequence s3 = BasicSequence.createAaSequence( "d", "AXDDEFGHIJ---" );
+ final List<MolecularSequence> l = new ArrayList<MolecularSequence>();
l.add( s0 );
l.add( s1 );
l.add( s2 );
private static boolean testMsaEntropy() {
try {
- final Sequence s0 = BasicSequence.createAaSequence( "a", "AAAAAAA" );
- final Sequence s1 = BasicSequence.createAaSequence( "b", "AAAIACC" );
- final Sequence s2 = BasicSequence.createAaSequence( "c", "AAIIIIF" );
- final Sequence s3 = BasicSequence.createAaSequence( "d", "AIIIVVW" );
- final List<Sequence> l = new ArrayList<Sequence>();
+ final MolecularSequence s0 = BasicSequence.createAaSequence( "a", "AAAAAAA" );
+ final MolecularSequence s1 = BasicSequence.createAaSequence( "b", "AAAIACC" );
+ final MolecularSequence s2 = BasicSequence.createAaSequence( "c", "AAIIIIF" );
+ final MolecularSequence s3 = BasicSequence.createAaSequence( "d", "AIIIVVW" );
+ final List<MolecularSequence> l = new ArrayList<MolecularSequence>();
l.add( s0 );
l.add( s1 );
l.add( s2 );
System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 6, msa, 4 ) );
System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 6, msa, 5 ) );
System.out.println( MsaMethods.calcNormalizedShannonsEntropy( 6, msa, 6 ) );
- final List<Sequence> l2 = new ArrayList<Sequence>();
+ final List<MolecularSequence> l2 = new ArrayList<MolecularSequence>();
l2.add( BasicSequence.createAaSequence( "1", "AAAAAAA" ) );
l2.add( BasicSequence.createAaSequence( "2", "AAAIACC" ) );
l2.add( BasicSequence.createAaSequence( "3", "AAIIIIF" ) );
private static boolean testDeleteableMsa() {
try {
- final Sequence s0 = BasicSequence.createAaSequence( "a", "AAAA" );
- final Sequence s1 = BasicSequence.createAaSequence( "b", "BAAA" );
- final Sequence s2 = BasicSequence.createAaSequence( "c", "CAAA" );
- final Sequence s3 = BasicSequence.createAaSequence( "d", "DAAA" );
- final Sequence s4 = BasicSequence.createAaSequence( "e", "EAAA" );
- final Sequence s5 = BasicSequence.createAaSequence( "f", "FAAA" );
- final List<Sequence> l0 = new ArrayList<Sequence>();
+ final MolecularSequence s0 = BasicSequence.createAaSequence( "a", "AAAA" );
+ final MolecularSequence s1 = BasicSequence.createAaSequence( "b", "BAAA" );
+ final MolecularSequence s2 = BasicSequence.createAaSequence( "c", "CAAA" );
+ final MolecularSequence s3 = BasicSequence.createAaSequence( "d", "DAAA" );
+ final MolecularSequence s4 = BasicSequence.createAaSequence( "e", "EAAA" );
+ final MolecularSequence s5 = BasicSequence.createAaSequence( "f", "FAAA" );
+ final List<MolecularSequence> l0 = new ArrayList<MolecularSequence>();
l0.add( s0 );
l0.add( s1 );
l0.add( s2 );
return false;
}
//
- final Sequence s_0 = BasicSequence.createAaSequence( "a", "--A---B-C--X----" );
- final Sequence s_1 = BasicSequence.createAaSequence( "b", "--B-----C-------" );
- final Sequence s_2 = BasicSequence.createAaSequence( "c", "--C--AB-C------Z" );
- final Sequence s_3 = BasicSequence.createAaSequence( "d", "--D--AA-C-------" );
- final Sequence s_4 = BasicSequence.createAaSequence( "e", "--E--AA-C-------" );
- final Sequence s_5 = BasicSequence.createAaSequence( "f", "--F--AB-CD--Y---" );
- final List<Sequence> l1 = new ArrayList<Sequence>();
+ final MolecularSequence s_0 = BasicSequence.createAaSequence( "a", "--A---B-C--X----" );
+ final MolecularSequence s_1 = BasicSequence.createAaSequence( "b", "--B-----C-------" );
+ final MolecularSequence s_2 = BasicSequence.createAaSequence( "c", "--C--AB-C------Z" );
+ final MolecularSequence s_3 = BasicSequence.createAaSequence( "d", "--D--AA-C-------" );
+ final MolecularSequence s_4 = BasicSequence.createAaSequence( "e", "--E--AA-C-------" );
+ final MolecularSequence s_5 = BasicSequence.createAaSequence( "f", "--F--AB-CD--Y---" );
+ final List<MolecularSequence> l1 = new ArrayList<MolecularSequence>();
l1.add( s_0 );
l1.add( s_1 );
l1.add( s_2 );
return false;
}
//
- final Sequence s__0 = BasicSequence.createAaSequence( "a", "A------" );
- final Sequence s__1 = BasicSequence.createAaSequence( "b", "BB-----" );
- final Sequence s__2 = BasicSequence.createAaSequence( "c", "CCC----" );
- final Sequence s__3 = BasicSequence.createAaSequence( "d", "DDDD---" );
- final Sequence s__4 = BasicSequence.createAaSequence( "e", "EEEEE--" );
- final Sequence s__5 = BasicSequence.createAaSequence( "f", "FFFFFF-" );
- final List<Sequence> l2 = new ArrayList<Sequence>();
+ final MolecularSequence s__0 = BasicSequence.createAaSequence( "a", "A------" );
+ final MolecularSequence s__1 = BasicSequence.createAaSequence( "b", "BB-----" );
+ final MolecularSequence s__2 = BasicSequence.createAaSequence( "c", "CCC----" );
+ final MolecularSequence s__3 = BasicSequence.createAaSequence( "d", "DDDD---" );
+ final MolecularSequence s__4 = BasicSequence.createAaSequence( "e", "EEEEE--" );
+ final MolecularSequence s__5 = BasicSequence.createAaSequence( "f", "FFFFFF-" );
+ final List<MolecularSequence> l2 = new ArrayList<MolecularSequence>();
l2.add( s__0 );
l2.add( s__1 );
l2.add( s__2 );
dmsa2.setIdentifier( 0, "new_c" );
dmsa2.setIdentifier( 1, "new_d" );
dmsa2.setResidueAt( 0, 0, 'x' );
- final Sequence s = dmsa2.deleteRow( "new_d", true );
+ final MolecularSequence s = dmsa2.deleteRow( "new_d", true );
if ( !s.getMolecularSequenceAsString().equals( "D" ) ) {
return false;
}