String newName = SeqsetUtils.unique_name(seqences.size() + 1);
if (namesMap != null)
namesMap.put(newName, sq);
+ char[] seqChars = sq.getSequence(start, end + 1);
+ if (filterNonStandardResidues)
+ replaceNonStandardResidues(seqChars, Comparison.GAP_DASH, sq.isProtein());
Sequence seq;
if (submitGaps)
{
- seq = new Sequence(newName, sq.getSequenceAsString());
+ seq = new Sequence(newName, seqChars);
updateResidueMap(residueMap, seq, filterNonStandardResidues);
}
else
{
// TODO: add ability to exclude hidden regions
seq = new Sequence(newName,
- AlignSeq.extractGaps(Comparison.GapChars,
- sq.getSequenceAsString(start, end + 1)));
+ AlignSeq.extractGaps(Comparison.GapChars, new String(seqChars)));
// for annotation need to also record map to sequence start/end
// position in range
// then transfer back to original sequence on return.
minSize);
}
+ /**
+  * Overwrites, in place, every character of {@code seq} that is not classed
+  * as a standard residue with {@code replacement}.
+  * <p>
+  * A character is kept only when its entry in the relevant
+  * {@code ResidueProperties} lookup table ({@code aaIndex} when
+  * {@code isProtein} is true, {@code nucleotideIndex} otherwise) is below the
+  * cut-off used here: 20 for protein, 5 for nucleotide. Characters whose code
+  * point lies beyond the end of the lookup table are treated as non-standard
+  * and replaced, rather than triggering an
+  * {@link ArrayIndexOutOfBoundsException}.
+  *
+  * @param seq
+  *          sequence characters to filter; modified in place
+  * @param replacement
+  *          character written over each non-standard position
+  * @param isProtein
+  *          true to test against the amino-acid table, false to test against
+  *          the nucleotide table
+  */
+ private static void replaceNonStandardResidues(char[] seq, char replacement, boolean isProtein)
+ {
+   for (int i = 0; i < seq.length; i++)
+   {
+     char chr = seq[i];
+     // NOTE(review): 20 and 5 are presumably the number of standard amino
+     // acids / nucleotide symbols at the front of each index table - confirm
+     // against ResidueProperties.
+     // The length guard comes first so that a character outside the table
+     // (a char can be as large as 65535) never reaches the array access.
+     boolean nonStandard = isProtein
+             ? (chr >= ResidueProperties.aaIndex.length
+                     || ResidueProperties.aaIndex[chr] >= 20)
+             : (chr >= ResidueProperties.nucleotideIndex.length
+                     || ResidueProperties.nucleotideIndex[chr] >= 5);
+     if (nonStandard)
+     {
+       seq[i] = replacement;
+     }
+   }
+ }
+
private static void updateResidueMap(BitSet residueMap, SequenceI seq,
boolean filterNonStandardResidues)
{