import java.util.ArrayList;
import java.util.BitSet;
-import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import jalview.analysis.AlignSeq;
import jalview.analysis.SeqsetUtils;
-import jalview.api.FeatureColourI;
-import jalview.datamodel.AlignmentAnnotation;
-import jalview.datamodel.AlignmentI;
-import jalview.datamodel.AnnotatedCollectionI;
import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceCollectionI;
import jalview.datamodel.SequenceI;
-import jalview.datamodel.features.FeatureMatcherSetI;
import jalview.schemes.ResidueProperties;
import jalview.util.Comparison;
import jalview.ws2.actions.BaseJob;
+ /**
+ * Maps the generated names of the submitted sequences to the sequences they
+ * were built from. {@code create} passes null here unless bySequence is set.
+ */
final Map<String, SequenceI> seqNames;
- final int start, end;
-
- final int minSize;
+ /**
+ * Start and end of the submitted region, as given to the constructor;
+ * {@code create} supplies the getStartRes() and getEndRes() values of its
+ * input collection.
+ */
+ final int regionStart, regionEnd;
- List<AlignmentAnnotation> returnedAnnotations = Collections.emptyList();
-
- Map<String, FeatureColourI> featureColours = Collections.emptyMap();
-
- Map<String, FeatureMatcherSetI> featureFilters = Collections.emptyMap();
-
+ /** Threshold that the count of valid items (nvalid) must reach. */
+ final int minSize;
+ /**
+ * Creates a job for the given input sequences and stores the supplied gap
+ * map, name map, region bounds and minimum size without copying them.
+ *
+ * @param inputSeqs
+ * sequences handed to the superclass constructor
+ * @param gapMap
+ * stored as-is; {@code create} passes null when gaps are not
+ * submitted
+ * @param seqNames
+ * stored as-is; may be null
+ * @param start
+ * stored as regionStart
+ * @param end
+ * stored as regionEnd
+ * @param minSize
+ * stored as minSize
+ */
public AnnotationJob(List<SequenceI> inputSeqs, boolean[] gapMap,
- Map<String, SequenceI> seqNames, int start, int end, int minSize)
+ Map<String, SequenceI> seqNames, int start, int end, int minSize)
{
super(inputSeqs);
this.gapMap = gapMap;
this.seqNames = seqNames;
- this.start = start;
- this.end = end;
+ this.regionStart = start;
+ this.regionEnd = end;
this.minSize = minSize;
}
return nvalid >= minSize;
}
- public static AnnotationJob create(AnnotatedCollectionI inputSeqs,
- boolean bySequence, boolean submitGaps, boolean requireAligned,
- boolean filterNonStandardResidues, int minSize)
+ /**
+ * Builds an annotation job from the region of the input collection bounded
+ * by its getStartRes() and getEndRes() values.
+ * <p>
+ * Sequences shorter than 10 (see minlen) are skipped. Every remaining
+ * sequence is cut to the region, given a name generated by
+ * SeqsetUtils.unique_name and added to the job under that name.
+ *
+ * @param inputSeqs
+ * collection providing the sequences and the region bounds
+ * @param bySequence
+ * when true, sequence length is measured between the residue
+ * positions found for the region bounds, and a map from generated
+ * name to source sequence is recorded; when false, length is
+ * getEnd() - getStart() and no name map is kept
+ * @param submitGaps
+ * when true, sequences keep their gaps and a gap map is produced;
+ * when false, gaps are stripped and the job's gap map is null
+ * @param requireAligned
+ * when true together with submitGaps, every sequence is refitted to
+ * the columns set in the residue map
+ * @param filterNonStandardResidues
+ * when true, non-standard residues are replaced with
+ * Comparison.GAP_DASH before the sequence is built
+ * @param minSize
+ * passed unchanged to the job constructor
+ * @return the new job
+ */
+ public static AnnotationJob create(SequenceCollectionI inputSeqs,
+ boolean bySequence, boolean submitGaps, boolean requireAligned,
+ boolean filterNonStandardResidues, int minSize)
{
- List<SequenceI> seqs = new ArrayList<>();
+ List<SequenceI> seqences = new ArrayList<>();
int minlen = 10;
- int ln = -1;
- Map<String, SequenceI> seqNames = bySequence ? new HashMap<>() : null;
- BitSet gapMap = new BitSet();
- int gapMapSize = 0;
+ // longest sequence built in the loop below; sizes the returned gap map
+ int width = 0;
+ Map<String, SequenceI> namesMap = bySequence ? new HashMap<>() : null;
+ BitSet residueMap = new BitSet();
int start = inputSeqs.getStartRes();
int end = inputSeqs.getEndRes();
// TODO: URGENT! unify with JPred / MSA code to handle hidden regions
// persisted/restored
for (SequenceI sq : inputSeqs.getSequences())
{
- int sqlen;
- if (bySequence)
- sqlen = sq.findPosition(end + 1) - sq.findPosition(start + 1);
+ int sqLen = (bySequence)
+ ? sq.findPosition(end + 1) - sq.findPosition(start + 1)
+ : sq.getEnd() - sq.getStart();
+ if (sqLen < minlen)
+ continue;
+ // the name is numbered from the count of sequences accepted so far
+ String newName = SeqsetUtils.unique_name(seqences.size() + 1);
+ if (namesMap != null)
+ namesMap.put(newName, sq);
+ char[] seqChars = sq.getSequence(start, end + 1);
+ if (filterNonStandardResidues)
+ replaceNonStandardResidues(seqChars, Comparison.GAP_DASH, sq.isProtein());
+ Sequence seq;
+ if (submitGaps)
+ {
+ seq = new Sequence(newName, seqChars);
+ updateResidueMap(residueMap, seq, filterNonStandardResidues);
+ }
else
- sqlen = sq.getEnd() - sq.getStart();
- if (sqlen >= minlen)
{
- String newName = SeqsetUtils.unique_name(seqs.size() + 1);
- if (seqNames != null)
- seqNames.put(newName, sq);
- Sequence seq;
- if (submitGaps)
- {
- seq = new Sequence(newName, sq.getSequenceAsString());
- gapMapSize = Math.max(gapMapSize, seq.getLength());
- for (int pos : sq.gapMap())
- {
- char sqchr = sq.getCharAt(pos);
- boolean include = !filterNonStandardResidues;
- include |= sq.isProtein() ? ResidueProperties.aaIndex[sqchr] < 20
- : ResidueProperties.nucleotideIndex[sqchr] < 5;
- if (include)
- gapMap.set(pos);
- }
- }
- else
- {
- // TODO: add ability to exclude hidden regions
- seq = new Sequence(newName, AlignSeq.extractGaps(Comparison.GapChars,
- sq.getSequenceAsString(start, end + 1)));
- // for annotation need to also record map to sequence start/end
- // position in range
- // then transfer back to original sequence on return.
- }
- seqs.add(seq);
- ln = Math.max(ln, seq.getLength());
+ // TODO: add ability to exclude hidden regions
+ seq = new Sequence(newName,
+ AlignSeq.extractGaps(Comparison.GapChars, new String(seqChars)));
+ // for annotation need to also record map to sequence start/end
+ // position in range
+ // then transfer back to original sequence on return.
}
+ seqences.add(seq);
+ width = Math.max(width, seq.getLength());
}
if (requireAligned && submitGaps)
{
- int realWidth = gapMap.cardinality();
- for (int i = 0; i < seqs.size(); i++)
+ // rebuild each sequence so it holds only the columns set in residueMap
+ for (int i = 0; i < seqences.size(); i++)
+ {
+ SequenceI sq = seqences.get(i);
+ char[] padded = fitSequenceToResidueMap(sq.getSequence(),
+ residueMap);
+ seqences.set(i, new Sequence(sq.getName(), padded));
+ }
+ }
+ // stays null when gaps are not submitted
+ boolean[] gapMapArray = null;
+ if (submitGaps)
+ {
+ gapMapArray = new boolean[width];
+ for (int i = 0; i < width; i++)
+ gapMapArray[i] = residueMap.get(i);
+ }
+ return new AnnotationJob(seqences, gapMapArray, namesMap, start, end,
+ minSize);
+ }
+
+ /**
+ * Overwrites, in place, every character of the array that does not pass the
+ * standard-residue test with the replacement character.
+ * <p>
+ * A character is replaced when its entry in ResidueProperties.aaIndex is 20
+ * or more (protein), or its entry in ResidueProperties.nucleotideIndex is 5
+ * or more (otherwise). NOTE(review): presumably the entries below those
+ * limits are the standard amino acids and nucleotides - confirm against
+ * ResidueProperties.
+ *
+ * @param seq
+ * characters to scan; modified in place
+ * @param replacement
+ * character written over each rejected position
+ * @param isProtein
+ * selects the amino acid table when true, the nucleotide table
+ * when false
+ */
+ private static void replaceNonStandardResidues(char[] seq, char replacement, boolean isProtein)
+ {
+ for (int i = 0; i < seq.length; i++)
+ {
+ char chr = seq[i];
+ if (isProtein
+ ? ResidueProperties.aaIndex[chr] >= 20
+ : ResidueProperties.nucleotideIndex[chr] >= 5)
{
- SequenceI sq = seqs.get(i);
- char[] padded = new char[realWidth];
- char[] original = sq.getSequence();
- for (int op = 0, pp = 0; pp < realWidth; op++)
- {
- if (gapMap.get(op))
- {
- if (original.length > op)
- padded[pp++] = original[op];
- else
- padded[pp++] = '-';
- }
- }
- seqs.set(i, new Sequence(sq.getName(), padded));
+ seq[i] = replacement;
+ }
+ }
+ }
+
+ /**
+ * Records in the residue map the positions at which the sequence holds a
+ * residue to be kept.
+ * <p>
+ * Every position returned by {@code seq.gapMap()} is examined. A position is
+ * set in the map when filtering is off, or when its character has an entry
+ * below 20 in ResidueProperties.aaIndex (protein) or below 5 in
+ * ResidueProperties.nucleotideIndex (otherwise). Bits already set in the map
+ * are never cleared.
+ *
+ * @param residueMap
+ * bit set updated in place
+ * @param seq
+ * sequence whose positions are inspected
+ * @param filterNonStandardResidues
+ * when true, only characters passing the table test are recorded
+ */
+ private static void updateResidueMap(BitSet residueMap, SequenceI seq,
+ boolean filterNonStandardResidues)
+ {
+ // the sequence type is the same for every position, so query it once
+ final boolean protein = seq.isProtein();
+ for (int pos : seq.gapMap())
+ {
+ char residue = seq.getCharAt(pos);
+ // looked up even when filtering is off, so a character outside the
+ // table fails in the same way regardless of the flag
+ boolean standard = protein
+ ? ResidueProperties.aaIndex[residue] < 20
+ : ResidueProperties.nucleotideIndex[residue] < 5;
+ if (standard || !filterNonStandardResidues)
+ {
+ residueMap.set(pos);
+ }
+ }
+ }
+
+ /**
+ * Fits the sequence to the residue map: columns whose bit is unset are
+ * dropped, and columns whose bit is set but which lie beyond the end of the
+ * sequence are filled with '-'.
+ *
+ * @param sequence
+ * characters of the sequence, indexed by column
+ * @param residueMap
+ * columns to keep
+ * @return a new array whose length is the number of bits set in the residue
+ * map
+ */
+ private static char[] fitSequenceToResidueMap(char[] sequence,
+ BitSet residueMap)
+ {
+ int width = residueMap.cardinality();
+ char[] padded = new char[width];
+ // op walks the columns of the map, pp the positions of the output; the
+ // loop ends once every set column has been emitted
+ for (int op = 0, pp = 0; pp < width; op++)
+ {
+ if (residueMap.get(op))
+ {
+ if (sequence.length > op)
+ padded[pp++] = sequence[op];
+ else
+ padded[pp++] = '-';
}
}
- boolean[] gapMapArray = new boolean[gapMapSize];
- for (int i = 0; i < gapMapSize; i++)
- gapMapArray[i] = gapMap.get(i);
- return new AnnotationJob(seqs, gapMapArray, seqNames, start, end, minSize);
+ return padded;
}
}