From ff450fad8709ae81919af7a15ea382af7292794c Mon Sep 17 00:00:00 2001 From: gmungoc Date: Wed, 14 Jun 2017 09:04:14 +0100 Subject: [PATCH] JAL-2598 Sequence.getSequence return a copy of the char array --- src/jalview/analysis/AAFrequency.java | 5 +- src/jalview/analysis/AlignmentUtils.java | 73 +++++++++++++++---------- src/jalview/analysis/Conservation.java | 33 +++++------ src/jalview/analysis/CrossRef.java | 10 ++-- src/jalview/datamodel/AlignedCodonFrame.java | 7 ++- src/jalview/datamodel/Alignment.java | 11 +--- src/jalview/datamodel/BinarySequence.java | 6 +- src/jalview/datamodel/Mapping.java | 11 ++-- src/jalview/datamodel/Sequence.java | 41 ++++++++++++-- src/jalview/datamodel/SequenceI.java | 13 ++++- src/jalview/datamodel/xdb/embl/EmblEntry.java | 6 +- src/jalview/ext/jmol/JmolParser.java | 6 +- src/jalview/gui/AppVarna.java | 3 +- src/jalview/io/BLCFile.java | 5 +- src/jalview/io/ClustalFile.java | 11 ++-- src/jalview/io/JnetAnnotationMaker.java | 2 +- src/jalview/io/MSFfile.java | 15 ++--- src/jalview/io/PfamFile.java | 5 +- src/jalview/io/PhylipFile.java | 6 +- src/jalview/io/PileUpfile.java | 8 +-- src/jalview/io/StockholmFile.java | 5 +- src/jalview/io/StructureFile.java | 4 +- src/jalview/schemes/ClustalxColourScheme.java | 9 ++- src/jalview/util/Comparison.java | 31 ++--------- src/jalview/ws/rest/params/SeqVector.java | 2 +- test/jalview/datamodel/AlignmentTest.java | 21 +++++++ test/jalview/datamodel/SeqCigarTest.java | 5 +- test/jalview/datamodel/SequenceTest.java | 50 ++++++++++++++--- test/jalview/ext/paradise/TestAnnotate3D.java | 4 +- 29 files changed, 235 insertions(+), 173 deletions(-) diff --git a/src/jalview/analysis/AAFrequency.java b/src/jalview/analysis/AAFrequency.java index b806355..a792d24 100755 --- a/src/jalview/analysis/AAFrequency.java +++ b/src/jalview/analysis/AAFrequency.java @@ -151,10 +151,9 @@ public class AAFrequency .println("WARNING: Consensus skipping null sequence - possible race condition."); continue; } - char[] seq = sequences[row].getSequence(); - if (seq.length > column) + if (sequences[row].getLength() > column) { - char c = seq[column]; + char c = sequences[row].getCharAt(column); residueCounts.add(c); if (Comparison.isNucleotide(c)) { diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 69ac947..b65096c 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -650,15 +650,16 @@ public class AlignmentUtils int toOffset = alignTo.getStart() - 1; int sourceGapMappedLength = 0; boolean inExon = false; - final char[] thisSeq = alignTo.getSequence(); - final char[] thatAligned = alignFrom.getSequence(); - StringBuilder thisAligned = new StringBuilder(2 * thisSeq.length); + final int toLength = alignTo.getLength(); + final int fromLength = alignFrom.getLength(); + StringBuilder thisAligned = new StringBuilder(2 * toLength); /* * Traverse the 'model' aligned sequence */ - for (char sourceChar : thatAligned) + for (int i = 0; i < fromLength; i++) { + char sourceChar = alignFrom.getCharAt(i); if (sourceChar == sourceGap) { sourceGapMappedLength += ratio; @@ -698,9 +699,9 @@ public class AlignmentUtils */ int intronLength = 0; while (basesWritten + toOffset < mappedCodonEnd - && thisSeqPos < thisSeq.length) + && thisSeqPos < toLength) { - final char c = thisSeq[thisSeqPos++]; + final char c = alignTo.getCharAt(thisSeqPos++); if (c != myGapChar) { basesWritten++; @@ -726,7 +727,7 @@ public class AlignmentUtils int gapsToAdd = calculateGapsToInsert(preserveMappedGaps, preserveUnmappedGaps, sourceGapMappedLength, inExon, trailingCopiedGap.length(), intronLength, startOfCodon); - for (int i = 0; i < gapsToAdd; i++) + for (int k = 0; k < gapsToAdd; k++) { thisAligned.append(myGapChar); } @@ -754,9 +755,9 @@ public class AlignmentUtils * At end of model aligned sequence. Copy any remaining target sequence, optionally * including (intron) gaps. */ - while (thisSeqPos < thisSeq.length) + while (thisSeqPos < toLength) { - final char c = thisSeq[thisSeqPos++]; + final char c = alignTo.getCharAt(thisSeqPos++); if (c != myGapChar || preserveUnmappedGaps) { thisAligned.append(c); @@ -946,7 +947,7 @@ public class AlignmentUtils SequenceI peptide = mapping.findAlignedSequence(cdsSeq, protein); if (peptide != null) { - int peptideLength = peptide.getLength(); + final int peptideLength = peptide.getLength(); Mapping map = mapping.getMappingBetween(cdsSeq, peptide); if (map != null) { @@ -955,7 +956,7 @@ public class AlignmentUtils { mapList = mapList.getInverse(); } - int cdsLength = cdsDss.getLength(); + final int cdsLength = cdsDss.getLength(); int mappedFromLength = MappingUtils.getLength(mapList .getFromRanges()); int mappedToLength = MappingUtils @@ -983,14 +984,15 @@ public class AlignmentUtils * walk over the aligned peptide sequence and insert mapped * codons for residues in the aligned cds sequence */ - char[] alignedPeptide = peptide.getSequence(); - char[] nucleotides = cdsDss.getSequence(); int copiedBases = 0; int cdsStart = cdsDss.getStart(); int proteinPos = peptide.getStart() - 1; int cdsCol = 0; - for (char residue : alignedPeptide) + + for (int col = 0; col < peptideLength; col++) { + char residue = peptide.getCharAt(col); + if (Comparison.isGap(residue)) { cdsCol += CODON_LENGTH; @@ -1008,7 +1010,7 @@ public class AlignmentUtils { for (int j = codon[0]; j <= codon[1]; j++) { - char mappedBase = nucleotides[j - cdsStart]; + char mappedBase = cdsDss.getCharAt(j - cdsStart); alignedCds[cdsCol++] = mappedBase; copiedBases++; } @@ -1020,7 +1022,7 @@ public class AlignmentUtils * append stop codon if not mapped from protein, * closing it up to the end of the mapped sequence */ - if (copiedBases == nucleotides.length - CODON_LENGTH) + if (copiedBases == cdsLength - CODON_LENGTH) { for (int i = alignedCds.length - 1; i >= 0; i--) { @@ -1030,9 +1032,9 @@ public class AlignmentUtils break; } } - for (int i = nucleotides.length - CODON_LENGTH; i < nucleotides.length; i++) + for (int i = cdsLength - CODON_LENGTH; i < cdsLength; i++) { - alignedCds[cdsCol++] = nucleotides[i]; + alignedCds[cdsCol++] = cdsDss.getCharAt(i); } } cdsSeq.setSequence(new String(alignedCds)); @@ -1202,21 +1204,26 @@ public class AlignmentUtils List unmappedProtein) { /* - * Prefill aligned sequences with gaps before inserting aligned protein - * residues. + * prefill peptide sequences with gaps */ int alignedWidth = alignedCodons.size(); char[] gaps = new char[alignedWidth]; Arrays.fill(gaps, protein.getGapCharacter()); - String allGaps = String.valueOf(gaps); + Map peptides = new HashMap<>(); for (SequenceI seq : protein.getSequences()) { if (!unmappedProtein.contains(seq)) { - seq.setSequence(allGaps); + peptides.put(seq, Arrays.copyOf(gaps, gaps.length)); } } + /* + * Traverse the codons left to right (as defined by CodonComparator) + * and insert peptides in each column where the sequence is mapped. + * This gives a peptide 'alignment' where residues are aligned if their + * corresponding codons occupy the same columns in the cdna alignment. + */ int column = 0; for (AlignedCodon codon : alignedCodons.keySet()) { @@ -1224,12 +1231,20 @@ public class AlignmentUtils .get(codon); for (Entry entry : columnResidues.entrySet()) { - // place translated codon at its column position in sequence - entry.getKey().getSequence()[column] = entry.getValue().product - .charAt(0); + char residue = entry.getValue().product.charAt(0); + peptides.get(entry.getKey())[column] = residue; } column++; } + + /* + * and finally set the constructed sequences + */ + for (Entry entry : peptides.entrySet()) + { + entry.getKey().setSequence(new String(entry.getValue())); + } + return 0; } @@ -2874,9 +2889,7 @@ public class AlignmentUtils .getInverse()); } - char[] fromChars = fromSeq.getSequence(); int toStart = seq.getStart(); - char[] toChars = seq.getSequence(); /* * traverse [start, end, start, end...] ranges in fromSeq @@ -2907,10 +2920,10 @@ public class AlignmentUtils * of the next character of the mapped-to sequence; stop when all * the characters of the range have been counted */ - while (mappedCharPos <= range[1] && fromCol <= fromChars.length + while (mappedCharPos <= range[1] && fromCol <= fromSeq.getLength() && fromCol >= 0) { - if (!Comparison.isGap(fromChars[fromCol - 1])) + if (!Comparison.isGap(fromSeq.getCharAt(fromCol - 1))) { /* * mapped from sequence has a character in this column @@ -2922,7 +2935,7 @@ public class AlignmentUtils seqsMap = new HashMap(); map.put(fromCol, seqsMap); } - seqsMap.put(seq, toChars[mappedCharPos - toStart]); + seqsMap.put(seq, seq.getCharAt(mappedCharPos - toStart)); mappedCharPos++; } fromCol += (forward ? 1 : -1); diff --git a/src/jalview/analysis/Conservation.java b/src/jalview/analysis/Conservation.java index 2b5a8f6..f94a658 100755 --- a/src/jalview/analysis/Conservation.java +++ b/src/jalview/analysis/Conservation.java @@ -734,28 +734,23 @@ public class Conservation public void completeAnnotations(AlignmentAnnotation conservation, AlignmentAnnotation quality2, int istart, int alWidth) { - char[] sequence = getConsSequence().getSequence(); - float minR; - float minG; - float minB; - float maxR; - float maxG; - float maxB; - minR = 0.3f; - minG = 0.0f; - minB = 0f; - maxR = 1.0f - minR; - maxG = 0.9f - minG; - maxB = 0f - minB; // scalable range for colouring both Conservation and - // Quality + SequenceI cons = getConsSequence(); + + /* + * colour scale for Conservation and Quality; + */ + float minR = 0.3f; + float minG = 0.0f; + float minB = 0f; + float maxR = 1.0f - minR; + float maxG = 0.9f - minG; + float maxB = 0f - minB; float min = 0f; float max = 11f; float qmin = 0f; float qmax = 0f; - char c; - if (conservation != null && conservation.annotations != null && conservation.annotations.length < alWidth) { @@ -778,7 +773,7 @@ public class Conservation { float value = 0; - c = sequence[i]; + char c = cons.getCharAt(i); if (Character.isDigit(c)) { @@ -865,8 +860,8 @@ public class Conservation */ String getTooltip(int column) { - char[] sequence = getConsSequence().getSequence(); - char val = column < sequence.length ? sequence[column] : '-'; + SequenceI cons = getConsSequence(); + char val = column < cons.getLength() ? cons.getCharAt(column) : '-'; boolean hasConservation = val != '-' && val != '0'; int consp = column - start; String tip = (hasConservation && consp > -1 && consp < consSymbs.length) ? consSymbs[consp] diff --git a/src/jalview/analysis/CrossRef.java b/src/jalview/analysis/CrossRef.java index 103025c..b77e403 100644 --- a/src/jalview/analysis/CrossRef.java +++ b/src/jalview/analysis/CrossRef.java @@ -783,15 +783,15 @@ public class CrossRef { return false; } - char[] c1 = seq1.getSequence(); - char[] c2 = seq2.getSequence(); - if (c1.length != c2.length) + + if (seq1.getLength() != seq2.getLength()) { return false; } - for (int i = 0; i < c1.length; i++) + int length = seq1.getLength(); + for (int i = 0; i < length; i++) { - int diff = c1[i] - c2[i]; + int diff = seq1.getCharAt(i) - seq2.getCharAt(i); /* * same char or differ in case only ('a'-'A' == 32) */ diff --git a/src/jalview/datamodel/AlignedCodonFrame.java b/src/jalview/datamodel/AlignedCodonFrame.java index 4fbfd62..54f41f8 100644 --- a/src/jalview/datamodel/AlignedCodonFrame.java +++ b/src/jalview/datamodel/AlignedCodonFrame.java @@ -504,10 +504,11 @@ public class AlignedCodonFrame * Read off the mapped nucleotides (converting to position base 0) */ codonPos = MappingUtils.flattenRanges(codonPos); - char[] dna = dnaSeq.getSequence(); int start = dnaSeq.getStart(); - result.add(new char[] { dna[codonPos[0] - start], - dna[codonPos[1] - start], dna[codonPos[2] - start] }); + char c1 = dnaSeq.getCharAt(codonPos[0] - start); + char c2 = dnaSeq.getCharAt(codonPos[1] - start); + char c3 = dnaSeq.getCharAt(codonPos[2] - start); + result.add(new char[] { c1, c2, c3 }); } } return result.isEmpty() ? null : result; diff --git a/src/jalview/datamodel/Alignment.java b/src/jalview/datamodel/Alignment.java index f5e6fc7..5553840 100755 --- a/src/jalview/datamodel/Alignment.java +++ b/src/jalview/datamodel/Alignment.java @@ -1473,8 +1473,8 @@ public class Alignment implements AlignmentI { // TODO JAL-1270 needs test coverage // currently tested for use in jalview.gui.SequenceFetcher - boolean samegap = toappend.getGapCharacter() == getGapCharacter(); char oldc = toappend.getGapCharacter(); + boolean samegap = oldc == getGapCharacter(); boolean hashidden = toappend.getHiddenSequences() != null && toappend.getHiddenSequences().hiddenSequences != null; // get all sequences including any hidden ones @@ -1490,14 +1490,7 @@ public class Alignment implements AlignmentI { if (!samegap) { - char[] oldseq = addedsq.getSequence(); - for (int c = 0; c < oldseq.length; c++) - { - if (oldseq[c] == oldc) - { - oldseq[c] = gapCharacter; - } - } + addedsq.replace(oldc, gapCharacter); } toappendsq.add(addedsq); } diff --git a/src/jalview/datamodel/BinarySequence.java b/src/jalview/datamodel/BinarySequence.java index b7e15a6..477f4a7 100755 --- a/src/jalview/datamodel/BinarySequence.java +++ b/src/jalview/datamodel/BinarySequence.java @@ -70,7 +70,7 @@ public class BinarySequence extends Sequence int nores = (isNa) ? ResidueProperties.maxNucleotideIndex : ResidueProperties.maxProteinIndex; - dbinary = new double[getSequence().length * nores]; + dbinary = new double[getLength() * nores]; return nores; } @@ -88,7 +88,7 @@ public class BinarySequence extends Sequence { int nores = initMatrixGetNoRes(); final int[] sindex = getSymbolmatrix(); - for (int i = 0; i < getSequence().length; i++) + for (int i = 0; i < getLength(); i++) { int aanum = nores - 1; @@ -132,7 +132,7 @@ public class BinarySequence extends Sequence { int nores = initMatrixGetNoRes(); - for (int i = 0, iSize = getSequence().length; i < iSize; i++) + for (int i = 0, iSize = getLength(); i < iSize; i++) { int aanum = nores - 1; diff --git a/src/jalview/datamodel/Mapping.java b/src/jalview/datamodel/Mapping.java index c741603..66425d2 100644 --- a/src/jalview/datamodel/Mapping.java +++ b/src/jalview/datamodel/Mapping.java @@ -46,7 +46,7 @@ public class Mapping /* * The characters of the aligned sequence e.g. "-cGT-ACgTG-" */ - private final char[] alignedSeq; + private final SequenceI alignedSeq; /* * the sequence start residue @@ -102,7 +102,7 @@ public class Mapping */ public AlignedCodonIterator(SequenceI seq, char gapChar) { - this.alignedSeq = seq.getSequence(); + this.alignedSeq = seq; this.start = seq.getStart(); this.gap = gapChar; fromRanges = map.getFromRanges().iterator(); @@ -176,7 +176,7 @@ public class Mapping if (toPosition <= currentToRange[1]) { SequenceI seq = Mapping.this.to; - char pep = seq.getSequence()[toPosition - seq.getStart()]; + char pep = seq.getCharAt(toPosition - seq.getStart()); toPosition++; return String.valueOf(pep); } @@ -257,9 +257,10 @@ public class Mapping * allow for offset e.g. treat pos 8 as 2 if sequence starts at 7 */ int truePos = sequencePos - (start - 1); - while (alignedBases < truePos && alignedColumn < alignedSeq.length) + int length = alignedSeq.getLength(); + while (alignedBases < truePos && alignedColumn < length) { - char c = alignedSeq[alignedColumn++]; + char c = alignedSeq.getCharAt(alignedColumn++); if (c != gap && !Comparison.isGap(c)) { alignedBases++; diff --git a/src/jalview/datamodel/Sequence.java b/src/jalview/datamodel/Sequence.java index a442cf0..96747e4 100755 --- a/src/jalview/datamodel/Sequence.java +++ b/src/jalview/datamodel/Sequence.java @@ -260,9 +260,8 @@ public class Sequence extends ASequence implements SequenceI protected void initSeqFrom(SequenceI seq, AlignmentAnnotation[] alAnnotation) { - char[] oseq = seq.getSequence(); - initSeqAndName(seq.getName(), Arrays.copyOf(oseq, oseq.length), - seq.getStart(), seq.getEnd()); + char[] oseq = seq.getSequence(); // returns a copy of the array + initSeqAndName(seq.getName(), oseq, seq.getStart(), seq.getEnd()); description = seq.getDescription(); if (seq != datasetSequence) @@ -563,7 +562,9 @@ public class Sequence extends ASequence implements SequenceI @Override public char[] getSequence() { - return sequence; + // return sequence; + return sequence == null ? null : Arrays.copyOf(sequence, + sequence.length); } /* @@ -1395,7 +1396,7 @@ public class Sequence extends ASequence implements SequenceI private boolean _isNa; - private long _seqhash = 0; + private int _seqhash = 0; /** * Answers false if the sequence is more than 85% nucleotide (ACGTU), else @@ -1825,4 +1826,34 @@ public class Sequence extends ASequence implements SequenceI { changeCount++; } + + /** + * {@inheritDoc} + */ + @Override + public int replace(char c1, char c2) + { + if (c1 == c2) + { + return 0; + } + int count = 0; + synchronized (sequence) + { + for (int c = 0; c < sequence.length; c++) + { + if (sequence[c] == c1) + { + sequence[c] = c2; + count++; + } + } + } + if (count > 0) + { + sequenceChanged(); + } + + return count; + } } diff --git a/src/jalview/datamodel/SequenceI.java b/src/jalview/datamodel/SequenceI.java index 6992a8d..6840df8 100755 --- a/src/jalview/datamodel/SequenceI.java +++ b/src/jalview/datamodel/SequenceI.java @@ -119,9 +119,9 @@ public interface SequenceI extends ASequenceI public String getSequenceAsString(int start, int end); /** - * Get the sequence as a character array + * Answers a copy of the sequence as a character array * - * @return seqeunce and any gaps + * @return */ public char[] getSequence(); @@ -515,4 +515,13 @@ public interface SequenceI extends ASequenceI * returns true. */ BitSet getInsertionsAsBits(); + + /** + * Replaces every occurrence of c1 in the sequence with c2 and returns the + * number of characters changed + * + * @param c1 + * @param c2 + */ + public int replace(char c1, char c2); } diff --git a/src/jalview/datamodel/xdb/embl/EmblEntry.java b/src/jalview/datamodel/xdb/embl/EmblEntry.java index c3d4e66..5784f04 100644 --- a/src/jalview/datamodel/xdb/embl/EmblEntry.java +++ b/src/jalview/datamodel/xdb/embl/EmblEntry.java @@ -367,7 +367,8 @@ public class EmblEntry System.err .println("Implementation Notice: EMBLCDS records not properly supported yet - Making up the CDNA region of this sequence... may be incorrect (" + sourceDb + ":" + getAccession() + ")"); - if (translationLength * 3 == (1 - codonStart + dna.getSequence().length)) + int dnaLength = dna.getLength(); + if (translationLength * 3 == (1 - codonStart + dnaLength)) { System.err .println("Not allowing for additional stop codon at end of cDNA fragment... !"); @@ -377,8 +378,7 @@ public class EmblEntry dnaToProteinMapping = new Mapping(product, exons, new int[] { 1, translationLength }, 3, 1); } - if ((translationLength + 1) * 3 == (1 - codonStart + dna - .getSequence().length)) + if ((translationLength + 1) * 3 == (1 - codonStart + dnaLength)) { System.err .println("Allowing for additional stop codon at end of cDNA fragment... will probably cause an error in VAMSAs!"); diff --git a/src/jalview/ext/jmol/JmolParser.java b/src/jalview/ext/jmol/JmolParser.java index f08e40e..beaaf79 100644 --- a/src/jalview/ext/jmol/JmolParser.java +++ b/src/jalview/ext/jmol/JmolParser.java @@ -351,10 +351,10 @@ public class JmolParser extends StructureFile implements JmolStatusListener SequenceI sq, char[] secstr, char[] secstrcode, String chainId, int firstResNum) { - char[] seq = sq.getSequence(); + int length = sq.getLength(); boolean ssFound = false; - Annotation asecstr[] = new Annotation[seq.length + firstResNum - 1]; - for (int p = 0; p < seq.length; p++) + Annotation asecstr[] = new Annotation[length + firstResNum - 1]; + for (int p = 0; p < length; p++) { if (secstr[p] >= 'A' && secstr[p] <= 'z') { diff --git a/src/jalview/gui/AppVarna.java b/src/jalview/gui/AppVarna.java index a50de77..079645f 100644 --- a/src/jalview/gui/AppVarna.java +++ b/src/jalview/gui/AppVarna.java @@ -622,11 +622,10 @@ public class AppVarna extends JInternalFrame implements SelectionListener, ShiftList offset = new ShiftList(); int ofstart = -1; int sleng = seq.getLength(); - char[] seqChars = seq.getSequence(); for (int i = 0; i < sleng; i++) { - if (Comparison.isGap(seqChars[i])) + if (Comparison.isGap(seq.getCharAt(i))) { if (ofstart == -1) { diff --git a/src/jalview/io/BLCFile.java b/src/jalview/io/BLCFile.java index 6317e83..1b93892 100755 --- a/src/jalview/io/BLCFile.java +++ b/src/jalview/io/BLCFile.java @@ -246,10 +246,7 @@ public class BLCFile extends AlignFile out.append(newline); - if (s[i].getSequence().length > max) - { - max = s[i].getSequence().length; - } + max = Math.max(max, s[i].getLength()); i++; } diff --git a/src/jalview/io/ClustalFile.java b/src/jalview/io/ClustalFile.java index 5d58d42..d618809 100755 --- a/src/jalview/io/ClustalFile.java +++ b/src/jalview/io/ClustalFile.java @@ -210,10 +210,7 @@ public class ClustalFile extends AlignFile { String tmp = printId(s[i], jvsuffix); - if (s[i].getSequence().length > max) - { - max = s[i].getSequence().length; - } + max = Math.max(max, s[i].getLength()); if (tmp.length() > maxid) { @@ -245,14 +242,14 @@ public class ClustalFile extends AlignFile int start = i * len; int end = start + len; - if ((end < s[j].getSequence().length) - && (start < s[j].getSequence().length)) + int length = s[j].getLength(); + if ((end < length) && (start < length)) { out.append(s[j].getSequenceAsString(start, end)); } else { - if (start < s[j].getSequence().length) + if (start < length) { out.append(s[j].getSequenceAsString().substring(start)); } diff --git a/src/jalview/io/JnetAnnotationMaker.java b/src/jalview/io/JnetAnnotationMaker.java index 3feae5d..2a8a00f 100755 --- a/src/jalview/io/JnetAnnotationMaker.java +++ b/src/jalview/io/JnetAnnotationMaker.java @@ -59,7 +59,7 @@ public class JnetAnnotationMaker // in the future we could search for the query // sequence in the alignment before calling this function. SequenceI seqRef = al.getSequenceAt(firstSeq); - int width = preds[0].getSequence().length; + int width = preds[0].getLength(); int[] gapmap = al.getSequenceAt(firstSeq).gapMap(); if ((delMap != null && delMap.length > width) || (delMap == null && gapmap.length != width)) diff --git a/src/jalview/io/MSFfile.java b/src/jalview/io/MSFfile.java index f379724..b05acff 100755 --- a/src/jalview/io/MSFfile.java +++ b/src/jalview/io/MSFfile.java @@ -294,7 +294,7 @@ public class MSFfile extends AlignFile } long maxNB = 0; - out.append(" MSF: " + s[0].getSequence().length + " Type: " + out.append(" MSF: " + s[0].getLength() + " Type: " + (is_NA ? "N" : "P") + " Check: " + (bigChecksum % 10000) + " .."); out.append(newline); @@ -310,9 +310,9 @@ public class MSFfile extends AlignFile nameBlock[i] = new String(" Name: " + printId(s[i], jvSuffix) + " "); - idBlock[i] = new String("Len: " - + maxLenpad.form(s[i].getSequence().length) + " Check: " - + maxChkpad.form(checksums[i]) + " Weight: 1.00" + newline); + idBlock[i] = new String("Len: " + maxLenpad.form(s[i].getLength()) + + " Check: " + maxChkpad.form(checksums[i]) + + " Weight: 1.00" + newline); if (s[i].getName().length() > maxid) { @@ -369,8 +369,9 @@ public class MSFfile extends AlignFile int start = (i * 50) + (k * 10); int end = start + 10; - if ((end < s[j].getSequence().length) - && (start < s[j].getSequence().length)) + int length = s[j].getLength(); + if ((end < length) + && (start < length)) { out.append(s[j].getSequence(start, end)); @@ -385,7 +386,7 @@ public class MSFfile extends AlignFile } else { - if (start < s[j].getSequence().length) + if (start < length) { out.append(s[j].getSequenceAsString().substring(start)); out.append(newline); diff --git a/src/jalview/io/PfamFile.java b/src/jalview/io/PfamFile.java index bc22fae..9f152cc 100755 --- a/src/jalview/io/PfamFile.java +++ b/src/jalview/io/PfamFile.java @@ -157,10 +157,7 @@ public class PfamFile extends AlignFile { String tmp = printId(s[i], jvsuffix); - if (s[i].getSequence().length > max) - { - max = s[i].getSequence().length; - } + max = Math.max(max, s[i].getLength()); if (tmp.length() > maxid) { diff --git a/src/jalview/io/PhylipFile.java b/src/jalview/io/PhylipFile.java index e8fe7e9..e1d82ee 100644 --- a/src/jalview/io/PhylipFile.java +++ b/src/jalview/io/PhylipFile.java @@ -247,7 +247,7 @@ public class PhylipFile extends AlignFile sb.append(" "); // if there are no sequences, then define the number of characters as 0 sb.append( - (sqs.length > 0) ? Integer.toString(sqs[0].getSequence().length) +(sqs.length > 0) ? Integer.toString(sqs[0].getLength()) : "0") .append(newline); @@ -279,13 +279,13 @@ public class PhylipFile extends AlignFile // sequential has the entire sequence following the name if (sequential) { - sb.append(s.getSequence()); + sb.append(s.getSequenceAsString()); } else { // Jalview ensures all sequences are of same length so no need // to keep track of min/max length - sequenceLength = s.getSequence().length; + sequenceLength = s.getLength(); // interleaved breaks the sequence into chunks for // interleavedColumns characters sb.append(s.getSequence(0, diff --git a/src/jalview/io/PileUpfile.java b/src/jalview/io/PileUpfile.java index 84be72c..4a0885c 100755 --- a/src/jalview/io/PileUpfile.java +++ b/src/jalview/io/PileUpfile.java @@ -92,7 +92,7 @@ public class PileUpfile extends MSFfile i++; } - out.append(" MSF: " + s[0].getSequence().length + out.append(" MSF: " + s[0].getLength() + " Type: P Check: " + bigChecksum % 10000 + " .."); out.append(newline); out.append(newline); @@ -151,8 +151,8 @@ public class PileUpfile extends MSFfile int start = (i * 50) + (k * 10); int end = start + 10; - if ((end < s[j].getSequence().length) - && (start < s[j].getSequence().length)) + int length = s[j].getLength(); + if ((end < length) && (start < length)) { out.append(s[j].getSequence(start, end)); @@ -167,7 +167,7 @@ public class PileUpfile extends MSFfile } else { - if (start < s[j].getSequence().length) + if (start < length) { out.append(s[j].getSequenceAsString().substring(start)); out.append(newline); diff --git a/src/jalview/io/StockholmFile.java b/src/jalview/io/StockholmFile.java index 936d2b9..798a77e 100644 --- a/src/jalview/io/StockholmFile.java +++ b/src/jalview/io/StockholmFile.java @@ -930,10 +930,7 @@ public class StockholmFile extends AlignFile while ((in < s.length) && (s[in] != null)) { String tmp = printId(s[in], jvSuffix); - if (s[in].getSequence().length > max) - { - max = s[in].getSequence().length; - } + max = Math.max(max, s[in].getLength()); if (tmp.length() > maxid) { diff --git a/src/jalview/io/StructureFile.java b/src/jalview/io/StructureFile.java index ab220f0..7628115 100644 --- a/src/jalview/io/StructureFile.java +++ b/src/jalview/io/StructureFile.java @@ -392,8 +392,10 @@ public abstract class StructureFile extends AlignFile public static boolean isRNA(SequenceI seq) { - for (char c : seq.getSequence()) + int length = seq.getLength(); + for (int i = 0; i < length; i++) { + char c = seq.getCharAt(i); if ((c != 'A') && (c != 'C') && (c != 'G') && (c != 'U')) { return false; diff --git a/src/jalview/schemes/ClustalxColourScheme.java b/src/jalview/schemes/ClustalxColourScheme.java index f13a90c..9df7ab8 100755 --- a/src/jalview/schemes/ClustalxColourScheme.java +++ b/src/jalview/schemes/ClustalxColourScheme.java @@ -106,19 +106,18 @@ public class ClustalxColourScheme extends ResidueColourScheme for (SequenceI sq : seqs) { - char[] seq = sq.getSequence(); - - int end_j = seq.length - 1; + int end_j = sq.getLength() - 1; + int length = sq.getLength(); for (int i = 0; i <= end_j; i++) { - if ((seq.length - 1) < i) + if (length - 1 < i) { res = 23; } else { - res = ResidueProperties.aaIndex[seq[i]]; + res = ResidueProperties.aaIndex[sq.getCharAt(i)]; } cons2[i][res]++; } diff --git a/src/jalview/util/Comparison.java b/src/jalview/util/Comparison.java index 22e1ab7..94d6300 100644 --- a/src/jalview/util/Comparison.java +++ b/src/jalview/util/Comparison.java @@ -282,35 +282,10 @@ public class Comparison { return false; } - char[][] letters = new char[seqs.length][]; - for (int i = 0; i < seqs.length; i++) - { - if (seqs[i] != null) - { - char[] sequence = seqs[i].getSequence(); - if (sequence != null) - { - letters[i] = sequence; - } - } - } - - return areNucleotide(letters); - } - /** - * Answers true if more than 85% of the sequence residues (ignoring gaps) are - * A, G, C, T or U, else false. This is just a heuristic guess and may give a - * wrong answer (as AGCT are also amino acid codes). - * - * @param letters - * @return - */ - static final boolean areNucleotide(char[][] letters) - { int ntCount = 0; int aaCount = 0; - for (char[] seq : letters) + for (SequenceI seq : seqs) { if (seq == null) { @@ -318,8 +293,10 @@ public class Comparison } // TODO could possibly make an informed guess just from the first sequence // to save a lengthy calculation - for (char c : seq) + int len = seq.getLength(); + for (int i = 0; i < len; i++) { + char c = seq.getCharAt(i); if (isNucleotide(c)) { ntCount++; diff --git a/src/jalview/ws/rest/params/SeqVector.java b/src/jalview/ws/rest/params/SeqVector.java index cbd73dd..578e7cc 100644 --- a/src/jalview/ws/rest/params/SeqVector.java +++ b/src/jalview/ws/rest/params/SeqVector.java @@ -65,7 +65,7 @@ public class SeqVector extends InputType { idvector.append(sep); } - idvector.append(seq.getSequence()); + idvector.append(seq.getSequenceAsString()); } return new StringBody(idvector.toString()); } diff --git a/test/jalview/datamodel/AlignmentTest.java b/test/jalview/datamodel/AlignmentTest.java index d6e09fd..2adefc9 100644 --- a/test/jalview/datamodel/AlignmentTest.java +++ b/test/jalview/datamodel/AlignmentTest.java @@ -1330,4 +1330,25 @@ public class AlignmentTest AlignmentI alignment = new Alignment(new SequenceI[] { seq }); alignment.setDataset(alignment); } + + @Test(groups = "Functional") + public void testAppend() + { + SequenceI seq = new Sequence("seq1", "FRMLPSRT-A--L-"); + AlignmentI alignment = new Alignment(new SequenceI[] { seq }); + alignment.setGapCharacter('-'); + SequenceI seq2 = new Sequence("seq1", "KP..L.FQII."); + AlignmentI alignment2 = new Alignment(new SequenceI[] { seq2 }); + alignment2.setGapCharacter('.'); + + alignment.append(alignment2); + + assertEquals('-', alignment.getGapCharacter()); + assertSame(seq, alignment.getSequenceAt(0)); + assertEquals("KP--L-FQII-", alignment.getSequenceAt(1) + .getSequenceAsString()); + + // todo test coverage for annotations, mappings, groups, + // hidden sequences, properties + } } diff --git a/test/jalview/datamodel/SeqCigarTest.java b/test/jalview/datamodel/SeqCigarTest.java index ab25aa6..89169d6 100644 --- a/test/jalview/datamodel/SeqCigarTest.java +++ b/test/jalview/datamodel/SeqCigarTest.java @@ -121,7 +121,7 @@ public class SeqCigarTest /* * TODO: can we add assertions to the sysouts that follow? */ - System.out.println("Original sequence align:\n" + sub_gapped_s + System.out.println("\nOriginal sequence align:\n" + sub_gapped_s + "\nReconstructed window from 8 to 48\n" + "XXXXXXXX" + sub_se_gp.getSequenceString('-') + "..." + "\nCigar String:" + sub_se_gp.getCigarstring() + "\n"); @@ -193,7 +193,8 @@ public class SeqCigarTest SequenceI gen_sgapped_s = gen_sgapped.getSeq('-'); // assertEquals("Couldn't reconstruct sequence", s_gapped.getSequence(), // gen_sgapped_s); - if (!gen_sgapped_s.getSequence().equals(s_gapped.getSequence())) + if (!gen_sgapped_s.getSequenceAsString().equals( + s_gapped.getSequenceAsString())) { // TODO: investigate errors reported here, to allow full conversion to // passing JUnit assertion form diff --git a/test/jalview/datamodel/SequenceTest.java b/test/jalview/datamodel/SequenceTest.java index 90856e8..2496a5b 100644 --- a/test/jalview/datamodel/SequenceTest.java +++ b/test/jalview/datamodel/SequenceTest.java @@ -106,15 +106,6 @@ public class SequenceTest // change sequence, should trigger an update of cached result sq.setSequence("ASDFASDFADSF"); assertTrue(sq.isProtein()); - /* - * in situ change of sequence doesn't change hashcode :-O - * (sequence should not expose internal implementation) - */ - for (int i = 0; i < sq.getSequence().length; i++) - { - sq.getSequence()[i] = "acgtu".charAt(i % 5); - } - assertTrue(sq.isProtein()); // but it isn't } @Test(groups = { "Functional" }) @@ -1571,4 +1562,45 @@ public class SequenceTest cursor = (SequenceCursor) PA.getValue(sq, "cursor"); assertEquals(new SequenceCursor(sq, 13, 10, ++token), cursor); } + + @Test(groups = { "Functional" }) + public void testGetSequence() + { + String seqstring = "-A--BCD-EF--"; + Sequence sq = new Sequence("test/8-13", seqstring); + sq.createDatasetSequence(); + assertTrue(Arrays.equals(sq.getSequence(), seqstring.toCharArray())); + assertTrue(Arrays.equals(sq.getDatasetSequence().getSequence(), + "ABCDEF".toCharArray())); + + // verify a copy of the sequence array is returned + char[] theSeq = (char[]) PA.getValue(sq, "sequence"); + assertNotSame(theSeq, sq.getSequence()); + theSeq = (char[]) PA.getValue(sq.getDatasetSequence(), "sequence"); + assertNotSame(theSeq, sq.getDatasetSequence().getSequence()); + } + + @Test(groups = { "Functional" }) + public void testReplace() + { + String seqstring = "-A--BCD-EF--"; + SequenceI sq = new Sequence("test/8-13", seqstring); + assertEquals(0, PA.getValue(sq, "changeCount")); + + assertEquals(0, sq.replace('A', 'A')); // same char + assertEquals(seqstring, sq.getSequenceAsString()); + assertEquals(0, PA.getValue(sq, "changeCount")); + + assertEquals(0, sq.replace('X', 'Y')); // not there + assertEquals(seqstring, sq.getSequenceAsString()); + assertEquals(0, PA.getValue(sq, "changeCount")); + + assertEquals(1, sq.replace('A', 'K')); + assertEquals("-K--BCD-EF--", sq.getSequenceAsString()); + assertEquals(1, PA.getValue(sq, "changeCount")); + + assertEquals(6, sq.replace('-', '.')); + assertEquals(".K..BCD.EF..", sq.getSequenceAsString()); + assertEquals(2, PA.getValue(sq, "changeCount")); + } } diff --git a/test/jalview/ext/paradise/TestAnnotate3D.java b/test/jalview/ext/paradise/TestAnnotate3D.java index 85fc039..c6c1a29 100644 --- a/test/jalview/ext/paradise/TestAnnotate3D.java +++ b/test/jalview/ext/paradise/TestAnnotate3D.java @@ -152,10 +152,10 @@ public class TestAnnotate3D { { SequenceI struseq = null; - String sq_ = new String(sq.getSequence()).toLowerCase(); + String sq_ = sq.getSequenceAsString().toLowerCase(); for (SequenceI _struseq : pdbf.getSeqsAsArray()) { - final String lowerCase = new String(_struseq.getSequence()) + final String lowerCase = _struseq.getSequenceAsString() .toLowerCase(); if (lowerCase.equals(sq_)) { -- 1.7.10.2