X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FSeqCigar.java;h=c2a6a9cdd1b294dade726b95237af449d429f182;hb=HEAD;hp=07abd538c186d2c8c33b4e1c56c75a6021dbc199;hpb=b46b15a5edcd1fa2cd670503287ec753e46245f3;p=jalview.git diff --git a/src/jalview/datamodel/SeqCigar.java b/src/jalview/datamodel/SeqCigar.java index 07abd53..c2a6a9c 100644 --- a/src/jalview/datamodel/SeqCigar.java +++ b/src/jalview/datamodel/SeqCigar.java @@ -1,69 +1,127 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer - * Copyright (C) 2006 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.datamodel; -import jalview.analysis.*; +import jalview.analysis.AlignSeq; +import jalview.analysis.SeqsetUtils; +import jalview.util.MessageManager; import jalview.util.ShiftList; -import java.util.Vector; -public class SeqCigar - extends CigarSimple +import java.util.Enumeration; +import java.util.Hashtable; + +public class SeqCigar extends CigarSimple { - /** - * start(inclusive) and end(exclusive) of subsequence on refseq - */ - private int start, end; + /** + * start(inclusive) and end(exclusive) of subsequence on refseq + */ + private int start, end; + private SequenceI refseq = null; + + private Hashtable seqProps; + /** * Reference dataset sequence for the cigar string + * * @return SequenceI */ public SequenceI getRefSeq() { return refseq; } + /** - * + * * @return int start index of cigar ops on refSeq */ - public int getStart() { + public int getStart() + { return start; } + /** - * + * * @return int end index (exclusive) of cigar ops on refSeq */ - public int getEnd() { + public int getEnd() + { return end; } + + /** + * + * @param column + * @return position in sequence for column (or -1 if no match state exists) + */ + public int findPosition(int column) + { + int w = 0, ew, p = refseq.findPosition(start); + if (column < 0) + { + return -1; + } + if (range != null) + { + for (int i = 0; i < length; i++) + { + if (operation[i] == M || operation[i] == D) + { + p += range[i]; + } + if (operation[i] == M || operation[i] == I) + { + ew = w + range[i]; + if (column < ew) + { + if (operation[i] == I) + { + return -1; + } + return p - (ew - column); + } + w = ew; + } + } + } + return -1; + } + /** * Returns sequence as a string with cigar operations applied to it + * * @return String */ + @Override public String getSequenceString(char GapChar) { - return (length == 0) ? "" : - (String) getSequenceAndDeletions(refseq.getSequenceAsString(start, end), GapChar)[0]; + return (length == 0) ? "" + : (String) getSequenceAndDeletions( + refseq.getSequenceAsString(start, end), GapChar)[0]; } /** - * recreates a gapped and edited version of RefSeq or null for an empty cigar string + * recreates a gapped and edited version of RefSeq or null for an empty cigar + * string + * * @return SequenceI */ public SequenceI getSeq(char GapChar) @@ -73,62 +131,86 @@ public class SeqCigar { return null; } - Object[] edit_result = getSequenceAndDeletions(refseq.getSequenceAsString(start,end), - GapChar); + Object[] edit_result = getSequenceAndDeletions( + refseq.getSequenceAsString(start, end), GapChar); if (edit_result == null) { - throw new Error( - "Implementation Error - unexpected null from getSequenceAndDeletions"); + throw new Error(MessageManager.getString( + "error.implementation_error_unexpected_null_from_get_sequence_and_deletions")); } int bounds[] = (int[]) edit_result[1]; seq = new Sequence(refseq.getName(), (String) edit_result[0], - refseq.getStart() + start+bounds[0], - refseq.getStart() + start+((bounds[2]==0) ? -1 : bounds[2])); + refseq.getStart() + start + bounds[0], refseq.getStart() + start + + ((bounds[2] == 0) ? -1 : bounds[2])); + seq.setDescription(refseq.getDescription()); + int sstart = seq.getStart(), send = seq.getEnd(); // seq.checkValidRange(); probably not needed + // recover local properties if present + if (seqProps != null) + { + // this recovers dataset sequence reference as well as local features, + // names, start/end settings. + SeqsetUtils.SeqCharacterUnhash(seq, seqProps); + } + // ensure dataset sequence is up to date from local reference seq.setDatasetSequence(refseq); - seq.setDescription(refseq.getDescription()); + seq.setStart(sstart); + seq.setEnd(send); return seq; } /* - We don't allow this - refseq is given at construction time only - public void setSeq(SequenceI seq) { - this.seq = seq; - } + * We don't allow this - refseq is given at construction time only public void + * setSeq(SequenceI seq) { this.seq = seq; } */ /** - * internal constructor - sets seq to a gapless sequence derived from seq - * and prepends any 'D' operations needed to get to the first residue of seq. - * @param seq SequenceI - * @param initialDeletion true to mark initial dataset sequence residues as deleted in subsequence - * @param _s index of first position in seq - * @param _e index after last position in (possibly gapped) seq + * internal constructor - sets seq to a gapless sequence derived from seq and + * prepends any 'D' operations needed to get to the first residue of seq. + * + * @param seq + * SequenceI + * @param initialDeletion + * true to mark initial dataset sequence residues as deleted in + * subsequence + * @param _s + * index of first position in seq + * @param _e + * index after last position in (possibly gapped) seq * @return true if gaps are present in seq */ - private boolean _setSeq(SequenceI seq, boolean initialDeletion, int _s, int _e) + private boolean _setSeq(SequenceI seq, boolean initialDeletion, int _s, + int _e) { boolean hasgaps = false; if (seq == null) { - throw new Error("Implementation Error - _setSeq(null,...)"); + throw new Error(MessageManager + .getString("error.implementation_error_set_seq_null")); + } + if (_s < 0) + { + throw new Error(MessageManager + .formatMessage("error.implementation_error_s", new String[] + { Integer.valueOf(_s).toString() })); } - if (_s<0) - throw new Error("Implementation Error: _s="+_s); String seq_string = seq.getSequenceAsString(); - if (_e==0 || _e<_s || _e>seq_string.length()) - _e=seq_string.length(); + if (_e == 0 || _e < _s || _e > seq_string.length()) + { + _e = seq_string.length(); + } // resolve start and end positions relative to ungapped reference sequence - start = seq.findPosition(_s)-seq.getStart(); - end = seq.findPosition(_e)-seq.getStart(); - int l_ungapped = end-start; - // Find correct sequence to reference and correct start and end - if necessary + start = seq.findPosition(_s) - seq.getStart(); + end = seq.findPosition(_e) - seq.getStart(); + int l_ungapped = end - start; + // Find correct sequence to reference and correct start and end - if + // necessary SequenceI ds = seq.getDatasetSequence(); if (ds == null) { // make a new dataset sequence - String ungapped = AlignSeq.extractGaps(jalview.util.Comparison.GapChars, - new String(seq_string)); - l_ungapped=ungapped.length(); + String ungapped = AlignSeq.extractGaps( + jalview.util.Comparison.GapChars, new String(seq_string)); + l_ungapped = ungapped.length(); // check that we haven't just duplicated an ungapped sequence. if (l_ungapped == seq.getLength()) { @@ -136,63 +218,77 @@ public class SeqCigar } else { - ds = new Sequence(seq.getName(), ungapped, - seq.getStart(), - seq.getStart()+ungapped.length()-1); + ds = new Sequence(seq.getName(), ungapped, seq.getStart(), + seq.getStart() + ungapped.length() - 1); // JBPNote: this would be consistent but may not be useful - // seq.setDatasetSequence(ds); + // seq.setDatasetSequence(ds); } } // add in offset between seq and the dataset sequence if (ds.getStart() < seq.getStart()) { - int offset=seq.getStart()-ds.getStart(); - if (initialDeletion) { - // absolute cigar string - addDeleted(_s+offset); - start=0; - end+=offset; - } else { - // normal behaviour - just mark start and end subsequence - start+=offset; - end+=offset; + int offset = seq.getStart() - ds.getStart(); + if (initialDeletion) + { + // absolute cigar string + addDeleted(_s + offset); + start = 0; + end += offset; + } + else + { + // normal behaviour - just mark start and end subsequence + start += offset; + end += offset; } } // any gaps to process ? - if (l_ungapped!=(_e-_s)) - hasgaps=true; - - this.refseq = ds; + if (l_ungapped != (_e - _s)) + { + hasgaps = true; + } - // Check offsets - if (end>ds.getLength()) { - throw new Error("SeqCigar: Possible implementation error: sequence is longer than dataset sequence"); -// end = ds.getLength(); + refseq = ds; + // copy over local properties for the sequence instance of the refseq + seqProps = SeqsetUtils.SeqCharacterHash(seq); + // Check offsets + if (end > ds.getLength()) + { + throw new Error(MessageManager + .getString("error.implementation_error_seqcigar_possible")); + // end = ds.getLength(); } return hasgaps; } /** - * directly initialise a cigar object with a sequence of range, operation pairs and a sequence to apply it to. - * operation and range should be relative to the seq.getStart()'th residue of the dataset seq resolved from seq. - * @param seq SequenceI - * @param operation char[] - * @param range int[] + * directly initialise a cigar object with a sequence of range, operation + * pairs and a sequence to apply it to. operation and range should be relative + * to the seq.getStart()'th residue of the dataset seq resolved from seq. + * + * @param seq + * SequenceI + * @param operation + * char[] + * @param range + * int[] */ public SeqCigar(SequenceI seq, char operation[], int range[]) { super(); if (seq == null) { - throw new Error("Implementation Bug. Null seq !"); + throw new Error( + MessageManager.getString("error.implmentation_bug_seq_null")); } if (operation.length != range.length) { - throw new Error("Implementation Bug. Cigar Operation list!= range list"); + throw new Error(MessageManager.getString( + "error.implementation_bug_cigar_operation_list_range_list")); } if (operation != null) @@ -202,16 +298,21 @@ public class SeqCigar if (_setSeq(seq, false, 0, 0)) { - throw new Error("NOT YET Implemented: Constructing a Cigar object from a cigar string and a gapped sequence."); + throw new Error(MessageManager.getString( + "error.not_yet_implemented_cigar_object_from_cigar_string")); } for (int i = this.length, j = 0; j < operation.length; i++, j++) { char op = operation[j]; if (op != M && op != I && op != D) { - throw new Error( - "Implementation Bug. Cigar Operation '" + j + "' '" + op + - "' not one of '" + M + "', '" + I + "', or '" + D + "'."); + throw new Error(MessageManager.formatMessage( + "error.implementation_bug_cigar_operation", new String[] + { Integer.valueOf(j).toString(), + Integer.valueOf(op).toString(), + Integer.valueOf(M).toString(), + Integer.valueOf(I).toString(), + Integer.valueOf(D).toString() })); } this.operation[i] = op; this.range[i] = range[j]; @@ -223,47 +324,59 @@ public class SeqCigar this.operation = null; this.range = null; this.length = 0; - if (_setSeq(seq, false,0, 0)) + if (_setSeq(seq, false, 0, 0)) { - throw new Error("NOT YET Implemented: Constructing a Cigar object from a cigar string and a gapped sequence."); + throw new Error(MessageManager.getString( + "error.not_yet_implemented_cigar_object_from_cigar_string")); } } } /** * add range matched residues to cigar string - * @param range int + * + * @param range + * int */ public void addMatch(int range) { this.addOperation(M, range); } + /** - * Adds - * insertion and match operations based on seq to the cigar up to - * the endpos column of seq. - * - * @param cigar CigarBase - * @param seq SequenceI - * @param startpos int - * @param endpos int - * @param initialDeletions if true then initial deletions will be added from start of seq to startpos + * Adds insertion and match operations based on seq to the cigar up to the + * endpos column of seq. + * + * @param cigar + * CigarBase + * @param seq + * SequenceI + * @param startpos + * int + * @param endpos + * int + * @param initialDeletions + * if true then initial deletions will be added from start of seq to + * startpos */ protected static void addSequenceOps(CigarBase cigar, SequenceI seq, - int startpos, int endpos, boolean initialDeletions) + int startpos, int endpos, boolean initialDeletions) { char op = '\0'; int range = 0; int p = 0, res = seq.getLength(); if (!initialDeletions) - p=startpos; - + { + p = startpos; + } while (p <= endpos) { - boolean isGap = (p < res) ? jalview.util.Comparison.isGap(seq.getCharAt(p)) : true; - if ( (startpos <= p) && (p <= endpos)) + boolean isGap = (p < res) + ? jalview.util.Comparison.isGap(seq.getCharAt(p)) + : true; + if ((startpos <= p) && (p <= endpos)) { if (isGap) { @@ -313,84 +426,111 @@ public class SeqCigar /** * create a cigar string for given sequence - * @param seq SequenceI + * + * @param seq + * SequenceI */ public SeqCigar(SequenceI seq) { super(); if (seq == null) { - throw new Error("Implementation error for new Cigar(SequenceI)"); + throw new Error(MessageManager + .getString("error.implementation_error_for_new_cigar")); } _setSeq(seq, false, 0, 0); // there is still work to do - addSequenceOps(this, seq, 0, seq.getLength()-1, false); + addSequenceOps(this, seq, 0, seq.getLength() - 1, false); } /** * Create Cigar from a range of gaps and residues on a sequence object - * @param seq SequenceI - * @param start int - first column in range - * @param end int - last column in range + * + * @param seq + * SequenceI + * @param start + * int - first column in range + * @param end + * int - last column in range */ public SeqCigar(SequenceI seq, int start, int end) { super(); if (seq == null) { - throw new Error("Implementation error for new Cigar(SequenceI)"); + throw new Error(MessageManager + .getString("error.implementation_error_for_new_cigar")); } - _setSeq(seq, false, start, end+1); + _setSeq(seq, false, start, end + 1); // there is still work to do addSequenceOps(this, seq, start, end, false); } /** - * Create a cigar object from a cigar string like '[]+' - * Will fail if the given seq already contains gaps (JBPNote: future implementation will fix) - * @param seq SequenceI object resolvable to a dataset sequence - * @param cigarString String + * Create a cigar object from a cigar string like '[]+' Will + * fail if the given seq already contains gaps (JBPNote: future implementation + * will fix) + * + * @param seq + * SequenceI object resolvable to a dataset sequence + * @param cigarString + * String * @return Cigar */ public static SeqCigar parseCigar(SequenceI seq, String cigarString) - throws Exception + throws Exception { Object[] opsandrange = parseCigarString(cigarString); - return new SeqCigar(seq, (char[]) opsandrange[0], (int[]) opsandrange[1]); + return new SeqCigar(seq, (char[]) opsandrange[0], + (int[]) opsandrange[1]); } /** - * createAlignment - * - * @param alseqs SeqCigar[] - * @param gapCharacter char + * create an alignment from the given array of cigar sequences and gap + * character, and marking the given segments as visible in the given + * hiddenColumns. + * + * @param alseqs + * @param gapCharacter + * @param hidden + * - hiddenColumns where hidden regions are marked + * @param segments + * - visible regions of alignment * @return SequenceI[] */ public static SequenceI[] createAlignmentSequences(SeqCigar[] alseqs, - char gapCharacter, ColumnSelection colsel, int[] segments) + char gapCharacter, HiddenColumns hidden, int[] segments) { SequenceI[] seqs = new SequenceI[alseqs.length]; StringBuffer[] g_seqs = new StringBuffer[alseqs.length]; - String[] alseqs_string=new String[alseqs.length]; + String[] alseqs_string = new String[alseqs.length]; Object[] gs_regions = new Object[alseqs.length]; for (int i = 0; i < alseqs.length; i++) { - alseqs_string[i]=alseqs[i].getRefSeq(). - getSequenceAsString(alseqs[i].start,alseqs[i].end); - gs_regions[i] = alseqs[i].getSequenceAndDeletions(alseqs_string[i], gapCharacter); // gapped sequence, {start, start col, end. endcol}, hidden regions {{start, end, col}}) + alseqs_string[i] = alseqs[i].getRefSeq() + .getSequenceAsString(alseqs[i].start, alseqs[i].end); + gs_regions[i] = alseqs[i].getSequenceAndDeletions(alseqs_string[i], + gapCharacter); // gapped sequence, {start, start col, end. + // endcol}, hidden regions {{start, end, col}}) if (gs_regions[i] == null) { - throw new Error("Implementation error: " + i + - "'th sequence Cigar has no operations."); + throw new Error(MessageManager.formatMessage( + "error.implementation_error_cigar_seq_no_operations", + new String[] + { Integer.valueOf(i).toString() })); } - g_seqs[i] = new StringBuffer( (String) ( (Object[]) gs_regions[i])[0]); // the visible gapped sequence + g_seqs[i] = new StringBuffer((String) ((Object[]) gs_regions[i])[0]); // the + // visible + // gapped + // sequence } // Now account for insertions. (well - deletions) - // this is complicated because we must keep track of shifted positions in each sequence + // this is complicated because we must keep track of shifted positions in + // each sequence ShiftList shifts = new ShiftList(); for (int i = 0; i < alseqs.length; i++) { - Object[] gs_region = ( (Object[]) ( (Object[]) gs_regions[i])[2]); + Object[] gs_region = ((Object[]) ((Object[]) gs_regions[i])[2]); if (gs_region != null) { @@ -402,7 +542,9 @@ public class SeqCigar { insert[s] = gapCharacter; } - int inspos = shifts.shift(region[2]); // resolve insertion position in current alignment frame of reference + int inspos = shifts.shift(region[2]); // resolve insertion position in + // current alignment frame of + // reference for (int s = 0; s < alseqs.length; s++) { if (s != i) @@ -412,7 +554,8 @@ public class SeqCigar // prefix insertion with more gaps. for (int l = inspos - g_seqs[s].length(); l > 0; l--) { - g_seqs[s].append(gapCharacter); // to debug - use a diffferent gap character here + g_seqs[s].append(gapCharacter); // to debug - use a diffferent + // gap character here } } g_seqs[s].insert(inspos, insert); @@ -420,185 +563,115 @@ public class SeqCigar else { g_seqs[s].insert(inspos, - alseqs_string[i].substring(region[0], region[1] + 1)); + alseqs_string[i].substring(region[0], region[1] + 1)); } } - shifts.addShift(region[2], insert.length); // update shift in alignment frame of reference - if (segments==null) + shifts.addShift(region[2], insert.length); // update shift in + // alignment frame of + // reference + if (segments == null) + { // add a hidden column for this deletion - colsel.hideColumns(inspos, inspos+insert.length-1); + hidden.hideColumns(inspos, inspos + insert.length - 1); + } } } } for (int i = 0; i < alseqs.length; i++) { - int[] bounds = ( (int[]) ( (Object[]) gs_regions[i])[1]); + int[] bounds = ((int[]) ((Object[]) gs_regions[i])[1]); SequenceI ref = alseqs[i].getRefSeq(); seqs[i] = new Sequence(ref.getName(), g_seqs[i].toString(), - ref.getStart() + alseqs[i].start+bounds[0], - ref.getStart() + alseqs[i].start+(bounds[2]==0 ? -1 : bounds[2])); + ref.getStart() + alseqs[i].start + bounds[0], + ref.getStart() + alseqs[i].start + + (bounds[2] == 0 ? -1 : bounds[2])); seqs[i].setDatasetSequence(ref); seqs[i].setDescription(ref.getDescription()); } - if (segments!=null) { - for (int i=0; igetCigarString()->'" + gen_sgapped.getCigarstring() + - "'"); - } - testSeqRecovery(gen_sgapped, s_gapped); - // Test dataset resolution - SeqCigar sub_gapped = new SeqCigar(s_subsequence_gapped); - if (!testSeqRecovery(sub_gapped, s_subsequence_gapped)) - { - System.err.println("Failed recovery for subsequence of dataset sequence"); - } - // width functions - if (sub_gapped.getWidth() != sub_gapped_s.length()) + if (selGroups != null) { - System.err.println("Failed getWidth()"); + selGroups.clear(); } + selGroups = null; + } - sub_gapped.getFullWidth(); - if (sub_gapped.hasDeletedRegions()) - { - System.err.println("hasDeletedRegions is incorrect."); - } - // Test start-end region SeqCigar - SeqCigar sub_se_gp = new SeqCigar(s_subsequence_gapped, 8, 48); - if (sub_se_gp.getWidth() != 41) - { - System.err.println( - "SeqCigar(seq, start, end) not properly clipped alignsequence."); - } - System.out.println("Original sequence align:\n" + sub_gapped_s + - "\nReconstructed window from 8 to 48\n" - + "XXXXXXXX" + sub_se_gp.getSequenceString('-') + "..." - + "\nCigar String:" + sub_se_gp.getCigarstring() + "\n" - ); - SequenceI ssgp = sub_se_gp.getSeq('-'); - System.out.println("\t " + ssgp.getSequence()); - for (int r = 0; r < 10; r++) - { - sub_se_gp = new SeqCigar(s_subsequence_gapped, 8, 48); - int sl = sub_se_gp.getWidth(); - int st = sl - 1 - r; - for (int rs = 0; rs < 10; rs++) - { - int e = st + rs; - sub_se_gp.deleteRange(st, e); - String ssgapedseq = sub_se_gp.getSeq('-').getSequenceAsString(); - System.out.println(st + "," + e + "\t:" + ssgapedseq); - st -=3; - } - } + /** + * + * @return null or array of all associated entities + */ + public Object[] getAllMemberships() + { + if (selGroups == null) { - SeqCigar[] set = new SeqCigar[] - { - new SeqCigar(s), new SeqCigar(s_subsequence_gapped, 8, 48), - new SeqCigar(s_gapped)}; - Alignment al = new Alignment(set); - for (int i = 0; i < al.getHeight(); i++) - { - System.out.println("" + al.getSequenceAt(i).getName() + "\t" + - al.getSequenceAt(i).getStart() + "\t" + - al.getSequenceAt(i).getEnd() + "\t" + - al.getSequenceAt(i).getSequence()); - } + return null; } + Object[] mmbs = new Object[selGroups.size()]; + Enumeration en = selGroups.keys(); + for (int i = 0; en.hasMoreElements(); i++) { - System.out.println("Gapped."); - SeqCigar[] set = new SeqCigar[] - { - new SeqCigar(s), new SeqCigar(s_subsequence_gapped, 8, 48), - new SeqCigar(s_gapped)}; - set[0].deleteRange(20, 25); - Alignment al = new Alignment(set); - for (int i = 0; i < al.getHeight(); i++) - { - System.out.println("" + al.getSequenceAt(i).getName() + "\t" + - al.getSequenceAt(i).getStart() + "\t" + - al.getSequenceAt(i).getEnd() + "\t" + - al.getSequenceAt(i).getSequence()); - } + mmbs[i] = en.nextElement(); } -// if (!ssgapedseq.equals("ryas---dtqqwa----slchvh")) -// System.err.println("Subseqgaped\n------ktryas---dtqwrtsasll----dddptyipqqwa----slchvhryas---dtqwrtsasll--qwa----slchvh\n"+ssgapedseq+"\n"+sub_se_gp.getCigarstring()); + return mmbs; } + /** + * Test for group membership + * + * @param sgr + * - a selection group or some other object that may be associated + * with seqCigar + * @return true if sgr is associated with this seqCigar + */ + public boolean isMemberOf(Object sgr) + { + return (selGroups != null) && selGroups.get(sgr) != null; + } }