X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FSeqCigar.java;h=34697b366ad1fed635e93a351359f1ed9af3238b;hb=26816cdcb7617e74fad006d2c675efb298e0d9b1;hp=cc9eaa7536aed658c46f390f46dbd73c456b4204;hpb=506d60f0e188723ddc91c26824b41ac7034df3fe;p=jalview.git diff --git a/src/jalview/datamodel/SeqCigar.java b/src/jalview/datamodel/SeqCigar.java index cc9eaa7..34697b3 100644 --- a/src/jalview/datamodel/SeqCigar.java +++ b/src/jalview/datamodel/SeqCigar.java @@ -1,27 +1,32 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.4) - * Copyright (C) 2008 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. + * This file is part of Jalview. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.datamodel; -import java.util.Hashtable; +import jalview.analysis.AlignSeq; +import jalview.analysis.SeqsetUtils; +import jalview.util.MessageManager; +import jalview.util.ShiftList; -import jalview.analysis.*; -import jalview.util.*; +import java.util.Enumeration; +import java.util.Hashtable; public class SeqCigar extends CigarSimple { @@ -63,14 +68,54 @@ public class SeqCigar extends CigarSimple } /** + * + * @param column + * @return position in sequence for column (or -1 if no match state exists) + */ + public int findPosition(int column) + { + int w = 0, ew, p = refseq.findPosition(start); + if (column < 0) + { + return -1; + } + if (range != null) + { + for (int i = 0; i < length; i++) + { + if (operation[i] == M || operation[i] == D) + { + p += range[i]; + } + if (operation[i] == M || operation[i] == I) + { + ew = w + range[i]; + if (column < ew) + { + if (operation[i] == I) + { + return -1; + } + return p - (ew - column); + } + w = ew; + } + } + } + return -1; + } + + /** * Returns sequence as a string with cigar operations applied to it * * @return String */ + @Override public String getSequenceString(char GapChar) { - return (length == 0) ? "" : (String) getSequenceAndDeletions(refseq - .getSequenceAsString(start, end), GapChar)[0]; + return (length == 0) ? "" + : (String) getSequenceAndDeletions( + refseq.getSequenceAsString(start, end), GapChar)[0]; } /** @@ -86,18 +131,17 @@ public class SeqCigar extends CigarSimple { return null; } - Object[] edit_result = getSequenceAndDeletions(refseq - .getSequenceAsString(start, end), GapChar); + Object[] edit_result = getSequenceAndDeletions( + refseq.getSequenceAsString(start, end), GapChar); if (edit_result == null) { - throw new Error( - "Implementation Error - unexpected null from getSequenceAndDeletions"); + throw new Error(MessageManager.getString( + "error.implementation_error_unexpected_null_from_get_sequence_and_deletions")); } int bounds[] = (int[]) edit_result[1]; - seq = new Sequence(refseq.getName(), (String) edit_result[0], refseq - .getStart() - + start + bounds[0], refseq.getStart() + start - + ((bounds[2] == 0) ? -1 : bounds[2])); + seq = new Sequence(refseq.getName(), (String) edit_result[0], + refseq.getStart() + start + bounds[0], refseq.getStart() + start + + ((bounds[2] == 0) ? -1 : bounds[2])); seq.setDescription(refseq.getDescription()); int sstart = seq.getStart(), send = seq.getEnd(); // seq.checkValidRange(); probably not needed @@ -124,14 +168,14 @@ public class SeqCigar extends CigarSimple * prepends any 'D' operations needed to get to the first residue of seq. * * @param seq - * SequenceI + * SequenceI * @param initialDeletion - * true to mark initial dataset sequence residues as deleted in - * subsequence + * true to mark initial dataset sequence residues as deleted in + * subsequence * @param _s - * index of first position in seq + * index of first position in seq * @param _e - * index after last position in (possibly gapped) seq + * index after last position in (possibly gapped) seq * @return true if gaps are present in seq */ private boolean _setSeq(SequenceI seq, boolean initialDeletion, int _s, @@ -140,11 +184,14 @@ public class SeqCigar extends CigarSimple boolean hasgaps = false; if (seq == null) { - throw new Error("Implementation Error - _setSeq(null,...)"); + throw new Error(MessageManager + .getString("error.implementation_error_set_seq_null")); } if (_s < 0) { - throw new Error("Implementation Error: _s=" + _s); + throw new Error(MessageManager + .formatMessage("error.implementation_error_s", new String[] + { Integer.valueOf(_s).toString() })); } String seq_string = seq.getSequenceAsString(); if (_e == 0 || _e < _s || _e > seq_string.length()) @@ -171,9 +218,8 @@ public class SeqCigar extends CigarSimple } else { - ds = new Sequence(seq.getName(), ungapped, seq.getStart(), seq - .getStart() - + ungapped.length() - 1); + ds = new Sequence(seq.getName(), ungapped, seq.getStart(), + seq.getStart() + ungapped.length() - 1); // JBPNote: this would be consistent but may not be useful // seq.setDatasetSequence(ds); } @@ -211,8 +257,8 @@ public class SeqCigar extends CigarSimple // Check offsets if (end > ds.getLength()) { - throw new Error( - "SeqCigar: Possible implementation error: sequence is longer than dataset sequence"); + throw new Error(MessageManager + .getString("error.implementation_error_seqcigar_possible")); // end = ds.getLength(); } @@ -225,23 +271,24 @@ public class SeqCigar extends CigarSimple * to the seq.getStart()'th residue of the dataset seq resolved from seq. * * @param seq - * SequenceI + * SequenceI * @param operation - * char[] + * char[] * @param range - * int[] + * int[] */ public SeqCigar(SequenceI seq, char operation[], int range[]) { super(); if (seq == null) { - throw new Error("Implementation Bug. Null seq !"); + throw new Error( + MessageManager.getString("error.implmentation_bug_seq_null")); } if (operation.length != range.length) { - throw new Error( - "Implementation Bug. Cigar Operation list!= range list"); + throw new Error(MessageManager.getString( + "error.implementation_bug_cigar_operation_list_range_list")); } if (operation != null) @@ -251,17 +298,21 @@ public class SeqCigar extends CigarSimple if (_setSeq(seq, false, 0, 0)) { - throw new Error( - "NOT YET Implemented: Constructing a Cigar object from a cigar string and a gapped sequence."); + throw new Error(MessageManager.getString( + "error.not_yet_implemented_cigar_object_from_cigar_string")); } for (int i = this.length, j = 0; j < operation.length; i++, j++) { char op = operation[j]; if (op != M && op != I && op != D) { - throw new Error("Implementation Bug. Cigar Operation '" + j - + "' '" + op + "' not one of '" + M + "', '" + I - + "', or '" + D + "'."); + throw new Error(MessageManager.formatMessage( + "error.implementation_bug_cigar_operation", new String[] + { Integer.valueOf(j).toString(), + Integer.valueOf(op).toString(), + Integer.valueOf(M).toString(), + Integer.valueOf(I).toString(), + Integer.valueOf(D).toString() })); } this.operation[i] = op; this.range[i] = range[j]; @@ -275,8 +326,8 @@ public class SeqCigar extends CigarSimple this.length = 0; if (_setSeq(seq, false, 0, 0)) { - throw new Error( - "NOT YET Implemented: Constructing a Cigar object from a cigar string and a gapped sequence."); + throw new Error(MessageManager.getString( + "error.not_yet_implemented_cigar_object_from_cigar_string")); } } } @@ -285,7 +336,7 @@ public class SeqCigar extends CigarSimple * add range matched residues to cigar string * * @param range - * int + * int */ public void addMatch(int range) { @@ -297,16 +348,16 @@ public class SeqCigar extends CigarSimple * endpos column of seq. * * @param cigar - * CigarBase + * CigarBase * @param seq - * SequenceI + * SequenceI * @param startpos - * int + * int * @param endpos - * int + * int * @param initialDeletions - * if true then initial deletions will be added from start of - * seq to startpos + * if true then initial deletions will be added from start of seq to + * startpos */ protected static void addSequenceOps(CigarBase cigar, SequenceI seq, int startpos, int endpos, boolean initialDeletions) @@ -322,8 +373,9 @@ public class SeqCigar extends CigarSimple while (p <= endpos) { - boolean isGap = (p < res) ? jalview.util.Comparison.isGap(seq - .getCharAt(p)) : true; + boolean isGap = (p < res) + ? jalview.util.Comparison.isGap(seq.getCharAt(p)) + : true; if ((startpos <= p) && (p <= endpos)) { if (isGap) @@ -376,14 +428,15 @@ public class SeqCigar extends CigarSimple * create a cigar string for given sequence * * @param seq - * SequenceI + * SequenceI */ public SeqCigar(SequenceI seq) { super(); if (seq == null) { - throw new Error("Implementation error for new Cigar(SequenceI)"); + throw new Error(MessageManager + .getString("error.implementation_error_for_new_cigar")); } _setSeq(seq, false, 0, 0); // there is still work to do @@ -394,18 +447,19 @@ public class SeqCigar extends CigarSimple * Create Cigar from a range of gaps and residues on a sequence object * * @param seq - * SequenceI + * SequenceI * @param start - * int - first column in range + * int - first column in range * @param end - * int - last column in range + * int - last column in range */ public SeqCigar(SequenceI seq, int start, int end) { super(); if (seq == null) { - throw new Error("Implementation error for new Cigar(SequenceI)"); + throw new Error(MessageManager + .getString("error.implementation_error_for_new_cigar")); } _setSeq(seq, false, start, end + 1); // there is still work to do @@ -418,9 +472,9 @@ public class SeqCigar extends CigarSimple * will fix) * * @param seq - * SequenceI object resolvable to a dataset sequence + * SequenceI object resolvable to a dataset sequence * @param cigarString - * String + * String * @return Cigar */ public static SeqCigar parseCigar(SequenceI seq, String cigarString) @@ -432,16 +486,20 @@ public class SeqCigar extends CigarSimple } /** - * createAlignment + * create an alignment from the given array of cigar sequences and gap + * character, and marking the given segments as visible in the given + * hiddenColumns. * * @param alseqs - * SeqCigar[] * @param gapCharacter - * char + * @param hidden + * - hiddenColumns where hidden regions are marked + * @param segments + * - visible regions of alignment * @return SequenceI[] */ public static SequenceI[] createAlignmentSequences(SeqCigar[] alseqs, - char gapCharacter, ColumnSelection colsel, int[] segments) + char gapCharacter, HiddenColumns hidden, int[] segments) { SequenceI[] seqs = new SequenceI[alseqs.length]; StringBuffer[] g_seqs = new StringBuffer[alseqs.length]; @@ -449,20 +507,22 @@ public class SeqCigar extends CigarSimple Object[] gs_regions = new Object[alseqs.length]; for (int i = 0; i < alseqs.length; i++) { - alseqs_string[i] = alseqs[i].getRefSeq().getSequenceAsString( - alseqs[i].start, alseqs[i].end); + alseqs_string[i] = alseqs[i].getRefSeq() + .getSequenceAsString(alseqs[i].start, alseqs[i].end); gs_regions[i] = alseqs[i].getSequenceAndDeletions(alseqs_string[i], gapCharacter); // gapped sequence, {start, start col, end. - // endcol}, hidden regions {{start, end, col}}) + // endcol}, hidden regions {{start, end, col}}) if (gs_regions[i] == null) { - throw new Error("Implementation error: " + i - + "'th sequence Cigar has no operations."); + throw new Error(MessageManager.formatMessage( + "error.implementation_error_cigar_seq_no_operations", + new String[] + { Integer.valueOf(i).toString() })); } g_seqs[i] = new StringBuffer((String) ((Object[]) gs_regions[i])[0]); // the - // visible - // gapped - // sequence + // visible + // gapped + // sequence } // Now account for insertions. (well - deletions) // this is complicated because we must keep track of shifted positions in @@ -483,8 +543,8 @@ public class SeqCigar extends CigarSimple insert[s] = gapCharacter; } int inspos = shifts.shift(region[2]); // resolve insertion position in - // current alignment frame of - // reference + // current alignment frame of + // reference for (int s = 0; s < alseqs.length; s++) { if (s != i) @@ -495,24 +555,24 @@ public class SeqCigar extends CigarSimple for (int l = inspos - g_seqs[s].length(); l > 0; l--) { g_seqs[s].append(gapCharacter); // to debug - use a diffferent - // gap character here + // gap character here } } g_seqs[s].insert(inspos, insert); } else { - g_seqs[s].insert(inspos, alseqs_string[i].substring( - region[0], region[1] + 1)); + g_seqs[s].insert(inspos, + alseqs_string[i].substring(region[0], region[1] + 1)); } } shifts.addShift(region[2], insert.length); // update shift in - // alignment frame of - // reference + // alignment frame of + // reference if (segments == null) { // add a hidden column for this deletion - colsel.hideColumns(inspos, inspos + insert.length - 1); + hidden.hideColumns(inspos, inspos + insert.length - 1); } } } @@ -521,12 +581,13 @@ public class SeqCigar extends CigarSimple { int[] bounds = ((int[]) ((Object[]) gs_regions[i])[1]); SequenceI ref = alseqs[i].getRefSeq(); - seqs[i] = new Sequence(ref.getName(), g_seqs[i].toString(), ref - .getStart() - + alseqs[i].start + bounds[0], ref.getStart() - + alseqs[i].start + (bounds[2] == 0 ? -1 : bounds[2])); + seqs[i] = new Sequence(ref.getName(), g_seqs[i].toString(), + ref.getStart() + alseqs[i].start + bounds[0], + ref.getStart() + alseqs[i].start + + (bounds[2] == 0 ? -1 : bounds[2])); seqs[i].setDatasetSequence(ref); seqs[i].setDescription(ref.getDescription()); + SeqsetUtils.SeqCharacterUnhash(seqs[i],alseqs[i].seqProps,true,true); } if (segments != null) { @@ -534,162 +595,84 @@ public class SeqCigar extends CigarSimple { // int start=shifts.shift(segments[i]-1)+1; // int end=shifts.shift(segments[i]+segments[i+1]-1)-1; - colsel.hideColumns(segments[i + 1], segments[i + 1] - + segments[i + 2] - 1); + hidden.hideColumns(segments[i + 1], + segments[i + 1] + segments[i + 2] - 1); } } return seqs; } /** - * non rigorous testing - */ - /** - * - * @param seq - * Sequence - * @param ex_cs_gapped - * String - * @return String + * references to entities that this sequence cigar is associated with. */ - public static String testCigar_string(Sequence seq, String ex_cs_gapped) + private Hashtable selGroups = null; + + public void setGroupMembership(Object group) { - SeqCigar c_sgapped = new SeqCigar(seq); - String cs_gapped = c_sgapped.getCigarstring(); - if (!cs_gapped.equals(ex_cs_gapped)) + if (selGroups == null) { - System.err.println("Failed getCigarstring: incorect string '" - + cs_gapped + "' != " + ex_cs_gapped); + selGroups = new Hashtable(); } - return cs_gapped; + selGroups.put(group, new int[0]); } - public static boolean testSeqRecovery(SeqCigar gen_sgapped, - SequenceI s_gapped) + /** + * Test for and if present remove association to group. + * + * @param group + * @return true if group was associated and it was removed + */ + public boolean removeGroupMembership(Object group) { - // this is non-rigorous - start and end recovery is not tested. - SequenceI gen_sgapped_s = gen_sgapped.getSeq('-'); - if (!gen_sgapped_s.getSequence().equals(s_gapped.getSequence())) + if (selGroups != null && selGroups.containsKey(group)) { - System.err.println("Couldn't reconstruct sequence.\n" - + gen_sgapped_s.getSequenceAsString() + "\n" - + s_gapped.getSequenceAsString()); - return false; + selGroups.remove(group); + return true; } - return true; + return false; } - public static void main(String argv[]) throws Exception + /** + * forget all associations for this sequence. + */ + public void clearMemberships() { - String o_seq; - Sequence s = new Sequence("MySeq", - o_seq = "asdfktryasdtqwrtsaslldddptyipqqwaslchvhttt", 39, 80); - String orig_gapped; - Sequence s_gapped = new Sequence( - "MySeq", - orig_gapped = "----asdf------ktryas---dtqwrtsasll----dddptyipqqwa----slchvhttt", - 39, 80); - String ex_cs_gapped = "4I4M6I6M3I11M4I12M4I9M"; - s_gapped.setDatasetSequence(s); - String sub_gapped_s; - Sequence s_subsequence_gapped = new Sequence( - "MySeq", - sub_gapped_s = "------ktryas---dtqwrtsasll----dddptyipqqwa----slchvh", - 43, 77); - - s_subsequence_gapped.setDatasetSequence(s); - SeqCigar c_null = new SeqCigar(s); - String cs_null = c_null.getCigarstring(); - if (!cs_null.equals("42M")) - { - System.err - .println("Failed to recover ungapped sequence cigar operations:" - + ((cs_null == "") ? "empty string" : cs_null)); - } - testCigar_string(s_gapped, ex_cs_gapped); - SeqCigar gen_sgapped = SeqCigar.parseCigar(s, ex_cs_gapped); - if (!gen_sgapped.getCigarstring().equals(ex_cs_gapped)) - { - System.err.println("Failed parseCigar(" + ex_cs_gapped - + ")->getCigarString()->'" + gen_sgapped.getCigarstring() - + "'"); - } - testSeqRecovery(gen_sgapped, s_gapped); - // Test dataset resolution - SeqCigar sub_gapped = new SeqCigar(s_subsequence_gapped); - if (!testSeqRecovery(sub_gapped, s_subsequence_gapped)) + if (selGroups != null) { - System.err - .println("Failed recovery for subsequence of dataset sequence"); - } - // width functions - if (sub_gapped.getWidth() != sub_gapped_s.length()) - { - System.err.println("Failed getWidth()"); + selGroups.clear(); } + selGroups = null; + } - sub_gapped.getFullWidth(); - if (sub_gapped.hasDeletedRegions()) - { - System.err.println("hasDeletedRegions is incorrect."); - } - // Test start-end region SeqCigar - SeqCigar sub_se_gp = new SeqCigar(s_subsequence_gapped, 8, 48); - if (sub_se_gp.getWidth() != 41) - { - System.err - .println("SeqCigar(seq, start, end) not properly clipped alignsequence."); - } - System.out.println("Original sequence align:\n" + sub_gapped_s - + "\nReconstructed window from 8 to 48\n" + "XXXXXXXX" - + sub_se_gp.getSequenceString('-') + "..." + "\nCigar String:" - + sub_se_gp.getCigarstring() + "\n"); - SequenceI ssgp = sub_se_gp.getSeq('-'); - System.out.println("\t " + ssgp.getSequenceAsString()); - for (int r = 0; r < 10; r++) - { - sub_se_gp = new SeqCigar(s_subsequence_gapped, 8, 48); - int sl = sub_se_gp.getWidth(); - int st = sl - 1 - r; - for (int rs = 0; rs < 10; rs++) - { - int e = st + rs; - sub_se_gp.deleteRange(st, e); - String ssgapedseq = sub_se_gp.getSeq('-').getSequenceAsString(); - System.out.println(st + "," + e + "\t:" + ssgapedseq); - st -= 3; - } - } + /** + * + * @return null or array of all associated entities + */ + public Object[] getAllMemberships() + { + if (selGroups == null) { - SeqCigar[] set = new SeqCigar[] - { new SeqCigar(s), new SeqCigar(s_subsequence_gapped, 8, 48), - new SeqCigar(s_gapped) }; - Alignment al = new Alignment(set); - for (int i = 0; i < al.getHeight(); i++) - { - System.out.println("" + al.getSequenceAt(i).getName() + "\t" - + al.getSequenceAt(i).getStart() + "\t" - + al.getSequenceAt(i).getEnd() + "\t" - + al.getSequenceAt(i).getSequenceAsString()); - } + return null; } + Object[] mmbs = new Object[selGroups.size()]; + Enumeration en = selGroups.keys(); + for (int i = 0; en.hasMoreElements(); i++) { - System.out.println("Gapped."); - SeqCigar[] set = new SeqCigar[] - { new SeqCigar(s), new SeqCigar(s_subsequence_gapped, 8, 48), - new SeqCigar(s_gapped) }; - set[0].deleteRange(20, 25); - Alignment al = new Alignment(set); - for (int i = 0; i < al.getHeight(); i++) - { - System.out.println("" + al.getSequenceAt(i).getName() + "\t" - + al.getSequenceAt(i).getStart() + "\t" - + al.getSequenceAt(i).getEnd() + "\t" - + al.getSequenceAt(i).getSequenceAsString()); - } + mmbs[i] = en.nextElement(); } - // if (!ssgapedseq.equals("ryas---dtqqwa----slchvh")) - // System.err.println("Subseqgaped\n------ktryas---dtqwrtsasll----dddptyipqqwa----slchvhryas---dtqwrtsasll--qwa----slchvh\n"+ssgapedseq+"\n"+sub_se_gp.getCigarstring()); + return mmbs; } + /** + * Test for group membership + * + * @param sgr + * - a selection group or some other object that may be associated + * with seqCigar + * @return true if sgr is associated with this seqCigar + */ + public boolean isMemberOf(Object sgr) + { + return (selGroups != null) && selGroups.get(sgr) != null; + } }