X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FSeqCigar.java;h=34697b366ad1fed635e93a351359f1ed9af3238b;hb=26816cdcb7617e74fad006d2c675efb298e0d9b1;hp=8441609643c31a29453ecbd1054d105f291aef13;hpb=aced09c4feeaf3406269442c14e54abeeb4cad81;p=jalview.git diff --git a/src/jalview/datamodel/SeqCigar.java b/src/jalview/datamodel/SeqCigar.java index 8441609..34697b3 100644 --- a/src/jalview/datamodel/SeqCigar.java +++ b/src/jalview/datamodel/SeqCigar.java @@ -1,6 +1,6 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2) - * Copyright (C) 2014 The Jalview Authors + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * @@ -20,12 +20,14 @@ */ package jalview.datamodel; +import jalview.analysis.AlignSeq; +import jalview.analysis.SeqsetUtils; +import jalview.util.MessageManager; +import jalview.util.ShiftList; + import java.util.Enumeration; import java.util.Hashtable; -import jalview.analysis.*; -import jalview.util.*; - public class SeqCigar extends CigarSimple { /** @@ -66,14 +68,54 @@ public class SeqCigar extends CigarSimple } /** + * + * @param column + * @return position in sequence for column (or -1 if no match state exists) + */ + public int findPosition(int column) + { + int w = 0, ew, p = refseq.findPosition(start); + if (column < 0) + { + return -1; + } + if (range != null) + { + for (int i = 0; i < length; i++) + { + if (operation[i] == M || operation[i] == D) + { + p += range[i]; + } + if (operation[i] == M || operation[i] == I) + { + ew = w + range[i]; + if (column < ew) + { + if (operation[i] == I) + { + return -1; + } + return p - (ew - column); + } + w = ew; + } + } + } + return -1; + } + + /** * Returns sequence as a string with cigar operations applied to it * * @return String */ + @Override public String getSequenceString(char GapChar) { - return (length == 0) ? "" : (String) getSequenceAndDeletions( - refseq.getSequenceAsString(start, end), GapChar)[0]; + return (length == 0) ? "" + : (String) getSequenceAndDeletions( + refseq.getSequenceAsString(start, end), GapChar)[0]; } /** @@ -93,12 +135,13 @@ public class SeqCigar extends CigarSimple refseq.getSequenceAsString(start, end), GapChar); if (edit_result == null) { - throw new Error(MessageManager.getString("error.implementation_error_unexpected_null_from_get_sequence_and_deletions")); + throw new Error(MessageManager.getString( + "error.implementation_error_unexpected_null_from_get_sequence_and_deletions")); } int bounds[] = (int[]) edit_result[1]; seq = new Sequence(refseq.getName(), (String) edit_result[0], - refseq.getStart() + start + bounds[0], refseq.getStart() - + start + ((bounds[2] == 0) ? -1 : bounds[2])); + refseq.getStart() + start + bounds[0], refseq.getStart() + start + + ((bounds[2] == 0) ? -1 : bounds[2])); seq.setDescription(refseq.getDescription()); int sstart = seq.getStart(), send = seq.getEnd(); // seq.checkValidRange(); probably not needed @@ -141,11 +184,14 @@ public class SeqCigar extends CigarSimple boolean hasgaps = false; if (seq == null) { - throw new Error(MessageManager.getString("error.implementation_error_set_seq_null")); + throw new Error(MessageManager + .getString("error.implementation_error_set_seq_null")); } if (_s < 0) { - throw new Error(MessageManager.formatMessage("error.implementation_error_s", new String[]{Integer.valueOf(_s).toString()})); + throw new Error(MessageManager + .formatMessage("error.implementation_error_s", new String[] + { Integer.valueOf(_s).toString() })); } String seq_string = seq.getSequenceAsString(); if (_e == 0 || _e < _s || _e > seq_string.length()) @@ -211,7 +257,8 @@ public class SeqCigar extends CigarSimple // Check offsets if (end > ds.getLength()) { - throw new Error(MessageManager.getString("error.implementation_error_seqcigar_possible")); + throw new Error(MessageManager + .getString("error.implementation_error_seqcigar_possible")); // end = ds.getLength(); } @@ -235,11 +282,13 @@ public class SeqCigar extends CigarSimple super(); if (seq == null) { - throw new Error(MessageManager.getString("error.implmentation_bug_seq_null")); + throw new Error( + MessageManager.getString("error.implmentation_bug_seq_null")); } if (operation.length != range.length) { - throw new Error(MessageManager.getString("error.implementation_bug_cigar_operation_list_range_list")); + throw new Error(MessageManager.getString( + "error.implementation_bug_cigar_operation_list_range_list")); } if (operation != null) @@ -249,14 +298,21 @@ public class SeqCigar extends CigarSimple if (_setSeq(seq, false, 0, 0)) { - throw new Error(MessageManager.getString("error.not_yet_implemented_cigar_object_from_cigar_string")); + throw new Error(MessageManager.getString( + "error.not_yet_implemented_cigar_object_from_cigar_string")); } for (int i = this.length, j = 0; j < operation.length; i++, j++) { char op = operation[j]; if (op != M && op != I && op != D) { - throw new Error(MessageManager.formatMessage("error.implementation_bug_cigar_operation", new String[]{Integer.valueOf(j).toString(),Integer.valueOf(op).toString(),Integer.valueOf(M).toString(),Integer.valueOf(I).toString(),Integer.valueOf(D).toString()})); + throw new Error(MessageManager.formatMessage( + "error.implementation_bug_cigar_operation", new String[] + { Integer.valueOf(j).toString(), + Integer.valueOf(op).toString(), + Integer.valueOf(M).toString(), + Integer.valueOf(I).toString(), + Integer.valueOf(D).toString() })); } this.operation[i] = op; this.range[i] = range[j]; @@ -270,7 +326,8 @@ public class SeqCigar extends CigarSimple this.length = 0; if (_setSeq(seq, false, 0, 0)) { - throw new Error(MessageManager.getString("error.not_yet_implemented_cigar_object_from_cigar_string")); + throw new Error(MessageManager.getString( + "error.not_yet_implemented_cigar_object_from_cigar_string")); } } } @@ -316,8 +373,9 @@ public class SeqCigar extends CigarSimple while (p <= endpos) { - boolean isGap = (p < res) ? jalview.util.Comparison.isGap(seq - .getCharAt(p)) : true; + boolean isGap = (p < res) + ? jalview.util.Comparison.isGap(seq.getCharAt(p)) + : true; if ((startpos <= p) && (p <= endpos)) { if (isGap) @@ -377,7 +435,8 @@ public class SeqCigar extends CigarSimple super(); if (seq == null) { - throw new Error(MessageManager.getString("error.implementation_error_for_new_cigar")); + throw new Error(MessageManager + .getString("error.implementation_error_for_new_cigar")); } _setSeq(seq, false, 0, 0); // there is still work to do @@ -399,7 +458,8 @@ public class SeqCigar extends CigarSimple super(); if (seq == null) { - throw new Error(MessageManager.getString("error.implementation_error_for_new_cigar")); + throw new Error(MessageManager + .getString("error.implementation_error_for_new_cigar")); } _setSeq(seq, false, start, end + 1); // there is still work to do @@ -428,18 +488,18 @@ public class SeqCigar extends CigarSimple /** * create an alignment from the given array of cigar sequences and gap * character, and marking the given segments as visible in the given - * columselection. + * hiddenColumns. * * @param alseqs * @param gapCharacter - * @param colsel - * - columnSelection where hidden regions are marked + * @param hidden + * - hiddenColumns where hidden regions are marked * @param segments * - visible regions of alignment * @return SequenceI[] */ public static SequenceI[] createAlignmentSequences(SeqCigar[] alseqs, - char gapCharacter, ColumnSelection colsel, int[] segments) + char gapCharacter, HiddenColumns hidden, int[] segments) { SequenceI[] seqs = new SequenceI[alseqs.length]; StringBuffer[] g_seqs = new StringBuffer[alseqs.length]; @@ -447,14 +507,17 @@ public class SeqCigar extends CigarSimple Object[] gs_regions = new Object[alseqs.length]; for (int i = 0; i < alseqs.length; i++) { - alseqs_string[i] = alseqs[i].getRefSeq().getSequenceAsString( - alseqs[i].start, alseqs[i].end); + alseqs_string[i] = alseqs[i].getRefSeq() + .getSequenceAsString(alseqs[i].start, alseqs[i].end); gs_regions[i] = alseqs[i].getSequenceAndDeletions(alseqs_string[i], gapCharacter); // gapped sequence, {start, start col, end. // endcol}, hidden regions {{start, end, col}}) if (gs_regions[i] == null) { - throw new Error(MessageManager.formatMessage("error.implementation_error_cigar_seq_no_operations", new String[]{Integer.valueOf(i).toString()})); + throw new Error(MessageManager.formatMessage( + "error.implementation_error_cigar_seq_no_operations", + new String[] + { Integer.valueOf(i).toString() })); } g_seqs[i] = new StringBuffer((String) ((Object[]) gs_regions[i])[0]); // the // visible @@ -509,7 +572,7 @@ public class SeqCigar extends CigarSimple if (segments == null) { // add a hidden column for this deletion - colsel.hideColumns(inspos, inspos + insert.length - 1); + hidden.hideColumns(inspos, inspos + insert.length - 1); } } } @@ -519,10 +582,12 @@ public class SeqCigar extends CigarSimple int[] bounds = ((int[]) ((Object[]) gs_regions[i])[1]); SequenceI ref = alseqs[i].getRefSeq(); seqs[i] = new Sequence(ref.getName(), g_seqs[i].toString(), - ref.getStart() + alseqs[i].start + bounds[0], ref.getStart() - + alseqs[i].start + (bounds[2] == 0 ? -1 : bounds[2])); + ref.getStart() + alseqs[i].start + bounds[0], + ref.getStart() + alseqs[i].start + + (bounds[2] == 0 ? -1 : bounds[2])); seqs[i].setDatasetSequence(ref); seqs[i].setDescription(ref.getDescription()); + SeqsetUtils.SeqCharacterUnhash(seqs[i],alseqs[i].seqProps,true,true); } if (segments != null) { @@ -530,165 +595,14 @@ public class SeqCigar extends CigarSimple { // int start=shifts.shift(segments[i]-1)+1; // int end=shifts.shift(segments[i]+segments[i+1]-1)-1; - colsel.hideColumns(segments[i + 1], segments[i + 1] - + segments[i + 2] - 1); + hidden.hideColumns(segments[i + 1], + segments[i + 1] + segments[i + 2] - 1); } } return seqs; } /** - * non rigorous testing - */ - /** - * - * @param seq - * Sequence - * @param ex_cs_gapped - * String - * @return String - */ - public static String testCigar_string(Sequence seq, String ex_cs_gapped) - { - SeqCigar c_sgapped = new SeqCigar(seq); - String cs_gapped = c_sgapped.getCigarstring(); - if (!cs_gapped.equals(ex_cs_gapped)) - { - System.err.println("Failed getCigarstring: incorect string '" - + cs_gapped + "' != " + ex_cs_gapped); - } - return cs_gapped; - } - - public static boolean testSeqRecovery(SeqCigar gen_sgapped, - SequenceI s_gapped) - { - // this is non-rigorous - start and end recovery is not tested. - SequenceI gen_sgapped_s = gen_sgapped.getSeq('-'); - if (!gen_sgapped_s.getSequence().equals(s_gapped.getSequence())) - { - System.err.println("Couldn't reconstruct sequence.\n" - + gen_sgapped_s.getSequenceAsString() + "\n" - + s_gapped.getSequenceAsString()); - return false; - } - return true; - } - - public static void main(String argv[]) throws Exception - { - String o_seq; - Sequence s = new Sequence("MySeq", - o_seq = "asdfktryasdtqwrtsaslldddptyipqqwaslchvhttt", 39, 80); - String orig_gapped; - Sequence s_gapped = new Sequence( - "MySeq", - orig_gapped = "----asdf------ktryas---dtqwrtsasll----dddptyipqqwa----slchvhttt", - 39, 80); - String ex_cs_gapped = "4I4M6I6M3I11M4I12M4I9M"; - s_gapped.setDatasetSequence(s); - String sub_gapped_s; - Sequence s_subsequence_gapped = new Sequence( - "MySeq", - sub_gapped_s = "------ktryas---dtqwrtsasll----dddptyipqqwa----slchvh", - 43, 77); - - s_subsequence_gapped.setDatasetSequence(s); - SeqCigar c_null = new SeqCigar(s); - String cs_null = c_null.getCigarstring(); - if (!cs_null.equals("42M")) - { - System.err - .println("Failed to recover ungapped sequence cigar operations:" - + ((cs_null == "") ? "empty string" : cs_null)); - } - testCigar_string(s_gapped, ex_cs_gapped); - SeqCigar gen_sgapped = SeqCigar.parseCigar(s, ex_cs_gapped); - if (!gen_sgapped.getCigarstring().equals(ex_cs_gapped)) - { - System.err.println("Failed parseCigar(" + ex_cs_gapped - + ")->getCigarString()->'" + gen_sgapped.getCigarstring() - + "'"); - } - testSeqRecovery(gen_sgapped, s_gapped); - // Test dataset resolution - SeqCigar sub_gapped = new SeqCigar(s_subsequence_gapped); - if (!testSeqRecovery(sub_gapped, s_subsequence_gapped)) - { - System.err - .println("Failed recovery for subsequence of dataset sequence"); - } - // width functions - if (sub_gapped.getWidth() != sub_gapped_s.length()) - { - System.err.println("Failed getWidth()"); - } - - sub_gapped.getFullWidth(); - if (sub_gapped.hasDeletedRegions()) - { - System.err.println("hasDeletedRegions is incorrect."); - } - // Test start-end region SeqCigar - SeqCigar sub_se_gp = new SeqCigar(s_subsequence_gapped, 8, 48); - if (sub_se_gp.getWidth() != 41) - { - System.err - .println("SeqCigar(seq, start, end) not properly clipped alignsequence."); - } - System.out.println("Original sequence align:\n" + sub_gapped_s - + "\nReconstructed window from 8 to 48\n" + "XXXXXXXX" - + sub_se_gp.getSequenceString('-') + "..." + "\nCigar String:" - + sub_se_gp.getCigarstring() + "\n"); - SequenceI ssgp = sub_se_gp.getSeq('-'); - System.out.println("\t " + ssgp.getSequenceAsString()); - for (int r = 0; r < 10; r++) - { - sub_se_gp = new SeqCigar(s_subsequence_gapped, 8, 48); - int sl = sub_se_gp.getWidth(); - int st = sl - 1 - r; - for (int rs = 0; rs < 10; rs++) - { - int e = st + rs; - sub_se_gp.deleteRange(st, e); - String ssgapedseq = sub_se_gp.getSeq('-').getSequenceAsString(); - System.out.println(st + "," + e + "\t:" + ssgapedseq); - st -= 3; - } - } - { - SeqCigar[] set = new SeqCigar[] - { new SeqCigar(s), new SeqCigar(s_subsequence_gapped, 8, 48), - new SeqCigar(s_gapped) }; - Alignment al = new Alignment(set); - for (int i = 0; i < al.getHeight(); i++) - { - System.out.println("" + al.getSequenceAt(i).getName() + "\t" - + al.getSequenceAt(i).getStart() + "\t" - + al.getSequenceAt(i).getEnd() + "\t" - + al.getSequenceAt(i).getSequenceAsString()); - } - } - { - System.out.println("Gapped."); - SeqCigar[] set = new SeqCigar[] - { new SeqCigar(s), new SeqCigar(s_subsequence_gapped, 8, 48), - new SeqCigar(s_gapped) }; - set[0].deleteRange(20, 25); - Alignment al = new Alignment(set); - for (int i = 0; i < al.getHeight(); i++) - { - System.out.println("" + al.getSequenceAt(i).getName() + "\t" - + al.getSequenceAt(i).getStart() + "\t" - + al.getSequenceAt(i).getEnd() + "\t" - + al.getSequenceAt(i).getSequenceAsString()); - } - } - // if (!ssgapedseq.equals("ryas---dtqqwa----slchvh")) - // System.err.println("Subseqgaped\n------ktryas---dtqwrtsasll----dddptyipqqwa----slchvhryas---dtqwrtsasll--qwa----slchvh\n"+ssgapedseq+"\n"+sub_se_gp.getCigarstring()); - } - - /** * references to entities that this sequence cigar is associated with. */ private Hashtable selGroups = null;