package jalview.datamodel; import jalview.analysis.AlignSeq; public class SeqCigar extends CigarSimple { private SequenceI refseq=null; /** * Reference dataset sequence for the cigar string * @return SequenceI */ public SequenceI getRefSeq() { return refseq; } /** * Returns sequence as a string with cigar operations applied to it * @return String */ public String getSequenceString(char GapChar) { return (length==0) ? "" : (String) getSequenceAndDeletions(refseq.getSequence(), GapChar)[0]; } /** * recreates a gapped and edited version of RefSeq or null for an empty cigar string * @return SequenceI */ public SequenceI getSeq(char GapChar) { Sequence seq; if (refseq==null || length==0) return null; Object[] edit_result=getSequenceAndDeletions(refseq.getSequence(), GapChar); if (edit_result==null) throw new Error("Implementation Error - unexpected null from getSequenceAndDeletions"); seq = new Sequence(refseq.getName(), (String) edit_result[0], refseq.getStart()+((int[]) edit_result[1])[0], refseq.getStart()+((int[]) edit_result[1])[2]); seq.setDatasetSequence(refseq); return seq; } /* We don't allow this - refseq is given at construction time only public void setSeq(SequenceI seq) { this.seq = seq; } */ /** * internal constructor - sets seq to a gapless sequence derived from seq * and prepends any 'D' operations needed to get to the first residue of seq. * @param seq SequenceI * @return true if gaps are present in seq */ private boolean _setSeq(SequenceI seq) { boolean hasgaps=false; if (seq==null) throw new Error("Implementation Error - _setSeq(null)"); // Find correct sequence to reference and add initial hidden offset SequenceI ds = seq.getDatasetSequence(); if (ds==null) { ds = new Sequence(seq.getName(), AlignSeq.extractGaps(jalview.util.Comparison.GapChars, new String(seq.getSequence())), seq.getStart(), seq.getEnd()); } // check that we haven't just duplicated an ungapped sequence. if (ds.getLength()==seq.getLength()) { ds = seq; } else { hasgaps = true; } this.refseq = ds; // Adjust offset if (ds.getStart() 0 && op != I) { cigar.addOperation(op, range); range = 0; } op = I; range++; } else { if (range > 0 && op != M) { cigar.addOperation(op, range); range = 0; } op = M; range++; } } else { if (!isGap) { if (range > 0 && op != D) { cigar.addOperation(op, range); range = 0; } op = D; range++; } else { // do nothing - insertions are not recorded in flanking regions. } } } if (range > 0) { cigar.addOperation(op, range); } } /** * create a cigar string for given sequence * @param seq SequenceI */ public SeqCigar(SequenceI seq) { super(); if (seq == null) throw new Error("Implementation error for new Cigar(SequenceI)"); if (_setSeq(seq)) { // there is still work to do addSequenceOps(this, seq, 0, seq.getLength()); } } public SeqCigar(SequenceI seq, int start, int end) { super(); if (seq == null) throw new Error("Implementation error for new Cigar(SequenceI)"); if (_setSeq(seq)) { // there is still work to do addSequenceOps(this, seq, start, end); } } /** * Create a cigar object from a cigar string like '[]+' * Will fail if the given seq already contains gaps (JBPNote: future implementation will fix) * @param seq SequenceI object resolvable to a dataset sequence * @param cigarString String * @return Cigar */ public static SeqCigar parseCigar(SequenceI seq, String cigarString) throws Exception { Object[] opsandrange = parseCigarString(cigarString); return new SeqCigar(seq, (char[]) opsandrange[0], (int[]) opsandrange[1]); } /** * non rigorous testing */ /** * * @param seq Sequence * @param ex_cs_gapped String * @return String */ public static String testCigar_string(Sequence seq, String ex_cs_gapped) { SeqCigar c_sgapped = new SeqCigar(seq); String cs_gapped = c_sgapped.getCigarstring(); if (!cs_gapped.equals(ex_cs_gapped)) System.err.println("Failed getCigarstring: incorect string '"+cs_gapped+"' != "+ex_cs_gapped); return cs_gapped; } public static boolean testSeqRecovery(SeqCigar gen_sgapped, SequenceI s_gapped) { SequenceI gen_sgapped_s = gen_sgapped.getSeq('-'); if (!gen_sgapped_s.getSequence().equals(s_gapped.getSequence())) { System.err.println("Couldn't reconstruct sequence.\n" + gen_sgapped_s.getSequence() + "\n" + s_gapped.getSequence()); return false; } return true; } public static void main(String argv[]) throws Exception { Sequence s=new Sequence("MySeq", "asdfktryasdtqwrtsaslldddptyipqqwaslchvhttt",39,80); String orig_gapped; Sequence s_gapped=new Sequence("MySeq", orig_gapped="----asdf------ktryas---dtqwrtsasll----dddptyipqqwa----slchvhttt", 39,80); String ex_cs_gapped="4I4M6I6M3I11M4I12M4I9M"; s_gapped.setDatasetSequence(s); String sub_gapped_s; Sequence s_subsequence_gapped=new Sequence("MySeq", sub_gapped_s="------ktryas---dtqwrtsasll----dddptyipqqwa----slchvh", 43,77); s_subsequence_gapped.setDatasetSequence(s); SeqCigar c_null = new SeqCigar(s); String cs_null = c_null.getCigarstring(); if (cs_null.length()>0) System.err.println("Failed getCigarstring: Unexpected cigar operations:"+cs_null); testCigar_string(s_gapped, ex_cs_gapped); SeqCigar gen_sgapped = SeqCigar.parseCigar(s, ex_cs_gapped); if (!gen_sgapped.getCigarstring().equals(ex_cs_gapped)) System.err.println("Failed parseCigar("+ex_cs_gapped+")->getCigarString()->'"+gen_sgapped.getCigarstring()+"'"); testSeqRecovery(gen_sgapped, s_gapped); // Test dataset resolution SeqCigar sub_gapped = new SeqCigar(s_subsequence_gapped); if (!testSeqRecovery(sub_gapped, s_subsequence_gapped)) System.err.println("Failed recovery for subsequence of dataset sequence"); // width functions if (sub_gapped.getWidth()!=sub_gapped_s.length()) System.err.println("Failed getWidth()"); sub_gapped.getFullWidth(); if (sub_gapped.hasDeletedRegions()) System.err.println("hasDeletedRegions is incorrect."); // Test start-end region SeqCigar SeqCigar sub_se_gp= new SeqCigar(s_subsequence_gapped, 8, 48); if (sub_se_gp.getWidth()!=40) System.err.println("SeqCigar(seq, start, end) not properly clipped alignsequence."); System.out.println("Original sequence align:\n"+sub_gapped_s+"\nReconstructed window from 8 to 48\n"+"XXXXXXXX"+sub_se_gp.getSequenceString('-')+"...."+"\nCigar String:"+sub_se_gp.getCigarstring()+""); } }