import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
+import java.util.Iterator;
import java.util.List;
-import java.util.Map;
public class Dna
{
* 'final' variables describe the inputs to the translation, which should not
* be modified.
*/
- final private List<SequenceI> selection;
+ private final List<SequenceI> selection;
- final private String[] seqstring;
+ private final String[] seqstring;
- final private int[] contigs;
+ private final Iterator<int[]> contigs;
- final private char gapChar;
+ private final char gapChar;
- final private AlignmentAnnotation[] annotations;
+ private final AlignmentAnnotation[] annotations;
- final private int dnaWidth;
+ private final int dnaWidth;
- final private Alignment dataset;
+ private final AlignmentI dataset;
+
+ private ShiftList vismapping;
+
+ private int[] startcontigs;
/*
* Working variables for the translation.
* @param viewport
* @param visibleContigs
*/
- public Dna(AlignViewportI viewport, int[] visibleContigs)
+ public Dna(AlignViewportI viewport, Iterator<int[]> visibleContigs)
{
this.selection = Arrays.asList(viewport.getSequenceSelection());
this.seqstring = viewport.getViewAsString(true);
this.annotations = viewport.getAlignment().getAlignmentAnnotation();
this.dnaWidth = viewport.getAlignment().getWidth();
this.dataset = viewport.getAlignment().getDataset();
+ initContigs();
+ }
+
+ /**
+ * Initialise contigs used as starting point for translateCodingRegion
+ */
+ private void initContigs()
+ {
+ vismapping = new ShiftList(); // map from viscontigs to seqstring
+ // intervals
+
+ int npos = 0;
+ int[] lastregion = null;
+ ArrayList<Integer> tempcontigs = new ArrayList<>();
+ while (contigs.hasNext())
+ {
+ int[] region = contigs.next();
+ if (lastregion == null)
+ {
+ vismapping.addShift(npos, region[0]);
+ }
+ else
+ {
+ // hidden region
+ vismapping.addShift(npos, region[0] - lastregion[1] + 1);
+ }
+ lastregion = region;
+ tempcontigs.add(region[0]);
+ tempcontigs.add(region[1]);
+ }
+
+ startcontigs = new int[tempcontigs.size()];
+ int i = 0;
+ for (Integer val : tempcontigs)
+ {
+ startcontigs[i] = val;
+ i++;
+ }
+ tempcontigs = null;
}
/**
* @param ac2
* @return
*/
- private static int jalview_2_8_2compare(AlignedCodon ac1, AlignedCodon ac2)
+ private static int jalview_2_8_2compare(AlignedCodon ac1,
+ AlignedCodon ac2)
{
if (ac1 == null || ac2 == null || (ac1.equals(ac2)))
{
}
/**
+ * Translates cDNA using the specified code table
*
* @return
*/
- public AlignmentI translateCdna()
+ public AlignmentI translateCdna(GeneticCodeI codeTable)
{
AlignedCodonFrame acf = new AlignedCodonFrame();
int s;
int sSize = selection.size();
- List<SequenceI> pepseqs = new ArrayList<SequenceI>();
+ List<SequenceI> pepseqs = new ArrayList<>();
for (s = 0; s < sSize; s++)
{
SequenceI newseq = translateCodingRegion(selection.get(s),
- seqstring[s], acf, pepseqs);
+ seqstring[s], acf, pepseqs, codeTable);
if (newseq != null)
{
for (int gd = 0; gd < selection.length; gd++)
{
SequenceI dna = selection[gd];
- DBRefEntry[] dnarefs = DBRefUtils
- .selectRefs(dna.getDBRef(),
- jalview.datamodel.DBRefSource.DNACODINGDBS);
+ List<DBRefEntry> dnarefs = DBRefUtils.selectRefs(dna.getDBRefs(),
+ jalview.datamodel.DBRefSource.DNACODINGDBS);
if (dnarefs != null)
{
// intersect with pep
- List<DBRefEntry> mappedrefs = new ArrayList<DBRefEntry>();
- DBRefEntry[] refs = dna.getDBRef();
- for (int d = 0; d < refs.length; d++)
+ List<DBRefEntry> mappedrefs = new ArrayList<>();
+ List<DBRefEntry> refs = dna.getDBRefs();
+ for (int d = 0, nd = refs.size(); d < nd; d++)
{
- if (refs[d].getMap() != null && refs[d].getMap().getMap() != null
- && refs[d].getMap().getMap().getFromRatio() == 3
- && refs[d].getMap().getMap().getToRatio() == 1)
+ DBRefEntry ref = refs.get(d);
+ if (ref.getMap() != null && ref.getMap().getMap() != null
+ && ref.getMap().getMap().getFromRatio() == 3
+ && ref.getMap().getMap().getToRatio() == 1)
{
- mappedrefs.add(refs[d]); // add translated protein maps
+ mappedrefs.add(ref); // add translated protein maps
}
}
- dnarefs = mappedrefs.toArray(new DBRefEntry[mappedrefs.size()]);
- for (int d = 0; d < dnarefs.length; d++)
+ dnarefs = mappedrefs;//.toArray(new DBRefEntry[mappedrefs.size()]);
+ for (int d = 0, nd = dnarefs.size(); d < nd; d++)
{
- Mapping mp = dnarefs[d].getMap();
+ Mapping mp = dnarefs.get(d).getMap();
if (mp != null)
{
- for (int vc = 0; vc < viscontigs.length; vc += 2)
+ for (int vc = 0, nv = viscontigs.length; vc < nv; vc += 2)
{
int[] mpr = mp.locateMappedRange(viscontigs[vc],
viscontigs[vc + 1]);
aa.graphHeight = annotation.graphHeight;
if (annotation.getThreshold() != null)
{
- aa.setThreshold(new GraphLine(annotation
- .getThreshold()));
+ aa.setThreshold(new GraphLine(annotation.getThreshold()));
}
if (annotation.hasScore)
{
* @param acf
* Definition of global ORF alignment reference frame
* @param proteinSeqs
+ * @param codeTable
* @return sequence ready to be added to alignment.
*/
protected SequenceI translateCodingRegion(SequenceI selection,
String seqstring, AlignedCodonFrame acf,
- List<SequenceI> proteinSeqs)
+ List<SequenceI> proteinSeqs, GeneticCodeI codeTable)
{
- List<int[]> skip = new ArrayList<int[]>();
- int skipint[] = null;
- ShiftList vismapping = new ShiftList(); // map from viscontigs to seqstring
- // intervals
- int vc;
- int[] scontigs = new int[contigs.length];
+ List<int[]> skip = new ArrayList<>();
+ int[] skipint = null;
+
int npos = 0;
- for (vc = 0; vc < contigs.length; vc += 2)
- {
- if (vc == 0)
- {
- vismapping.addShift(npos, contigs[vc]);
- }
- else
- {
- // hidden region
- vismapping.addShift(npos, contigs[vc] - contigs[vc - 1] + 1);
- }
- scontigs[vc] = contigs[vc];
- scontigs[vc + 1] = contigs[vc + 1];
- }
+ int vc = 0;
+
+ int[] scontigs = new int[startcontigs.length];
+ System.arraycopy(startcontigs, 0, scontigs, 0, startcontigs.length);
// allocate a roughly sized buffer for the protein sequence
StringBuilder protein = new StringBuilder(seqstring.length() / 2);
* Filled up a reading frame...
*/
AlignedCodon alignedCodon = new AlignedCodon(cdp[0], cdp[1], cdp[2]);
- String aa = ResidueProperties.codonTranslate(new String(codon));
+ String aa = codeTable.translate(new String(codon));
rf = 0;
final String gapString = String.valueOf(gapChar);
if (aa == null)
aa = gapString;
if (skipint == null)
{
- skipint = new int[]
- { alignedCodon.pos1, alignedCodon.pos3 /* cdp[0], cdp[2] */};
+ skipint = new int[] { alignedCodon.pos1,
+ alignedCodon.pos3 /*
+ * cdp[0],
+ * cdp[2]
+ */ };
}
skipint[1] = alignedCodon.pos3; // cdp[2];
}
}
if (vc + 2 < t.length)
{
- System.arraycopy(scontigs, vc + 2, t, vc, t.length
- - vc + 2);
+ System.arraycopy(scontigs, vc + 2, t, vc,
+ t.length - vc + 2);
}
scontigs = t;
}
skip.add(skipint);
skipint = null;
}
- if (aa.equals("STOP"))
+ if (aa.equals(ResidueProperties.STOP))
{
aa = STOP_ASTERIX;
}
}
else if (!alignedCodons[aspos].equals(alignedCodon))
{
- throw new IllegalStateException("Tried to coalign "
- + alignedCodons[aspos].toString() + " with "
- + alignedCodon.toString());
+ throw new IllegalStateException(
+ "Tried to coalign " + alignedCodons[aspos].toString()
+ + " with " + alignedCodon.toString());
}
if (aspos >= aaWidth)
{
* range iv[0] = skipint[1]; } else { } } } else if (iv[0]<skipint[0]) {
* iv = (int[]) narange.elementAt(vc+1); } } while (iv[0]) } }
*/
- MapList map = new MapList(scontigs, new int[]
- { 1, resSize }, 3, 1);
+ MapList map = new MapList(scontigs, new int[] { 1, resSize }, 3, 1);
- transferCodedFeatures(selection, newseq, map, null, null);
+ transferCodedFeatures(selection, newseq, map);
/*
* Construct a dataset sequence for our new peptide.
* @param proteinSeqs
* @return
*/
- protected void insertAAGap(int pos,
- List<SequenceI> proteinSeqs)
+ protected void insertAAGap(int pos, List<SequenceI> proteinSeqs)
{
aaWidth++;
for (SequenceI seq : proteinSeqs)
/**
* Given a peptide newly translated from a dna sequence, copy over and set any
- * features on the peptide from the DNA. If featureTypes is null, all features
- * on the dna sequence are searched (rather than just the displayed ones), and
- * similarly for featureGroups.
+ * features on the peptide from the DNA.
*
* @param dna
* @param pep
* @param map
- * @param featureTypes
- * hash whose keys are the displayed feature type strings
- * @param featureGroups
- * hash where keys are feature groups and values are Boolean objects
- * indicating if they are displayed.
*/
private static void transferCodedFeatures(SequenceI dna, SequenceI pep,
- MapList map, Map<String, Object> featureTypes,
- Map<String, Boolean> featureGroups)
+ MapList map)
{
- SequenceFeature[] sfs = dna.getSequenceFeatures();
- Boolean fgstate;
- DBRefEntry[] dnarefs = DBRefUtils.selectRefs(dna.getDBRef(),
- DBRefSource.DNACODINGDBS);
- if (dnarefs != null)
+ // BH 2019.01.25 nop?
+// List<DBRefEntry> dnarefs = DBRefUtils.selectRefs(dna.getDBRefs(),
+// DBRefSource.DNACODINGDBS);
+// if (dnarefs != null)
+// {
+// // intersect with pep
+// for (int d = 0, nd = dnarefs.size(); d < nd; d++)
+// {
+// Mapping mp = dnarefs.get(d).getMap();
+// if (mp != null)
+// {
+// }
+// }
+// }
+ for (SequenceFeature sf : dna.getFeatures().getAllFeatures())
{
- // intersect with pep
- for (int d = 0; d < dnarefs.length; d++)
- {
- Mapping mp = dnarefs[d].getMap();
- if (mp != null)
+ if (FeatureProperties.isCodingFeature(null, sf.getType()))
{
+ // if (map.intersectsFrom(sf[f].begin, sf[f].end))
+ {
+
+ }
}
- }
}
- if (sfs != null)
+ }
+
+ /**
+ * Returns an alignment consisting of the reversed (and optionally
+ * complemented) sequences set in this object's constructor
+ *
+ * @param complement
+ * @return
+ */
+ public AlignmentI reverseCdna(boolean complement)
+ {
+ int sSize = selection.size();
+ List<SequenceI> reversed = new ArrayList<>();
+ for (int s = 0; s < sSize; s++)
{
- for (SequenceFeature sf : sfs)
+ SequenceI newseq = reverseSequence(selection.get(s).getName(),
+ seqstring[s], complement);
+
+ if (newseq != null)
{
- fgstate = (featureGroups == null) ? null : featureGroups
- .get(sf.featureGroup);
- if ((featureTypes == null || featureTypes.containsKey(sf.getType()))
- && (fgstate == null || fgstate.booleanValue()))
- {
- if (FeatureProperties.isCodingFeature(null, sf.getType()))
- {
- // if (map.intersectsFrom(sf[f].begin, sf[f].end))
- {
+ reversed.add(newseq);
+ }
+ }
- }
- }
- }
+ SequenceI[] newseqs = reversed.toArray(new SequenceI[reversed.size()]);
+ AlignmentI al = new Alignment(newseqs);
+ ((Alignment) al).createDatasetAlignment();
+ return al;
+ }
+
+ /**
+ * Returns a reversed, and optionally complemented, sequence. The new
+ * sequence's name is the original name with "|rev" or "|revcomp" appended.
+ * aAcCgGtT and DNA ambiguity codes are complemented, any other characters are
+ * left unchanged.
+ *
+ * @param seq
+ * @param complement
+ * @return
+ */
+ public static SequenceI reverseSequence(String seqName, String sequence,
+ boolean complement)
+ {
+ String newName = seqName + "|rev" + (complement ? "comp" : "");
+ char[] originalSequence = sequence.toCharArray();
+ int length = originalSequence.length;
+ char[] reversedSequence = new char[length];
+ int bases = 0;
+ for (int i = 0; i < length; i++)
+ {
+ char c = complement ? getComplement(originalSequence[i])
+ : originalSequence[i];
+ reversedSequence[length - i - 1] = c;
+ if (!Comparison.isGap(c))
+ {
+ bases++;
}
}
+ SequenceI reversed = new Sequence(newName, reversedSequence, 1, bases);
+ return reversed;
+ }
+
+ /**
+ * Answers the reverse complement of the input string
+ *
+ * @see #getComplement(char)
+ * @param s
+ * @return
+ */
+ public static String reverseComplement(String s)
+ {
+ StringBuilder sb = new StringBuilder(s.length());
+ for (int i = s.length() - 1; i >= 0; i--)
+ {
+ sb.append(Dna.getComplement(s.charAt(i)));
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Returns dna complement (preserving case) for aAcCgGtTuU. Ambiguity codes
+ * are treated as on http://reverse-complement.com/. Anything else is left
+ * unchanged.
+ *
+ * @param c
+ * @return
+ */
+ public static char getComplement(char c)
+ {
+ char result = c;
+ switch (c)
+ {
+ case '-':
+ case '.':
+ case ' ':
+ break;
+ case 'a':
+ result = 't';
+ break;
+ case 'A':
+ result = 'T';
+ break;
+ case 'c':
+ result = 'g';
+ break;
+ case 'C':
+ result = 'G';
+ break;
+ case 'g':
+ result = 'c';
+ break;
+ case 'G':
+ result = 'C';
+ break;
+ case 't':
+ result = 'a';
+ break;
+ case 'T':
+ result = 'A';
+ break;
+ case 'u':
+ result = 'a';
+ break;
+ case 'U':
+ result = 'A';
+ break;
+ case 'r':
+ result = 'y';
+ break;
+ case 'R':
+ result = 'Y';
+ break;
+ case 'y':
+ result = 'r';
+ break;
+ case 'Y':
+ result = 'R';
+ break;
+ case 'k':
+ result = 'm';
+ break;
+ case 'K':
+ result = 'M';
+ break;
+ case 'm':
+ result = 'k';
+ break;
+ case 'M':
+ result = 'K';
+ break;
+ case 'b':
+ result = 'v';
+ break;
+ case 'B':
+ result = 'V';
+ break;
+ case 'v':
+ result = 'b';
+ break;
+ case 'V':
+ result = 'B';
+ break;
+ case 'd':
+ result = 'h';
+ break;
+ case 'D':
+ result = 'H';
+ break;
+ case 'h':
+ result = 'd';
+ break;
+ case 'H':
+ result = 'D';
+ break;
+ }
+
+ return result;
}
}