*/
package jalview.analysis;
+import jalview.datamodel.AlignedCodonFrame;
+import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.SequenceI;
+import jalview.schemes.ResidueProperties;
+import jalview.util.MapList;
import java.util.ArrayList;
+import java.util.LinkedHashMap;
import java.util.List;
+import java.util.Map;
/**
* grab bag of useful alignment manipulation operations Expect these to be
}
AlignmentI newAl = new jalview.datamodel.Alignment(
sq.toArray(new SequenceI[0]));
+ for (SequenceI s : sq)
+ {
+ if (s.getAnnotation() != null)
+ {
+ for (AlignmentAnnotation aa : s.getAnnotation())
+ {
+ newAl.addAnnotation(aa);
+ }
+ }
+ }
newAl.setDataset(core.getDataset());
return newAl;
}
}
return result;
}
+
+ /**
+  * Groups the sequences of an alignment by their name. Each map entry holds,
+  * in alignment order, every sequence that carries that name; entries appear
+  * in the order in which each name is first encountered. Sequences whose name
+  * is null are left out. Intended for matching up sequences between different
+  * alignment views of the same data.
+  *
+  * @param al
+  *          the alignment whose sequences are to be grouped
+  * @return a map from sequence name to the list of sequences with that name
+  * @see jalview.datamodel.AlignmentI#getSequencesByName()
+  */
+ public static Map<String, List<SequenceI>> getSequencesByName(
+         AlignmentI al)
+ {
+   final Map<String, List<SequenceI>> byName = new LinkedHashMap<String, List<SequenceI>>();
+   for (final SequenceI sequence : al.getSequences())
+   {
+     final String key = sequence.getName();
+     if (key == null)
+     {
+       // a sequence without a name cannot be looked up by name
+       continue;
+     }
+
+     List<SequenceI> sameName = byName.get(key);
+     if (sameName == null)
+     {
+       // first sequence seen with this name: start a new group
+       sameName = new ArrayList<SequenceI>();
+       byName.put(key, sameName);
+     }
+     sameName.add(sequence);
+   }
+   return byName;
+ }
+
+ /**
+ * Build mapping of protein to cDNA alignment. Mappings are made between
+ * sequences which have the same name and compatible lengths. Returns true if
+ * at least one sequence mapping was made, else false.
+ *
+ * @param proteinAlignment
+ * @param cdnaAlignment
+ * @return
+ */
+ public static boolean mapProteinToCdna(final AlignmentI proteinAlignment,
+ final AlignmentI cdnaAlignment)
+ {
+ boolean mapped = false;
+ List<SequenceI> thisSeqs = proteinAlignment.getSequences();
+
+ /*
+ * Build a look-up of cDNA sequences by name, for matching purposes.
+ */
+ Map<String, List<SequenceI>> cdnaSeqs = cdnaAlignment
+ .getSequencesByName();
+
+ for (SequenceI aaSeq : thisSeqs)
+ {
+ AlignedCodonFrame acf = new AlignedCodonFrame(
+ proteinAlignment.getWidth());
+ List<SequenceI> candidates = cdnaSeqs.get(aaSeq.getName());
+ if (candidates == null)
+ {
+ /*
+ * No cDNA sequence with matching name, so no mapping for this protein
+ * sequence
+ */
+ continue;
+ }
+ for (SequenceI cdnaSeq : candidates)
+ {
+ MapList map = mapProteinToCdna(aaSeq, cdnaSeq);
+ if (map != null)
+ {
+ acf.addMap(cdnaSeq, aaSeq, map);
+ mapped = true;
+ }
+ }
+ proteinAlignment.addCodonFrame(acf);
+ }
+ return mapped;
+ }
+
+ /**
+ * Build a mapping (if possible) of a protein to a cDNA sequence. The cDNA
+ * must be three times the length of the protein, possibly after ignoring
+ * start and/or stop codons. Returns null if no mapping is determined.
+ *
+ * @param proteinSeqs
+ * @param cdnaSeq
+ * @return
+ */
+ public static MapList mapProteinToCdna(SequenceI proteinSeq,
+ SequenceI cdnaSeq)
+ {
+ String aaSeqString = proteinSeq.getDatasetSequence()
+ .getSequenceAsString();
+ String cdnaSeqString = cdnaSeq.getDatasetSequence()
+ .getSequenceAsString();
+ if (aaSeqString == null || cdnaSeqString == null)
+ {
+ return null;
+ }
+
+ final int mappedLength = 3 * aaSeqString.length();
+ int cdnaLength = cdnaSeqString.length();
+ int cdnaStart = 1;
+ int cdnaEnd = cdnaLength;
+ final int proteinStart = 1;
+ final int proteinEnd = aaSeqString.length();
+
+ /*
+ * If lengths don't match, try ignoring stop codon.
+ */
+ if (cdnaLength != mappedLength)
+ {
+ for (Object stop : ResidueProperties.STOP)
+ {
+ if (cdnaSeqString.toUpperCase().endsWith((String) stop))
+ {
+ cdnaEnd -= 3;
+ cdnaLength -= 3;
+ break;
+ }
+ }
+ }
+
+ /*
+ * If lengths still don't match, try ignoring start codon.
+ */
+ if (cdnaLength != mappedLength
+ && cdnaSeqString.toUpperCase().startsWith(
+ ResidueProperties.START))
+ {
+ cdnaStart += 3;
+ cdnaLength -= 3;
+ }
+
+ if (cdnaLength == mappedLength)
+ {
+ MapList map = new MapList(new int[]
+ { cdnaStart, cdnaEnd }, new int[]
+ { proteinStart, proteinEnd }, 3, 1);
+ return map;
+ }
+ else
+ {
+ return null;
+ }
+ }
}