X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FAlignmentUtils.java;h=a811d846f52fd26b490d844107408471827d0164;hb=2b0142bf85bf5b3d20612c01433e7ce29b633ec4;hp=6f0125d97eb5e287aae5f90fc8d9a359375bd1d1;hpb=be32c14cd8e48fe0a207cd7030cb9cd46f894678;p=jalview.git diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 6f0125d..a811d84 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -35,15 +35,22 @@ import java.util.TreeMap; import jalview.datamodel.AlignedCodon; import jalview.datamodel.AlignedCodonFrame; +import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; +import jalview.datamodel.DBRefEntry; +import jalview.datamodel.DBRefSource; +import jalview.datamodel.FeatureProperties; import jalview.datamodel.Mapping; import jalview.datamodel.SearchResults; import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; import jalview.schemes.ResidueProperties; +import jalview.util.DBRefUtils; import jalview.util.MapList; +import jalview.util.MappingUtils; /** * grab bag of useful alignment manipulation operations Expect these to be @@ -282,7 +289,7 @@ public class AlignmentUtils * many-to-many, as that would risk mixing species with similar cDNA * sequences. */ - if (xrefsOnly && !CrossRef.haveCrossRef(aaSeq, cdnaSeq)) + if (xrefsOnly && !AlignmentUtils.haveCrossRef(aaSeq, cdnaSeq)) { continue; } @@ -1212,4 +1219,169 @@ public class AlignmentUtils } } } + + /** + * Returns true if either sequence has a cross-reference to the other + * + * @param seq1 + * @param seq2 + * @return + */ + public static boolean haveCrossRef(SequenceI seq1, SequenceI seq2) + { + // Note: moved here from class CrossRef as the latter class has dependencies + // not availability to the applet's classpath + return hasCrossRef(seq1, seq2) || hasCrossRef(seq2, seq1); + } + + /** + * Returns true if seq1 has a cross-reference to seq2. Currently this assumes + * that sequence name is structured as Source|AccessId. + * + * @param seq1 + * @param seq2 + * @return + */ + public static boolean hasCrossRef(SequenceI seq1, SequenceI seq2) + { + if (seq1 == null || seq2 == null) + { + return false; + } + String name = seq2.getName(); + final DBRefEntry[] xrefs = seq1.getDBRef(); + if (xrefs != null) + { + for (DBRefEntry xref : xrefs) + { + String xrefName = xref.getSource() + "|" + xref.getAccessionId(); + // case-insensitive test, consistent with DBRefEntry.equalRef() + if (xrefName.equalsIgnoreCase(name)) + { + return true; + } + } + } + return false; + } + + /** + * Constructs an alignment consisting of the mapped exon regions in the given + * nucleotide sequences, and updates mappings to match. + * + * @param dna + * aligned dna sequences + * @param mappings + * from dna to protein; these are replaced with new mappings + * @return an alignment whose sequences are the exon-only parts of the dna + * sequences (or null if no exons are found) + */ + public static AlignmentI makeExonAlignment(SequenceI[] dna, + Set mappings) + { + Set newMappings = new HashSet(); + List exonSequences = new ArrayList(); + + for (SequenceI dnaSeq : dna) + { + final SequenceI ds = dnaSeq.getDatasetSequence(); + List seqMappings = MappingUtils + .findMappingsForSequence(ds, mappings); + if (!seqMappings.isEmpty()) + { + /* + * We assume here that only one protein mapping is expected per dna + * sequence. Mapping to multiple protein sequences is conceivable but + * undefined. Splitting a mapping to one protein sequence across + * multiple mappings is possible but pathological. Need closer + * constraints on the contents of AlignedCodonFrame. + */ + AlignedCodonFrame newMapping = new AlignedCodonFrame(); + final SequenceI exonSequence = makeExonSequence(ds, + seqMappings.get(0), newMapping); + exonSequences.add(exonSequence); + newMappings.add(newMapping); + } + } + AlignmentI al = new Alignment( + exonSequences.toArray(new SequenceI[exonSequences.size()])); + al.setDataset(null); + + /* + * Replace the old mappings with the new ones + */ + mappings.clear(); + mappings.addAll(newMappings); + + return al; + } + + /** + * Helper method to make an exon-only sequence and populate its mapping to + * protein + *

+ * For example, if ggCCaTTcGAg has mappings [3, 4, 6, 7, 9, 10] to protein + * then generate a sequence CCTTGA with mapping [1, 6] to the same protein + * residues + * + * @param dnaSeq + * a dna dataset sequence + * @param mapping + * the current mapping of the sequence to protein + * @param newMapping + * the new mapping to populate, from the exon-only sequence + * @return + */ + protected static SequenceI makeExonSequence(SequenceI dnaSeq, + AlignedCodonFrame acf, AlignedCodonFrame newMapping) + { + Mapping mapping = acf.getMappingForSequence(dnaSeq); + final char[] dna = dnaSeq.getSequence(); + StringBuilder newSequence = new StringBuilder(dnaSeq.getLength()); + + /* + * Get the codon regions as { [2, 5], [7, 12], [14, 14] etc } + */ + List exonRanges = mapping.getMap().getFromRanges(); + for (int[] range : exonRanges) + { + for (int pos = range[0]; pos <= range[1]; pos++) + { + newSequence.append(dna[pos - 1]); + } + } + + SequenceI exon = new Sequence(dnaSeq.getName(), newSequence.toString()); + + /* + * Locate any xrefs to CDS database on the protein product and attach to the + * CDS sequence. Also add as a sub-token of the sequence name. + */ + // default to "CDS" if we can't locate an actual gene id + String cdsAccId = FeatureProperties.getCodingFeature(DBRefSource.EMBL); + DBRefEntry[] cdsRefs = DBRefUtils.selectRefs( + mapping.getTo().getDBRef(), DBRefSource.CODINGDBS); + if (cdsRefs != null) + { + for (DBRefEntry cdsRef : cdsRefs) + { + exon.addDBRef(new DBRefEntry(cdsRef)); + cdsAccId = cdsRef.getAccessionId(); + } + } + exon.setName(exon.getName() + "|" + cdsAccId); + exon.createDatasetSequence(); + + /* + * Build new mappings - from the same protein regions, but now to contiguous + * exons + */ + List exonRange = new ArrayList(); + exonRange.add(new int[] + { 1, newSequence.length() }); + MapList map = new MapList(exonRange, mapping.getMap().getToRanges(), 3, 1); + newMapping.addMap(exon.getDatasetSequence(), mapping.getTo(), map); + + return exon; + } }