X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fjalview%2Fanalysis%2FAlignmentUtils.java;h=66be62322ece3531aa1658be54e765b647627971;hb=5166594fc092fe3128ca31567ef104d80581c729;hp=34fe221a98d9099fe4e072f403a7bf12b61d580e;hpb=37de9310bec3501cbc6381e0c3dcb282fcaad812;p=jalview.git diff --git a/src/jalview/analysis/AlignmentUtils.java b/src/jalview/analysis/AlignmentUtils.java index 34fe221..66be623 100644 --- a/src/jalview/analysis/AlignmentUtils.java +++ b/src/jalview/analysis/AlignmentUtils.java @@ -35,11 +35,12 @@ import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; -import jalview.io.gff.SequenceOntologyFactory; +import jalview.datamodel.features.SequenceFeatures; import jalview.io.gff.SequenceOntologyI; import jalview.schemes.ResidueProperties; import jalview.util.Comparison; import jalview.util.DBRefUtils; +import jalview.util.IntRangeComparator; import jalview.util.MapList; import jalview.util.MappingUtils; import jalview.util.StringUtils; @@ -50,7 +51,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -60,6 +60,7 @@ import java.util.Map; import java.util.Map.Entry; import java.util.NoSuchElementException; import java.util.Set; +import java.util.SortedMap; import java.util.TreeMap; /** @@ -2053,11 +2054,11 @@ public class AlignmentUtils * * @param fromSeq * @param toSeq + * @param mapping + * the mapping from 'fromSeq' to 'toSeq' * @param select * if not null, only features of this type are copied (including * subtypes in the Sequence Ontology) - * @param mapping - * the mapping from 'fromSeq' to 'toSeq' * @param omitting */ public static int transferFeatures(SequenceI fromSeq, SequenceI toSeq, @@ -2069,75 +2070,74 @@ public class AlignmentUtils copyTo = copyTo.getDatasetSequence(); } - SequenceOntologyI so = SequenceOntologyFactory.getInstance(); + /* + * get features, optionally restricted by an ontology term + */ + List sfs = select == null ? fromSeq.getFeatures() + .getPositionalFeatures() : fromSeq.getFeatures() + .getFeaturesByOntology(select); + int count = 0; - SequenceFeature[] sfs = fromSeq.getSequenceFeatures(); - if (sfs != null) + for (SequenceFeature sf : sfs) { - for (SequenceFeature sf : sfs) + String type = sf.getType(); + boolean omit = false; + for (String toOmit : omitting) { - String type = sf.getType(); - if (select != null && !so.isA(type, select)) - { - continue; - } - boolean omit = false; - for (String toOmit : omitting) - { - if (type.equals(toOmit)) - { - omit = true; - } - } - if (omit) + if (type.equals(toOmit)) { - continue; + omit = true; } + } + if (omit) + { + continue; + } - /* - * locate the mapped range - null if either start or end is - * not mapped (no partial overlaps are calculated) - */ - int start = sf.getBegin(); - int end = sf.getEnd(); - int[] mappedTo = mapping.locateInTo(start, end); - /* - * if whole exon range doesn't map, try interpreting it - * as 5' or 3' exon overlapping the CDS range - */ - if (mappedTo == null) - { - mappedTo = mapping.locateInTo(end, end); - if (mappedTo != null) - { - /* - * end of exon is in CDS range - 5' overlap - * to a range from the start of the peptide - */ - mappedTo[0] = 1; - } - } - if (mappedTo == null) + /* + * locate the mapped range - null if either start or end is + * not mapped (no partial overlaps are calculated) + */ + int start = sf.getBegin(); + int end = sf.getEnd(); + int[] mappedTo = mapping.locateInTo(start, end); + /* + * if whole exon range doesn't map, try interpreting it + * as 5' or 3' exon overlapping the CDS range + */ + if (mappedTo == null) + { + mappedTo = mapping.locateInTo(end, end); + if (mappedTo != null) { - mappedTo = mapping.locateInTo(start, start); - if (mappedTo != null) - { - /* - * start of exon is in CDS range - 3' overlap - * to a range up to the end of the peptide - */ - mappedTo[1] = toSeq.getLength(); - } + /* + * end of exon is in CDS range - 5' overlap + * to a range from the start of the peptide + */ + mappedTo[0] = 1; } + } + if (mappedTo == null) + { + mappedTo = mapping.locateInTo(start, start); if (mappedTo != null) { - SequenceFeature copy = new SequenceFeature(sf); - copy.setBegin(Math.min(mappedTo[0], mappedTo[1])); - copy.setEnd(Math.max(mappedTo[0], mappedTo[1])); - copyTo.addSequenceFeature(copy); - count++; + /* + * start of exon is in CDS range - 3' overlap + * to a range up to the end of the peptide + */ + mappedTo[1] = toSeq.getLength(); } } + if (mappedTo != null) + { + int newBegin = Math.min(mappedTo[0], mappedTo[1]); + int newEnd = Math.max(mappedTo[0], mappedTo[1]); + SequenceFeature copy = new SequenceFeature(sf, newBegin, newEnd, + sf.getFeatureGroup()); + copyTo.addSequenceFeature(copy); + count++; + } } return count; } @@ -2202,49 +2202,44 @@ public class AlignmentUtils public static List findCdsPositions(SequenceI dnaSeq) { List result = new ArrayList(); - SequenceFeature[] sfs = dnaSeq.getSequenceFeatures(); - if (sfs == null) + + List sfs = dnaSeq.getFeatures().getFeaturesByOntology( + SequenceOntologyI.CDS); + if (sfs.isEmpty()) { return result; } - - SequenceOntologyI so = SequenceOntologyFactory.getInstance(); + SequenceFeatures.sortFeatures(sfs, true); int startPhase = 0; for (SequenceFeature sf : sfs) { + int phase = 0; + try + { + phase = Integer.parseInt(sf.getPhase()); + } catch (NumberFormatException e) + { + // ignore + } /* - * process a CDS feature (or a sub-type of CDS) + * phase > 0 on first codon means 5' incomplete - skip to the start + * of the next codon; example ENST00000496384 */ - if (so.isA(sf.getType(), SequenceOntologyI.CDS)) + int begin = sf.getBegin(); + int end = sf.getEnd(); + if (result.isEmpty()) { - int phase = 0; - try - { - phase = Integer.parseInt(sf.getPhase()); - } catch (NumberFormatException e) - { - // ignore - } - /* - * phase > 0 on first codon means 5' incomplete - skip to the start - * of the next codon; example ENST00000496384 - */ - int begin = sf.getBegin(); - int end = sf.getEnd(); - if (result.isEmpty()) + begin += phase; + if (begin > end) { - begin += phase; - if (begin > end) - { - // shouldn't happen! - System.err - .println("Error: start phase extends beyond start CDS in " - + dnaSeq.getName()); - } + // shouldn't happen! + System.err + .println("Error: start phase extends beyond start CDS in " + + dnaSeq.getName()); } - result.add(new int[] { begin, end }); } + result.add(new int[] { begin, end }); } /* @@ -2264,14 +2259,7 @@ public class AlignmentUtils * ranges are assembled in order. Other cases should not use this method, * but instead construct an explicit mapping for CDS (e.g. EMBL parsing). */ - Collections.sort(result, new Comparator() - { - @Override - public int compare(int[] o1, int[] o2) - { - return Integer.compare(o1[0], o2[0]); - } - }); + Collections.sort(result, IntRangeComparator.ASCENDING); return result; } @@ -2324,24 +2312,6 @@ public class AlignmentUtils count += computePeptideVariants(peptide, peptidePos, codonVariants); } - /* - * sort to get sequence features in start position order - * - would be better to store in Sequence as a TreeSet or NCList? - */ - if (peptide.getSequenceFeatures() != null) - { - Arrays.sort(peptide.getSequenceFeatures(), - new Comparator() - { - @Override - public int compare(SequenceFeature o1, SequenceFeature o2) - { - int c = Integer.compare(o1.getBegin(), o2.getBegin()); - return c == 0 ? Integer.compare(o1.getEnd(), o2.getEnd()) - : c; - } - }); - } return count; } @@ -2532,10 +2502,10 @@ public class AlignmentUtils * LinkedHashMap ensures we keep the peptide features in sequence order */ LinkedHashMap[]> variants = new LinkedHashMap[]>(); - SequenceOntologyI so = SequenceOntologyFactory.getInstance(); - SequenceFeature[] dnaFeatures = dnaSeq.getSequenceFeatures(); - if (dnaFeatures == null) + List dnaFeatures = dnaSeq.getFeatures() + .getFeaturesByOntology(SequenceOntologyI.SEQUENCE_VARIANT); + if (dnaFeatures.isEmpty()) { return variants; } @@ -2555,84 +2525,80 @@ public class AlignmentUtils // not handling multi-locus variant features continue; } - if (so.isA(sf.getType(), SequenceOntologyI.SEQUENCE_VARIANT)) + int[] mapsTo = dnaToProtein.locateInTo(dnaCol, dnaCol); + if (mapsTo == null) { - int[] mapsTo = dnaToProtein.locateInTo(dnaCol, dnaCol); - if (mapsTo == null) - { - // feature doesn't lie within coding region - continue; - } - int peptidePosition = mapsTo[0]; - List[] codonVariants = variants.get(peptidePosition); - if (codonVariants == null) - { - codonVariants = new ArrayList[CODON_LENGTH]; - codonVariants[0] = new ArrayList(); - codonVariants[1] = new ArrayList(); - codonVariants[2] = new ArrayList(); - variants.put(peptidePosition, codonVariants); - } + // feature doesn't lie within coding region + continue; + } + int peptidePosition = mapsTo[0]; + List[] codonVariants = variants.get(peptidePosition); + if (codonVariants == null) + { + codonVariants = new ArrayList[CODON_LENGTH]; + codonVariants[0] = new ArrayList(); + codonVariants[1] = new ArrayList(); + codonVariants[2] = new ArrayList(); + variants.put(peptidePosition, codonVariants); + } - /* - * extract dna variants to a string array - */ - String alls = (String) sf.getValue("alleles"); - if (alls == null) - { - continue; - } - String[] alleles = alls.toUpperCase().split(","); - int i = 0; - for (String allele : alleles) - { - alleles[i++] = allele.trim(); // lose any space characters "A, G" - } + /* + * extract dna variants to a string array + */ + String alls = (String) sf.getValue("alleles"); + if (alls == null) + { + continue; + } + String[] alleles = alls.toUpperCase().split(","); + int i = 0; + for (String allele : alleles) + { + alleles[i++] = allele.trim(); // lose any space characters "A, G" + } - /* - * get this peptide's codon positions e.g. [3, 4, 5] or [4, 7, 10] - */ - int[] codon = peptidePosition == lastPeptidePostion ? lastCodon - : MappingUtils.flattenRanges(dnaToProtein.locateInFrom( - peptidePosition, peptidePosition)); - lastPeptidePostion = peptidePosition; - lastCodon = codon; + /* + * get this peptide's codon positions e.g. [3, 4, 5] or [4, 7, 10] + */ + int[] codon = peptidePosition == lastPeptidePostion ? lastCodon + : MappingUtils.flattenRanges(dnaToProtein.locateInFrom( + peptidePosition, peptidePosition)); + lastPeptidePostion = peptidePosition; + lastCodon = codon; - /* - * save nucleotide (and any variant) for each codon position - */ - for (int codonPos = 0; codonPos < CODON_LENGTH; codonPos++) + /* + * save nucleotide (and any variant) for each codon position + */ + for (int codonPos = 0; codonPos < CODON_LENGTH; codonPos++) + { + String nucleotide = String.valueOf( + dnaSeq.getCharAt(codon[codonPos] - dnaStart)).toUpperCase(); + List codonVariant = codonVariants[codonPos]; + if (codon[codonPos] == dnaCol) { - String nucleotide = String.valueOf( - dnaSeq.getCharAt(codon[codonPos] - dnaStart)) - .toUpperCase(); - List codonVariant = codonVariants[codonPos]; - if (codon[codonPos] == dnaCol) + if (!codonVariant.isEmpty() + && codonVariant.get(0).variant == null) { - if (!codonVariant.isEmpty() - && codonVariant.get(0).variant == null) - { - /* - * already recorded base value, add this variant - */ - codonVariant.get(0).variant = sf; - } - else - { - /* - * add variant with base value - */ - codonVariant.add(new DnaVariant(nucleotide, sf)); - } + /* + * already recorded base value, add this variant + */ + codonVariant.get(0).variant = sf; } - else if (codonVariant.isEmpty()) + else { /* - * record (possibly non-varying) base value + * add variant with base value */ - codonVariant.add(new DnaVariant(nucleotide)); + codonVariant.add(new DnaVariant(nucleotide, sf)); } } + else if (codonVariant.isEmpty()) + { + /* + * record (possibly non-varying) base value + */ + codonVariant.add(new DnaVariant(nucleotide)); + } } } return variants; @@ -2840,7 +2806,7 @@ public class AlignmentUtils * @param unmapped * @return */ - static Map> buildMappedColumnsMap( + static SortedMap> buildMappedColumnsMap( AlignmentI unaligned, AlignmentI aligned, List unmapped) { /* @@ -2848,7 +2814,7 @@ public class AlignmentUtils * {unalignedSequence, characterPerSequence} at that position. * TreeMap keeps the entries in ascending column order. */ - Map> map = new TreeMap>(); + SortedMap> map = new TreeMap>(); /* * record any sequences that have no mapping so can't be realigned