*/
package jalview.analysis;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.NoSuchElementException;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+
+import jalview.bin.Console;
import jalview.commands.RemoveGapColCommand;
import jalview.datamodel.AlignedCodon;
import jalview.datamodel.AlignedCodonFrame;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
+import jalview.datamodel.ContactMatrixI;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.GeneLociI;
import jalview.datamodel.IncompleteCodonException;
import jalview.util.MapList;
import jalview.util.MappingUtils;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.NoSuchElementException;
-import java.util.Set;
-import java.util.SortedMap;
-import java.util.TreeMap;
-
/**
* grab bag of useful alignment manipulation operations Expect these to be
* refactored elsewhere at some point.
// TODO use Character.toLowerCase to avoid creating String objects?
char[] upstream = new String(ds
.getSequence(s.getStart() - 1 - ustream_ds, s.getStart() - 1))
- .toLowerCase().toCharArray();
+ .toLowerCase(Locale.ROOT).toCharArray();
char[] downstream = new String(
- ds.getSequence(s_end - 1, s_end + dstream_ds)).toLowerCase()
- .toCharArray();
+ ds.getSequence(s_end - 1, s_end + dstream_ds))
+ .toLowerCase(Locale.ROOT).toCharArray();
char[] coreseq = s.getSequence();
char[] nseq = new char[offset + upstream.length + downstream.length
+ coreseq.length];
if (cdnaLength != mappedLength && cdnaLength > 2)
{
String lastCodon = String.valueOf(cdnaSeqChars,
- cdnaLength - CODON_LENGTH, CODON_LENGTH).toUpperCase();
+ cdnaLength - CODON_LENGTH, CODON_LENGTH)
+ .toUpperCase(Locale.ROOT);
for (String stop : ResidueProperties.STOP_CODONS)
{
if (lastCodon.equals(stop))
*/
int startOffset = 0;
if (cdnaLength != mappedLength && cdnaLength > 2
- && String.valueOf(cdnaSeqChars, 0, CODON_LENGTH).toUpperCase()
+ && String.valueOf(cdnaSeqChars, 0, CODON_LENGTH)
+ .toUpperCase(Locale.ROOT)
.equals(ResidueProperties.START))
{
startOffset += CODON_LENGTH;
mapList = mapList.getInverse();
}
final int cdsLength = cdsDss.getLength();
- int mappedFromLength = MappingUtils.getLength(mapList
- .getFromRanges());
+ int mappedFromLength = MappingUtils
+ .getLength(mapList.getFromRanges());
int mappedToLength = MappingUtils
.getLength(mapList.getToRanges());
boolean addStopCodon = (cdsLength == mappedFromLength
*/
final Iterable<AlignmentAnnotation> matchedAlignmentAnnotations = al
.findAnnotations(seq, dsann.getCalcId(), dsann.label);
- if (!matchedAlignmentAnnotations.iterator().hasNext())
+ boolean found = false;
+ if (matchedAlignmentAnnotations != null)
+ {
+ for (AlignmentAnnotation matched : matchedAlignmentAnnotations)
+ {
+ if (dsann.description.equals(matched.description))
+ {
+ found = true;
+ break;
+ }
+ }
+ }
+ if (!found)
{
result.add(dsann);
if (labelForCalcId != null)
/**
* Adds annotations to the top of the alignment annotations, in the same order
- * as their related sequences.
+ * as their related sequences. If you already have an annotation and want to
+ * add it to a sequence in an alignment, use {@code addReferenceAnnotationTo}.
*
* @param annotations
* the annotations to add
{
for (AlignmentAnnotation ann : annotations.get(seq))
{
- AlignmentAnnotation copyAnn = new AlignmentAnnotation(ann);
- int startRes = 0;
- int endRes = ann.annotations.length;
- if (selectionGroup != null)
- {
- startRes = selectionGroup.getStartRes();
- endRes = selectionGroup.getEndRes();
- }
- copyAnn.restrict(startRes, endRes);
+ addReferenceAnnotationTo(alignment, seq, ann, selectionGroup);
+ }
+ }
+ }
- /*
- * Add to the sequence (sets copyAnn.datasetSequence), unless the
- * original annotation is already on the sequence.
- */
- if (!seq.hasAnnotation(ann))
- {
- seq.addAlignmentAnnotation(copyAnn);
- }
- // adjust for gaps
- copyAnn.adjustForAlignment();
- // add to the alignment and set visible
- alignment.addAnnotation(copyAnn);
- copyAnn.visible = true;
+  /**
+   * Makes a copy of a reference annotation {@code ann} and adds it to an
+   * alignment sequence {@code seq} in {@code alignment}, optionally limited to
+   * the extent of {@code selectionGroup}.
+   * <p>
+   * The copy is attached to {@code seq} only when {@code seq} does not already
+   * have {@code ann}; in that case any contact matrix the dataset sequence
+   * holds for {@code ann} is also associated with the copy. The copy is always
+   * adjusted for gaps, added to {@code alignment} and made visible.
+   *
+   * @param alignment
+   *          the alignment that receives the copied annotation
+   * @param seq
+   *          the alignment sequence the annotation is added to
+   * @param ann
+   *          the reference annotation to copy
+   * @param selectionGroup
+   *          - may be null, in which case the copy is restricted to the range
+   *          0..{@code ann.annotations.length}
+   * @return annotation added to {@code seq} and {@code alignment}
+   */
+  public static AlignmentAnnotation addReferenceAnnotationTo(
+          final AlignmentI alignment, final SequenceI seq,
+          final AlignmentAnnotation ann, final SequenceGroup selectionGroup)
+  {
+    AlignmentAnnotation copyAnn = new AlignmentAnnotation(ann);
+    int startRes = 0;
+    int endRes = ann.annotations.length;
+    if (selectionGroup != null)
+    {
+      // NOTE(review): presumably maps the group's start/end columns to
+      // sequence positions (less one), bounded by the sequence's start/end -
+      // confirm against SequenceI.findPosition
+      startRes = -1 + Math.min(seq.getEnd(), Math.max(seq.getStart(),
+              seq.findPosition(selectionGroup.getStartRes())));
+      endRes = -1 + Math.min(seq.getEnd(),
+              seq.findPosition(selectionGroup.getEndRes()));
+    }
+    copyAnn.restrict(startRes, endRes);
+
+    /*
+     * Add to the sequence (sets copyAnn.datasetSequence), unless the
+     * original annotation is already on the sequence.
+     */
+    if (!seq.hasAnnotation(ann))
+    {
+      /*
+       * a sequence may have no dataset sequence, in which case there is
+       * no contact matrix to carry over
+       */
+      SequenceI datasetSeq = seq.getDatasetSequence();
+      ContactMatrixI cm = datasetSeq == null ? null
+              : datasetSeq.getContactMatrixFor(ann);
+      if (cm != null)
+      {
+        seq.addContactListFor(copyAnn, cm);
+      }
+      seq.addAlignmentAnnotation(copyAnn);
+    }
+    // adjust for gaps
+    copyAnn.adjustForAlignment();
+    // add to the alignment and set visible
+    alignment.addAnnotation(copyAnn);
+    copyAnn.visible = true;
+
+    return copyAnn;
+  }
/**
}
}
+  /**
+   * Returns the first of the alignment's annotations that has a non-null
+   * sequence reference and whose {@code graph} value equals
+   * {@code graphType}, or null if there is none (or the alignment has no
+   * annotations).
+   *
+   * @param al
+   *          the alignment whose annotations are searched
+   * @param graphType
+   *          the graph type to match
+   * @return the first matching annotation, or null
+   */
+  public static AlignmentAnnotation getFirstSequenceAnnotationOfType(
+          AlignmentI al, int graphType)
+  {
+    final AlignmentAnnotation[] all = al.getAlignmentAnnotation();
+    if (all == null)
+    {
+      return null;
+    }
+    for (int i = 0; i < all.length; i++)
+    {
+      final AlignmentAnnotation candidate = all[i];
+      if (candidate.sequenceRef == null || candidate.graph != graphType)
+      {
+        // not sequence-associated, or a different graph type
+        continue;
+      }
+      return candidate;
+    }
+    return null;
+  }
+ }
+
/**
* Returns true if either sequence has a cross-reference to the other
*
productSeqs = new HashSet<>();
for (SequenceI seq : products)
{
- productSeqs.add(seq.getDatasetSequence() == null ? seq : seq
- .getDatasetSequence());
+ productSeqs.add(seq.getDatasetSequence() == null ? seq
+ : seq.getDatasetSequence());
}
}
cdsSeqs.add(cdsSeq);
- if (!dataset.getSequences().contains(cdsSeqDss))
- {
- // check if this sequence is a newly created one
- // so needs adding to the dataset
- dataset.addSequence(cdsSeqDss);
- }
-
/*
- * add a mapping from CDS to the (unchanged) mapped to range
+ * build the mapping from CDS to protein
*/
List<int[]> cdsRange = Collections
.singletonList(new int[]
MapList cdsToProteinMap = new MapList(cdsRange,
mapList.getToRanges(), mapList.getFromRatio(),
mapList.getToRatio());
- AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame();
- cdsToProteinMapping.addMap(cdsSeqDss, proteinProduct,
- cdsToProteinMap);
- /*
- * guard against duplicating the mapping if repeating this action
- */
- if (!mappings.contains(cdsToProteinMapping))
+ if (!dataset.getSequences().contains(cdsSeqDss))
{
- mappings.add(cdsToProteinMapping);
+ /*
+ * if this sequence is a newly created one, add it to the dataset
+ * and make a CDS to protein mapping (if sequence already exists,
+ * CDS-to-protein mapping _is_ the transcript-to-protein mapping)
+ */
+ dataset.addSequence(cdsSeqDss);
+ AlignedCodonFrame cdsToProteinMapping = new AlignedCodonFrame();
+ cdsToProteinMapping.addMap(cdsSeqDss, proteinProduct,
+ cdsToProteinMap);
+
+ /*
+ * guard against duplicating the mapping if repeating this action
+ */
+ if (!mappings.contains(cdsToProteinMapping))
+ {
+ mappings.add(cdsToProteinMapping);
+ }
}
propagateDBRefsToCDS(cdsSeqDss, dnaSeq.getDatasetSequence(),
List<DBRefEntry> primrefs = dnaDss.getPrimaryDBRefs();
for (int ip = 0, np = primrefs.size(); ip < np; ip++)
{
- DBRefEntry primRef = primrefs.get(ip);
+ DBRefEntry primRef = primrefs.get(ip);
/*
* create a cross-reference from CDS to the source sequence's
* primary reference and vice versa
*/
String source = primRef.getSource();
String version = primRef.getVersion();
- DBRefEntry cdsCrossRef = new DBRefEntry(source, source + ":"
- + version, primRef.getAccessionId());
- cdsCrossRef.setMap(new Mapping(dnaDss, new MapList(cdsToDnaMap)));
+ DBRefEntry cdsCrossRef = new DBRefEntry(source,
+ source + ":" + version, primRef.getAccessionId());
+ cdsCrossRef
+ .setMap(new Mapping(dnaDss, new MapList(cdsToDnaMap)));
cdsSeqDss.addDBRef(cdsCrossRef);
- dnaSeq.addDBRef(new DBRefEntry(source, version, cdsSeq
- .getName(), new Mapping(cdsSeqDss, dnaToCdsMap)));
+ dnaSeq.addDBRef(new DBRefEntry(source, version,
+ cdsSeq.getName(), new Mapping(cdsSeqDss, dnaToCdsMap)));
// problem here is that the cross-reference is synthesized -
// cdsSeq.getName() may be like 'CDS|dnaaccession' or
// 'CDS|emblcdsacc'
DBRefEntry proteinToCdsRef = new DBRefEntry(source, version,
cdsSeq.getName());
//
- proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap
- .getInverse()));
+ proteinToCdsRef.setMap(
+ new Mapping(cdsSeqDss, cdsToProteinMap.getInverse()));
proteinProduct.addDBRef(proteinToCdsRef);
}
/*
}
}
- AlignmentI cds = new Alignment(cdsSeqs.toArray(new SequenceI[cdsSeqs
- .size()]));
+ AlignmentI cds = new Alignment(
+ cdsSeqs.toArray(new SequenceI[cdsSeqs.size()]));
cds.setDataset(dataset);
return cds;
SequenceI newSeq = null;
- final MapList maplist = mapping.getMap();
- if (maplist.isContiguous() && maplist.isFromForwardStrand())
- {
- /*
- * just a subsequence, keep same dataset sequence
- */
- int start = maplist.getFromLowest();
- int end = maplist.getFromHighest();
- newSeq = seq.getSubSequence(start - 1, end);
- newSeq.setName(seqId);
- }
- else
- {
- /*
- * construct by splicing mapped from ranges
- */
- char[] seqChars = seq.getSequence();
- List<int[]> fromRanges = maplist.getFromRanges();
- int cdsWidth = MappingUtils.getLength(fromRanges);
- char[] newSeqChars = new char[cdsWidth];
+ /*
+ * construct CDS sequence by splicing mapped from ranges
+ */
+ char[] seqChars = seq.getSequence();
+ List<int[]> fromRanges = mapping.getMap().getFromRanges();
+ int cdsWidth = MappingUtils.getLength(fromRanges);
+ char[] newSeqChars = new char[cdsWidth];
- int newPos = 0;
- for (int[] range : fromRanges)
+ int newPos = 0;
+ for (int[] range : fromRanges)
+ {
+ if (range[0] <= range[1])
{
- if (range[0] <= range[1])
- {
- // forward strand mapping - just copy the range
- int length = range[1] - range[0] + 1;
- System.arraycopy(seqChars, range[0] - 1, newSeqChars, newPos,
- length);
- newPos += length;
- }
- else
+ // forward strand mapping - just copy the range
+ int length = range[1] - range[0] + 1;
+ System.arraycopy(seqChars, range[0] - 1, newSeqChars, newPos,
+ length);
+ newPos += length;
+ }
+ else
+ {
+ // reverse strand mapping - copy and complement one by one
+ for (int i = range[0]; i >= range[1]; i--)
{
- // reverse strand mapping - copy and complement one by one
- for (int i = range[0]; i >= range[1]; i--)
- {
- newSeqChars[newPos++] = Dna.getComplement(seqChars[i - 1]);
- }
+ newSeqChars[newPos++] = Dna.getComplement(seqChars[i - 1]);
}
}
}
else
{
- System.err.println(
- "JAL-2154 regression: warning - found (and ignnored a duplicate CDS sequence):"
+ Console.error(
+ "JAL-2154 regression: warning - found (and ignored) a duplicate CDS sequence:"
+ mtch.toString());
}
}
/*
* get features, optionally restricted by an ontology term
*/
- List<SequenceFeature> sfs = select == null ? fromSeq.getFeatures()
- .getPositionalFeatures() : fromSeq.getFeatures()
- .getFeaturesByOntology(select);
+ List<SequenceFeature> sfs = select == null
+ ? fromSeq.getFeatures().getPositionalFeatures()
+ : fromSeq.getFeatures().getFeaturesByOntology(select);
int count = 0;
for (SequenceFeature sf : sfs)
{
List<int[]> result = new ArrayList<>();
- List<SequenceFeature> sfs = dnaSeq.getFeatures().getFeaturesByOntology(
- SequenceOntologyI.CDS);
+ List<SequenceFeature> sfs = dnaSeq.getFeatures()
+ .getFeaturesByOntology(SequenceOntologyI.CDS);
if (sfs.isEmpty())
{
return result;
int phase = 0;
try
{
- String s = sf.getPhase();
- if (s != null)
- {
- phase = Integer.parseInt(s);
- }
+ String s = sf.getPhase();
+ if (s != null)
+ {
+ phase = Integer.parseInt(s);
+ }
} catch (NumberFormatException e)
{
// leave as zero
SequenceIdMatcher matcher = new SequenceIdMatcher(seqs);
if (xrefs != null)
{
- // BH 2019.01.25 recoded to remove iterators
-
+ // BH 2019.01.25 recoded to remove iterators
+
for (int ix = 0, nx = xrefs.length; ix < nx; ix++)
{
- SequenceI xref = xrefs[ix];
+ SequenceI xref = xrefs[ix];
List<DBRefEntry> dbrefs = xref.getDBRefs();
if (dbrefs != null)
{
* true; else returns false
*
* @param unaligned
- * - sequences to be aligned based on aligned
+ * - sequences to be aligned based on aligned
* @param aligned
- * - 'guide' alignment containing sequences derived from same
- * dataset as unaligned
+ * - 'guide' alignment containing sequences derived from same dataset
+ * as unaligned
* @return
*/
static boolean alignAsSameSequences(AlignmentI unaligned,
{
return false;
}
- SequenceI alignedSeq = alignedDatasets.get(ds)
- .get(0);
+ SequenceI alignedSeq = alignedDatasets.get(ds).get(0);
int startCol = alignedSeq.findIndex(seq.getStart()); // 1..
leftmost = Math.min(leftmost, startCol);
}
{
List<SequenceI> alignedSequences = alignedDatasets
.get(seq.getDatasetSequence());
+ if (alignedSequences.isEmpty())
+ {
+ /*
+ * defensive check - shouldn't happen! (JAL-3536)
+ */
+ continue;
+ }
SequenceI alignedSeq = alignedSequences.get(0);
/*