import jalview.util.DBRefUtils;
import jalview.util.MapList;
import jalview.util.MappingUtils;
+import jalview.util.RangeComparator;
import jalview.util.StringUtils;
import java.io.UnsupportedEncodingException;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.Set;
+import java.util.SortedMap;
import java.util.TreeMap;
/**
}
}
// TODO use Character.toLowerCase to avoid creating String objects?
- char[] upstream = new String(ds.getSequence(s.getStart() - 1
- - ustream_ds, s.getStart() - 1)).toLowerCase().toCharArray();
- char[] downstream = new String(ds.getSequence(s_end - 1, s_end
- + dstream_ds)).toLowerCase().toCharArray();
+ char[] upstream = new String(ds
+ .getSequence(s.getStart() - 1 - ustream_ds, s.getStart() - 1))
+ .toLowerCase().toCharArray();
+ char[] downstream = new String(
+ ds.getSequence(s_end - 1, s_end + dstream_ds)).toLowerCase()
+ .toCharArray();
char[] coreseq = s.getSequence();
char[] nseq = new char[offset + upstream.length + downstream.length
+ coreseq.length];
System.arraycopy(upstream, 0, nseq, p, upstream.length);
System.arraycopy(coreseq, 0, nseq, p + upstream.length,
coreseq.length);
- System.arraycopy(downstream, 0, nseq, p + coreseq.length
- + upstream.length, downstream.length);
+ System.arraycopy(downstream, 0, nseq,
+ p + coreseq.length + upstream.length, downstream.length);
s.setSequence(new String(nseq));
s.setStart(s.getStart() - ustream_ds);
s.setEnd(s_end + downstream.length);
* @return
*/
protected static boolean mapProteinToCdna(
- final AlignmentI proteinAlignment,
- final AlignmentI cdnaAlignment, Set<SequenceI> mappedDna,
- Set<SequenceI> mappedProtein, boolean xrefsOnly)
+ final AlignmentI proteinAlignment, final AlignmentI cdnaAlignment,
+ Set<SequenceI> mappedDna, Set<SequenceI> mappedProtein,
+ boolean xrefsOnly)
{
boolean mappingExistsOrAdded = false;
List<SequenceI> thisSeqs = proteinAlignment.getSequences();
* Don't map non-xrefd sequences more than once each. This heuristic
* allows us to pair up similar sequences in ordered alignments.
*/
- if (!xrefsOnly
- && (mappedProtein.contains(aaSeq) || mappedDna
- .contains(cdnaSeq)))
+ if (!xrefsOnly && (mappedProtein.contains(aaSeq)
+ || mappedDna.contains(cdnaSeq)))
{
continue;
}
/**
* Builds a mapping (if possible) of a cDNA to a protein sequence.
* <ul>
- * <li>first checks if the cdna translates exactly to the protein sequence</li>
+ * <li>first checks if the cdna translates exactly to the protein
+ * sequence</li>
* <li>else checks for translation after removing a STOP codon</li>
* <li>else checks for translation after removing a START codon</li>
* <li>if that fails, inspect CDS features on the cDNA sequence</li>
* String objects.
*/
final SequenceI proteinDataset = proteinSeq.getDatasetSequence();
- char[] aaSeqChars = proteinDataset != null ? proteinDataset
- .getSequence() : proteinSeq.getSequence();
+ char[] aaSeqChars = proteinDataset != null
+ ? proteinDataset.getSequence()
+ : proteinSeq.getSequence();
final SequenceI cdnaDataset = cdnaSeq.getDatasetSequence();
char[] cdnaSeqChars = cdnaDataset != null ? cdnaDataset.getSequence()
: cdnaSeq.getSequence();
* If lengths still don't match, try ignoring start codon.
*/
int startOffset = 0;
- if (cdnaLength != mappedLength
- && cdnaLength > 2
+ if (cdnaLength != mappedLength && cdnaLength > 2
&& String.valueOf(cdnaSeqChars, 0, CODON_LENGTH).toUpperCase()
.equals(ResidueProperties.START))
{
/*
* protein is translation of dna (+/- start/stop codons)
*/
- MapList map = new MapList(new int[] { cdnaStart, cdnaEnd }, new int[]
- { proteinStart, proteinEnd }, CODON_LENGTH, 1);
+ MapList map = new MapList(new int[] { cdnaStart, cdnaEnd },
+ new int[]
+ { proteinStart, proteinEnd }, CODON_LENGTH, 1);
return map;
}
int aaPos = 0;
int dnaPos = cdnaStart;
- for (; dnaPos < cdnaSeqChars.length - 2 && aaPos < aaSeqChars.length; dnaPos += CODON_LENGTH, aaPos++)
+ for (; dnaPos < cdnaSeqChars.length - 2
+ && aaPos < aaSeqChars.length; dnaPos += CODON_LENGTH, aaPos++)
{
String codon = String.valueOf(cdnaSeqChars, dnaPos, CODON_LENGTH);
final String translated = ResidueProperties.codonTranslate(codon);
* @param preserveUnmappedGaps
* @param preserveMappedGaps
*/
- public static void alignSequenceAs(SequenceI alignTo,
- SequenceI alignFrom, AlignedCodonFrame mapping, String myGap,
- char sourceGap, boolean preserveMappedGaps,
- boolean preserveUnmappedGaps)
+ public static void alignSequenceAs(SequenceI alignTo, SequenceI alignFrom,
+ AlignedCodonFrame mapping, String myGap, char sourceGap,
+ boolean preserveMappedGaps, boolean preserveUnmappedGaps)
{
// TODO generalise to work for Protein-Protein, dna-dna, dna-protein
}
else
{
- gapsToAdd = Math.min(intronLength + trailingGapLength
- - sourceGapMappedLength, trailingGapLength);
+ gapsToAdd = Math.min(
+ intronLength + trailingGapLength - sourceGapMappedLength,
+ trailingGapLength);
}
}
}
* @return
*/
static boolean alignCdsSequenceAsProtein(SequenceI cdsSeq,
- AlignmentI protein, List<AlignedCodonFrame> mappings, char gapChar)
+ AlignmentI protein, List<AlignedCodonFrame> mappings,
+ char gapChar)
{
SequenceI cdsDss = cdsSeq.getDatasetSequence();
if (cdsDss == null)
mapList = mapList.getInverse();
}
int cdsLength = cdsDss.getLength();
- int mappedFromLength = MappingUtils.getLength(mapList
- .getFromRanges());
+ int mappedFromLength = MappingUtils
+ .getLength(mapList.getFromRanges());
int mappedToLength = MappingUtils
.getLength(mapList.getToRanges());
boolean addStopCodon = (cdsLength == mappedFromLength
* CODON_LENGTH + CODON_LENGTH)
- || (peptide.getDatasetSequence().getLength() == mappedFromLength - 1);
+ || (peptide.getDatasetSequence()
+ .getLength() == mappedFromLength - 1);
if (cdsLength != mappedToLength && !addStopCodon)
{
- System.err
- .println(String
- .format("Can't align cds as protein (length mismatch %d/%d): %s",
- cdsLength, mappedToLength,
- cdsSeq.getName()));
+ System.err.println(String.format(
+ "Can't align cds as protein (length mismatch %d/%d): %s",
+ cdsLength, mappedToLength, cdsSeq.getName()));
}
/*
break;
}
}
- for (int i = nucleotides.length - CODON_LENGTH; i < nucleotides.length; i++)
+ for (int i = nucleotides.length
+ - CODON_LENGTH; i < nucleotides.length; i++)
{
alignedCds[cdsCol++] = nucleotides[i];
}
if (prot != null)
{
Mapping seqMap = mapping.getMappingForSequence(dnaSeq);
- addCodonPositions(dnaSeq, prot, protein.getGapCharacter(),
- seqMap, alignedCodons);
+ addCodonPositions(dnaSeq, prot, protein.getGapCharacter(), seqMap,
+ alignedCodons);
unmappedProtein.remove(prot);
}
}
AlignedCodon codon = sequenceCodon.getValue();
if (codon.peptideCol > 1)
{
- System.err
- .println("Problem mapping protein with >1 unmapped start positions: "
+ System.err.println(
+ "Problem mapping protein with >1 unmapped start positions: "
+ seq.getName());
}
else if (codon.peptideCol == 1)
if (lastCodon != null)
{
AlignedCodon firstPeptide = new AlignedCodon(lastCodon.pos1,
- lastCodon.pos2, lastCodon.pos3, String.valueOf(seq
- .getCharAt(0)), 0);
+ lastCodon.pos2, lastCodon.pos3,
+ String.valueOf(seq.getCharAt(0)), 0);
toAdd.put(seq, firstPeptide);
}
else
* <ul>
* <li>One alignment must be nucleotide, and the other protein</li>
* <li>At least one pair of sequences must be already mapped, or mappable</li>
- * <li>Mappable means the nucleotide translation matches the protein sequence</li>
+ * <li>Mappable means the nucleotide translation matches the protein
+ * sequence</li>
* <li>The translation may ignore start and stop codons if present in the
* nucleotide</li>
* </ul>
return false;
}
- SequenceI dnaDs = dnaSeq.getDatasetSequence() == null ? dnaSeq : dnaSeq
- .getDatasetSequence();
- SequenceI proteinDs = proteinSeq.getDatasetSequence() == null ? proteinSeq
+ SequenceI dnaDs = dnaSeq.getDatasetSequence() == null ? dnaSeq
+ : dnaSeq.getDatasetSequence();
+ SequenceI proteinDs = proteinSeq.getDatasetSequence() == null
+ ? proteinSeq
: proteinSeq.getDatasetSequence();
for (AlignedCodonFrame mapping : mappings)
* the alignment to check for presence of annotations
*/
public static void findAddableReferenceAnnotations(
- List<SequenceI> sequenceScope,
- Map<String, String> labelForCalcId,
+ List<SequenceI> sequenceScope, Map<String, String> labelForCalcId,
final Map<SequenceI, List<AlignmentAnnotation>> candidates,
AlignmentI al)
{
/**
* Set visibility of alignment annotations of specified types (labels), for
- * specified sequences. This supports controls like
- * "Show all secondary structure", "Hide all Temp factor", etc.
+ * specified sequences. This supports controls like "Show all secondary
+ * structure", "Hide all Temp factor", etc.
*
* @param al the alignment to scan for annotations
* @param types
{
if (anyType || types.contains(aa.label))
{
- if ((aa.sequenceRef != null)
- && (forSequences == null || forSequences
- .contains(aa.sequenceRef)))
+ if ((aa.sequenceRef != null) && (forSequences == null
+ || forSequences.contains(aa.sequenceRef)))
{
aa.visible = doShow;
}
productSeqs = new HashSet<SequenceI>();
for (SequenceI seq : products)
{
- productSeqs.add(seq.getDatasetSequence() == null ? seq : seq
- .getDatasetSequence());
+ productSeqs.add(seq.getDatasetSequence() == null ? seq
+ : seq.getDatasetSequence());
}
}
/*
* add a mapping from CDS to the (unchanged) mapped to range
*/
- List<int[]> cdsRange = Collections.singletonList(new int[] { 1,
- cdsSeq.getLength() });
+ List<int[]> cdsRange = Collections
+ .singletonList(new int[]
+ { 1, cdsSeq.getLength() });
MapList cdsToProteinMap = new MapList(cdsRange,
mapList.getToRanges(), mapList.getFromRatio(),
mapList.getToRatio());
// 'CDS|emblcdsacc'
// assuming cds version same as dna ?!?
- DBRefEntry proteinToCdsRef = new DBRefEntry(
- primRef.getSource(), primRef.getVersion(),
- cdsSeq.getName());
+ DBRefEntry proteinToCdsRef = new DBRefEntry(primRef.getSource(),
+ primRef.getVersion(), cdsSeq.getName());
//
- proteinToCdsRef.setMap(new Mapping(cdsSeqDss, cdsToProteinMap
- .getInverse()));
+ proteinToCdsRef.setMap(
+ new Mapping(cdsSeqDss, cdsToProteinMap.getInverse()));
proteinProduct.addDBRef(proteinToCdsRef);
}
}
}
- AlignmentI cds = new Alignment(cdsSeqs.toArray(new SequenceI[cdsSeqs
- .size()]));
+ AlignmentI cds = new Alignment(
+ cdsSeqs.toArray(new SequenceI[cdsSeqs.size()]));
cds.setDataset(dataset);
return cds;
* is this mapping from the whole dna sequence (i.e. CDS)?
* allowing for possible stop codon on dna but not peptide
*/
- int mappedFromLength = MappingUtils.getLength(aMapping.getMap()
- .getFromRanges());
+ int mappedFromLength = MappingUtils
+ .getLength(aMapping.getMap().getFromRanges());
int dnaLength = seqDss.getLength();
if (mappedFromLength == dnaLength
|| mappedFromLength == dnaLength - CODON_LENGTH)
&& proteinProduct == mapping.getTo()
&& seqDss != map.getFromSeq())
{
- mappedFromLength = MappingUtils.getLength(mapping.getMap()
- .getFromRanges());
+ mappedFromLength = MappingUtils
+ .getLength(mapping.getMap().getFromRanges());
if (mappedFromLength == map.getFromSeq().getLength())
{
/*
}
else
{
- System.err
- .println("JAL-2154 regression: warning - found (and ignnored a duplicate CDS sequence):"
+ System.err.println(
+ "JAL-2154 regression: warning - found (and ignnored a duplicate CDS sequence):"
+ mtch.toString());
}
}
for (DBRefEntry cdsref : direct)
{
// clone maplist and mapping
- MapList cdsposmap = new MapList(Arrays.asList(new int[][] { new int[]
- { cdsSeq.getStart(), cdsSeq.getEnd() } }), cdsref.getMap().getMap()
- .getToRanges(), 3, 1);
- Mapping cdsmap = new Mapping(cdsref.getMap().getTo(), cdsref.getMap()
- .getMap());
+ MapList cdsposmap = new MapList(
+ Arrays.asList(new int[][]
+ { new int[] { cdsSeq.getStart(), cdsSeq.getEnd() } }),
+ cdsref.getMap().getMap().getToRanges(), 3, 1);
+ Mapping cdsmap = new Mapping(cdsref.getMap().getTo(),
+ cdsref.getMap().getMap());
// create dbref
DBRefEntry newref = new DBRefEntry(cdsref.getSource(),
- cdsref.getVersion(), cdsref.getAccessionId(), new Mapping(
- cdsmap.getTo(), cdsposmap));
+ cdsref.getVersion(), cdsref.getAccessionId(),
+ new Mapping(cdsmap.getTo(), cdsposmap));
// and see if we can map to the protein product for this mapping.
// onSource is the filtered set of accessions on protein that we are
if (begin > end)
{
// shouldn't happen!
- System.err
- .println("Error: start phase extends beyond start CDS in "
+ System.err.println(
+ "Error: start phase extends beyond start CDS in "
+ dnaSeq.getName());
}
}
* ranges are assembled in order. Other cases should not use this method,
* but instead construct an explicit mapping for CDS (e.g. EMBL parsing).
*/
- Collections.sort(result, new Comparator<int[]>()
- {
- @Override
- public int compare(int[] o1, int[] o2)
- {
- return Integer.compare(o1[0], o2[0]);
- }
- });
+ Collections.sort(result, new RangeComparator(true));
return result;
}
* are currently ignored here
*/
String trans = codon.contains("-") ? "-"
- : (codon.length() > CODON_LENGTH ? null : ResidueProperties
- .codonTranslate(codon));
+ : (codon.length() > CODON_LENGTH ? null
+ : ResidueProperties.codonTranslate(codon));
if (trans != null && !trans.equals(residue))
{
String residue3Char = StringUtils
StringBuilder link = new StringBuilder(32);
try
{
- link.append(desc)
- .append(" ")
- .append(id)
- .append("|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=")
+ link.append(desc).append(" ").append(id).append(
+ "|http://www.ensembl.org/Homo_sapiens/Variation/Summary?v=")
.append(URLEncoder.encode(id, "UTF-8"));
sf.addLink(link.toString());
} catch (UnsupportedEncodingException e)
* get this peptide's codon positions e.g. [3, 4, 5] or [4, 7, 10]
*/
int[] codon = peptidePosition == lastPeptidePostion ? lastCodon
- : MappingUtils.flattenRanges(dnaToProtein.locateInFrom(
- peptidePosition, peptidePosition));
+ : MappingUtils.flattenRanges(dnaToProtein
+ .locateInFrom(peptidePosition, peptidePosition));
lastPeptidePostion = peptidePosition;
lastCodon = codon;
*/
for (int codonPos = 0; codonPos < CODON_LENGTH; codonPos++)
{
- String nucleotide = String.valueOf(
- dnaSeq.getCharAt(codon[codonPos] - dnaStart))
+ String nucleotide = String
+ .valueOf(dnaSeq.getCharAt(codon[codonPos] - dnaStart))
.toUpperCase();
List<DnaVariant> codonVariant = codonVariants[codonPos];
if (codon[codonPos] == dnaCol)
*/
for (SequenceI seq : unaligned.getSequences())
{
- List<SequenceI> alignedSequences = alignedDatasets.get(seq
- .getDatasetSequence());
+ List<SequenceI> alignedSequences = alignedDatasets
+ .get(seq.getDatasetSequence());
// TODO: getSequenceAsString() will be deprecated in the future
// TODO: need to leave to SequenceI implementor to update gaps
seq.setSequence(alignedSequences.get(0).getSequenceAsString());
* @param unmapped
* @return
*/
- static Map<Integer, Map<SequenceI, Character>> buildMappedColumnsMap(
- AlignmentI unaligned, AlignmentI aligned, List<SequenceI> unmapped)
+ static SortedMap<Integer, Map<SequenceI, Character>> buildMappedColumnsMap(
+ AlignmentI unaligned, AlignmentI aligned,
+ List<SequenceI> unmapped)
{
/*
* Map will hold, for each aligned column position, a map of
* {unalignedSequence, characterPerSequence} at that position.
* TreeMap keeps the entries in ascending column order.
*/
- Map<Integer, Map<SequenceI, Character>> map = new TreeMap<Integer, Map<SequenceI, Character>>();
+ SortedMap<Integer, Map<SequenceI, Character>> map = new TreeMap<Integer, Map<SequenceI, Character>>();
/*
* record any sequences that have no mapping so can't be realigned
}
/**
- * Helper method that adds to a map the mapped column positions of a sequence. <br>
+ * Helper method that adds to a map the mapped column positions of a sequence.
+ * <br>
* For example if aaTT-Tg-gAAA is mapped to TTTAAA then the map should record
* that columns 3,4,6,10,11,12 map to characters T,T,T,A,A,A of the mapped to
* sequence.
*/
if (seqMap.getTo() == fromSeq.getDatasetSequence())
{
- seqMap = new Mapping(seq.getDatasetSequence(), seqMap.getMap()
- .getInverse());
+ seqMap = new Mapping(seq.getDatasetSequence(),
+ seqMap.getMap().getInverse());
}
char[] fromChars = fromSeq.getSequence();