X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FFeaturesFile.java;h=2d91a085a8a48cd165f1045da59764172cab61a6;hb=1f9b82cddbfb470585d5f4b53c101f9ffcb84b5a;hp=ee6ba11a18cdd9615055a351cf8211992330eed1;hpb=26ba864a6c290121fe6cf616794d2d0bea65fb7d;p=jalview.git diff --git a/src/jalview/io/FeaturesFile.java b/src/jalview/io/FeaturesFile.java index ee6ba11..2d91a08 100755 --- a/src/jalview/io/FeaturesFile.java +++ b/src/jalview/io/FeaturesFile.java @@ -20,14 +20,19 @@ */ package jalview.io; +import jalview.analysis.AlignmentUtils; import jalview.analysis.SequenceIdMatcher; import jalview.api.AlignViewportI; +import jalview.api.FeaturesSourceI; import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; import jalview.datamodel.SequenceDummy; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; +import jalview.io.gff.GffHelperBase; +import jalview.io.gff.GffHelperFactory; +import jalview.io.gff.GffHelperI; import jalview.schemes.AnnotationColourGradient; import jalview.schemes.GraduatedColor; import jalview.schemes.UserColourScheme; @@ -63,13 +68,13 @@ import java.util.StringTokenizer; * @author jbprocter * @author gmcarstairs */ -public class FeaturesFile extends AlignFile +public class FeaturesFile extends AlignFile implements FeaturesSourceI { - protected static final String STRAND = "STRAND"; + private static final String ID_NOT_SPECIFIED = "ID_NOT_SPECIFIED"; - protected static final String FRAME = "FRAME"; + private static final String NOTE = "Note"; - protected static final String ATTRIBUTES = "ATTRIBUTES"; + protected static final String FRAME = "FRAME"; protected static final String TAB = "\t"; @@ -186,7 +191,7 @@ public class FeaturesFile extends AlignFile String line = null; try { - StringTokenizer st; + String[] gffColumns; String featureGroup = null; while ((line = nextLine()) != null) @@ -201,41 +206,40 @@ public class FeaturesFile extends AlignFile continue; } - st = new StringTokenizer(line, TAB); - if (st.countTokens() == 1) + gffColumns = line.split("\\t"); // tab as regex + if (gffColumns.length == 1) { if (line.trim().equalsIgnoreCase("GFF")) { /* - * Jalview features file with appendded GFF - * assume GFF2 (though it may declare gff-version 3) + * Jalview features file with appended GFF + * assume GFF2 (though it may declare ##gff-version 3) */ gffVersion = 2; continue; } } - if (st.countTokens() > 1 && st.countTokens() < 4) + if (gffColumns.length > 1 && gffColumns.length < 4) { /* * if 2 or 3 tokens, we anticipate either 'startgroup', 'endgroup' or - * a feature type colour specification; not GFF format + * a feature type colour specification */ - String ft = st.nextToken(); + String ft = gffColumns[0]; if (ft.equalsIgnoreCase("startgroup")) { - featureGroup = st.nextToken(); + featureGroup = gffColumns[1]; } else if (ft.equalsIgnoreCase("endgroup")) { // We should check whether this is the current group, // but at present theres no way of showing more than 1 group - st.nextToken(); featureGroup = null; } else { - parseFeatureColour(line, ft, st, colours); + parseFeatureColour(line, ft, gffColumns, colours); } continue; } @@ -247,12 +251,12 @@ public class FeaturesFile extends AlignFile */ if (gffVersion == 0) { - parseJalviewFeature(line, st, align, colours, removeHTML, + parseJalviewFeature(line, gffColumns, align, colours, removeHTML, relaxedIdmatching, featureGroup); } else { - parseGffFeature(st, align, relaxedIdmatching, newseqs); + parseGff(gffColumns, align, relaxedIdmatching, newseqs); } } resetMatcher(); @@ -267,56 +271,66 @@ public class FeaturesFile extends AlignFile return false; } + /* + * experimental - add any dummy sequences with features to the alignment + * - we need them for Ensembl feature extraction - though maybe not otherwise + */ + for (SequenceI newseq : newseqs) + { + if (newseq.getSequenceFeatures() != null) + { + align.addSequence(newseq); + } + } return true; } /** - * Try to parse a Jalview format feature specification. Returns true if - * successful or false if not. + * Try to parse a Jalview format feature specification and add it as a + * sequence feature to any matching sequences in the alignment. Returns true + * if successful (a feature was added), or false if not. * * @param line - * @param st + * @param gffColumns * @param alignment * @param featureColours * @param removeHTML * @param relaxedIdmatching * @param featureGroup */ - protected boolean parseJalviewFeature(String line, StringTokenizer st, + protected boolean parseJalviewFeature(String line, String[] gffColumns, AlignmentI alignment, Map featureColours, - boolean removeHTML, boolean relaxedIdmatching, String featureGroup) + boolean removeHTML, boolean relaxedIdMatching, String featureGroup) { /* - * Jalview: description seqid seqIndex start end type [score] + * tokens: description seqid seqIndex start end type [score] */ - String desc = st.nextToken(); - String seqId = st.nextToken(); - SequenceI seq = findName(alignment, seqId, relaxedIdmatching, null); - if (!st.hasMoreTokens()) + if (gffColumns.length < 6) { - System.err - .println("DEBUG: Run out of tokens when trying to identify the destination for the feature.. giving up."); - // in all probability, this isn't a file we understand, so bail - // quietly. + System.err.println("Ignoring feature line '" + line + + "' with too few columns (" + gffColumns.length + ")"); return false; } + String desc = gffColumns[0]; + String seqId = gffColumns[1]; + SequenceI seq = findSequence(seqId, alignment, null, relaxedIdMatching); - if (!seqId.equals("ID_NOT_SPECIFIED")) + if (!ID_NOT_SPECIFIED.equals(seqId)) { - seq = findName(alignment, seqId, relaxedIdmatching, null); - st.nextToken(); + seq = findSequence(seqId, alignment, null, relaxedIdMatching); } else { seqId = null; seq = null; + String seqIndex = gffColumns[2]; try { - int idx = Integer.parseInt(st.nextToken()); + int idx = Integer.parseInt(seqIndex); seq = alignment.getSequenceAt(idx); } catch (NumberFormatException ex) { - // continue + System.err.println("Invalid sequence index: " + seqIndex); } } @@ -326,10 +340,10 @@ public class FeaturesFile extends AlignFile return false; } - int startPos = Integer.parseInt(st.nextToken()); - int endPos = Integer.parseInt(st.nextToken()); + int startPos = Integer.parseInt(gffColumns[3]); + int endPos = Integer.parseInt(gffColumns[4]); - String ft = st.nextToken(); + String ft = gffColumns[5]; if (!featureColours.containsKey(ft)) { @@ -340,18 +354,18 @@ public class FeaturesFile extends AlignFile UserColourScheme ucs = new UserColourScheme(ft); featureColours.put(ft, ucs.findColour('A')); } - SequenceFeature sf = new SequenceFeature(ft, desc, "", - startPos, endPos, featureGroup); - if (st.hasMoreTokens()) + SequenceFeature sf = new SequenceFeature(ft, desc, "", startPos, + endPos, featureGroup); + if (gffColumns.length > 6) { - float score = 0f; + float score = Float.NaN; try { - score = new Float(st.nextToken()).floatValue(); + score = new Float(gffColumns[6]).floatValue(); // update colourgradient bounds if allowed to } catch (NumberFormatException ex) { - // leave as 0 + // leave as NaN } sf.setScore(score); } @@ -375,16 +389,16 @@ public class FeaturesFile extends AlignFile * the current input line (for error messages only) * @param featureType * the first token on the line - * @param st - * holds remaining tokens on the line + * @param gffColumns + * holds tokens on the line * @param colours * map to which to add derived colour specification */ protected void parseFeatureColour(String line, String featureType, - StringTokenizer st, Map colours) + String[] gffColumns, Map colours) { Object colour = null; - String colscheme = st.nextToken(); + String colscheme = gffColumns[1]; if (colscheme.indexOf("|") > -1 || colscheme.trim().equalsIgnoreCase("label")) { @@ -599,24 +613,28 @@ public class FeaturesFile extends AlignFile /** * Returns a sequence matching the given id, as follows *
    - *
  • matching is on exact sequence name, or on a token within the sequence - * name, or a dbxref, if relaxed matching is selected
  • + *
  • strict matching is on exact sequence name
  • + *
  • relaxed matching allows matching on a token within the sequence name, + * or a dbxref
  • *
  • first tries to find a match in the alignment sequences
  • - *
  • else tries to find a match in the new sequences already generated + *
  • else tries to find a match in the new sequences already generated while * parsing the features file
  • *
  • else creates a new placeholder sequence, adds it to the new sequences * list, and returns it
  • *
* - * @param align * @param seqId - * @param relaxedIdMatching + * @param align * @param newseqs + * @param relaxedIdMatching + * * @return */ - protected SequenceI findName(AlignmentI align, String seqId, - boolean relaxedIdMatching, List newseqs) + protected SequenceI findSequence(String seqId, AlignmentI align, + List newseqs, boolean relaxedIdMatching) { + // TODO encapsulate in SequenceIdMatcher, share the matcher + // with the GffHelper (removing code duplication) SequenceI match = null; if (relaxedIdMatching) { @@ -986,7 +1004,8 @@ public class FeaturesFile extends AlignFile * a map whose keys are the type names of visible features * @return */ - public String printGffFormat(SequenceI[] sequences, Map visible) + public String printGffFormat(SequenceI[] sequences, + Map visible) { return printGffFormat(sequences, visible, true, true); } @@ -1002,7 +1021,8 @@ public class FeaturesFile extends AlignFile * @param includeNonPositionalFeatures * @return */ - public String printGffFormat(SequenceI[] sequences, Map visible, boolean outputVisibleOnly, + public String printGffFormat(SequenceI[] sequences, + Map visible, boolean outputVisibleOnly, boolean includeNonPositionalFeatures) { StringBuilder out = new StringBuilder(256); @@ -1034,13 +1054,13 @@ public class FeaturesFile extends AlignFile */ continue; } - + source = sf.featureGroup; if (source == null) { source = sf.getDescription(); } - + out.append(seq.getName()); out.append(TAB); out.append(source); @@ -1053,239 +1073,202 @@ public class FeaturesFile extends AlignFile out.append(TAB); out.append(sf.score); out.append(TAB); - - out.append(sf.getValue(STRAND, ".")); + + int strand = sf.getStrand(); + out.append(strand == 1 ? "+" : (strand == -1 ? "-" : ".")); out.append(TAB); - + out.append(sf.getValue(FRAME, ".")); - + // miscellaneous key-values (GFF column 9) - String attributes = (String) sf.getValue(ATTRIBUTES); + String attributes = sf.getAttributes(); if (attributes != null) { out.append(TAB).append(attributes); } - + out.append(newline); } } } - + return out.toString(); } /** - * Helper method to make a mapping given a set of attributes for a GFF feature + * Returns a mapping given list of one or more Align descriptors (exonerate + * format) * - * @param set - * @param attr + * @param alignedRegions + * a list of "Align fromStart toStart fromCount" + * @param mapIsFromCdna + * if true, 'from' is dna, else 'from' is protein * @param strand * either 1 (forward) or -1 (reverse) * @return - * @throws InvalidGFF3FieldException + * @throws IOException */ protected MapList constructCodonMappingFromAlign( - Map> set, String attr, - int strand) throws InvalidGFF3FieldException + List alignedRegions, boolean mapIsFromCdna, int strand) + throws IOException { if (strand == 0) { - throw new InvalidGFF3FieldException(attr, set, + throw new IOException( "Invalid strand for a codon mapping (cannot be 0)"); } - List fromrange = new ArrayList(); - List torange = new ArrayList(); - int lastppos = 0, lastpframe = 0; - for (String range : set.get(attr)) + int regions = alignedRegions.size(); + // arrays to hold [start, end] for each aligned region + int[] fromRanges = new int[regions * 2]; // from dna + int[] toRanges = new int[regions * 2]; // to protein + int fromRangesIndex = 0; + int toRangesIndex = 0; + + for (String range : alignedRegions) { - List ints = new ArrayList(); - StringTokenizer st = new StringTokenizer(range, " "); - while (st.hasMoreTokens()) - { - String num = st.nextToken(); - try - { - ints.add(new Integer(num)); - } catch (NumberFormatException nfe) - { - throw new InvalidGFF3FieldException(attr, set, - "Invalid number in field " + num); - } - } /* - * Align positionInRef positionInQuery LengthInRef - * contig_1146 exonerate:p2g:local similarity 8534 11269 3652 - . - * alignment_id 0 ; Query DDB_G0269124 Align 11270 143 120 + * Align mapFromStart mapToStart mapFromCount + * e.g. if mapIsFromCdna + * Align 11270 143 120 * means: - * 120 bases align at pos 143 in protein to 11270 on dna (-ve strand) - * and so on for additional ' ; Align x y z' groups + * 120 bases from pos 11270 align to pos 143 in peptide + * if !mapIsFromCdna this would instead be + * Align 143 11270 40 */ - if (ints.size() != 3) + String[] tokens = range.split(" "); + if (tokens.length != 3) { - throw new InvalidGFF3FieldException(attr, set, - "Invalid number of fields for this attribute (" - + ints.size() + ")"); + throw new IOException("Wrong number of fields for Align"); } - fromrange.add(ints.get(0)); - fromrange.add(ints.get(0) + strand * ints.get(2)); - // how are intron/exon boundaries that do not align in codons - // represented - if (ints.get(1).intValue() == lastppos && lastpframe > 0) + int fromStart = 0; + int toStart = 0; + int fromCount = 0; + try { - // extend existing to map - lastppos += ints.get(2) / 3; - lastpframe = ints.get(2) % 3; - torange.set(torange.size() - 1, new Integer(lastppos)); - } - else + fromStart = Integer.parseInt(tokens[0]); + toStart = Integer.parseInt(tokens[1]); + fromCount = Integer.parseInt(tokens[2]); + } catch (NumberFormatException nfe) { - // new to map range - torange.add(ints.get(1)); - lastppos = ints.get(1) + ints.get(2) / 3; - lastpframe = ints.get(2) % 3; - torange.add(new Integer(lastppos)); + throw new IOException("Invalid number in Align field: " + + nfe.getMessage()); } - } - // from and to ranges must end up being a series of start/end intervals - if (fromrange.size() % 2 == 1) - { - throw new InvalidGFF3FieldException(attr, set, - "Couldn't parse the DNA alignment range correctly"); - } - if (torange.size() % 2 == 1) - { - throw new InvalidGFF3FieldException(attr, set, - "Couldn't parse the protein alignment range correctly"); - } - // finally, build the map - int[] frommap = new int[fromrange.size()], tomap = new int[torange - .size()]; - int p = 0; - for (Integer ip : fromrange) - { - frommap[p++] = ip.intValue(); - } - p = 0; - for (Integer ip : torange) - { - tomap[p++] = ip.intValue(); - } - - return new MapList(frommap, tomap, 3, 1); - } - private List findNames(AlignmentI align, List newseqs, boolean relaxedIdMatching, - List list) - { - List found = new ArrayList(); - for (String seqId : list) - { - SequenceI seq = findName(align, seqId, relaxedIdMatching, newseqs); - if (seq != null) + /* + * Jalview always models from dna to protein, so adjust values if the + * GFF mapping is from protein to dna + */ + if (!mapIsFromCdna) { - found.add(seq); + fromCount *= 3; + int temp = fromStart; + fromStart = toStart; + toStart = temp; } + fromRanges[fromRangesIndex++] = fromStart; + fromRanges[fromRangesIndex++] = fromStart + strand * (fromCount - 1); + + /* + * If a codon has an intron gap, there will be contiguous 'toRanges'; + * this is handled for us by the MapList constructor. + * (It is not clear that exonerate ever generates this case) + */ + toRanges[toRangesIndex++] = toStart; + toRanges[toRangesIndex++] = toStart + (fromCount - 1) / 3; } - return found; + + return new MapList(fromRanges, toRanges, 3, 1); } /** - * Parse a GFF format feature. This may include creating a 'dummy' sequence - * for the feature or its mapped sequence + * Parse a GFF format feature. This may include creating a 'dummy' sequence to + * hold the feature, or for its mapped sequence, or both, to be resolved + * either later in the GFF file (##FASTA section), or when the user loads + * additional sequences. * - * @param st + * @param gffColumns * @param alignment - * @param relaxedIdmatching + * @param relaxedIdMatching * @param newseqs * @return */ - protected SequenceI parseGffFeature(StringTokenizer st, AlignmentI alignment, boolean relaxedIdmatching, - List newseqs) + protected SequenceI parseGff(String[] gffColumns, AlignmentI alignment, + boolean relaxedIdMatching, List newseqs) { - SequenceI seq; /* * GFF: seqid source type start end score strand phase [attributes] */ - String seqId = st.nextToken(); - + if (gffColumns.length < 5) + { + System.err.println("Ignoring GFF feature line with too few columns (" + + gffColumns.length + ")"); + return null; + } + /* * locate referenced sequence in alignment _or_ - * as a forward reference (SequenceDummy) + * as a forward or external reference (SequenceDummy) */ - seq = findName(alignment, seqId, relaxedIdmatching, newseqs); - - String desc = st.nextToken(); - String group = null; - if (desc.indexOf(' ') == -1) - { - // could also be a source term rather than description line - group = desc; - } - String ft = st.nextToken(); - int startPos = StringUtils.parseInt(st.nextToken()); - int endPos = StringUtils.parseInt(st.nextToken()); - // TODO: decide if non positional feature assertion for input data - // where end==0 is generally valid - if (endPos == 0) - { - // treat as non-positional feature, regardless. - startPos = 0; - } - float score = 0f; - try - { - score = new Float(st.nextToken()).floatValue(); - } catch (NumberFormatException ex) - { - // leave at 0 - } - - SequenceFeature sf = new SequenceFeature(ft, desc, startPos, - endPos, score, group); - if (st.hasMoreTokens()) - { - sf.setValue(STRAND, st.nextToken()); - } - if (st.hasMoreTokens()) - { - sf.setValue(FRAME, st.nextToken()); - } - - if (st.hasMoreTokens()) + String seqId = gffColumns[0]; + SequenceI seq = findSequence(seqId, alignment, newseqs, + relaxedIdMatching); + + SequenceFeature sf = null; + GffHelperI helper = GffHelperFactory.getHelper(gffColumns); + if (helper != null) { - String attributes = st.nextToken(); - sf.setValue(ATTRIBUTES, attributes); - - /* - * parse semi-structured attributes in column 9 and add them to the - * sequence feature's 'otherData' table; use Note as a best proxy for - * description - */ - Map> nameValues = StringUtils.parseNameValuePairs(attributes, ";", - new char[] { ' ', '=' }); - for (Entry> attr : nameValues.entrySet()) + try { - String values = StringUtils.listToDelimitedString(attr.getValue(), - "; "); - sf.setValue(attr.getKey(), values); - if ("Note".equals(attr.getKey())) + sf = helper.processGff(seq, gffColumns, alignment, newseqs, + relaxedIdMatching); + if (sf != null) { - sf.setDescription(values); + seq.addSequenceFeature(sf); + while ((seq = alignment.findName(seq, seqId, true)) != null) + { + seq.addSequenceFeature(new SequenceFeature(sf)); + } } + } catch (IOException e) + { + System.err.println("GFF parsing failed with: " + e.getMessage()); + return null; } } - - if (processOrAddSeqFeature(alignment, newseqs, seq, sf, - relaxedIdmatching)) - { - // check whether we should add the sequence feature to any other - // sequences in the alignment with the same or similar - while ((seq = alignment.findName(seq, seqId, true)) != null) + + return seq; + } + + /** + * Process the 'column 9' data of the GFF file. This is less formally defined, + * and its interpretation will vary depending on the tool that has generated + * it. + * + * @param attributes + * @param sf + */ + protected void processGffColumnNine(String attributes, SequenceFeature sf) + { + sf.setAttributes(attributes); + + /* + * Parse attributes in column 9 and add them to the sequence feature's + * 'otherData' table; use Note as a best proxy for description + */ + char nameValueSeparator = gffVersion == 3 ? '=' : ' '; + // TODO check we don't break GFF2 values which include commas here + Map> nameValues = GffHelperBase + .parseNameValuePairs(attributes, ";", nameValueSeparator, ","); + for (Entry> attr : nameValues.entrySet()) + { + String values = StringUtils.listToDelimitedString(attr.getValue(), + "; "); + sf.setValue(attr.getKey(), values); + if (NOTE.equals(attr.getKey())) { - seq.addSequenceFeature(new SequenceFeature(sf)); + sf.setDescription(values); } } - return seq; } /** @@ -1308,42 +1291,73 @@ public class FeaturesFile extends AlignFile } FastaFile parser = new FastaFile(this); List includedseqs = parser.getSeqs(); + SequenceIdMatcher smatcher = new SequenceIdMatcher(newseqs); - // iterate over includedseqs, and replacing matching ones with newseqs - // sequences. Generic iterator not used here because we modify includedseqs - // as we go + + /* + * iterate over includedseqs, and replacing matching ones with newseqs + * sequences. Generic iterator not used here because we modify + * includedseqs as we go + */ for (int p = 0, pSize = includedseqs.size(); p < pSize; p++) { // search for any dummy seqs that this sequence can be used to update - SequenceI dummyseq = smatcher.findIdMatch(includedseqs.get(p)); - if (dummyseq != null) + SequenceI includedSeq = includedseqs.get(p); + SequenceI dummyseq = smatcher.findIdMatch(includedSeq); + if (dummyseq != null && dummyseq instanceof SequenceDummy) { - // dummyseq was created so it could be annotated and referred to in - // alignments/codon mappings - - SequenceI mseq = includedseqs.get(p); - // mseq is the 'template' imported from the FASTA file which we'll use - // to coomplete dummyseq - if (dummyseq instanceof SequenceDummy) + // probably have the pattern wrong + // idea is that a flyweight proxy for a sequence ID can be created for + // 1. stable reference creation + // 2. addition of annotation + // 3. future replacement by a real sequence + // current pattern is to create SequenceDummy objects - a convenience + // constructor for a Sequence. + // problem is that when promoted to a real sequence, all references + // need to be updated somehow. We avoid that by keeping the same object. + ((SequenceDummy) dummyseq).become(includedSeq); + dummyseq.createDatasetSequence(); + + /* + * Update mappings so they are now to the dataset sequence + */ + for (AlignedCodonFrame mapping : align.getCodonFrames()) { - // probably have the pattern wrong - // idea is that a flyweight proxy for a sequence ID can be created for - // 1. stable reference creation - // 2. addition of annotation - // 3. future replacement by a real sequence - // current pattern is to create SequenceDummy objects - a convenience - // constructor for a Sequence. - // problem is that when promoted to a real sequence, all references - // need - // to be updated somehow. - ((SequenceDummy) dummyseq).become(mseq); - includedseqs.set(p, dummyseq); // template is no longer needed + mapping.updateToDataset(dummyseq); } + + /* + * replace parsed sequence with the realised forward reference + */ + includedseqs.set(p, dummyseq); + + /* + * and remove from the newseqs list + */ + newseqs.remove(dummyseq); } } - // finally add sequences to the dataset + + /* + * finally add sequences to the dataset + */ for (SequenceI seq : includedseqs) { + // experimental: mapping-based 'alignment' to query sequence + AlignmentUtils.alignSequenceAs(seq, align, + String.valueOf(align.getGapCharacter()), false, true); + + // rename sequences if GFF handler requested this + // TODO a more elegant way e.g. gffHelper.postProcess(newseqs) ? + SequenceFeature[] sfs = seq.getSequenceFeatures(); + if (sfs != null) + { + String newName = (String) sfs[0].getValue(GffHelperI.RENAME_TOKEN); + if (newName != null) + { + seq.setName(newName); + } + } align.addSequence(seq); } } @@ -1357,7 +1371,8 @@ public class FeaturesFile extends AlignFile * @param newseqs * @throws IOException */ - protected void processGffPragma(String line, Map gffProps, AlignmentI align, + protected void processGffPragma(String line, + Map gffProps, AlignmentI align, List newseqs) throws IOException { line = line.trim(); @@ -1366,11 +1381,11 @@ public class FeaturesFile extends AlignFile // close off any open 'forward references' return; } - + String[] tokens = line.substring(2).split(" "); String pragma = tokens[0]; String value = tokens.length == 1 ? null : tokens[1]; - + if ("gff-version".equalsIgnoreCase(pragma)) { if (value != null) @@ -1385,6 +1400,10 @@ public class FeaturesFile extends AlignFile } } } + else if ("sequence-region".equalsIgnoreCase(pragma)) + { + // could capture if wanted here + } else if ("feature-ontology".equalsIgnoreCase(pragma)) { // should resolve against the specified feature ontology URI @@ -1413,108 +1432,4 @@ public class FeaturesFile extends AlignFile System.err.println("Ignoring unknown pragma: " + line); } } - - /** - * Processes the 'Query' and 'Align' properties associated with a GFF - * similarity feature; these properties define the mapping of the annotated - * feature to another from which it has transferred annotation - * - * @param set - * @param seq - * @param sf - * @return - */ - public void processGffSimilarity(Map> set, SequenceI seq, - SequenceFeature sf, AlignmentI align, List newseqs, boolean relaxedIdMatching) - throws InvalidGFF3FieldException - { - int strand = sf.getStrand(); - // exonerate cdna/protein map - // look for fields - List querySeq = findNames(align, newseqs, relaxedIdMatching, - set.get("Query")); - if (querySeq == null || querySeq.size() != 1) - { - throw new InvalidGFF3FieldException("Query", set, - "Expecting exactly one sequence in Query field (got " - + set.get("Query") + ")"); - } - if (set.containsKey("Align")) - { - // process the align maps and create cdna/protein maps - // ideally, the query sequences are in the alignment, but maybe not... - - AlignedCodonFrame alco = new AlignedCodonFrame(); - MapList codonmapping = constructCodonMappingFromAlign(set, "Align", - strand); - - // add codon mapping, and hope! - alco.addMap(seq, querySeq.get(0), codonmapping); - align.addCodonFrame(alco); - } - - } - - /** - * take a sequence feature and examine its attributes to decide how it should - * be added to a sequence - * - * @param seq - * - the destination sequence constructed or discovered in the - * current context - * @param sf - * - the base feature with ATTRIBUTES property containing any - * additional attributes - * @param gFFFile - * - true if we are processing a GFF annotation file - * @return true if sf was actually added to the sequence, false if it was - * processed in another way - */ - public boolean processOrAddSeqFeature(AlignmentI align, List newseqs, - SequenceI seq, SequenceFeature sf, boolean relaxedIdMatching) - { - String attr = (String) sf.getValue(ATTRIBUTES); - boolean addFeature = true; - if (attr != null) - { - for (String attset : attr.split(TAB)) - { - Map> set = StringUtils.parseNameValuePairs( - attset, ";", new char[] { ' ', '-' }); - - if ("similarity".equals(sf.getType())) - { - try - { - processGffSimilarity(set, seq, sf, align, newseqs, - relaxedIdMatching); - addFeature = false; - } catch (InvalidGFF3FieldException ivfe) - { - System.err.println(ivfe); - } - } - } - } - if (addFeature) - { - seq.addSequenceFeature(sf); - } - return addFeature; - } - -} - -class InvalidGFF3FieldException extends Exception -{ - String field, value; - - public InvalidGFF3FieldException(String field, - Map> set, String message) - { - super(message + " (Field was " + field + " and value was " - + set.get(field).toString()); - this.field = field; - this.value = set.get(field).toString(); - } }