2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.io.gff;
23 import jalview.datamodel.AlignmentI;
24 import jalview.datamodel.SequenceFeature;
25 import jalview.datamodel.SequenceI;
26 import jalview.util.StringUtils;
28 import java.io.IOException;
29 import java.util.List;
33 * A handler to parse GFF in the format generated by InterProScan
35 public class InterProScanHelper extends Gff3Helper
37 private static final String INTER_PRO_SCAN = "InterProScan";
39 private static final String SIGNATURE_DESC = "signature_desc";
42 * Process one GFF feature line (as modelled by SequenceFeature)
45 * the sequence with which this feature is associated
49 * the alignment we are adding GFF to
51 * any new sequences referenced by the GFF
52 * @param relaxedIdMatching
53 * if true, match word tokens in sequence names
54 * @return a sequence feature if one should be added to the sequence, else
55 * null (i.e. it has been processed in another way e.g. to generate a
60 public SequenceFeature processGff(SequenceI seq, String[] gff,
61 AlignmentI align, List<SequenceI> newseqs,
62 boolean relaxedIdMatching) throws IOException
65 * ignore the 'polypeptide' match of the whole sequence
67 if (".".equals(gff[SOURCE_COL]))
72 return super.processGff(seq, gff, align, newseqs, relaxedIdMatching);
79 protected SequenceFeature buildSequenceFeature(String[] gff,
80 Map<String, List<String>> attributes)
82 SequenceFeature sf = super.buildSequenceFeature(gff, attributes);
85 * signature_desc is a more informative source of description
87 List<String> desc = attributes.get(SIGNATURE_DESC);
88 String description = StringUtils.listToDelimitedString(desc, ", ");
89 if (description.length() > 0)
91 sf.setDescription(description);
95 * Set sequence feature group as 'InterProScan', and type as the source
96 * database for this match (e.g. 'Pfam')
98 sf.setType(gff[SOURCE_COL]);
99 sf.setFeatureGroup(INTER_PRO_SCAN);
105 * Tests whether the GFF data looks like it was generated by InterProScan
110 public static boolean recognises(String[] columns)
112 SequenceOntologyI so = SequenceOntologyFactory.getInstance();
113 String type = columns[TYPE_COL];
114 if (so.isA(type, SequenceOntologyI.PROTEIN_MATCH)
115 || (".".equals(columns[SOURCE_COL])
116 && so.isA(type, SequenceOntologyI.POLYPEPTIDE)))
 * Overridden method, because InterProScan GFF has the target sequence id in
 * GFF field 'ID' rather than the usual 'Target' :-O
128 protected String findTargetId(String target,
129 Map<String, List<String>> set)
131 List<String> ids = set.get(ID);
132 if (ids == null || ids.size() != 1)