2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.io.gff;
23 import jalview.datamodel.AlignmentI;
24 import jalview.datamodel.SequenceFeature;
25 import jalview.datamodel.SequenceI;
26 import jalview.util.StringUtils;
28 import java.io.IOException;
29 import java.util.List;
33 * A handler to parse GFF in the format generated by InterProScan
35 public class InterProScanHelper extends Gff3Helper
37 private static final String INTER_PRO_SCAN = "InterProScan";
39 private static final String SIGNATURE_DESC = "signature_desc";
42 * Process one GFF feature line (as modelled by SequenceFeature)
45 * the sequence with which this feature is associated
49 * the alignment we are adding GFF to
51 * any new sequences referenced by the GFF
52 * @param relaxedIdMatching
53 * if true, match word tokens in sequence names
54 * @return a sequence feature if one should be added to the sequence, else
55 * null (i.e. it has been processed in another way e.g. to generate a
60 public SequenceFeature processGff(SequenceI seq, String[] gff,
61 AlignmentI align, List<SequenceI> newseqs,
62 boolean relaxedIdMatching) throws IOException
65 * ignore the 'polypeptide' match of the whole sequence
67 if (".".equals(gff[SOURCE_COL]))
72 return super.processGff(seq, gff, align, newseqs, relaxedIdMatching);
78 * <li>uses Source (column 2) as feature type instead of the default column 3</li>
79 * <li>sets "InterProScan" as the feature group</li>
80 * <li>extracts "signature_desc" attribute as the feature description</li>
84 protected SequenceFeature buildSequenceFeature(String[] gff,
85 Map<String, List<String>> attributes)
87 SequenceFeature sf = super.buildSequenceFeature(gff, SOURCE_COL,
88 INTER_PRO_SCAN, attributes);
91 * signature_desc is a more informative source of description
93 List<String> desc = attributes.get(SIGNATURE_DESC);
94 String description = StringUtils.listToDelimitedString(desc, ", ");
95 if (description.length() > 0)
97 sf.setDescription(description);
104 * Tests whether the GFF data looks like it was generated by InterProScan
109 public static boolean recognises(String[] columns)
111 SequenceOntologyI so = SequenceOntologyFactory.getInstance();
112 String type = columns[TYPE_COL];
113 if (so.isA(type, SequenceOntologyI.PROTEIN_MATCH)
114 || (".".equals(columns[SOURCE_COL])
115 && so.isA(type, SequenceOntologyI.POLYPEPTIDE)))
123 * Overridden method, because InterProScan GFF has the target sequence id in
124 * GFF field 'ID' rather than the usual 'Target' :-O
127 protected String findTargetId(String target,
128 Map<String, List<String>> set)
130 List<String> ids = set.get(ID);
131 if (ids == null || ids.size() != 1)