1 package jalview.io.gff;
3 import jalview.datamodel.AlignmentI;
4 import jalview.datamodel.SequenceFeature;
5 import jalview.datamodel.SequenceI;
6 import jalview.util.StringUtils;
8 import java.io.IOException;
13 * A handler to parse GFF in the format generated by InterProScan
15 public class InterProScanHelper extends Gff3Helper
17 private static final String INTER_PRO_SCAN = "InterProScan";
19 private static final String SIGNATURE_DESC = "signature_desc";
22 * Process one GFF feature line (as modelled by SequenceFeature)
25 * the sequence with which this feature is associated
29 * the alignment we are adding GFF to
31 * any new sequences referenced by the GFF
32 * @param relaxedIdMatching
33 * if true, match word tokens in sequence names
34 * @return a sequence feature if one should be added to the sequence, else
35 * null (i.e. it has been processed in another way e.g. to generate a
40 public SequenceFeature processGff(SequenceI seq, String[] gff,
41 AlignmentI align, List<SequenceI> newseqs,
42 boolean relaxedIdMatching) throws IOException
45 * ignore the 'polypeptide' match of the whole sequence
47 if (".".equals(gff[SOURCE_COL]))
52 return super.processGff(seq, gff, align, newseqs, relaxedIdMatching);
59 protected SequenceFeature buildSequenceFeature(String[] gff,
60 Map<String, List<String>> attributes)
62 SequenceFeature sf = super.buildSequenceFeature(gff, attributes);
65 * signature_desc is a more informative source of description
67 List<String> desc = attributes.get(SIGNATURE_DESC);
68 String description = StringUtils.listToDelimitedString(desc, ", ");
69 if (description.length() > 0)
71 sf.setDescription(description);
75 * Set sequence feature group as 'InterProScan', and type as the source
76 * database for this match (e.g. 'Pfam')
78 sf.setType(gff[SOURCE_COL]);
79 sf.setFeatureGroup(INTER_PRO_SCAN);
85 * Tests whether the GFF data looks like it was generated by InterProScan
90 public static boolean recognises(String[] columns)
92 SequenceOntologyI so = SequenceOntologyFactory.getInstance();
93 String type = columns[TYPE_COL];
94 if (so.isA(type, SequenceOntologyI.PROTEIN_MATCH)
95 || (".".equals(columns[SOURCE_COL]) && so.isA(type,
96 SequenceOntologyI.POLYPEPTIDE)))
   * Overridden method, because InterProScan GFF has the target sequence id in
105 * GFF field 'ID' rather than the usual 'Target' :-O
108 protected String findTargetId(String target, Map<String, List<String>> set)
110 List<String> ids = set.get(ID);
111 if (ids == null || ids.size() != 1)