--- /dev/null
+package jalview.io.gff;
+
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.util.StringUtils;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * A handler to parse GFF in the format generated by InterProScan.
+ * <p>
+ * Compared with the inherited GFF3 handling, this helper:
+ * <ul>
+ * <li>skips feature lines whose source column is "."</li>
+ * <li>prefers the 'signature_desc' attribute as the feature description</li>
+ * <li>uses the GFF source column as the feature type and 'InterProScan' as
+ * the feature group</li>
+ * <li>reads the target sequence id from the 'ID' attribute</li>
+ * </ul>
+ */
+public class InterProScanHelper extends Gff3Helper
+{
+  private static final String INTER_PRO_SCAN = "InterProScan";
+
+  private static final String SIGNATURE_DESC = "signature_desc";
+
+  /**
+   * Process one GFF feature line (as modelled by SequenceFeature)
+   * 
+   * @param seq
+   *          the sequence with which this feature is associated
+   * @param gff
+   *          the gff column data
+   * @param align
+   *          the alignment we are adding GFF to
+   * @param newseqs
+   *          any new sequences referenced by the GFF
+   * @param relaxedIdMatching
+   *          if true, match word tokens in sequence names
+   * @return a sequence feature if one should be added to the sequence, else
+   *         null (i.e. it has been processed in another way e.g. to generate a
+   *         mapping, or it is a line with source "." which is ignored)
+   * @throws IOException
+   */
+  @Override
+  public SequenceFeature processGff(SequenceI seq, String[] gff,
+          AlignmentI align, List<SequenceI> newseqs,
+          boolean relaxedIdMatching) throws IOException
+  {
+    /*
+     * ignore the 'polypeptide' match of the whole sequence
+     */
+    if (".".equals(gff[SOURCE_COL]))
+    {
+      return null;
+    }
+
+    return super.processGff(seq, gff, align, newseqs, relaxedIdMatching);
+  }
+
+  /**
+   * Builds the sequence feature using the superclass, then adjusts it for
+   * InterProScan data: the description is replaced by the 'signature_desc'
+   * attribute value(s) if any are present (joined with ", "), the type is set
+   * to the GFF source column and the feature group to 'InterProScan'.
+   * 
+   * @param gff
+   *          the gff column data
+   * @param attributes
+   *          the parsed GFF attributes, keyed by attribute name
+   * @return the adjusted sequence feature
+   */
+  @Override
+  protected SequenceFeature buildSequenceFeature(String[] gff,
+          Map<String, List<String>> attributes)
+  {
+    SequenceFeature sf = super.buildSequenceFeature(gff, attributes);
+
+    /*
+     * signature_desc is a more informative source of description; it is
+     * optional, so only consult the string helper when values are present
+     */
+    List<String> desc = attributes == null ? null
+            : attributes.get(SIGNATURE_DESC);
+    if (desc != null && !desc.isEmpty())
+    {
+      String description = StringUtils.listToDelimitedString(desc, ", ");
+      if (description != null && description.length() > 0)
+      {
+        sf.setDescription(description);
+      }
+    }
+
+    /*
+     * Set sequence feature group as 'InterProScan', and type as the source
+     * database for this match (e.g. 'Pfam')
+     */
+    sf.setType(gff[SOURCE_COL]);
+    sf.setFeatureGroup(INTER_PRO_SCAN);
+
+    return sf;
+  }
+
+  /**
+   * Tests whether the GFF data looks like it was generated by InterProScan.
+   * This is the case if the type column is a kind of 'protein_match' in the
+   * Sequence Ontology, or if the source column is "." and the type column is
+   * a kind of 'polypeptide'.
+   * 
+   * @param columns
+   *          the columns of one GFF line; may be null or incomplete
+   * @return true if the line matches either pattern; false otherwise,
+   *         including when the source or type column is missing
+   */
+  public static boolean recognises(String[] columns)
+  {
+    /*
+     * a truncated or missing line cannot be recognised (and must not throw)
+     */
+    if (columns == null || columns.length <= SOURCE_COL
+            || columns.length <= TYPE_COL)
+    {
+      return false;
+    }
+
+    String type = columns[TYPE_COL];
+    SequenceOntologyI so = SequenceOntologyFactory.getInstance();
+    return so.isA(type, SequenceOntologyI.PROTEIN_MATCH)
+            || (".".equals(columns[SOURCE_COL])
+                    && so.isA(type, SequenceOntologyI.POLYPEPTIDE));
+  }
+
+  /**
+   * Overridden method, because InterProScan GFF has the target sequence id in
+   * GFF field 'ID' rather than the usual 'Target' :-O
+   * 
+   * @param target
+   *          not used by this implementation
+   * @param set
+   *          the parsed GFF attributes, keyed by attribute name
+   * @return the single 'ID' value, or null if there is no 'ID' attribute or
+   *         it does not have exactly one value
+   */
+  @Override
+  protected String findTargetId(String target, Map<String, List<String>> set)
+  {
+    List<String> ids = set.get(ID);
+    if (ids == null || ids.size() != 1)
+    {
+      return null;
+    }
+    return ids.get(0);
+  }
+
+}