/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *  
 * Jalview is distributed in the hope that it will be useful, but 
 * WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 * PURPOSE.  See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
package jalview.io.gff;

import jalview.datamodel.AlignmentI;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.util.StringUtils;

import java.io.IOException;
import java.util.List;
import java.util.Map;

/**
 * A handler to parse GFF in the format generated by InterProScan
 */
public class InterProScanHelper extends Gff3Helper
{
  private static final String INTER_PRO_SCAN = "InterProScan";

  private static final String SIGNATURE_DESC = "signature_desc";

  /**
   * Process one GFF feature line (as modelled by SequenceFeature). Lines
   * whose source column is "." are skipped; all others are delegated to the
   * superclass.
   * 
   * @param seq
   *          the sequence with which this feature is associated
   * @param gff
   *          the gff column data
   * @param align
   *          the alignment we are adding GFF to
   * @param newseqs
   *          any new sequences referenced by the GFF
   * @param relaxedIdMatching
   *          if true, match word tokens in sequence names
   * @return a sequence feature if one should be added to the sequence, else
   *         null (i.e. it has been processed in another way e.g. to generate
   *         a mapping)
   * @throws IOException
   *           if thrown by the superclass processing
   */
  @Override
  public SequenceFeature processGff(SequenceI seq, String[] gff,
          AlignmentI align, List<SequenceI> newseqs,
          boolean relaxedIdMatching) throws IOException
  {
    /*
     * ignore the 'polypeptide' match of the whole sequence
     */
    if (".".equals(gff[SOURCE_COL]))
    {
      return null;
    }

    return super.processGff(seq, gff, align, newseqs, relaxedIdMatching);
  }

  /**
   * Builds a sequence feature using the superclass, then adjusts it for
   * InterProScan data: the description is replaced by the comma-separated
   * 'signature_desc' attribute values (when that yields a non-empty string),
   * the feature type is set to the GFF source column, and the feature group
   * is set to 'InterProScan'.
   * 
   * @param gff
   *          the gff column data
   * @param attributes
   *          the GFF attributes, as a map of attribute name to its values
   * @return the adjusted sequence feature
   */
  @Override
  protected SequenceFeature buildSequenceFeature(String[] gff,
          Map<String, List<String>> attributes)
  {
    SequenceFeature sf = super.buildSequenceFeature(gff, attributes);

    /*
     * signature_desc is a more informative source of description
     * NOTE(review): desc is null when the attribute is absent; presumably
     * listToDelimitedString tolerates a null list - confirm
     */
    List<String> desc = attributes.get(SIGNATURE_DESC);
    String description = StringUtils.listToDelimitedString(desc, ", ");
    if (description.length() > 0)
    {
      sf.setDescription(description);
    }

    /*
     * Set sequence feature group as 'InterProScan', and type as the source
     * database for this match (e.g. 'Pfam')
     */
    sf.setType(gff[SOURCE_COL]);
    sf.setFeatureGroup(INTER_PRO_SCAN);

    return sf;
  }

  /**
   * Tests whether the GFF data looks like it was generated by InterProScan.
   * This is the case if the type column is a kind of 'protein_match' in the
   * Sequence Ontology, or if the source column is "." and the type is a kind
   * of 'polypeptide'.
   * 
   * @param columns
   *          the gff column data for one line
   * @return true if the line is recognised as InterProScan output, else false
   */
  public static boolean recognises(String[] columns)
  {
    SequenceOntologyI so = SequenceOntologyFactory.getInstance();
    String type = columns[TYPE_COL];
    return so.isA(type, SequenceOntologyI.PROTEIN_MATCH)
            || (".".equals(columns[SOURCE_COL])
                    && so.isA(type, SequenceOntologyI.POLYPEPTIDE));
  }

  /**
   * Overridden method, because InterProScan GFF has the target sequence id in
   * GFF field 'ID' rather than the usual 'Target' :-O
   * 
   * @param target
   *          not used by this implementation
   * @param set
   *          the GFF attributes, as a map of attribute name to its values
   * @return the single value of the 'ID' attribute, or null if there is not
   *         exactly one value
   */
  @Override
  protected String findTargetId(String target,
          Map<String, List<String>> set)
  {
    List<String> ids = set.get(ID);
    if (ids == null || ids.size() != 1)
    {
      return null;
    }
    return ids.get(0);
  }
}