/*
* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
* Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see <http://www.gnu.org/licenses/>.
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.io.gff;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.util.StringUtils;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
* A handler to parse GFF in the format generated by InterProScan
*/
public class InterProScanHelper extends Gff3Helper
{
private static final String INTER_PRO_SCAN = "InterProScan";
private static final String SIGNATURE_DESC = "signature_desc";
/**
* Process one GFF feature line (as modelled by SequenceFeature)
*
* @param seq
* the sequence with which this feature is associated
* @param gff
* the gff column data
* @param align
* the alignment we are adding GFF to
* @param newseqs
* any new sequences referenced by the GFF
* @param relaxedIdMatching
* if true, match word tokens in sequence names
* @return a sequence feature if one should be added to the sequence, else
* null (i.e. it has been processed in another way e.g. to generate a
* mapping)
* @throws IOException
*/
@Override
public SequenceFeature processGff(SequenceI seq, String[] gff,
AlignmentI align, List newseqs,
boolean relaxedIdMatching) throws IOException
{
/*
* ignore the 'polypeptide' match of the whole sequence
*/
if (".".equals(gff[SOURCE_COL]))
{
return null;
}
return super.processGff(seq, gff, align, newseqs, relaxedIdMatching);
}
/**
* An override that
*
* - uses Source (column 2) as feature type instead of the default column 3
* - sets "InterProScan" as the feature group
* - extracts "signature_desc" attribute as the feature description
*
*/
@Override
protected SequenceFeature buildSequenceFeature(String[] gff,
Map> attributes)
{
SequenceFeature sf = super.buildSequenceFeature(gff, SOURCE_COL,
INTER_PRO_SCAN, attributes);
/*
* signature_desc is a more informative source of description
*/
List desc = attributes.get(SIGNATURE_DESC);
String description = StringUtils.listToDelimitedString(desc, ", ");
if (description.length() > 0)
{
sf.setDescription(description);
}
return sf;
}
/**
* Tests whether the GFF data looks like it was generated by InterProScan
*
* @param columns
* @return
*/
public static boolean recognises(String[] columns)
{
SequenceOntologyI so = SequenceOntologyFactory.getInstance();
String type = columns[TYPE_COL];
if (so.isA(type, SequenceOntologyI.PROTEIN_MATCH)
|| (".".equals(columns[SOURCE_COL])
&& so.isA(type, SequenceOntologyI.POLYPEPTIDE)))
{
return true;
}
return false;
}
/**
* Overriden method, because InterProScan GFF has the target sequence id in
* GFF field 'ID' rather than the usual 'Target' :-O
*/
@Override
protected String findTargetId(String target,
Map> set)
{
List ids = set.get(ID);
if (ids == null || ids.size() != 1)
{
return null;
}
return ids.get(0);
}
}