X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2Fgff%2FGff3Helper.java;h=1ef8848df6c8a052549230f25949f6803123cb2f;hb=9135cbb74bbff06ea60485540194515ebcc018b0;hp=4c67caa7fb4db009038dd878c8f553d96e54a9cf;hpb=8f920d337154e092f5f9056ffde3cdf2735eca43;p=jalview.git diff --git a/src/jalview/io/gff/Gff3Helper.java b/src/jalview/io/gff/Gff3Helper.java index 4c67caa..1ef8848 100644 --- a/src/jalview/io/gff/Gff3Helper.java +++ b/src/jalview/io/gff/Gff3Helper.java @@ -1,3 +1,23 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see <http://www.gnu.org/licenses/>. + * The Jalview Authors are detailed in the 'AUTHORS' file. 
+ */ package jalview.io.gff; import jalview.datamodel.AlignedCodonFrame; @@ -19,6 +39,8 @@ import java.util.Map; */ public class Gff3Helper extends GffHelperBase { + public static final String ALLELES = "alleles"; + protected static final String TARGET = "Target"; protected static final String ID = "ID"; @@ -61,19 +83,6 @@ public class Gff3Helper extends GffHelperBase AlignmentI align, List<SequenceI> newseqs, boolean relaxedIdMatching) throws IOException { - /* - * (For now) we don't process mappings from reverse complement ; to do - * this would require (a) creating a virtual sequence placeholder for - * the reverse complement (b) resolving the sequence by its id from some - * source (GFF ##FASTA or other) (c) creating the reverse complement - * sequence (d) updating the mapping to be to the reverse complement - */ - if ("-".equals(gff[STRAND_COL])) - { - System.err - .println("Skipping mapping from reverse complement as not yet supported"); - return null; - } SequenceFeature sf = null; if (gff.length == 9) @@ -82,15 +91,16 @@ public class Gff3Helper extends GffHelperBase String atts = gff[ATTRIBUTES_COL]; Map<String, List<String>> attributes = parseNameValuePairs(atts); - if (SequenceOntology.getInstance().isProteinMatch(soTerm)) + SequenceOntologyI so = SequenceOntologyFactory.getInstance(); + if (so.isA(soTerm, SequenceOntologyI.PROTEIN_MATCH)) { - sf = processProteinMatch(attributes, seq, gff, align, - newseqs, relaxedIdMatching); + sf = processProteinMatch(attributes, seq, gff, align, newseqs, + relaxedIdMatching); } - else if (SequenceOntology.getInstance().isNucleotideMatch(soTerm)) + else if (so.isA(soTerm, SequenceOntologyI.NUCLEOTIDE_MATCH)) { - sf = processNucleotideMatch(attributes, seq, gff, align, - newseqs, relaxedIdMatching); + sf = processNucleotideMatch(attributes, seq, gff, align, newseqs, + relaxedIdMatching); } else { @@ -104,7 +114,7 @@ public class Gff3Helper extends GffHelperBase */ sf = buildSequenceFeature(gff, null); } - + return sf; } @@ -131,14 +141,21 @@ public 
class Gff3Helper extends GffHelperBase protected SequenceFeature processNucleotideMatch( Map<String, List<String>> attributes, SequenceI seq, String[] gffColumns, AlignmentI align, List<SequenceI> newseqs, - boolean relaxedIdMatching) - throws IOException + boolean relaxedIdMatching) throws IOException { String strand = gffColumns[STRAND_COL]; - if ("-1".equals(strand)) + + /* + * (For now) we don't process mappings from reverse complement ; to do + * this would require (a) creating a virtual sequence placeholder for + * the reverse complement (b) resolving the sequence by its id from some + * source (GFF ##FASTA or other) (c) creating the reverse complement + * sequence (d) updating the mapping to be to the reverse complement + */ + if ("-".equals(strand)) { - System.err - .println("Currently ignoring mappings from reverse complement"); + System.err.println( + "Skipping mapping from reverse complement as not yet supported"); return null; } @@ -170,8 +187,8 @@ public class Gff3Helper extends GffHelperBase * (new or existing) virtual sequence in the newseqs list */ String targetId = findTargetId(tokens[0], attributes); - SequenceI mappedSequence1 = findSequence(targetId, align, - newseqs, relaxedIdMatching); + SequenceI mappedSequence1 = findSequence(targetId, align, newseqs, + relaxedIdMatching); SequenceI mappedSequence = mappedSequence1; if (mappedSequence == null) { @@ -199,8 +216,7 @@ public class Gff3Helper extends GffHelperBase int fromStart = Integer.parseInt(gffColumns[START_COL]); int fromEnd = Integer.parseInt(gffColumns[END_COL]); MapList mapping = constructMappingFromAlign(fromStart, fromEnd, - toStart, toEnd, - MappingType.NucleotideToNucleotide); + toStart, toEnd, MappingType.NucleotideToNucleotide); if (mapping != null) { @@ -230,7 +246,8 @@ public class Gff3Helper extends GffHelperBase * @return */ @SuppressWarnings("unused") - protected String findTargetId(String target, Map<String, List<String>> set) + protected String findTargetId(String target, + Map<String, List<String>> set) { return target; } @@ -261,8 +278,8 @@ 
public class Gff3Helper extends GffHelperBase * @throws IOException */ protected SequenceFeature processProteinMatch( - Map<String, List<String>> set, SequenceI seq, - String[] gffColumns, AlignmentI align, List<SequenceI> newseqs, + Map<String, List<String>> set, SequenceI seq, String[] gffColumns, + AlignmentI align, List<SequenceI> newseqs, boolean relaxedIdMatching) { // This is currently tailored to InterProScan GFF output: @@ -284,8 +301,8 @@ public class Gff3Helper extends GffHelperBase for (String target : targets) { - SequenceI mappedSequence1 = findSequence(findTargetId(target, set), align, - newseqs, relaxedIdMatching); + SequenceI mappedSequence1 = findSequence(findTargetId(target, set), + align, newseqs, relaxedIdMatching); SequenceI mappedSequence = mappedSequence1; if (mappedSequence == null) { @@ -296,10 +313,9 @@ public class Gff3Helper extends GffHelperBase * give the mapped sequence a copy of the sequence feature, with * start/end range adjusted */ - SequenceFeature sf2 = new SequenceFeature(sf); - sf2.setBegin(1); int sequenceFeatureLength = 1 + sf.getEnd() - sf.getBegin(); - sf2.setEnd(sequenceFeatureLength); + SequenceFeature sf2 = new SequenceFeature(sf, 1, + sequenceFeatureLength, sf.getFeatureGroup(), sf.getScore()); mappedSequence.addSequenceFeature(sf2); /* @@ -307,8 +323,8 @@ public class Gff3Helper extends GffHelperBase * renamed with its qualified accession id; renaming has to wait until * all sequence reference resolution is complete */ - String accessionId = StringUtils.listToDelimitedString( - set.get(NAME), ","); + String accessionId = StringUtils + .listToDelimitedString(set.get(NAME), ","); if (accessionId.length() > 0) { String database = sf.getType(); // TODO InterProScan only?? @@ -334,28 +350,76 @@ public class Gff3Helper extends GffHelperBase } /** - * Return '=' as the name-value separator used in column 9 attributes. 
+ * Modifies the default SequenceFeature in order to set the Target sequence id + * as the description */ @Override - protected char getNameValueSeparator() + protected SequenceFeature buildSequenceFeature(String[] gff, + int typeColumn, String group, + Map<String, List<String>> attributes) { - return '='; + SequenceFeature sf = super.buildSequenceFeature(gff, typeColumn, group, + attributes); + String desc = getDescription(sf, attributes); + if (desc != null) + { + sf.setDescription(desc); + } + return sf; } /** - * Modifies the default SequenceFeature in order to set the Target sequence id - * as the description + * Apply heuristic rules to try to get the most useful feature description + * + * @param sf + * @param attributes + * @return */ - @Override - protected SequenceFeature buildSequenceFeature(String[] gff, + protected String getDescription(SequenceFeature sf, Map<String, List<String>> attributes) { - SequenceFeature sf = super.buildSequenceFeature(gff, attributes); + String desc = null; String target = (String) sf.getValue(TARGET); if (target != null) { - sf.setDescription(target.split(" ")[0]); + desc = target.split(" ")[0]; } - return sf; + + SequenceOntologyI so = SequenceOntologyFactory.getInstance(); + String type = sf.getType(); + if (so.isA(type, SequenceOntologyI.SEQUENCE_VARIANT)) + { + /* + * Ensembl returns dna variants as 'alleles' + */ + desc = StringUtils.listToDelimitedString(attributes.get(ALLELES), + ","); + } + + /* + * extract 'Name' for a transcript (to show gene name) + * or an exon (so 'colour by label' shows exon boundaries) + */ + if (SequenceOntologyI.NMD_TRANSCRIPT_VARIANT.equals(type) + || so.isA(type, SequenceOntologyI.TRANSCRIPT) + || so.isA(type, SequenceOntologyI.EXON)) + { + desc = StringUtils.listToDelimitedString(attributes.get("Name"), ","); + } + + /* + * if the above fails, try ID + */ + if (desc == null) + { + desc = (String) sf.getValue(ID); + } + + /* + * and decode comma, equals, semi-colon as required by GFF3 spec + */ + desc = StringUtils.urlDecode(desc, 
GFF_ENCODABLE); + + return desc; } }