/*
- * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
- * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * Jalview - A Sequence Alignment Editor and Viewer (Version 2.9)
+ * Copyright (C) 2015 The Jalview Authors
*
* This file is part of Jalview.
*
}
/**
- * Creates a new FeaturesFile object.
- *
+ * Creates a FeaturesFile by passing both arguments, unchanged and in the
+ * same order, to the superclass constructor. No other work is done here.
+ *
* @param inFile
- * DOCUMENT ME!
+ *          forwarded to the superclass (presumably the file to read -
+ *          confirm against the superclass constructor)
* @param type
- * DOCUMENT ME!
- *
+ *          forwarded to the superclass (meaning is defined there)
* @throws IOException
- * DOCUMENT ME!
+ *           if raised by the superclass constructor
*/
public FeaturesFile(String inFile, String type) throws IOException
{
super(inFile, type);
}
+ /**
+ * Creates a FeaturesFile from an existing FileParse by handing it straight
+ * to the superclass constructor. No other work is done here.
+ *
+ * @param source
+ *          forwarded unchanged to the superclass constructor
+ * @throws IOException
+ *           if raised by the superclass constructor
+ */
public FeaturesFile(FileParse source) throws IOException
{
super(source);
}
/**
+ * Creates a FeaturesFile by forwarding both arguments, unchanged and in the
+ * same order, to the superclass constructor.
+ *
+ * @param parseImmediately
+ *          forwarded to the superclass (presumably decides whether parsing
+ *          starts during construction - confirm in the superclass)
+ * @param source
+ *          forwarded to the superclass
+ * @throws IOException
+ *           if raised by the superclass constructor
+ */
+ public FeaturesFile(boolean parseImmediately, FileParse source)
+ throws IOException
+ {
+ super(parseImmediately, source);
+ }
+
+ /**
+ * Creates a FeaturesFile by forwarding all three arguments, unchanged and
+ * in the same order, to the superclass constructor.
+ *
+ * @param parseImmediately
+ *          forwarded to the superclass (presumably decides whether parsing
+ *          starts during construction - confirm in the superclass)
+ * @param inFile
+ *          forwarded to the superclass
+ * @param type
+ *          forwarded to the superclass
+ * @throws IOException
+ *           if raised by the superclass constructor
+ */
+ public FeaturesFile(boolean parseImmediately, String inFile, String type)
+ throws IOException
+ {
+ super(parseImmediately, inFile, type);
+ }
+
+ /**
* Parse GFF or sequence features file using case-independent matching,
* discarding URLs
*
return parse(align, colours, featureLink, removeHTML, false);
}
+ @Override
+ public void addAnnotations(AlignmentI al)
+ {
+ // NOTE(review): pure pass-through - only calls the superclass version, so this override adds no behaviour of its own
+ super.addAnnotations(al);
+ }
+
+ @Override
+ public void addProperties(AlignmentI al)
+ {
+ // NOTE(review): pure pass-through - only calls the superclass version, so this override adds no behaviour of its own
+ super.addProperties(al);
+ }
+
+ @Override
+ public void addSeqGroups(AlignmentI al)
+ {
+ // NOTE(review): pure pass-through - only calls the superclass version, so this override adds no behaviour of its own
+ super.addSeqGroups(al);
+ }
+
/**
* Parse GFF or sequence features file
*
{
SequenceI seq = null;
/**
- * keep track of any sequences we try to create from the data if it is a GFF3 file
+ * keep track of any sequences we try to create from the data if it is a
+ * GFF3 file
*/
ArrayList<SequenceI> newseqs = new ArrayList<SequenceI>();
String type, desc, token = null;
* when true, assume GFF style features rather than Jalview style.
*/
boolean GFFFile = true;
+ Map<String, String> gffProps = new HashMap<String, String>();
while ((line = nextLine()) != null)
{
+ // skip comments/process pragmas
if (line.startsWith("#"))
{
+ if (line.startsWith("##"))
+ {
+ // possibly GFF2/3 version and metadata header
+ processGffPragma(line, gffProps, align, newseqs);
+ line = "";
+ }
continue;
}
return true;
}
+ /**
+ * Keywords that may follow '##' on a GFF pragma line. Constant names mirror
+ * the pragma text with '-' replaced by '_'; 'hash' stands for a '###' line.
+ */
+ private enum GffPragmas
+ {
+   gff_version, sequence_region, feature_ontology, attribute_ontology,
+   source_ontology, species_build, fasta, hash
+ }
+
+ /**
+ * Lookup from the lower-cased pragma text (as written after '##') to its
+ * enum constant.
+ */
+ private static final Map<String, GffPragmas> GFFPRAGMA = new HashMap<String, GffPragmas>();
+ static
+ {
+   // gff-version must be registered here, otherwise its case in
+   // processGffPragma can never be selected
+   GFFPRAGMA.put("gff-version", GffPragmas.gff_version);
+   GFFPRAGMA.put("sequence-region", GffPragmas.sequence_region);
+   GFFPRAGMA.put("feature-ontology", GffPragmas.feature_ontology);
+   GFFPRAGMA.put("#", GffPragmas.hash);
+   GFFPRAGMA.put("fasta", GffPragmas.fasta);
+   GFFPRAGMA.put("species-build", GffPragmas.species_build);
+   GFFPRAGMA.put("source-ontology", GffPragmas.source_ontology);
+   GFFPRAGMA.put("attribute-ontology", GffPragmas.attribute_ontology);
+ }
+
+ /**
+ * Handle a single GFF pragma line (one that starts with '##'). The pragma
+ * name is the text between '##' and the first space (or the end of the
+ * line); names that are not registered in GFFPRAGMA are silently skipped.
+ *
+ * @param line
+ *          the complete pragma line, including the leading '##'
+ * @param gffProps
+ *          currently not read or written by this method
+ * @param align
+ *          passed to process_as_fasta when a fasta pragma is met
+ * @param newseqs
+ *          passed to process_as_fasta when a fasta pragma is met
+ * @throws IOException
+ *           if raised while processing the fasta pragma
+ */
+ private void processGffPragma(String line, Map<String, String> gffProps,
+         AlignmentI align, ArrayList<SequenceI> newseqs)
+         throws IOException
+ {
+   // line starts with ##
+   int spacepos = line.indexOf(' ');
+   String pragma = spacepos == -1 ? line.substring(2).trim() : line
+           .substring(2, spacepos);
+   // Locale.ROOT keeps the lookup independent of the JVM's default locale
+   GffPragmas gffpragma = GFFPRAGMA.get(pragma
+           .toLowerCase(java.util.Locale.ROOT));
+   if (gffpragma == null)
+   {
+     return;
+   }
+   switch (gffpragma)
+   {
+   case gff_version:
+     // no space means no version argument was supplied at all
+     String version = spacepos == -1 ? "" : line.substring(spacepos + 1)
+             .trim();
+     // versions may be written as major.minor.patch; only the major
+     // number fits the integer gffversion
+     int dotpos = version.indexOf('.');
+     if (dotpos > -1)
+     {
+       version = version.substring(0, dotpos);
+     }
+     try
+     {
+       gffversion = Integer.parseInt(version);
+     } catch (NumberFormatException nfe)
+     {
+       // a malformed header should not abort the whole parse
+       System.err.println("Ignoring malformed gff-version pragma:\n"
+               + line);
+     }
+     break;
+   case feature_ontology:
+     // resolve against specific feature ontology
+     break;
+   case attribute_ontology:
+     // resolve against specific attribute ontology
+     break;
+   case source_ontology:
+     // resolve against specific source ontology
+     break;
+   case species_build:
+     // resolve against specific NCBI taxon version
+     break;
+   case hash:
+     // close off any open feature hierarchies
+     break;
+   case fasta:
+     // process the rest of the file as a fasta file and replace any dummy
+     // sequence IDs
+     process_as_fasta(align, newseqs);
+     break;
+   default:
+     // registered pragmas with no handler yet (e.g. sequence-region)
+     System.err.println("Ignoring unknown pragma:\n" + line);
+   }
+ }
+
+ /**
+ * Read the remainder of this file as FASTA and add the resulting sequences
+ * to the alignment. Where a FASTA sequence matches (by id) a SequenceDummy
+ * held in newseqs, the dummy takes on the FASTA sequence's content and is
+ * added in its place.
+ *
+ * @param align
+ *          alignment that receives every sequence read
+ * @param newseqs
+ *          sequences created earlier in the parse that are candidates for
+ *          completion from the FASTA data
+ * @throws IOException
+ *           if raised while constructing the FASTA parser
+ */
+ private void process_as_fasta(AlignmentI align, List<SequenceI> newseqs)
+         throws IOException
+ {
+   try
+   {
+     mark();
+   } catch (IOException q)
+   {
+     // best effort: carry on parsing, but do not hide the failure
+     System.err.println("Couldn't mark position before reading FASTA section: "
+             + q.getMessage());
+   }
+   FastaFile parser = new FastaFile(this);
+   List<SequenceI> includedseqs = parser.getSeqs();
+   SequenceIdMatcher smatcher = new SequenceIdMatcher(newseqs);
+   // iterate over includedseqs, replacing matching ones with newseqs
+   // sequences. Generic iterator not used here because we modify
+   // includedseqs as we go
+   for (int p = 0, pSize = includedseqs.size(); p < pSize; p++)
+   {
+     // mseq is the 'template' imported from the FASTA file
+     SequenceI mseq = includedseqs.get(p);
+     // search for any dummy seq that this sequence can be used to complete
+     SequenceI dummyseq = smatcher.findIdMatch(mseq);
+     // instanceof is false for null, so no separate null check is needed
+     if (dummyseq instanceof SequenceDummy)
+     {
+       // dummyseq was created so it could be annotated and referred to in
+       // alignments/codon mappings before the real sequence was known.
+       // NOTE: the pattern may need revisiting - when a dummy is promoted
+       // to a real sequence, all references to it must stay valid, which is
+       // why the dummy is updated in place rather than swapped out.
+       ((SequenceDummy) dummyseq).become(mseq);
+       includedseqs.set(p, dummyseq); // template is no longer needed
+     }
+   }
+   // finally add sequences to the dataset
+   for (SequenceI seq : includedseqs)
+   {
+     align.addSequence(seq);
+   }
+ }
/**
* take a sequence feature and examine its attributes to decide how it should
* @return true if sf was actually added to the sequence, false if it was
* processed in another way
*/
- public boolean processOrAddSeqFeature(AlignmentI align, List<SequenceI> newseqs, SequenceI seq, SequenceFeature sf,
+ public boolean processOrAddSeqFeature(AlignmentI align,
+ List<SequenceI> newseqs, SequenceI seq, SequenceFeature sf,
boolean gFFFile, boolean relaxedIdMatching)
{
String attr = (String) sf.getValue("ATTRIBUTES");
boolean add = true;
if (gFFFile && attr != null)
{
- int nattr=8;
+ int nattr = 8;
for (String attset : attr.split("\t"))
{
- if (attset==null || attset.trim().length()==0)
+ if (attset == null || attset.trim().length() == 0)
{
continue;
}
continue;
}
- // expect either space seperated (gff2) or '=' separated (gff3)
+ // expect either space separated (gff2) or '=' separated (gff3)
// key/value pairs here
- int eqpos = pair.indexOf('='),sppos = pair.indexOf(' ');
+ int eqpos = pair.indexOf('='), sppos = pair.indexOf(' ');
String key = null, value = null;
if (sppos > -1 && (eqpos == -1 || sppos < eqpos))
{
key = pair.substring(0, sppos);
value = pair.substring(sppos + 1);
- } else {
+ }
+ else
+ {
if (eqpos > -1 && (sppos == -1 || eqpos < sppos))
{
key = pair.substring(0, eqpos);
value = pair.substring(eqpos + 1);
- } else
+ }
+ else
{
key = pair;
}
{
add &= processGffKey(set, nattr, seq, sf, align, newseqs,
relaxedIdMatching); // process decides if
- // feature is actually
- // added
+ // feature is actually
+ // added
} catch (InvalidGFF3FieldException ivfe)
{
System.err.println(ivfe);
{
int strand = sf.getStrand();
// exonerate cdna/protein map
- // look for fields
+ // look for fields
List<SequenceI> querySeq = findNames(align, newseqs,
- relaxedIdMatching, set.get(attr="Query"));
- if (querySeq==null || querySeq.size()!=1)
+ relaxedIdMatching, set.get(attr = "Query"));
+ if (querySeq == null || querySeq.size() != 1)
{
- throw new InvalidGFF3FieldException( attr, set,
+ throw new InvalidGFF3FieldException(attr, set,
"Expecting exactly one sequence in Query field (got "
+ set.get(attr) + ")");
}
- if (set.containsKey(attr="Align"))
+ if (set.containsKey(attr = "Align"))
{
// process the align maps and create cdna/protein maps
// ideally, the query sequences are in the alignment, but maybe not...
-
+
AlignedCodonFrame alco = new AlignedCodonFrame();
MapList codonmapping = constructCodonMappingFromAlign(set, attr,
strand);
}
private MapList constructCodonMappingFromAlign(
- Map<String, List<String>> set,
- String attr, int strand) throws InvalidGFF3FieldException
+ Map<String, List<String>> set, String attr, int strand)
+ throws InvalidGFF3FieldException
{
if (strand == 0)
{
else
{
match = align.findName(seqId, true);
-
+ if (match == null && newseqs != null)
+ {
+ for (SequenceI m : newseqs)
+ {
+ if (seqId.equals(m.getName()))
+ {
+ return m;
+ }
+ }
+ }
+
}
- if (match==null && newseqs!=null)
+ if (match == null && newseqs != null)
{
match = new SequenceDummy(seqId);
if (relaxedIdMatching)
{
- matcher.addAll(Arrays.asList(new SequenceI[]
- { match }));
+ matcher.addAll(Arrays.asList(new SequenceI[] { match }));
}
+ // add dummy sequence to the newseqs list
+ newseqs.add(match);
}
return match;
}
+
public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML)
{
if (sf.getDescription() == null)
* hash of feature types and colours
* @return features file contents
*/
- public String printJalviewFormat(SequenceI[] seqs, Map<String,Object> visible)
+ public String printJalviewFormat(SequenceI[] seqs,
+ Map<String, Object> visible)
{
return printJalviewFormat(seqs, visible, true, true);
}
out.append(next[j].end);
out.append("\t");
out.append(next[j].type);
- if (next[j].score != Float.NaN)
+ if (!Float.isNaN(next[j].score))
{
out.append("\t");
out.append(next[j].score);
* @param visible
* @return
*/
- public String printGFFFormat(SequenceI[] seqs, Map<String,Object> visible)
+ public String printGFFFormat(SequenceI[] seqs, Map<String, Object> visible)
{
return printGFFFormat(seqs, visible, true, true);
}
- public String printGFFFormat(SequenceI[] seqs, Map<String,Object> visible,
- boolean visOnly, boolean nonpos)
+ public String printGFFFormat(SequenceI[] seqs,
+ Map<String, Object> visible, boolean visOnly, boolean nonpos)
{
StringBuffer out = new StringBuffer();
SequenceFeature[] next;