* when true, assume GFF style features rather than Jalview style.
*/
boolean GFFFile = true;
+ Map<String, String> gffProps = new HashMap<String, String>();
while ((line = nextLine()) != null)
{
+ // skip comments/process pragmas
if (line.startsWith("#"))
{
+ if (line.startsWith("##"))
+ {
+ // possibly GFF2/3 version and metadata header
+ processGffPragma(line, gffProps, align, newseqs);
+ line = "";
+ }
continue;
}
return true;
}
+ /**
+  * The GFF pragma directives (lines beginning with "##") that the pragma
+  * handler distinguishes between.
+  */
+ private enum GffPragmas
+ {
+   // format version declaration
+   gff_version,
+   // sequence region declaration
+   sequence_region,
+   // feature ontology to resolve feature types against
+   feature_ontology,
+   // attribute ontology to resolve attributes against
+   attribute_ontology,
+   // source ontology to resolve sources against
+   source_ontology,
+   // NCBI taxon version to resolve against
+   species_build,
+   // remainder of the file is to be processed as FASTA
+   fasta,
+   // closes off any open feature hierarchies
+   hash
+ }
+
+ /**
+  * Lookup from the lower-cased pragma name (the text that follows the leading
+  * "##" of a pragma line) to the pragma it denotes.
+  */
+ private static Map<String, GffPragmas> GFFPRAGMA;
+ static
+ {
+   GFFPRAGMA = new HashMap<String, GffPragmas>();
+   // without this entry the gff_version case in processGffPragma can never
+   // be reached
+   GFFPRAGMA.put("gff-version", GffPragmas.gff_version);
+   GFFPRAGMA.put("sequence-region", GffPragmas.sequence_region);
+   GFFPRAGMA.put("feature-ontology", GffPragmas.feature_ontology);
+   // a "###" line: the name left after stripping "##" is a single "#"
+   GFFPRAGMA.put("#", GffPragmas.hash);
+   GFFPRAGMA.put("fasta", GffPragmas.fasta);
+   GFFPRAGMA.put("species-build", GffPragmas.species_build);
+   GFFPRAGMA.put("source-ontology", GffPragmas.source_ontology);
+   GFFPRAGMA.put("attribute-ontology", GffPragmas.attribute_ontology);
+ }
+
+ /**
+  * Interprets a single GFF pragma line. The pragma name is the text between
+  * the leading "##" and the first space (or the end of the line); names that
+  * are not in GFFPRAGMA are silently ignored.
+  *
+  * @param line
+  *          the pragma line; expected to start with "##"
+  * @param gffProps
+  *          pragma properties collected by the caller (not currently read or
+  *          written here)
+  * @param align
+  *          alignment passed on to process_as_fasta for a fasta pragma
+  * @param newseqs
+  *          sequences passed on to process_as_fasta for a fasta pragma
+  * @throws IOException
+  *           if processing an embedded FASTA section fails
+  */
+ private void processGffPragma(String line, Map<String, String> gffProps,
+         AlignmentI align, ArrayList<SequenceI> newseqs)
+         throws IOException
+ {
+   if (line == null || !line.startsWith("##"))
+   {
+     // not a pragma line - nothing to do
+     return;
+   }
+   int spacepos = line.indexOf(' ');
+   String pragma = spacepos == -1 ? line.substring(2).trim() : line
+           .substring(2, spacepos);
+   // Locale.ROOT keeps the lookup independent of the user's locale (e.g. the
+   // Turkish dotless i would otherwise break "SEQUENCE-REGION")
+   GffPragmas gffpragma = GFFPRAGMA
+           .get(pragma.toLowerCase(java.util.Locale.ROOT));
+   if (gffpragma == null)
+   {
+     return;
+   }
+   switch (gffpragma)
+   {
+   case gff_version:
+   {
+     // the version may be a plain integer ("3") or dotted ("3.1.26"); only
+     // the leading (major) component is recorded
+     String version = spacepos == -1 ? "" : line.substring(spacepos + 1)
+             .trim();
+     int dotpos = version.indexOf('.');
+     if (dotpos != -1)
+     {
+       version = version.substring(0, dotpos);
+     }
+     try
+     {
+       gffversion = Integer.parseInt(version);
+     } catch (NumberFormatException e)
+     {
+       // a malformed header should not abort parsing of the whole file
+       System.err.println("Ignoring malformed GFF version pragma:\n" + line);
+     }
+     break;
+   }
+   case feature_ontology:
+     // resolve against specific feature ontology
+     break;
+   case attribute_ontology:
+     // resolve against specific attribute ontology
+     break;
+   case source_ontology:
+     // resolve against specific source ontology
+     break;
+   case species_build:
+     // resolve against specific NCBI taxon version
+     break;
+   case hash:
+     // close off any open feature hierarchies
+     break;
+   case fasta:
+     // process the rest of the file as a fasta file and replace any dummy
+     // sequence IDs
+     process_as_fasta(align, newseqs);
+     break;
+   default:
+     // recognised pragma with no specific handling (e.g. sequence_region)
+     System.err.println("Ignoring unknown pragma:\n" + line);
+   }
+ }
+
+ private void process_as_fasta(AlignmentI align, List<SequenceI> newseqs)
+ throws IOException
+ {
+ try
+ {
+ mark();
+ } catch (IOException q)
+ {
+ }
+ FastaFile parser = new FastaFile(this);
+ List<SequenceI> includedseqs = parser.getSeqs();
+ SequenceIdMatcher smatcher = new SequenceIdMatcher(newseqs);
+ // iterate over includedseqs, and replacing matching ones with newseqs
+ // sequences. Generic iterator not used here because we modify includedseqs
+ // as we go
+ for (int p = 0, pSize = includedseqs.size(); p < pSize; p++)
+ {
+ // search for any dummy seqs that this sequence can be used to update
+ SequenceI dummyseq = smatcher.findIdMatch(includedseqs.get(p));
+ if (dummyseq != null)
+ {
+ // dummyseq was created so it could be annotated and referred to in
+ // alignments/codon mappings
+
+ SequenceI mseq = includedseqs.get(p);
+ // mseq is the 'template' imported from the FASTA file which we'll use
+ // to coomplete dummyseq
+ if (dummyseq instanceof SequenceDummy)
+ {
+ // probably have the pattern wrong
+ // idea is that a flyweight proxy for a sequence ID can be created for
+ // 1. stable reference creation
+ // 2. addition of annotation
+ // 3. future replacement by a real sequence
+ // current pattern is to create SequenceDummy objects - a convenience
+ // constructor for a Sequence.
+ // problem is that when promoted to a real sequence, all references
+ // need
+ // to be updated somehow.
+ ((SequenceDummy) dummyseq).become(mseq);
+ includedseqs.set(p, dummyseq); // template is no longer needed
+ }
+ }
+ }
+ // finally add sequences to the dataset
+ for (SequenceI seq : includedseqs)
+ {
+ align.addSequence(seq);
+ }
+ }
/**
* take a sequence feature and examine its attributes to decide how it should