X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FFeaturesFile.java;h=788cad8d0f00589961c5721e5f2e5b64f58a2639;hb=c19d2a91ca05e052e3408bf5852d88eb5d0608f1;hp=9bdc4e30f2e91631f23fbc1d23684f95be85dce8;hpb=ddf4f9e7bc4b26b6b9359da6e5567193c5ae9167;p=jalview.git

diff --git a/src/jalview/io/FeaturesFile.java b/src/jalview/io/FeaturesFile.java
index 9bdc4e3..788cad8 100755
--- a/src/jalview/io/FeaturesFile.java
+++ b/src/jalview/io/FeaturesFile.java
@@ -1,6 +1,6 @@
 /*
- * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
- * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ * Jalview - A Sequence Alignment Editor and Viewer (Version 2.9.0b2)
+ * Copyright (C) 2015 The Jalview Authors
  *
  * This file is part of Jalview.
  *
@@ -70,27 +70,48 @@ public class FeaturesFile extends AlignFile
   }
 
   /**
-   * Creates a new FeaturesFile object.
-   *
    * @param inFile
-   *          DOCUMENT ME!
    * @param type
-   *          DOCUMENT ME!
-   *
    * @throws IOException
-   *           DOCUMENT ME!
    */
   public FeaturesFile(String inFile, String type) throws IOException
   {
     super(inFile, type);
   }
 
+  /**
+   * @param source
+   * @throws IOException
+   */
   public FeaturesFile(FileParse source) throws IOException
   {
     super(source);
   }
 
   /**
+   * @param parseImmediately
+   * @param source
+   * @throws IOException
+   */
+  public FeaturesFile(boolean parseImmediately, FileParse source)
+          throws IOException
+  {
+    super(parseImmediately, source);
+  }
+
+  /**
+   * @param parseImmediately
+   * @param inFile
+   * @param type
+   * @throws IOException
+   */
+  public FeaturesFile(boolean parseImmediately, String inFile, String type)
+          throws IOException
+  {
+    super(parseImmediately, inFile, type);
+  }
+
+  /**
    * Parse GFF or sequence features file using case-independent matching,
    * discarding URLs
    *
@@ -102,8 +123,7 @@ public class FeaturesFile extends AlignFile
    *          - process html strings into plain text
    * @return true if features were added
    */
-  public boolean parse(AlignmentI align, Hashtable colours,
-          boolean removeHTML)
+  public boolean parse(AlignmentI align, Map colours, boolean removeHTML)
   {
     return parse(align, colours, null, removeHTML, false);
   }
@@ -148,6 +168,27 @@ public class FeaturesFile extends AlignFile
     return parse(align, colours, featureLink, removeHTML, false);
   }
 
+  @Override
+  public void addAnnotations(AlignmentI al)
+  {
+    // TODO Auto-generated method stub
+    super.addAnnotations(al);
+  }
+
+  @Override
+  public void addProperties(AlignmentI al)
+  {
+    // TODO Auto-generated method stub
+    super.addProperties(al);
+  }
+
+  @Override
+  public void addSeqGroups(AlignmentI al)
+  {
+    // TODO Auto-generated method stub
+    super.addSeqGroups(al);
+  }
+
   /**
    * Parse GFF or sequence features file
    *
@@ -172,7 +213,8 @@ public class FeaturesFile extends AlignFile
   {
     SequenceI seq = null;
     /**
-     * keep track of any sequences we try to create from the data if it is a GFF3 file
+     * keep track of any sequences we try to create from the data if it is a
+     * GFF3 file
      */
     ArrayList newseqs = new ArrayList();
     String type, desc, token = null;
@@ -187,10 +229,18 @@ public class FeaturesFile extends AlignFile
      * when true, assume GFF style features rather than Jalview style.
      */
     boolean GFFFile = true;
+    Map gffProps = new HashMap();
     while ((line = nextLine()) != null)
     {
+      // skip comments/process pragmas
       if (line.startsWith("#"))
       {
+        if (line.startsWith("##"))
+        {
+          // possibly GFF2/3 version and metadata header
+          processGffPragma(line, gffProps, align, newseqs);
+          line = "";
+        }
         continue;
       }
 
@@ -649,6 +699,124 @@ public class FeaturesFile extends AlignFile
     return true;
   }
 
+  private enum GffPragmas
+  {
+    gff_version, sequence_region, feature_ontology, attribute_ontology, source_ontology, species_build, fasta, hash
+  };
+
+  private static Map GFFPRAGMA;
+  static
+  {
+    GFFPRAGMA = new HashMap();
+    GFFPRAGMA.put("sequence-region", GffPragmas.sequence_region);
+    GFFPRAGMA.put("feature-ontology", GffPragmas.feature_ontology);
+    GFFPRAGMA.put("#", GffPragmas.hash);
+    GFFPRAGMA.put("fasta", GffPragmas.fasta);
+    GFFPRAGMA.put("species-build", GffPragmas.species_build);
+    GFFPRAGMA.put("source-ontology", GffPragmas.source_ontology);
+    GFFPRAGMA.put("attribute-ontology", GffPragmas.attribute_ontology);
+  }
+
+  private void processGffPragma(String line, Map gffProps,
+          AlignmentI align, ArrayList newseqs)
+          throws IOException
+  {
+    // line starts with ##
+    int spacepos = line.indexOf(' ');
+    String pragma = spacepos == -1 ? line.substring(2).trim() : line
+            .substring(2, spacepos);
+    GffPragmas gffpragma = GFFPRAGMA.get(pragma.toLowerCase());
+    if (gffpragma == null)
+    {
+      return;
+    }
+    switch (gffpragma)
+    {
+    case gff_version:
+      try
+      {
+        gffversion = Integer.parseInt(line.substring(spacepos + 1));
+      } finally
+      {
+
+      }
+      break;
+    case feature_ontology:
+      // resolve against specific feature ontology
+      break;
+    case attribute_ontology:
+      // resolve against specific attribute ontology
+      break;
+    case source_ontology:
+      // resolve against specific source ontology
+      break;
+    case species_build:
+      // resolve against specific NCBI taxon version
+      break;
+    case hash:
+      // close off any open feature hierarchies
+      break;
+    case fasta:
+      // process the rest of the file as a fasta file and replace any dummy
+      // sequence IDs
+      process_as_fasta(align, newseqs);
+      break;
+    default:
+      // we do nothing ?
+      System.err.println("Ignoring unknown pragma:\n" + line);
+    }
+  }
+
+  private void process_as_fasta(AlignmentI align, List newseqs)
+          throws IOException
+  {
+    try
+    {
+      mark();
+    } catch (IOException q)
+    {
+    }
+    FastaFile parser = new FastaFile(this);
+    List includedseqs = parser.getSeqs();
+    SequenceIdMatcher smatcher = new SequenceIdMatcher(newseqs);
+    // iterate over includedseqs, and replacing matching ones with newseqs
+    // sequences. Generic iterator not used here because we modify includedseqs
+    // as we go
+    for (int p = 0, pSize = includedseqs.size(); p < pSize; p++)
+    {
+      // search for any dummy seqs that this sequence can be used to update
+      SequenceI dummyseq = smatcher.findIdMatch(includedseqs.get(p));
+      if (dummyseq != null)
+      {
+        // dummyseq was created so it could be annotated and referred to in
+        // alignments/codon mappings
+
+        SequenceI mseq = includedseqs.get(p);
+        // mseq is the 'template' imported from the FASTA file which we'll use
+        // to coomplete dummyseq
+        if (dummyseq instanceof SequenceDummy)
+        {
+          // probably have the pattern wrong
+          // idea is that a flyweight proxy for a sequence ID can be created for
+          // 1. stable reference creation
+          // 2. addition of annotation
+          // 3. future replacement by a real sequence
+          // current pattern is to create SequenceDummy objects - a convenience
+          // constructor for a Sequence.
+          // problem is that when promoted to a real sequence, all references
+          // need
+          // to be updated somehow.
+          ((SequenceDummy) dummyseq).become(mseq);
+          includedseqs.set(p, dummyseq); // template is no longer needed
+        }
+      }
+    }
+    // finally add sequences to the dataset
+    for (SequenceI seq : includedseqs)
+    {
+      align.addSequence(seq);
+    }
+  }
 
   /**
    * take a sequence feature and examine its attributes to decide how it should
@@ -665,18 +833,19 @@ public class FeaturesFile extends AlignFile
    * @return true if sf was actually added to the sequence, false if it was
    *         processed in another way
    */
-  public boolean processOrAddSeqFeature(AlignmentI align, List newseqs, SequenceI seq, SequenceFeature sf,
+  public boolean processOrAddSeqFeature(AlignmentI align,
+          List newseqs, SequenceI seq, SequenceFeature sf,
           boolean gFFFile, boolean relaxedIdMatching)
   {
     String attr = (String) sf.getValue("ATTRIBUTES");
     boolean add = true;
     if (gFFFile && attr != null)
     {
-      int nattr=8;
+      int nattr = 8;
 
       for (String attset : attr.split("\t"))
       {
-        if (attset==null || attset.trim().length()==0)
+        if (attset == null || attset.trim().length() == 0)
         {
           continue;
         }
@@ -692,22 +861,25 @@ public class FeaturesFile extends AlignFile
           continue;
         }
 
-        // expect either space seperated (gff2) or '=' separated (gff3) 
+        // expect either space seperated (gff2) or '=' separated (gff3)
         // key/value pairs here
-        int eqpos = pair.indexOf('='),sppos = pair.indexOf(' ');
+        int eqpos = pair.indexOf('='), sppos = pair.indexOf(' ');
         String key = null, value = null;
         if (sppos > -1 && (eqpos == -1 || sppos < eqpos))
         {
           key = pair.substring(0, sppos);
          value = pair.substring(sppos + 1);
-        } else {
+        }
+        else
+        {
          if (eqpos > -1 && (sppos == -1 || eqpos < sppos))
          {
            key = pair.substring(0, eqpos);
            value = pair.substring(eqpos + 1);
-          } else
+          }
+          else
          {
            key = pair;
          }
@@ -730,8 +902,8 @@ public class FeaturesFile extends AlignFile
            {
              add &= processGffKey(set, nattr, seq, sf, align, newseqs,
                      relaxedIdMatching); // process decides if
-            // feature is actually
-            // added
+                                          // feature is actually
+                                          // added
            } catch (InvalidGFF3FieldException ivfe)
            {
              System.err.println(ivfe);
@@ -780,20 +952,20 @@ public class FeaturesFile extends AlignFile
    {
      int strand = sf.getStrand();
      // exonerate cdna/protein map
-      // look for fields 
+      // look for fields
      List querySeq = findNames(align, newseqs,
-              relaxedIdMatching, set.get(attr="Query"));
-      if (querySeq==null || querySeq.size()!=1)
+              relaxedIdMatching, set.get(attr = "Query"));
+      if (querySeq == null || querySeq.size() != 1)
      {
-        throw new InvalidGFF3FieldException( attr, set,
+        throw new InvalidGFF3FieldException(attr, set,
                "Expecting exactly one sequence in Query field (got "
                        + set.get(attr) + ")");
      }
-      if (set.containsKey(attr="Align"))
+      if (set.containsKey(attr = "Align"))
      {
        // process the align maps and create cdna/protein maps
        // ideally, the query sequences are in the alignment, but maybe not...
-        
+
        AlignedCodonFrame alco = new AlignedCodonFrame();
        MapList codonmapping = constructCodonMappingFromAlign(set, attr, strand);
@@ -811,8 +983,8 @@ public class FeaturesFile extends AlignFile
  }
 
  private MapList constructCodonMappingFromAlign(
-          Map> set,
-          String attr, int strand) throws InvalidGFF3FieldException
+          Map> set, String attr, int strand)
+          throws InvalidGFF3FieldException
  {
    if (strand == 0)
    {
@@ -956,19 +1128,31 @@ public class FeaturesFile extends AlignFile
    else
    {
      match = align.findName(seqId, true);
-      
+      if (match == null && newseqs != null)
+      {
+        for (SequenceI m : newseqs)
+        {
+          if (seqId.equals(m.getName()))
+          {
+            return m;
+          }
+        }
+      }
+    }
 
-    if (match==null && newseqs!=null)
+    if (match == null && newseqs != null)
    {
      match = new SequenceDummy(seqId);
      if (relaxedIdMatching)
      {
-        matcher.addAll(Arrays.asList(new SequenceI[]
-        { match }));
+        matcher.addAll(Arrays.asList(new SequenceI[] { match }));
      }
+      // add dummy sequence to the newseqs list
+      newseqs.add(match);
    }
    return match;
  }
 
+
  public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML)
  {
    if (sf.getDescription() == null)
@@ -996,7 +1180,8 @@ public class FeaturesFile extends AlignFile
   *          hash of feature types and colours
   * @return features file contents
   */
-  public String printJalviewFormat(SequenceI[] seqs, Map visible)
+  public String printJalviewFormat(SequenceI[] seqs,
+          Map visible)
  {
    return printJalviewFormat(seqs, visible, true, true);
  }
@@ -1204,7 +1389,7 @@ public class FeaturesFile extends AlignFile
        out.append(next[j].end);
        out.append("\t");
        out.append(next[j].type);
-        if (next[j].score != Float.NaN)
+        if (!Float.isNaN(next[j].score))
        {
          out.append("\t");
          out.append(next[j].score);
@@ -1244,13 +1429,13 @@ public class FeaturesFile extends AlignFile
   * @param visible
   * @return
   */
-  public String printGFFFormat(SequenceI[] seqs, Map visible)
+  public String printGFFFormat(SequenceI[] seqs, Map visible)
  {
    return printGFFFormat(seqs, visible, true, true);
  }
 
-  public String printGFFFormat(SequenceI[] seqs, Map visible,
-          boolean visOnly, boolean nonpos)
+  public String printGFFFormat(SequenceI[] seqs,
+          Map visible, boolean visOnly, boolean nonpos)
  {
    StringBuffer out = new StringBuffer();
    SequenceFeature[] next;
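
The constructors and the parse(AlignmentI, Map, boolean) entry point touched above are normally driven from Jalview's file-import code rather than called directly. The sketch below is not part of the commit; it only illustrates, under stated assumptions, how the revised API could be exercised. The file name "uniref50.gff", the protocol string "File" and the wrapper class GffImportSketch are illustrative placeholders, and the raw Map type mirrors the signature shown in the diff.

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import jalview.datamodel.AlignmentI;
import jalview.io.FeaturesFile;

public class GffImportSketch
{
  /**
   * Loads GFF2/GFF3 features (including any ##FASTA section handled by
   * processGffPragma/process_as_fasta above) onto an existing alignment.
   */
  public static void addFeatures(AlignmentI align) throws IOException
  {
    // "File" selects the local-file source; the path is a placeholder
    FeaturesFile ff = new FeaturesFile("uniref50.gff", "File");

    // feature type -> colour map, populated by the parser as new types appear
    Map colours = new HashMap();

    // removeHTML = true converts HTML feature descriptions to plain text
    if (!ff.parse(align, colours, true))
    {
      System.err.println("No sequence features were added");
    }
  }
}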