JAL-1260 v2 patch from David Roldán-Martínez
authorJim Procter <jprocter@dundee.ac.uk>
Mon, 9 Jun 2014 09:19:48 +0000 (10:19 +0100)
committerJim Procter <jprocter@dundee.ac.uk>
Mon, 9 Jun 2014 09:30:15 +0000 (10:30 +0100)
20 files changed:
src/jalview/datamodel/DBRefSource.java [changed mode: 0755->0644]
src/jalview/io/AppletFormatAdapter.java [changed mode: 0755->0644]
src/jalview/io/DnaUtils.java [new file with mode: 0644]
src/jalview/io/GenBankFile.java [new file with mode: 0644]
src/jalview/io/IdentifyFile.java [changed mode: 0755->0644]
src/jalview/io/xdb/genbank/GenBankFeature.java [new file with mode: 0644]
src/jalview/io/xdb/genbank/GenBankLocation.java [new file with mode: 0644]
src/jalview/io/xdb/genbank/GenBankLocationPoint.java [new file with mode: 0644]
src/jalview/io/xdb/genbank/GenBankLocationRange.java [new file with mode: 0644]
src/jalview/io/xdb/genbank/GenBankLocations.java [new file with mode: 0644]
src/jalview/io/xdb/genbank/GenBankLocus.java [new file with mode: 0644]
src/jalview/io/xdb/genbank/GenBankReference.java [new file with mode: 0644]
src/jalview/io/xdb/genbank/GenBankSequence.java [new file with mode: 0644]
src/jalview/io/xdb/genbank/GenBankSource.java [new file with mode: 0644]
src/jalview/io/xdb/genbank/GenBankVersion.java [new file with mode: 0644]
test/jalview/io/GU324925.1.gb [new file with mode: 0644]
test/jalview/io/GenBankTest.java [new file with mode: 0644]
test/jalview/io/M92650.1.gb [new file with mode: 0644]
test/jalview/io/NC_000011.10.gb [new file with mode: 0644]
test/jalview/io/V00505.gb [new file with mode: 0644]

old mode 100755 (executable)
new mode 100644 (file)
index 1af18b6..b3b3b64
@@ -74,7 +74,10 @@ public class DBRefSource
    * GeneDB ID
    */
   public static final String GENEDB = "GeneDB";
-
+  /**
+   * GenBank
+   */
+  public static final String GENBANK = "GenBank";
   /**
    * List of databases whose sequences might have coding regions annotated
    */
old mode 100755 (executable)
new mode 100644 (file)
index d7da302..ead63c8
@@ -41,7 +41,7 @@ public class AppletFormatAdapter
    */
   public static final String[] READABLE_FORMATS = new String[]
   { "BLC", "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "STH",
-      "PDB", "JnetFile", "RNAML" }; // , "SimpleBLAST" };
+      "PDB", "JnetFile", "RNAML", "GENBANK" };
 
   /**
    * List of valid format strings for use by callers of the formatSequences
@@ -71,8 +71,8 @@ public class AppletFormatAdapter
    * corresponding to READABLE_FNAMES
    */
   public static final String[] READABLE_EXTENSIONS = new String[]
-  { "fa, fasta, mfa, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa",
-      "jar,jvp", "sto,stk", "xml,rnaml" }; // ".blast"
+  { "fa,faa,fasta,mfa,fastq", "aln", "pfam", "msf", "pir", "blc", "amsa",
+      "jar,jvp", "sto,stk", "xml,rnaml", "gb" }; // ".blast"
 
   /**
    * List of readable formats by application in order corresponding to
@@ -80,7 +80,7 @@ public class AppletFormatAdapter
    */
   public static final String[] READABLE_FNAMES = new String[]
   { "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Jalview",
-      "Stockholm", "RNAML" };// ,
+      "Stockholm", "RNAML", "GenBank" };
 
   // "SimpleBLAST"
   // };
@@ -245,7 +245,10 @@ public class AppletFormatAdapter
       {
         afile = new RnamlFile(inFile, type);
       }
-
+      else if (format.equals("GENBANK"))
+      {
+        afile = new GenBankFile(inFile, type);
+      }
       Alignment al = new Alignment(afile.getSeqsAsArray());
 
       afile.addAnnotations(al);
@@ -360,6 +363,10 @@ public class AppletFormatAdapter
       {
         afile = new SimpleBlastFile(source);
       }
+      else if (format.equals("GENBANK"))
+      {
+        afile = new GenBankFile(source);
+      }
 
       Alignment al = new Alignment(afile.getSeqsAsArray());
 
@@ -467,7 +474,10 @@ public class AppletFormatAdapter
       {
         afile = new RnamlFile();
       }
-
+      else if (format.equalsIgnoreCase("GENBANK"))
+      {
+        afile = new GenBankFile();
+      }
       else
       {
         throw new Exception(
diff --git a/src/jalview/io/DnaUtils.java b/src/jalview/io/DnaUtils.java
new file mode 100644 (file)
index 0000000..acd0bb9
--- /dev/null
@@ -0,0 +1,61 @@
+package jalview.io;
+
+import jalview.io.xdb.genbank.GenBankFeature;
+import jalview.io.xdb.genbank.GenBankSequence;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Vector;
+
+/**
+ * Static helpers for extracting nucleotide strings from parsed GenBank data.
+ */
+public class DnaUtils {
+
+       /**
+        * Returns the nucleotides spanned by a CDS feature.
+        * <p>
+        * The span is read from the feature's "range" qualifier and must be a simple
+        * <code>start..end</code> pair. Both bounds are treated as 1-based and
+        * inclusive, which is the coordinate convention of GenBank feature
+        * locations.
+        *
+        * @param gbf CDS feature data
+        * @param sequences ORIGIN data
+        * @return Nucleotide String (sequence) of the CDS, or null when the feature
+        *         is not a CDS, has no "range" qualifier, uses a location form that
+        *         is not supported yet (e.g. join(...)), or lies outside the
+        *         sequence
+        */
+       public static String getSequence(GenBankFeature gbf, Vector<GenBankSequence> sequences){
+               if (gbf.getType() == null || !gbf.getType().equals(GenBankFeature.CDS)){
+                       //If the feature is not a CDS, no sequence is returned
+                       return null;
+               }
+               String range = gbf.getQualifier("range");
+               if (range == null || range.startsWith("join")){
+                       //TODO composed (join) locations are not supported yet
+                       return null;
+               }
+               //String.split takes a regular expression, so the dots must be escaped;
+               //an unescaped ".." matches any two characters
+               String[] positions = range.trim().split("\\.\\.");
+               if (positions.length != 2){
+                       return null;
+               }
+               int initRange;
+               int endRange;
+               try{
+                       initRange = Integer.parseInt(positions[0].trim());
+                       endRange = Integer.parseInt(positions[1].trim());
+               }catch (NumberFormatException e){
+                       //Bounds such as "<1" or "complement(1" are not handled yet
+                       return null;
+               }
+               String sourceSequence = getNucleotidesFromSequenceVector(sequences);
+               if (initRange < 1 || endRange < initRange || endRange > sourceSequence.length()){
+                       return null;
+               }
+               //Convert the 1-based inclusive range to substring's 0-based,
+               //end-exclusive indices
+               return sourceSequence.substring(initRange - 1, endRange);
+       }
+
+       /**
+        * NOTE(review): not called anywhere in this class. Both bounds are compared
+        * against gbs.getId() with >=, so this only checks that neither bound lies
+        * before the row id - confirm the intended semantics before using it.
+        */
+       private static boolean isSequenceInRange(int initRange, int endRange, GenBankSequence gbs){
+               return ((initRange>=gbs.getId()) && (endRange>=gbs.getId()));
+       }
+
+       /**
+        * Placeholder: always returns an empty string. Not called anywhere in this
+        * class.
+        */
+       private static String getNucleotidesInRangeFromSequence(int initRange, int endRange, GenBankSequence gbs){
+               return "";
+       }
+
+       /**
+        * Concatenates, in order, every string returned by getSequences() for every
+        * entry of the vector.
+        *
+        * @param v ORIGIN rows; null is treated as empty
+        * @return the concatenated nucleotides, never null
+        */
+       public static String getNucleotidesFromSequenceVector(Vector<GenBankSequence> v){
+               StringBuilder sb = new StringBuilder();
+               if (v == null){
+                       return sb.toString();
+               }
+               for (GenBankSequence gbs:v){
+                       Vector<String> seqs = gbs.getSequences();
+                       for (String s:seqs){
+                               sb.append(s);
+                       }
+               }
+               return sb.toString();
+       }
+
+       /**
+        * Empty entry point; does nothing.
+        *
+        * @param args ignored
+        */
+       public static void main(String[] args) {
+               // TODO Auto-generated method stub
+
+       }
+
+}
diff --git a/src/jalview/io/GenBankFile.java b/src/jalview/io/GenBankFile.java
new file mode 100644 (file)
index 0000000..4715095
--- /dev/null
@@ -0,0 +1,873 @@
+package jalview.io;
+
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.DBRefEntry;
+import jalview.datamodel.DBRefSource;
+import jalview.datamodel.Mapping;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.io.xdb.genbank.GenBankFeature;
+import jalview.io.xdb.genbank.GenBankLocation;
+import jalview.io.xdb.genbank.GenBankLocationPoint;
+import jalview.io.xdb.genbank.GenBankLocationRange;
+import jalview.io.xdb.genbank.GenBankLocations;
+import jalview.io.xdb.genbank.GenBankLocus;
+import jalview.io.xdb.genbank.GenBankReference;
+import jalview.io.xdb.genbank.GenBankSequence;
+import jalview.io.xdb.genbank.GenBankSource;
+import jalview.io.xdb.genbank.GenBankVersion;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Enumeration;
+import java.util.Hashtable;
+import java.util.List;
+import java.util.Vector;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.james.mime4j.field.ParsedField;
+
+public class GenBankFile extends AlignFile {
+    private static final Logger log = Logger.getLogger(GenBankFile.class.getName());
+       private GenBankVersion version = new GenBankVersion();
+       private GenBankLocus locus = new GenBankLocus();
+       private GenBankSource source = new GenBankSource();
+    private static final Pattern patLocation = Pattern.compile("(\\d+)\\.\\.(\\d+)");
+    private static final Pattern patLocationComp = Pattern.compile("(complement)\\((\\d+)\\.\\.(\\d+)\\)");
+    private static final Pattern patLocus = Pattern.compile("^LOCUS +([a-z|A-Z|0-9|_]+) +([0-9]+) bp ( {3}|ss\\-|ds\\-|ms\\-)([a-z|A-Z|-|\\s]+) ([a-z| ]{8}) ([A-Z| ]{3}) ([0-9]+-[A-Z]+-[0-9]+)");
+    private static final Pattern patQualifierKey = Pattern.compile("/(.*?)=");
+    private static final Pattern patFeatureKey = Pattern.compile("^\\s{5}([A-Za-z0-9\\_\\']+)\\s+");
+    
+    private String definition;
+    private String accession;
+    private String keywords;
+    private String dblink;
+    private String baseCount;
+
+       private Vector<GenBankFeature> features;
+       private Vector<String> comments;
+       //Items under origin 
+       private Vector<GenBankSequence> sequences;
+       private Vector<GenBankReference> references;
+       
+       private SequenceI genBankSequence;
+
+       /**
+        * Creates an empty GenBankFile without reading any input.
+        */
+       public GenBankFile() {
+       }
+
+       /**
+        * Creates a GenBankFile by passing both arguments straight to the
+        * superclass constructor.
+        *
+        * @param inFile forwarded unchanged to the superclass
+        * @param type forwarded unchanged to the superclass
+        * @throws IOException if the superclass constructor throws it
+        */
+       public GenBankFile(String inFile, String type) throws IOException {
+               super(inFile, type);
+       }
+
+       /**
+        * Creates a GenBankFile by passing the given FileParse straight to the
+        * superclass constructor.
+        *
+        * @param source forwarded unchanged to the superclass
+        * @throws IOException if the superclass constructor throws it
+        */
+       public GenBankFile(FileParse source) throws IOException {
+               super(source);
+       }
+
+       /**
+        * Runs the superclass initialisation and then gives this parser fresh,
+        * empty containers for references, ORIGIN rows, comments and features.
+        */
+       public void initData() {
+               super.initData();
+               this.references = new Vector<GenBankReference>();
+               this.sequences = new Vector<GenBankSequence>();
+               this.comments = new Vector<String>();
+               this.features = new Vector<GenBankFeature>();
+       }
+
+       /**
+        * Parses a GenBank flat file, reading it through nextLine(), and hands the
+        * collected data to setEntries().
+        * <p>
+        * The input is consumed one line at a time. Keyword lines (LOCUS,
+        * DEFINITION, REFERENCE, FEATURES, ORIGIN, ...) switch the parser between
+        * sections; lines without a keyword are appended to whichever multi-line
+        * field is currently open. A pending reference or feature is stored as soon
+        * as its section ends, including when the input ends without a following
+        * COMMENT or ORIGIN line.
+        *
+        * @throws IOException
+        *           if isValid() is false, or if reading a line fails
+        */
+       public void parse() throws IOException {
+               String line;
+               boolean featureMode = false;                                    //Inside the FEATURES table
+               boolean seqMode = false;                                                //Inside the sequence rows that follow ORIGIN
+               boolean referenceMode = false;                                  //Inside a REFERENCE entry
+               boolean sourceMode = false;                                             //Inside the SOURCE section
+               boolean commentMode = false;                                    //Inside the COMMENT section
+               boolean parsingAuthors = false;                                 //Parsing authors (multiline)
+               boolean parsingDefinition = false;                              //Parsing definition (multiline)
+               boolean parsingKeywords = false;                                //Parsing keywords (multiline)
+               boolean parsingDbLink = false;                                  //Parsing DBLINK (multiline)
+               boolean parsingTitle = false;                                   //Parsing title (multiline)
+               boolean parsingQualifier = false;                               //Parsing feature qualifier (multiline)
+               String currentQualifierName = "";
+               GenBankReference reference = null;                              //Reference being built, not yet stored
+               GenBankFeature feature = null;                                  //Feature being built, not yet stored
+               List<String> sourceLines = new ArrayList<String>();
+
+               if (!this.isValid()){
+                       //File is not valid
+                       throw new IOException("GenBankFile is not valid.");
+               }
+
+               while ((line = nextLine()) != null) {
+                       // We only process lines if they have contents within
+                       if (line.length() == 0)
+                               continue;
+
+                       if (line.startsWith("FEATURES")){
+                               //The reference section is over: store the last reference even
+                               //when no COMMENT line followed it
+                               if (reference != null){
+                                       references.add(reference);
+                                       reference = null;
+                               }
+                               featureMode = true;
+                               seqMode = false;
+                               referenceMode = false;
+                               sourceMode = false;
+                               commentMode = false;
+                               feature = new GenBankFeature();
+                               source = parseSource(sourceLines);
+                               continue;
+                       }
+
+                       if (seqMode) {
+                               if (line.startsWith("//")){
+                                       //End-of-record marker closes the sequence section
+                                       seqMode = false;
+                               }else{
+                                       GenBankSequence seq = processSequenceLine(line);
+                                       sequences.add(seq);
+                               }
+                               //Sequence rows must not leak into any other open field
+                               continue;
+                       }
+
+                       if (line.startsWith("ORIGIN")){
+                               //feature is null when the file has no FEATURES section
+                               if (feature != null && feature.getType()!=null)
+                                       features.add(feature);
+                               feature = null;
+                               if (reference != null){
+                                       references.add(reference);
+                                       reference = null;
+                               }
+                               featureMode = false;
+                               referenceMode = false;
+                               sourceMode = false;
+                               commentMode = false;
+                               seqMode = true;
+                               continue;
+                       }
+
+                       if (featureMode && !line.startsWith("BASE COUNT")){
+                               // Process feature line
+                               if (!line.trim().startsWith("/")){
+                                       Matcher featuresMatch = patFeatureKey.matcher(line);
+                                       if (featuresMatch.find()){
+                                               //A new feature key starts here, so the previous feature (if
+                                               //it ever received a type) is complete and can be stored
+                                               if (feature.getType()!=null)
+                                                       features.add(feature);
+                                               //NOTE(review): group(0) is the whole match, i.e. the key
+                                               //with its surrounding whitespace; confirm whether setType
+                                               //trims it or whether group(1) was intended
+                                               String type = featuresMatch.group(0);
+                                               feature = new GenBankFeature();
+                                               feature.setType(type);
+                                               GenBankLocation loc = parserFeatureLocation(feature, line.replace(type,""));
+                                               feature.setLocation(loc);
+                                               parsingQualifier = false;
+                                               continue;
+                                       }else if (parsingQualifier) {                                   //If not a feature, it's another part of a qualifier
+                                               String qValue = feature.getQualifier(currentQualifierName);
+                                               StringBuffer sb = new StringBuffer().append(qValue).append(ltrim(line));
+                                               feature.updateQualifier(currentQualifierName, sb.toString());
+                                               continue;
+                                       }
+                               }else{
+                                       //It's the beginning of a qualifier line
+                                       Matcher matcher = patQualifierKey.matcher(line);
+                                       if (matcher.find()){
+                                               currentQualifierName = matcher.group(1).replace("/","");
+                                               //The value is everything after the first "/name="; taking
+                                               //the remainder keeps any '/', '=' or repeat of the name
+                                               //that occurs inside the value
+                                               feature.addQualifier(currentQualifierName, ltrim(line.substring(matcher.end())));
+                                               parsingQualifier = true;
+                                               continue;
+                                       }
+                               }
+                       }
+                       // Process REFERENCE line
+                       if (line.startsWith("REFERENCE")) {
+                               if (!referenceMode){
+                                       //This line is the first REFERENCE line
+                                       referenceMode = true;
+                                       featureMode = false;
+                                       sourceMode = false;
+                                       seqMode = false;
+                               }
+                               //Store the reference built so far before starting a new one
+                               if (reference != null){
+                                       references.add(reference);
+                               }
+                               reference = new GenBankReference();
+                               String desc = processReferenceLine(line,"REFERENCE");
+                               int[] ranges = parseReferenceDescriptor(desc);
+                               reference.setDescriptor(desc);
+                               reference.setOrder(ranges[0]);
+                               reference.setBegin(ranges[1]);
+                               reference.setEnd(ranges[2]);
+                               parsingAuthors = false;
+                               parsingTitle = false;
+                               continue;
+                       }
+
+                       if (line.startsWith("  AUTHORS")){
+                               if (referenceMode){
+                                       reference.setAuthors(processReferenceLine(line,"AUTHORS"));
+                                       parsingAuthors = true;
+                                       parsingTitle = false;
+                               }
+                               continue;
+                       }
+                       if (line.startsWith("  TITLE")){
+                               if (referenceMode){
+                                       reference.setTitle(processReferenceLine(line,"TITLE"));
+                                       parsingAuthors = false;
+                                       parsingTitle = true;
+                               }
+                               continue;
+                       }
+                       if (line.startsWith("  JOURNAL")){
+                               if (referenceMode){
+                                       reference.setJournal(processReferenceLine(line,"JOURNAL"));
+                                       parsingTitle = false;
+                                       parsingAuthors = false;
+                               }
+                               continue;
+                       }
+                       if (line.startsWith("   PUBMED")){
+                               if (referenceMode){
+                                       reference.setPubmed(processReferenceLine(line,"PUBMED"));
+                                       parsingTitle = false;
+                                       parsingAuthors = false;
+                               }
+                               continue;
+                       }
+
+                       if (line.startsWith("   MEDLINE")){
+                               if (referenceMode){
+                                       reference.setMedline(processReferenceLine(line,"MEDLINE"));
+                                       parsingTitle = false;
+                                       parsingAuthors = false;
+                               }
+                               continue;
+                       }
+                       if (line.startsWith("  REMARK")){
+                               if (referenceMode){
+                                       reference.setRemark(processReferenceLine(line,"REMARK"));
+                                       parsingTitle = false;
+                                       parsingAuthors = false;
+                               }
+                               continue;
+                       }
+                       if (line.startsWith("  CONSRTM")){
+                               if (referenceMode){
+                                       reference.setConsortia(processReferenceLine(line,"CONSRTM"));
+                                       parsingTitle = false;
+                                       parsingAuthors = false;
+                               }
+                               continue;
+                       }
+
+                       if (line.startsWith("SOURCE")) {
+                               parsingKeywords = false;
+                               sourceMode = true;
+                               commentMode = false;
+                               sourceLines.add(line);
+                               continue;
+                       }
+                       if (line.indexOf("ORGANISM")!=-1) {
+                               if (sourceMode){
+                                       sourceLines.add(line);
+                                       continue;
+                               }
+                       }
+
+                       if (line.startsWith("COMMENT")){
+                               if (reference!=null){
+                                       references.add(reference);
+                                       //Cleared so the same reference cannot be stored twice
+                                       reference = null;
+                               }
+                               commentMode = true;
+                               sourceMode = false;
+                               referenceMode = false;
+                               seqMode = false;
+                               comments.add(processCommentLine(line));
+                               continue;
+                       }
+                       // Process LOCUS line
+                       if (line.startsWith("LOCUS")) {
+                               locus = parseLocus(line);
+                               continue;
+                       }
+                       // Process BASE COUNT line
+                       if (line.startsWith("BASE COUNT")) {
+                               baseCount = processHeaderLine(line,"BASE COUNT");
+                               featureMode = false;
+                               continue;
+                       }
+                       // Process DEFINITION line
+                       if (line.startsWith("DEFINITION")) {
+                               definition = processHeaderLine(line,"DEFINITION");
+                               parsingDefinition = true;
+                               continue;
+                       }
+                       // Process ACCESSION line
+                       if (line.startsWith("ACCESSION")) {
+                               accession = processHeaderLine(line,"ACCESSION");
+                               parsingDefinition = false;
+                               continue;
+                       }
+                       // Process VERSION line
+                       if (line.startsWith("VERSION")) {
+                               version = parseVersion(line);
+                               continue;
+                       }
+                       // Process DBLINK line
+                       if (line.startsWith("DBLINK")) {
+                               dblink =  processHeaderLine(line,"DBLINK");
+                               parsingDbLink = true;
+                               continue;
+                       }
+                       // Process KEYWORDS line
+                       if (line.startsWith("KEYWORDS")) {
+                               keywords = processHeaderLine(line,"KEYWORDS");
+                               parsingKeywords = true;
+                               parsingDbLink = false;
+                               continue;
+                       }
+                       //From here on the line carries no keyword: append it to whichever
+                       //multi-line field is still open
+                       if (sourceMode){
+                               sourceLines.add(line);
+                               continue;
+                       }
+                       if (parsingDefinition){
+                               StringBuffer sb = new StringBuffer().append(definition).append(line);
+                               definition = sb.toString();
+                               continue;
+                       }
+                       if (referenceMode && parsingAuthors){
+                               if (reference!=null){
+                                       StringBuffer authors = new StringBuffer().append(reference.getAuthors()).append(line);
+                                       reference.setAuthors(authors.toString());
+                               }
+                               continue;
+                       }
+                       if (referenceMode && parsingTitle){
+                               if (reference!=null){
+                                       StringBuffer title = new StringBuffer().append(reference.getTitle()).append(line);
+                                       reference.setTitle(title.toString());
+                               }
+                               continue;
+                       }
+                       if (parsingKeywords){
+                               StringBuffer sb = new StringBuffer().append(keywords).append(line);
+                               keywords = sb.toString();
+                               continue;
+                       }
+                       if (parsingDbLink){
+                               StringBuffer sb = new StringBuffer().append(dblink).append(line);
+                               dblink = sb.toString();
+                               continue;
+                       }
+                       if (commentMode){
+                               comments.add(line);
+                       }
+               }
+               //Input ended: store whatever was still being built
+               if (reference != null){
+                       references.add(reference);
+               }
+               if (feature != null && feature.getType()!=null){
+                       features.add(feature);
+               }
+               setEntries();
+       }
+
+       /**
+        * Maps the parsed GenBank data onto the Jalview data model: builds one
+        * sequence named after the accession, attaches a GenBank DBRefEntry,
+        * adds the header fields, references, comments and feature-table entries
+        * as sequence features, and registers the sequence through setSeqs().
+        */
+       protected void setEntries(){
+           //Sequence built from the accession and the concatenated ORIGIN rows
+           genBankSequence = new Sequence(accession,DnaUtils.getNucleotidesFromSequenceVector(sequences));
+
+           //Database cross-reference for the accession
+           DBRefEntry genBankRef = new DBRefEntry();
+           genBankRef.setSource(DBRefSource.GENBANK);
+           genBankRef.setVersion(version == null ? "" : version.toString());
+           genBankRef.setAccessionId(accession);
+           // add map to indicate the sequence is a valid coordinate frame for the dbref
+           genBankRef.setMap(new Mapping(null,
+               new int[] { 1, genBankSequence.getLength() },
+               new int[] { 1, genBankSequence.getLength() }, 1, 1));
+           genBankSequence.addDBRef(genBankRef);
+
+           //Header fields, each stored as a feature spanning the whole sequence
+           addWholeSequenceFeature("LOCUS", locus == null ? "" : locus.toString());
+           addWholeSequenceFeature("DEFINITION", definition);
+           addWholeSequenceFeature("ACCESSION", accession);
+           addWholeSequenceFeature("VERSION", version == null ? "" : version.toString());
+           addWholeSequenceFeature("DBLINK", dblink == null ? "" : dblink.toString());
+           addWholeSequenceFeature("KEYWORDS", keywords);
+           addWholeSequenceFeature("SOURCE", source == null ? "" : source.toString());
+           addWholeSequenceFeature("BASE COUNT", baseCount == null ? "" : baseCount.toString());
+
+           //Literature references, positioned on the span each one declares
+           for (GenBankReference ref : references){
+               genBankSequence.addSequenceFeature(new SequenceFeature("REFERENCE", ref.toString(),null,ref.getBegin(),ref.getEnd(),DBRefSource.GENBANK));
+           }
+
+           //All comment lines joined into a single COMMENT feature
+           if (!comments.isEmpty()){
+               StringBuilder joined = new StringBuilder();
+               for (String commentLine : comments){
+                   joined.append(commentLine).append(newline);
+               }
+               addWholeSequenceFeature("COMMENT", joined.toString());
+           }
+
+           //Feature table entries; entries without a type are skipped
+           for (GenBankFeature gbFeature : features){
+               if (gbFeature.getType()==null){
+                   continue;
+               }
+               SequenceFeature mapped = new SequenceFeature();
+               mapped.setType(gbFeature.getType());
+               mapped.setDescription(gbFeature.getType());
+               //A feature without a location gets 0 for both begin and end
+               mapped.setBegin(gbFeature.getLocation()==null ? 0 : gbFeature.getLocation().getMinor());
+               mapped.setEnd(gbFeature.getLocation()==null ? 0 : gbFeature.getLocation().getMajor());
+               for (Enumeration<String> keys = gbFeature.getQualifiersNames(); keys.hasMoreElements();){
+                   String key = keys.nextElement();
+                   mapped.setValue(key, gbFeature.getQualifier(key));
+               }
+               genBankSequence.addSequenceFeature(mapped);
+           }
+
+           this.setSeqs(new SequenceI[] { genBankSequence });
+       }
+
+       /**
+        * Adds a feature in the GenBank group that runs from position 1 to the
+        * current length of genBankSequence.
+        *
+        * @param type feature type
+        * @param description feature description
+        */
+       private void addWholeSequenceFeature(String type, String description){
+           SequenceFeature wholeSpan = new SequenceFeature(type, description, null, 1, genBankSequence.getLength(), DBRefSource.GENBANK);
+           genBankSequence.addSequenceFeature(wholeSpan);
+       }
+    /**
+     * Parses a GenBank VERSION line, for example
+     * <code>VERSION     U00096.2  GI:48994873</code>.
+     *
+     * The accession.version token is read from character offset 11 up to the
+     * next blank, or up to the end of the line when no GI field follows it.
+     * When a <code>GI:</code> marker is present, the text from that marker to
+     * the end of the line is stored as the GI value.
+     *
+     * @param line the raw VERSION line
+     * @return the parsed version, or null when the line holds only the keyword
+     */
+    private GenBankVersion parseVersion(String line) {
+        if (line.trim().equalsIgnoreCase("VERSION")) {
+            return null;
+        }
+        GenBankVersion ver = new GenBankVersion();
+        // Clamp both ends: a line may be shorter than the value column, and a
+        // version without a trailing GI field has no blank after it.
+        int start = Math.min(11, line.length());
+        int end = line.indexOf(" ", 12);
+        if (end < 0) {
+            end = line.length();
+        }
+        String v = line.substring(start, end).trim();
+        ver.setVersion(v);
+        int posGI = line.indexOf("GI:", 11 + v.length());
+        if (posGI > -1) {
+            ver.setGI(line.substring(posGI));
+        }
+        return ver;
+    }
+    
+    /**
+     * Builds a GenBankLocus from a LOCUS header line using the patLocus
+     * pattern. Capture groups 1 to 7 are read as name, sequence length,
+     * strand, molecule type, topology, division and modification date.
+     * Groups that did not participate in the match are stored as an empty
+     * string (or 0 for the length).
+     *
+     * @param line the raw LOCUS line
+     * @return the populated locus, or an untouched new GenBankLocus when the
+     *         pattern does not match
+     */
+    private GenBankLocus parseLocus(String line){
+        GenBankLocus locus = new GenBankLocus();
+        Matcher fields = patLocus.matcher(line);
+        if (!fields.find()) {
+            return locus;
+        }
+
+        String nameField = fields.group(1);
+        String lengthField = fields.group(2);
+        String strandField = fields.group(3);
+        String moleculeField = fields.group(4);
+        String topologyField = fields.group(5);
+        String divisionField = fields.group(6);
+        String dateField = fields.group(7);
+
+        if (nameField == null) {
+            locus.setName("");
+        } else {
+            locus.setName(nameField.trim());
+        }
+        if (lengthField == null) {
+            locus.setSequenceLength(0);
+        } else {
+            locus.setSequenceLength(Integer.parseInt(lengthField));
+        }
+        locus.setStrand(strandField != null ? strandField : "");
+        locus.setMoleculeType(moleculeField != null ? moleculeField : "");
+        // anything other than the literal "linear" counts as not linear
+        locus.setLinearSequence("linear".equals(topologyField));
+        locus.setDivision(divisionField != null ? divisionField : "");
+        locus.setModificationDate(dateField != null ? dateField : "");
+        return locus;
+    }
+       /**
+        * Parses the SOURCE section (the SOURCE line plus its ORGANISM
+        * sub-section) into a GenBankSource.
+        *
+        * The lines are joined and the first 11 characters (the keyword column)
+        * are dropped. The rest of the first line becomes the source text. If an
+        * ORGANISM keyword is present, the remainder of its line (from 10
+        * characters after the start of the keyword) becomes the organism, and
+        * everything after that line, with all whitespace removed, becomes the
+        * taxonomic lineage.
+        *
+        * @param lines the raw lines of the SOURCE section, in file order
+        * @return the parsed source; fields that cannot be located are left unset
+        */
+       private GenBankSource parseSource(List<String> lines){
+               // Join with "\n" because that is the separator searched for below;
+               // a platform separator such as "\r\n" would leave a stray '\r' on
+               // every extracted value.
+               StringBuilder joined = new StringBuilder();
+               for (String line : lines) {
+                       joined.append(line).append("\n");
+               }
+               String text = joined.toString();
+
+               GenBankSource sou = new GenBankSource();
+               // Skip the keyword column; tolerate empty or truncated input.
+               String aux = text.length() > 11 ? text.substring(11) : "";
+
+               int firstLineEnd = aux.indexOf("\n");
+               if (firstLineEnd < 0) {
+                       sou.setSource(aux);
+                       return sou;
+               }
+               sou.setSource(aux.substring(0, firstLineEnd));
+
+               int organismKeyword = aux.indexOf("ORGANISM");
+               if (organismKeyword < 0) {
+                       return sou;
+               }
+               int organismStart = Math.min(organismKeyword + 10, aux.length());
+               int organismEnd = aux.indexOf("\n", organismStart);
+               if (organismEnd > -1) {
+                       sou.setOrganism(aux.substring(organismStart, organismEnd));
+                       // the lineage spans the remaining lines; collapse it to one token
+                       sou.setTaxonomic(aux.substring(organismEnd).replaceAll("\\s+", ""));
+               } else {
+                       // no line break after the organism name: keep only the name,
+                       // not the whole SOURCE text
+                       sou.setOrganism(aux.substring(organismStart).trim());
+               }
+               return sou;
+       }
+
+    /**
+     * Parses a feature location string into a GenBankLocation tree and stores
+     * the result on the given feature.
+     *
+     * All whitespace is removed first. The remaining text is then handled
+     * recursively, in this order:
+     * <ul>
+     * <li>commas outside any parentheses split the text into several
+     * locations, collected in a GenBankLocations with operator NONE;</li>
+     * <li>text containing a parenthesis is treated as
+     * <code>complement(...)</code>, <code>join(...)</code> or
+     * <code>order(...)</code>; any other token in front of the parenthesis is
+     * logged as a warning and only the enclosed text is parsed;</li>
+     * <li>text containing <code>..</code> becomes a GenBankLocationRange, with
+     * an optional <code>entry:</code> prefix recorded on the range;</li>
+     * <li>anything else is parsed as a single GenBankLocationPoint.</li>
+     * </ul>
+     *
+     * Possible situations:
+     * <ul>
+     * <li><code>467</code> - points to a single base in the presented
+     * sequence</li>
+     * <li><code>340..565</code> - points to a continuous range of bases bounded
+     * by and including the starting and ending bases</li>
+     * <li><code>&lt;345..500</code> - the exact lower boundary point of the
+     * feature is unknown; the location begins at some base previous to the
+     * first base specified (which need not be contained in the presented
+     * sequence) and continues to and includes the ending base</li>
+     * <li><code>&lt;1..888</code> - the feature starts before the first
+     * sequenced base and continues to and includes base 888</li>
+     * <li><code>1..&gt;888</code> - the feature starts at the first sequenced
+     * base and continues beyond base 888</li>
+     * <li><code>102.110</code> - the exact location is unknown but it is one of
+     * the bases between bases 102 and 110, inclusive</li>
+     * <li><code>123^124</code> - points to a site between bases 123 and
+     * 124</li>
+     * <li><code>join(12..78,134..202)</code> - regions 12 to 78 and 134 to 202
+     * should be joined to form one contiguous sequence</li>
+     * <li><code>complement(34..126)</code> - start at the base complementary to
+     * 126 and finish at the base complementary to base 34 (the feature is on
+     * the strand complementary to the presented strand)</li>
+     * <li><code>complement(join(2691..4571,4918..5163))</code> - joins regions
+     * 2691 to 4571 and 4918 to 5163, then complements the joined segments</li>
+     * <li><code>join(complement(4918..5163),complement(2691..4571))</code> -
+     * complements regions 4918 to 5163 and 2691 to 4571, then joins the
+     * complemented segments</li>
+     * <li><code>J00194.1:100..202</code> - points to bases 100 to 202,
+     * inclusive, in the entry (in this database) with primary accession
+     * number 'J00194'</li>
+     * <li><code>join(1..100,J00194.1:100..202)</code> - joins region 1..100 of
+     * the existing entry with the region 100..202 of remote entry J00194</li>
+     * </ul>
+     *
+     * Every recursive call also invokes <code>fea.setLocation(...)</code>, so
+     * the value left on the feature is the one set by the outermost call.
+     *
+     * NOTE(review): an empty point, or a separator with nothing after it (for
+     * example "102."), is not guarded and fails with an unchecked exception -
+     * confirm callers never pass such text.
+     *
+     * @param fea feature that receives the parsed location
+     * @param localiza raw location text; may contain spaces and line breaks
+     * @return the parsed location, which is also set on fea
+     */
+    private GenBankLocation parserFeatureLocation(GenBankFeature fea, String localiza) {
+        // strip spaces, line breaks, etc.
+        String buf = localiza.replaceAll("\\s", "");
+
+        // checks if there is a comma present between ranges
+        // complement(100..110),complement(90..100)
+        // (only commas at parenthesis depth 0 split the text)
+        char[] buf2 = buf.toCharArray();
+        int abertos = 0;
+        java.util.List<String> lista = new java.util.ArrayList<String>();
+        int pinicial = 0;
+        for (int i = 0; i < buf2.length; i++) {
+            if (buf2[i] == '(') {
+                abertos++;
+            } else if (buf2[i] == ')') {
+                abertos--;
+            } else if (buf2[i] == ',' && abertos == 0) {
+                lista.add(buf.substring(pinicial, i));
+                pinicial = i + 1;
+            }
+        }
+        if (lista.size() > 0) {
+            // add the text after the last top-level comma, then parse each part
+            lista.add(buf.substring(pinicial));
+            GenBankLocations um = new GenBankLocations();
+            um.setOperator(GenBankLocations.NONE);
+            for (String s : lista) {
+                um.getUnits().add(parserFeatureLocation(fea, s));
+            }
+            fea.setLocation(um);
+            return um;
+        }
+
+        // handle the operators: complement(location,location...),
+        // join(location,location...), order(location,location...)
+        if (buf.contains("(")) {
+               GenBankLocations um = new GenBankLocations();
+            int ini = buf.indexOf("(");
+            int fim = buf.lastIndexOf(")");
+            String token = buf.substring(0, ini);
+            if ("complement".equalsIgnoreCase(token)) {
+                String inter = buf.substring(ini + 1, fim);
+                GenBankLocation interno = parserFeatureLocation(fea, inter);
+                interno.setComplement(true);
+                um.setOperator(GenBankLocations.COMPLEMENT);
+                um.getUnits().add(interno);
+                fea.setLocation(um);
+            } else if ("join".equalsIgnoreCase(token)) {
+                String inter = buf.substring(ini + 1, fim);
+                GenBankLocation interno = parserFeatureLocation(fea, inter);
+                um.setOperator(GenBankLocations.JOIN);
+                um.getUnits().add(interno);
+                fea.setLocation(um);
+            } else if ("order".equalsIgnoreCase(token)) {
+                String inter = buf.substring(ini + 1, fim);
+                GenBankLocation interno = parserFeatureLocation(fea, inter);
+                um.setOperator(GenBankLocations.ORDER);
+                um.getUnits().add(interno);
+                fea.setLocation(um);
+            } else {
+                // unknown operator: warn, then parse only the enclosed text
+                log.log(Level.WARNING, "Token desconhecido em location/features - {0}", token);
+                String inter = buf.substring(ini + 1, fim);
+                fea.setLocation(parserFeatureLocation(fea, inter));
+            }
+            return fea.getLocation();
+        } else {
+            // handle a list of locations
+            // NOTE(review): top-level commas were already split above, so this
+            // branch looks reachable only for text with an unbalanced ')' -
+            // confirm before relying on it
+            if (buf.contains(",")) {
+                String[] partes = buf.split(",");
+                GenBankLocations um = new GenBankLocations();
+                for (String p : partes) {
+                    um.getUnits().add(
+                            parserFeatureLocation(fea, p));
+                }
+                fea.setLocation(um);
+                return um;
+            } else {
+                // handle a range
+                if (buf.contains("..")) {
+                    String[] partes = buf.split("\\.\\.");
+                    GenBankLocationRange range = new GenBankLocationRange();
+                    if (buf.contains(":")) {
+                        // move an "entry:" prefix off the endpoint and onto the range
+                        for (int i = 0; i < partes.length; i++) {
+                            int pos = partes[i].indexOf(":");
+                            if (pos > 0) {
+                                String entry = partes[i].substring(0, pos);
+                                partes[i] = partes[i].substring(pos + 1);
+                                range.setEntry(entry);
+                            }
+                        }
+                    }
+                    // each endpoint has no "..", "(" or "," left, so the
+                    // recursive call takes the point branch below
+                    GenBankLocationPoint gp0 = (GenBankLocationPoint) parserFeatureLocation(fea, partes[0]);
+                    range.setStart(gp0);
+                    GenBankLocationPoint gp1 = (GenBankLocationPoint) parserFeatureLocation(fea, partes[1]);
+                    range.setEnd(gp1);
+                    fea.setLocation(range);
+                    return range;
+                } else {
+                    // handle a single point
+                    // forms considered:
+                    //      467
+                    //      102.110
+                    //      123^124
+                    //      <345
+                    //      >400
+                    //      345>
+                    //      400<
+                    //      or a combination of these
+                       GenBankLocationPoint gp = new GenBankLocationPoint();
+                    if (buf.contains(":")) {
+                        int pos = buf.indexOf(":");
+                        if (pos > 0) {
+                            String entry = buf.substring(0, pos);
+                            buf = buf.substring(pos + 1);
+                            gp.setEntry(entry);
+                        }
+                    }
+                    int pos = 0;
+                    // check for the symbols < and > before the first number
+                    if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>') {
+                        gp.setPrefix(buf.charAt(pos));
+                        pos++;
+                    }
+                    // read the first number
+                    int ini = pos;
+                    while (pos < buf.length() && buf.charAt(pos) >= '0'
+                            && buf.charAt(pos) <= '9') {
+                        pos++;
+                    }
+                    // NOTE(review): diagnostic print only; when no digits were
+                    // found the parseInt below still fails on the empty text
+                    if (buf.subSequence(ini, pos).length() < 1) {
+                        System.out.println(localiza);
+                    }
+                    int num = Integer.parseInt(buf.substring(ini, pos));
+                    int num2 = num;
+                    // the first number may be the only number
+                    if (pos < buf.length()) {
+                        // check for the signs < and > after the first number
+                        if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>') {
+                            if (buf.contains(".") || buf.contains("^")) {
+                                gp.setPrefix(buf.charAt(pos));
+                            } else {
+                                gp.setSufix(buf.charAt(pos));
+                            }
+                            pos++;
+                        }
+
+                        // check for the number separator . or ^
+                        if (pos < buf.length()
+                                && (buf.charAt(pos) == '.' || buf.charAt(pos) == '^')) {
+                            // separator found, a second number may follow
+                            gp.setSymbol(buf.charAt(pos));
+                            pos++;
+
+                            // check for the symbols < and > before the second number
+                            if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>') {
+                                gp.setSufix(buf.charAt(pos));
+                                pos++;
+                            }
+
+                            // read the second number
+                            ini = pos;
+                            while (pos < buf.length() && buf.charAt(pos) >= '0'
+                                    && buf.charAt(pos) <= '9') {
+                                pos++;
+                            }
+                            num2 = Integer.parseInt(buf.substring(ini, pos));
+
+                            // check for the symbols < and > after the second number
+                            if (pos < buf.length() && (buf.charAt(pos) == '<' || buf.charAt(pos) == '>')) {
+                                gp.setSufix(buf.charAt(pos));
+                                pos++;
+                            }
+                        }
+                    }
+                    gp.setMin(num);
+                    gp.setMax(num2);
+                    fea.setLocation(gp);
+                    return gp;
+                }
+            }
+        }
+    }
+    
+    private int[] parseReferenceDescriptor(String descriptor){
+       // 1 (bases 1 to 1609) 
+       int[] resultado = new int[3];
+       descriptor = descriptor.replace("(bases", ",").replace("to", ",").replace(")", "");
+       String[] args = descriptor.split(",");
+       resultado[0] = Integer.parseInt(args[0].trim());
+       resultado[1] = Integer.parseInt(args[1].trim());
+       resultado[2] = Integer.parseInt(args[2].trim());
+       return resultado;
+    }
+       /**
+        * Removes a leading REFERENCE sub-keyword (the component) from a line
+        * and returns what is left, including the blanks around the keyword.
+        *
+        * Only a keyword that is preceded by nothing but whitespace is removed,
+        * and only once. The same word occurring later in the line, or in a
+        * continuation line's text, is part of the value and is kept.
+        *
+        * @param line the raw line
+        * @param component the keyword to strip
+        * @return the line without its leading keyword, or the unchanged line
+        *         when it does not start with that keyword
+        */
+       private String processReferenceLine(String line, String component){
+               int init = line.indexOf(component);
+               if (init != -1 && line.substring(0, init).trim().length() == 0){
+                       line = line.substring(0, init) + line.substring(init + component.length());
+               }
+               return line;
+       }
+       /**
+        * Removes a leading header keyword from a line and returns what is
+        * left, including the blanks that followed the keyword.
+        *
+        * Only a keyword that is preceded by nothing but whitespace is removed,
+        * and only once. The same word occurring later in the line, or in a
+        * continuation line's text, is part of the value and is kept.
+        *
+        * @param line the raw line
+        * @param header the keyword to strip
+        * @return the line without its leading keyword, or the unchanged line
+        *         when it does not start with that keyword
+        */
+       private String processHeaderLine(String line, String header){
+               int init = line.indexOf(header);
+               if (init != -1 && line.substring(0, init).trim().length() == 0){
+                       line = line.substring(0, init) + line.substring(init + header.length());
+               }
+               return line;
+       }
+
+       /**
+        * Parses one line of the ORIGIN section. Leading whitespace is dropped
+        * and the rest is split on single blanks: the first token is parsed as
+        * the numeric id of the line, every following token is stored, in
+        * order, as a sequence chunk.
+        *
+        * @param line the raw ORIGIN line
+        * @return a GenBankSequence holding the id and the chunks of the line
+        */
+       private GenBankSequence processSequenceLine(String line) {
+               GenBankSequence parsed = new GenBankSequence();
+               String[] tokens = ltrim(line).split(" ");
+               parsed.setId(Integer.parseInt(tokens[0]));
+               Vector<String> chunks = new Vector<String>();
+               // token 0 is the position number; the chunks start at token 1
+               for (int idx = 1; idx < tokens.length; idx++) {
+                       chunks.add(tokens[idx]);
+               }
+               parsed.setSequences(chunks);
+               return parsed;
+       }
+
+       private String processCommentLine(String line){
+               int init = line.indexOf("COMMENT");
+               if (init!=-1){
+                        line = line.replace("COMMENT","");
+               }
+               return line;
+       }
+    public String rtrim(String s) {
+        int i = s.length()-1;
+        while (i >= 0 && Character.isWhitespace(s.charAt(i))) {
+            i--;
+        }
+        return s.substring(0,i+1);
+    }
+
+    public String ltrim(String s) {
+        int i = 0;
+        while (i < s.length() && Character.isWhitespace(s.charAt(i))) {
+            i++;
+        }
+        return s.substring(i);
+    }  
+
+    public String print(){
+       StringBuffer out = new StringBuffer();
+       for (SequenceI seq: this.getSeqs()){
+               SequenceFeature[] seqFeatures = seq.getSequenceFeatures();
+                       boolean featureLinePrinted = false;
+               for(SequenceFeature sf:seqFeatures){
+                       if(sf.getType().equals("LOCUS")){
+                               out.append(sf.getDescription()).append(newline);                                
+                       }else if (sf.getType().equals("DEFINITION")){
+                               out.append("DEFINITION  ").append(sf.getDescription()).append(newline);                                 
+                       }else if (sf.getType().equals("VERSION")){
+                               out.append("VERSION     ").append(sf.getDescription()).append(newline);                                 
+                       }else if (sf.getType().equals("ACCESSION")){
+                               out.append("ACCESSION  ").append(sf.getDescription()).append(newline);
+                       }else if (sf.getType().equals("DBLINK")){
+                               out.append("DBLINK ").append(sf.getDescription()).append(newline);
+                       }else if (sf.getType().equals("KEYWORDS")){
+                               out.append("KEYWORDS  ").append(sf.getDescription()).append(newline);                                   
+                       }else if (sf.getType().equals("SOURCE")){
+                               out.append("SOURCE      ").append(sf.getDescription()).append(newline);
+                       }else if (sf.getType().equals("REFERENCE")){
+                               out.append(sf.getDescription()).append(newline);
+                       }else if (sf.getType().equals("COMMENT")){
+                               out.append("COMMENT     ").append(sf.getDescription()).append(newline);                                 
+                       }else if (sf.getType().equals("BASE COUNT")){
+                               out.append("BASE COUNT     ").append(sf.getDescription()).append(newline);                              
+                       }else{
+                               if (!featureLinePrinted){
+                                       out.append("FEATURES             Location/Qualifiers").append(newline);
+                                       featureLinePrinted = true;
+                               }
+                               out.append("     ").append(sf.getType()).append("          ").append(sf.getBegin()).append("..").append(sf.getEnd()).append(newline);
+                               Hashtable<String,String> qualifiers = sf.otherDetails;
+                               if (qualifiers!=null){
+                                       Enumeration<String> keys = qualifiers.keys();
+                                       while (keys.hasMoreElements()){
+                                               String key = keys.nextElement();
+                                               String value = qualifiers.get(key);
+                                               if (value!=null){
+                                                       out.append("                     /").append(key).append("=").append(value).append(newline);
+                                               }
+                                       }
+                               }
+                       }
+               }
+                       out.append("ORIGIN").append(newline);
+                       //We have to divide sequence in groups of 6x10 chars
+                       String sequenceString = seq.getSequenceAsString();
+                       int howManyGroups = (int) Math.floor(sequenceString.length()/60);
+                       for (int i=0;i<=howManyGroups;i++){
+                               String sequenceSegment = sequenceString.substring(i*60,Math.min((i+1)*60, sequenceString.length()));
+                                if ((!"".equals(sequenceSegment) && (sequenceSegment!=null) && (sequenceSegment.length()>0))){
+                                               out.append("        ").append(60*i+1).append(" ");
+                                }
+                               int segmentLength = sequenceSegment.length();
+                               if (segmentLength>=10){
+                                       out.append(sequenceSegment.substring(0,10)).append(" ");
+                                       if (segmentLength>=20){
+                                               out.append(sequenceSegment.substring(10,20)).append(" ");                                       
+                                               if (segmentLength>=30){
+                                                       out.append(sequenceSegment.substring(20,30)).append(" ");                                       
+                                                       if (segmentLength>=40){
+                                                               out.append(sequenceSegment.substring(30,40)).append(" ");                                       
+                                                               if (segmentLength>=50){
+                                                                       out.append(sequenceSegment.substring(40,50)).append(" ");       
+                                                                       if (segmentLength<=60){
+                                                                               out.append(sequenceSegment.substring(50,sequenceSegment.length()));
+                                                                       }
+                                                               }else{
+                                                                       out.append(sequenceSegment.substring(40,sequenceSegment.length()));
+                                                               }
+                                                       }else{
+                                                               out.append(sequenceSegment.substring(30,sequenceSegment.length()));
+                                                       }
+                                               }else{
+                                                       out.append(sequenceSegment.substring(20,sequenceSegment.length()));
+                                               }
+                                       }else{
+                                               out.append(sequenceSegment.substring(10,sequenceSegment.length()));
+                                       }
+                               } else if ((!"".equals(sequenceSegment) && (sequenceSegment!=null) && (sequenceSegment.length()>0))){
+                                       out.append(sequenceSegment);
+                               }
+                               out.append(newline);
+                       }
+                       out.append("//");
+       }
+       return out.toString();
+    }
+}
old mode 100755 (executable)
new mode 100644 (file)
index 08d4dca..8ce2af5
@@ -128,6 +128,13 @@ public class IdentifyFile
         }
         data = data.toUpperCase();
 
+        // GenBank records open with the LOCUS keyword at the start of a line
+        if (data.startsWith("LOCUS "))
+        {
+          reply = "GENBANK";
+          break;
+        }
+
         if ((data.indexOf("# STOCKHOLM") > -1))
         {
           reply = "STH";
diff --git a/src/jalview/io/xdb/genbank/GenBankFeature.java b/src/jalview/io/xdb/genbank/GenBankFeature.java
new file mode 100644 (file)
index 0000000..0de2e65
--- /dev/null
@@ -0,0 +1,68 @@
+package jalview.io.xdb.genbank;
+
+import java.util.Enumeration;
+import java.util.Hashtable;
+
+public class GenBankFeature {
+       public static final String MISC_TYPE = "misc_feature";
+       public static final String SOURCE = "source";
+       public static final String CDS = "CDS";
+       public static final String GENE = "gene";
+       public static final String EXON = "exon";
+       public static final String INTRON = "intron";
+       public static final String PRIM_TRANSCRIPT = "prim_transcript";
+       public static final String mRNA = "mRNA";
+       public static final String MOBILE_ELEMENT = "mobile_element";
+       public static final String VARIATION = "variation";
+       
+       private String type;
+       private Hashtable<String,String> qualifiers = new Hashtable<String,String>();
+       private GenBankLocation location = null;
+       
+       public GenBankFeature() {
+               super();
+       }
+
+       public GenBankFeature(String type) {
+               super();
+               this.type = type;
+       }
+       
+       public void addQualifier(String key, String value){
+               this.qualifiers.put(key, value);
+       }
+       public void updateQualifier(String key, String newValue){
+               this.qualifiers.remove(key);
+               this.qualifiers.put(key, newValue);
+       }
+       
+       public String getQualifier(String key){
+               return this.qualifiers.get(key);
+       }
+
+       public String getType() {
+               return type;
+       }
+
+       public void setType(String type) {
+               this.type = type;
+       }
+       public Enumeration<String> getQualifiersNames(){
+               return this.qualifiers.keys();
+       }
+       public int getQualifiersSize(){
+               return this.qualifiers.size();
+       }
+
+       public Hashtable<String, String> getFields() {
+               return qualifiers;
+       }
+
+       public GenBankLocation getLocation() {
+               return location;
+       }
+
+       public void setLocation(GenBankLocation location) {
+               this.location = location;
+       }
+}
diff --git a/src/jalview/io/xdb/genbank/GenBankLocation.java b/src/jalview/io/xdb/genbank/GenBankLocation.java
new file mode 100644 (file)
index 0000000..5d0db6c
--- /dev/null
@@ -0,0 +1,150 @@
+package jalview.io.xdb.genbank;
+
+/**
+ * The location contains at least one sequence location descriptor and may
+ * contain one or more operators with one or more sequence location descriptors.
+ * Base numbers refer to the numbering in the entry. This numbering designates
+ * the first base (5' end) of the presented sequence as base 1.
+ * Base locations beyond the range of the presented sequence may not be used in
+ * location descriptors, the only exception being location in a remote entry (see
+ * 3.5.2.1, e).
+ *
+ * Location operators and descriptors are discussed in more detail below.
+ *
+ *  3.5.2.1 Location descriptors
+ * The location descriptor can be one of the following:
+ * (a) a single base number
+ * (b) a site between two indicated adjoining bases
+ * (c) a single base chosen from within a specified range of bases (not allowed for new
+ *     entries)
+ * (d) the base numbers delimiting a sequence span
+ * (e) a remote entry identifier followed by a local location descriptor
+ *     (i.e., a-d)
+ *
+ * A site between two adjoining nucleotides, such as endonucleolytic cleavage
+ * site, is indicated by listing the two points separated by a caret (^). The
+ * permitted formats for this descriptor are n^n+1 (for example 55^56), or, for
+ * circular molecules, n^1, where "n" is the full length of the molecule, ie
+ * 1000^1 for circular molecule with length 1000.
+ *
+ * A single base chosen from a range of bases is indicated by the first base
+ * number and the last base number of the range separated by a single period
+ * (e.g., '12.21' indicates a single base taken from between the indicated
+ * points). From October 2006 the usage of this descriptor is restricted :
+ * it is illegal to use "a single base from a range" (c) either on its own or
+ * in combination with the "sequence span" (d) descriptor for newly created entries.
+ * The existing entries where such descriptors exist are going to be retrofitted.
+ *
+ * Sequence spans are indicated by the starting base number and the ending base
+ * number separated by two periods (e.g., '34..456'). The '&lt;' and '&gt;' symbols may
+ * be used with the starting and ending base numbers to indicate that an end
+ * point is beyond the specified base number. The starting and ending base
+ * positions can be represented as distinct base numbers ('34..456') or a site
+ * between two indicated adjoining bases.
+ *
+ * A location in a remote entry (not the entry to which the feature table
+ * belongs) can be specified by giving  the accession-number and sequence version
+ * of the remote entry, followed by a colon ":", followed by a location
+ * descriptor which applies to that entry's sequence (i.e. J12345.1:1..15, see
+ * also examples below)
+ *
+ * 3.5.2.2 Operators
+ *
+ * The location operator is a prefix that specifies what must be done to the
+ * indicated sequence to find or construct the location corresponding to the
+ * feature. A list of operators is given below with their definitions and most
+ * common format.
+ *
+ * complement(location)
+ * Find the complement of the presented sequence in the span specified by "
+ * location" (i.e., read the complement of the presented strand in its 5'-to-3'
+ * direction)
+ *
+ * join(location,location, ... location)
+ * The indicated elements should be joined (placed end-to-end) to form one
+ * contiguous sequence
+ *
+ * order(location,location, ... location)
+ * The elements can be found in the
+ * specified order (5' to 3' direction), but nothing is implied about the
+ * reasonableness about joining them
+ *
+ * Note : location operator "complement" can be used in combination with either "
+ * join" or "order" within the same location; combinations of "join" and "order"
+ * within the same location (nested operators) are illegal.
+ *
+ * 3.5.3 Location examples
+ *
+ * The following is a list of common location descriptors with their meanings:
+ * Location                  Description
+ * 467                       Points to a single base in the presented sequence
+ * 340..565                  Points to a continuous range of bases bounded by and
+ *                           including the starting and ending bases
+ * &lt;345..500              Indicates that the exact lower boundary point of a feature
+ *                           is unknown.  The location begins at some  base previous to
+ *                           the first base specified (which need not be contained in
+ *                           the presented sequence) and continues to and includes the
+ *                           ending base
+ * &lt;1..888                The feature starts before the first sequenced base and
+ *                           continues to and includes base 888
+ * 1..&gt;888                The feature starts at the first sequenced base and
+ *                           continues beyond base 888
+ * 102.110                   Indicates that the exact location is unknown but that it is
+ *                           one of the bases between bases 102 and 110, inclusive
+ * 123^124                   Points to a site between bases 123 and 124
+ * join(12..78,134..202)     Regions 12 to 78 and 134 to 202 should be joined to form
+ *                           one contiguous sequence
+ * complement(34..126)       Start at the base complementary to 126 and finish at the
+ *                           base complementary to base 34 (the feature is on the strand
+ *                           complementary to the presented strand)
+ * complement(join(2691..4571,4918..5163))
+ *                           Joins regions 2691 to 4571 and 4918 to 5163, then
+ *                           complements the joined segments (the feature is on the
+ *                           strand complementary to the presented strand)
+ * join(complement(4918..5163),complement(2691..4571))
+ *                           Complements regions 4918 to 5163 and 2691 to 4571, then
+ *                           joins the complemented segments (the feature is on the
+ *                           strand complementary to the presented strand)
+ * J00194.1:100..202         Points to bases 100 to 202, inclusive, in the entry (in
+ *                           this database) with primary accession number 'J00194'
+ * join(1..100,J00194.1:100..202)
+ *                           Joins region 1..100 of the existing entry with the region
+ *                           100..202 of remote entry J00194
+ * 
+ *
+ */
+public abstract class GenBankLocation {
+    // the location is complement strand?
+    private boolean complement = false;
+
+    public GenBankLocation() {
+    }
+
+    /**
+     * The minor location in genome sequence
+     *
+     * @return position
+     */
+    public abstract int getMinor();
+
+    /**
+     * The major location in genome sequence
+     *
+     * @return position
+     */
+    public abstract int getMajor();
+
+    /**
+     * @return the complement
+     */
+    public boolean isComplement() {
+        return complement;
+    }
+
+    /**
+     * @param complement the complement to set
+     */
+    public void setComplement(boolean complement) {
+        this.complement = complement;
+    }
+}
\ No newline at end of file
diff --git a/src/jalview/io/xdb/genbank/GenBankLocationPoint.java b/src/jalview/io/xdb/genbank/GenBankLocationPoint.java
new file mode 100644 (file)
index 0000000..6d3a475
--- /dev/null
@@ -0,0 +1,140 @@
+package jalview.io.xdb.genbank;
+
+/**
+ *
+ */
+public class GenBankLocationPoint extends GenBankLocation {
+    private String entry;
+    private char prefix = 0;
+    private int min = 0;
+    private char symbol = 0;
+    private int max = 0;
+    private char sufix = 0;
+
+    public GenBankLocationPoint() {
+    }
+
+    public GenBankLocationPoint(int point) {
+        this.min = point;
+        this.max = point;
+    }
+
+    public GenBankLocationPoint(int min, int max) {
+        this.min = min;
+        this.max = max;
+    }
+
+    public int getMinor() {
+        return this.min;
+    }
+
+    public int getMajor() {
+        return this.max;
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        if( prefix != 0 && prefix != ' ' ) {
+            sb.append(prefix);
+        }
+        if( symbol == '.' || symbol == '^' ) {
+            sb.append( String.format("%d%c%d",min,symbol,max) );
+        } else {
+            if( min != max ) {
+               sb.append( String.format("%d.%d",min,max) );
+            } else {
+               sb.append( min );
+            }
+        }
+        if( sufix != 0 && sufix != ' ' ) {
+            sb.append(sufix);
+        }
+        return sb.toString();
+    }
+
+    /**
+     * @return the prefix
+     */
+    public char getPrefix() {
+        return prefix;
+    }
+
+    /**
+     * @param prefix the prefix to set
+     */
+    public void setPrefix(char prefix) {
+        this.prefix = prefix;
+    }
+
+    /**
+     * @return the min
+     */
+    public int getMin() {
+        return min;
+    }
+
+    /**
+     * @param min the min to set
+     */
+    public void setMin(int min) {
+        this.min = min;
+    }
+
+    /**
+     * @return the symbol
+     */
+    public char getSymbol() {
+        return symbol;
+    }
+
+    /**
+     * @param symbol the symbol to set
+     */
+    public void setSymbol(char symbol) {
+        this.symbol = symbol;
+    }
+
+    /**
+     * @return the max
+     */
+    public int getMax() {
+        return max;
+    }
+
+    /**
+     * @param max the max to set
+     */
+    public void setMax(int max) {
+        this.max = max;
+    }
+
+    /**
+     * @return the sufix
+     */
+    public char getSufix() {
+        return sufix;
+    }
+
+    /**
+     * @param sufix the sufix to set
+     */
+    public void setSufix(char sufix) {
+        this.sufix = sufix;
+    }
+
+    /**
+     * @return the entry
+     */
+    public String getEntry() {
+        return entry;
+    }
+
+    /**
+     * @param entry the entry to set
+     */
+    public void setEntry(String entry) {
+        this.entry = entry;
+    }
+
+}
diff --git a/src/jalview/io/xdb/genbank/GenBankLocationRange.java b/src/jalview/io/xdb/genbank/GenBankLocationRange.java
new file mode 100644 (file)
index 0000000..552d1f9
--- /dev/null
@@ -0,0 +1,86 @@
+package jalview.io.xdb.genbank;
+
+/**
+ *
+ */
+public class GenBankLocationRange extends GenBankLocation {
+    private String entry = null;
+    private GenBankLocationPoint start = null;
+    private GenBankLocationPoint end = null;
+
+    public GenBankLocationRange() {
+    }
+
+    @Override
+    public int getMinor() {
+        return start == null ? 0 : start.getMinor();
+    }
+
+    @Override
+    public int getMajor() {
+        return end == null ? 0 : end.getMajor();
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+//        if( getDirecao() != '5' ) {
+//            sb.append("complement(");
+//        }
+        if( entry != null ) {
+            sb.append(entry);
+            sb.append(":");
+        }
+        if( getStart() != null ) {
+            sb.append( getStart().toString() );
+        }
+        if( getEnd() != null && getStart() != getEnd() && !start.equals(end) ) {
+            sb.append("..");
+            sb.append( getEnd().toString() );
+        }
+        return sb.toString();
+    }
+
+    /**
+     * @return the entry
+     */
+    public String getEntry() {
+        return entry;
+    }
+
+    /**
+     * @param entry the entry to set
+     */
+    public void setEntry(String entry) {
+        this.entry = entry;
+    }
+
+    /**
+     * @return the start
+     */
+    public GenBankLocationPoint getStart() {
+        return start;
+    }
+
+    /**
+     * @param start the start to set
+     */
+    public void setStart(GenBankLocationPoint start) {
+        this.start = start;
+    }
+
+    /**
+     * @return the end
+     */
+    public GenBankLocationPoint getEnd() {
+        return end;
+    }
+
+    /**
+     * @param end the end to set
+     */
+    public void setEnd(GenBankLocationPoint end) {
+        this.end = end;
+    }
+
+}
diff --git a/src/jalview/io/xdb/genbank/GenBankLocations.java b/src/jalview/io/xdb/genbank/GenBankLocations.java
new file mode 100644 (file)
index 0000000..ae3e47b
--- /dev/null
@@ -0,0 +1,98 @@
+package jalview.io.xdb.genbank;
+
+/**
+ *
+ * @author Dieval Guizelini
+ */
+public class GenBankLocations extends GenBankLocation {
+    public static final int NONE = 1; // default
+    public static final int COMPLEMENT = 2;
+    public static final int JOIN = 3;
+    public static final int ORDER = 4; // conj com ordem desconhecida
+    private int operator = NONE;
+    private java.util.List<GenBankLocation> units;
+
+    public GenBankLocations() {
+        units = new java.util.ArrayList<GenBankLocation>();
+    }
+
+    @Override
+    public void setComplement(boolean complement){
+        super.setComplement(complement);
+        this.operator = COMPLEMENT;
+        if (units != null) {
+            for (GenBankLocation o : units) {
+                o.setComplement(complement);
+            }
+        }
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        if (getOperator() == COMPLEMENT) {
+            sb.append("complement(");
+        } else if (getOperator() == JOIN) {
+            sb.append("join(");
+        } else if (getOperator() == ORDER) {
+            sb.append("order(");
+        }
+        if (units.size() > 0) {
+            sb.append(units.get(0).toString());
+            for (int i = 1; i < units.size(); i++) {
+                sb.append(",");
+                sb.append(units.get(i).toString());
+            }
+        }
+        if (getOperator() != NONE) {
+            sb.append(")");
+        }
+        return sb.toString();
+    }
+
+    /**
+     * @return the units
+     */
+    public java.util.List<GenBankLocation> getUnits() {
+        return units;
+    }
+
+    /**
+     * @param units the units to set
+     */
+    public void setUnits(java.util.List<GenBankLocation> units) {
+        this.units = units;
+    }
+
+    @Override
+    public int getMinor() {
+        if( units.size() > 0 ) {
+            return units.get(0).getMinor();
+        }
+        return 0;
+    }
+
+    @Override
+    public int getMajor() {
+        int ind = units.size();
+        if( ind > 0 ) {
+            return units.get(ind-1).getMajor();
+        }
+        return 0;
+    }
+
+    /**
+     * @return the operator
+     */
+    public int getOperator() {
+        return operator;
+    }
+
+    /**
+     * @param operator the operator to set
+     */
+    public void setOperator(int operator) {
+        this.operator = operator;
+    }
+
+}
diff --git a/src/jalview/io/xdb/genbank/GenBankLocus.java b/src/jalview/io/xdb/genbank/GenBankLocus.java
new file mode 100644 (file)
index 0000000..cf6289e
--- /dev/null
@@ -0,0 +1,138 @@
+package jalview.io.xdb.genbank;
+
+/**
+ * A short mnemonic name for the entry, chosen to suggest the
+ * sequence's definition. Mandatory keyword/exactly one record.
+ *
+ * <p>The LOCUS field contains a number of different data elements, including locus name,
+ * sequence length, molecule type, GenBank division, and modification date. Each element
+ * is described below.</p>
+ *
+ */
+public class GenBankLocus {
+    private String name;
+    private int sequenceLength;
+    private String strand;
+    private String moleculeType;
+    private boolean linearSequence;
+    private String division;
+    private String modificationDate;
+
+    public GenBankLocus() {
+    }
+
+    public GenBankLocus(String name, int sequenceLength) {
+        this.name = name;
+        this.sequenceLength = sequenceLength;
+    }
+
+
+    /**
+     * @return the name
+     */
+    public String getName() {
+        return name;
+    }
+
+    /**
+     * @param name the name to set
+     */
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    /**
+     * @return the sequenceLength
+     */
+    public int getSequenceLength() {
+        return sequenceLength;
+    }
+
+    /**
+     * @param sequenceLength the sequenceLength to set
+     */
+    public void setSequenceLength(int sequenceLength) {
+        this.sequenceLength = sequenceLength;
+    }
+
+    /**
+     * @return the strand
+     */
+    public String getStrand() {
+        return strand;
+    }
+
+    /**
+     * @param strand the strand to set
+     */
+    public void setStrand(String strand) {
+        this.strand = strand;
+    }
+
+    /**
+     * @return the moleculeType
+     */
+    public String getMoleculeType() {
+        return moleculeType;
+    }
+
+    /**
+     * @param moleculeType the moleculeType to set
+     */
+    public void setMoleculeType(String moleculeType) {
+        this.moleculeType = moleculeType;
+    }
+
+    /**
+     * @return the linearSequence
+     */
+    public boolean isLinearSequence() {
+        return linearSequence;
+    }
+
+    /**
+     * @param linearSequence the linearSequence to set
+     */
+    public void setLinearSequence(boolean linearSequence) {
+        this.linearSequence = linearSequence;
+    }
+
+    /**
+     * @return the division
+     */
+    public String getDivision() {
+        return division;
+    }
+
+    /**
+     * @param division the division to set
+     */
+    public void setDivision(String division) {
+        this.division = division;
+    }
+
+    /**
+     * @return the modificationDate
+     */
+    public String getModificationDate() {
+        return modificationDate;
+    }
+
+    /**
+     * @param modificationDate the modificationDate to set
+     */
+    public void setModificationDate(String modificationDate) {
+        this.modificationDate = modificationDate;
+    }
+
+    @Override
+    public String toString() {
+        return String.format("LOCUS       %-16s %11d bp %3s     %6s  %-8s %3s %s",
+                this.name, this.sequenceLength, this.strand,
+                this.moleculeType, linearSequence?"linear  ":"circular",
+                this.division, ((modificationDate == null) || (modificationDate.equals("")) ? "" : modificationDate.toUpperCase())
+                       );
+    }
+
+}
diff --git a/src/jalview/io/xdb/genbank/GenBankReference.java b/src/jalview/io/xdb/genbank/GenBankReference.java
new file mode 100644 (file)
index 0000000..74f0080
--- /dev/null
@@ -0,0 +1,135 @@
+package jalview.io.xdb.genbank;
+
+public class GenBankReference {
+       private int order;
+       private int begin;
+       private int end;
+       private String descriptor;
+       private String authors;
+       private String title;
+       private String journal;
+       private String pubmed;
+       private String medline;
+       private String consortia;
+       private String remark;
+
+       public GenBankReference() {
+               super();
+       }
+
+       public String getDescriptor() {
+               return descriptor;
+       }
+
+       public void setDescriptor(String descriptor) {
+               this.descriptor = descriptor;
+       }
+
+       public String getAuthors() {
+               return authors;
+       }
+
+       public void setAuthors(String authors) {
+               this.authors = authors;
+       }
+
+       public String getTitle() {
+               return title;
+       }
+
+       public void setTitle(String title) {
+               this.title = title;
+       }
+
+       public String getJournal() {
+               return journal;
+       }
+
+       public void setJournal(String journal) {
+               this.journal = journal;
+       }
+
+       public String getPubmed() {
+               return pubmed;
+       }
+
+       public void setPubmed(String pubmed) {
+               this.pubmed = pubmed;
+       }
+
+       public int getOrder() {
+               return order;
+       }
+
+       public void setOrder(int order) {
+               this.order = order;
+       }
+       public int getBegin() {
+               return begin;
+       }
+
+       public void setBegin(int begin) {
+               this.begin = begin;
+       }
+
+       public int getEnd() {
+               return end;
+       }
+
+       public void setEnd(int end) {
+               this.end = end;
+       }
+       
+       
+       public String getMedline() {
+               return medline;
+       }
+
+       public void setMedline(String medline) {
+               this.medline = medline;
+       }
+
+       public String getConsortia() {
+               return consortia;
+       }
+
+       public void setConsortia(String consortia) {
+               this.consortia = consortia;
+       }
+
+       public String getRemark() {
+               return remark;
+       }
+
+       public void setRemark(String remark) {
+               this.remark = remark;
+       }
+
+       public String toString(){
+//             References has the following format
+//             REFERENCE   1  (bases 1 to 1976)
+//               AUTHORS   Spritz,R.A., DeRiel,J.K., Forget,B.G. and Weissman,S.M.
+//               TITLE     Complete nucleotide sequence of the human delta-globin gene
+//               JOURNAL   Cell 21 (3), 639-646 (1980)
+//                PUBMED   7438204
+               
+               StringBuffer buf = new StringBuffer();
+               buf.append("REFERENCE   ").append(this.getOrder()).append("  (bases ").append(this.getBegin()).append(" to ").append(this.getEnd()).append(")\n");
+               if (this.getAuthors()!=null)
+                       buf.append("  AUTHORS   ").append(this.getAuthors()).append("\n");
+               if (this.getTitle()!=null)
+                       buf.append("  TITLE     ").append(this.getTitle()).append("\n");
+               if (this.getJournal()!=null)
+                       buf.append("  JOURNAL   ").append(this.getJournal()).append("\n");
+               if (this.getPubmed()!=null)
+                       buf.append("  PUBMED   ").append(this.getPubmed()).append("\n");;
+               if (this.getMedline()!=null)
+                       buf.append("  MEDLINE   ").append(this.getMedline()).append("\n");;
+               if (this.getRemark()!=null)
+                       buf.append("  REMARK   ").append(this.getRemark()).append("\n");;
+               if (this.getConsortia()!=null)
+                       buf.append("  CONSRTM   ").append(this.getConsortia()).append("\n");;
+               return buf.toString();
+       }
+       
+}
diff --git a/src/jalview/io/xdb/genbank/GenBankSequence.java b/src/jalview/io/xdb/genbank/GenBankSequence.java
new file mode 100644 (file)
index 0000000..279601c
--- /dev/null
@@ -0,0 +1,57 @@
+package jalview.io.xdb.genbank;
+
+import java.util.Vector;
+/**
+ * A line like the following:
+ *   1 aatgaaggtt catttttcat tctcacaaac taatgaaacc ctgcttatct taaaccaacc
+ * will be mapped as:
+ *     id: 1
+ *  sequences: {"aatgaaggtt", "catttttcat", "tctcacaaac", "taatgaaacc", "ctgcttatct", "taaaccaacc"}
+ *  Each sequence block is 10 nucleotides long
+ * @author darolmar
+ *
+ */
+public class GenBankSequence {
+       //Initial position
+       private int id;
+       //Sequences in that line
+       private Vector<String> sequences;
+
+       public GenBankSequence() {
+               super();
+               sequences = new Vector<String>();
+       }
+
+       public int getId() {
+               return id;
+       }
+
+       public void setId(int id) {
+               this.id = id;
+       }
+
+       public Vector<String> getSequences() {
+               return sequences;
+       }
+
+       public void setSequences(Vector<String> sequences) {
+               this.sequences = sequences;
+       }
+       
+       public String getSequencesAsString(){
+               StringBuffer sb = new StringBuffer();
+               for (String seq:sequences)                      
+                       sb.append(seq).append(" ");                             
+               return sb.toString();           
+       }
+       
+       public String toString(){
+               StringBuffer sb = new StringBuffer()
+                       .append("        ").append(this.id);
+               for (String seq:sequences)                      
+                       sb.append(" ").append(seq);
+               sb.append("\n");                                
+               return sb.toString();
+       }
+       
+}
diff --git a/src/jalview/io/xdb/genbank/GenBankSource.java b/src/jalview/io/xdb/genbank/GenBankSource.java
new file mode 100644 (file)
index 0000000..c5ef3c2
--- /dev/null
@@ -0,0 +1,85 @@
+package jalview.io.xdb.genbank;
+
+/**
+ * <p>Free-format information including an abbreviated form of the organism
+ * name, sometimes followed by a molecule type. (See section 3.4.10 of the
+ * GenBank release notes for more info.)</p>
+ * <p>Entrez Search Field: Organism [ORGN] </p>
+ * <p>Search Tip: For some organisms that have well-established common names,
+ * such as baker's yeast, mouse, and human, a search for the common name will
+ * yield the same results as a search for the scientific name, e.g., a search
+ * for "baker's yeast" in the organism field retrieves the same number of
+ * documents as "Saccharomyces cerevisiae". This is true because the Organism
+ * field is connected to the NCBI Taxonomy Database, which contains
+ * cross-references between common names, scientific names, and synonyms for
+ * organisms represented in the Sequence databases.</p>
+ * <h1>Organism</h1>
+ * <p>The formal scientific name for the source organism (genus and species,
+ * where appropriate) and its lineage, based on the phylogenetic classification
+ * scheme used in the NCBI Taxonomy Database. If the complete lineage of an
+ * organism is very long, an abbreviated lineage will be shown in the GenBank
+ * record and the complete lineage will be available in the Taxonomy Database.
+ * (See also the /db_xref=taxon:nnnn Feature qualifier, below.)</p>
+ * <p>Entrez Search Field: Organism [ORGN] </p>
+ * <p>Search Tip: You can search the Organism field by any node in the taxonomic
+ * hierarchy, e.g., you can search for the term "Saccharomyces cerevisiae",
+ * "Saccharomycetales", "Ascomycota", etc. to retrieve all the sequences from
+ * organisms in a particular taxon. </p>
+ * 
+ */
+public class GenBankSource {
+    private String source="";
+    private String organism="";
+    private String taxonomic="";
+
+    public GenBankSource() {
+    }
+
+    @Override
+    public String toString() {
+        return String.format("%s\n\t%s\n\t%s", getSource(), getOrganism(), getTaxonomic());
+    }
+
+    /**
+     * @return the source
+     */
+    public String getSource() {
+        return source;
+    }
+
+    /**
+     * @param source the source to set
+     */
+    public void setSource(String source) {
+        this.source = source;
+    }
+
+    /**
+     * @return the organism
+     */
+    public String getOrganism() {
+        return organism;
+    }
+
+    /**
+     * @param organism the organism to set
+     */
+    public void setOrganism(String organism) {
+        this.organism = organism;
+    }
+
+    /**
+     * @return the taxonomic
+     */
+    public String getTaxonomic() {
+        return taxonomic;
+    }
+
+    /**
+     * @param taxonomic the taxonomic to set
+     */
+    public void setTaxonomic(String taxonomic) {
+        this.taxonomic = taxonomic;
+    }
+
+}
diff --git a/src/jalview/io/xdb/genbank/GenBankVersion.java b/src/jalview/io/xdb/genbank/GenBankVersion.java
new file mode 100644 (file)
index 0000000..85a2fd1
--- /dev/null
@@ -0,0 +1,89 @@
+package jalview.io.xdb.genbank;
+
+/**
+ * <p>A nucleotide sequence identification number that represents a single,
+ * specific sequence in the GenBank database. This identification number uses
+ * the accession.version format implemented by GenBank/EMBL/DDBJ in
+ * February 1999.</p>
+ * <p>If there is any change to the sequence data (even a single base), the
+ * version number will be increased, e.g., U12345.1 → U12345.2, but the
+ * accession portion will remain stable.</p>
+ * <p>The accession.version system of sequence identifiers runs parallel to
+ * the GI number system, i.e., when any change is made to a sequence, it
+ * receives a new GI number AND an increase to its version number.</p>
+ * <p>For more information, see section 1.3.2 of the GenBank 111.0 release
+ * notes, and section 3.4.7 of the current GenBank release notes.</p>
+ * <p>A Sequence Revision History tool is available to track the various GI
+ * numbers, version numbers, and update dates for sequences that appeared in
+ * a specific GenBank record (more information and example).</p>
+ * <p>More details about sequence identification numbers and the difference
+ * between GI number and version are provided in Sequence Identifiers:
+ * A Historical Note.</p>
+ * <p>Entrez Search Field: use the default setting of "All Fields"</p>
+ * <h1>GI</h1>
+ * <p>"GenInfo Identifier" sequence identification number, in this case, for
+ * the nucleotide sequence. If a sequence changes in any way, a new GI number
+ * will be assigned.</p>
+ * <p>A separate GI number is also assigned to each protein translation within
+ * a nucleotide sequence record, and a new GI is assigned if the protein
+ * translation changes in any way (see below).</p>
+ * <p>GI sequence identifiers run parallel to the new accession.version system
+ * of sequence identifiers. For more information, see the description of Version,
+ * above, and section 3.4.7 of the current GenBank release notes.</p>
+ * <p>A Sequence Revision History tool is available to track the various GI
+ * numbers, version numbers, and update dates for sequences that appeared in a
+ * specific GenBank record (more information and example).</p>
+ * <p>More details about sequence identification numbers and the difference
+ * between GI number and version are provided in Sequence Identifiers: A
+ * Historical Note.</p>
+ * <p>Entrez Search Field: use the default setting of "All Fields"</p>
+ * @author Dieval Guizelini
+ * @see Entry
+ */
+public class GenBankVersion {
+    private String version = "";
+    private String gi = "";
+
+    public GenBankVersion() {
+    }
+
+
+    /**
+     * @return the version
+     */
+    public String getVersion() {
+        return version;
+    }
+
+    /**
+     * @param version the version to set
+     */
+    public void setVersion(String version) {
+        this.version = version;
+    }
+
+    /**
+     * @return the gi
+     */
+    public String getGI() {
+        return gi;
+    }
+
+    /**
+     * @param gi the gi to set
+     */
+    public void setGI(String gi) {
+        this.gi = gi;
+    }
+
+
+    /**
+     * Version section in GenBank File Format is text with two fields (version and GI).
+     *
+     * @return  version+" "+gi
+     */
+    @Override
+    public String toString() {
+        return String.format("%s %s",version,gi);
+    }
+}
diff --git a/test/jalview/io/GU324925.1.gb b/test/jalview/io/GU324925.1.gb
new file mode 100644 (file)
index 0000000..feacdfd
--- /dev/null
@@ -0,0 +1,1011 @@
+LOCUS       GU324925               15440 bp    DNA     linear   PRI 10-AUG-2010
+DEFINITION  Homo sapiens hemoglobin, gamma A (HBG1) gene, complete cds.
+ACCESSION   GU324925
+VERSION     GU324925.1  GI:302313142
+KEYWORDS    .
+SOURCE      Homo sapiens (human)
+  ORGANISM  Homo sapiens
+            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+            Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;
+            Catarrhini; Hominidae; Homo.
+REFERENCE   1  (bases 1 to 15440)
+  AUTHORS   Rieder,M.J., Bertucci,C., Stanaway,I.B., Johnson,E.J.,
+            Swanson,J.E., Siegel,D.L., da Ponte,S.H., Igartua,C., Patterson,K.
+            and Nickerson,D.A.
+  TITLE     Direct Submission
+  JOURNAL   Submitted (25-NOV-2009) Genome Sciences, University of Washington,
+            1705 NE Pacific, Seattle, WA 98195, USA
+COMMENT     To cite this work please use: NHLBI Resequencing and Genotyping
+            Service (RSG),UW HV48194, Department of Genome Sciences, Seattle,
+            WA 98195-7730.
+FEATURES             Location/Qualifiers
+     source          1..15440
+                     /organism="Homo sapiens"
+                     /mol_type="genomic DNA"
+                     /db_xref="taxon:9606"
+     mobile_element  179..289
+                     /mobile_element_type="LINE:L2"
+     variation       293
+                     /frequency="0.0328"
+                     /replace="t"
+     variation       337
+                     /frequency="0.0027"
+                     /replace="c"
+     mobile_element  345..530
+                     /mobile_element_type="other:LTR/ERV1"
+     variation       406
+                     /frequency="0.3873"
+                     /replace="a"
+     variation       534
+                     /frequency="0.6279"
+                     /replace=""
+     mobile_element  544..619
+                     /mobile_element_type="LINE:L1"
+     variation       568
+                     /frequency="0.3088"
+                     /replace="t"
+     variation       692
+                     /frequency="0.3038"
+                     /replace="g"
+     variation       757
+                     /frequency="0.0053"
+                     /replace="t"
+     variation       935
+                     /frequency="0.1888"
+                     /replace="g"
+     variation       1017
+                     /frequency="0.0026"
+                     /replace="t"
+     variation       1202
+                     /frequency="0.0133"
+                     /replace="a"
+     variation       1350
+                     /frequency="0.3617"
+                     /replace="t"
+     variation       1418
+                     /frequency="0.1818"
+                     /replace="a"
+     variation       1507
+                     /frequency="0.2527"
+                     /replace="a"
+     variation       1522
+                     /frequency="0.0027"
+                     /replace="g"
+     variation       1608
+                     /frequency="0.0211"
+                     /replace="a"
+     variation       1637
+                     /frequency="0.0395"
+                     /replace="c"
+     variation       1650
+                     /frequency="0.0211"
+                     /replace="g"
+     variation       1682
+                     /frequency="0.0211"
+                     /replace="t"
+     variation       1689
+                     /frequency="0.0211"
+                     /replace="g"
+     variation       1697
+                     /frequency="0.0211"
+                     /replace="g"
+     variation       1699
+                     /frequency="0.0211"
+                     /replace="a"
+     variation       1735
+                     /frequency="0.0816"
+                     /replace="t"
+     variation       1990
+                     /frequency="0.0027"
+                     /replace="g"
+     gene            2006..3591
+                     /gene="HBG1"
+     mRNA            join(2006..2150,2273..2495,3376..3591)
+                     /gene="HBG1"
+                     /product="hemoglobin, gamma A"
+     variation       2030
+                     /gene="HBG1"
+                     /frequency="0.1657"
+                     /replace="a"
+     CDS             join(2059..2150,2273..2495,3376..3504)
+                     /gene="HBG1"
+                     /codon_start=1
+                     /product="hemoglobin, gamma A"
+                     /protein_id="ADL14496.1"
+                     /db_xref="GI:302313143"
+                     /translation="MGHFTEEDKATITSLWGKVNVEDAGGETLGRLLVVYPWTQRFFD
+                     SFGNLSSASAIMGNPKVKAHGKKVLTSLGDATKHLDDLKGTFAQLSELHCDKLHVDPE
+                     NFKLLGNVLVTVLAIHFGKEFTPEVQASWQKMVTAVASALSSRYH"
+     variation       2190
+                     /gene="HBG1"
+                     /frequency="0.3059"
+                     /replace="a"
+     variation       2191
+                     /gene="HBG1"
+                     /frequency="0.3032"
+                     /replace="a"
+     variation       2215
+                     /gene="HBG1"
+                     /frequency="0.1862"
+                     /replace="t"
+     variation       2407
+                     /gene="HBG1"
+                     /frequency="0.1342"
+                     /replace="t"
+     variation       2518
+                     /gene="HBG1"
+                     /frequency="0.0026"
+                     /replace="t"
+     variation       2519
+                     /gene="HBG1"
+                     /frequency="0.3342"
+                     /replace="a"
+     variation       2554
+                     /gene="HBG1"
+                     /frequency="0.4763"
+                     /replace="t"
+     variation       2610
+                     /gene="HBG1"
+                     /frequency="0.3128"
+                     /replace="a"
+     variation       2643
+                     /gene="HBG1"
+                     /frequency="0.0289"
+                     /replace="c"
+     variation       2653
+                     /gene="HBG1"
+                     /frequency="0.3105"
+                     /replace="c"
+     variation       2675
+                     /gene="HBG1"
+                     /frequency="0.1895"
+                     /replace="c"
+     variation       2682
+                     /gene="HBG1"
+                     /frequency="0.3105"
+                     /replace="g"
+     variation       2700
+                     /gene="HBG1"
+                     /frequency="0.3842"
+                     /replace="c"
+     variation       2746..2749
+                     /gene="HBG1"
+                     /frequency="0.2226"
+                     /replace=""
+     variation       2758
+                     /gene="HBG1"
+                     /frequency="0.3281"
+                     /replace="t"
+     variation       2760
+                     /gene="HBG1"
+                     /frequency="0.3219"
+                     /replace="g"
+     variation       2777
+                     /gene="HBG1"
+                     /frequency="0.2959"
+                     /replace="c"
+     variation       2939
+                     /gene="HBG1"
+                     /frequency="0.0026"
+                     /replace="c"
+     variation       3023
+                     /gene="HBG1"
+                     /frequency="0.0026"
+                     /replace="c"
+     variation       3037
+                     /gene="HBG1"
+                     /frequency="0.0026"
+                     /replace="t"
+     variation       3064
+                     /gene="HBG1"
+                     /frequency="0.0079"
+                     /replace="c"
+     variation       3073
+                     /gene="HBG1"
+                     /frequency="0.0026"
+                     /replace="c"
+     variation       3074
+                     /gene="HBG1"
+                     /frequency="0.0553"
+                     /replace="t"
+     variation       3142
+                     /gene="HBG1"
+                     /frequency="0.0056"
+                     /replace="g"
+     variation       3158
+                     /gene="HBG1"
+                     /frequency="0.0028"
+                     /replace="a"
+     variation       3162
+                     /gene="HBG1"
+                     /frequency="0.2781"
+                     /replace="g"
+     variation       3205
+                     /gene="HBG1"
+                     /frequency="0.3580"
+                     /replace="g"
+     variation       3206
+                     /gene="HBG1"
+                     /frequency="0.3571"
+                     /replace="c"
+     variation       3210
+                     /gene="HBG1"
+                     /frequency="0.3621"
+                     /replace="a"
+     variation       3211
+                     /gene="HBG1"
+                     /frequency="0.3103"
+                     /replace="a"
+     variation       3238
+                     /gene="HBG1"
+                     /frequency="0.2672"
+                     /replace="a"
+     variation       3287
+                     /gene="HBG1"
+                     /frequency="0.1784"
+                     /replace="a"
+     variation       3287
+                     /gene="HBG1"
+                     /frequency="0.3351"
+                     /replace="t"
+     variation       3291
+                     /gene="HBG1"
+                     /frequency="0.0081"
+                     /replace="t"
+     variation       3294
+                     /gene="HBG1"
+                     /frequency="0.1459"
+                     /replace="g"
+     variation       3303
+                     /gene="HBG1"
+                     /frequency="0.0081"
+                     /replace="a"
+     variation       3507
+                     /gene="HBG1"
+                     /frequency="0.1349"
+                     /replace="c"
+     variation       3508
+                     /gene="HBG1"
+                     /frequency="0.1402"
+                     /replace="t"
+     variation       3509
+                     /gene="HBG1"
+                     /frequency="0.1349"
+                     /replace="c"
+     variation       3510
+                     /gene="HBG1"
+                     /frequency="0.1508"
+                     /replace="t"
+     variation       3519
+                     /gene="HBG1"
+                     /frequency="0.0026"
+                     /replace="c"
+     variation       3538
+                     /gene="HBG1"
+                     /frequency="0.0026"
+                     /replace="c"
+     variation       3556
+                     /gene="HBG1"
+                     /frequency="0.0464"
+                     /replace=""
+     variation       3620
+                     /frequency="0.0053"
+                     /replace="t"
+     variation       3628
+                     /frequency="0.2751"
+                     /replace="a"
+     variation       3644
+                     /frequency="0.0026"
+                     /replace="g"
+     variation       3750
+                     /frequency="0.1852"
+                     /replace="t"
+     variation       3763
+                     /frequency="0.3651"
+                     /replace="t"
+     variation       3953
+                     /frequency="0.1349"
+                     /replace="t"
+     variation       4296
+                     /frequency="0.1947"
+                     /replace="t"
+     variation       4324
+                     /frequency="0.0026"
+                     /replace="a"
+     variation       4333
+                     /frequency="0.0053"
+                     /replace="a"
+     variation       4341
+                     /frequency="0.1342"
+                     /replace="g"
+     mobile_element  4365..4701
+                     /mobile_element_type="other:LTR/MaLR"
+     variation       4471
+                     /frequency="0.1958"
+                     /replace="c"
+     variation       4472
+                     /frequency="0.0291"
+                     /replace="c"
+     variation       4595
+                     /frequency="0.0054"
+                     /replace="a"
+     variation       4609
+                     /frequency="0.0108"
+                     /replace="t"
+     variation       4687
+                     /frequency="0.1958"
+                     /replace="a"
+     variation       4938
+                     /frequency="0.0185"
+                     /replace="a"
+     mobile_element  4976..5095
+                     /mobile_element_type="LINE:L2"
+     variation       5070
+                     /frequency="0.0026"
+                     /replace="g"
+     variation       5106
+                     /frequency="0.0132"
+                     /replace="a"
+     mobile_element  5134..5321
+                     /mobile_element_type="other:LTR/ERV1"
+     variation       5179
+                     /frequency="0.0079"
+                     /replace="t"
+     variation       5307
+                     /frequency="0.0053"
+                     /replace="c"
+     mobile_element  5322..5414
+                     /mobile_element_type="LINE:L1"
+     mobile_element  5415..5887
+                     /mobile_element_type="LINE:L1"
+     variation       5423
+                     /frequency="0.0053"
+                     /replace="t"
+     variation       5532
+                     /frequency="0.0026"
+                     /replace="t"
+     variation       5671
+                     /frequency="0.0027"
+                     /replace="c"
+     variation       5754
+                     /frequency="0.0143"
+                     /replace="a"
+     variation       5871
+                     /frequency="0.1057"
+                     /replace="t"
+     mobile_element  5898..6061
+                     /mobile_element_type="LINE:L2"
+     variation       6086
+                     /frequency="0.0158"
+                     /replace="c"
+     variation       6132
+                     /frequency="0.0816"
+                     /replace="g"
+     variation       6135
+                     /frequency="0.0158"
+                     /replace="a"
+     variation       6165
+                     /frequency="0.0079"
+                     /replace="c"
+     variation       6170
+                     /frequency="0.0026"
+                     /replace="a"
+     variation       6200
+                     /frequency="0.0026"
+                     /replace="g"
+     variation       6200
+                     /frequency="0.0053"
+                     /replace="t"
+     variation       6286
+                     /frequency="0.0026"
+                     /replace="a"
+     variation       6296
+                     /frequency="0.0447"
+                     /replace="t"
+     variation       6365
+                     /frequency="0.1921"
+                     /replace="a"
+     variation       6379
+                     /frequency="0.0026"
+                     /replace="t"
+     variation       6467
+                     /frequency="0.0132"
+                     /replace="a"
+     variation       6638
+                     /frequency="0.0159"
+                     /replace="t"
+     variation       6860
+                     /frequency="0.0238"
+                     /replace="c"
+     variation       6955
+                     /frequency="0.0053"
+                     /replace="a"
+     variation       7107
+                     /frequency="0.0026"
+                     /replace="t"
+     variation       7315
+                     /frequency="0.0026"
+                     /replace="t"
+     mobile_element  7396..7708
+                     /mobile_element_type="SINE:Alu"
+     variation       7413
+                     /frequency="0.3128"
+                     /replace="t"
+     variation       7535
+                     /frequency="0.0026"
+                     /replace="a"
+     variation       7618
+                     /frequency="0.5000"
+                     /replace="a"
+     variation       7727
+                     /frequency="0.3677"
+                     /replace="g"
+     variation       7761
+                     /frequency="0.0106"
+                     /replace="a"
+     variation       7872
+                     /frequency="0.0080"
+                     /replace="t"
+     variation       7973
+                     /frequency="0.0132"
+                     /replace="g"
+     variation       7987
+                     /frequency="0.2895"
+                     /replace="t"
+     variation       8164
+                     /frequency="0.0737"
+                     /replace="c"
+     variation       8171
+                     /frequency="0.0526"
+                     /replace="c"
+     variation       8384
+                     /frequency="0.0026"
+                     /replace="a"
+     variation       8410
+                     /frequency="0.0026"
+                     /replace="a"
+     variation       8814
+                     /frequency="0.0079"
+                     /replace="a"
+     variation       8830
+                     /frequency="0.0053"
+                     /replace="g"
+     variation       8947
+                     /frequency="0.1816"
+                     /replace="c"
+     variation       8962
+                     /frequency="0.0026"
+                     /replace="t"
+     variation       9102
+                     /frequency="0.0079"
+                     /replace="g"
+     variation       9240
+                     /frequency="0.1000"
+                     /replace="c"
+     variation       9256
+                     /frequency="0.0026"
+                     /replace="c"
+     variation       9281..9284
+                     /frequency="0.9484"
+                     /replace=""
+     variation       9322
+                     /frequency="0.0053"
+                     /replace="g"
+     variation       9338
+                     /frequency="0.0133"
+                     /replace="c"
+     variation       9374
+                     /frequency="0.9658"
+                     /replace=""
+     variation       9411
+                     /frequency="0.1842"
+                     /replace="a"
+     variation       9517
+                     /frequency="0.0737"
+                     /replace="c"
+     variation       9558
+                     /frequency="0.0079"
+                     /replace="c"
+     variation       9645
+                     /frequency="0.0133"
+                     /replace="c"
+     variation       9752..9773
+                     /frequency="0.0080"
+                     /replace=""
+     variation       9759
+                     /frequency="0.0027"
+                     /replace="g"
+     variation       9791
+                     /frequency="0.0426"
+                     /replace="g"
+     variation       10103
+                     /frequency="0.0968"
+                     /replace="t"
+     variation       10104
+                     /frequency="0.0054"
+                     /replace="a"
+     variation       10244
+                     /frequency="0.1271"
+                     /replace=""
+     variation       10251
+                     /frequency="0.0169"
+                     /replace="t"
+     variation       10312
+                     /frequency="0.3511"
+                     /replace="g"
+     mobile_element  10527..10608
+                     /mobile_element_type="SINE:MIR"
+     variation       10565
+                     /frequency="0.0027"
+                     /replace="c"
+     variation       10705..10706
+                     /frequency="0.8806"
+                     /replace=""
+     variation       10821
+                     /frequency="0.0160"
+                     /replace="a"
+     variation       10864
+                     /frequency="0.0642"
+                     /replace="a"
+     variation       10944
+                     /frequency="0.0027"
+                     /replace="a"
+     variation       11154
+                     /frequency="0.0163"
+                     /replace="t"
+     variation       11259
+                     /frequency="0.0136"
+                     /replace="g"
+     variation       11475
+                     /frequency="0.0027"
+                     /replace="t"
+     variation       11626
+                     /frequency="0.1190"
+                     /replace="a"
+     variation       11706
+                     /frequency="0.0106"
+                     /replace="a"
+     variation       11708
+                     /frequency="0.0026"
+                     /replace="g"
+     variation       11722
+                     /frequency="0.1190"
+                     /replace="t"
+     variation       11818
+                     /frequency="0.0079"
+                     /replace="g"
+     variation       11857
+                     /frequency="0.4418"
+                     /replace="c"
+     variation       11910
+                     /frequency="0.0079"
+                     /replace="c"
+     variation       12024
+                     /frequency="0.0158"
+                     /replace="a"
+     mobile_element  12140..12250
+                     /mobile_element_type="LINE:L2"
+     variation       12160
+                     /frequency="0.0789"
+                     /replace="c"
+     variation       12253
+                     /frequency="0.9947"
+                     /replace=""
+     variation       12267
+                     /frequency="0.0079"
+                     /replace="g"
+     variation       12317
+                     /frequency="0.0026"
+                     /replace="c"
+     variation       12350
+                     /frequency="0.0079"
+                     /replace="a"
+     variation       12521
+                     /frequency="0.3042"
+                     /replace="g"
+     variation       12551
+                     /frequency="0.0105"
+                     /replace="a"
+     variation       12639
+                     /frequency="0.2857"
+                     /replace="g"
+     variation       12697
+                     /frequency="0.0106"
+                     /replace="g"
+     mobile_element  12718..12948
+                     /mobile_element_type="other:LTR/ERVL"
+     variation       12731
+                     /frequency="0.0026"
+                     /replace="t"
+     variation       12740
+                     /frequency="0.0053"
+                     /replace="a"
+     variation       12787
+                     /frequency="0.0026"
+                     /replace="t"
+     variation       12814
+                     /frequency="0.2196"
+                     /replace="a"
+     variation       12975
+                     /frequency="0.1164"
+                     /replace="g"
+     variation       12987
+                     /frequency="0.1190"
+                     /replace="g"
+     variation       13030
+                     /frequency="0.1170"
+                     /replace="a"
+     variation       13042
+                     /frequency="0.0718"
+                     /replace="g"
+     mobile_element  13120..13246
+                     /mobile_element_type="other:LTR/ERVL"
+     variation       13138
+                     /frequency="0.1156"
+                     /replace="c"
+     variation       13286
+                     /frequency="0.0161"
+                     /replace="t"
+     variation       13329
+                     /frequency="0.1216"
+                     /replace="c"
+     variation       13370
+                     /frequency="0.4081"
+                     /replace="g"
+     mobile_element  13541..13842
+                     /mobile_element_type="SINE:Alu"
+     variation       13563
+                     /frequency="0.1243"
+                     /replace="g"
+     variation       13678
+                     /frequency="0.4021"
+                     /replace="t"
+     variation       13749
+                     /frequency="0.0027"
+                     /replace="t"
+     variation       13794
+                     /frequency="0.0316"
+                     /replace="t"
+     variation       13805
+                     /frequency="0.3829"
+                     /replace="t"
+     variation       13808
+                     /frequency="0.3818"
+                     /replace="a"
+     variation       13992..13993
+                     /frequency="0.5895"
+                     /replace=""
+     variation       14110
+                     /frequency="0.4105"
+                     /replace="t"
+     variation       14158
+                     /frequency="0.0079"
+                     /replace="g"
+     mobile_element  14206..14493
+                     /mobile_element_type="LINE:L2"
+     variation       14239
+                     /frequency="0.0079"
+                     /replace="g"
+     variation       14243
+                     /frequency="0.4105"
+                     /replace="t"
+     variation       14247
+                     /frequency="0.4105"
+                     /replace="t"
+     variation       14264
+                     /frequency="0.0026"
+                     /replace="t"
+     variation       14271
+                     /frequency="0.0184"
+                     /replace="g"
+     variation       14272
+                     /frequency="0.4132"
+                     /replace="g"
+     variation       14358
+                     /frequency="0.0158"
+                     /replace="c"
+     variation       14371
+                     /frequency="0.0079"
+                     /replace="t"
+     variation       14406
+                     /frequency="0.0553"
+                     /replace="a"
+     variation       14503
+                     /frequency="0.0421"
+                     /replace="c"
+     variation       14507
+                     /frequency="0.4105"
+                     /replace="a"
+     variation       14609
+                     /frequency="0.4681"
+                     /replace="a"
+     mobile_element  14622..14921
+                     /mobile_element_type="SINE:Alu"
+     variation       14646
+                     /frequency="0.0080"
+                     /replace="c"
+     variation       14670
+                     /frequency="0.4309"
+                     /replace="g"
+     variation       14767
+                     /frequency="0.0027"
+                     /replace="c"
+     variation       14834
+                     /frequency="0.4574"
+                     /replace="g"
+     variation       14861
+                     /frequency="0.0878"
+                     /replace="t"
+     variation       14937
+                     /frequency="0.2872"
+                     /replace="c"
+     variation       14991
+                     /frequency="0.0081"
+                     /replace="g"
+     variation       15061
+                     /frequency="0.3758"
+                     /replace="t"
+     misc_feature    15105..15439
+                     /note="Region not scanned for variation"
+     mobile_element  15305..15396
+                     /mobile_element_type="LINE:L2"
+ORIGIN      
+        1 gtgtttcaga ataaaatacc aactctacta ctctcatctg taagatgcaa atagtaagcc
+       61 tgagcccttc tgtctaactt tgaattctat tttttcttca acgtacttta ggcttgtaat
+      121 gtgtttatat acagtgaaat gtcaagttct ttctttatat ttctttcttt cttttttttc
+      181 ctcagcctca gagttttcca catgcccttc ctactttcag gaacttcttt ctccaaacgt
+      241 cttctgcctg gctccatcaa atcataaagg acccacttca aatgccatca ctcactacca
+      301 tttcacaatt cgcactttct ttctttgtcc tttttttttt tagtaaaaca agtttataaa
+      361 aaattgaagg aataaatgaa tggctacttc ataggcagag tagacgcaag ggctactggt
+      421 tgccgatttt tattgttatt tttcaatagt atgctaaaca aggggtagat tatttatgct
+      481 gcccattttt agaccataaa agataacttc ctgatgttgc catggcattt tttttccttt
+      541 taattttatt tcatttcatt ttaatttcga aggtacatgt gcaggatgtg caggcttgtt
+      601 acatgggtaa atgtgtgtct ttctggcctt ttagccatct gtatcaatga gcagatataa
+      661 gctttacaca ggatcatgaa ggatgaaaga atttcaccaa tattataata atttcaatca
+      721 acctgatagc ttaggggata aactaatttg aagatacagc ttgcctccga taagccagaa
+      781 ttccagagct tctggcatta taatctagca aggttagaga tcatggatca ctttcagaga
+      841 aaaacaaaaa caaactaacc aaaagcaaaa cagaaccaaa aaaccaccat aaatacttcc
+      901 taccctgtta atggtccaat atgtcagaaa cagcactgtg ttagaaataa agctgtctaa
+      961 agtacactaa tattcgagtt ataatagtgt gtggactatt agtcaataaa aacaaccctt
+     1021 gcctctttag agttgttttc catgtacacg cacatcttat gtcttagagt aagattccct
+     1081 gagaagtgaa cctagcattt atacaagata attaattcta atccacagta cctgccaaag
+     1141 aacattctac catcatcttt actgagcata gaagagctac gccaaaaccc tgggtcatca
+     1201 gccagcacac acacttatcc agtggtaaat acacatcatc tggtgtatac atacatacct
+     1261 gaatatggaa tcaaatattt ttctaagatg aaacagtcat gatttatttc aaataggtac
+     1321 ggataagtag atattgaggt aagcattagg tcttatatta tgtaacacta atctattact
+     1381 gcgctgaaac tgtggcttta tagaaattgt tttcactgca ctattgagaa attaagagat
+     1441 aatggcaaaa gtcacaaaga gtatattcaa aaagaagtat agcacttttt ccttagaaac
+     1501 cactgctaac tgaaagagac taagatttgt cccgtcaaaa atcctggacc tatgcctaaa
+     1561 acacatttca caatccctga acttttcaaa aattggtaca tgctttagct ttaaactaca
+     1621 ggcctcactg gagctagaga caagaaggta aaaaacggct gacaaaagaa gtcctggtat
+     1681 cctctatgat gggagaagga aactagctaa agggaagaat aaattagaga aaaactggaa
+     1741 tgactgaatc ggaacaaggc aaaggctata aaaaaaatta agcagcagta tcctcttggg
+     1801 ggccccttcc ccacactatc tcaatgcaaa tatctgtctg aaacggtccc tggctaaact
+     1861 ccacccatgg gttggccagc cttgccttga ccaatagcct tgacaaggca aacttgacca
+     1921 atagtcttag agtatccagt gaggccaggg gccggcggct ggctagggat gaagaataaa
+     1981 aggaagcacc cttcagcagt tccacacact cgcttctgga acgtctgagg ttatcaataa
+     2041 gctcctagtc cagacgccat gggtcatttc acagaggagg acaaggctac tatcacaagc
+     2101 ctgtggggca aggtgaatgt ggaagatgct ggaggagaaa ccctgggaag gtaggctctg
+     2161 gtgaccagga caagggaggg aaggaaggac cctgtgcctg gcaaaagtcc aggtcgcttc
+     2221 tcaggatttg tggcaccttc tgactgtcaa actgttcttg tcaatctcac aggctcctgg
+     2281 ttgtctaccc atggacccag aggttctttg acagctttgg caacctgtcc tctgcctctg
+     2341 ccatcatggg caaccccaaa gtcaaggcac atggcaagaa ggtgctgact tccttgggag
+     2401 atgccacaaa gcacctggat gatctcaagg gcacctttgc ccagctgagt gaactgcact
+     2461 gtgacaagct gcatgtggat cctgagaact tcaaggtgag tccaggagat gtttcagccc
+     2521 tgttgccttt agtctcgagg caacttagac aacggagtat tgatctgagc acagcagggt
+     2581 gtgagctgtt tgaagatact ggggttgggg gtgaagaaac tgcagaggac taactgggct
+     2641 gagacccagt ggtaatgttt tagggcctaa ggagtgcctc taaaaatcta gatggacaat
+     2701 tttgactttg agaaaagaga ggtggaaatg aggaaaatga cttttcttta ttagattcca
+     2761 gtagaaagaa ctttcatctt tccctcattt ttgttgtttt aaaacatcta tctggaggca
+     2821 ggacaagtat ggtcgttaaa aagatgcagg cagaaggcat atattggctc agtcaaagtg
+     2881 gggaactttg gtggccaaac atacattgct aaggctattc ctatatcagc tggacacata
+     2941 taaaatgctg ctaatgcttc attacaaact tatatccttt aattccagat gggggcaaag
+     3001 tatgtccagg ggtgaggaac aattgaaaca tttgggctgg agtagatttt gaaagtcagc
+     3061 tctgtgtgtg tgtgtgtgtg tgcgcgcgcg cgtgtgtgtg tgtgtgtcag cgtgtgtttc
+     3121 ttttaacgtc ttcagcctac aacatacagg gttcatggtg gcaagaagat agcaagattt
+     3181 aaattatggc cagtgactag tgcttgaagg ggaacaacta cctgcattta atgggaaggc
+     3241 aaaatctcag gctttgaggg aagttaacat aggcttgatt ctgggtggaa gcttggtgtg
+     3301 tagttatctg gaggccaggc tggagctctc agctcactat gggttcatct ttattgtctc
+     3361 ctttcatctc aacagctcct gggaaatgtg ctggtgaccg ttttggcaat ccatttcggc
+     3421 aaagaattca cccctgaggt gcaggcttcc tggcagaaga tggtgactgc agtggccagt
+     3481 gccctgtcct ccagatacca ctgagctcac tgcccatgat tcagagcttt caaggatagg
+     3541 ctttattctg caagcaatac aaataataaa tctattctgc tgagagatca cacatgattt
+     3601 tcttcagctc ttttttttac atctttttaa atatatgagc cacaaagggt ttatattgag
+     3661 ggaagtgtgt atgtgtattt ctgcatgcct gtttgtgttt gtggtgtgtg catgctcctc
+     3721 atttattttt atatgagatg tgcattttga tgagcaaata aaagcagtaa agacacttgt
+     3781 acacgggagt tctgcaagtg ggagtaaatg gtgtaggaga aatccggtgg gaagaaagac
+     3841 ctctatagga caggacttct cagaaacaga tgttttggaa gagatgggaa aaggttcagt
+     3901 gaagacctgg gggctggatt gattgcagct gagtagcaag gatggttctt aaggaaggga
+     3961 aagtgttcca agctttagga attcaaggtt tagtcaggtg tagcaattct attttattag
+     4021 gaggaatact atttctaatg gcacttagct tttcacagcc cttgtggatg cctaagaaag
+     4081 tgaaattaat cccatgccct caagtgtgca gattggtcac agcatttcaa gggagagacc
+     4141 tcattgtaag actctggggg aggtggggac ttaggtgtaa gaaatgaatc agcagaggct
+     4201 cacaagtcag catgagcatg ttatgtctga gaaacagacc agcactgtga gatcaaaatg
+     4261 tagtgggaag aatttgtaca acattaattg gaaggcttac ttaatggaat ttttgtatag
+     4321 ttggatgtta gtgcatctct ataagtaaga gtttaatatg atggtgttac ggacctaatg
+     4381 tttgtgtctc ctcaaaattc acatgctgaa tccccaactc ccaactgacc ttatctgtgg
+     4441 gggaggcttt tgaaaagtaa ttaggtttag atgagctcat aagagcagat ccccatcata
+     4501 aaattatttt ccttatcaga agcagagaga caagccattt ctctttcctc ccggtgagga
+     4561 cacagtgaga agtccgccat ctgcaatcca ggaagagaac cctgaccacg agtcagcctt
+     4621 cagaaatgtg agaaaaaact ctgttgttga agccacccag tcttttgtat tttgttatag
+     4681 caccttgcac tgagtaaggc agatgaagaa ggagaaaaaa ataagcttgg gttttgagtg
+     4741 gactacagac catgtttatc tcaggtttgc aaagctcccc tcgtccccta tgtttcagta
+     4801 taaaatacct actctactac tctcatctat aagacccaaa taataagcct gcgcccttct
+     4861 ctctaacttt gatttctcct atttttactt caacatgctt tactctagcc ttgtaatgtc
+     4921 tttacataca gtgaaatgta aagttcttta ttcttttttt ctttctttct tttttctcct
+     4981 cagcctcaga atttggcaca tgcccttcct tctttcagga acttctccaa catctctgcc
+     5041 tggctccatc atatcataaa ggtcccactt caaatgcagt cactaccgtt tcagaatatg
+     5101 cactttcttt cttttttgtt ttttgttttt tttaagtcaa agcaaatttc ttgagagagt
+     5161 aaagaaataa acgaatgact actgcatagg cagagcagcc ccgagggccg ctggttgttc
+     5221 cttttatggt tatttcttga tgatatgtta aacaagtttt ggattattta tgccttctct
+     5281 ttttaggcca tatagggtaa ctttctgaca ttgccatggc atttttcttt taatttaatt
+     5341 tactgttacc ttaaattcag gggtacacgt acaggatatg caggtttgtt ttataggtaa
+     5401 aagtgtgcca tggttttaat gggttttttt tttcttgtaa agttgtttaa gtttcttgtt
+     5461 tactctggat attaggcctt tgtcagaaga atagattgga aaatcttttt cccattctgt
+     5521 agattgtctt tcgctctgat ggtagtttct tttgctgagc aggagctctt tagtttaatt
+     5581 agattccatt ggtcaatttt tgcttttgct gcaattgctt ttcacgcttt catcatgaaa
+     5641 tctgtgcccg tgtttatatc atgaatagta ttgccttgat ttttttctag gctttttata
+     5701 gtttggggtt tttcatttaa gtctctaatc catctggagt taattttgga taaggtataa
+     5761 ggaaggagtc cagtttcatt tttcagcata tggctagcca gttctccccc atcatttatt
+     5821 aaattgaaaa tcctttcccc attgcttgct tttgtcaggt ttctaaaaga ccagatggtt
+     5881 gtaggtacaa tatgcagttt cttcaagtca tataatacca tctgaaatct cttattaatt
+     5941 catttctttt agtatgtatg ctggtctcct ctgctcacta tagtgagggc accattagcc
+     6001 agagaatctg tctgtctagt tcatgtaaga ttctcagaat taagaaaaat ggatggcata
+     6061 tgaatgaaac ttcatggatg acatatggaa tctaatatgt atttgttgaa ttaatgcata
+     6121 agatgcaaca gagagaagtt gacaactgca atgataacct ggtattgatg atataagagt
+     6181 ctatagatca cagtagaagc aataatcatg gaaaacaatt ggaaatgggg aacagccaca
+     6241 aacaagaaag aatcaatact tccaggaaag tgactgcagg tcacttttcc tggagcgggt
+     6301 gagagaaaag tggaagttag cagtaactgc tgaattcctg gttggctgat ggaaagatgg
+     6361 ggcagctgtt cactggtacg cagggtttta gatgtatgta cctaaggata tgaggtatgg
+     6421 caatgaacag aaattctttt gggaatgagt tttagggcca ttaaaggaca tgacctgaag
+     6481 tttcctctga ggccagtccc cacaactcaa tataaatgtg tttcctgcat atagtcaaag
+     6541 ttgccacttc tttttcttca tatcatcgat ctctgctctt aaagataatc ttggttttgc
+     6601 ctcaaactgt ttgtcactac aaactttccc catgttccta agtaaaacag gtaactgcct
+     6661 ctcaactata tcaagtagac taaaatattg tgtctctaat atcagaaatt cagctttaat
+     6721 atattgggtt taactctttg aaatttagag tctccttgaa atacacatgg gggtgatttc
+     6781 ctaaacttta tttcttgtaa ggatttatct caggggtaac acacaaacca gcatcctgaa
+     6841 cctctaagta tgaggacagt aagccttaag aatataaaat aaactgttct tctctctgcc
+     6901 ggtggaagtg tgccctgtct attcctgaaa ttgcttgttt gagacgcatg agacgtgcag
+     6961 cacatgagac acgtgcagca gcctgtggaa tattgtcagt gaagaatgtc tttgcctgat
+     7021 tagatataaa gacaagttaa acacagcatt agactataga tcaagcctgt gccagacaca
+     7081 aatgacctaa tgcccagcac gggccacgga atctcctatc ctcttgcttg aacagagcag
+     7141 cacacttctc ccccaacact attagatgtt ctggcataat tttgtagata tgtaggattt
+     7201 gacatggact attgttcaat gattcagagg aaatctcctt tgttcagata agtacactga
+     7261 ctactaaatg gattaaaaaa cacagtaata aaacccagtt ttccccttac ttccctagtt
+     7321 tgtttcttat tctgctttct tccaagttga tgctggatag aggtgtttat ttctattcta
+     7381 aaaagtgatg aaattggccg ggcgcggtgg ctcacacctg taatcccagc actttgggag
+     7441 gctgaggtgg gcggatcacg aggtcaggag atcaagacca tcctggctaa catggtgaaa
+     7501 ccccatctct actaaaaata caaaaaatta gccagagaca gtggcgggtg cctgtagtcc
+     7561 cagctactcg ggaggctgag gcaggagaat ggcgtgaacc tgggaggcag agcttgcggt
+     7621 gagcagagat cgcgccactg cacactccag cctgggtgac aaagcgagac tccatctcaa
+     7681 aaaaaaaaaa aaaaaaaaga aaaagaaaga aagaaagaaa aaaaaactga tgaaattgtg
+     7741 tattcaatgt agtctcaaga gaattgaaaa ccaagaaagg ctgtggcttc ttccacataa
+     7801 agcctggatg aataacagga taacacgttg ttacattgtc acaactcctg atccaggaat
+     7861 tgatggctaa gatattcgta attcttatcc ttttcagttg taacttattc ctatttgtca
+     7921 gcattcaggt tattagcggc tgctggcgaa gtccttgaga aataaactgc acactggatg
+     7981 gtgggggtag tgtaggaaaa tggaggggaa ggaagtaaag tttcaaatta agcctgaaca
+     8041 gcaaagttcc cctgagaagg ccacctggat tctatcagaa actcgaatgt ccatcttgca
+     8101 aaacttcctt gcccaaaccc cacccctgga gtcacaaccc acccttgacc aatagattca
+     8161 ttttactgag ggaggcaaag ggctggtcaa tagattcatt tcactgggag aggcaaaggg
+     8221 ctgggggcca gagaggagaa gtaaaaagcc acacatgaag cagcaatgca ggcatgcttc
+     8281 tggctcatct gtgatcacca ggaaactccc agatctgaca ctgtagtgca tttcactgct
+     8341 gacaagaagg ctgctgccac cagcctgtga agcaaggtta aggtgagaag gctggaggtg
+     8401 agattctggg caggtaggta ctggaagccg ggacaaggtg cagaaaggca gaaagtgttt
+     8461 ctgaaagagg gattagcccg ttgtcttaca tagtctgact ttgcacctgc tctgtgatta
+     8521 tgactatccc acagtctcct ggttgtctac ccatggacct agaggtactt tgaaagtttt
+     8581 ggatatctgg gctctgactg tgcaataatg ggcaacccca aagtcaaggc acatggcaag
+     8641 aaggtgctga tctccttcgg aaaagctgtt atgctcacgg atgacctcaa aggcaccttt
+     8701 gctacactga gtgacctgca ctgtaacaag ctgcacgtgg accctgagaa cttcctggtg
+     8761 agtagtaagt acactcacgc tttcttcttt acccttagat atttgcacta tgggtacttt
+     8821 tgaaagcaga ggtggctttc tcttgtgtta tgagtcagct atgggatatg atatttcagc
+     8881 agtgggattt tgagagttat gttgctgtaa ataacataac taaaatttgg tagagcaagg
+     8941 actatgaata atggaaggcc acttaccatt tgatagctct gaaaaacaca tcttataaaa
+     9001 aattctggcc aaaatcaaac tgagtgtttt tggatgaggg aacagaagtt gagatagaga
+     9061 aaataacatc tttcctttgg tcagcgaaat tttctataaa aattaatagt cacttttctg
+     9121 catagtcctg gaggttagaa aaagatcaac tgaacaaagt agtgggaagc tgttaaaaag
+     9181 aggattgttt ccctccgaat gatgatggta tacttttgta cgcatggtac aggattcttt
+     9241 gttatgagtg tttgggaaaa ttgtatgtat gtatgtatgt atgtatgtga tgactgggga
+     9301 cttatcctat ccattactgt tccttgaagt actattatcc tactttttaa aaggacgaag
+     9361 tctctaaaaa aaaaaatgaa acaatcacaa tatgttgggg tagtgagttg gcatagcaag
+     9421 taagagaagg ataggacaca atgggaggtg cagggctgcc agtcatattg aagctgatat
+     9481 ctagcccata atggtgagag ttgctcaaac tctggtgaaa aaggatgtaa gtgttatatc
+     9541 tatttactgc aagtccagct tgaggccttc tattcactat gtaccatttt cttttttatc
+     9601 ttcactccct ccccagctct taggcaacgt gatattgatt gttttggcaa cccacttcag
+     9661 cgaggatttt accctacaga tacaggcttc ttggcagtaa ctaacaaatg ctgtggttaa
+     9721 tgctgtagcc cacaagacca ctgagttccc tgtccactat gtttgtacct atggtccact
+     9781 atgtttgtac ctatgtccca aaatctcatc tcctttagat gggggaggtt ggggagaaga
+     9841 gcagtatcct gcctgctgat tcagttcctg catgataaaa atagaataaa gaaatatgct
+     9901 ctctaagaaa tatcattgta ctctttttct gtctttatat tttaccctga ttcagccaaa
+     9961 aggacgcact atttctgatg gaaatgagaa tgttggagaa tgggagttta aggacagaga
+    10021 agatactttc ttgcaatcct gcaagaaaag agagaactcg tgggtggatt tagtggggta
+    10081 gttactccta ggaaggggaa atcgtctcta gaataagaca atgtttttac agaaagggag
+    10141 gtcaatggag gtactctttg gaggtgtaag aggattgttg gtagtgtgta gaggtatgtt
+    10201 aggactcaaa ttagaagttc tgtataggct attatttgta tgaaactcag gatatagctc
+    10261 atttggtgac tgcagttcac ttctacttat tttaaacaac atatttttta ttatttataa
+    10321 tgaagtgggg atggggcttc ctagagacca atcaagggcc aaaccttgaa ctttctctta
+    10381 acgtcttcaa tggtattaat agagaattat ctctaaggca tgtgaactgg ctgtcttggt
+    10441 tttcatctgt acttcatctg ctacctctgt gacctgaaac atatttataa ttccattaag
+    10501 ctgtgcatat gatagattta tcatatgtat tttccttaaa ggatttttgt aagaactaat
+    10561 tgaattgata cctgtaaagt ctttatcaca ctacccaata aataataaat ctctttgttc
+    10621 agctctctgt ttctataaat atgtacaagt tttattgttt ttagtggtag tgattttatt
+    10681 ctctttctat atatatacac acacatatgt gtgcattcat aaatatatac aatttttatg
+    10741 aataaaaaat tattagcaat caatattgaa aaccactgat ttttgtttat gtgagcaaac
+    10801 agcagattaa aaggctgaga tttaggaaac agcacgttaa gtcaagttga tagaggagaa
+    10861 tatggacatt taaaagaggc aggatgatat aaaattaggg aaactggatg cagagaccag
+    10921 atgaagtaag aaaaatagct atcgttttga gcaaaaatca ctgaagtttc ttgcatatga
+    10981 gagtgacata ataaataggg aaacgtagaa aattgattca catgtatata tatatataga
+    11041 actgattaga caaagtctaa cttgggtata gtcagaggag cttgctgtaa ttatattgag
+    11101 gtgatggata aagaactgaa gttgatggaa acaatgaagt taagaaaaaa aatcgagtaa
+    11161 gagaccattg tggcagtgat tgcacagaac tggaaaacat tgtgaaacag agagtcagag
+    11221 atgacagcta aaatccctgt ctgtgaatga aaagaaggaa atttattgac agaacagcaa
+    11281 atgcctacaa gccccctgtt tggatctggc aatgaacgta gccattctgt ggcaatcact
+    11341 tcaaactcct gtacccaaga cccttaggaa gtatgtagca ccctcaaacc taaaacctca
+    11401 aagaaagagg ttttagaaga tataataccc tttcttctcc agtttcatta atcccaaaac
+    11461 ctctttctca aagtatttcc tctatgtgtc caccccaaag agctcacctc accatatctc
+    11521 ttgagtggga gcacatagat aggcggtgct accatctaac agcttctgaa attcctttgt
+    11581 catatttttg agtccccact aataacccac aaagcagaat aaataccagt tgctcatgta
+    11641 caataatcac tcaactgctg tcttgtagca tacattaatt aagcacattc tttgaataat
+    11701 tactgtgtcc aaacaatcac actttaaaat ctcacacttg tgctatccct tgcccttctg
+    11761 aatgtcactc tgtattttaa atgaagagat gagggttgaa tttcctgtgt tacttattgt
+    11821 tcatttctcg atgaggagtt ttcacattca cctttagtgg aaaacacata agtacacatc
+    11881 ttacaggaaa aatataccaa actgacatgt agcatgaatg cttgtgcatg tagtcatata
+    11941 aaatcttgta gcaatgtaaa cattctctga tatacacata cagatgtgtc tatatgtcta
+    12001 cacaatttct tatgctccat gaacaaacat tccatgcaca cataagaaca cacactgtta
+    12061 cagatgcata cttgagtgca ttgacaaaat taccccagtc aatctagaga atttggattt
+    12121 ctgcatttga ctctgttagc tttgtacatg ctgttcattt actctgggtg atgtctttcc
+    12181 ctcattttgc cttgtctatc ttgtactcat actttaagtc ctaacttata tgttatctca
+    12241 actaagaagc tatttttttt ttaattttaa ctgggcttaa agccctgtct ataaactctg
+    12301 ctacaattat gggctctttc ttataatatt tagtgttttt cctactaatg tacttaatct
+    12361 gctcattgta tattcctacc actaaatttt aacctctttt atggtagaga cattgtcttg
+    12421 taaactctta tttccctagt atttggagat gaaaaaaaag attaaattat ccaaaattag
+    12481 atctctcttt tctacattat gagtattaca ctatccatag agaagtttgt ttgagaccta
+    12541 aactgaggaa cctttggttc taaaatgact atgtgatatc ttagtattta taggtcatga
+    12601 ggttccttcc tctgcctctg ctatagtttg attagtcaac aagcatgtgt catgcattta
+    12661 ttcacatcag aatttcatac actaataaga catagtatca gaagtcagtt tattagttat
+    12721 atcagttagg gtccatcaag gaaaggacaa accattatca gttactcaac ctagaattaa
+    12781 atacagctct taatagttaa ttatccttgt attggaagag ctaaaatatc aaataaagga
+    12841 cagtgcagaa atctagatgt tagtaacatc agaaaacctc ttccgccatt aggcctagaa
+    12901 gggcagaagg agaaaatgtt tataccacca gagtccagaa ccagagccca taaccagagg
+    12961 tccactggat tcagtgagct agtgggtgct ccttggagag agccagaact gtctaatggg
+    13021 ggcatcaaag tatcagccat aaaaaaccat aaaaaagact gtctgctgta ggagatccgt
+    13081 tcagagagag agagagacca gaaataatct tgcttatgct ttccctcagc cagtgtttac
+    13141 cattgcagaa tgtacatgcg actgaaaggg tgaggaaacc tgggaaatgt cagttcctca
+    13201 aatacagaga acactgaggg aaggatgaga aataaatgtg aaagcagaca tgaatggtaa
+    13261 ttgacagaag gaaactagga tgtgtccagt aaatgaataa ttacagtgtg cagtgattat
+    13321 tgcaatgatt aatgtattga taagataata tgaaaacaca gaattcaaac agcagtgaac
+    13381 tgagattaga attgtggaga gcactggcat ttaagaatgt cacacttaga atgtgtctct
+    13441 aggcattgtt ctgtgcatat atcatctcaa tattcattat ctgaaaatta tgaattaggt
+    13501 acaaagctca aataatttat tttttcaggt tagcaagaac tttttttttt tttttctgag
+    13561 atagagcatt gctatggttg cccaggctgg agtgcaatgg catgatccag gctcactgca
+    13621 acatctgcct cccaggttca agcgattctc ctgcctcagc ctcccaagta gctggcacta
+    13681 caggcatgtg ccaccaccat gcctggctaa ttttctattt ttagtagata gggggtttca
+    13741 ccatgttggt caggctgatc tcgaactcct aacatcaggt gatccaccct cctcggcctc
+    13801 tgaaagtgct gggatcacag gcgtgagcca ccacacccag ccaagaatgt gaattttgta
+    13861 gaaggatata acccatattt ctctgaccct agagtcctta gtatacctcc cataccatgt
+    13921 ggctcatcct ccttacatac atttcccatc tttcacccta ccttttcctt tttgtttcag
+    13981 cttttcactg tgtgtcaaaa tctagaacct tatctcctac ctgctctgaa accaacagca
+    14041 agttgacttc cattctaacc cacattggca ttacactaat taaaatcgat actgagttct
+    14101 aaaatcatcg gggattttgg ggactatgtc ttacttcata cttccttgag atttcacatt
+    14161 aaatgttggt gttcattaaa ggtccttcat ttaactttgt attcatcaca ctcttggatt
+    14221 cacagttata tctaaactct taaatacagc ctgtataatc ccaattccca actctgattt
+    14281 ctaacctctg acctccaacc tcagtgccaa acccatatat caaacaatgt actgggctta
+    14341 tttatataga tgtcctatag gcacctcaga ctcagcatgg gtatttcact tgttatacta
+    14401 aaactgtttc tcttccagtg ttttccattt tagtcattag atagctactt gcccattcac
+    14461 caaggtcaca gattaaaatc atttccctac ctctaatcaa cagttcgatt ctgcttcaat
+    14521 ttgtccctat ctattaatca ccactcttac tgcccagtca ggtcctcatt gtttcctgaa
+    14581 caagagtaga tgctattctt tccactttta gaccttatcc tggctggatg cggtggctca
+    14641 ggcttgtaaa cccagcactt tgggaggcca aggcaggcag atcacttgag gtcaggagtt
+    14701 caagaccagc ctgaccaaca tggtgaaacc ccatctctac taaaaataca aaatcagccg
+    14761 ggcgtgtggt gcatgcctgc agtcccagct attcaggtgg ctgaggcagg agaattgctt
+    14821 gaacccagga ggcagaggtt gcggtgagcc tagattgcac cattgcactc tagcttgggc
+    14881 aatagggatg aaactccatc tcagaagaga aaagaaaaaa agaccttatt ctgttataca
+    14941 aatcctctca atgcaatcca tatagaataa acatgtaacc agatctccca atgtgtaaaa
+    15001 tcatttcagg tagaacagaa ttaaagtgaa aagccaagtc tttggaatta acagacaaag
+    15061 atcaaataac agtcctcatg gccttaagaa tttacctaac atttttttta gaatcaattt
+    15121 tcttatatat gaattggaaa cataattcct ccctcacaaa cacattctaa gattttaagg
+    15181 agatattgat gaagtacatc atctgtcatt tttaacaggt agtggtagtg attcacacag
+    15241 cacattatga tctgttcttg tatgttctgt tccattctgt attcttgacc tggttgtatt
+    15301 ctttctgagc tccagatcca catatctaag tacatctttt tgcattttac aagagtgcat
+    15361 acaatacaat gtatccaaga ctgtatttct gattttatcg taccactaaa ctcacaaatg
+    15421 tggccctatt cttgtgttca
+//
\ No newline at end of file
diff --git a/test/jalview/io/GenBankTest.java b/test/jalview/io/GenBankTest.java
new file mode 100644 (file)
index 0000000..d3c41da
--- /dev/null
@@ -0,0 +1,282 @@
+package jalview.io;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentAnnotation;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.Annotation;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.junit.Test;
+
+public class GenBankTest {
+//     private final static File GENBANK_FILE = new File("test/jalview/io/V00505.gb");
+//     private final static File GENBANK_FILE = new File("test/jalview/io/NC_000011.10.gb");
+       private final static File GENBANK_FILE = new File("test/jalview/io/M92650.1.gb");
+
+       @Test
+       public void testParsing(){
+               testFileIOwithFormat(GENBANK_FILE, "GENBANK");
+       }
+         /**
+          * test alignment data in given file can be imported, exported and reimported
+          * with no dataloss
+          * 
+          * @param f
+          *          - source datafile (IdentifyFile.identify() should work with it)
+          * @param ioformat
+          *          - label for IO class used to write and read back in the data from
+          *          f
+          */
+         public static void testFileIOwithFormat(File f, String ioformat)
+         {
+           System.out.println("Reading file: " + f);
+           String ff = f.getPath();
+           try
+           {
+             AppletFormatAdapter rf = new AppletFormatAdapter();
+
+             Alignment al = rf.readFile(ff, AppletFormatAdapter.FILE,
+                     new IdentifyFile().Identify(ff, AppletFormatAdapter.FILE));
+
+             assertNotNull("Couldn't read supplied alignment data.", al);
+
+             // make sure dataset is initialised ? not sure about this
+             for (int i = 0; i < al.getSequencesArray().length; ++i)
+             {
+               al.getSequenceAt(i).setDatasetSequence(al.getSequenceAt(i));
+             }
+             String outputfile = rf.formatSequences(ioformat, al, true);
+             System.out.println("Output file in '"+ioformat+"':\n"+outputfile+"\n<<EOF\n");
+             // test for consistency in io
+             Alignment al_input = new AppletFormatAdapter().readFile(outputfile,
+                     AppletFormatAdapter.PASTE, ioformat);
+             assertNotNull("Couldn't parse reimported alignment data.", al_input);
+
+             String identifyoutput = new IdentifyFile().Identify(outputfile,
+                     AppletFormatAdapter.PASTE);
+             assertNotNull("Identify routine failed for outputformat " + ioformat,
+                     identifyoutput);
+             assertTrue(
+                     "Identify routine could not recognise output generated by '"
+                             + ioformat + "' writer",
+                     ioformat.equals(identifyoutput));
+             testAlignmentEquivalence(al, al_input);
+           } catch (Exception e)
+           {
+             e.printStackTrace();
+             assertTrue("Couln't format the alignment for output file.", false);
+           }
+         }
+         /**
+          * assert alignment equivalence
+          * 
+          * @param al
+          *          'original'
+          * @param al_input
+          *          'secondary' or generated alignment from some datapreserving
+          *          transformation
+          */
+         public static void testAlignmentEquivalence(AlignmentI al,
+                 AlignmentI al_input)
+         {
+           assertNotNull("Original alignment was null", al);
+           assertNotNull("Generated alignment was null", al_input);
+
+           assertTrue(
+                   "Alignment dimension mismatch: original contains "
+                           + al.getHeight() + " and generated has "
+                           + al_input.getHeight() + " sequences; original has "
+                           + al.getWidth() + " and generated has "
+                           + al_input.getWidth() + " columns.",
+                   al.getHeight() == al_input.getHeight()
+                           && al.getWidth() == al_input.getWidth());
+
+           // check Alignment annotation
+           AlignmentAnnotation[] aa_new = al_input.getAlignmentAnnotation();
+           AlignmentAnnotation[] aa_original = al.getAlignmentAnnotation();
+
+           // note - at moment we do not distinguish between alignment without any
+           // annotation rows and alignment with no annotation row vector
+           // we might want to revise this in future
+           int aa_new_size = (aa_new == null ? 0 : aa_new.length), aa_original_size = (aa_original == null ? 0
+                   : aa_original.length);
+           Map<Integer,java.util.BitSet> orig_groups=new HashMap<Integer,java.util.BitSet>(),new_groups=new HashMap<Integer,java.util.BitSet>();
+
+           if (aa_new != null && aa_original != null)
+           {
+             for (int i = 0; i < aa_original.length; i++)
+             {
+               if (aa_new.length>i) {
+                 assertTrue("Different alignment annotation at position "+i,
+                       equalss(aa_original[i], aa_new[i]));
+                 // compare graphGroup or graph properties - needed to verify JAL-1299
+                 assertTrue("Graph type not identical.",aa_original[i].graph==aa_new[i].graph);
+                 assertTrue("Visibility not identical.", aa_original[i].visible==aa_new[i].visible);
+                 assertTrue(
+                         "Threshold line not identical.",
+                         aa_original[i].threshold == null ? aa_new[i].threshold == null
+                                 : aa_original[i].threshold
+                                         .equals(aa_new[i].threshold));
+                 // graphGroup may differ, but pattern should be the same
+                 Integer o_ggrp=new Integer(aa_original[i].graphGroup+2),n_ggrp=new Integer(aa_new[i].graphGroup+2);
+                 BitSet orig_g=orig_groups.get(o_ggrp),new_g=new_groups.get(n_ggrp);
+                 if (orig_g==null) {
+                   orig_groups.put(o_ggrp,orig_g= new BitSet());
+                 }
+                 if (new_g==null) {
+                   new_groups.put(n_ggrp, new_g=new BitSet());
+                 }
+                 assertTrue("Graph Group pattern differs at annotation "+i, orig_g.equals(new_g));
+                 orig_g.set(i); new_g.set(i);
+               } else {
+                 System.err.println("No matching annotation row for "+aa_original[i].toString());
+               }
+             }
+           }
+           assertTrue(
+                   "Generated and imported alignment have different annotation sets ("
+                           + aa_new_size + " != " + aa_original_size + ")",
+                   aa_new_size == aa_original_size);
+
+           // check sequences, annotation and features
+           SequenceI[] seq_original = new SequenceI[al.getSequencesArray().length];
+           seq_original = al.getSequencesArray();
+           SequenceI[] seq_new = new SequenceI[al_input.getSequencesArray().length];
+           seq_new = al_input.getSequencesArray();
+           SequenceFeature[] sequenceFeatures_original, sequenceFeatures_new;
+           AlignmentAnnotation annot_original, annot_new;
+           //
+           for (int i = 0; i < al.getSequencesArray().length; i++)
+           {
+             String name = seq_original[i].getName();
+             int start = seq_original[i].getStart();
+             int end = seq_original[i].getEnd();
+             System.out.println("Check sequence: " + name + "/" + start + "-"
+                     + end);
+
+             // search equal sequence
+             for (int in = 0; in < al_input.getSequencesArray().length; in++)
+             {
+               if (name.equals(seq_new[in].getName())
+                       && start == seq_new[in].getStart()
+                       && end == seq_new[in].getEnd())
+               {
+                 String ss_original = seq_original[i].getSequenceAsString();
+                 String ss_new = seq_new[in].getSequenceAsString();
+                 assertTrue("The sequences " + name + "/" + start + "-" + end
+                         + " are not equal", ss_original.equals(ss_new));
+
+                 assertTrue(
+                         "Sequence Features were not equivalent",
+                         (seq_original[i].getSequenceFeatures() == null && seq_new[in]
+                                 .getSequenceFeatures() == null)
+                                 || (seq_original[i].getSequenceFeatures() != null && seq_new[in]
+                                         .getSequenceFeatures() != null));
+                 // compare sequence features
+                 if (seq_original[i].getSequenceFeatures() != null
+                         && seq_new[in].getSequenceFeatures() != null)
+                 {
+                   System.out.println("There are feature!!!");
+                   sequenceFeatures_original = new SequenceFeature[seq_original[i]
+                           .getSequenceFeatures().length];
+                   sequenceFeatures_original = seq_original[i]
+                           .getSequenceFeatures();
+                   sequenceFeatures_new = new SequenceFeature[seq_new[in]
+                           .getSequenceFeatures().length];
+                   sequenceFeatures_new = seq_new[in].getSequenceFeatures();
+
+                   assertTrue("different number of features", seq_original[i]
+                           .getSequenceFeatures().length == seq_new[in]
+                           .getSequenceFeatures().length);
+
+                   for (int feat = 0; feat < seq_original[i].getSequenceFeatures().length; feat++)
+                   {
+                     assertTrue("Different features",
+                             sequenceFeatures_original[feat]
+                                     .equals(sequenceFeatures_new[feat]));
+                   }
+                 }
+                 // compare alignment annotation
+                 if (al.getSequenceAt(i).getAnnotation() != null
+                         && al_input.getSequenceAt(in).getAnnotation() != null)
+                 {
+                   for (int j = 0; j < al.getSequenceAt(i).getAnnotation().length; j++)
+                   {
+                     if (al.getSequenceAt(i).getAnnotation()[j] != null
+                             && al_input.getSequenceAt(in).getAnnotation()[j] != null)
+                     {
+                       annot_original = al.getSequenceAt(i).getAnnotation()[j];
+                       annot_new = al_input.getSequenceAt(in).getAnnotation()[j];
+                       assertTrue("Different annotation elements",
+                               equalss(annot_original, annot_new));
+                     }
+                   }
+                 }
+                 else if (al.getSequenceAt(i).getAnnotation() == null
+                         && al_input.getSequenceAt(in).getAnnotation() == null)
+                 {
+                   System.out.println("No annotations");
+                 }
+                 else if (al.getSequenceAt(i).getAnnotation() != null
+                         && al_input.getSequenceAt(in).getAnnotation() == null)
+                 {
+                   assertTrue("Annotations differed between sequences ("
+                           + al.getSequenceAt(i).getName() + ") and ("
+                           + al_input.getSequenceAt(i).getName() + ")", false);
+                 }
+                 break;
+               }
+             }
+           }
+         }
+         /*
+          * compare annotations
+          */
+         private static boolean equalss(AlignmentAnnotation annot_or,
+                 AlignmentAnnotation annot_new)
+         {
+           if (annot_or.annotations.length != annot_new.annotations.length)
+           {
+             System.err.println("Different lengths for annotation row elements: "+annot_or.annotations.length +"!="+ annot_new.annotations.length);
+             return false;
+           }
+           for (int i = 0; i < annot_or.annotations.length; i++)
+           {
+             Annotation an_or=annot_or.annotations[i],an_new=annot_new.annotations[i];
+             if (an_or != null
+                     && an_new!= null)
+             {
+               if (!an_or.displayCharacter.trim()
+                       .equals(an_new.displayCharacter.trim())
+                       || !(""+an_or.secondaryStructure).trim().equals((""+an_new.secondaryStructure).trim())
+                       || ((!an_or.description.equals(an_new.description)) && (an_or.description == null
+                               || an_new.description == null || !an_or.description
+                                 .equals(an_new.description))))
+               {
+                 System.err.println("Annotation Element Mismatch\nElement "+i+" in original: "+annot_or.annotations[i].toString()+"\nElement "+i+" in new: "+annot_new.annotations[i].toString());
+                 return false;
+               }
+             }
+             else if (annot_or.annotations[i] == null
+                     && annot_new.annotations[i] == null)
+             {
+               continue;
+             }
+             else
+             {
+               System.err.println("Annotation Element Mismatch\nElement "+i+" in original: "+(annot_or.annotations[i]==null ? "is null" : annot_or.annotations[i].toString())+"\nElement "+i+" in new: "+(annot_new.annotations[i] == null ? "is null" : annot_new.annotations[i].toString()));
+               return false;
+             }
+           }
+           return true;
+         }     
+}
diff --git a/test/jalview/io/M92650.1.gb b/test/jalview/io/M92650.1.gb
new file mode 100644 (file)
index 0000000..8abbba9
--- /dev/null
@@ -0,0 +1,92 @@
+LOCUS       HUMDMDXX                2110 bp    mRNA    linear   PRI 07-NOV-1994
+DEFINITION  Human Duchenne muscular dystrophy (DMD) mRNA,
+complete cds.
+ACCESSION   M92650
+VERSION     M92650.1  GI:181598
+KEYWORDS    Duchenne muscular 
+dystrophy protein.
+SOURCE      Homo sapiens (human)
+  ORGANISM  Homo sapiens
+            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+            Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;
+            Catarrhini; Hominidae; Homo.
+REFERENCE   1  (bases 1 to 2110)
+  AUTHORS   Lederfein,D., Levy,Z., Augier,N., Mornet,D.,
+Morris,G., Fuchs,O.,
+            Yaffe,D. and Nudel,U.
+  TITLE     A 71-kilodalton protein is a major product of the Duchenne muscular
+            dystrophy gene in brain and other nonmuscle tissues
+  JOURNAL   Proc. Natl. Acad. Sci. U.S.A. 89 (12), 5346-5350 (1992)
+   PUBMED   1319059
+COMMENT     Original source text: Homo sapiens brain cDNA to mRNA.
+FEATURES             Location/Qualifiers
+     source          1..2110
+                     /organism="Homo sapiens"
+                     /mol_type="mRNA"
+                     /db_xref="taxon:9606"
+                     /map="Xp21.3-p21.1"
+                     /cell_type="amniotic fluid"
+                     /tissue_type="brain"
+     gene            1..2110
+                     /gene="DMD"
+     5'UTR           1..52
+                     /gene="DMD"
+                     /note="G00-119-850"
+     CDS             53..1921
+                     /gene="DMD"
+                     /codon_start=1
+                     /protein_id="AAA52316.1"
+                     /db_xref="GI:181599"
+                     /db_xref="GDB:G00-119-850"
+                     /translation="MREQLKGHETQTTCWDHPKMTELYQSLADLNNVRFSAYRTAMKL
+                     RRLQKALCLDLLSLSAACDALDQHNLKQNDQPMDILQIINCLTTIYDRLEQEHNNLVN
+                     VPLCVDMCLNWLLNVYDTGRTGRIRVLSFKTGIISLCKAHLEDKYRYLFKQVASSTGF
+                     CDQRRLGLLLHDSIQIPRQLGEVASFGGSNIEPSVRSCFQFANNKPEIEAALFLDWMR
+                     LEPQSMVWLPVLHRVAAAETAKHQAKCNICKECPIIGFRYRSLKHFNYDICQSCFFSG
+                     RVAKGHKMHYPMVEYCTPTTSGEDVRDFAKVLKNKFRTKRYFAKHPRMGYLPVQTVLE
+                     GDNMETPASSPQLSHDDTHSRIEHYASRLAEMENSNGSYLNDSISPNESIDDEHLLIQ
+                     HYCQSLNQDSPLSQPRSPAQILISLESEERGELERILADLEEENRNLQAEYDRLKQQH
+                     EHKGLSPLPSPPEMMPTSPQSPRDAELIAEAKLLRQHKGRLEARMQILEDHNKQLESQ
+                     LHRLRQLLEQPQAEAKVNGTTVSSPSTSLQRSDSSQPMLLRVVGSQTSDSMGEEDLLS
+                     PPQDTSTGLEEVMEQLNNSFPSSRGHNVGSLFHMADDLGRAMESLVSVMTDEEGAE"
+     3'UTR           1922..2110
+                     /gene="DMD"
+                     /note="G00-119-850"
+ORIGIN      
+        1 gaagctcact cctccactcg tacccacact cgaccgcgga gcccttgcag ccatgaggga
+       61 acagctcaaa ggccacgaga ctcaaacaac ttgctgggac catcccaaaa tgacagagct
+      121 ctaccagtct ttagctgacc tgaataatgt cagattctca gcttatagga ctgccatgaa
+      181 actccgaaga ctgcagaagg ccctttgctt ggatctcttg agcctgtcag ctgcatgtga
+      241 tgccttggac cagcacaacc tcaagcaaaa tgaccagccc atggatatcc tgcagattat
+      301 taattgtttg accactattt atgaccgcct ggagcaagag cacaacaatt tggtcaacgt
+      361 ccctctctgc gtggatatgt gtctgaactg gctgctgaat gtttatgata cgggacgaac
+      421 agggaggatc cgtgtcctgt cttttaaaac tggcatcatt tccctgtgta aagcacattt
+      481 ggaagacaag tacagatacc ttttcaagca agtggcaagt tcaacaggat tttgtgacca
+      541 gcgcaggctg ggcctccttc tgcatgattc tatccaaatt ccaagacagt tgggtgaagt
+      601 tgcatccttt gggggcagta acattgagcc aagtgtccgg agctgcttcc aatttgctaa
+      661 taataagcca gagatcgaag cggccctctt cctagactgg atgagactgg aaccccagtc
+      721 catggtgtgg ctgcccgtcc tgcacagagt ggctgctgca gaaactgcca agcatcaggc
+      781 caaatgtaac atctgcaaag agtgtccaat cattggattc aggtacagga gtctaaagca
+      841 ctttaattat gacatctgcc aaagctgctt tttttctggt cgagttgcaa aaggccataa
+      901 aatgcactat cccatggtgg aatattgcac tccgactaca tcaggagaag atgttcgaga
+      961 ctttgccaag gtactaaaaa acaaatttcg aaccaaaagg tattttgcga agcatccccg
+     1021 aatgggctac ctgccagtgc agactgtctt agagggggac aacatggaaa cgcctgcctc
+     1081 gtcccctcag ctttcacacg atgatactca ttcacgcatt gaacattatg ctagcaggct
+     1141 agcagaaatg gaaaacagca atggatctta tctaaatgat agcatctctc ctaatgagag
+     1201 catagatgat gaacatttgt taatccagca ttactgccaa agtttgaacc aggactcccc
+     1261 cctgagccag cctcgtagtc ctgcccagat cttgatttcc ttagagagtg aggaaagagg
+     1321 ggagctagag agaatcctag cagatcttga ggaagaaaac aggaatctgc aagcagaata
+     1381 tgaccgtcta aagcagcagc acgaacataa aggcctgtcc ccactgccgt cccctcctga
+     1441 aatgatgccc acctctcccc agagtccccg ggatgctgag ctcattgctg aggccaagct
+     1501 actgcgtcaa cacaaaggcc gcctggaagc caggatgcaa atcctggaag accacaataa
+     1561 acagctggag tcacagttac acaggctaag gcagctgctg gagcaacccc aggcagaggc
+     1621 caaagtgaat ggcacaacgg tgtcctctcc ttctacctct ctacagaggt ccgacagcag
+     1681 tcagcctatg ctgctccgag tggttggcag tcaaacttcg gactccatgg gtgaggaaga
+     1741 tcttctcagt cctccccagg acacaagcac agggttagag gaggtgatgg agcaactcaa
+     1801 caactccttc cctagttcaa gaggacacaa tgtaggaagt cttttccaca tggcagatga
+     1861 tttgggcaga gcgatggagt ccttagtatc agtcatgaca gatgaagaag gagcagaata
+     1921 aatgttttac aactcctgat tcccgcatgg tttttataat attcatacaa caaagaggat
+     1981 tagacagtaa gagtttacaa gaaataaatc tatatttttg tgaagggtag tggtattata
+     2041 ctgtagattt cagtagtttc taagtctgtt attgttttgt taacaatggc aggttttaca
+     2101 cgtctatgca
+//
\ No newline at end of file
diff --git a/test/jalview/io/NC_000011.10.gb b/test/jalview/io/NC_000011.10.gb
new file mode 100644 (file)
index 0000000..850c004
--- /dev/null
@@ -0,0 +1,173 @@
+LOCUS       NC_000011               1800 bp    DNA     linear   CON 03-FEB-2014
+DEFINITION  Homo sapiens chromosome 11, GRCh38 Primary Assembly.
+ACCESSION   NC_000011 REGION: complement(5232829..5234628) GPC_000001303
+VERSION     NC_000011.10  GI:568815587
+DBLINK      BioProject: PRJNA168
+            Assembly: GCF_000001405.26
+KEYWORDS    RefSeq.
+SOURCE      Homo sapiens (human)
+  ORGANISM  Homo sapiens
+            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+            Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;
+            Catarrhini; Hominidae; Homo.
+REFERENCE   1  (bases 1 to 1800)
+  AUTHORS   Taylor,T.D., Noguchi,H., Totoki,Y., Toyoda,A., Kuroki,Y., Dewar,K.,
+            Lloyd,C., Itoh,T., Takeda,T., Kim,D.W., She,X., Barlow,K.F.,
+            Bloom,T., Bruford,E., Chang,J.L., Cuomo,C.A., Eichler,E.,
+            FitzGerald,M.G., Jaffe,D.B., LaButti,K., Nicol,R., Park,H.S.,
+            Seaman,C., Sougnez,C., Yang,X., Zimmer,A.R., Zody,M.C.,
+            Birren,B.W., Nusbaum,C., Fujiyama,A., Hattori,M., Rogers,J.,
+            Lander,E.S. and Sakaki,Y.
+  TITLE     Human chromosome 11 DNA sequence and analysis including novel gene identification
+  JOURNAL   Nature 440 (7083), 497-500 (2006)
+   PUBMED   16554811
+REFERENCE   2  (bases 1 to 1800)
+  CONSRTM   International Human Genome Sequencing Consortium
+  TITLE     Finishing the euchromatic sequence of the human genome
+  JOURNAL   Nature 431 (7011), 931-945 (2004)
+   PUBMED   15496913
+REFERENCE   3  (bases 1 to 1800)
+  AUTHORS   Lander,E.S., Linton,L.M., Birren,B., Nusbaum,C., Zody,M.C.,
+            Baldwin,J., Devon,K., Dewar,K., Doyle,M., FitzHugh,W., Funke,R.,
+            Gage,D., Harris,K., Heaford,A., Howland,J., Kann,L., Lehoczky,J.,
+            LeVine,R., McEwan,P., McKernan,K., Meldrim,J., Mesirov,J.P.,
+            Miranda,C., Morris,W., Naylor,J., Raymond,C., Rosetti,M.,
+            Santos,R., Sheridan,A., Sougnez,C., Stange-Thomann,N.,
+            Stojanovic,N., Subramanian,A., Wyman,D., Rogers,J., Sulston,J.,
+            Ainscough,R., Beck,S., Bentley,D., Burton,J., Clee,C., Carter,N.,
+            Coulson,A., Deadman,R., Deloukas,P., Dunham,A., Dunham,I.,
+            Durbin,R., French,L., Grafham,D., Gregory,S., Hubbard,T.,
+            Humphray,S., Hunt,A., Jones,M., Lloyd,C., McMurray,A., Matthews,L.,
+            Mercer,S., Milne,S., Mullikin,J.C., Mungall,A., Plumb,R., Ross,M.,
+            Shownkeen,R., Sims,S., Waterston,R.H., Wilson,R.K., Hillier,L.W.,
+            McPherson,J.D., Marra,M.A., Mardis,E.R., Fulton,L.A.,
+            Chinwalla,A.T., Pepin,K.H., Gish,W.R., Chissoe,S.L., Wendl,M.C.,
+            Delehaunty,K.D., Miner,T.L., Delehaunty,A., Kramer,J.B., Cook,L.L.,
+            Fulton,R.S., Johnson,D.L., Minx,P.J., Clifton,S.W., Hawkins,T.,
+            Branscomb,E., Predki,P., Richardson,P., Wenning,S., Slezak,T.,
+            Doggett,N., Cheng,J.F., Olsen,A., Lucas,S., Elkin,C.,
+            Uberbacher,E., Frazier,M., Gibbs,R.A., Muzny,D.M., Scherer,S.E.,
+            Bouck,J.B., Sodergren,E.J., Worley,K.C., Rives,C.M., Gorrell,J.H.,
+            Metzker,M.L., Naylor,S.L., Kucherlapati,R.S., Nelson,D.L.,
+            Weinstock,G.M., Sakaki,Y., Fujiyama,A., Hattori,M., Yada,T.,
+            Toyoda,A., Itoh,T., Kawagoe,C., Watanabe,H., Totoki,Y., Taylor,T.,
+            Weissenbach,J., Heilig,R., Saurin,W., Artiguenave,F., Brottier,P.,
+            Bruls,T., Pelletier,E., Robert,C., Wincker,P., Smith,D.R.,
+            Doucette-Stamm,L., Rubenfield,M., Weinstock,K., Lee,H.M.,
+            Dubois,J., Rosenthal,A., Platzer,M., Nyakatura,G., Taudien,S.,
+            Rump,A., Yang,H., Yu,J., Wang,J., Huang,G., Gu,J., Hood,L.,
+            Rowen,L., Madan,A., Qin,S., Davis,R.W., Federspiel,N.A.,
+            Abola,A.P., Proctor,M.J., Myers,R.M., Schmutz,J., Dickson,M.,
+            Grimwood,J., Cox,D.R., Olson,M.V., Kaul,R., Raymond,C., Shimizu,N.,
+            Kawasaki,K., Minoshima,S., Evans,G.A., Athanasiou,M., Schultz,R.,
+            Roe,B.A., Chen,F., Pan,H., Ramser,J., Lehrach,H., Reinhardt,R.,
+            McCombie,W.R., de la Bastide,M., Dedhia,N., Blocker,H.,
+            Hornischer,K., Nordsiek,G., Agarwala,R., Aravind,L., Bailey,J.A.,
+            Bateman,A., Batzoglou,S., Birney,E., Bork,P., Brown,D.G.,
+            Burge,C.B., Cerutti,L., Chen,H.C., Church,D., Clamp,M.,
+            Copley,R.R., Doerks,T., Eddy,S.R., Eichler,E.E., Furey,T.S.,
+            Galagan,J., Gilbert,J.G., Harmon,C., Hayashizaki,Y., Haussler,D.,
+            Hermjakob,H., Hokamp,K., Jang,W., Johnson,L.S., Jones,T.A.,
+            Kasif,S., Kaspryzk,A., Kennedy,S., Kent,W.J., Kitts,P.,
+            Koonin,E.V., Korf,I., Kulp,D., Lancet,D., Lowe,T.M., McLysaght,A.,
+            Mikkelsen,T., Moran,J.V., Mulder,N., Pollara,V.J., Ponting,C.P.,
+            Schuler,G., Schultz,J., Slater,G., Smit,A.F., Stupka,E.,
+            Szustakowski,J., Thierry-Mieg,D., Thierry-Mieg,J., Wagner,L.,
+            Wallis,J., Wheeler,R., Williams,A., Wolf,Y.I., Wolfe,K.H.,
+            Yang,S.P., Yeh,R.F., Collins,F., Guyer,M.S., Peterson,J.,
+            Felsenfeld,A., Wetterstrand,K.A., Patrinos,A., Morgan,M.J., de
+            Jong,P., Catanese,J.J., Osoegawa,K., Shizuya,H., Choi,S. and
+            Chen,Y.J.
+  CONSRTM   International Human Genome Sequencing Consortium
+  TITLE     Initial sequencing and analysis of the human genome
+  JOURNAL   Nature 409 (6822), 860-921 (2001)
+   PUBMED   11237011
+  REMARK    Erratum:[Nature 2001 Aug 2;412(6846):565]
+COMMENT     REFSEQ INFORMATION: The reference sequence is identical to
+            CM000673.2.
+            On Feb 3, 2014 this sequence version replaced gi:224589802.
+            Assembly Name: GRCh38 Primary Assembly
+            The DNA sequence is composed of genomic sequence, primarily
+            finished clones that were sequenced as part of the Human Genome
+            Project. PCR products and WGS shotgun sequence have been added
+            where necessary to fill gaps or correct errors. All such additions
+            are manually curated by GRC staff. For more information see:
+            http://genomereference.org.
+            
+            ##Genome-Annotation-Data-START##
+            Annotation Provider         :: NCBI
+            Annotation Status           :: Full annotation
+            Annotation Version          :: Homo sapiens Annotation Release 106
+            Annotation Pipeline         :: NCBI eukaryotic genome annotation
+                                           pipeline
+            Annotation Software Version :: 5.2
+            Annotation Method           :: Best-placed RefSeq; Gnomon
+            Features Annotated          :: Gene; mRNA; CDS; ncRNA
+            ##Genome-Annotation-Data-END##
+FEATURES             Location/Qualifiers
+     source          1..1800
+                     /organism="Homo sapiens"
+                     /mol_type="genomic DNA"
+                     /db_xref="taxon:9606"
+                     /chromosome="11"
+     gene            1..1800
+                     /gene="HBD"
+                     /note="hemoglobin, delta; Derived by automated computational analysis using gene prediction method: Curated Genomic."
+                     /db_xref="GeneID:3045"
+                     /db_xref="HGNC:4829"
+                     /db_xref="MIM:142000"
+     mRNA            join(1..287,416..638,1537..1800)
+                     /gene="HBD"
+                     /product="hemoglobin, delta"
+                     /note="Derived by automated computational analysis using gene prediction method: Curated Genomic."
+                     /transcript_id="NM_000519.3"
+                     /db_xref="GI:62865863"
+                     /db_xref="GeneID:3045"
+                     /db_xref="HGNC:4829"
+                     /db_xref="MIM:142000"
+     CDS             join(196..287,416..638,1537..1665)
+                     /gene="HBD"
+                     /note="delta globin; delta-globin chain; hemoglobin delta chain; Derived by automated computational analysis using gene prediction method: Curated Genomic."
+                     /codon_start=1
+                     /product="hemoglobin subunit delta"
+                     /protein_id="NP_000510.1"
+                     /db_xref="GI:4504351"
+                     /db_xref="CCDS:CCDS31376.1"
+                     /db_xref="GeneID:3045"
+                     /db_xref="HGNC:4829"
+                     /db_xref="MIM:142000"
+                     /translation="MVHLTPEEKTAVNALWGKVNVDAVGGEALGRLLVVYPWTQRFFE
+                     SFGDLSSPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFSQLSELHCDKLHVDPE
+                     NFRLLGNVLVCVLARNFGKEFTPQMQAAYQKVVAGVANALAHKYH"
+ORIGIN      
+        1 agggcaagtt aagggaatag tggaatgaag gttcattttt cattctcaca aactaatgaa
+       61 accctgctta tcttaaacca acctgctcac tggagcaggg aggacaggac cagcataaaa
+      121 ggcagggcag agtcgactgt tgcttacact ttcttctgac ataacagtgt tcactagcaa
+      181 cctcaaacag acaccatggt gcatctgact cctgaggaga agactgctgt caatgccctg
+      241 tggggcaaag tgaacgtgga tgcagttggt ggtgaggccc tgggcaggtt ggtatcaagg
+      301 ttataagaga ggctcaagga ggcaaatgga aactgggcat gtgtagacag agaagactct
+      361 tgggtttctg ataggcactg actctctgtc ccttgggctg ttttcctacc ctcagattac
+      421 tggtggtcta cccttggacc cagaggttct ttgagtcctt tggggatctg tcctctcctg
+      481 atgctgttat gggcaaccct aaggtgaagg ctcatggcaa gaaggtgcta ggtgccttta
+      541 gtgatggcct ggctcacctg gacaacctca agggcacttt ttctcagctg agtgagctgc
+      601 actgtgacaa gctgcacgtg gatcctgaga acttcagggt gagtccagga gatgcttcac
+      661 ttttctcttt ttactttcta atcttacatt ttggttcttt tacctacctg ctcttctccc
+      721 acatttttgt cattttacta tattttatca tttaatgctt ctaaaatttt gttaattttt
+      781 tatttaaata ttctgcattt tttccttcct cacaatcttg ctattttaaa ttatttaata
+      841 tcctgtcttt ctctcccaac cccctccctt catttttcct tctctaacaa caactcaaat
+      901 tatgcatacc agctctcacc tgctaattct gcacttagaa taatcctttt gtctctccac
+      961 atgggtatgg gagaggctcc aactcaaaga tgagaggcat agaatactgt tttagaggct
+     1021 ataaatcatt ttacaataag gaataattgg aattttataa attctgtagt aaatggaatg
+     1081 gaaaggaaag tgaatatttg attatgaaag actaggcagt tacactggag gtggggcaga
+     1141 agtcgttgct aggagacagc ccatcatcac actgattaat caattaattt gtatctatta
+     1201 atctgtttat agtaattaat ttgtatatgc tatatacaca tacaaaatta aaactaattt
+     1261 ggaattaatt tgtatatagt attatacagc atatatagca tatatgtaca tatatagact
+     1321 acatgctagt taagtacata gaggatgtgt gtgtatagat atatgttata tgtatgcatt
+     1381 catatatgta cttatttatg ctgatgggaa taacctgggg atcagttttg tctaagattt
+     1441 gggcagaaaa aaatgggtgt tggctcagtt tctcagaagc cagtctttat ttctctgtta
+     1501 accatatgca tgtatctgcc tacctcttct ccgcagctct tgggcaatgt gctggtgtgt
+     1561 gtgctggccc gcaactttgg caaggaattc accccacaaa tgcaggctgc ctatcagaag
+     1621 gtggtggctg gtgtggctaa tgccctggct cacaagtacc attgagatcc tggactgttt
+     1681 cctgataacc ataagaagac cctatttccc tagattctat tttctgaact tgggaacaca
+     1741 atgcctactt caagggtatg gcttctgcct aataaagaat gttcagctca acttcctgat
+//
\ No newline at end of file
diff --git a/test/jalview/io/V00505.gb b/test/jalview/io/V00505.gb
new file mode 100644 (file)
index 0000000..73c5cf4
--- /dev/null
@@ -0,0 +1,83 @@
+LOCUS       V00505                  1976 bp    DNA     linear   PRI 14-NOV-2006
+DEFINITION  Human gene for delta-globin.
+ACCESSION   V00505
+VERSION     V00505.1  GI:30510
+KEYWORDS    delta globin; germ line; globin.
+SOURCE      Homo sapiens (human)
+  ORGANISM  Homo sapiens
+            Eukaryota; Metazoa; Chordata; Craniata; Vertebrata; Euteleostomi;
+            Mammalia; Eutheria; Euarchontoglires; Primates; Haplorrhini;
+            Catarrhini; Hominidae; Homo.
+REFERENCE   1  (bases 1 to 1976)
+  AUTHORS   Spritz,R.A., DeRiel,J.K., Forget,B.G. and Weissman,S.M.
+  TITLE     Complete nucleotide sequence of the human delta-globin gene
+  JOURNAL   Cell 21 (3), 639-646 (1980)
+   PUBMED   7438204
+COMMENT     KST HSA.DELGLOBIN.
+FEATURES             Location/Qualifiers
+     source          1..1976
+                     /organism="Homo sapiens"
+                     /mol_type="genomic DNA"
+                     /db_xref="taxon:9606"
+     prim_transcript 123..1763
+     exon            123..265
+                     /number=1
+     CDS             join(173..265,394..615,1505..1633)
+                     /codon_start=1
+                     /product="delta globin"
+                     /protein_id="CAA23763.1"
+                     /db_xref="GI:30511"
+                     /db_xref="GDB:119298"
+                     /db_xref="GOA:P02042"
+                     /db_xref="HGNC:4829"
+                     /db_xref="InterPro:IPR000971"
+                     /db_xref="InterPro:IPR002337"
+                     /db_xref="InterPro:IPR009050"
+                     /db_xref="InterPro:IPR012292"
+                     /db_xref="PDB:1SHR"
+                     /db_xref="PDB:1SI4"
+                     /db_xref="UniProtKB/Swiss-Prot:P02042"
+                     /translation="MVHLTPEEKTAVNALWGKVNVDAVGGEALGRLLVVYPWTQRFFESFGDLSSPDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFSQLSELHCDKLHVDPENFRLLGNVLVCVLARNFGKEFTPQMQAAYQKVVAGVANALAHKYH"
+     intron          266..393
+                     /number=1
+     exon            394..615
+                     /number=2
+     intron          616..1504
+                     /number=3
+     exon            1505..1763
+                     /number=3
+ORIGIN      
+        1 aatgaaggtt catttttcat tctcacaaac taatgaaacc ctgcttatct taaaccaacc
+       61 tgctcactgg agcagggagg acaggaccag cataaaaggc agggcagagt cgactgttgc
+      121 ttacactttc ttctgacata acagtgttca ctagcaacct caaacagaca ccatggtgca
+      181 tctgactcct gaggagaaga ctgctgtcaa tgccctgtgg ggcaaagtga acgtggatgc
+      241 agttggtggt gaggccctgg gcaggttggt atcaaggtta taagagaggc tcaaggaggc
+      301 aaatggaaac tgggcatgtg tagacagaga agactcttgg gtttctgata ggcactgact
+      361 ctctgtccct tgggctgttt tcctaccctc agattactgg tggtctaccc ttggacccag
+      421 aggttctttg agtcctttgg ggatctgtcc tctcctgatg ctgttatggg caaccctaag
+      481 gtgaaggctc atggcaagaa ggtgctaggt gcctttagtg atggcctggc tcacctggac
+      541 aacctcaagg gcactttttc tcagctgagt gagctgcact gtgacaagct gcacgtggat
+      601 cctgagaact tcagggtgag tccaggagat gcttcacttt tctcttttta ctttctaatc
+      661 ttacattttg gttcttttac ctacctgctc ttctcccaca tttttgtcat tttactatat
+      721 tttatcattt aatgcttcta aaattttgtt atttttttat ttaaaaattc tgcatttttt
+      781 ccttcctcac aatcttgcta ctctaaatta tttaatatcc tgtctttctc tcccaacccc
+      841 ctcccttcat ttttccttct ctaacaacaa ctcaaattat gcataccagc tctcacctgc
+      901 taatttcgca cttagaataa tccttttgtc tctccacatg ggtatgggag aggctccaac
+      961 tcaaagatga gaggcataga atactgtttt agaggctata aatcatttta caataaggaa
+     1021 taattggaat tttataaatt ctgtagtaaa tggaatggaa aggaaagtga atatttgatt
+     1081 atgaaagact aggcagttac actggaggtg gggcagaagt cgttgctagg agacagccca
+     1141 tcatcacact gatttatcaa ttcaatttgt atctattaat ctgtttatag taattaattt
+     1201 gtatatgcta tatacacata caaaattaaa actaatttgg aattaatttg tatatagtat
+     1261 tatacagcat atatgtacat atatagacta catgctagtt aagtacatag aggatgtgtg
+     1321 tgtatagata tatgttatat gtatgcattc atatatgtac ttatttatgc tgatgggaat
+     1381 aacctgggga tcagttttgt ctaagatttg ggcagaaaaa aatgggtgtt ggctcagttc
+     1441 tcagaagcca gtctttattt ctctgttaac catatgcatg tatctgccta cctcttctcc
+     1501 gcagctcttg ggcaatgtgc tggtgtgtgt gctggcccgc aactttggca aggaattcac
+     1561 cccacaaatg caggctgcct atcagaaggt ggtggctggt gtggctaatg ccttggctca
+     1621 caagtaccat tgagatcctg gactgtttcc tgataaccat aagaagaccc tatttcccta
+     1681 gattctattt tctgaacttg ggaacacaat gcctacttca agggtatggc ttctgcctaa
+     1741 taaagaatgt tcagctcaac ttcctgatta atttcactta tttcattttt ttgtccaggt
+     1801 gtgtaagaag gttcctgagg ctctacagat agggagcact tctttatttt acaaagagta
+     1861 catgggaaaa gagaaaagca agggaaccgt acaaggcatt aatgggtgac acttctacct
+     1921 ccaaagagca gaaattatca agaactcttg atacaaagat aatactggca ctgcag
+//
\ No newline at end of file