3 import jalview.datamodel.Alignment;
4 import jalview.datamodel.AlignmentI;
5 import jalview.datamodel.DBRefEntry;
6 import jalview.datamodel.DBRefSource;
7 import jalview.datamodel.Mapping;
8 import jalview.datamodel.Sequence;
9 import jalview.datamodel.SequenceFeature;
10 import jalview.datamodel.SequenceI;
11 import jalview.io.xdb.genbank.GenBankFeature;
12 import jalview.io.xdb.genbank.GenBankLocation;
13 import jalview.io.xdb.genbank.GenBankLocationPoint;
14 import jalview.io.xdb.genbank.GenBankLocationRange;
15 import jalview.io.xdb.genbank.GenBankLocations;
16 import jalview.io.xdb.genbank.GenBankLocus;
17 import jalview.io.xdb.genbank.GenBankReference;
18 import jalview.io.xdb.genbank.GenBankSequence;
19 import jalview.io.xdb.genbank.GenBankSource;
20 import jalview.io.xdb.genbank.GenBankVersion;
22 import java.io.IOException;
23 import java.util.ArrayList;
24 import java.util.Enumeration;
25 import java.util.Hashtable;
26 import java.util.List;
27 import java.util.Vector;
28 import java.util.logging.Level;
29 import java.util.logging.Logger;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
33 import org.apache.james.mime4j.field.ParsedField;
// GenBank flat-file handler built on AlignFile: parse() reads a record into
// the fields below and print() writes sequences back out in GenBank layout.
35 public class GenBankFile extends AlignFile
// Class-wide java.util.logging logger.
37 private static final Logger log = Logger.getLogger(GenBankFile.class
// Value of the VERSION line (assigned from parseVersion() in parse()).
40 private GenBankVersion version = new GenBankVersion();
// Value of the LOCUS line (assigned from parseLocus() in parse()).
42 private GenBankLocus locus = new GenBankLocus();
// SOURCE/ORGANISM section (assigned from parseSource() in parse()).
44 private GenBankSource source = new GenBankSource();
// Matches a plain range "start..end"; groups 1 and 2 are the two numbers.
46 private static final Pattern patLocation = Pattern
47 .compile("(\\d+)\\.\\.(\\d+)");
// Matches "complement(start..end)"; groups 2 and 3 are the two numbers.
49 private static final Pattern patLocationComp = Pattern
50 .compile("(complement)\\((\\d+)\\.\\.(\\d+)\\)");
// Matches the LOCUS header line. Groups as consumed by parseLocus(): 1 name,
// 2 length in bp, 3 strand prefix, 4 molecule type, 5 eight-character
// topology column, 6 three-character division, 7 modification date.
// NOTE(review): inside [...] the '|' is a literal character, so these classes
// also accept a pipe - probably unintended; confirm before tightening.
52 private static final Pattern patLocus = Pattern
53 .compile("^LOCUS +([a-z|A-Z|0-9|_]+) +([0-9]+) bp ( {3}|ss\\-|ds\\-|ms\\-)([a-z|A-Z|-|\\s]+) ([a-z| ]{8}) ([A-Z| ]{3}) ([0-9]+-[A-Z]+-[0-9]+)");
// Captures, non-greedily, the qualifier name between '/' and the first '='.
55 private static final Pattern patQualifierKey = Pattern.compile("/(.*?)=");
// Matches a feature key line: exactly five whitespace characters at the start
// of the line, then the key (group 1), then at least one whitespace character.
57 private static final Pattern patFeatureKey = Pattern
58 .compile("^\\s{5}([A-Za-z0-9\\_\\']+)\\s+");
// Text of the DEFINITION line plus its continuation lines.
60 private String definition;
// Text of the ACCESSION line.
62 private String accession;
// Text of the KEYWORDS line plus its continuation lines.
64 private String keywords;
// Text of the DBLINK line plus its continuation lines.
66 private String dblink;
// Text of the BASE COUNT line.
68 private String baseCount;
// Entries of the FEATURES table, in file order.
70 private Vector<GenBankFeature> features;
// One entry per processed COMMENT line.
72 private Vector<String> comments;
// Parsed sequence lines; joined into one string in setEntries().
75 private Vector<GenBankSequence> sequences;
// REFERENCE blocks, in file order.
77 private Vector<GenBankReference> references;
// Jalview sequence assembled in setEntries() from everything parsed.
79 private SequenceI genBankSequence;
/**
 * Creates a GenBankFile for the given input location and source type.
 * NOTE(review): the constructor body is not visible in this excerpt;
 * presumably both arguments are handed on to AlignFile - confirm.
 *
 * @throws IOException
 *           declared by the signature; the raising code is not shown here
 */
85 public GenBankFile(String inFile, String type) throws IOException
/**
 * Creates a GenBankFile that reads from an existing FileParse source.
 * NOTE(review): the constructor body is not visible in this excerpt;
 * presumably the source is handed on to AlignFile - confirm.
 *
 * @throws IOException
 *           declared by the signature; the raising code is not shown here
 */
90 public GenBankFile(FileParse source) throws IOException
/**
 * Replaces the feature, comment, sequence and reference collections with
 * new empty vectors, so that parsing starts from empty lists.
 * NOTE(review): any statement before the first assignment (for example a
 * call to the superclass) is not visible in this excerpt.
 */
95 public void initData()
98 features = new Vector<GenBankFeature>();
99 comments = new Vector<String>();
100 sequences = new Vector<GenBankSequence>();
101 references = new Vector<GenBankReference>();
/**
 * Parses one GenBank record. Lines are read with nextLine() until it returns
 * null and each line is dispatched on its leading keyword (LOCUS, DEFINITION,
 * ACCESSION, VERSION, DBLINK, KEYWORDS, SOURCE, REFERENCE, COMMENT, FEATURES,
 * BASE COUNT, ORIGIN). Keyword lines fill the matching field; lines without
 * a keyword are appended to whichever multi-line field is currently open.
 *
 * @throws IOException
 *           with the message "GenBankFile is not valid."; the condition
 *           that triggers it is not visible in this excerpt
 */
104 public void parse() throws IOException
// Section flags: they record which part of the record is being read so that
// continuation lines can be attributed to the right field.
107 boolean featureMode = false; // FEATURES found
108 boolean seqMode = false; // Parsing Sequences from SOURCE
109 boolean referenceMode = false; // REFERENCE found
110 boolean sourceMode = false; // SOURCE found
111 boolean commentMode = false; // COMMENT found
112 boolean parsingAuthors = false; // Parsing authors (multiline)
113 boolean parsingDefinition = false; // Parsing definition (multiline)
114 boolean parsingKeywords = false; // Parsing keywords (multiline)
115 boolean parsingDbLink = false; // Parsing DBLINK (multiline)
116 boolean parsingTitle = false; // Parsing title (multiline)
117 boolean parsingQualifier = false; // Parsing feature qualifier (multiline)
// Name of the qualifier whose value is still open for continuation lines.
118 String currentQualifierName = "";
// Reference and feature currently being built.
119 GenBankReference reference = null;
120 GenBankFeature feature = null;
// Raw SOURCE/ORGANISM lines, handed to parseSource() once FEATURES is reached.
121 List<String> sourceLines = new ArrayList<String>();
126 while ((line = nextLine()) != null)
128 // We only process lines if they have contents within
129 if (line.length() == 0)
// FEATURES header: start a fresh feature and turn the SOURCE lines collected
// so far into the source field.
132 if (line.startsWith("FEATURES"))
136 referenceMode = false;
139 feature = new GenBankFeature();
140 source = parseSource(sourceLines);
// A line that is not the "//" record terminator is converted by
// processSequenceLine() (the enclosing condition is not visible here).
145 if (!line.startsWith("//"))
147 GenBankSequence seq = processSequenceLine(line);
151 referenceMode = false;
// ORIGIN header: store the feature still being built, if it has a type.
155 if (line.startsWith("ORIGIN"))
157 if (feature.getType() != null)
158 features.add(feature);
160 referenceMode = false;
167 // Process feature line
168 if (!line.startsWith("FEATURES")
169 && !line.startsWith("BASE COUNT"))
// A line whose trimmed text does not start with '/' is either a new feature
// key line or the continuation of the current qualifier value.
172 if (!line.trim().startsWith("/"))
174 Matcher featuresMatch = patFeatureKey.matcher(line);
175 if (featuresMatch.find())
177 if (feature.getType() != null)
178 features.add(feature); // Must be added only when we are not
179 // in the middle of a qualifier or a feature
// NOTE(review): group(0) is the whole match, i.e. the five leading whitespace
// characters, the key and the whitespace after it; group(1) would be the bare
// key. Confirm that the padded text is what setType() is meant to receive.
181 String type = featuresMatch.group(0);
182 feature = new GenBankFeature();
183 feature.setType(type);
// What is left of the line once the key text is removed is the location.
184 GenBankLocation loc = parserFeatureLocation(feature,
185 line.replace(type, ""));
186 feature.setLocation(loc);
187 parsingQualifier = false;
190 else if (parsingQualifier)
191 { // If not a feature, it's another part of a qualifier
192 String qValue = feature.getQualifier(currentQualifierName);
193 StringBuffer sb = new StringBuffer().append(qValue).append(
195 feature.updateQualifier(currentQualifierName, sb.toString());
201 // It's the beginning of a qualifier line
202 Matcher matcher = patQualifierKey.matcher(line);
205 String qName = matcher.group(1);
206 currentQualifierName = qName.replace("/", "");
// NOTE(review): String.replace removes every occurrence, so a '/' or a repeat
// of the qualifier name inside the value is stripped as well - confirm.
207 line = line.replace(qName, "").replace("/", "")
209 feature.addQualifier(currentQualifierName, ltrim(line));
210 parsingQualifier = true;
216 // Process REFERENCE line
217 if (line.startsWith("REFERENCE"))
221 // This line is the REFERENCE line
222 referenceMode = true;
229 // We were at referenceMode, then add current reference to the list
230 // and create a new one
231 references.add(reference);
233 reference = new GenBankReference();
// The descriptor is the text after the keyword; its three numbers become the
// reference order, begin and end.
234 String desc = processReferenceLine(line, "REFERENCE");
235 int[] ranges = parseReferenceDescriptor(desc);
236 reference.setDescriptor(desc);
237 reference.setOrder(ranges[0]);
238 reference.setBegin(ranges[1]);
239 reference.setEnd(ranges[2]);
240 parsingAuthors = false;
241 parsingTitle = false;
// Reference sub-keywords: each one stores its text on the current reference
// and updates the flags that route continuation lines to AUTHORS or TITLE.
245 if (line.startsWith(" AUTHORS"))
249 reference.setAuthors(processReferenceLine(line, "AUTHORS"));
250 parsingAuthors = true;
251 parsingTitle = false;
255 if (line.startsWith(" TITLE"))
259 reference.setTitle(processReferenceLine(line, "TITLE"));
260 parsingAuthors = false;
265 if (line.startsWith(" JOURNAL"))
269 reference.setJournal(processReferenceLine(line, "JOURNAL"));
270 parsingTitle = false;
271 parsingAuthors = false;
275 if (line.startsWith(" PUBMED"))
279 reference.setPubmed(processReferenceLine(line, "PUBMED"));
280 parsingTitle = false;
281 parsingAuthors = false;
286 if (line.startsWith(" MEDLINE"))
290 reference.setMedline(processReferenceLine(line, "MEDLINE"));
291 parsingTitle = false;
292 parsingAuthors = false;
296 if (line.startsWith(" REMARK"))
300 reference.setRemark(processReferenceLine(line, "REMARK"));
301 parsingTitle = false;
302 parsingAuthors = false;
306 if (line.startsWith(" CONSRTM"))
310 reference.setConsortia(processReferenceLine(line, "CONSRTM"));
311 parsingTitle = false;
312 parsingAuthors = false;
// SOURCE and ORGANISM lines are only collected here; they are interpreted
// later by parseSource().
317 if (line.startsWith("SOURCE"))
319 parsingKeywords = false;
324 sourceLines.add(line);
328 if (line.indexOf("ORGANISM") != -1)
332 sourceLines.add(line);
// COMMENT: the reference being built, if any, is stored before the comment
// text is recorded.
337 if (line.startsWith("COMMENT"))
339 if (reference != null)
340 references.add(reference);
343 referenceMode = false;
346 comments.add(processCommentLine(line));
349 // Process LOCUS line
350 if (line.startsWith("LOCUS"))
352 locus = parseLocus(line);
355 // Process BASE COUNT line
356 if (line.startsWith("BASE COUNT"))
358 baseCount = processHeaderLine(line, "BASE COUNT");
362 // Process DEFINITION line
363 if (line.startsWith("DEFINITION"))
365 definition = processHeaderLine(line, "DEFINITION");
366 parsingDefinition = true;
369 // Process ACCESSION line
370 if (line.startsWith("ACCESSION"))
372 accession = processHeaderLine(line, "ACCESSION");
373 parsingDefinition = false;
376 // Process VERSION line
377 if (line.startsWith("VERSION"))
379 version = parseVersion(line);
380 // headers.put("VERSION", processHeaderLine(line,"VERSION"));
383 // Process DBLINK line
384 if (line.startsWith("DBLINK"))
386 dblink = processHeaderLine(line, "DBLINK");
387 parsingDbLink = true;
390 // Process KEYWORDS line
391 if (line.startsWith("KEYWORDS"))
393 keywords = processHeaderLine(line, "KEYWORDS");
394 parsingKeywords = true;
395 parsingDbLink = false;
// Continuation handling: a line without a keyword is appended to the field
// whose flag is currently set. The condition guarding the next statement is
// not visible here; presumably it is a SOURCE continuation line.
400 sourceLines.add(line);
403 if (parsingDefinition)
405 StringBuffer sb = new StringBuffer().append(definition).append(
407 definition = sb.toString();
410 if (referenceMode && parsingAuthors)
412 if (reference != null)
414 StringBuffer authors = new StringBuffer().append(
415 reference.getAuthors()).append(line);
416 reference.setAuthors(authors.toString());
420 if (referenceMode && parsingTitle)
422 if (reference != null)
424 StringBuffer title = new StringBuffer().append(
425 reference.getTitle()).append(line);
426 reference.setTitle(title.toString());
432 StringBuffer sb = new StringBuffer().append(keywords)
434 keywords = sb.toString();
439 StringBuffer sb = new StringBuffer().append(dblink).append(line);
440 dblink = sb.toString();
// Failure path; what leads here is not visible in this excerpt.
453 throw new IOException("GenBankFile is not valid.");
/**
 * Converts the parsed record into a single Jalview sequence and registers
 * it through setSeqs(). The sequence is named after the accession and
 * receives a GENBANK database reference, one feature per header field, one
 * feature per REFERENCE, one COMMENT feature holding all comments, and one
 * feature per typed entry of the FEATURES table.
 */
457 protected void setEntries()
// NOTE(review): result is not referenced in any line visible here.
459 StringBuffer result = new StringBuffer();
460 // Mapping GenBank info into Jalview data model
461 genBankSequence = new Sequence(accession,
462 DnaUtils.getNucleotidesFromSequenceVector(sequences));
463 // Mapping DBRefEntry
464 DBRefEntry dbRef = new DBRefEntry();
465 dbRef.setSource(DBRefSource.GENBANK);
466 dbRef.setVersion(version == null ? "" : version.toString());
467 dbRef.setAccessionId(accession);
468 // add map to indicate the sequence is a valid coordinate frame for the
// Both ranges of the map span 1..sequence length.
470 dbRef.setMap(new Mapping(null, new int[]
471 { 1, genBankSequence.getLength() }, new int[]
472 { 1, genBankSequence.getLength() }, 1, 1));
473 genBankSequence.addDBRef(dbRef);
475 // add header info as non-positional features
// Each header feature below is created with begin 1 and end equal to the
// sequence length; an absent value is replaced by "" where a null check is
// present.
477 SequenceFeature locusF = new SequenceFeature("LOCUS",
478 (locus == null ? "" : locus.toString()), null, 1,
479 genBankSequence.getLength(), DBRefSource.GENBANK);
480 genBankSequence.addSequenceFeature(locusF);
482 SequenceFeature defF = new SequenceFeature("DEFINITION", definition,
483 null, 1, genBankSequence.getLength(), DBRefSource.GENBANK);
484 genBankSequence.addSequenceFeature(defF);
486 SequenceFeature accessionF = new SequenceFeature("ACCESSION",
487 accession, null, 1, genBankSequence.getLength(),
488 DBRefSource.GENBANK);
489 genBankSequence.addSequenceFeature(accessionF);
491 SequenceFeature versionF = new SequenceFeature("VERSION",
492 (version == null ? "" : version.toString()), null, 1,
493 genBankSequence.getLength(), DBRefSource.GENBANK);
494 genBankSequence.addSequenceFeature(versionF);
496 SequenceFeature dblinkF = new SequenceFeature("DBLINK",
497 (dblink == null ? "" : dblink.toString()), null, 1,
498 genBankSequence.getLength(), DBRefSource.GENBANK);
499 genBankSequence.addSequenceFeature(dblinkF);
501 SequenceFeature keywordsF = new SequenceFeature("KEYWORDS", keywords,
502 null, 1, genBankSequence.getLength(), DBRefSource.GENBANK);
503 genBankSequence.addSequenceFeature(keywordsF);
505 SequenceFeature sourceF = new SequenceFeature("SOURCE",
506 (source == null ? "" : source.toString()), null, 1,
507 genBankSequence.getLength(), DBRefSource.GENBANK);
508 genBankSequence.addSequenceFeature(sourceF);
510 SequenceFeature baseCountF = new SequenceFeature("BASE COUNT",
511 (baseCount == null ? "" : baseCount.toString()), null, 1,
512 genBankSequence.getLength(), DBRefSource.GENBANK);
513 genBankSequence.addSequenceFeature(baseCountF);
515 // add literature and database cross references in the file
516 for (GenBankReference gbRef : references)
518 // They are non-positional features
// The feature is nevertheless given the begin/end stored on the reference.
519 SequenceFeature refFeature = new SequenceFeature("REFERENCE",
520 gbRef.toString(), null, gbRef.getBegin(), gbRef.getEnd(),
521 DBRefSource.GENBANK);
522 genBankSequence.addSequenceFeature(refFeature);
// All comments are joined, one per line, into a single COMMENT feature.
525 if (comments.size() > 0)
527 StringBuffer sb = new StringBuffer();
528 for (String comment : comments)
530 sb.append(comment).append(newline);
532 SequenceFeature commentF = new SequenceFeature("COMMENT",
533 sb.toString(), null, 1, genBankSequence.getLength(),
534 DBRefSource.GENBANK);
535 genBankSequence.addSequenceFeature(commentF);
// FEATURES table: entries without a type are skipped; the feature type is
// used both as type and as description, and every qualifier is copied with
// setValue().
538 for (GenBankFeature feature : features)
540 if (feature.getType() != null)
542 SequenceFeature sf = new SequenceFeature();
543 sf.setType(feature.getType());
544 sf.setDescription(feature.getType());
// A feature without a location gets begin 0 and end 0.
546 sf.setBegin(feature.getLocation() == null ? 0 : feature
547 .getLocation().getMinor());
548 sf.setEnd(feature.getLocation() == null ? 0 : feature.getLocation()
550 Enumeration<String> names = feature.getQualifiersNames();
551 while (names.hasMoreElements())
553 String qName = names.nextElement();
554 String qValue = feature.getQualifier(qName);
555 sf.setValue(qName, qValue);
557 genBankSequence.addSequenceFeature(sf);
// Publish the single assembled sequence.
560 SequenceI[] parsedSeqs = new SequenceI[1];
561 parsedSeqs[0] = genBankSequence;
562 this.setSeqs(parsedSeqs);
/**
 * Parses the VERSION line into a GenBankVersion.
 *
 * @param line
 *          the complete VERSION line
 * @return presumably the populated GenBankVersion; the return statements
 *         are not visible in this excerpt
 */
565 private GenBankVersion parseVersion(String line)
567 // VERSION U00096.2 GI:48994873
// A line holding nothing but the keyword is handled separately (the handling
// itself is not visible here).
568 if (line.trim().equalsIgnoreCase("VERSION"))
574 GenBankVersion ver = new GenBankVersion();
// The version token runs from index 11 up to the first space found at or
// after index 12.
// NOTE(review): when no such space exists indexOf returns -1 and
// substring(11, -1) throws StringIndexOutOfBoundsException - confirm that a
// VERSION line without a trailing field cannot reach this point.
575 String v = line.substring(11, line.indexOf(" ", 12)).trim();
// Look for the "GI:" field after the version token.
577 int posGI = line.indexOf("GI:", 11 + v.length());
// The stored value starts at posGI, so it still includes the "GI:" prefix.
580 ver.setGI(line.substring(posGI));
586 private GenBankLocus parseLocus(String line)
588 GenBankLocus loc = new GenBankLocus();
589 Matcher mat = patLocus.matcher(line);
592 String name = mat.group(1);
593 String len = mat.group(2);
594 String strand = mat.group(3);
595 String mtype = mat.group(4);
596 String linear = mat.group(5);
597 String division = mat.group(6);
598 String date = mat.group(7);
600 loc.setName(name == null ? "" : name.trim());
601 loc.setSequenceLength(len == null ? 0 : Integer.parseInt(len));
602 loc.setStrand(strand == null ? "" : strand);
603 loc.setMoleculeType(mtype == null ? "" : mtype);
604 loc.setLinearSequence("linear".equals(linear));
605 loc.setDivision(division == null ? "" : division);
606 loc.setModificationDate(date == null ? "" : date);
/**
 * Builds a GenBankSource from the collected SOURCE/ORGANISM lines. The lines
 * are joined with the newline separator and the first 11 characters of the
 * joined text are dropped. The first remaining line becomes the source; when
 * "ORGANISM" is present the rest of that line becomes the organism and the
 * text after it, with all whitespace removed, becomes the taxonomy.
 *
 * @param lines
 *          the raw lines gathered by parse()
 * @return presumably the populated GenBankSource; the return statement is
 *         not visible in this excerpt
 */
611 private GenBankSource parseSource(List<String> lines)
613 StringBuffer sb = new StringBuffer();
614 for (String line : lines)
616 sb.append(line).append(newline);
619 GenBankSource sou = new GenBankSource();
// NOTE(review): substring(11) throws StringIndexOutOfBoundsException when the
// joined text is shorter than 11 characters, e.g. when no line was collected
// - confirm that a SOURCE line is always present.
620 String aux = sb.toString().substring(11);
// NOTE(review): the lines were joined with newline but are searched for "\n"
// - confirm the two agree.
621 int fim1 = aux.indexOf("\n");
624 sou.setSource(aux.substring(0, fim1));
625 int ini2 = aux.indexOf("ORGANISM");
// The organism name starts 10 characters past the start of "ORGANISM" and
// ends at the end of that line.
628 fim1 = aux.indexOf("\n", ini2 + 10);
631 sou.setOrganism(aux.substring(ini2 + 10, fim1));
// Everything from the end of the organism line onwards is the taxonomy.
632 sou.setTaxonomic(aux.substring(fim1)
633 .replaceAll(" ", "").replaceAll("\\s+", ""));
// Alternative branch (its condition is not visible here): the whole
// remaining text is stored as the organism.
637 sou.setOrganism(aux);
 * Possible situations:
 * <ul>
 * <li>467 - Points to a single base in the presented sequence</li>
 * <li>340..565 - Points to a continuous range of bases bounded by and
 * including the starting and ending bases</li>
 * <li>&lt;345..500 - Indicates that the exact lower boundary point of a
 * feature is unknown. The location begins at some base previous to the first
 * base specified (which need not be contained in the presented sequence) and
 * continues to and includes the ending base</li>
 * <li>&lt;1..888 - The feature starts before the first sequenced base and
 * continues to and includes base 888</li>
 * <li>1..&gt;888 - The feature starts at the first sequenced base and
 * continues beyond base 888</li>
 * <li>102.110 - Indicates that the exact location is unknown but that it is
 * one of the bases between bases 102 and 110, inclusive</li>
 * <li>123^124 - Points to a site between bases 123 and 124</li>
 * <li>join(12..78,134..202) - Regions 12 to 78 and 134 to 202 should be
 * joined to form one contiguous sequence</li>
 * <li>complement(34..126) - Start at the base complementary to 126 and
 * finish at the base complementary to base 34 (the feature is on the strand
 * complementary to the presented strand)</li>
 * <li>complement(join(2691..4571,4918..5163)) - Joins regions 2691 to 4571
 * and 4918 to 5163, then complements the joined segments (the feature is on
 * the strand complementary to the presented strand)</li>
 * <li>join(complement(4918..5163),complement(2691..4571)) - Complements
 * regions 4918 to 5163 and 2691 to 4571, then joins the complemented
 * segments (the feature is on the strand complementary to the presented
 * strand)</li>
 * <li>J00194.1:100..202 - Points to bases 100 to 202, inclusive, in the
 * entry (in this database) with primary accession number 'J00194'</li>
 * <li>join(1..100,J00194.1:100..202) - Joins region 1..100 of the existing
 * entry with the region 100..202 of remote entry J00194</li>
 * </ul>
// Recursively converts the text of a feature location into a GenBankLocation.
// Whitespace is removed first; the text is then examined in this order:
// a comma-separated list at the top level, an operator call (complement,
// join, order), a comma list, a ".." range, and finally a single point such
// as 467, 102.110 or 123^124.
// fea is the feature being built; several branches also store their result
// on it with setLocation().
680 private GenBankLocation parserFeatureLocation(GenBankFeature fea,
683 // strip spaces, line breaks, etc.
684 String buf = localiza.replaceAll("\\s", "");
686 // checks if there is a comma present between ranges
687 // complement(100..110),complement(90..100)
688 char[] buf2 = buf.toCharArray();
690 java.util.List<String> lista = new java.util.ArrayList<String>();
// Pieces are cut at commas seen while abertos is 0 (presumably the count of
// currently open parentheses - its updates are not visible here).
692 for (int i = 0; i < buf2.length; i++)
698 else if (buf2[i] == ')')
702 else if (buf2[i] == ',' && abertos == 0)
704 lista.add(buf.substring(pinicial, i));
// At least one top-level comma was found: add the last piece and parse every
// piece on its own, collecting the results without an operator.
708 if (lista.size() > 0)
710 lista.add(buf.substring(pinicial));
711 GenBankLocations um = new GenBankLocations();
712 um.setOperator(GenBankLocations.NONE);
713 for (String s : lista)
715 um.getUnits().add(parserFeatureLocation(fea, s));
721 // handles the operators: complement(location,location...),
722 // join(location,location...), order(location,location...)
723 if (buf.contains("("))
725 GenBankLocations um = new GenBankLocations();
// The operator name is the text before the first '(' and its argument is the
// text between that '(' and the last ')'.
// NOTE(review): with no ')' present lastIndexOf returns -1 and the substring
// calls below throw - confirm malformed input is handled by the caller.
726 int ini = buf.indexOf("(");
727 int fim = buf.lastIndexOf(")");
728 String token = buf.substring(0, ini);
729 if ("complement".equalsIgnoreCase(token))
731 String inter = buf.substring(ini + 1, fim);
732 GenBankLocation interno = parserFeatureLocation(fea, inter);
// The inner location itself is also flagged as complemented.
733 interno.setComplement(true);
734 um.setOperator(GenBankLocations.COMPLEMENT);
735 um.getUnits().add(interno);
738 else if ("join".equalsIgnoreCase(token))
740 String inter = buf.substring(ini + 1, fim);
741 GenBankLocation interno = parserFeatureLocation(fea, inter);
742 um.setOperator(GenBankLocations.JOIN);
743 um.getUnits().add(interno);
746 else if ("order".equalsIgnoreCase(token))
748 String inter = buf.substring(ini + 1, fim);
749 GenBankLocation interno = parserFeatureLocation(fea, inter);
750 um.setOperator(GenBankLocations.ORDER);
751 um.getUnits().add(interno);
// Unknown operator: log a warning, then parse the argument as if the
// operator were absent and store the result on the feature.
756 log.log(Level.WARNING,
757 "Token desconhecido em location/features - {0}", token);
758 String inter = buf.substring(ini + 1, fim);
759 fea.setLocation(parserFeatureLocation(fea, inter));
761 return fea.getLocation();
765 // handles a list of locations
766 if (buf.contains(","))
768 String[] partes = buf.split(",");
769 GenBankLocations um = new GenBankLocations();
770 for (String p : partes)
772 um.getUnits().add(parserFeatureLocation(fea, p));
779 // handles a range
780 if (buf.contains(".."))
782 String[] partes = buf.split("\\.\\.");
783 GenBankLocationRange range = new GenBankLocationRange();
// "entry:position" form: the text before ':' is recorded as the remote entry
// and removed from the part before it is parsed.
784 if (buf.contains(":"))
786 for (int i = 0; i < partes.length; i++)
788 int pos = partes[i].indexOf(":");
791 String entry = partes[i].substring(0, pos);
792 partes[i] = partes[i].substring(pos + 1);
793 range.setEntry(entry);
// NOTE(review): both ends are cast to GenBankLocationPoint; a part that
// parses to any other location type would raise ClassCastException - confirm.
797 GenBankLocationPoint gp0 = (GenBankLocationPoint) parserFeatureLocation(
800 GenBankLocationPoint gp1 = (GenBankLocationPoint) parserFeatureLocation(
803 fea.setLocation(range);
809 // cases considered:
817 // or a combination of these
818 GenBankLocationPoint gp = new GenBankLocationPoint();
819 if (buf.contains(":"))
821 int pos = buf.indexOf(":");
824 String entry = buf.substring(0, pos);
825 buf = buf.substring(pos + 1);
830 // checks for the < and > symbols before the first number
831 if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>')
833 gp.setPrefix(buf.charAt(pos));
836 // reads the first number
838 while (pos < buf.length() && buf.charAt(pos) >= '0'
839 && buf.charAt(pos) <= '9')
// No digits were found: the original location text is printed to standard
// output before the parse below is attempted.
843 if (buf.subSequence(ini, pos).length() < 1)
845 System.out.println(localiza);
847 int num = Integer.parseInt(buf.substring(ini, pos));
849 // the first number may be the only number
850 if (pos < buf.length())
852 // checks whether < or > follows the first number
853 if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>')
// When a separator is present the symbol is recorded as a prefix, otherwise
// as a suffix.
855 if (buf.contains(".") || buf.contains("^"))
857 gp.setPrefix(buf.charAt(pos));
861 gp.setSufix(buf.charAt(pos));
866 // checks for the number separator . or ^
867 if (pos < buf.length()
868 && (buf.charAt(pos) == '.' || buf.charAt(pos) == '^'))
870 // separator found, another number may follow
871 gp.setSymbol(buf.charAt(pos));
874 // checks for the < and > symbols before the second number
875 if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>')
877 gp.setSufix(buf.charAt(pos));
881 // reads the second number
883 while (pos < buf.length() && buf.charAt(pos) >= '0'
884 && buf.charAt(pos) <= '9')
888 num2 = Integer.parseInt(buf.substring(ini, pos));
890 // checks for the < and > symbols after the second number
891 if (pos < buf.length()
892 && (buf.charAt(pos) == '<' || buf.charAt(pos) == '>'))
894 gp.setSufix(buf.charAt(pos));
908 private int[] parseReferenceDescriptor(String descriptor)
910 // 1 (bases 1 to 1609)
911 int[] resultado = new int[3];
912 descriptor = descriptor.replace("(bases", ",").replace("to", ",")
914 String[] args = descriptor.split(",");
915 resultado[0] = Integer.parseInt(args[0].trim());
916 resultado[1] = Integer.parseInt(args[1].trim());
917 resultado[2] = Integer.parseInt(args[2].trim());
/**
 * Extracts the text of a REFERENCE-section line for the given keyword by
 * removing the keyword from the line.
 * NOTE(review): the guard around the removal and the returned expression
 * are not visible in this excerpt.
 * NOTE(review): String.replace removes every occurrence of the keyword, not
 * only the leading one - confirm that text repeating the keyword is not
 * expected.
 *
 * @param line
 *          the complete line
 * @param component
 *          the keyword to remove, e.g. "AUTHORS"
 */
921 private String processReferenceLine(String line, String component)
923 int init = line.indexOf(component);
926 line = line.replace(component, "");
/**
 * Extracts the text of a header line by removing the header keyword.
 * NOTE(review): the guard around the removal and the returned expression
 * are not visible in this excerpt.
 * NOTE(review): String.replace removes every occurrence of the keyword, not
 * only the leading one - confirm that values repeating the keyword are not
 * expected.
 *
 * @param line
 *          the complete line
 * @param header
 *          the keyword to remove, e.g. "DEFINITION"
 */
931 private String processHeaderLine(String line, String header)
933 int init = line.indexOf(header);
936 line = line.replace(header, "");
/**
 * Converts one line of the sequence section into a GenBankSequence: the
 * first space-separated token is parsed as the numeric id and the remaining
 * tokens become the list of sequence chunks.
 *
 * @param line
 *          a sequence line, e.g. a base offset followed by blocks of bases
 * @return presumably the populated GenBankSequence; the return statement is
 *         not visible in this excerpt
 */
941 private GenBankSequence processSequenceLine(String line)
943 GenBankSequence gbs = new GenBankSequence();
// NOTE(review): the split is on single spaces, so leading blanks would yield
// an empty first token and make parseInt throw NumberFormatException;
// presumably the line is trimmed in a statement not visible here - confirm.
945 String[] args = line.split(" ");
946 gbs.setId(Integer.parseInt(args[0]));
// Every token after the first is kept, in order.
947 int len = args.length - 1;
948 Vector<String> seqs = new Vector<String>();
949 for (int i = 0; i < len; i++)
950 seqs.add(args[i + 1]);
951 gbs.setSequences(seqs);
/**
 * Extracts the text of a COMMENT line by removing the "COMMENT" keyword.
 * NOTE(review): the guard around the removal and the returned expression
 * are not visible in this excerpt.
 * NOTE(review): String.replace removes every occurrence of "COMMENT", so the
 * word is also stripped from the comment text itself - confirm.
 *
 * @param line
 *          the complete COMMENT line
 */
955 private String processCommentLine(String line)
957 int init = line.indexOf("COMMENT");
960 line = line.replace("COMMENT", "");
965 public String rtrim(String s)
967 int i = s.length() - 1;
968 while (i >= 0 && Character.isWhitespace(s.charAt(i)))
972 return s.substring(0, i + 1);
975 public String ltrim(String s)
978 while (i < s.length() && Character.isWhitespace(s.charAt(i)))
982 return s.substring(i);
/**
 * Renders every sequence returned by getSeqs() as GenBank flat-file text.
 * Features whose type is one of the header names (LOCUS, DEFINITION,
 * VERSION, ACCESSION, DBLINK, KEYWORDS, SOURCE, REFERENCE, COMMENT, BASE
 * COUNT) are written as header lines. Any other feature goes into the
 * FEATURES table as "type begin..end" followed by its qualifiers. The
 * residues follow under ORIGIN, 60 per line in blocks of 10.
 *
 * @return the generated text
 */
985 public String print()
987 StringBuffer out = new StringBuffer();
988 for (SequenceI seq : this.getSeqs())
// NOTE(review): the array is iterated without a null check - confirm that
// getSequenceFeatures() cannot return null for a sequence without features.
990 SequenceFeature[] seqFeatures = seq.getSequenceFeatures();
// The FEATURES header is written once, before the first table entry.
991 boolean featureLinePrinted = false;
992 for (SequenceFeature sf : seqFeatures)
// LOCUS and REFERENCE descriptions are written as they are; the other header
// types are written with their keyword in front.
994 if (sf.getType().equals("LOCUS"))
996 out.append(sf.getDescription()).append(newline);
998 else if (sf.getType().equals("DEFINITION"))
1000 out.append("DEFINITION ").append(sf.getDescription())
1003 else if (sf.getType().equals("VERSION"))
1005 out.append("VERSION ").append(sf.getDescription())
1008 else if (sf.getType().equals("ACCESSION"))
1010 out.append("ACCESSION ").append(sf.getDescription())
1013 else if (sf.getType().equals("DBLINK"))
1015 out.append("DBLINK ").append(sf.getDescription()).append(newline);
1017 else if (sf.getType().equals("KEYWORDS"))
1019 out.append("KEYWORDS ").append(sf.getDescription())
1022 else if (sf.getType().equals("SOURCE"))
1024 out.append("SOURCE ").append(sf.getDescription())
1027 else if (sf.getType().equals("REFERENCE"))
1029 out.append(sf.getDescription()).append(newline);
1031 else if (sf.getType().equals("COMMENT"))
1033 out.append("COMMENT ").append(sf.getDescription())
1036 else if (sf.getType().equals("BASE COUNT"))
1038 out.append("BASE COUNT ").append(sf.getDescription())
// Any other type is an entry of the FEATURES table.
1043 if (!featureLinePrinted)
1045 out.append("FEATURES Location/Qualifiers").append(
1047 featureLinePrinted = true;
1049 out.append(" ").append(sf.getType()).append(" ")
1050 .append(sf.getBegin()).append("..").append(sf.getEnd())
// Qualifiers are read from the feature's otherDetails table and written as
// /key=value lines.
1052 Hashtable<String, String> qualifiers = sf.otherDetails;
1053 if (qualifiers != null)
1055 Enumeration<String> keys = qualifiers.keys();
1056 while (keys.hasMoreElements())
1058 String key = keys.nextElement();
1059 String value = qualifiers.get(key);
1062 out.append(" /").append(key)
1063 .append("=").append(value).append(newline);
1069 out.append("ORIGIN").append(newline);
1070 // We have to divide sequence in groups of 6x10 chars
1071 String sequenceString = seq.getSequenceAsString();
// length() / 60 is an integer division, so Math.floor has nothing left to
// round. The loop runs one extra time to cover a final partial line.
1072 int howManyGroups = (int) Math.floor(sequenceString.length() / 60);
1073 for (int i = 0; i <= howManyGroups; i++)
1075 String sequenceSegment = sequenceString.substring(i * 60,
1076 Math.min((i + 1) * 60, sequenceString.length()));
// The null test is redundant: substring never returns null and the empty
// string has already been excluded by the first operand.
1077 if ((!"".equals(sequenceSegment) && (sequenceSegment != null) && (sequenceSegment
// Each line starts with the 1-based position of its first residue.
1080 out.append(" ").append(60 * i + 1).append(" ");
// Full 10-character blocks are written first, each followed by a space; the
// substring(n, length) calls further down write the remainder of a segment
// that is too short for the next full block.
1082 int segmentLength = sequenceSegment.length();
1083 if (segmentLength >= 10)
1085 out.append(sequenceSegment.substring(0, 10)).append(" ");
1086 if (segmentLength >= 20)
1088 out.append(sequenceSegment.substring(10, 20)).append(" ");
1089 if (segmentLength >= 30)
1091 out.append(sequenceSegment.substring(20, 30)).append(" ");
1092 if (segmentLength >= 40)
1094 out.append(sequenceSegment.substring(30, 40)).append(" ");
1095 if (segmentLength >= 50)
1097 out.append(sequenceSegment.substring(40, 50)).append(" ");
// A segment never exceeds 60 characters, so this test always holds here.
1098 if (segmentLength <= 60)
1100 out.append(sequenceSegment.substring(50,
1101 sequenceSegment.length()));
1106 out.append(sequenceSegment.substring(40,
1107 sequenceSegment.length()));
1112 out.append(sequenceSegment.substring(30,
1113 sequenceSegment.length()));
1118 out.append(sequenceSegment.substring(20,
1119 sequenceSegment.length()));
1124 out.append(sequenceSegment.substring(10,
1125 sequenceSegment.length()));
// Second case of the segment test (the rest of its condition is not visible
// here): the segment is written as it is.
1128 else if ((!"".equals(sequenceSegment) && (sequenceSegment != null) && (sequenceSegment
1131 out.append(sequenceSegment);
1133 out.append(newline);
1137 return out.toString();