package jalview.io;

import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.Mapping;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.io.xdb.genbank.GenBankFeature;
import jalview.io.xdb.genbank.GenBankLocation;
import jalview.io.xdb.genbank.GenBankLocationPoint;
import jalview.io.xdb.genbank.GenBankLocationRange;
import jalview.io.xdb.genbank.GenBankLocations;
import jalview.io.xdb.genbank.GenBankLocus;
import jalview.io.xdb.genbank.GenBankReference;
import jalview.io.xdb.genbank.GenBankSequence;
import jalview.io.xdb.genbank.GenBankSource;
import jalview.io.xdb.genbank.GenBankVersion;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.List;
import java.util.Vector;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.james.mime4j.field.ParsedField;

/**
 * Reader and writer for GenBank flat files.
 * <p>
 * {@link #parse()} walks the file line by line, collects the header sections,
 * the references, the comments, the feature table and the sequence lines, and
 * finally ({@link #setEntries()}) turns them into a single sequence carrying a
 * GENBANK database reference and one {@link SequenceFeature} per header
 * section, reference and feature. {@link #print()} writes the sequence
 * features and residues of every held sequence back out as text.
 */
public class GenBankFile extends AlignFile
{
  private static final Logger log = Logger.getLogger(GenBankFile.class
          .getName());

  private GenBankVersion version = new GenBankVersion();

  private GenBankLocus locus = new GenBankLocus();

  private GenBankSource source = new GenBankSource();

  private static final Pattern patLocation = Pattern
          .compile("(\\d+)\\.\\.(\\d+)");

  private static final Pattern patLocationComp = Pattern
          .compile("(complement)\\((\\d+)\\.\\.(\\d+)\\)");

  private static final Pattern patLocus = Pattern
          .compile("^LOCUS +([a-z|A-Z|0-9|_]+) +([0-9]+) bp ( {3}|ss\\-|ds\\-|ms\\-)([a-z|A-Z|-|\\s]+) ([a-z| ]{8}) ([A-Z| ]{3}) ([0-9]+-[A-Z]+-[0-9]+)");

  private static final Pattern patQualifierKey = Pattern.compile("/(.*?)=");

  private static final Pattern patFeatureKey = Pattern
          .compile("^\\s{5}([A-Za-z0-9\\_\\']+)\\s+");

  /** A run of decimal digits; used to pick numbers out of a REFERENCE line. */
  private static final Pattern patNumber = Pattern.compile("\\d+");

  /** Sub-keywords recognised inside a REFERENCE section. */
  private static final String[] referenceSubKeywords = { "AUTHORS",
      "TITLE", "JOURNAL", "PUBMED", "MEDLINE", "REMARK", "CONSRTM" };

  /**
   * Header feature types that print() writes as "TYPE description"; LOCUS and
   * REFERENCE are written as the bare description instead.
   */
  private static final String[] labelledHeaders = { "DEFINITION",
      "VERSION", "ACCESSION", "DBLINK", "KEYWORDS", "SOURCE", "COMMENT",
      "BASE COUNT" };

  /**
   * Column at which continuation text starts; a sub-keyword must be indented
   * by less than this to be told apart from wrapped text.
   */
  private static final int CONTINUATION_COLUMN = 12;

  /** Number of residues written per ORIGIN line. */
  private static final int RESIDUES_PER_LINE = 60;

  /** Number of residues per space-separated group on an ORIGIN line. */
  private static final int RESIDUES_PER_GROUP = 10;

  private String definition;

  private String accession;

  private String keywords;

  private String dblink;

  private String baseCount;

  private Vector<GenBankFeature> features;

  private Vector<String> comments;

  // Items under origin
  private Vector<GenBankSequence> sequences;

  private Vector<GenBankReference> references;

  private SequenceI genBankSequence;

  public GenBankFile()
  {
  }

  public GenBankFile(String inFile, String type) throws IOException
  {
    super(inFile, type);
  }

  public GenBankFile(FileParse source) throws IOException
  {
    super(source);
  }

  /**
   * Resets the collections that parse() fills, after letting the superclass
   * reset its own state.
   */
  @Override
  public void initData()
  {
    super.initData();
    features = new Vector<GenBankFeature>();
    comments = new Vector<String>();
    sequences = new Vector<GenBankSequence>();
    references = new Vector<GenBankReference>();
  }

  /**
   * Reads the whole file line by line and builds the sequence entry from it.
   * <p>
   * The parser is a small state machine: a top-level keyword (FEATURES,
   * ORIGIN, REFERENCE, SOURCE, COMMENT, ...) switches the current section,
   * and the "parsingXxx" flags remember which multi-line value a continuation
   * line belongs to. When the input is exhausted the collected data is
   * converted by {@link #setEntries()}.
   * 
   * @throws IOException
   *           if the file is not valid or a line cannot be read
   */
  public void parse() throws IOException
  {
    if (!this.isValid())
    {
      // File is not valid
      throw new IOException("GenBankFile is not valid.");
    }

    boolean featureMode = false; // FEATURES found
    boolean seqMode = false; // reading sequence lines after ORIGIN
    boolean referenceMode = false; // REFERENCE found
    boolean sourceMode = false; // SOURCE found
    boolean commentMode = false; // COMMENT found
    boolean parsingAuthors = false; // inside a (multi-line) AUTHORS value
    boolean parsingDefinition = false; // inside a (multi-line) DEFINITION
    boolean parsingKeywords = false; // inside a (multi-line) KEYWORDS value
    boolean parsingDbLink = false; // inside a (multi-line) DBLINK value
    boolean parsingTitle = false; // inside a (multi-line) TITLE value
    boolean parsingQualifier = false; // inside a (multi-line) qualifier
    boolean sourceParsed = false; // sourceLines already turned into source
    String currentQualifierName = "";
    GenBankReference reference = null;
    GenBankFeature feature = null;
    // Location text of the current feature; kept as text until the feature's
    // first qualifier (or its end) so that a location wrapped over several
    // lines is parsed as a whole.
    StringBuilder pendingLocation = new StringBuilder();
    List<String> sourceLines = new ArrayList<String>();

    String line;
    while ((line = nextLine()) != null)
    {
      // We only process lines if they have contents within
      if (line.length() == 0)
      {
        continue;
      }

      if (line.startsWith("FEATURES"))
      {
        featureMode = true;
        seqMode = false;
        referenceMode = false;
        sourceMode = false;
        commentMode = false;
        feature = new GenBankFeature();
        pendingLocation.setLength(0);
        source = parseSource(sourceLines);
        sourceParsed = true;
        // The last reference has no following REFERENCE line to store it.
        if (reference != null)
        {
          references.add(reference);
          reference = null;
        }
      }

      if (seqMode)
      {
        // "//" terminates the record; whitespace-only lines carry no data.
        if (!line.startsWith("//") && line.trim().length() > 0)
        {
          sequences.add(processSequenceLine(line));
        }
        continue;
      }

      if (line.startsWith("ORIGIN"))
      {
        // feature is null when the file has no FEATURES section.
        if (feature != null)
        {
          finishFeature(feature, pendingLocation);
          feature = null;
        }
        if (reference != null)
        {
          references.add(reference);
          reference = null;
        }
        featureMode = false;
        referenceMode = false;
        sourceMode = false;
        commentMode = false;
        seqMode = true;
        continue;
      }

      if (featureMode && !line.startsWith("FEATURES")
              && !line.startsWith("BASE COUNT"))
      {
        if (!line.trim().startsWith("/"))
        {
          Matcher featuresMatch = patFeatureKey.matcher(line);
          if (featuresMatch.find())
          {
            // A new feature key: store the previous feature (it is only
            // stored once it has a type) and start a new one.
            finishFeature(feature, pendingLocation);
            // The type is the whole match, including the whitespace around
            // the key, as the regular expression captures it.
            String type = featuresMatch.group(0);
            feature = new GenBankFeature();
            feature.setType(type);
            // The pattern is anchored at the line start, so everything after
            // the match is the location text.
            pendingLocation.append(line.substring(featuresMatch.end()));
            parsingQualifier = false;
            continue;
          }
          else if (parsingQualifier)
          {
            // If not a feature, it's another part of a qualifier
            String qValue = feature.getQualifier(currentQualifierName);
            feature.updateQualifier(currentQualifierName, qValue
                    + ltrim(line));
            continue;
          }
          else if (feature.getType() != null
                  && Character.isWhitespace(line.charAt(0)))
          {
            // An indented line between a feature key and its first
            // qualifier continues the feature's location.
            pendingLocation.append(line.trim());
            continue;
          }
        }
        else
        {
          // It's the beginning of a qualifier line
          Matcher matcher = patQualifierKey.matcher(line);
          if (matcher.find())
          {
            applyPendingLocation(feature, pendingLocation);
            currentQualifierName = matcher.group(1).replace("/", "");
            // The value is everything after the first "=", so "/", "=" or
            // the qualifier name occurring inside the value are preserved.
            feature.addQualifier(currentQualifierName,
                    ltrim(line.substring(matcher.end())));
            parsingQualifier = true;
            continue;
          }
        }
      }

      // Process REFERENCE line
      if (line.startsWith("REFERENCE"))
      {
        // Store the reference we were building, if any, and start a new one
        if (reference != null)
        {
          references.add(reference);
        }
        referenceMode = true;
        featureMode = false;
        sourceMode = false;
        seqMode = false;
        reference = new GenBankReference();
        String desc = processReferenceLine(line, "REFERENCE");
        int[] ranges = parseReferenceDescriptor(desc);
        reference.setDescriptor(desc);
        reference.setOrder(ranges[0]);
        reference.setBegin(ranges[1]);
        reference.setEnd(ranges[2]);
        parsingAuthors = false;
        parsingTitle = false;
        continue;
      }

      // Process the sub-keywords of a REFERENCE section
      String subKeyword = referenceSubKeyword(line);
      if (subKeyword != null)
      {
        if (referenceMode)
        {
          String value = processReferenceLine(line, subKeyword);
          if ("AUTHORS".equals(subKeyword))
          {
            reference.setAuthors(value);
          }
          else if ("TITLE".equals(subKeyword))
          {
            reference.setTitle(value);
          }
          else if ("JOURNAL".equals(subKeyword))
          {
            reference.setJournal(value);
          }
          else if ("PUBMED".equals(subKeyword))
          {
            reference.setPubmed(value);
          }
          else if ("MEDLINE".equals(subKeyword))
          {
            reference.setMedline(value);
          }
          else if ("REMARK".equals(subKeyword))
          {
            reference.setRemark(value);
          }
          else
          {
            reference.setConsortia(value);
          }
          // Only AUTHORS and TITLE values are continued on following lines.
          parsingAuthors = "AUTHORS".equals(subKeyword);
          parsingTitle = "TITLE".equals(subKeyword);
        }
        continue;
      }

      if (line.startsWith("SOURCE"))
      {
        parsingKeywords = false;
        sourceMode = true;
        commentMode = false;
        sourceLines.add(line);
        continue;
      }

      if (line.indexOf("ORGANISM") != -1)
      {
        if (sourceMode)
        {
          sourceLines.add(line);
          continue;
        }
      }

      if (line.startsWith("COMMENT"))
      {
        if (reference != null)
        {
          references.add(reference);
          reference = null;
        }
        commentMode = true;
        sourceMode = false;
        referenceMode = false;
        seqMode = false;
        comments.add(processCommentLine(line));
        continue;
      }

      // Process LOCUS line
      if (line.startsWith("LOCUS"))
      {
        locus = parseLocus(line);
        continue;
      }

      // Process BASE COUNT line
      if (line.startsWith("BASE COUNT"))
      {
        baseCount = processHeaderLine(line, "BASE COUNT");
        featureMode = false;
        continue;
      }

      // Process DEFINITION line
      if (line.startsWith("DEFINITION"))
      {
        definition = processHeaderLine(line, "DEFINITION");
        parsingDefinition = true;
        continue;
      }

      // Process ACCESSION line
      if (line.startsWith("ACCESSION"))
      {
        accession = processHeaderLine(line, "ACCESSION");
        parsingDefinition = false;
        continue;
      }

      // Process VERSION line
      if (line.startsWith("VERSION"))
      {
        version = parseVersion(line);
        continue;
      }

      // Process DBLINK line
      if (line.startsWith("DBLINK"))
      {
        dblink = processHeaderLine(line, "DBLINK");
        parsingDbLink = true;
        continue;
      }

      // Process KEYWORDS line
      if (line.startsWith("KEYWORDS"))
      {
        keywords = processHeaderLine(line, "KEYWORDS");
        parsingKeywords = true;
        parsingDbLink = false;
        continue;
      }

      // Anything else is a continuation line of the current section.
      if (sourceMode)
      {
        sourceLines.add(line);
        continue;
      }

      if (parsingDefinition)
      {
        definition = definition + line;
        continue;
      }

      if (referenceMode && parsingAuthors)
      {
        if (reference != null)
        {
          reference.setAuthors(reference.getAuthors() + line);
        }
        continue;
      }

      if (referenceMode && parsingTitle)
      {
        if (reference != null)
        {
          reference.setTitle(reference.getTitle() + line);
        }
        continue;
      }

      if (parsingKeywords)
      {
        keywords = keywords + line;
        continue;
      }

      if (parsingDbLink)
      {
        dblink = dblink + line;
        continue;
      }

      if (commentMode)
      {
        comments.add(line);
      }
    }

    // Store whatever was still being built when the input ended.
    if (reference != null)
    {
      references.add(reference);
    }
    if (feature != null)
    {
      finishFeature(feature, pendingLocation);
    }
    if (!sourceParsed && !sourceLines.isEmpty())
    {
      source = parseSource(sourceLines);
    }

    setEntries();
  }

  /**
   * Returns the REFERENCE sub-keyword that the line starts with, or null if
   * it starts with none.
   * 
   * @param line
   *          a line of the file
   * @return one of {@link #referenceSubKeywords}, or null
   */
  private String referenceSubKeyword(String line)
  {
    for (String keyword : referenceSubKeywords)
    {
      if (isSubKeyword(line, keyword))
      {
        return keyword;
      }
    }
    return null;
  }

  /**
   * Tells whether the line is an indented sub-keyword line: the keyword must
   * follow at least one and fewer than {@link #CONTINUATION_COLUMN} leading
   * spaces, so that wrapped text that merely begins with the same word is not
   * mistaken for it.
   * 
   * @param line
   *          a line of the file
   * @param keyword
   *          the sub-keyword to look for
   * @return true if the line starts with the indented keyword
   */
  private boolean isSubKeyword(String line, String keyword)
  {
    int indent = 0;
    while (indent < line.length() && line.charAt(indent) == ' ')
    {
      indent++;
    }
    return indent > 0 && indent < CONTINUATION_COLUMN
            && line.startsWith(keyword, indent);
  }

  /**
   * Parses the accumulated location text, if any, into the feature's
   * location and clears the buffer.
   * 
   * @param fea
   *          the feature being built; ignored when null
   * @param pending
   *          the location text collected so far
   */
  private void applyPendingLocation(GenBankFeature fea,
          StringBuilder pending)
  {
    if (fea != null && pending.length() > 0)
    {
      String text = pending.toString();
      pending.setLength(0);
      fea.setLocation(parserFeatureLocation(fea, text));
    }
  }

  /**
   * Completes a feature: resolves its pending location and stores it in the
   * feature list, provided it has a type.
   * 
   * @param fea
   *          the feature being built
   * @param pending
   *          the location text collected so far
   */
  private void finishFeature(GenBankFeature fea, StringBuilder pending)
  {
    applyPendingLocation(fea, pending);
    if (fea.getType() != null)
    {
      features.add(fea);
    }
  }

  /**
   * Maps the collected GenBank data onto a single sequence: the residues, a
   * GENBANK database reference, one sequence feature per header section and
   * reference, one for the comments and one per parsed feature. The result is
   * installed with setSeqs().
   */
  protected void setEntries()
  {
    // Mapping GenBank info into Jalview data model
    genBankSequence = new Sequence(accession,
            DnaUtils.getNucleotidesFromSequenceVector(sequences));
    int length = genBankSequence.getLength();

    // Mapping DBRefEntry
    DBRefEntry dbRef = new DBRefEntry();
    dbRef.setSource(DBRefSource.GENBANK);
    dbRef.setVersion(version == null ? "" : version.toString());
    dbRef.setAccessionId(accession);
    // add map to indicate the sequence is a valid coordinate frame for the
    // dbref
    dbRef.setMap(new Mapping(null, new int[] { 1, length }, new int[] { 1,
        length }, 1, 1));
    genBankSequence.addDBRef(dbRef);

    // add header info as features spanning the whole sequence
    addHeaderFeature("LOCUS", locus == null ? "" : locus.toString());
    addHeaderFeature("DEFINITION", definition);
    addHeaderFeature("ACCESSION", accession);
    addHeaderFeature("VERSION", version == null ? "" : version.toString());
    addHeaderFeature("DBLINK", dblink == null ? "" : dblink);
    addHeaderFeature("KEYWORDS", keywords);
    addHeaderFeature("SOURCE", source == null ? "" : source.toString());
    addHeaderFeature("BASE COUNT", baseCount == null ? "" : baseCount);

    // add literature and database cross references in the file
    for (GenBankReference gbRef : references)
    {
      SequenceFeature refFeature = new SequenceFeature("REFERENCE",
              gbRef.toString(), null, gbRef.getBegin(), gbRef.getEnd(),
              DBRefSource.GENBANK);
      genBankSequence.addSequenceFeature(refFeature);
    }

    // add COMMENTS
    if (comments.size() > 0)
    {
      StringBuffer sb = new StringBuffer();
      for (String comment : comments)
      {
        sb.append(comment).append(newline);
      }
      addHeaderFeature("COMMENT", sb.toString());
    }

    // Mapping FEATURES
    for (GenBankFeature feature : features)
    {
      if (feature.getType() != null)
      {
        SequenceFeature sf = new SequenceFeature();
        sf.setType(feature.getType());
        sf.setDescription(feature.getType());
        GenBankLocation location = feature.getLocation();
        sf.setBegin(location == null ? 0 : location.getMinor());
        sf.setEnd(location == null ? 0 : location.getMajor());
        Enumeration<?> names = feature.getQualifiersNames();
        while (names.hasMoreElements())
        {
          String qName = (String) names.nextElement();
          sf.setValue(qName, feature.getQualifier(qName));
        }
        genBankSequence.addSequenceFeature(sf);
      }
    }

    SequenceI[] parsedSeqs = new SequenceI[1];
    parsedSeqs[0] = genBankSequence;
    this.setSeqs(parsedSeqs);
  }

  /**
   * Adds a sequence feature covering positions 1 to the sequence length, in
   * the GENBANK feature group.
   * 
   * @param type
   *          the feature type (the GenBank keyword)
   * @param description
   *          the feature description; passed on as given, possibly null
   */
  private void addHeaderFeature(String type, String description)
  {
    genBankSequence.addSequenceFeature(new SequenceFeature(type,
            description, null, 1, genBankSequence.getLength(),
            DBRefSource.GENBANK));
  }

  /**
   * Parses a VERSION line such as "VERSION U00096.2 GI:48994873". The value
   * is read from fixed character offsets; the "GI:" part is optional.
   * 
   * @param line
   *          the VERSION line
   * @return the parsed version, or null if the line holds only the keyword
   */
  private GenBankVersion parseVersion(String line)
  {
    if (line.trim().equalsIgnoreCase("VERSION"))
    {
      return null;
    }
    GenBankVersion ver = new GenBankVersion();
    int start = Math.min(11, line.length());
    int end = line.indexOf(" ", 12);
    if (end < 0)
    {
      // Nothing follows the version identifier (no GI number on the line).
      end = line.length();
    }
    String v = line.substring(start, end).trim();
    ver.setVersion(v);
    int posGI = line.indexOf("GI:", 11 + v.length());
    if (posGI > -1)
    {
      ver.setGI(line.substring(posGI));
    }
    return ver;
  }

  /**
   * Parses a LOCUS line with the LOCUS pattern. When the line does not match,
   * an empty locus is returned.
   * 
   * @param line
   *          the LOCUS line
   * @return the parsed locus, never null
   */
  private GenBankLocus parseLocus(String line)
  {
    GenBankLocus loc = new GenBankLocus();
    Matcher mat = patLocus.matcher(line);
    if (mat.find())
    {
      String name = mat.group(1);
      String len = mat.group(2);
      String strand = mat.group(3);
      String mtype = mat.group(4);
      String linear = mat.group(5);
      String division = mat.group(6);
      String date = mat.group(7);
      loc.setName(name == null ? "" : name.trim());
      loc.setSequenceLength(len == null ? 0 : Integer.parseInt(len));
      loc.setStrand(strand == null ? "" : strand);
      loc.setMoleculeType(mtype == null ? "" : mtype);
      loc.setLinearSequence("linear".equals(linear));
      loc.setDivision(division == null ? "" : division);
      loc.setModificationDate(date == null ? "" : date);
    }
    return loc;
  }

  /**
   * Builds the source description from the lines collected in the SOURCE
   * section. The first 11 characters (the keyword column) are skipped; the
   * first line becomes the source, the text after "ORGANISM" up to the end of
   * its line the organism, and the remainder, with all whitespace removed,
   * the taxonomy.
   * 
   * @param lines
   *          the lines of the SOURCE section, in file order
   * @return the parsed source; empty when there is no usable text
   */
  private GenBankSource parseSource(List<String> lines)
  {
    StringBuffer sb = new StringBuffer();
    for (String line : lines)
    {
      sb.append(line).append(newline);
    }
    String joined = sb.toString();

    // Source section
    GenBankSource sou = new GenBankSource();
    if (joined.length() < 11)
    {
      // No SOURCE section (or nothing beyond the keyword column).
      return sou;
    }
    String aux = joined.substring(11);
    int lineEnd = aux.indexOf("\n");
    if (lineEnd > -1)
    {
      sou.setSource(aux.substring(0, lineEnd));
      int organismAt = aux.indexOf("ORGANISM");
      if (organismAt > -1)
      {
        lineEnd = aux.indexOf("\n", organismAt + 10);
        if (lineEnd > -1)
        {
          sou.setOrganism(aux.substring(organismAt + 10, lineEnd));
          sou.setTaxonomic(aux.substring(lineEnd).replaceAll("\\s+", ""));
        }
        else
        {
          sou.setOrganism(aux);
        }
      }
    }
    else
    {
      sou.setSource(aux);
    }
    return sou;
  }

  /**
   * Parses a feature location descriptor and stores the result as the
   * feature's location. Whitespace is ignored. Forms considered:
   * <ul>
   * <li>467 - a single base</li>
   * <li>340..565 - a continuous range, both ends included</li>
   * <li>&lt;345..500 - the exact lower boundary is unknown</li>
   * <li>&lt;1..888 - starts before the first sequenced base</li>
   * <li>1..&gt;888 - continues beyond base 888</li>
   * <li>102.110 - one of the bases between 102 and 110, inclusive</li>
   * <li>123^124 - a site between bases 123 and 124</li>
   * <li>join(12..78,134..202) - regions joined into one contiguous sequence</li>
   * <li>complement(34..126) - the range on the complementary strand</li>
   * <li>complement(join(2691..4571,4918..5163)) - joined, then complemented</li>
   * <li>join(complement(4918..5163),complement(2691..4571)) - complemented,
   * then joined</li>
   * <li>J00194.1:100..202 - bases 100 to 202 of the entry with accession
   * J00194</li>
   * <li>join(1..100,J00194.1:100..202) - a local region joined with a region
   * of a remote entry</li>
   * </ul>
   * 
   * @param fea
   *          the feature whose location is being parsed
   * @param localiza
   *          the location descriptor text
   * @return the parsed location
   * @throws NumberFormatException
   *           if a base position is missing or not a number
   */
  private GenBankLocation parserFeatureLocation(GenBankFeature fea,
          String localiza)
  {
    // remove spaces, line breaks etc.
    String buf = localiza.replaceAll("\\s", "");

    // Split at commas that are not inside parentheses, e.g.
    // complement(100..110),complement(90..100)
    List<String> parts = new ArrayList<String>();
    int depth = 0;
    int partStart = 0;
    for (int i = 0; i < buf.length(); i++)
    {
      char c = buf.charAt(i);
      if (c == '(')
      {
        depth++;
      }
      else if (c == ')')
      {
        depth--;
      }
      else if (c == ',' && depth == 0)
      {
        parts.add(buf.substring(partStart, i));
        partStart = i + 1;
      }
    }
    if (!parts.isEmpty())
    {
      parts.add(buf.substring(partStart));
      GenBankLocations list = new GenBankLocations();
      list.setOperator(GenBankLocations.NONE);
      for (String part : parts)
      {
        list.getUnits().add(parserFeatureLocation(fea, part));
      }
      fea.setLocation(list);
      return list;
    }

    // Handle the operators: complement(location,location...),
    // join(location,location...), order(location,location...)
    int open = buf.indexOf("(");
    if (open >= 0)
    {
      int close = buf.lastIndexOf(")");
      if (close < open)
      {
        // Tolerate a missing closing parenthesis.
        close = buf.length();
      }
      String token = buf.substring(0, open);
      String inner = buf.substring(open + 1, close);
      if ("complement".equalsIgnoreCase(token))
      {
        GenBankLocations wrapper = new GenBankLocations();
        GenBankLocation interno = parserFeatureLocation(fea, inner);
        interno.setComplement(true);
        wrapper.setOperator(GenBankLocations.COMPLEMENT);
        wrapper.getUnits().add(interno);
        fea.setLocation(wrapper);
      }
      else if ("join".equalsIgnoreCase(token))
      {
        GenBankLocations wrapper = new GenBankLocations();
        GenBankLocation interno = parserFeatureLocation(fea, inner);
        wrapper.setOperator(GenBankLocations.JOIN);
        wrapper.getUnits().add(interno);
        fea.setLocation(wrapper);
      }
      else if ("order".equalsIgnoreCase(token))
      {
        GenBankLocations wrapper = new GenBankLocations();
        GenBankLocation interno = parserFeatureLocation(fea, inner);
        wrapper.setOperator(GenBankLocations.ORDER);
        wrapper.getUnits().add(interno);
        fea.setLocation(wrapper);
      }
      else
      {
        // Unknown operator: report it and use what is inside the brackets.
        log.log(Level.WARNING,
                "Token desconhecido em location/features - {0}", token);
        fea.setLocation(parserFeatureLocation(fea, inner));
      }
      return fea.getLocation();
    }

    // Commas were all consumed by the split above, so what is left is either
    // a range or a single point.
    if (buf.contains(".."))
    {
      String[] ends = buf.split("\\.\\.");
      GenBankLocationRange range = new GenBankLocationRange();
      if (buf.contains(":"))
      {
        // "entry:position" - remember the entry, keep the position
        for (int i = 0; i < ends.length; i++)
        {
          int colon = ends[i].indexOf(":");
          if (colon > 0)
          {
            range.setEntry(ends[i].substring(0, colon));
            ends[i] = ends[i].substring(colon + 1);
          }
        }
      }
      GenBankLocationPoint first = (GenBankLocationPoint) parserFeatureLocation(
              fea, ends[0]);
      range.setStart(first);
      GenBankLocationPoint last = (GenBankLocationPoint) parserFeatureLocation(
              fea, ends[1]);
      range.setEnd(last);
      fea.setLocation(range);
      return range;
    }

    // A point. Forms considered: 467, 102.110, 123^124, <345, >400, 345>,
    // 400<, or a combination of these.
    GenBankLocationPoint gp = new GenBankLocationPoint();
    if (buf.contains(":"))
    {
      int colon = buf.indexOf(":");
      if (colon > 0)
      {
        gp.setEntry(buf.substring(0, colon));
        buf = buf.substring(colon + 1);
      }
    }
    int pos = 0;
    // check for < or > before the first number
    if (isBoundaryMarker(buf, pos))
    {
      gp.setPrefix(buf.charAt(pos));
      pos++;
    }
    // read the first number
    int numberStart = pos;
    pos = skipDigits(buf, pos);
    int num = parsePosition(buf.substring(numberStart, pos), localiza);
    // the first number may be the only one
    int num2 = num;
    if (pos < buf.length())
    {
      // check for < or > after the first number
      if (isBoundaryMarker(buf, pos))
      {
        if (buf.contains(".") || buf.contains("^"))
        {
          gp.setPrefix(buf.charAt(pos));
        }
        else
        {
          gp.setSufix(buf.charAt(pos));
        }
        pos++;
      }
      // check for the number separator, "." or "^"
      if (pos < buf.length()
              && (buf.charAt(pos) == '.' || buf.charAt(pos) == '^'))
      {
        // separator found, a second number follows
        gp.setSymbol(buf.charAt(pos));
        pos++;
        // check for < or > before the second number
        if (isBoundaryMarker(buf, pos))
        {
          gp.setSufix(buf.charAt(pos));
          pos++;
        }
        // read the second number
        numberStart = pos;
        pos = skipDigits(buf, pos);
        num2 = parsePosition(buf.substring(numberStart, pos), localiza);
        // check for < or > after the second number
        if (isBoundaryMarker(buf, pos))
        {
          gp.setSufix(buf.charAt(pos));
          pos++;
        }
      }
    }
    gp.setMin(num);
    gp.setMax(num2);
    fea.setLocation(gp);
    return gp;
  }

  /**
   * Tells whether the character at the given index exists and is '&lt;' or
   * '&gt;'.
   */
  private static boolean isBoundaryMarker(String text, int index)
  {
    return index < text.length()
            && (text.charAt(index) == '<' || text.charAt(index) == '>');
  }

  /**
   * Returns the index of the first character at or after the given index that
   * is not a decimal digit (or the text length).
   */
  private static int skipDigits(String text, int from)
  {
    int pos = from;
    while (pos < text.length() && text.charAt(pos) >= '0'
            && text.charAt(pos) <= '9')
    {
      pos++;
    }
    return pos;
  }

  /**
   * Converts the digits of a base position to a number, reporting the whole
   * location descriptor when the digits are missing.
   * 
   * @throws NumberFormatException
   *           if digits is empty or out of the int range
   */
  private static int parsePosition(String digits, String location)
  {
    if (digits.length() < 1)
    {
      log.log(Level.WARNING, "Unparseable GenBank location - {0}",
              location);
      throw new NumberFormatException("Unparseable GenBank location: "
              + location);
    }
    return Integer.parseInt(digits);
  }

  /**
   * Extracts the numbers of a REFERENCE descriptor such as
   * "1 (bases 1 to 1609)".
   * 
   * @param descriptor
   *          the REFERENCE line without its keyword
   * @return the reference order, the first base and the last base, in that
   *         order; a number that is not present is returned as 0
   */
  private int[] parseReferenceDescriptor(String descriptor)
  {
    int[] result = new int[3];
    Matcher numbers = patNumber.matcher(descriptor);
    for (int i = 0; i < result.length && numbers.find(); i++)
    {
      result[i] = Integer.parseInt(numbers.group());
    }
    return result;
  }

  /**
   * Returns the line without the first occurrence of the keyword; later
   * occurrences, which belong to the value, are kept. The line is returned
   * unchanged when it does not contain the keyword.
   */
  private static String removeFirst(String line, String keyword)
  {
    int at = line.indexOf(keyword);
    if (at < 0)
    {
      return line;
    }
    return line.substring(0, at) + line.substring(at + keyword.length());
  }

  /**
   * Strips the given REFERENCE (sub-)keyword from the line, leaving the
   * surrounding whitespace in place.
   */
  private String processReferenceLine(String line, String component)
  {
    return removeFirst(line, component);
  }

  /**
   * Strips the given header keyword from the line, leaving the surrounding
   * whitespace in place.
   */
  private String processHeaderLine(String line, String header)
  {
    return removeFirst(line, header);
  }

  /**
   * Parses a sequence line of the ORIGIN block: a leading position number
   * followed by whitespace-separated groups of residues.
   * 
   * @param line
   *          the sequence line
   * @return the position number and residue groups of the line
   * @throws NumberFormatException
   *           if the line does not start with a number
   */
  private GenBankSequence processSequenceLine(String line)
  {
    GenBankSequence gbs = new GenBankSequence();
    String[] tokens = ltrim(line).split("\\s+");
    gbs.setId(Integer.parseInt(tokens[0]));
    Vector<String> seqs = new Vector<String>();
    for (int i = 1; i < tokens.length; i++)
    {
      seqs.add(tokens[i]);
    }
    gbs.setSequences(seqs);
    return gbs;
  }

  /**
   * Strips the COMMENT keyword from the line, leaving the surrounding
   * whitespace in place.
   */
  private String processCommentLine(String line)
  {
    return removeFirst(line, "COMMENT");
  }

  /**
   * Removes trailing whitespace.
   * 
   * @param s
   *          the text to trim
   * @return the text without its trailing whitespace characters
   */
  public String rtrim(String s)
  {
    int i = s.length() - 1;
    while (i >= 0 && Character.isWhitespace(s.charAt(i)))
    {
      i--;
    }
    return s.substring(0, i + 1);
  }

  /**
   * Removes leading whitespace.
   * 
   * @param s
   *          the text to trim
   * @return the text without its leading whitespace characters
   */
  public String ltrim(String s)
  {
    int i = 0;
    while (i < s.length() && Character.isWhitespace(s.charAt(i)))
    {
      i++;
    }
    return s.substring(i);
  }

  /**
   * Writes every held sequence as a GenBank-style record: the header
   * features, a FEATURES table for all other sequence features with their
   * qualifiers, the residues under ORIGIN and a closing "//" line.
   * 
   * @return the text of all records
   */
  public String print()
  {
    StringBuffer out = new StringBuffer();
    for (SequenceI seq : this.getSeqs())
    {
      SequenceFeature[] seqFeatures = seq.getSequenceFeatures();
      boolean featureLinePrinted = false;
      if (seqFeatures != null)
      {
        for (SequenceFeature sf : seqFeatures)
        {
          String type = sf.getType();
          if ("LOCUS".equals(type) || "REFERENCE".equals(type))
          {
            // The description already is the complete line.
            out.append(sf.getDescription()).append(newline);
          }
          else if (isLabelledHeader(type))
          {
            out.append(type).append(" ").append(sf.getDescription())
                    .append(newline);
          }
          else
          {
            if (!featureLinePrinted)
            {
              out.append("FEATURES Location/Qualifiers").append(newline);
              featureLinePrinted = true;
            }
            out.append(" ").append(type).append(" ").append(sf.getBegin())
                    .append("..").append(sf.getEnd()).append(newline);
            Hashtable<?, ?> qualifiers = sf.otherDetails;
            if (qualifiers != null)
            {
              Enumeration<?> keys = qualifiers.keys();
              while (keys.hasMoreElements())
              {
                Object key = keys.nextElement();
                Object value = qualifiers.get(key);
                if (value != null)
                {
                  out.append(" /").append(key).append("=").append(value)
                          .append(newline);
                }
              }
            }
          }
        }
      }
      out.append("ORIGIN").append(newline);
      appendOrigin(out, seq.getSequenceAsString());
      // The terminator gets its own line so that records do not run together.
      out.append("//").append(newline);
    }
    return out.toString();
  }

  /**
   * Tells whether the feature type is one of the header sections written as
   * "TYPE description".
   */
  private static boolean isLabelledHeader(String type)
  {
    for (String header : labelledHeaders)
    {
      if (header.equals(type))
      {
        return true;
      }
    }
    return false;
  }

  /**
   * Appends the residues as ORIGIN lines: each line starts with the 1-based
   * position of its first residue, followed by up to six groups of up to ten
   * residues separated by single spaces. Nothing is written for an empty
   * sequence.
   * 
   * @param out
   *          the buffer to append to
   * @param residues
   *          the sequence residues
   */
  private void appendOrigin(StringBuffer out, String residues)
  {
    int length = residues.length();
    for (int lineStart = 0; lineStart < length; lineStart += RESIDUES_PER_LINE)
    {
      int lineEnd = Math.min(lineStart + RESIDUES_PER_LINE, length);
      out.append(" ").append(lineStart + 1).append(" ");
      for (int groupStart = lineStart; groupStart < lineEnd; groupStart += RESIDUES_PER_GROUP)
      {
        if (groupStart > lineStart)
        {
          out.append(" ");
        }
        out.append(residues.substring(groupStart,
                Math.min(groupStart + RESIDUES_PER_GROUP, lineEnd)));
      }
      out.append(newline);
    }
  }
}