package jalview.io;

import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.DBRefEntry;
import jalview.datamodel.DBRefSource;
import jalview.datamodel.Mapping;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.io.xdb.genbank.GenBankFeature;
import jalview.io.xdb.genbank.GenBankLocation;
import jalview.io.xdb.genbank.GenBankLocationPoint;
import jalview.io.xdb.genbank.GenBankLocationRange;
import jalview.io.xdb.genbank.GenBankLocations;
import jalview.io.xdb.genbank.GenBankLocus;
import jalview.io.xdb.genbank.GenBankReference;
import jalview.io.xdb.genbank.GenBankSequence;
import jalview.io.xdb.genbank.GenBankSource;
import jalview.io.xdb.genbank.GenBankVersion;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.List;
import java.util.Vector;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.james.mime4j.field.ParsedField;

/**
 * Reads a single GenBank flat-file record into one Jalview sequence and
 * writes such a sequence back out in a GenBank-like layout.
 * <p>
 * Header entries (LOCUS, DEFINITION, ACCESSION, ...) are stored on the
 * sequence as features of the same name; entries of the FEATURES table become
 * sequence features whose qualifiers are stored as feature values.
 * <p>
 * NOTE(review): the text this class was recovered from had lost its generic
 * type arguments and part of {@link #processSequenceLine(String)},
 * {@code processCommentLine} and {@link #rtrim(String)}. Those parts are
 * reconstructed here and are individually marked; please confirm them
 * against the project sources.
 */
public class GenBankFile extends AlignFile
{
  private static final Logger log = Logger.getLogger(GenBankFile.class
          .getName());

  /**
   * Number of leading blanks on a continuation line of a header entry.
   * Sub-keywords (AUTHORS, TITLE, ...) are indented by less than this.
   */
  private static final int CONTINUATION_INDENT = 12;

  /** Number of bases written on one ORIGIN line. */
  private static final int BASES_PER_LINE = 60;

  /** Number of bases in one blank-separated group of an ORIGIN line. */
  private static final int BASES_PER_GROUP = 10;

  private GenBankVersion version = new GenBankVersion();

  private GenBankLocus locus = new GenBankLocus();

  private GenBankSource source = new GenBankSource();

  private static final Pattern patLocation = Pattern
          .compile("(\\d+)\\.\\.(\\d+)");

  private static final Pattern patLocationComp = Pattern
          .compile("(complement)\\((\\d+)\\.\\.(\\d+)\\)");

  private static final Pattern patLocus = Pattern
          .compile("^LOCUS +([a-z|A-Z|0-9|_]+) +([0-9]+) bp ( {3}|ss\\-|ds\\-|ms\\-)([a-z|A-Z|-|\\s]+) ([a-z| ]{8}) ([A-Z| ]{3}) ([0-9]+-[A-Z]+-[0-9]+)");

  private static final Pattern patQualifierKey = Pattern.compile("/(.*?)=");

  private static final Pattern patFeatureKey = Pattern
          .compile("^\\s{5}([A-Za-z0-9\\_\\']+)\\s+");

  /**
   * Matches the text after the REFERENCE keyword: the reference number,
   * optionally followed by "(bases a to b" or "(residues a to b".
   */
  private static final Pattern patReferenceDescriptor = Pattern
          .compile("^\\s*(\\d+)(?:\\s*\\(\\s*(?:bases|residues)\\s+(\\d+)\\s+to\\s+(\\d+))?");

  private String definition;

  private String accession;

  private String keywords;

  private String dblink;

  private String baseCount;

  private Vector<GenBankFeature> features;

  private Vector<String> comments;

  // Items under ORIGIN
  private Vector<GenBankSequence> sequences;

  private Vector<GenBankReference> references;

  private SequenceI genBankSequence;

  public GenBankFile()
  {
  }

  public GenBankFile(String inFile, String type) throws IOException
  {
    super(inFile, type);
  }

  public GenBankFile(FileParse source) throws IOException
  {
    super(source);
  }

  /**
   * Resets the collections that {@link #parse()} fills.
   */
  public void initData()
  {
    super.initData();
    features = new Vector<GenBankFeature>();
    comments = new Vector<String>();
    sequences = new Vector<GenBankSequence>();
    references = new Vector<GenBankReference>();
  }

  /**
   * Reads the record line by line, fills the header fields, references,
   * comments, features and sequence chunks, and finally converts them to a
   * Jalview sequence via {@link #setEntries()}.
   * 
   * @throws IOException
   *           if the underlying source is not valid or cannot be read
   */
  public void parse() throws IOException
  {
    if (!this.isValid())
    {
      // File is not valid
      throw new IOException("GenBankFile is not valid.");
    }

    String line;
    boolean featureMode = false; // FEATURES found
    boolean seqMode = false; // ORIGIN found, reading sequence lines
    boolean referenceMode = false; // REFERENCE found
    boolean sourceMode = false; // SOURCE found
    boolean commentMode = false; // COMMENT found
    boolean parsingAuthors = false; // Parsing authors (multiline)
    boolean parsingDefinition = false; // Parsing definition (multiline)
    boolean parsingKeywords = false; // Parsing keywords (multiline)
    boolean parsingDbLink = false; // Parsing DBLINK (multiline)
    boolean parsingTitle = false; // Parsing title (multiline)
    boolean parsingQualifier = false; // Parsing feature qualifier (multiline)
    String currentQualifierName = "";
    GenBankReference reference = null; // reference not yet stored
    GenBankFeature feature = null; // feature not yet stored
    // Location text of the current feature; a location may continue on the
    // following lines, so it is only parsed once the feature's first
    // qualifier, the next feature or ORIGIN is reached.
    StringBuilder pendingLocation = null;
    List<String> sourceLines = new ArrayList<String>();

    while ((line = nextLine()) != null)
    {
      // We only process lines if they have contents within
      if (line.length() == 0)
      {
        continue;
      }

      if (line.startsWith("FEATURES"))
      {
        // The last REFERENCE must be stored even when there is no COMMENT
        if (reference != null)
        {
          references.add(reference);
          reference = null;
        }
        featureMode = true;
        seqMode = false;
        referenceMode = false;
        sourceMode = false;
        commentMode = false;
        // No header entry continues past this point
        parsingAuthors = false;
        parsingTitle = false;
        parsingDefinition = false;
        parsingKeywords = false;
        parsingDbLink = false;
        feature = new GenBankFeature();
        pendingLocation = null;
        source = parseSource(sourceLines);
        continue;
      }

      if (seqMode)
      {
        if (!line.startsWith("//") && line.trim().length() > 0)
        {
          sequences.add(processSequenceLine(line));
        }
        continue;
      }

      if (line.startsWith("ORIGIN"))
      {
        // feature is null when the record has no FEATURES table
        if (feature != null)
        {
          applyPendingLocation(feature, pendingLocation);
          if (feature.getType() != null)
          {
            features.add(feature);
          }
        }
        feature = null;
        pendingLocation = null;
        featureMode = false;
        referenceMode = false;
        sourceMode = false;
        seqMode = true;
        continue;
      }

      if (featureMode && !line.startsWith("BASE COUNT"))
      {
        if (!line.trim().startsWith("/"))
        {
          Matcher featuresMatch = patFeatureKey.matcher(line);
          if (featuresMatch.find())
          {
            // A new feature starts: store the previous one. It must only be
            // added when it really is a feature (the placeholder created at
            // FEATURES has no type).
            applyPendingLocation(feature, pendingLocation);
            if (feature.getType() != null)
            {
              features.add(feature);
            }
            // NOTE(review): group(0) includes the indentation and padding
            // around the key; group(1) would be the bare key. Kept as is
            // because the effect of GenBankFeature.setType is not visible
            // here - confirm.
            String type = featuresMatch.group(0);
            feature = new GenBankFeature();
            feature.setType(type);
            pendingLocation = new StringBuilder(
                    line.substring(featuresMatch.end()));
            parsingQualifier = false;
            continue;
          }
          else if (parsingQualifier)
          {
            // If not a feature, it's another part of a qualifier
            String qValue = feature.getQualifier(currentQualifierName);
            StringBuffer sb = new StringBuffer().append(qValue).append(
                    ltrim(line));
            feature.updateQualifier(currentQualifierName, sb.toString());
            continue;
          }
          else if (pendingLocation != null)
          {
            // Continuation of a location that spans several lines
            pendingLocation.append(line.trim());
            continue;
          }
        }
        else
        {
          // First qualifier line: the location is complete now
          applyPendingLocation(feature, pendingLocation);
          pendingLocation = null;

          Matcher matcher = patQualifierKey.matcher(line);
          if (matcher.find())
          {
            currentQualifierName = matcher.group(1).replace("/", "");
            // Only the leading "/name=" is removed; '/', '=' or the
            // qualifier name inside the value are part of the value.
            String value = ltrim(line.substring(matcher.end()));
            feature.addQualifier(currentQualifierName, value);
            parsingQualifier = true;
            continue;
          }
        }
      }

      // Process REFERENCE line
      if (line.startsWith("REFERENCE"))
      {
        // Store the reference collected so far, if any, and start a new one
        if (reference != null)
        {
          references.add(reference);
        }
        referenceMode = true;
        featureMode = false;
        sourceMode = false;
        seqMode = false;
        reference = new GenBankReference();
        String desc = processReferenceLine(line, "REFERENCE");
        int[] ranges = parseReferenceDescriptor(desc);
        reference.setDescriptor(desc);
        reference.setOrder(ranges[0]);
        reference.setBegin(ranges[1]);
        reference.setEnd(ranges[2]);
        parsingAuthors = false;
        parsingTitle = false;
        continue;
      }

      if (isSubKeyword(line, "AUTHORS"))
      {
        if (referenceMode)
        {
          reference.setAuthors(processReferenceLine(line, "AUTHORS"));
          parsingAuthors = true;
          parsingTitle = false;
        }
        continue;
      }
      if (isSubKeyword(line, "TITLE"))
      {
        if (referenceMode)
        {
          reference.setTitle(processReferenceLine(line, "TITLE"));
          parsingAuthors = false;
          parsingTitle = true;
        }
        continue;
      }
      if (isSubKeyword(line, "JOURNAL"))
      {
        if (referenceMode)
        {
          reference.setJournal(processReferenceLine(line, "JOURNAL"));
          parsingTitle = false;
          parsingAuthors = false;
        }
        continue;
      }
      if (isSubKeyword(line, "PUBMED"))
      {
        if (referenceMode)
        {
          reference.setPubmed(processReferenceLine(line, "PUBMED"));
          parsingTitle = false;
          parsingAuthors = false;
        }
        continue;
      }
      if (isSubKeyword(line, "MEDLINE"))
      {
        if (referenceMode)
        {
          reference.setMedline(processReferenceLine(line, "MEDLINE"));
          parsingTitle = false;
          parsingAuthors = false;
        }
        continue;
      }
      if (isSubKeyword(line, "REMARK"))
      {
        if (referenceMode)
        {
          reference.setRemark(processReferenceLine(line, "REMARK"));
          parsingTitle = false;
          parsingAuthors = false;
        }
        continue;
      }
      if (isSubKeyword(line, "CONSRTM"))
      {
        if (referenceMode)
        {
          reference.setConsortia(processReferenceLine(line, "CONSRTM"));
          parsingTitle = false;
          parsingAuthors = false;
        }
        continue;
      }

      if (line.startsWith("SOURCE"))
      {
        parsingKeywords = false;
        sourceMode = true;
        commentMode = false;
        sourceLines.add(line);
        continue;
      }
      if (sourceMode && line.indexOf("ORGANISM") != -1)
      {
        sourceLines.add(line);
        continue;
      }

      if (line.startsWith("COMMENT"))
      {
        if (reference != null)
        {
          references.add(reference);
          reference = null;
        }
        commentMode = true;
        sourceMode = false;
        referenceMode = false;
        seqMode = false;
        comments.add(processCommentLine(line));
        continue;
      }

      // Process LOCUS line
      if (line.startsWith("LOCUS"))
      {
        locus = parseLocus(line);
        continue;
      }
      // Process BASE COUNT line
      if (line.startsWith("BASE COUNT"))
      {
        baseCount = processHeaderLine(line, "BASE COUNT");
        featureMode = false;
        continue;
      }
      // Process DEFINITION line
      if (line.startsWith("DEFINITION"))
      {
        definition = processHeaderLine(line, "DEFINITION");
        parsingDefinition = true;
        continue;
      }
      // Process ACCESSION line
      if (line.startsWith("ACCESSION"))
      {
        accession = processHeaderLine(line, "ACCESSION");
        parsingDefinition = false;
        continue;
      }
      // Process VERSION line
      if (line.startsWith("VERSION"))
      {
        version = parseVersion(line);
        continue;
      }
      // Process DBLINK line
      if (line.startsWith("DBLINK"))
      {
        dblink = processHeaderLine(line, "DBLINK");
        parsingDbLink = true;
        continue;
      }
      // Process KEYWORDS line
      if (line.startsWith("KEYWORDS"))
      {
        keywords = processHeaderLine(line, "KEYWORDS");
        parsingKeywords = true;
        parsingDbLink = false;
        continue;
      }

      // Anything else is a continuation of the entry currently being read
      if (sourceMode)
      {
        sourceLines.add(line);
        continue;
      }
      if (parsingDefinition)
      {
        definition = new StringBuffer().append(definition).append(line)
                .toString();
        continue;
      }
      if (referenceMode && parsingAuthors)
      {
        if (reference != null)
        {
          reference.setAuthors(new StringBuffer()
                  .append(reference.getAuthors()).append(line).toString());
        }
        continue;
      }
      if (referenceMode && parsingTitle)
      {
        if (reference != null)
        {
          reference.setTitle(new StringBuffer()
                  .append(reference.getTitle()).append(line).toString());
        }
        continue;
      }
      if (parsingKeywords)
      {
        keywords = new StringBuffer().append(keywords).append(line)
                .toString();
        continue;
      }
      if (parsingDbLink)
      {
        dblink = new StringBuffer().append(dblink).append(line).toString();
        continue;
      }
      if (commentMode)
      {
        comments.add(line);
      }
    }

    // Store whatever was still open when the input ended
    if (reference != null)
    {
      references.add(reference);
    }
    if (feature != null)
    {
      applyPendingLocation(feature, pendingLocation);
      if (feature.getType() != null)
      {
        features.add(feature);
      }
    }

    setEntries();
  }

  /**
   * Tells whether a line carries the given sub-keyword, i.e. starts with at
   * least one blank but fewer than {@link #CONTINUATION_INDENT} blanks,
   * followed by the keyword.
   */
  private static boolean isSubKeyword(String line, String keyword)
  {
    int indent = 0;
    while (indent < line.length() && line.charAt(indent) == ' ')
    {
      indent++;
    }
    return indent > 0 && indent < CONTINUATION_INDENT
            && line.startsWith(keyword, indent);
  }

  /**
   * Parses the collected location text, if any, and sets it on the feature.
   * Does nothing when there is no feature, no text, or only white space.
   */
  private void applyPendingLocation(GenBankFeature target,
          StringBuilder locationText)
  {
    if (target == null || locationText == null)
    {
      return;
    }
    String text = locationText.toString();
    if (text.trim().length() == 0)
    {
      return;
    }
    target.setLocation(parserFeatureLocation(target, text));
  }

  /**
   * Maps the parsed GenBank information onto the Jalview data model: builds
   * the sequence, attaches a GenBank database reference, adds one feature per
   * header entry, reference and comment block, adds the FEATURES table
   * entries, and registers the sequence with this file.
   */
  protected void setEntries()
  {
    genBankSequence = new Sequence(accession,
            DnaUtils.getNucleotidesFromSequenceVector(sequences));
    int length = genBankSequence.getLength();
    String versionText = (version == null ? "" : version.toString());

    // Mapping DBRefEntry
    DBRefEntry dbRef = new DBRefEntry();
    dbRef.setSource(DBRefSource.GENBANK);
    dbRef.setVersion(versionText);
    dbRef.setAccessionId(accession);
    // add map to indicate the sequence is a valid coordinate frame for the
    // dbref
    dbRef.setMap(new Mapping(null, new int[] { 1, length }, new int[] { 1,
        length }, 1, 1));
    genBankSequence.addDBRef(dbRef);

    // add header info as features spanning the whole sequence
    genBankSequence.addSequenceFeature(headerFeature("LOCUS",
            (locus == null ? "" : locus.toString())));
    genBankSequence.addSequenceFeature(headerFeature("DEFINITION",
            definition));
    genBankSequence
            .addSequenceFeature(headerFeature("ACCESSION", accession));
    genBankSequence
            .addSequenceFeature(headerFeature("VERSION", versionText));
    genBankSequence.addSequenceFeature(headerFeature("DBLINK",
            (dblink == null ? "" : dblink)));
    genBankSequence.addSequenceFeature(headerFeature("KEYWORDS", keywords));
    genBankSequence.addSequenceFeature(headerFeature("SOURCE",
            (source == null ? "" : source.toString())));
    genBankSequence.addSequenceFeature(headerFeature("BASE COUNT",
            (baseCount == null ? "" : baseCount)));

    // add literature and database cross references in the file
    for (GenBankReference gbRef : references)
    {
      SequenceFeature refFeature = new SequenceFeature("REFERENCE",
              gbRef.toString(), null, gbRef.getBegin(), gbRef.getEnd(),
              DBRefSource.GENBANK);
      genBankSequence.addSequenceFeature(refFeature);
    }

    // add COMMENTS
    if (comments.size() > 0)
    {
      StringBuffer sb = new StringBuffer();
      for (String comment : comments)
      {
        sb.append(comment).append(newline);
      }
      genBankSequence.addSequenceFeature(headerFeature("COMMENT",
              sb.toString()));
    }

    // Mapping FEATURES
    for (GenBankFeature feature : features)
    {
      if (feature.getType() == null)
      {
        continue;
      }
      GenBankLocation location = feature.getLocation();
      SequenceFeature sf = new SequenceFeature();
      sf.setType(feature.getType());
      sf.setDescription(feature.getType());
      sf.setBegin(location == null ? 0 : location.getMinor());
      sf.setEnd(location == null ? 0 : location.getMajor());
      Enumeration<String> names = feature.getQualifiersNames();
      while (names.hasMoreElements())
      {
        String qName = names.nextElement();
        sf.setValue(qName, feature.getQualifier(qName));
      }
      genBankSequence.addSequenceFeature(sf);
    }

    SequenceI[] parsedSeqs = new SequenceI[1];
    parsedSeqs[0] = genBankSequence;
    this.setSeqs(parsedSeqs);
  }

  /**
   * Creates a feature of the given type that runs from position 1 to the end
   * of the parsed sequence and belongs to the GenBank feature group.
   */
  private SequenceFeature headerFeature(String type, String description)
  {
    return new SequenceFeature(type, description, null, 1,
            genBankSequence.getLength(), DBRefSource.GENBANK);
  }

  /**
   * Parses a VERSION line such as {@code VERSION U00096.2 GI:48994873}.
   * 
   * @param line
   *          the complete VERSION line
   * @return the version, with the GI part set when the line contains "GI:",
   *         or null when nothing follows the keyword
   */
  private GenBankVersion parseVersion(String line)
  {
    if (line.trim().equalsIgnoreCase("VERSION"))
    {
      return null;
    }
    int keywordEnd = Math.min("VERSION".length(), line.length());
    // The version is the first blank-delimited token after the keyword; a
    // GI identifier does not have to follow it.
    String[] tokens = line.substring(keywordEnd).trim().split("\\s+");
    GenBankVersion ver = new GenBankVersion();
    ver.setVersion(tokens[0]);
    int posGI = line.indexOf("GI:", keywordEnd);
    if (posGI > -1)
    {
      ver.setGI(line.substring(posGI));
    }
    return ver;
  }

  /**
   * Parses a LOCUS line with {@link #patLocus}.
   * 
   * @param line
   *          the complete LOCUS line
   * @return the locus; left as constructed when the line does not match
   */
  private GenBankLocus parseLocus(String line)
  {
    GenBankLocus loc = new GenBankLocus();
    Matcher mat = patLocus.matcher(line);
    if (mat.find())
    {
      String name = mat.group(1);
      String len = mat.group(2);
      String strand = mat.group(3);
      String mtype = mat.group(4);
      String linear = mat.group(5);
      String division = mat.group(6);
      String date = mat.group(7);
      loc.setName(name == null ? "" : name.trim());
      loc.setSequenceLength(len == null ? 0 : Integer.parseInt(len));
      loc.setStrand(strand == null ? "" : strand);
      loc.setMoleculeType(mtype == null ? "" : mtype);
      loc.setLinearSequence("linear".equals(linear));
      loc.setDivision(division == null ? "" : division);
      loc.setModificationDate(date == null ? "" : date);
    }
    return loc;
  }

  /**
   * Builds the source information from the lines collected between SOURCE
   * and the next top-level entry.
   * 
   * @param lines
   *          the SOURCE line followed by its ORGANISM and taxonomy lines
   * @return the source; left as constructed when no lines were collected
   */
  private GenBankSource parseSource(List<String> lines)
  {
    GenBankSource sou = new GenBankSource();
    if (lines == null || lines.isEmpty())
    {
      return sou;
    }

    // The buffer is internal, so lines are joined with '\n' regardless of
    // the platform line separator that is searched for below.
    StringBuilder sb = new StringBuilder();
    for (String line : lines)
    {
      sb.append(line).append('\n');
    }
    String text = sb.toString();
    // Skip the SOURCE keyword and its padding
    String aux = text.length() > 11 ? text.substring(11) : "";

    int fim1 = aux.indexOf("\n");
    if (fim1 > -1)
    {
      sou.setSource(aux.substring(0, fim1));
      int ini2 = aux.indexOf("ORGANISM");
      if (ini2 > -1)
      {
        fim1 = aux.indexOf("\n", ini2 + 10);
        if (fim1 > -1)
        {
          sou.setOrganism(aux.substring(ini2 + 10, fim1));
          sou.setTaxonomic(aux.substring(fim1).replaceAll(" ", "")
                  .replaceAll("\\s+", ""));
        }
        else
        {
          sou.setOrganism(aux);
        }
      }
    }
    else
    {
      sou.setSource(aux);
    }
    return sou;
  }

  /**
   * Parses a feature location descriptor and sets it on the feature.
   * <p>
   * Possible situations:
   * <ul>
   * <li>{@code 467} - a single base in the presented sequence</li>
   * <li>{@code 340..565} - a continuous range of bases bounded by and
   * including the starting and ending bases</li>
   * <li>{@code <345..500} - the exact lower boundary point is unknown; the
   * location begins at some base previous to the first base specified and
   * continues to and includes the ending base</li>
   * <li>{@code <1..888} - starts before the first sequenced base and
   * continues to and includes base 888</li>
   * <li>{@code 1..>888} - starts at the first sequenced base and continues
   * beyond base 888</li>
   * <li>{@code 102.110} - the exact location is unknown but it is one of the
   * bases between bases 102 and 110, inclusive</li>
   * <li>{@code 123^124} - a site between bases 123 and 124</li>
   * <li>{@code join(12..78,134..202)} - regions 12 to 78 and 134 to 202
   * joined to form one contiguous sequence</li>
   * <li>{@code complement(34..126)} - the feature is on the strand
   * complementary to the presented strand</li>
   * <li>{@code complement(join(2691..4571,4918..5163))} - joins the regions,
   * then complements the joined segments</li>
   * <li>{@code join(complement(4918..5163),complement(2691..4571))} -
   * complements the regions, then joins the complemented segments</li>
   * <li>{@code J00194.1:100..202} - bases 100 to 202, inclusive, in the entry
   * with primary accession number 'J00194'</li>
   * <li>{@code join(1..100,J00194.1:100..202)} - joins region 1..100 of the
   * existing entry with region 100..202 of remote entry J00194</li>
   * </ul>
   * 
   * @param fea
   *          the feature that receives the location
   * @param localiza
   *          the location text; white space and line breaks are ignored
   * @return the parsed location
   * @throws NumberFormatException
   *           if a position in the text is not a number
   */
  private GenBankLocation parserFeatureLocation(GenBankFeature fea,
          String localiza)
  {
    // remove blanks, line breaks etc.
    String buf = localiza.replaceAll("\\s", "");

    // check for commas between ranges that are not enclosed in parentheses,
    // e.g. complement(100..110),complement(90..100)
    List<String> lista = new ArrayList<String>();
    int abertos = 0;
    int pinicial = 0;
    for (int i = 0; i < buf.length(); i++)
    {
      char c = buf.charAt(i);
      if (c == '(')
      {
        abertos++;
      }
      else if (c == ')')
      {
        abertos--;
      }
      else if (c == ',' && abertos == 0)
      {
        lista.add(buf.substring(pinicial, i));
        pinicial = i + 1;
      }
    }
    if (lista.size() > 0)
    {
      lista.add(buf.substring(pinicial));
      GenBankLocations um = new GenBankLocations();
      um.setOperator(GenBankLocations.NONE);
      for (String s : lista)
      {
        // an empty piece (e.g. after a trailing comma) carries no location
        if (s.length() > 0)
        {
          um.getUnits().add(parserFeatureLocation(fea, s));
        }
      }
      fea.setLocation(um);
      return um;
    }

    // handle the functions complement(location,location...),
    // join(location,location...), order(location,location...)
    if (buf.contains("("))
    {
      int ini = buf.indexOf("(");
      int fim = buf.lastIndexOf(")");
      if (fim < ini)
      {
        // closing parenthesis missing: take everything up to the end
        log.log(Level.WARNING,
                "Unbalanced parentheses in feature location - {0}", buf);
        fim = buf.length();
      }
      String token = buf.substring(0, ini);
      String inter = buf.substring(ini + 1, fim);

      boolean complement = "complement".equalsIgnoreCase(token);
      boolean join = "join".equalsIgnoreCase(token);
      boolean order = "order".equalsIgnoreCase(token);
      if (!complement && !join && !order)
      {
        log.log(Level.WARNING,
                "Token desconhecido em location/features - {0}", token);
        fea.setLocation(parserFeatureLocation(fea, inter));
        return fea.getLocation();
      }

      GenBankLocation interno = parserFeatureLocation(fea, inter);
      GenBankLocations um = new GenBankLocations();
      if (complement)
      {
        interno.setComplement(true);
        um.setOperator(GenBankLocations.COMPLEMENT);
      }
      else if (join)
      {
        um.setOperator(GenBankLocations.JOIN);
      }
      else
      {
        um.setOperator(GenBankLocations.ORDER);
      }
      um.getUnits().add(interno);
      fea.setLocation(um);
      return fea.getLocation();
    }

    // handle a plain list of locations
    if (buf.contains(","))
    {
      GenBankLocations um = new GenBankLocations();
      for (String p : buf.split(","))
      {
        um.getUnits().add(parserFeatureLocation(fea, p));
      }
      fea.setLocation(um);
      return um;
    }

    // handle a range
    if (buf.contains(".."))
    {
      String[] partes = buf.split("\\.\\.");
      if (partes.length < 2)
      {
        throw new NumberFormatException(
                "Incomplete range in feature location '" + localiza + "'");
      }
      GenBankLocationRange range = new GenBankLocationRange();
      if (buf.contains(":"))
      {
        // strip the remote entry name from the bounds
        for (int i = 0; i < partes.length; i++)
        {
          int pos = partes[i].indexOf(":");
          if (pos > 0)
          {
            range.setEntry(partes[i].substring(0, pos));
            partes[i] = partes[i].substring(pos + 1);
          }
        }
      }
      range.setStart(parseLocationPoint(partes[0], localiza));
      range.setEnd(parseLocationPoint(partes[1], localiza));
      fea.setLocation(range);
      return range;
    }

    // handle a single point
    GenBankLocationPoint gp = parseLocationPoint(buf, localiza);
    fea.setLocation(gp);
    return gp;
  }

  /**
   * Parses a single point of a location. Forms considered: {@code 467},
   * {@code 102.110}, {@code 123^124}, {@code <345}, {@code >400},
   * {@code 345>}, {@code 400<}, or a combination of these, optionally
   * preceded by {@code entry:}.
   * 
   * @param text
   *          the point, without white space
   * @param original
   *          the location text being parsed, used in error messages only
   * @return the parsed point
   * @throws NumberFormatException
   *           if a position is missing or is not a number
   */
  private GenBankLocationPoint parseLocationPoint(String text,
          String original)
  {
    GenBankLocationPoint gp = new GenBankLocationPoint();
    String buf = text;
    int colon = buf.indexOf(":");
    if (colon > 0)
    {
      gp.setEntry(buf.substring(0, colon));
      buf = buf.substring(colon + 1);
    }

    int pos = 0;
    // check for < and > before the first number
    if (isBoundaryMark(buf, pos))
    {
      gp.setPrefix(buf.charAt(pos));
      pos++;
    }
    // take the first number
    int ini = pos;
    pos = skipDigits(buf, pos);
    if (pos == ini)
    {
      throw new NumberFormatException(
              "Missing position in feature location '" + original + "'");
    }
    int num = Integer.parseInt(buf.substring(ini, pos));
    int num2 = num; // the first number may be the only number

    // check for < and > after the first number
    if (isBoundaryMark(buf, pos))
    {
      if (buf.contains(".") || buf.contains("^"))
      {
        gp.setPrefix(buf.charAt(pos));
      }
      else
      {
        gp.setSufix(buf.charAt(pos));
      }
      pos++;
    }
    // check for the separator between the numbers, . or ^
    if (pos < buf.length()
            && (buf.charAt(pos) == '.' || buf.charAt(pos) == '^'))
    {
      gp.setSymbol(buf.charAt(pos));
      pos++;
      // check for < and > before the second number
      if (isBoundaryMark(buf, pos))
      {
        gp.setSufix(buf.charAt(pos));
        pos++;
      }
      // take the second number
      ini = pos;
      pos = skipDigits(buf, pos);
      if (pos == ini)
      {
        throw new NumberFormatException(
                "Missing second position in feature location '" + original
                        + "'");
      }
      num2 = Integer.parseInt(buf.substring(ini, pos));
      // check for < and > after the second number
      if (isBoundaryMark(buf, pos))
      {
        gp.setSufix(buf.charAt(pos));
        pos++;
      }
    }
    gp.setMin(num);
    gp.setMax(num2);
    return gp;
  }

  /** Tells whether the character at pos exists and is '<' or '>'. */
  private static boolean isBoundaryMark(String s, int pos)
  {
    return pos < s.length() && (s.charAt(pos) == '<' || s.charAt(pos) == '>');
  }

  /** Returns the index of the first non-digit character at or after pos. */
  private static int skipDigits(String s, int pos)
  {
    int i = pos;
    while (i < s.length() && s.charAt(i) >= '0' && s.charAt(i) <= '9')
    {
      i++;
    }
    return i;
  }

  /**
   * Parses the text that follows the REFERENCE keyword, e.g.
   * {@code 1 (bases 1 to 1609)}.
   * 
   * @param descriptor
   *          the REFERENCE line without its keyword
   * @return an array holding the reference number, the first base and the
   *         last base, in this order; parts that are absent are 0
   */
  private int[] parseReferenceDescriptor(String descriptor)
  {
    int[] resultado = new int[3];
    if (descriptor == null)
    {
      return resultado;
    }
    Matcher mat = patReferenceDescriptor.matcher(descriptor);
    if (mat.find())
    {
      resultado[0] = Integer.parseInt(mat.group(1));
      // the span is optional, e.g. "2" or "3 (sites)"
      if (mat.group(2) != null)
      {
        resultado[1] = Integer.parseInt(mat.group(2));
        resultado[2] = Integer.parseInt(mat.group(3));
      }
    }
    return resultado;
  }

  /**
   * Removes the first occurrence of a reference sub-keyword from a line.
   * The remaining text, including its white space, is returned unchanged.
   */
  private String processReferenceLine(String line, String component)
  {
    return removeFirst(line, component);
  }

  /**
   * Removes the first occurrence of a header keyword from a line. The
   * remaining text, including its white space, is returned unchanged.
   */
  private String processHeaderLine(String line, String header)
  {
    return removeFirst(line, header);
  }

  /**
   * Returns the line without the first occurrence of the keyword, or the
   * line itself when it does not contain the keyword.
   */
  private static String removeFirst(String line, String keyword)
  {
    int init = line.indexOf(keyword);
    if (init == -1)
    {
      return line;
    }
    return line.substring(0, init)
            + line.substring(init + keyword.length());
  }

  /**
   * Parses one line of the ORIGIN section: the position of its first base
   * followed by blank-separated groups of bases.
   * 
   * @param line
   *          the sequence line
   * @return the position and the groups of bases of the line
   * @throws NumberFormatException
   *           if the line does not start with a number
   */
  private GenBankSequence processSequenceLine(String line)
  {
    GenBankSequence gbs = new GenBankSequence();
    String[] args = ltrim(line).split("\\s+");
    gbs.setId(Integer.parseInt(args[0]));
    Vector<String> seqs = new Vector<String>();
    for (int i = 1; i < args.length; i++)
    {
      seqs.add(args[i]);
    }
    // NOTE(review): the end of this method was lost from the recovered text;
    // the name of the setter that receives the groups is an assumption -
    // confirm against GenBankSequence.
    gbs.setSequences(seqs);
    return gbs;
  }

  /**
   * Removes the COMMENT keyword from the first line of a comment block.
   * <p>
   * NOTE(review): this method is called by {@link #parse()} but its text was
   * lost; it is reconstructed on the model of the header line handling -
   * confirm.
   */
  private String processCommentLine(String line)
  {
    return processHeaderLine(line, "COMMENT");
  }

  /**
   * Removes trailing white space.
   * <p>
   * NOTE(review): the signature and first statement were lost from the
   * recovered text and are reconstructed as the mirror image of
   * {@link #ltrim(String)} - confirm.
   */
  public String rtrim(String s)
  {
    int i = s.length() - 1;
    while (i >= 0 && Character.isWhitespace(s.charAt(i)))
    {
      i--;
    }
    return s.substring(0, i + 1);
  }

  /**
   * Removes leading white space.
   */
  public String ltrim(String s)
  {
    int i = 0;
    while (i < s.length() && Character.isWhitespace(s.charAt(i)))
    {
      i++;
    }
    return s.substring(i);
  }

  /**
   * Writes every sequence of this file as a GenBank-like record: features
   * named after header entries are written as header lines, all other
   * features as FEATURES table entries with their values as qualifiers,
   * followed by the ORIGIN section and the record terminator.
   * 
   * @return the text of all records
   */
  public String print()
  {
    StringBuffer out = new StringBuffer();
    for (SequenceI seq : this.getSeqs())
    {
      SequenceFeature[] seqFeatures = seq.getSequenceFeatures();
      boolean featureLinePrinted = false;
      if (seqFeatures != null)
      {
        for (SequenceFeature sf : seqFeatures)
        {
          featureLinePrinted = printFeature(out, sf, featureLinePrinted);
        }
      }

      out.append("ORIGIN").append(newline);
      // The sequence is written in lines of 6 groups of 10 bases
      String sequenceString = seq.getSequenceAsString();
      int total = sequenceString.length();
      for (int start = 0; start < total; start += BASES_PER_LINE)
      {
        int end = Math.min(start + BASES_PER_LINE, total);
        out.append(" ").append(start + 1).append(" ");
        for (int g = start; g < end; g += BASES_PER_GROUP)
        {
          if (g > start)
          {
            out.append(" ");
          }
          out.append(sequenceString, g,
                  Math.min(g + BASES_PER_GROUP, end));
        }
        out.append(newline);
      }
      out.append("//");
    }
    return out.toString();
  }

  /**
   * Writes one feature, either as a header line or as a FEATURES table
   * entry preceded, the first time, by the FEATURES header.
   * 
   * @param out
   *          the buffer to write to
   * @param sf
   *          the feature to write
   * @param featureLinePrinted
   *          whether the FEATURES header has been written already
   * @return whether the FEATURES header has been written after this call
   */
  private boolean printFeature(StringBuffer out, SequenceFeature sf,
          boolean featureLinePrinted)
  {
    String type = sf.getType();
    if ("LOCUS".equals(type) || "REFERENCE".equals(type))
    {
      // the description already holds the complete entry
      out.append(sf.getDescription()).append(newline);
      return featureLinePrinted;
    }
    if ("DEFINITION".equals(type) || "VERSION".equals(type)
            || "ACCESSION".equals(type) || "DBLINK".equals(type)
            || "KEYWORDS".equals(type) || "SOURCE".equals(type)
            || "COMMENT".equals(type) || "BASE COUNT".equals(type))
    {
      out.append(type).append(" ").append(sf.getDescription())
              .append(newline);
      return featureLinePrinted;
    }

    if (!featureLinePrinted)
    {
      out.append("FEATURES Location/Qualifiers").append(newline);
    }
    out.append(" ").append(type).append(" ").append(sf.getBegin())
            .append("..").append(sf.getEnd()).append(newline);
    Hashtable<?, ?> qualifiers = sf.otherDetails;
    if (qualifiers != null)
    {
      Enumeration<?> keys = qualifiers.keys();
      while (keys.hasMoreElements())
      {
        Object key = keys.nextElement();
        Object value = qualifiers.get(key);
        if (value != null)
        {
          out.append(" /").append(key).append("=").append(value)
                  .append(newline);
        }
      }
    }
    return true;
  }
}