X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FGenBankFile.java;h=1c17c0594903916d5f5d56b92948583abfbebe26;hb=refs%2Fheads%2FJAL-1260_droldan;hp=47150959bd1fbaa828c34766bdddae5db485cf26;hpb=2d63cfb4d8f84de5f40670bb301ee8a22db321ff;p=jalview.git diff --git a/src/jalview/io/GenBankFile.java b/src/jalview/io/GenBankFile.java index 4715095..1c17c05 100644 --- a/src/jalview/io/GenBankFile.java +++ b/src/jalview/io/GenBankFile.java @@ -32,842 +32,1108 @@ import java.util.regex.Pattern; import org.apache.james.mime4j.field.ParsedField; -public class GenBankFile extends AlignFile { - private static final Logger log = Logger.getLogger(GenBankFile.class.getName()); - private GenBankVersion version = new GenBankVersion(); - private GenBankLocus locus = new GenBankLocus(); - private GenBankSource source = new GenBankSource(); - private static final Pattern patLocation = Pattern.compile("(\\d+)\\.\\.(\\d+)"); - private static final Pattern patLocationComp = Pattern.compile("(complement)\\((\\d+)\\.\\.(\\d+)\\)"); - private static final Pattern patLocus = Pattern.compile("^LOCUS +([a-z|A-Z|0-9|_]+) +([0-9]+) bp ( {3}|ss\\-|ds\\-|ms\\-)([a-z|A-Z|-|\\s]+) ([a-z| ]{8}) ([A-Z| ]{3}) ([0-9]+-[A-Z]+-[0-9]+)"); - private static final Pattern patQualifierKey = Pattern.compile("/(.*?)="); - private static final Pattern patFeatureKey = Pattern.compile("^\\s{5}([A-Za-z0-9\\_\\']+)\\s+"); - - private String definition; - private String accession; - private String keywords; - private String dblink; - private String baseCount; - - private Vector features; - private Vector comments; - //Items under origin - private Vector sequences; - private Vector references; - - private SequenceI genBankSequence; - - public GenBankFile() { - } - - public GenBankFile(String inFile, String type) throws IOException { - super(inFile, type); - } - - public GenBankFile(FileParse source) throws IOException { - super(source); - } - - public void initData() { - super.initData(); - features = new Vector(); - comments = new Vector(); - sequences = new Vector(); - references = new Vector(); - } - - public void parse() throws IOException { - String line; - boolean featureMode = false; //FEATURES found - boolean seqMode = false; //Parsing Sequences from SOURCE - boolean referenceMode = false; //REFERENCE found - boolean sourceMode = false; //SOURCE found - boolean commentMode = false; //COMMENT found - boolean parsingAuthors = false; //Parsing authors (multiline) - boolean parsingDefinition = false; //Parsing definition (multiline) - boolean parsingKeywords = false; //Parsing keywords (multiline) - boolean parsingDbLink = false; //Parsing DBLINK (multiline) - boolean parsingTitle = false; //Parsing title (multiline) - boolean parsingQualifier = false; //Parsing feature qualifier (multine) - String currentQualifierName = ""; - GenBankReference reference = null; - GenBankFeature feature = null; - List sourceLines = new ArrayList(); - - if (this.isValid()){ - - while ((line = nextLine()) != null) { - // We only process lines if they have contents within - if (line.length() == 0) - continue; - - if (line.startsWith("FEATURES")){ - featureMode = true; - seqMode = false; - referenceMode = false; - sourceMode = false; - commentMode = false; - feature = new GenBankFeature(); - source = parseSource(sourceLines); - } - - - if (seqMode) { - if (!line.startsWith("//")){ - GenBankSequence seq = processSequenceLine(line); - sequences.add(seq); - } - featureMode = false; - referenceMode = false; - sourceMode = false; - } - - if (line.startsWith("ORIGIN")){ - if (feature.getType()!=null) - features.add(feature); - featureMode = false; - referenceMode = false; - sourceMode = false; - seqMode = true; - } - - if (featureMode){ - // Process feature line - if (!line.startsWith("FEATURES") && !line.startsWith("BASE COUNT")){ - //Parse type - if (!line.trim().startsWith("/")){ - Matcher featuresMatch = patFeatureKey.matcher(line); - if (featuresMatch.find()){ - if (feature.getType()!=null) - features.add(feature); //Hay que añadirlo sólo si no se está a mitad de un qualif o una feature - //It's a feature - String type = featuresMatch.group(0); - feature = new GenBankFeature(); - feature.setType(type); - GenBankLocation loc = parserFeatureLocation(feature, line.replace(type,"")); - feature.setLocation(loc); - parsingQualifier = false; - continue; - }else if (parsingQualifier) { //If not a feature, it's another part of a qualifier - String qValue = feature.getQualifier(currentQualifierName); - StringBuffer sb = new StringBuffer().append(qValue).append(ltrim(line)); - feature.updateQualifier(currentQualifierName, sb.toString()); - continue; - } - }else{ - //It's the begining of a qualifier line - Matcher matcher = patQualifierKey.matcher(line); - if (matcher.find()){ - String qName = matcher.group(1); - currentQualifierName = qName.replace("/",""); - line = line.replace(qName,"").replace("/", "").replace("=",""); - feature.addQualifier(currentQualifierName, ltrim(line)); - parsingQualifier = true; - continue; - } - } - } - } - // Process REFERENCE line - if (line.startsWith("REFERENCE")) { - if (!referenceMode){ - //This is line is the REFERENCE line - referenceMode = true; - featureMode = false; - sourceMode = false; - seqMode = false; - }else{ - //We were at referenceMode, then add current reference to the list and create a new one - references.add(reference); - } - reference = new GenBankReference(); - String desc = processReferenceLine(line,"REFERENCE"); - int[] ranges = parseReferenceDescriptor(desc); - reference.setDescriptor(desc); - reference.setOrder(ranges[0]); - reference.setBegin(ranges[1]); - reference.setEnd(ranges[2]); - parsingAuthors = false; - parsingTitle = false; - continue; - } - - if (line.startsWith(" AUTHORS")){ - if (referenceMode){ - reference.setAuthors(processReferenceLine(line,"AUTHORS")); - parsingAuthors = true; - parsingTitle = false; - } - continue; - } - if (line.startsWith(" TITLE")){ - if (referenceMode){ - reference.setTitle(processReferenceLine(line,"TITLE")); - parsingAuthors = false; - parsingTitle = true; - } - continue; - } - if (line.startsWith(" JOURNAL")){ - if (referenceMode){ - reference.setJournal(processReferenceLine(line,"JOURNAL")); - parsingTitle = false; - parsingAuthors = false; - } - continue; - } - if (line.startsWith(" PUBMED")){ - if (referenceMode){ - reference.setPubmed(processReferenceLine(line,"PUBMED")); - parsingTitle = false; - parsingAuthors = false; - } - continue; - } - - if (line.startsWith(" MEDLINE")){ - if (referenceMode){ - reference.setMedline(processReferenceLine(line,"MEDLINE")); - parsingTitle = false; - parsingAuthors = false; - } - continue; - } - if (line.startsWith(" REMARK")){ - if (referenceMode){ - reference.setRemark(processReferenceLine(line,"REMARK")); - parsingTitle = false; - parsingAuthors = false; - } - continue; - } - if (line.startsWith(" CONSRTM")){ - if (referenceMode){ - reference.setConsortia(processReferenceLine(line,"CONSRTM")); - parsingTitle = false; - parsingAuthors = false; - } - continue; - } - - - if (line.startsWith("SOURCE")) { - parsingKeywords = false; - sourceMode = true; - commentMode = false; - if (sourceMode){ - sourceLines.add(line); - } - continue; - } - if (line.indexOf("ORGANISM")!=-1) { - if (sourceMode){ - sourceLines.add(line); - continue; - } - } - - if (line.startsWith("COMMENT")){ - if (reference!=null) - references.add(reference); - commentMode = true; - sourceMode = false; - referenceMode = false; - sourceMode = false; - seqMode = false; - comments.add(processCommentLine(line)); - continue; - } - // Process LOCUS line - if (line.startsWith("LOCUS")) { - locus = parseLocus(line); - continue; - } - // Process BASE COUNT line - if (line.startsWith("BASE COUNT")) { - baseCount = processHeaderLine(line,"BASE COUNT"); - featureMode = false; - continue; - } - // Process DEFINITION line - if (line.startsWith("DEFINITION")) { - definition = processHeaderLine(line,"DEFINITION"); - parsingDefinition = true; - continue; - } - // Process ACCESSION line - if (line.startsWith("ACCESSION")) { - accession = processHeaderLine(line,"ACCESSION"); - parsingDefinition = false; - continue; - } - // Process VERSION line - if (line.startsWith("VERSION")) { - version = parseVersion(line); - //headers.put("VERSION", processHeaderLine(line,"VERSION")); - continue; - } - // Process DBLINK line - if (line.startsWith("DBLINK")) { - dblink = processHeaderLine(line,"DBLINK"); - parsingDbLink = true; - continue; - } - // Process KEYWORDS line - if (line.startsWith("KEYWORDS")) { - keywords = processHeaderLine(line,"KEYWORDS"); - parsingKeywords = true; - parsingDbLink = false; - continue; - } - if (sourceMode){ - sourceLines.add(line); - continue; - } - if (parsingDefinition){ - StringBuffer sb = new StringBuffer().append(definition).append(line); - definition = sb.toString(); - continue; - } - if (referenceMode && parsingAuthors){ - if (reference!=null){ - StringBuffer authors = new StringBuffer().append(reference.getAuthors()).append(line); - reference.setAuthors(authors.toString()); - } - continue; - } - if (referenceMode && parsingTitle){ - if (reference!=null){ - StringBuffer title = new StringBuffer().append(reference.getTitle()).append(line); - reference.setTitle(title.toString()); - } - continue; - } - if (parsingKeywords){ - StringBuffer sb = new StringBuffer().append(keywords).append(line); - keywords = sb.toString(); - continue; - } - if (parsingDbLink){ - StringBuffer sb = new StringBuffer().append(dblink).append(line); - dblink = sb.toString(); - continue; - } - if (commentMode){ - comments.add(line); - } - } - setEntries(); - }else{ - //File is not valid - throw new IOException("GenBankFile is not valid."); - } - } - - protected void setEntries(){ - StringBuffer result = new StringBuffer(); - //Mapping GenBank info into Jalview data model - genBankSequence = new Sequence(accession,DnaUtils.getNucleotidesFromSequenceVector(sequences)); - //Mapping DBRefEntry - DBRefEntry dbRef = new DBRefEntry(); - dbRef.setSource(DBRefSource.GENBANK); - dbRef.setVersion(version == null ? "" : version.toString()); - dbRef.setAccessionId(accession); - // add map to indicate the sequence is a valid coordinate frame for the dbref - dbRef.setMap(new Mapping(null, new int[] - { 1, genBankSequence.getLength() }, new int[] - { 1, genBankSequence.getLength() }, 1, 1)); - genBankSequence.addDBRef(dbRef); - - //add header info as non-positional features - //add LOCUS - SequenceFeature locusF = new SequenceFeature("LOCUS", (locus == null ? "" : locus.toString()), null, 1, genBankSequence.getLength(), DBRefSource.GENBANK); - genBankSequence.addSequenceFeature(locusF); - //add DEFNITION - SequenceFeature defF = new SequenceFeature("DEFINITION", definition, null, 1, genBankSequence.getLength(), DBRefSource.GENBANK); - genBankSequence.addSequenceFeature(defF); - //add ACCESSION - SequenceFeature accessionF = new SequenceFeature("ACCESSION", accession, null, 1, genBankSequence.getLength(), DBRefSource.GENBANK); - genBankSequence.addSequenceFeature(accessionF); - //add VERSION - SequenceFeature versionF = new SequenceFeature("VERSION", (version == null ? "" : version.toString()), null, 1, genBankSequence.getLength(), DBRefSource.GENBANK); - genBankSequence.addSequenceFeature(versionF); - //add DBLINK - SequenceFeature dblinkF = new SequenceFeature("DBLINK", (dblink == null ? "" : dblink.toString()), null, 1, genBankSequence.getLength(), DBRefSource.GENBANK); - genBankSequence.addSequenceFeature(dblinkF); - //add KEYWORDS - SequenceFeature keywordsF = new SequenceFeature("KEYWORDS", keywords, null, 1, genBankSequence.getLength(), DBRefSource.GENBANK); - genBankSequence.addSequenceFeature(keywordsF); - //add SOURCE - SequenceFeature sourceF = new SequenceFeature("SOURCE", (source == null ? "" : source.toString()), null, 1, genBankSequence.getLength(), DBRefSource.GENBANK); - genBankSequence.addSequenceFeature(sourceF); - //add BASE COUNT - SequenceFeature baseCountF = new SequenceFeature("BASE COUNT", (baseCount == null ? "" : baseCount.toString()), null, 1, genBankSequence.getLength(), DBRefSource.GENBANK); - genBankSequence.addSequenceFeature(baseCountF); - - // add literature and database cross references in the file - for (GenBankReference gbRef:references){ - //They are non-positional features - SequenceFeature refFeature = new SequenceFeature("REFERENCE", gbRef.toString(),null,gbRef.getBegin(),gbRef.getEnd(),DBRefSource.GENBANK); - genBankSequence.addSequenceFeature(refFeature); - } - //add COMMENTS - if (comments.size()>0){ - StringBuffer sb = new StringBuffer(); - for (String comment: comments){ - sb.append(comment).append(newline); - } - SequenceFeature commentF = new SequenceFeature("COMMENT", sb.toString(), null, 1, genBankSequence.getLength(), DBRefSource.GENBANK); - genBankSequence.addSequenceFeature(commentF); - } - //Mapping FEATURES - for (GenBankFeature feature:features){ - if (feature.getType()!=null){ - SequenceFeature sf = new SequenceFeature(); - sf.setType(feature.getType()); - sf.setDescription(feature.getType()); - - sf.setBegin(feature.getLocation()==null ? 0 : feature.getLocation().getMinor()); - sf.setEnd(feature.getLocation()==null ? 0 : feature.getLocation().getMajor()); - Enumeration names = feature.getQualifiersNames(); - while (names.hasMoreElements()){ - String qName = names.nextElement(); - String qValue = feature.getQualifier(qName); - sf.setValue(qName, qValue); - } - genBankSequence.addSequenceFeature(sf); - } - } - SequenceI[] parsedSeqs = new SequenceI[1]; - parsedSeqs[0] = genBankSequence; - this.setSeqs(parsedSeqs); - } - private GenBankVersion parseVersion(String line) { - //VERSION U00096.2 GI:48994873 - if (line.trim().equalsIgnoreCase("VERSION")){ - return null; - }else{ - GenBankVersion ver = new GenBankVersion(); - String v = line.substring(11, line.indexOf(" ", 12)).trim(); - ver.setVersion(v); - int posGI = line.indexOf("GI:", 11 + v.length()); - if (posGI > -1) { - ver.setGI(line.substring(posGI)); - } - return ver; - } +public class GenBankFile extends AlignFile +{ + private static final Logger log = Logger.getLogger(GenBankFile.class + .getName()); + + private GenBankVersion version = new GenBankVersion(); + + private GenBankLocus locus = new GenBankLocus(); + + private GenBankSource source = new GenBankSource(); + + private static final Pattern patLocation = Pattern + .compile("(\\d+)\\.\\.(\\d+)"); + + private static final Pattern patLocationComp = Pattern + .compile("(complement)\\((\\d+)\\.\\.(\\d+)\\)"); + + private static final Pattern patLocus = Pattern + .compile("^LOCUS +([a-z|A-Z|0-9|_]+) +([0-9]+) bp ( {3}|ss\\-|ds\\-|ms\\-)([a-z|A-Z|-|\\s]+) ([a-z| ]{8}) ([A-Z| ]{3}) ([0-9]+-[A-Z]+-[0-9]+)"); + + private static final Pattern patQualifierKey = Pattern.compile("/(.*?)="); + + private static final Pattern patFeatureKey = Pattern + .compile("^\\s{5}([A-Za-z0-9\\_\\']+)\\s+"); + + private String definition; + + private String accession; + + private String keywords; + + private String dblink; + + private String baseCount; + + private Vector features; + + private Vector comments; + + // Items under origin + private Vector sequences; + + private Vector references; + + private SequenceI genBankSequence; + + public GenBankFile() + { + } + + public GenBankFile(String inFile, String type) throws IOException + { + super(inFile, type); + } + + public GenBankFile(FileParse source) throws IOException + { + super(source); + } + + public void initData() + { + super.initData(); + features = new Vector(); + comments = new Vector(); + sequences = new Vector(); + references = new Vector(); + } + + public void parse() throws IOException + { + String line; + boolean featureMode = false; // FEATURES found + boolean seqMode = false; // Parsing Sequences from SOURCE + boolean referenceMode = false; // REFERENCE found + boolean sourceMode = false; // SOURCE found + boolean commentMode = false; // COMMENT found + boolean parsingAuthors = false; // Parsing authors (multiline) + boolean parsingDefinition = false; // Parsing definition (multiline) + boolean parsingKeywords = false; // Parsing keywords (multiline) + boolean parsingDbLink = false; // Parsing DBLINK (multiline) + boolean parsingTitle = false; // Parsing title (multiline) + boolean parsingQualifier = false; // Parsing feature qualifier (multine) + String currentQualifierName = ""; + GenBankReference reference = null; + GenBankFeature feature = null; + List sourceLines = new ArrayList(); + + if (this.isValid()) + { + + while ((line = nextLine()) != null) + { + // We only process lines if they have contents within + if (line.length() == 0) + continue; + + if (line.startsWith("FEATURES")) + { + featureMode = true; + seqMode = false; + referenceMode = false; + sourceMode = false; + commentMode = false; + feature = new GenBankFeature(); + source = parseSource(sourceLines); + } + + if (seqMode) + { + if (!line.startsWith("//")) + { + GenBankSequence seq = processSequenceLine(line); + sequences.add(seq); + } + featureMode = false; + referenceMode = false; + sourceMode = false; + } + + if (line.startsWith("ORIGIN")) + { + if (feature.getType() != null) + features.add(feature); + featureMode = false; + referenceMode = false; + sourceMode = false; + seqMode = true; + } + + if (featureMode) + { + // Process feature line + if (!line.startsWith("FEATURES") + && !line.startsWith("BASE COUNT")) + { + // Parse type + if (!line.trim().startsWith("/")) + { + Matcher featuresMatch = patFeatureKey.matcher(line); + if (featuresMatch.find()) + { + if (feature.getType() != null) + features.add(feature); // Hay que a�adirlo s�lo si no se est� + // a mitad de un qualif o una feature + // It's a feature + String type = featuresMatch.group(0); + feature = new GenBankFeature(); + feature.setType(type); + GenBankLocation loc = parserFeatureLocation(feature, + line.replace(type, "")); + feature.setLocation(loc); + parsingQualifier = false; + continue; + } + else if (parsingQualifier) + { // If not a feature, it's another part of a qualifier + String qValue = feature.getQualifier(currentQualifierName); + StringBuffer sb = new StringBuffer().append(qValue).append( + ltrim(line)); + feature.updateQualifier(currentQualifierName, sb.toString()); + continue; + } + } + else + { + // It's the begining of a qualifier line + Matcher matcher = patQualifierKey.matcher(line); + if (matcher.find()) + { + String qName = matcher.group(1); + currentQualifierName = qName.replace("/", ""); + line = line.replace(qName, "").replace("/", "") + .replace("=", ""); + feature.addQualifier(currentQualifierName, ltrim(line)); + parsingQualifier = true; + continue; + } + } + } + } + // Process REFERENCE line + if (line.startsWith("REFERENCE")) + { + if (!referenceMode) + { + // This is line is the REFERENCE line + referenceMode = true; + featureMode = false; + sourceMode = false; + seqMode = false; + } + else + { + // We were at referenceMode, then add current reference to the list + // and create a new one + references.add(reference); + } + reference = new GenBankReference(); + String desc = processReferenceLine(line, "REFERENCE"); + int[] ranges = parseReferenceDescriptor(desc); + reference.setDescriptor(desc); + reference.setOrder(ranges[0]); + reference.setBegin(ranges[1]); + reference.setEnd(ranges[2]); + parsingAuthors = false; + parsingTitle = false; + continue; + } + + if (line.startsWith(" AUTHORS")) + { + if (referenceMode) + { + reference.setAuthors(processReferenceLine(line, "AUTHORS")); + parsingAuthors = true; + parsingTitle = false; + } + continue; + } + if (line.startsWith(" TITLE")) + { + if (referenceMode) + { + reference.setTitle(processReferenceLine(line, "TITLE")); + parsingAuthors = false; + parsingTitle = true; + } + continue; + } + if (line.startsWith(" JOURNAL")) + { + if (referenceMode) + { + reference.setJournal(processReferenceLine(line, "JOURNAL")); + parsingTitle = false; + parsingAuthors = false; + } + continue; + } + if (line.startsWith(" PUBMED")) + { + if (referenceMode) + { + reference.setPubmed(processReferenceLine(line, "PUBMED")); + parsingTitle = false; + parsingAuthors = false; + } + continue; + } + + if (line.startsWith(" MEDLINE")) + { + if (referenceMode) + { + reference.setMedline(processReferenceLine(line, "MEDLINE")); + parsingTitle = false; + parsingAuthors = false; + } + continue; + } + if (line.startsWith(" REMARK")) + { + if (referenceMode) + { + reference.setRemark(processReferenceLine(line, "REMARK")); + parsingTitle = false; + parsingAuthors = false; + } + continue; + } + if (line.startsWith(" CONSRTM")) + { + if (referenceMode) + { + reference.setConsortia(processReferenceLine(line, "CONSRTM")); + parsingTitle = false; + parsingAuthors = false; + } + continue; + } + + if (line.startsWith("SOURCE")) + { + parsingKeywords = false; + sourceMode = true; + commentMode = false; + if (sourceMode) + { + sourceLines.add(line); + } + continue; + } + if (line.indexOf("ORGANISM") != -1) + { + if (sourceMode) + { + sourceLines.add(line); + continue; + } + } + + if (line.startsWith("COMMENT")) + { + if (reference != null) + references.add(reference); + commentMode = true; + sourceMode = false; + referenceMode = false; + sourceMode = false; + seqMode = false; + comments.add(processCommentLine(line)); + continue; + } + // Process LOCUS line + if (line.startsWith("LOCUS")) + { + locus = parseLocus(line); + continue; + } + // Process BASE COUNT line + if (line.startsWith("BASE COUNT")) + { + baseCount = processHeaderLine(line, "BASE COUNT"); + featureMode = false; + continue; + } + // Process DEFINITION line + if (line.startsWith("DEFINITION")) + { + definition = processHeaderLine(line, "DEFINITION"); + parsingDefinition = true; + continue; + } + // Process ACCESSION line + if (line.startsWith("ACCESSION")) + { + accession = processHeaderLine(line, "ACCESSION"); + parsingDefinition = false; + continue; + } + // Process VERSION line + if (line.startsWith("VERSION")) + { + version = parseVersion(line); + // headers.put("VERSION", processHeaderLine(line,"VERSION")); + continue; + } + // Process DBLINK line + if (line.startsWith("DBLINK")) + { + dblink = processHeaderLine(line, "DBLINK"); + parsingDbLink = true; + continue; + } + // Process KEYWORDS line + if (line.startsWith("KEYWORDS")) + { + keywords = processHeaderLine(line, "KEYWORDS"); + parsingKeywords = true; + parsingDbLink = false; + continue; + } + if (sourceMode) + { + sourceLines.add(line); + continue; + } + if (parsingDefinition) + { + StringBuffer sb = new StringBuffer().append(definition).append( + line); + definition = sb.toString(); + continue; + } + if (referenceMode && parsingAuthors) + { + if (reference != null) + { + StringBuffer authors = new StringBuffer().append( + reference.getAuthors()).append(line); + reference.setAuthors(authors.toString()); + } + continue; + } + if (referenceMode && parsingTitle) + { + if (reference != null) + { + StringBuffer title = new StringBuffer().append( + reference.getTitle()).append(line); + reference.setTitle(title.toString()); + } + continue; + } + if (parsingKeywords) + { + StringBuffer sb = new StringBuffer().append(keywords) + .append(line); + keywords = sb.toString(); + continue; + } + if (parsingDbLink) + { + StringBuffer sb = new StringBuffer().append(dblink).append(line); + dblink = sb.toString(); + continue; + } + if (commentMode) + { + comments.add(line); + } + } + setEntries(); } - - private GenBankLocus parseLocus(String line){ - GenBankLocus loc = new GenBankLocus(); - Matcher mat = patLocus.matcher(line); - if (mat.find()) { - String name = mat.group(1); - String len = mat.group(2); - String strand = mat.group(3); - String mtype = mat.group(4); - String linear = mat.group(5); - String division = mat.group(6); - String date = mat.group(7); - - loc.setName(name == null ? "" : name.trim()); - loc.setSequenceLength(len == null ? 0 : Integer.parseInt(len)); - loc.setStrand(strand == null ? "" : strand); - loc.setMoleculeType(mtype == null ? "" : mtype); - loc.setLinearSequence("linear".equals(linear)); - loc.setDivision(division == null ? "" : division); - loc.setModificationDate(date == null ? "" :date); - } - return loc; + else + { + // File is not valid + throw new IOException("GenBankFile is not valid."); } - private GenBankSource parseSource(List lines){ - StringBuffer sb = new StringBuffer(); - for(String line:lines){ - sb.append(line).append(newline); - } - // Source section - GenBankSource sou = new GenBankSource(); - String aux = sb.toString().substring(11); - int fim1 = aux.indexOf("\n"); - if (fim1 > -1) { - sou.setSource(aux.substring(0, fim1)); - int ini2 = aux.indexOf("ORGANISM"); - if (ini2 > -1) { - fim1 = aux.indexOf("\n", ini2 + 10); - if (fim1 > -1) { - sou.setOrganism(aux.substring(ini2 + 10, fim1)); - sou.setTaxonomic(aux.substring(fim1).replaceAll(" ", "").replaceAll("\\s+", "")); - } else { - sou.setOrganism(aux); - } - } - } else { - sou.setSource(aux); - } - return sou; - } - - /** - * Possible situations: - * - * 467 Points to a single base in the presented sequence 340..565 Points to - * a continuous range of bases bounded by and including the starting and - * ending bases <345..500 Indicates that the exact lower boundary point - * of a feature is unknown. The location begins at some base previous to the - * first base specified (which need not be contained in the presented - * sequence) and continues to and includes the ending base <1..888 The - * feature starts before the first sequenced base and continues to and - * includes base 888 1..>888 The feature starts at the first sequenced - * base and continues beyond base 888 102.110 Indicates that the exact - * location is unknown but that it is one of the bases between bases 102 and - * 110, inclusive 123^124 Points to a site between bases 123 and 124 - * join(12..78,134..202) Regions 12 to 78 and 134 to 202 should be joined to - * form one contiguous sequence complement(34..126) Start at the base - * complementary to 126 and finish at the base complementary to base 34 (the - * feature is on the strand complementary to the presented strand) - * complement(join(2691..4571,4918..5163)) Joins regions 2691 to 4571 and - * 4918 to 5163, then complements the joined segments (the feature is on the - * strand complementary to the presented strand) - * join(complement(4918..5163),complement(2691..4571)) Complements regions - * 4918 to 5163 and 2691 to 4571, then joins the complemented segments (the - * feature is on the strand complementary to the presented strand) - * J00194.1:100..202 Points to bases 100 to 202, inclusive, in the entry (in - * this database) with primary accession number 'J00194' - * join(1..100,J00194.1:100..202) Joins region 1..100 of the existing entry - * with the region 100..202 of remote entry J00194 - * - * @param fea - * @param localiza - */ - private GenBankLocation parserFeatureLocation(GenBankFeature fea, String localiza) { - // remove os espaços, quebra de linhas etc - String buf = localiza.replaceAll("\\s", ""); - - // checks if there is a comma present between ranges - // complement(100..110),complement(90..100) - char[] buf2 = buf.toCharArray(); - int abertos = 0; - java.util.List lista = new java.util.ArrayList(); - int pinicial = 0; - for (int i = 0; i < buf2.length; i++) { - if (buf2[i] == '(') { - abertos++; - } else if (buf2[i] == ')') { - abertos--; - } else if (buf2[i] == ',' && abertos == 0) { - lista.add(buf.substring(pinicial, i)); - pinicial = i + 1; + } + + protected void setEntries() + { + StringBuffer result = new StringBuffer(); + // Mapping GenBank info into Jalview data model + genBankSequence = new Sequence(accession, + DnaUtils.getNucleotidesFromSequenceVector(sequences)); + // Mapping DBRefEntry + DBRefEntry dbRef = new DBRefEntry(); + dbRef.setSource(DBRefSource.GENBANK); + dbRef.setVersion(version == null ? "" : version.toString()); + dbRef.setAccessionId(accession); + // add map to indicate the sequence is a valid coordinate frame for the + // dbref + dbRef.setMap(new Mapping(null, new int[] + { 1, genBankSequence.getLength() }, new int[] + { 1, genBankSequence.getLength() }, 1, 1)); + genBankSequence.addDBRef(dbRef); + + // add header info as non-positional features + // add LOCUS + SequenceFeature locusF = new SequenceFeature("LOCUS", + (locus == null ? "" : locus.toString()), null, 1, + genBankSequence.getLength(), DBRefSource.GENBANK); + genBankSequence.addSequenceFeature(locusF); + // add DEFNITION + SequenceFeature defF = new SequenceFeature("DEFINITION", definition, + null, 1, genBankSequence.getLength(), DBRefSource.GENBANK); + genBankSequence.addSequenceFeature(defF); + // add ACCESSION + SequenceFeature accessionF = new SequenceFeature("ACCESSION", + accession, null, 1, genBankSequence.getLength(), + DBRefSource.GENBANK); + genBankSequence.addSequenceFeature(accessionF); + // add VERSION + SequenceFeature versionF = new SequenceFeature("VERSION", + (version == null ? "" : version.toString()), null, 1, + genBankSequence.getLength(), DBRefSource.GENBANK); + genBankSequence.addSequenceFeature(versionF); + // add DBLINK + SequenceFeature dblinkF = new SequenceFeature("DBLINK", + (dblink == null ? "" : dblink.toString()), null, 1, + genBankSequence.getLength(), DBRefSource.GENBANK); + genBankSequence.addSequenceFeature(dblinkF); + // add KEYWORDS + SequenceFeature keywordsF = new SequenceFeature("KEYWORDS", keywords, + null, 1, genBankSequence.getLength(), DBRefSource.GENBANK); + genBankSequence.addSequenceFeature(keywordsF); + // add SOURCE + SequenceFeature sourceF = new SequenceFeature("SOURCE", + (source == null ? "" : source.toString()), null, 1, + genBankSequence.getLength(), DBRefSource.GENBANK); + genBankSequence.addSequenceFeature(sourceF); + // add BASE COUNT + SequenceFeature baseCountF = new SequenceFeature("BASE COUNT", + (baseCount == null ? "" : baseCount.toString()), null, 1, + genBankSequence.getLength(), DBRefSource.GENBANK); + genBankSequence.addSequenceFeature(baseCountF); + + // add literature and database cross references in the file + for (GenBankReference gbRef : references) + { + // They are non-positional features + SequenceFeature refFeature = new SequenceFeature("REFERENCE", + gbRef.toString(), null, gbRef.getBegin(), gbRef.getEnd(), + DBRefSource.GENBANK); + genBankSequence.addSequenceFeature(refFeature); + } + // add COMMENTS + if (comments.size() > 0) + { + StringBuffer sb = new StringBuffer(); + for (String comment : comments) + { + sb.append(comment).append(newline); + } + SequenceFeature commentF = new SequenceFeature("COMMENT", + sb.toString(), null, 1, genBankSequence.getLength(), + DBRefSource.GENBANK); + genBankSequence.addSequenceFeature(commentF); + } + // Mapping FEATURES + for (GenBankFeature feature : features) + { + if (feature.getType() != null) + { + SequenceFeature sf = new SequenceFeature(); + sf.setType(feature.getType()); + sf.setDescription(feature.getType()); + + sf.setBegin(feature.getLocation() == null ? 0 : feature + .getLocation().getMinor()); + sf.setEnd(feature.getLocation() == null ? 0 : feature.getLocation() + .getMajor()); + Enumeration names = feature.getQualifiersNames(); + while (names.hasMoreElements()) + { + String qName = names.nextElement(); + String qValue = feature.getQualifier(qName); + sf.setValue(qName, qValue); + } + genBankSequence.addSequenceFeature(sf); + } + } + SequenceI[] parsedSeqs = new SequenceI[1]; + parsedSeqs[0] = genBankSequence; + this.setSeqs(parsedSeqs); + } + + private GenBankVersion parseVersion(String line) + { + // VERSION U00096.2 GI:48994873 + if (line.trim().equalsIgnoreCase("VERSION")) + { + return null; + } + else + { + GenBankVersion ver = new GenBankVersion(); + String v = line.substring(11, line.indexOf(" ", 12)).trim(); + ver.setVersion(v); + int posGI = line.indexOf("GI:", 11 + v.length()); + if (posGI > -1) + { + ver.setGI(line.substring(posGI)); + } + return ver; + } + } + + private GenBankLocus parseLocus(String line) + { + GenBankLocus loc = new GenBankLocus(); + Matcher mat = patLocus.matcher(line); + if (mat.find()) + { + String name = mat.group(1); + String len = mat.group(2); + String strand = mat.group(3); + String mtype = mat.group(4); + String linear = mat.group(5); + String division = mat.group(6); + String date = mat.group(7); + + loc.setName(name == null ? "" : name.trim()); + loc.setSequenceLength(len == null ? 0 : Integer.parseInt(len)); + loc.setStrand(strand == null ? "" : strand); + loc.setMoleculeType(mtype == null ? "" : mtype); + loc.setLinearSequence("linear".equals(linear)); + loc.setDivision(division == null ? "" : division); + loc.setModificationDate(date == null ? "" : date); + } + return loc; + } + + private GenBankSource parseSource(List lines) + { + StringBuffer sb = new StringBuffer(); + for (String line : lines) + { + sb.append(line).append(newline); + } + // Source section + GenBankSource sou = new GenBankSource(); + String aux = sb.toString().substring(11); + int fim1 = aux.indexOf("\n"); + if (fim1 > -1) + { + sou.setSource(aux.substring(0, fim1)); + int ini2 = aux.indexOf("ORGANISM"); + if (ini2 > -1) + { + fim1 = aux.indexOf("\n", ini2 + 10); + if (fim1 > -1) + { + sou.setOrganism(aux.substring(ini2 + 10, fim1)); + sou.setTaxonomic(aux.substring(fim1) + .replaceAll(" ", "").replaceAll("\\s+", "")); + } + else + { + sou.setOrganism(aux); + } + } + } + else + { + sou.setSource(aux); + } + return sou; + } + + /** + * Possible situations: + * + * 467 Points to a single base in the presented sequence 340..565 Points to a + * continuous range of bases bounded by and including the starting and ending + * bases <345..500 Indicates that the exact lower boundary point of a + * feature is unknown. The location begins at some base previous to the first + * base specified (which need not be contained in the presented sequence) and + * continues to and includes the ending base <1..888 The feature starts + * before the first sequenced base and continues to and includes base 888 + * 1..>888 The feature starts at the first sequenced base and continues + * beyond base 888 102.110 Indicates that the exact location is unknown but + * that it is one of the bases between bases 102 and 110, inclusive 123^124 + * Points to a site between bases 123 and 124 join(12..78,134..202) Regions 12 + * to 78 and 134 to 202 should be joined to form one contiguous sequence + * complement(34..126) Start at the base complementary to 126 and finish at + * the base complementary to base 34 (the feature is on the strand + * complementary to the presented strand) + * complement(join(2691..4571,4918..5163)) Joins regions 2691 to 4571 and 4918 + * to 5163, then complements the joined segments (the feature is on the strand + * complementary to the presented strand) + * join(complement(4918..5163),complement(2691..4571)) Complements regions + * 4918 to 5163 and 2691 to 4571, then joins the complemented segments (the + * feature is on the strand complementary to the presented strand) + * J00194.1:100..202 Points to bases 100 to 202, inclusive, in the entry (in + * this database) with primary accession number 'J00194' + * join(1..100,J00194.1:100..202) Joins region 1..100 of the existing entry + * with the region 100..202 of remote entry J00194 + * + * @param fea + * @param localiza + */ + private GenBankLocation parserFeatureLocation(GenBankFeature fea, + String localiza) + { + // remove os espaços, quebra de linhas etc + String buf = localiza.replaceAll("\\s", ""); + + // checks if there is a comma present between ranges + // complement(100..110),complement(90..100) + char[] buf2 = buf.toCharArray(); + int abertos = 0; + java.util.List lista = new java.util.ArrayList(); + int pinicial = 0; + for (int i = 0; i < buf2.length; i++) + { + if (buf2[i] == '(') + { + abertos++; + } + else if (buf2[i] == ')') + { + abertos--; + } + else if (buf2[i] == ',' && abertos == 0) + { + lista.add(buf.substring(pinicial, i)); + pinicial = i + 1; + } + } + if (lista.size() > 0) + { + lista.add(buf.substring(pinicial)); + GenBankLocations um = new GenBankLocations(); + um.setOperator(GenBankLocations.NONE); + for (String s : lista) + { + um.getUnits().add(parserFeatureLocation(fea, s)); + } + fea.setLocation(um); + return um; + } + + // trata as funcoes: complement(location,location...), + // join(location,location...), order(location,location...) + if (buf.contains("(")) + { + GenBankLocations um = new GenBankLocations(); + int ini = buf.indexOf("("); + int fim = buf.lastIndexOf(")"); + String token = buf.substring(0, ini); + if ("complement".equalsIgnoreCase(token)) + { + String inter = buf.substring(ini + 1, fim); + GenBankLocation interno = parserFeatureLocation(fea, inter); + interno.setComplement(true); + um.setOperator(GenBankLocations.COMPLEMENT); + um.getUnits().add(interno); + fea.setLocation(um); + } + else if ("join".equalsIgnoreCase(token)) + { + String inter = buf.substring(ini + 1, fim); + GenBankLocation interno = parserFeatureLocation(fea, inter); + um.setOperator(GenBankLocations.JOIN); + um.getUnits().add(interno); + fea.setLocation(um); + } + else if ("order".equalsIgnoreCase(token)) + { + String inter = buf.substring(ini + 1, fim); + GenBankLocation interno = parserFeatureLocation(fea, inter); + um.setOperator(GenBankLocations.ORDER); + um.getUnits().add(interno); + fea.setLocation(um); + } + else + { + log.log(Level.WARNING, + "Token desconhecido em location/features - {0}", token); + String inter = buf.substring(ini + 1, fim); + fea.setLocation(parserFeatureLocation(fea, inter)); + } + return fea.getLocation(); + } + else + { + // trata quando tiver uma lista de location + if (buf.contains(",")) + { + String[] partes = buf.split(","); + GenBankLocations um = new GenBankLocations(); + for (String p : partes) + { + um.getUnits().add(parserFeatureLocation(fea, p)); + } + fea.setLocation(um); + return um; + } + else + { + // trata quando tiver range + if (buf.contains("..")) + { + String[] partes = buf.split("\\.\\."); + GenBankLocationRange range = new GenBankLocationRange(); + if (buf.contains(":")) + { + for (int i = 0; i < partes.length; i++) + { + int pos = partes[i].indexOf(":"); + if (pos > 0) + { + String entry = partes[i].substring(0, pos); + partes[i] = partes[i].substring(pos + 1); + range.setEntry(entry); + } } + } + GenBankLocationPoint gp0 = (GenBankLocationPoint) parserFeatureLocation( + fea, partes[0]); + range.setStart(gp0); + GenBankLocationPoint gp1 = (GenBankLocationPoint) parserFeatureLocation( + fea, partes[1]); + range.setEnd(gp1); + fea.setLocation(range); + return range; } - if (lista.size() > 0) { - lista.add(buf.substring(pinicial)); - GenBankLocations um = new GenBankLocations(); - um.setOperator(GenBankLocations.NONE); - for (String s : lista) { - um.getUnits().add(parserFeatureLocation(fea, s)); + else + { + // trata um ponto + // possibilidades consideradas: + // 467 + // 102.110 + // 123^124 + // <345 + // >400 + // 345> + // 400< + // ou uma combinacao dessas + GenBankLocationPoint gp = new GenBankLocationPoint(); + if (buf.contains(":")) + { + int pos = buf.indexOf(":"); + if (pos > 0) + { + String entry = buf.substring(0, pos); + buf = buf.substring(pos + 1); + gp.setEntry(entry); } - fea.setLocation(um); - return um; - } - - // trata as funcoes: complement(location,location...), - // join(location,location...), order(location,location...) - if (buf.contains("(")) { - GenBankLocations um = new GenBankLocations(); - int ini = buf.indexOf("("); - int fim = buf.lastIndexOf(")"); - String token = buf.substring(0, ini); - if ("complement".equalsIgnoreCase(token)) { - String inter = buf.substring(ini + 1, fim); - GenBankLocation interno = parserFeatureLocation(fea, inter); - interno.setComplement(true); - um.setOperator(GenBankLocations.COMPLEMENT); - um.getUnits().add(interno); - fea.setLocation(um); - } else if ("join".equalsIgnoreCase(token)) { - String inter = buf.substring(ini + 1, fim); - GenBankLocation interno = parserFeatureLocation(fea, inter); - um.setOperator(GenBankLocations.JOIN); - um.getUnits().add(interno); - fea.setLocation(um); - } else if ("order".equalsIgnoreCase(token)) { - String inter = buf.substring(ini + 1, fim); - GenBankLocation interno = parserFeatureLocation(fea, inter); - um.setOperator(GenBankLocations.ORDER); - um.getUnits().add(interno); - fea.setLocation(um); - } else { - log.log(Level.WARNING, "Token desconhecido em location/features - {0}", token); - String inter = buf.substring(ini + 1, fim); - fea.setLocation(parserFeatureLocation(fea, inter)); + } + int pos = 0; + // verifica os simb < e > antes do primeiro numero + if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>') + { + gp.setPrefix(buf.charAt(pos)); + pos++; + } + // pega o primeiro numero + int ini = pos; + while (pos < buf.length() && buf.charAt(pos) >= '0' + && buf.charAt(pos) <= '9') + { + pos++; + } + if (buf.subSequence(ini, pos).length() < 1) + { + System.out.println(localiza); + } + int num = Integer.parseInt(buf.substring(ini, pos)); + int num2 = num; + // o primeiro numero pode ser o unico numero + if (pos < buf.length()) + { + // verifica se tem os sinais < e > apos o primeiro numero + if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>') + { + if (buf.contains(".") || buf.contains("^")) + { + gp.setPrefix(buf.charAt(pos)); + } + else + { + gp.setSufix(buf.charAt(pos)); + } + pos++; } - return fea.getLocation(); - } else { - // trata quando tiver uma lista de location - if (buf.contains(",")) { - String[] partes = buf.split(","); - GenBankLocations um = new GenBankLocations(); - for (String p : partes) { - um.getUnits().add( - parserFeatureLocation(fea, p)); - } - fea.setLocation(um); - return um; - } else { - // trata quando tiver range - if (buf.contains("..")) { - String[] partes = buf.split("\\.\\."); - GenBankLocationRange range = new GenBankLocationRange(); - if (buf.contains(":")) { - for (int i = 0; i < partes.length; i++) { - int pos = partes[i].indexOf(":"); - if (pos > 0) { - String entry = partes[i].substring(0, pos); - partes[i] = partes[i].substring(pos + 1); - range.setEntry(entry); - } - } - } - GenBankLocationPoint gp0 = (GenBankLocationPoint) parserFeatureLocation(fea, partes[0]); - range.setStart(gp0); - GenBankLocationPoint gp1 = (GenBankLocationPoint) parserFeatureLocation(fea, partes[1]); - range.setEnd(gp1); - fea.setLocation(range); - return range; - } else { - // trata um ponto - // possibilidades consideradas: - // 467 - // 102.110 - // 123^124 - // <345 - // >400 - // 345> - // 400< - // ou uma combinacao dessas - GenBankLocationPoint gp = new GenBankLocationPoint(); - if (buf.contains(":")) { - int pos = buf.indexOf(":"); - if (pos > 0) { - String entry = buf.substring(0, pos); - buf = buf.substring(pos + 1); - gp.setEntry(entry); - } - } - int pos = 0; - // verifica os simb < e > antes do primeiro numero - if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>') { - gp.setPrefix(buf.charAt(pos)); - pos++; - } - // pega o primeiro numero - int ini = pos; - while (pos < buf.length() && buf.charAt(pos) >= '0' - && buf.charAt(pos) <= '9') { - pos++; - } - if (buf.subSequence(ini, pos).length() < 1) { - System.out.println(localiza); - } - int num = Integer.parseInt(buf.substring(ini, pos)); - int num2 = num; - // o primeiro numero pode ser o unico numero - if (pos < buf.length()) { - // verifica se tem os sinais < e > apos o primeiro numero - if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>') { - if (buf.contains(".") || buf.contains("^")) { - gp.setPrefix(buf.charAt(pos)); - } else { - gp.setSufix(buf.charAt(pos)); - } - pos++; - } - - // verifica a separacao dos numeros . ou ^ - if (pos < buf.length() - && (buf.charAt(pos) == '.' || buf.charAt(pos) == '^')) { - // separação localizada, possibilidade de mais numero - gp.setSymbol(buf.charAt(pos)); - pos++; - - // verifica os simb < e > antes do segundo numero - if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>') { - gp.setSufix(buf.charAt(pos)); - pos++; - } - - // pega o segundo numero - ini = pos; - while (pos < buf.length() && buf.charAt(pos) >= '0' - && buf.charAt(pos) <= '9') { - pos++; - } - num2 = Integer.parseInt(buf.substring(ini, pos)); - - // verifica os simb < e > após o segundo numero - if (pos < buf.length() && (buf.charAt(pos) == '<' || buf.charAt(pos) == '>')) { - gp.setSufix(buf.charAt(pos)); - pos++; - } - } - } - gp.setMin(num); - gp.setMax(num2); - fea.setLocation(gp); - return gp; - } + + // verifica a separacao dos numeros . ou ^ + if (pos < buf.length() + && (buf.charAt(pos) == '.' || buf.charAt(pos) == '^')) + { + // separação localizada, possibilidade de mais numero + gp.setSymbol(buf.charAt(pos)); + pos++; + + // verifica os simb < e > antes do segundo numero + if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>') + { + gp.setSufix(buf.charAt(pos)); + pos++; + } + + // pega o segundo numero + ini = pos; + while (pos < buf.length() && buf.charAt(pos) >= '0' + && buf.charAt(pos) <= '9') + { + pos++; + } + num2 = Integer.parseInt(buf.substring(ini, pos)); + + // verifica os simb < e > após o segundo numero + if (pos < buf.length() + && (buf.charAt(pos) == '<' || buf.charAt(pos) == '>')) + { + gp.setSufix(buf.charAt(pos)); + pos++; + } } + } + gp.setMin(num); + gp.setMax(num2); + fea.setLocation(gp); + return gp; } + } + } + } + + private int[] parseReferenceDescriptor(String descriptor) + { + // 1 (bases 1 to 1609) + int[] resultado = new int[3]; + descriptor = descriptor.replace("(bases", ",").replace("to", ",") + .replace(")", ""); + String[] args = descriptor.split(","); + resultado[0] = Integer.parseInt(args[0].trim()); + resultado[1] = Integer.parseInt(args[1].trim()); + resultado[2] = Integer.parseInt(args[2].trim()); + return resultado; + } + + private String processReferenceLine(String line, String component) + { + int init = line.indexOf(component); + if (init != -1) + { + line = line.replace(component, ""); + } + return line; + } + + private String processHeaderLine(String line, String header) + { + int init = line.indexOf(header); + if (init != -1) + { + line = line.replace(header, ""); } - - private int[] parseReferenceDescriptor(String descriptor){ - // 1 (bases 1 to 1609) - int[] resultado = new int[3]; - descriptor = descriptor.replace("(bases", ",").replace("to", ",").replace(")", ""); - String[] args = descriptor.split(","); - resultado[0] = Integer.parseInt(args[0].trim()); - resultado[1] = Integer.parseInt(args[1].trim()); - resultado[2] = Integer.parseInt(args[2].trim()); - return resultado; + return line; + } + + private GenBankSequence processSequenceLine(String line) + { + GenBankSequence gbs = new GenBankSequence(); + line = ltrim(line); + String[] args = line.split(" "); + gbs.setId(Integer.parseInt(args[0])); + int len = args.length - 1; + Vector seqs = new Vector(); + for (int i = 0; i < len; i++) + seqs.add(args[i + 1]); + gbs.setSequences(seqs); + return gbs; + } + + private String processCommentLine(String line) + { + int init = line.indexOf("COMMENT"); + if (init != -1) + { + line = line.replace("COMMENT", ""); + } + return line; + } + + public String rtrim(String s) + { + int i = s.length() - 1; + while (i >= 0 && Character.isWhitespace(s.charAt(i))) + { + i--; } - private String processReferenceLine(String line, String component){ - int init = line.indexOf(component); - if (init!=-1){ - line = line.replace(component,""); - } - return line; - } - private String processHeaderLine(String line, String header){ - int init = line.indexOf(header); - if (init!=-1){ - line = line.replace(header,""); - } - return line; - } - - private GenBankSequence processSequenceLine(String line) { - GenBankSequence gbs = new GenBankSequence(); - line = ltrim(line); - String[] args = line.split(" "); - gbs.setId(Integer.parseInt(args[0])); - int len = args.length-1; - Vector seqs = new Vector(); - for (int i=0;i= 0 && Character.isWhitespace(s.charAt(i))) { - i--; - } - return s.substring(0,i+1); + return s.substring(0, i + 1); + } + + public String ltrim(String s) + { + int i = 0; + while (i < s.length() && Character.isWhitespace(s.charAt(i))) + { + i++; } + return s.substring(i); + } - public String ltrim(String s) { - int i = 0; - while (i < s.length() && Character.isWhitespace(s.charAt(i))) { - i++; - } - return s.substring(i); - } - - public String print(){ - StringBuffer out = new StringBuffer(); - for (SequenceI seq: this.getSeqs()){ - SequenceFeature[] seqFeatures = seq.getSequenceFeatures(); - boolean featureLinePrinted = false; - for(SequenceFeature sf:seqFeatures){ - if(sf.getType().equals("LOCUS")){ - out.append(sf.getDescription()).append(newline); - }else if (sf.getType().equals("DEFINITION")){ - out.append("DEFINITION ").append(sf.getDescription()).append(newline); - }else if (sf.getType().equals("VERSION")){ - out.append("VERSION ").append(sf.getDescription()).append(newline); - }else if (sf.getType().equals("ACCESSION")){ - out.append("ACCESSION ").append(sf.getDescription()).append(newline); - }else if (sf.getType().equals("DBLINK")){ - out.append("DBLINK ").append(sf.getDescription()).append(newline); - }else if (sf.getType().equals("KEYWORDS")){ - out.append("KEYWORDS ").append(sf.getDescription()).append(newline); - }else if (sf.getType().equals("SOURCE")){ - out.append("SOURCE ").append(sf.getDescription()).append(newline); - }else if (sf.getType().equals("REFERENCE")){ - out.append(sf.getDescription()).append(newline); - }else if (sf.getType().equals("COMMENT")){ - out.append("COMMENT ").append(sf.getDescription()).append(newline); - }else if (sf.getType().equals("BASE COUNT")){ - out.append("BASE COUNT ").append(sf.getDescription()).append(newline); - }else{ - if (!featureLinePrinted){ - out.append("FEATURES Location/Qualifiers").append(newline); - featureLinePrinted = true; - } - out.append(" ").append(sf.getType()).append(" ").append(sf.getBegin()).append("..").append(sf.getEnd()).append(newline); - Hashtable qualifiers = sf.otherDetails; - if (qualifiers!=null){ - Enumeration keys = qualifiers.keys(); - while (keys.hasMoreElements()){ - String key = keys.nextElement(); - String value = qualifiers.get(key); - if (value!=null){ - out.append(" /").append(key).append("=").append(value).append(newline); - } - } - } - } - } - out.append("ORIGIN").append(newline); - //We have to divide sequence in groups of 6x10 chars - String sequenceString = seq.getSequenceAsString(); - int howManyGroups = (int) Math.floor(sequenceString.length()/60); - for (int i=0;i<=howManyGroups;i++){ - String sequenceSegment = sequenceString.substring(i*60,Math.min((i+1)*60, sequenceString.length())); - if ((!"".equals(sequenceSegment) && (sequenceSegment!=null) && (sequenceSegment.length()>0))){ - out.append(" ").append(60*i+1).append(" "); - } - int segmentLength = sequenceSegment.length(); - if (segmentLength>=10){ - out.append(sequenceSegment.substring(0,10)).append(" "); - if (segmentLength>=20){ - out.append(sequenceSegment.substring(10,20)).append(" "); - if (segmentLength>=30){ - out.append(sequenceSegment.substring(20,30)).append(" "); - if (segmentLength>=40){ - out.append(sequenceSegment.substring(30,40)).append(" "); - if (segmentLength>=50){ - out.append(sequenceSegment.substring(40,50)).append(" "); - if (segmentLength<=60){ - out.append(sequenceSegment.substring(50,sequenceSegment.length())); - } - }else{ - out.append(sequenceSegment.substring(40,sequenceSegment.length())); - } - }else{ - out.append(sequenceSegment.substring(30,sequenceSegment.length())); - } - }else{ - out.append(sequenceSegment.substring(20,sequenceSegment.length())); - } - }else{ - out.append(sequenceSegment.substring(10,sequenceSegment.length())); - } - } else if ((!"".equals(sequenceSegment) && (sequenceSegment!=null) && (sequenceSegment.length()>0))){ - out.append(sequenceSegment); - } - out.append(newline); - } - out.append("//"); - } - return out.toString(); + public String print() + { + StringBuffer out = new StringBuffer(); + for (SequenceI seq : this.getSeqs()) + { + SequenceFeature[] seqFeatures = seq.getSequenceFeatures(); + boolean featureLinePrinted = false; + for (SequenceFeature sf : seqFeatures) + { + if (sf.getType().equals("LOCUS")) + { + out.append(sf.getDescription()).append(newline); + } + else if (sf.getType().equals("DEFINITION")) + { + out.append("DEFINITION ").append(sf.getDescription()) + .append(newline); + } + else if (sf.getType().equals("VERSION")) + { + out.append("VERSION ").append(sf.getDescription()) + .append(newline); + } + else if (sf.getType().equals("ACCESSION")) + { + out.append("ACCESSION ").append(sf.getDescription()) + .append(newline); + } + else if (sf.getType().equals("DBLINK")) + { + out.append("DBLINK ").append(sf.getDescription()).append(newline); + } + else if (sf.getType().equals("KEYWORDS")) + { + out.append("KEYWORDS ").append(sf.getDescription()) + .append(newline); + } + else if (sf.getType().equals("SOURCE")) + { + out.append("SOURCE ").append(sf.getDescription()) + .append(newline); + } + else if (sf.getType().equals("REFERENCE")) + { + out.append(sf.getDescription()).append(newline); + } + else if (sf.getType().equals("COMMENT")) + { + out.append("COMMENT ").append(sf.getDescription()) + .append(newline); + } + else if (sf.getType().equals("BASE COUNT")) + { + out.append("BASE COUNT ").append(sf.getDescription()) + .append(newline); + } + else + { + if (!featureLinePrinted) + { + out.append("FEATURES Location/Qualifiers").append( + newline); + featureLinePrinted = true; + } + out.append(" ").append(sf.getType()).append(" ") + .append(sf.getBegin()).append("..").append(sf.getEnd()) + .append(newline); + Hashtable qualifiers = sf.otherDetails; + if (qualifiers != null) + { + Enumeration keys = qualifiers.keys(); + while (keys.hasMoreElements()) + { + String key = keys.nextElement(); + String value = qualifiers.get(key); + if (value != null) + { + out.append(" /").append(key) + .append("=").append(value).append(newline); + } + } + } + } + } + out.append("ORIGIN").append(newline); + // We have to divide sequence in groups of 6x10 chars + String sequenceString = seq.getSequenceAsString(); + int howManyGroups = (int) Math.floor(sequenceString.length() / 60); + for (int i = 0; i <= howManyGroups; i++) + { + String sequenceSegment = sequenceString.substring(i * 60, + Math.min((i + 1) * 60, sequenceString.length())); + if ((!"".equals(sequenceSegment) && (sequenceSegment != null) && (sequenceSegment + .length() > 0))) + { + out.append(" ").append(60 * i + 1).append(" "); + } + int segmentLength = sequenceSegment.length(); + if (segmentLength >= 10) + { + out.append(sequenceSegment.substring(0, 10)).append(" "); + if (segmentLength >= 20) + { + out.append(sequenceSegment.substring(10, 20)).append(" "); + if (segmentLength >= 30) + { + out.append(sequenceSegment.substring(20, 30)).append(" "); + if (segmentLength >= 40) + { + out.append(sequenceSegment.substring(30, 40)).append(" "); + if (segmentLength >= 50) + { + out.append(sequenceSegment.substring(40, 50)).append(" "); + if (segmentLength <= 60) + { + out.append(sequenceSegment.substring(50, + sequenceSegment.length())); + } + } + else + { + out.append(sequenceSegment.substring(40, + sequenceSegment.length())); + } + } + else + { + out.append(sequenceSegment.substring(30, + sequenceSegment.length())); + } + } + else + { + out.append(sequenceSegment.substring(20, + sequenceSegment.length())); + } + } + else + { + out.append(sequenceSegment.substring(10, + sequenceSegment.length())); + } + } + else if ((!"".equals(sequenceSegment) && (sequenceSegment != null) && (sequenceSegment + .length() > 0))) + { + out.append(sequenceSegment); + } + out.append(newline); + } + out.append("//"); } + return out.toString(); + } }