import org.apache.james.mime4j.field.ParsedField;
-public class GenBankFile extends AlignFile {
- private static final Logger log = Logger.getLogger(GenBankFile.class.getName());
- private GenBankVersion version = new GenBankVersion();
- private GenBankLocus locus = new GenBankLocus();
- private GenBankSource source = new GenBankSource();
- private static final Pattern patLocation = Pattern.compile("(\\d+)\\.\\.(\\d+)");
- private static final Pattern patLocationComp = Pattern.compile("(complement)\\((\\d+)\\.\\.(\\d+)\\)");
- private static final Pattern patLocus = Pattern.compile("^LOCUS +([a-z|A-Z|0-9|_]+) +([0-9]+) bp ( {3}|ss\\-|ds\\-|ms\\-)([a-z|A-Z|-|\\s]+) ([a-z| ]{8}) ([A-Z| ]{3}) ([0-9]+-[A-Z]+-[0-9]+)");
- private static final Pattern patQualifierKey = Pattern.compile("/(.*?)=");
- private static final Pattern patFeatureKey = Pattern.compile("^\\s{5}([A-Za-z0-9\\_\\']+)\\s+");
-
- private String definition;
- private String accession;
- private String keywords;
- private String dblink;
- private String baseCount;
-
- private Vector<GenBankFeature> features;
- private Vector<String> comments;
- //Items under origin
- private Vector<GenBankSequence> sequences;
- private Vector<GenBankReference> references;
-
- private SequenceI genBankSequence;
-
- public GenBankFile() {
- }
-
- public GenBankFile(String inFile, String type) throws IOException {
- super(inFile, type);
- }
-
- public GenBankFile(FileParse source) throws IOException {
- super(source);
- }
-
- public void initData() {
- super.initData();
- features = new Vector<GenBankFeature>();
- comments = new Vector<String>();
- sequences = new Vector<GenBankSequence>();
- references = new Vector<GenBankReference>();
- }
-
- public void parse() throws IOException {
- String line;
- boolean featureMode = false; //FEATURES found
- boolean seqMode = false; //Parsing Sequences from SOURCE
- boolean referenceMode = false; //REFERENCE found
- boolean sourceMode = false; //SOURCE found
- boolean commentMode = false; //COMMENT found
- boolean parsingAuthors = false; //Parsing authors (multiline)
- boolean parsingDefinition = false; //Parsing definition (multiline)
- boolean parsingKeywords = false; //Parsing keywords (multiline)
- boolean parsingDbLink = false; //Parsing DBLINK (multiline)
- boolean parsingTitle = false; //Parsing title (multiline)
- boolean parsingQualifier = false; //Parsing feature qualifier (multine)
- String currentQualifierName = "";
- GenBankReference reference = null;
- GenBankFeature feature = null;
- List<String> sourceLines = new ArrayList<String>();
-
- if (this.isValid()){
-
- while ((line = nextLine()) != null) {
- // We only process lines if they have contents within
- if (line.length() == 0)
- continue;
-
- if (line.startsWith("FEATURES")){
- featureMode = true;
- seqMode = false;
- referenceMode = false;
- sourceMode = false;
- commentMode = false;
- feature = new GenBankFeature();
- source = parseSource(sourceLines);
- }
-
-
- if (seqMode) {
- if (!line.startsWith("//")){
- GenBankSequence seq = processSequenceLine(line);
- sequences.add(seq);
- }
- featureMode = false;
- referenceMode = false;
- sourceMode = false;
- }
-
- if (line.startsWith("ORIGIN")){
- if (feature.getType()!=null)
- features.add(feature);
- featureMode = false;
- referenceMode = false;
- sourceMode = false;
- seqMode = true;
- }
-
- if (featureMode){
- // Process feature line
- if (!line.startsWith("FEATURES") && !line.startsWith("BASE COUNT")){
- //Parse type
- if (!line.trim().startsWith("/")){
- Matcher featuresMatch = patFeatureKey.matcher(line);
- if (featuresMatch.find()){
- if (feature.getType()!=null)
- features.add(feature); //Hay que añadirlo sólo si no se está a mitad de un qualif o una feature
- //It's a feature
- String type = featuresMatch.group(0);
- feature = new GenBankFeature();
- feature.setType(type);
- GenBankLocation loc = parserFeatureLocation(feature, line.replace(type,""));
- feature.setLocation(loc);
- parsingQualifier = false;
- continue;
- }else if (parsingQualifier) { //If not a feature, it's another part of a qualifier
- String qValue = feature.getQualifier(currentQualifierName);
- StringBuffer sb = new StringBuffer().append(qValue).append(ltrim(line));
- feature.updateQualifier(currentQualifierName, sb.toString());
- continue;
- }
- }else{
- //It's the begining of a qualifier line
- Matcher matcher = patQualifierKey.matcher(line);
- if (matcher.find()){
- String qName = matcher.group(1);
- currentQualifierName = qName.replace("/","");
- line = line.replace(qName,"").replace("/", "").replace("=","");
- feature.addQualifier(currentQualifierName, ltrim(line));
- parsingQualifier = true;
- continue;
- }
- }
- }
- }
- // Process REFERENCE line
- if (line.startsWith("REFERENCE")) {
- if (!referenceMode){
- //This is line is the REFERENCE line
- referenceMode = true;
- featureMode = false;
- sourceMode = false;
- seqMode = false;
- }else{
- //We were at referenceMode, then add current reference to the list and create a new one
- references.add(reference);
- }
- reference = new GenBankReference();
- String desc = processReferenceLine(line,"REFERENCE");
- int[] ranges = parseReferenceDescriptor(desc);
- reference.setDescriptor(desc);
- reference.setOrder(ranges[0]);
- reference.setBegin(ranges[1]);
- reference.setEnd(ranges[2]);
- parsingAuthors = false;
- parsingTitle = false;
- continue;
- }
-
- if (line.startsWith(" AUTHORS")){
- if (referenceMode){
- reference.setAuthors(processReferenceLine(line,"AUTHORS"));
- parsingAuthors = true;
- parsingTitle = false;
- }
- continue;
- }
- if (line.startsWith(" TITLE")){
- if (referenceMode){
- reference.setTitle(processReferenceLine(line,"TITLE"));
- parsingAuthors = false;
- parsingTitle = true;
- }
- continue;
- }
- if (line.startsWith(" JOURNAL")){
- if (referenceMode){
- reference.setJournal(processReferenceLine(line,"JOURNAL"));
- parsingTitle = false;
- parsingAuthors = false;
- }
- continue;
- }
- if (line.startsWith(" PUBMED")){
- if (referenceMode){
- reference.setPubmed(processReferenceLine(line,"PUBMED"));
- parsingTitle = false;
- parsingAuthors = false;
- }
- continue;
- }
-
- if (line.startsWith(" MEDLINE")){
- if (referenceMode){
- reference.setMedline(processReferenceLine(line,"MEDLINE"));
- parsingTitle = false;
- parsingAuthors = false;
- }
- continue;
- }
- if (line.startsWith(" REMARK")){
- if (referenceMode){
- reference.setRemark(processReferenceLine(line,"REMARK"));
- parsingTitle = false;
- parsingAuthors = false;
- }
- continue;
- }
- if (line.startsWith(" CONSRTM")){
- if (referenceMode){
- reference.setConsortia(processReferenceLine(line,"CONSRTM"));
- parsingTitle = false;
- parsingAuthors = false;
- }
- continue;
- }
-
-
- if (line.startsWith("SOURCE")) {
- parsingKeywords = false;
- sourceMode = true;
- commentMode = false;
- if (sourceMode){
- sourceLines.add(line);
- }
- continue;
- }
- if (line.indexOf("ORGANISM")!=-1) {
- if (sourceMode){
- sourceLines.add(line);
- continue;
- }
- }
-
- if (line.startsWith("COMMENT")){
- if (reference!=null)
- references.add(reference);
- commentMode = true;
- sourceMode = false;
- referenceMode = false;
- sourceMode = false;
- seqMode = false;
- comments.add(processCommentLine(line));
- continue;
- }
- // Process LOCUS line
- if (line.startsWith("LOCUS")) {
- locus = parseLocus(line);
- continue;
- }
- // Process BASE COUNT line
- if (line.startsWith("BASE COUNT")) {
- baseCount = processHeaderLine(line,"BASE COUNT");
- featureMode = false;
- continue;
- }
- // Process DEFINITION line
- if (line.startsWith("DEFINITION")) {
- definition = processHeaderLine(line,"DEFINITION");
- parsingDefinition = true;
- continue;
- }
- // Process ACCESSION line
- if (line.startsWith("ACCESSION")) {
- accession = processHeaderLine(line,"ACCESSION");
- parsingDefinition = false;
- continue;
- }
- // Process VERSION line
- if (line.startsWith("VERSION")) {
- version = parseVersion(line);
- //headers.put("VERSION", processHeaderLine(line,"VERSION"));
- continue;
- }
- // Process DBLINK line
- if (line.startsWith("DBLINK")) {
- dblink = processHeaderLine(line,"DBLINK");
- parsingDbLink = true;
- continue;
- }
- // Process KEYWORDS line
- if (line.startsWith("KEYWORDS")) {
- keywords = processHeaderLine(line,"KEYWORDS");
- parsingKeywords = true;
- parsingDbLink = false;
- continue;
- }
- if (sourceMode){
- sourceLines.add(line);
- continue;
- }
- if (parsingDefinition){
- StringBuffer sb = new StringBuffer().append(definition).append(line);
- definition = sb.toString();
- continue;
- }
- if (referenceMode && parsingAuthors){
- if (reference!=null){
- StringBuffer authors = new StringBuffer().append(reference.getAuthors()).append(line);
- reference.setAuthors(authors.toString());
- }
- continue;
- }
- if (referenceMode && parsingTitle){
- if (reference!=null){
- StringBuffer title = new StringBuffer().append(reference.getTitle()).append(line);
- reference.setTitle(title.toString());
- }
- continue;
- }
- if (parsingKeywords){
- StringBuffer sb = new StringBuffer().append(keywords).append(line);
- keywords = sb.toString();
- continue;
- }
- if (parsingDbLink){
- StringBuffer sb = new StringBuffer().append(dblink).append(line);
- dblink = sb.toString();
- continue;
- }
- if (commentMode){
- comments.add(line);
- }
- }
- setEntries();
- }else{
- //File is not valid
- throw new IOException("GenBankFile is not valid.");
- }
- }
-
- protected void setEntries(){
- StringBuffer result = new StringBuffer();
- //Mapping GenBank info into Jalview data model
- genBankSequence = new Sequence(accession,DnaUtils.getNucleotidesFromSequenceVector(sequences));
- //Mapping DBRefEntry
- DBRefEntry dbRef = new DBRefEntry();
- dbRef.setSource(DBRefSource.GENBANK);
- dbRef.setVersion(version == null ? "" : version.toString());
- dbRef.setAccessionId(accession);
- // add map to indicate the sequence is a valid coordinate frame for the dbref
- dbRef.setMap(new Mapping(null, new int[]
- { 1, genBankSequence.getLength() }, new int[]
- { 1, genBankSequence.getLength() }, 1, 1));
- genBankSequence.addDBRef(dbRef);
-
- //add header info as non-positional features
- //add LOCUS
- SequenceFeature locusF = new SequenceFeature("LOCUS", (locus == null ? "" : locus.toString()), null, 1, genBankSequence.getLength(), DBRefSource.GENBANK);
- genBankSequence.addSequenceFeature(locusF);
- //add DEFNITION
- SequenceFeature defF = new SequenceFeature("DEFINITION", definition, null, 1, genBankSequence.getLength(), DBRefSource.GENBANK);
- genBankSequence.addSequenceFeature(defF);
- //add ACCESSION
- SequenceFeature accessionF = new SequenceFeature("ACCESSION", accession, null, 1, genBankSequence.getLength(), DBRefSource.GENBANK);
- genBankSequence.addSequenceFeature(accessionF);
- //add VERSION
- SequenceFeature versionF = new SequenceFeature("VERSION", (version == null ? "" : version.toString()), null, 1, genBankSequence.getLength(), DBRefSource.GENBANK);
- genBankSequence.addSequenceFeature(versionF);
- //add DBLINK
- SequenceFeature dblinkF = new SequenceFeature("DBLINK", (dblink == null ? "" : dblink.toString()), null, 1, genBankSequence.getLength(), DBRefSource.GENBANK);
- genBankSequence.addSequenceFeature(dblinkF);
- //add KEYWORDS
- SequenceFeature keywordsF = new SequenceFeature("KEYWORDS", keywords, null, 1, genBankSequence.getLength(), DBRefSource.GENBANK);
- genBankSequence.addSequenceFeature(keywordsF);
- //add SOURCE
- SequenceFeature sourceF = new SequenceFeature("SOURCE", (source == null ? "" : source.toString()), null, 1, genBankSequence.getLength(), DBRefSource.GENBANK);
- genBankSequence.addSequenceFeature(sourceF);
- //add BASE COUNT
- SequenceFeature baseCountF = new SequenceFeature("BASE COUNT", (baseCount == null ? "" : baseCount.toString()), null, 1, genBankSequence.getLength(), DBRefSource.GENBANK);
- genBankSequence.addSequenceFeature(baseCountF);
-
- // add literature and database cross references in the file
- for (GenBankReference gbRef:references){
- //They are non-positional features
- SequenceFeature refFeature = new SequenceFeature("REFERENCE", gbRef.toString(),null,gbRef.getBegin(),gbRef.getEnd(),DBRefSource.GENBANK);
- genBankSequence.addSequenceFeature(refFeature);
- }
- //add COMMENTS
- if (comments.size()>0){
- StringBuffer sb = new StringBuffer();
- for (String comment: comments){
- sb.append(comment).append(newline);
- }
- SequenceFeature commentF = new SequenceFeature("COMMENT", sb.toString(), null, 1, genBankSequence.getLength(), DBRefSource.GENBANK);
- genBankSequence.addSequenceFeature(commentF);
- }
- //Mapping FEATURES
- for (GenBankFeature feature:features){
- if (feature.getType()!=null){
- SequenceFeature sf = new SequenceFeature();
- sf.setType(feature.getType());
- sf.setDescription(feature.getType());
-
- sf.setBegin(feature.getLocation()==null ? 0 : feature.getLocation().getMinor());
- sf.setEnd(feature.getLocation()==null ? 0 : feature.getLocation().getMajor());
- Enumeration<String> names = feature.getQualifiersNames();
- while (names.hasMoreElements()){
- String qName = names.nextElement();
- String qValue = feature.getQualifier(qName);
- sf.setValue(qName, qValue);
- }
- genBankSequence.addSequenceFeature(sf);
- }
- }
- SequenceI[] parsedSeqs = new SequenceI[1];
- parsedSeqs[0] = genBankSequence;
- this.setSeqs(parsedSeqs);
- }
- private GenBankVersion parseVersion(String line) {
- //VERSION U00096.2 GI:48994873
- if (line.trim().equalsIgnoreCase("VERSION")){
- return null;
- }else{
- GenBankVersion ver = new GenBankVersion();
- String v = line.substring(11, line.indexOf(" ", 12)).trim();
- ver.setVersion(v);
- int posGI = line.indexOf("GI:", 11 + v.length());
- if (posGI > -1) {
- ver.setGI(line.substring(posGI));
- }
- return ver;
- }
+public class GenBankFile extends AlignFile
+{
+ // Class logger, named after this class.
+ private static final Logger log = Logger.getLogger(GenBankFile.class
+ .getName());
+
+ // Parsed VERSION line; replaced by parse() (may become null for a bare
+ // "VERSION" keyword, see parseVersion).
+ private GenBankVersion version = new GenBankVersion();
+
+ // Parsed LOCUS line; replaced by parse() via parseLocus.
+ private GenBankLocus locus = new GenBankLocus();
+
+ // Parsed SOURCE section; replaced by parse() via parseSource once the
+ // FEATURES line is reached.
+ private GenBankSource source = new GenBankSource();
+
+ // Matches a plain "start..end" range and captures both numbers.
+ private static final Pattern patLocation = Pattern
+ .compile("(\\d+)\\.\\.(\\d+)");
+
+ // Matches "complement(start..end)" and captures the keyword and both
+ // numbers.
+ private static final Pattern patLocationComp = Pattern
+ .compile("(complement)\\((\\d+)\\.\\.(\\d+)\\)");
+
+ // Matches a LOCUS line. parseLocus reads the seven groups as: name,
+ // sequence length, strand prefix, molecule type, an 8-character field
+ // compared against "linear", a 3-character division and the date.
+ // NOTE(review): '|' inside a character class is a literal pipe, not
+ // alternation, so these classes also accept '|' - confirm this is intended.
+ private static final Pattern patLocus = Pattern
+ .compile("^LOCUS +([a-z|A-Z|0-9|_]+) +([0-9]+) bp ( {3}|ss\\-|ds\\-|ms\\-)([a-z|A-Z|-|\\s]+) ([a-z| ]{8}) ([A-Z| ]{3}) ([0-9]+-[A-Z]+-[0-9]+)");
+
+ // Captures the text between a '/' and the first following '=' (the
+ // qualifier name).
+ private static final Pattern patQualifierKey = Pattern.compile("/(.*?)=");
+
+ // Matches a line starting with exactly five whitespace characters followed
+ // by a feature key and trailing whitespace; group 1 is the key.
+ private static final Pattern patFeatureKey = Pattern
+ .compile("^\\s{5}([A-Za-z0-9\\_\\']+)\\s+");
+
+ // Header values collected by parse() from the lines starting with the
+ // matching keyword; null until such a line has been read.
+ private String definition;
+
+ private String accession;
+
+ private String keywords;
+
+ private String dblink;
+
+ private String baseCount;
+
+ // Features collected by parse() while in the FEATURES section.
+ private Vector<GenBankFeature> features;
+
+ // COMMENT lines collected by parse().
+ private Vector<String> comments;
+
+ // Items under origin
+ private Vector<GenBankSequence> sequences;
+
+ // REFERENCE entries collected by parse().
+ private Vector<GenBankReference> references;
+
+ // The single sequence built by setEntries() from the parsed data.
+ private SequenceI genBankSequence;
+
+ /**
+ * Creates an empty instance; nothing is read or parsed here.
+ */
+ public GenBankFile()
+ {
+ }
+
+ /**
+ * Delegates to the superclass constructor with the same arguments.
+ *
+ * @param inFile
+ *          passed unchanged to the superclass
+ * @param type
+ *          passed unchanged to the superclass
+ * @throws IOException
+ *           if the superclass constructor throws it
+ */
+ public GenBankFile(String inFile, String type) throws IOException
+ {
+ super(inFile, type);
+ }
+
+ /**
+ * Delegates to the superclass constructor with the given source.
+ *
+ * @param source
+ *          passed unchanged to the superclass
+ * @throws IOException
+ *           if the superclass constructor throws it
+ */
+ public GenBankFile(FileParse source) throws IOException
+ {
+ super(source);
+ }
+
+ /**
+  * Runs the superclass initialisation and then creates fresh, empty
+  * containers for everything parse() collects.
+  */
+ public void initData()
+ {
+   super.initData();
+
+   // Containers filled line by line during parse()
+   references = new Vector<GenBankReference>();
+   sequences = new Vector<GenBankSequence>();
+   comments = new Vector<String>();
+   features = new Vector<GenBankFeature>();
+ }
+
+ public void parse() throws IOException
+ {
+ String line;
+ boolean featureMode = false; // FEATURES found
+ boolean seqMode = false; // Parsing sequence lines after ORIGIN
+ boolean referenceMode = false; // REFERENCE found
+ boolean sourceMode = false; // SOURCE found
+ boolean commentMode = false; // COMMENT found
+ boolean parsingAuthors = false; // Parsing authors (multiline)
+ boolean parsingDefinition = false; // Parsing definition (multiline)
+ boolean parsingKeywords = false; // Parsing keywords (multiline)
+ boolean parsingDbLink = false; // Parsing DBLINK (multiline)
+ boolean parsingTitle = false; // Parsing title (multiline)
+ boolean parsingQualifier = false; // Parsing feature qualifier (multiline)
+ String currentQualifierName = "";
+ GenBankReference reference = null;
+ GenBankFeature feature = null;
+ List<String> sourceLines = new ArrayList<String>();
+
+ if (this.isValid())
+ {
+
+ while ((line = nextLine()) != null)
+ {
+ // We only process lines if they have contents within
+ if (line.length() == 0)
+ continue;
+
+ if (line.startsWith("FEATURES"))
+ {
+ featureMode = true;
+ seqMode = false;
+ referenceMode = false;
+ sourceMode = false;
+ commentMode = false;
+ feature = new GenBankFeature();
+ source = parseSource(sourceLines);
+ }
+
+ if (seqMode)
+ {
+ if (!line.startsWith("//"))
+ {
+ GenBankSequence seq = processSequenceLine(line);
+ sequences.add(seq);
+ }
+ featureMode = false;
+ referenceMode = false;
+ sourceMode = false;
+ }
+
+ if (line.startsWith("ORIGIN"))
+ {
+ if (feature.getType() != null)
+ features.add(feature);
+ featureMode = false;
+ referenceMode = false;
+ sourceMode = false;
+ seqMode = true;
+ }
+
+ if (featureMode)
+ {
+ // Process feature line
+ if (!line.startsWith("FEATURES")
+ && !line.startsWith("BASE COUNT"))
+ {
+ // Parse type
+ if (!line.trim().startsWith("/"))
+ {
+ Matcher featuresMatch = patFeatureKey.matcher(line);
+ if (featuresMatch.find())
+ {
+ if (feature.getType() != null)
+ features.add(feature); // Must only be added when we are not
+ // in the middle of a qualifier or a feature
+ // It's a feature
+ String type = featuresMatch.group(0);
+ feature = new GenBankFeature();
+ feature.setType(type);
+ GenBankLocation loc = parserFeatureLocation(feature,
+ line.replace(type, ""));
+ feature.setLocation(loc);
+ parsingQualifier = false;
+ continue;
+ }
+ else if (parsingQualifier)
+ { // If not a feature, it's another part of a qualifier
+ String qValue = feature.getQualifier(currentQualifierName);
+ StringBuffer sb = new StringBuffer().append(qValue).append(
+ ltrim(line));
+ feature.updateQualifier(currentQualifierName, sb.toString());
+ continue;
+ }
+ }
+ else
+ {
+ // It's the beginning of a qualifier line
+ Matcher matcher = patQualifierKey.matcher(line);
+ if (matcher.find())
+ {
+ String qName = matcher.group(1);
+ currentQualifierName = qName.replace("/", "");
+ line = line.replace(qName, "").replace("/", "")
+ .replace("=", "");
+ feature.addQualifier(currentQualifierName, ltrim(line));
+ parsingQualifier = true;
+ continue;
+ }
+ }
+ }
+ }
+ // Process REFERENCE line
+ if (line.startsWith("REFERENCE"))
+ {
+ if (!referenceMode)
+ {
+ // This line is the first REFERENCE line
+ referenceMode = true;
+ featureMode = false;
+ sourceMode = false;
+ seqMode = false;
+ }
+ else
+ {
+ // We were at referenceMode, then add current reference to the list
+ // and create a new one
+ references.add(reference);
+ }
+ reference = new GenBankReference();
+ String desc = processReferenceLine(line, "REFERENCE");
+ int[] ranges = parseReferenceDescriptor(desc);
+ reference.setDescriptor(desc);
+ reference.setOrder(ranges[0]);
+ reference.setBegin(ranges[1]);
+ reference.setEnd(ranges[2]);
+ parsingAuthors = false;
+ parsingTitle = false;
+ continue;
+ }
+
+ if (line.startsWith(" AUTHORS"))
+ {
+ if (referenceMode)
+ {
+ reference.setAuthors(processReferenceLine(line, "AUTHORS"));
+ parsingAuthors = true;
+ parsingTitle = false;
+ }
+ continue;
+ }
+ if (line.startsWith(" TITLE"))
+ {
+ if (referenceMode)
+ {
+ reference.setTitle(processReferenceLine(line, "TITLE"));
+ parsingAuthors = false;
+ parsingTitle = true;
+ }
+ continue;
+ }
+ if (line.startsWith(" JOURNAL"))
+ {
+ if (referenceMode)
+ {
+ reference.setJournal(processReferenceLine(line, "JOURNAL"));
+ parsingTitle = false;
+ parsingAuthors = false;
+ }
+ continue;
+ }
+ if (line.startsWith(" PUBMED"))
+ {
+ if (referenceMode)
+ {
+ reference.setPubmed(processReferenceLine(line, "PUBMED"));
+ parsingTitle = false;
+ parsingAuthors = false;
+ }
+ continue;
+ }
+
+ if (line.startsWith(" MEDLINE"))
+ {
+ if (referenceMode)
+ {
+ reference.setMedline(processReferenceLine(line, "MEDLINE"));
+ parsingTitle = false;
+ parsingAuthors = false;
+ }
+ continue;
+ }
+ if (line.startsWith(" REMARK"))
+ {
+ if (referenceMode)
+ {
+ reference.setRemark(processReferenceLine(line, "REMARK"));
+ parsingTitle = false;
+ parsingAuthors = false;
+ }
+ continue;
+ }
+ if (line.startsWith(" CONSRTM"))
+ {
+ if (referenceMode)
+ {
+ reference.setConsortia(processReferenceLine(line, "CONSRTM"));
+ parsingTitle = false;
+ parsingAuthors = false;
+ }
+ continue;
+ }
+
+ if (line.startsWith("SOURCE"))
+ {
+ parsingKeywords = false;
+ sourceMode = true;
+ commentMode = false;
+ if (sourceMode)
+ {
+ sourceLines.add(line);
+ }
+ continue;
+ }
+ if (line.indexOf("ORGANISM") != -1)
+ {
+ if (sourceMode)
+ {
+ sourceLines.add(line);
+ continue;
+ }
+ }
+
+ if (line.startsWith("COMMENT"))
+ {
+ if (reference != null)
+ references.add(reference);
+ commentMode = true;
+ sourceMode = false;
+ referenceMode = false;
+ sourceMode = false;
+ seqMode = false;
+ comments.add(processCommentLine(line));
+ continue;
+ }
+ // Process LOCUS line
+ if (line.startsWith("LOCUS"))
+ {
+ locus = parseLocus(line);
+ continue;
+ }
+ // Process BASE COUNT line
+ if (line.startsWith("BASE COUNT"))
+ {
+ baseCount = processHeaderLine(line, "BASE COUNT");
+ featureMode = false;
+ continue;
+ }
+ // Process DEFINITION line
+ if (line.startsWith("DEFINITION"))
+ {
+ definition = processHeaderLine(line, "DEFINITION");
+ parsingDefinition = true;
+ continue;
+ }
+ // Process ACCESSION line
+ if (line.startsWith("ACCESSION"))
+ {
+ accession = processHeaderLine(line, "ACCESSION");
+ parsingDefinition = false;
+ continue;
+ }
+ // Process VERSION line
+ if (line.startsWith("VERSION"))
+ {
+ version = parseVersion(line);
+ // headers.put("VERSION", processHeaderLine(line,"VERSION"));
+ continue;
+ }
+ // Process DBLINK line
+ if (line.startsWith("DBLINK"))
+ {
+ dblink = processHeaderLine(line, "DBLINK");
+ parsingDbLink = true;
+ continue;
+ }
+ // Process KEYWORDS line
+ if (line.startsWith("KEYWORDS"))
+ {
+ keywords = processHeaderLine(line, "KEYWORDS");
+ parsingKeywords = true;
+ parsingDbLink = false;
+ continue;
+ }
+ if (sourceMode)
+ {
+ sourceLines.add(line);
+ continue;
+ }
+ if (parsingDefinition)
+ {
+ StringBuffer sb = new StringBuffer().append(definition).append(
+ line);
+ definition = sb.toString();
+ continue;
+ }
+ if (referenceMode && parsingAuthors)
+ {
+ if (reference != null)
+ {
+ StringBuffer authors = new StringBuffer().append(
+ reference.getAuthors()).append(line);
+ reference.setAuthors(authors.toString());
+ }
+ continue;
+ }
+ if (referenceMode && parsingTitle)
+ {
+ if (reference != null)
+ {
+ StringBuffer title = new StringBuffer().append(
+ reference.getTitle()).append(line);
+ reference.setTitle(title.toString());
+ }
+ continue;
+ }
+ if (parsingKeywords)
+ {
+ StringBuffer sb = new StringBuffer().append(keywords)
+ .append(line);
+ keywords = sb.toString();
+ continue;
+ }
+ if (parsingDbLink)
+ {
+ StringBuffer sb = new StringBuffer().append(dblink).append(line);
+ dblink = sb.toString();
+ continue;
+ }
+ if (commentMode)
+ {
+ comments.add(line);
+ }
+ }
+ setEntries();
}
-
- private GenBankLocus parseLocus(String line){
- GenBankLocus loc = new GenBankLocus();
- Matcher mat = patLocus.matcher(line);
- if (mat.find()) {
- String name = mat.group(1);
- String len = mat.group(2);
- String strand = mat.group(3);
- String mtype = mat.group(4);
- String linear = mat.group(5);
- String division = mat.group(6);
- String date = mat.group(7);
-
- loc.setName(name == null ? "" : name.trim());
- loc.setSequenceLength(len == null ? 0 : Integer.parseInt(len));
- loc.setStrand(strand == null ? "" : strand);
- loc.setMoleculeType(mtype == null ? "" : mtype);
- loc.setLinearSequence("linear".equals(linear));
- loc.setDivision(division == null ? "" : division);
- loc.setModificationDate(date == null ? "" :date);
- }
- return loc;
+ else
+ {
+ // File is not valid
+ throw new IOException("GenBankFile is not valid.");
}
- private GenBankSource parseSource(List<String> lines){
- StringBuffer sb = new StringBuffer();
- for(String line:lines){
- sb.append(line).append(newline);
- }
- // Source section
- GenBankSource sou = new GenBankSource();
- String aux = sb.toString().substring(11);
- int fim1 = aux.indexOf("\n");
- if (fim1 > -1) {
- sou.setSource(aux.substring(0, fim1));
- int ini2 = aux.indexOf("ORGANISM");
- if (ini2 > -1) {
- fim1 = aux.indexOf("\n", ini2 + 10);
- if (fim1 > -1) {
- sou.setOrganism(aux.substring(ini2 + 10, fim1));
- sou.setTaxonomic(aux.substring(fim1).replaceAll(" ", "").replaceAll("\\s+", ""));
- } else {
- sou.setOrganism(aux);
- }
- }
- } else {
- sou.setSource(aux);
- }
- return sou;
- }
-
- /**
- * Possible situations:
- *
- * 467 Points to a single base in the presented sequence 340..565 Points to
- * a continuous range of bases bounded by and including the starting and
- * ending bases <345..500 Indicates that the exact lower boundary point
- * of a feature is unknown. The location begins at some base previous to the
- * first base specified (which need not be contained in the presented
- * sequence) and continues to and includes the ending base <1..888 The
- * feature starts before the first sequenced base and continues to and
- * includes base 888 1..>888 The feature starts at the first sequenced
- * base and continues beyond base 888 102.110 Indicates that the exact
- * location is unknown but that it is one of the bases between bases 102 and
- * 110, inclusive 123^124 Points to a site between bases 123 and 124
- * join(12..78,134..202) Regions 12 to 78 and 134 to 202 should be joined to
- * form one contiguous sequence complement(34..126) Start at the base
- * complementary to 126 and finish at the base complementary to base 34 (the
- * feature is on the strand complementary to the presented strand)
- * complement(join(2691..4571,4918..5163)) Joins regions 2691 to 4571 and
- * 4918 to 5163, then complements the joined segments (the feature is on the
- * strand complementary to the presented strand)
- * join(complement(4918..5163),complement(2691..4571)) Complements regions
- * 4918 to 5163 and 2691 to 4571, then joins the complemented segments (the
- * feature is on the strand complementary to the presented strand)
- * J00194.1:100..202 Points to bases 100 to 202, inclusive, in the entry (in
- * this database) with primary accession number 'J00194'
- * join(1..100,J00194.1:100..202) Joins region 1..100 of the existing entry
- * with the region 100..202 of remote entry J00194
- *
- * @param fea
- * @param localiza
- */
- private GenBankLocation parserFeatureLocation(GenBankFeature fea, String localiza) {
- // remove os espaços, quebra de linhas etc
- String buf = localiza.replaceAll("\\s", "");
-
- // checks if there is a comma present between ranges
- // complement(100..110),complement(90..100)
- char[] buf2 = buf.toCharArray();
- int abertos = 0;
- java.util.List<String> lista = new java.util.ArrayList<String>();
- int pinicial = 0;
- for (int i = 0; i < buf2.length; i++) {
- if (buf2[i] == '(') {
- abertos++;
- } else if (buf2[i] == ')') {
- abertos--;
- } else if (buf2[i] == ',' && abertos == 0) {
- lista.add(buf.substring(pinicial, i));
- pinicial = i + 1;
+ }
+
+ /**
+  * Maps the parsed GenBank data onto one sequence and registers it with
+  * setSeqs.
+  *
+  * The sequence is named after the accession and built from the collected
+  * sequence lines. It receives, in this order: a GENBANK database reference,
+  * one feature per header field (LOCUS, DEFINITION, ACCESSION, VERSION,
+  * DBLINK, KEYWORDS, SOURCE, BASE COUNT), one feature per REFERENCE, a
+  * single COMMENT feature when any comment lines exist, and finally one
+  * feature per parsed FEATURES entry that has a type.
+  */
+ protected void setEntries()
+ {
+   // Mapping GenBank info into Jalview data model
+   genBankSequence = new Sequence(accession,
+           DnaUtils.getNucleotidesFromSequenceVector(sequences));
+
+   // Mapping DBRefEntry
+   DBRefEntry dbRef = new DBRefEntry();
+   dbRef.setSource(DBRefSource.GENBANK);
+   dbRef.setVersion(version == null ? "" : version.toString());
+   dbRef.setAccessionId(accession);
+   // add map to indicate the sequence is a valid coordinate frame for the
+   // dbref
+   dbRef.setMap(new Mapping(null, new int[]
+   { 1, genBankSequence.getLength() }, new int[]
+   { 1, genBankSequence.getLength() }, 1, 1));
+   genBankSequence.addDBRef(dbRef);
+
+   // Header fields, each attached as a feature spanning 1..length.
+   // DEFINITION, ACCESSION and KEYWORDS are passed through as-is (possibly
+   // null); the others fall back to an empty description when absent.
+   addWholeSequenceFeature("LOCUS", locus == null ? "" : locus.toString());
+   addWholeSequenceFeature("DEFINITION", definition);
+   addWholeSequenceFeature("ACCESSION", accession);
+   addWholeSequenceFeature("VERSION",
+           version == null ? "" : version.toString());
+   addWholeSequenceFeature("DBLINK", dblink == null ? "" : dblink);
+   addWholeSequenceFeature("KEYWORDS", keywords);
+   addWholeSequenceFeature("SOURCE",
+           source == null ? "" : source.toString());
+   addWholeSequenceFeature("BASE COUNT", baseCount == null ? ""
+           : baseCount);
+
+   // Literature references: each uses the begin/end stored on the reference
+   for (GenBankReference gbRef : references)
+   {
+     SequenceFeature refFeature = new SequenceFeature("REFERENCE",
+             gbRef.toString(), null, gbRef.getBegin(), gbRef.getEnd(),
+             DBRefSource.GENBANK);
+     genBankSequence.addSequenceFeature(refFeature);
+   }
+
+   // All comment lines are joined into a single COMMENT feature
+   if (comments.size() > 0)
+   {
+     StringBuilder sb = new StringBuilder();
+     for (String comment : comments)
+     {
+       sb.append(comment).append(newline);
+     }
+     addWholeSequenceFeature("COMMENT", sb.toString());
+   }
+
+   // Mapping FEATURES
+   for (GenBankFeature feature : features)
+   {
+     if (feature.getType() != null)
+     {
+       SequenceFeature sf = new SequenceFeature();
+       sf.setType(feature.getType());
+       sf.setDescription(feature.getType());
+
+       // A feature without a location is placed at 0..0
+       sf.setBegin(feature.getLocation() == null ? 0 : feature
+               .getLocation().getMinor());
+       sf.setEnd(feature.getLocation() == null ? 0 : feature.getLocation()
+               .getMajor());
+
+       // Copy every qualifier onto the feature as a named value
+       Enumeration<String> names = feature.getQualifiersNames();
+       while (names.hasMoreElements())
+       {
+         String qName = names.nextElement();
+         sf.setValue(qName, feature.getQualifier(qName));
+       }
+       genBankSequence.addSequenceFeature(sf);
+     }
+   }
+
+   SequenceI[] parsedSeqs = new SequenceI[1];
+   parsedSeqs[0] = genBankSequence;
+   this.setSeqs(parsedSeqs);
+ }
+
+ /**
+  * Adds a GENBANK-group feature spanning 1..length of the current sequence.
+  *
+  * @param type
+  *          feature type, e.g. "LOCUS"
+  * @param description
+  *          feature description; passed through unchanged, may be null
+  */
+ private void addWholeSequenceFeature(String type, String description)
+ {
+   genBankSequence.addSequenceFeature(new SequenceFeature(type,
+           description, null, 1, genBankSequence.getLength(),
+           DBRefSource.GENBANK));
+ }
+
+ /**
+  * Parses a VERSION line, e.g. "VERSION U00096.2 GI:48994873".
+  *
+  * The version token is read from index 11 up to the first space found at
+  * or after index 12. When no such space exists (nothing follows the
+  * version token, or the line is shorter than that), the token runs to the
+  * end of the line instead of causing an index error.
+  *
+  * @param line
+  *          the raw VERSION line
+  * @return the parsed version, or null when the line holds only the
+  *         keyword
+  */
+ private GenBankVersion parseVersion(String line)
+ {
+   // A bare keyword carries no version information
+   if (line.trim().equalsIgnoreCase("VERSION"))
+   {
+     return null;
+   }
+
+   GenBankVersion ver = new GenBankVersion();
+
+   // Clamp both ends: indexOf returns -1 when the version is the last token
+   int start = Math.min(11, line.length());
+   int end = line.indexOf(" ", 12);
+   if (end < start)
+   {
+     end = line.length();
+   }
+   String v = line.substring(start, end).trim();
+   ver.setVersion(v);
+
+   // The GI identifier is optional; when present it is stored including
+   // its "GI:" prefix and everything after it
+   int posGI = line.indexOf("GI:", 11 + v.length());
+   if (posGI > -1)
+   {
+     ver.setGI(line.substring(posGI));
+   }
+   return ver;
+ }
+
+ /**
+  * Parses a LOCUS line with patLocus.
+  *
+  * @param line
+  *          the raw LOCUS line
+  * @return a locus filled from the seven captured groups, or an untouched
+  *         new GenBankLocus when the line does not match the pattern
+  */
+ private GenBankLocus parseLocus(String line)
+ {
+   GenBankLocus parsed = new GenBankLocus();
+   Matcher m = patLocus.matcher(line);
+   if (!m.find())
+   {
+     // Not a recognisable LOCUS line: hand back the empty locus
+     return parsed;
+   }
+
+   String locusName = m.group(1);
+   String lengthText = m.group(2);
+   String strandText = m.group(3);
+   String moleculeText = m.group(4);
+   String topologyText = m.group(5);
+   String divisionText = m.group(6);
+   String dateText = m.group(7);
+
+   // Missing groups fall back to "" (or 0 for the length)
+   if (locusName == null)
+   {
+     parsed.setName("");
+   }
+   else
+   {
+     parsed.setName(locusName.trim());
+   }
+
+   int sequenceLength = 0;
+   if (lengthText != null)
+   {
+     sequenceLength = Integer.parseInt(lengthText);
+   }
+   parsed.setSequenceLength(sequenceLength);
+
+   parsed.setStrand(strandText != null ? strandText : "");
+   parsed.setMoleculeType(moleculeText != null ? moleculeText : "");
+   // Only an exact "linear" in the captured field marks a linear sequence
+   parsed.setLinearSequence("linear".equals(topologyText));
+   parsed.setDivision(divisionText != null ? divisionText : "");
+   parsed.setModificationDate(dateText != null ? dateText : "");
+   return parsed;
+ }
+
+ /** Fills a GenBankSource (source, organism, taxonomic) from SOURCE lines. */
+ private GenBankSource parseSource(List<String> lines)
+ {
+   StringBuffer sb = new StringBuffer();
+   for (String line : lines)
+   {
+     sb.append(line).append(newline);
+   }
+   GenBankSource sou = new GenBankSource();
+   // skip the first 11 chars; guarded so empty/short input no longer throws
+   String aux = sb.length() > 11 ? sb.substring(11) : "";
+   int fim1 = aux.indexOf("\n");
+   if (fim1 < 0)
+   {
+     sou.setSource(aux); // no line break: the whole text is the source
+     return sou;
+   }
+   sou.setSource(aux.substring(0, fim1));
+   int ini2 = aux.indexOf("ORGANISM");
+   if (ini2 > -1)
+   {
+     fim1 = aux.indexOf("\n", ini2 + 10);
+     if (fim1 > -1)
+     {
+       sou.setOrganism(aux.substring(ini2 + 10, fim1));
+       sou.setTaxonomic(aux.substring(fim1)
+               .replaceAll(" ", "").replaceAll("\\s+", ""));
+     }
+     else
+     {
+       // nothing usable after the keyword: keep that remainder, not all of aux
+       sou.setOrganism(aux.substring(ini2 + 8).trim());
+     }
+   }
+   return sou;
+ }
+
+ /**
+ * Parses a feature location string. Possible location forms:
+ *
+ * 467 Points to a single base in the presented sequence 340..565 Points to a
+ * continuous range of bases bounded by and including the starting and ending
+ * bases <345..500 Indicates that the exact lower boundary point of a
+ * feature is unknown. The location begins at some base previous to the first
+ * base specified (which need not be contained in the presented sequence) and
+ * continues to and includes the ending base <1..888 The feature starts
+ * before the first sequenced base and continues to and includes base 888
+ * 1..>888 The feature starts at the first sequenced base and continues
+ * beyond base 888 102.110 Indicates that the exact location is unknown but
+ * that it is one of the bases between bases 102 and 110, inclusive 123^124
+ * Points to a site between bases 123 and 124 join(12..78,134..202) Regions 12
+ * to 78 and 134 to 202 should be joined to form one contiguous sequence
+ * complement(34..126) Start at the base complementary to 126 and finish at
+ * the base complementary to base 34 (the feature is on the strand
+ * complementary to the presented strand)
+ * complement(join(2691..4571,4918..5163)) Joins regions 2691 to 4571 and 4918
+ * to 5163, then complements the joined segments (the feature is on the strand
+ * complementary to the presented strand)
+ * join(complement(4918..5163),complement(2691..4571)) Complements regions
+ * 4918 to 5163 and 2691 to 4571, then joins the complemented segments (the
+ * feature is on the strand complementary to the presented strand)
+ * J00194.1:100..202 Points to bases 100 to 202, inclusive, in the entry (in
+ * this database) with primary accession number 'J00194'
+ * join(1..100,J00194.1:100..202) Joins region 1..100 of the existing entry
+ * with the region 100..202 of remote entry J00194
+ *
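+ * @param fea feature that receives the parsed location via setLocation
+ * @param localiza raw location text; whitespace is removed before parsing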
+ */
+ private GenBankLocation parserFeatureLocation(GenBankFeature fea,
+ String localiza)
+ {
+ // strip spaces, line breaks, etc.
+ String buf = localiza.replaceAll("\\s", "");
+
+ // checks if there is a comma present between ranges
+ // complement(100..110),complement(90..100)
+ char[] buf2 = buf.toCharArray();
+ int abertos = 0;
+ java.util.List<String> lista = new java.util.ArrayList<String>();
+ int pinicial = 0;
+ for (int i = 0; i < buf2.length; i++)
+ {
+ if (buf2[i] == '(')
+ {
+ abertos++;
+ }
+ else if (buf2[i] == ')')
+ {
+ abertos--;
+ }
+ else if (buf2[i] == ',' && abertos == 0)
+ {
+ lista.add(buf.substring(pinicial, i));
+ pinicial = i + 1;
+ }
+ }
+ if (lista.size() > 0)
+ {
+ lista.add(buf.substring(pinicial));
+ GenBankLocations um = new GenBankLocations();
+ um.setOperator(GenBankLocations.NONE);
+ for (String s : lista)
+ {
+ um.getUnits().add(parserFeatureLocation(fea, s));
+ }
+ fea.setLocation(um);
+ return um;
+ }
+
+ // handle the operators: complement(location,location...),
+ // join(location,location...), order(location,location...)
+ if (buf.contains("("))
+ {
+ GenBankLocations um = new GenBankLocations();
+ int ini = buf.indexOf("(");
+ int fim = buf.lastIndexOf(")");
+ String token = buf.substring(0, ini);
+ if ("complement".equalsIgnoreCase(token))
+ {
+ String inter = buf.substring(ini + 1, fim);
+ GenBankLocation interno = parserFeatureLocation(fea, inter);
+ interno.setComplement(true);
+ um.setOperator(GenBankLocations.COMPLEMENT);
+ um.getUnits().add(interno);
+ fea.setLocation(um);
+ }
+ else if ("join".equalsIgnoreCase(token))
+ {
+ String inter = buf.substring(ini + 1, fim);
+ GenBankLocation interno = parserFeatureLocation(fea, inter);
+ um.setOperator(GenBankLocations.JOIN);
+ um.getUnits().add(interno);
+ fea.setLocation(um);
+ }
+ else if ("order".equalsIgnoreCase(token))
+ {
+ String inter = buf.substring(ini + 1, fim);
+ GenBankLocation interno = parserFeatureLocation(fea, inter);
+ um.setOperator(GenBankLocations.ORDER);
+ um.getUnits().add(interno);
+ fea.setLocation(um);
+ }
+ else
+ {
+ log.log(Level.WARNING,
+ "Token desconhecido em location/features - {0}", token);
+ String inter = buf.substring(ini + 1, fim);
+ fea.setLocation(parserFeatureLocation(fea, inter));
+ }
+ return fea.getLocation();
+ }
+ else
+ {
+ // handle a comma-separated list of locations
+ if (buf.contains(","))
+ {
+ String[] partes = buf.split(",");
+ GenBankLocations um = new GenBankLocations();
+ for (String p : partes)
+ {
+ um.getUnits().add(parserFeatureLocation(fea, p));
+ }
+ fea.setLocation(um);
+ return um;
+ }
+ else
+ {
+ // handle a range (start..end)
+ if (buf.contains(".."))
+ {
+ String[] partes = buf.split("\\.\\.");
+ GenBankLocationRange range = new GenBankLocationRange();
+ if (buf.contains(":"))
+ {
+ for (int i = 0; i < partes.length; i++)
+ {
+ int pos = partes[i].indexOf(":");
+ if (pos > 0)
+ {
+ String entry = partes[i].substring(0, pos);
+ partes[i] = partes[i].substring(pos + 1);
+ range.setEntry(entry);
+ }
}
+ }
+ GenBankLocationPoint gp0 = (GenBankLocationPoint) parserFeatureLocation(
+ fea, partes[0]);
+ range.setStart(gp0);
+ GenBankLocationPoint gp1 = (GenBankLocationPoint) parserFeatureLocation(
+ fea, partes[1]);
+ range.setEnd(gp1);
+ fea.setLocation(range);
+ return range;
}
- if (lista.size() > 0) {
- lista.add(buf.substring(pinicial));
- GenBankLocations um = new GenBankLocations();
- um.setOperator(GenBankLocations.NONE);
- for (String s : lista) {
- um.getUnits().add(parserFeatureLocation(fea, s));
+ else
+ {
+ // handle a single point
+ // forms considered:
+ // 467
+ // 102.110
+ // 123^124
+ // <345
+ // >400
+ // 345>
+ // 400<
+ // or a combination of these
+ GenBankLocationPoint gp = new GenBankLocationPoint();
+ if (buf.contains(":"))
+ {
+ int pos = buf.indexOf(":");
+ if (pos > 0)
+ {
+ String entry = buf.substring(0, pos);
+ buf = buf.substring(pos + 1);
+ gp.setEntry(entry);
}
- fea.setLocation(um);
- return um;
- }
-
- // trata as funcoes: complement(location,location...),
- // join(location,location...), order(location,location...)
- if (buf.contains("(")) {
- GenBankLocations um = new GenBankLocations();
- int ini = buf.indexOf("(");
- int fim = buf.lastIndexOf(")");
- String token = buf.substring(0, ini);
- if ("complement".equalsIgnoreCase(token)) {
- String inter = buf.substring(ini + 1, fim);
- GenBankLocation interno = parserFeatureLocation(fea, inter);
- interno.setComplement(true);
- um.setOperator(GenBankLocations.COMPLEMENT);
- um.getUnits().add(interno);
- fea.setLocation(um);
- } else if ("join".equalsIgnoreCase(token)) {
- String inter = buf.substring(ini + 1, fim);
- GenBankLocation interno = parserFeatureLocation(fea, inter);
- um.setOperator(GenBankLocations.JOIN);
- um.getUnits().add(interno);
- fea.setLocation(um);
- } else if ("order".equalsIgnoreCase(token)) {
- String inter = buf.substring(ini + 1, fim);
- GenBankLocation interno = parserFeatureLocation(fea, inter);
- um.setOperator(GenBankLocations.ORDER);
- um.getUnits().add(interno);
- fea.setLocation(um);
- } else {
- log.log(Level.WARNING, "Token desconhecido em location/features - {0}", token);
- String inter = buf.substring(ini + 1, fim);
- fea.setLocation(parserFeatureLocation(fea, inter));
+ }
+ int pos = 0;
+ // check for a < or > symbol before the first number
+ if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>')
+ {
+ gp.setPrefix(buf.charAt(pos));
+ pos++;
+ }
+ // read the first number
+ int ini = pos;
+ while (pos < buf.length() && buf.charAt(pos) >= '0'
+ && buf.charAt(pos) <= '9')
+ {
+ pos++;
+ }
+ if (buf.subSequence(ini, pos).length() < 1)
+ {
+ System.out.println(localiza);
+ }
+ int num = Integer.parseInt(buf.substring(ini, pos));
+ int num2 = num;
+ // the first number may be the only number
+ if (pos < buf.length())
+ {
+ // check for a < or > sign after the first number
+ if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>')
+ {
+ if (buf.contains(".") || buf.contains("^"))
+ {
+ gp.setPrefix(buf.charAt(pos));
+ }
+ else
+ {
+ gp.setSufix(buf.charAt(pos));
+ }
+ pos++;
}
- return fea.getLocation();
- } else {
- // trata quando tiver uma lista de location
- if (buf.contains(",")) {
- String[] partes = buf.split(",");
- GenBankLocations um = new GenBankLocations();
- for (String p : partes) {
- um.getUnits().add(
- parserFeatureLocation(fea, p));
- }
- fea.setLocation(um);
- return um;
- } else {
- // trata quando tiver range
- if (buf.contains("..")) {
- String[] partes = buf.split("\\.\\.");
- GenBankLocationRange range = new GenBankLocationRange();
- if (buf.contains(":")) {
- for (int i = 0; i < partes.length; i++) {
- int pos = partes[i].indexOf(":");
- if (pos > 0) {
- String entry = partes[i].substring(0, pos);
- partes[i] = partes[i].substring(pos + 1);
- range.setEntry(entry);
- }
- }
- }
- GenBankLocationPoint gp0 = (GenBankLocationPoint) parserFeatureLocation(fea, partes[0]);
- range.setStart(gp0);
- GenBankLocationPoint gp1 = (GenBankLocationPoint) parserFeatureLocation(fea, partes[1]);
- range.setEnd(gp1);
- fea.setLocation(range);
- return range;
- } else {
- // trata um ponto
- // possibilidades consideradas:
- // 467
- // 102.110
- // 123^124
- // <345
- // >400
- // 345>
- // 400<
- // ou uma combinacao dessas
- GenBankLocationPoint gp = new GenBankLocationPoint();
- if (buf.contains(":")) {
- int pos = buf.indexOf(":");
- if (pos > 0) {
- String entry = buf.substring(0, pos);
- buf = buf.substring(pos + 1);
- gp.setEntry(entry);
- }
- }
- int pos = 0;
- // verifica os simb < e > antes do primeiro numero
- if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>') {
- gp.setPrefix(buf.charAt(pos));
- pos++;
- }
- // pega o primeiro numero
- int ini = pos;
- while (pos < buf.length() && buf.charAt(pos) >= '0'
- && buf.charAt(pos) <= '9') {
- pos++;
- }
- if (buf.subSequence(ini, pos).length() < 1) {
- System.out.println(localiza);
- }
- int num = Integer.parseInt(buf.substring(ini, pos));
- int num2 = num;
- // o primeiro numero pode ser o unico numero
- if (pos < buf.length()) {
- // verifica se tem os sinais < e > apos o primeiro numero
- if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>') {
- if (buf.contains(".") || buf.contains("^")) {
- gp.setPrefix(buf.charAt(pos));
- } else {
- gp.setSufix(buf.charAt(pos));
- }
- pos++;
- }
-
- // verifica a separacao dos numeros . ou ^
- if (pos < buf.length()
- && (buf.charAt(pos) == '.' || buf.charAt(pos) == '^')) {
- // separação localizada, possibilidade de mais numero
- gp.setSymbol(buf.charAt(pos));
- pos++;
-
- // verifica os simb < e > antes do segundo numero
- if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>') {
- gp.setSufix(buf.charAt(pos));
- pos++;
- }
-
- // pega o segundo numero
- ini = pos;
- while (pos < buf.length() && buf.charAt(pos) >= '0'
- && buf.charAt(pos) <= '9') {
- pos++;
- }
- num2 = Integer.parseInt(buf.substring(ini, pos));
-
- // verifica os simb < e > após o segundo numero
- if (pos < buf.length() && (buf.charAt(pos) == '<' || buf.charAt(pos) == '>')) {
- gp.setSufix(buf.charAt(pos));
- pos++;
- }
- }
- }
- gp.setMin(num);
- gp.setMax(num2);
- fea.setLocation(gp);
- return gp;
- }
+
+ // check for the number separator: . or ^
+ if (pos < buf.length()
+ && (buf.charAt(pos) == '.' || buf.charAt(pos) == '^'))
+ {
+ // separator found, so another number may follow
+ gp.setSymbol(buf.charAt(pos));
+ pos++;
+
+ // check for a < or > symbol before the second number
+ if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>')
+ {
+ gp.setSufix(buf.charAt(pos));
+ pos++;
+ }
+
+ // read the second number
+ ini = pos;
+ while (pos < buf.length() && buf.charAt(pos) >= '0'
+ && buf.charAt(pos) <= '9')
+ {
+ pos++;
+ }
+ num2 = Integer.parseInt(buf.substring(ini, pos));
+
+ // check for a < or > symbol after the second number
+ if (pos < buf.length()
+ && (buf.charAt(pos) == '<' || buf.charAt(pos) == '>'))
+ {
+ gp.setSufix(buf.charAt(pos));
+ pos++;
+ }
}
+ }
+ gp.setMin(num);
+ gp.setMax(num2);
+ fea.setLocation(gp);
+ return gp;
}
+ }
+ }
+ }
+
+ /** Parses "N (bases A to B)" into {N, A, B}; A and B stay 0 if absent. */
+ private int[] parseReferenceDescriptor(String descriptor)
+ {
+   // bare "N", "N (sites)" and multi-range forms made the old split() throw
+   int paren = descriptor.indexOf('(');
+   String num = paren < 0 ? descriptor : descriptor.substring(0, paren);
+   Matcher m = Pattern.compile("(\\d+)\\s+to\\s+(\\d+)").matcher(descriptor);
+   boolean ranged = m.find();
+   return new int[] { Integer.parseInt(num.trim()),
+       ranged ? Integer.parseInt(m.group(1)) : 0,
+       ranged ? Integer.parseInt(m.group(2)) : 0 };
+ }
+
+ /**
+  * Strips every occurrence of component from line; the line is returned
+  * unchanged when component does not occur in it.
+  */
+ private String processReferenceLine(String line, String component)
+ {
+   boolean present = line.indexOf(component) != -1;
+   return present ? line.replace(component, "") : line;
+ }
+
+ private String processHeaderLine(String line, String header)
+ {
+ int init = line.indexOf(header);
+ if (init != -1)
+ {
+ line = line.replace(header, "");
}
-
- private int[] parseReferenceDescriptor(String descriptor){
- // 1 (bases 1 to 1609)
- int[] resultado = new int[3];
- descriptor = descriptor.replace("(bases", ",").replace("to", ",").replace(")", "");
- String[] args = descriptor.split(",");
- resultado[0] = Integer.parseInt(args[0].trim());
- resultado[1] = Integer.parseInt(args[1].trim());
- resultado[2] = Integer.parseInt(args[2].trim());
- return resultado;
+ return line;
+ }
+
+ /**
+  * Turns a sequence line ("id chunk chunk ...") into a GenBankSequence.
+  */
+ private GenBankSequence processSequenceLine(String line)
+ {
+   GenBankSequence parsed = new GenBankSequence();
+   String[] tokens = ltrim(line).split(" ");
+   parsed.setId(Integer.parseInt(tokens[0]));
+   // everything after the id token is sequence data
+   parsed.setSequences(new Vector<String>(
+           java.util.Arrays.asList(tokens).subList(1, tokens.length)));
+   return parsed;
+ }
+
+ /**
+  * Removes every occurrence of the "COMMENT" keyword from line; a line
+  * without the keyword is returned as-is.
+  */
+ private String processCommentLine(String line)
+ {
+   final String keyword = "COMMENT";
+   return line.indexOf(keyword) < 0 ? line : line.replace(keyword, "");
+ }
+
+ public String rtrim(String s)
+ {
+ int i = s.length() - 1;
+ while (i >= 0 && Character.isWhitespace(s.charAt(i)))
+ {
+ i--;
}
- private String processReferenceLine(String line, String component){
- int init = line.indexOf(component);
- if (init!=-1){
- line = line.replace(component,"");
- }
- return line;
- }
- private String processHeaderLine(String line, String header){
- int init = line.indexOf(header);
- if (init!=-1){
- line = line.replace(header,"");
- }
- return line;
- }
-
- private GenBankSequence processSequenceLine(String line) {
- GenBankSequence gbs = new GenBankSequence();
- line = ltrim(line);
- String[] args = line.split(" ");
- gbs.setId(Integer.parseInt(args[0]));
- int len = args.length-1;
- Vector<String> seqs = new Vector<String>();
- for (int i=0;i<len;i++)
- seqs.add(args[i+1]);
- gbs.setSequences(seqs);
- return gbs;
- }
-
- private String processCommentLine(String line){
- int init = line.indexOf("COMMENT");
- if (init!=-1){
- line = line.replace("COMMENT","");
- }
- return line;
- }
- public String rtrim(String s) {
- int i = s.length()-1;
- while (i >= 0 && Character.isWhitespace(s.charAt(i))) {
- i--;
- }
- return s.substring(0,i+1);
+ return s.substring(0, i + 1);
+ }
+
+ public String ltrim(String s)
+ {
+ int i = 0;
+ while (i < s.length() && Character.isWhitespace(s.charAt(i)))
+ {
+ i++;
}
+ return s.substring(i);
+ }
- public String ltrim(String s) {
- int i = 0;
- while (i < s.length() && Character.isWhitespace(s.charAt(i))) {
- i++;
- }
- return s.substring(i);
- }
-
- public String print(){
- StringBuffer out = new StringBuffer();
- for (SequenceI seq: this.getSeqs()){
- SequenceFeature[] seqFeatures = seq.getSequenceFeatures();
- boolean featureLinePrinted = false;
- for(SequenceFeature sf:seqFeatures){
- if(sf.getType().equals("LOCUS")){
- out.append(sf.getDescription()).append(newline);
- }else if (sf.getType().equals("DEFINITION")){
- out.append("DEFINITION ").append(sf.getDescription()).append(newline);
- }else if (sf.getType().equals("VERSION")){
- out.append("VERSION ").append(sf.getDescription()).append(newline);
- }else if (sf.getType().equals("ACCESSION")){
- out.append("ACCESSION ").append(sf.getDescription()).append(newline);
- }else if (sf.getType().equals("DBLINK")){
- out.append("DBLINK ").append(sf.getDescription()).append(newline);
- }else if (sf.getType().equals("KEYWORDS")){
- out.append("KEYWORDS ").append(sf.getDescription()).append(newline);
- }else if (sf.getType().equals("SOURCE")){
- out.append("SOURCE ").append(sf.getDescription()).append(newline);
- }else if (sf.getType().equals("REFERENCE")){
- out.append(sf.getDescription()).append(newline);
- }else if (sf.getType().equals("COMMENT")){
- out.append("COMMENT ").append(sf.getDescription()).append(newline);
- }else if (sf.getType().equals("BASE COUNT")){
- out.append("BASE COUNT ").append(sf.getDescription()).append(newline);
- }else{
- if (!featureLinePrinted){
- out.append("FEATURES Location/Qualifiers").append(newline);
- featureLinePrinted = true;
- }
- out.append(" ").append(sf.getType()).append(" ").append(sf.getBegin()).append("..").append(sf.getEnd()).append(newline);
- Hashtable<String,String> qualifiers = sf.otherDetails;
- if (qualifiers!=null){
- Enumeration<String> keys = qualifiers.keys();
- while (keys.hasMoreElements()){
- String key = keys.nextElement();
- String value = qualifiers.get(key);
- if (value!=null){
- out.append(" /").append(key).append("=").append(value).append(newline);
- }
- }
- }
- }
- }
- out.append("ORIGIN").append(newline);
- //We have to divide sequence in groups of 6x10 chars
- String sequenceString = seq.getSequenceAsString();
- int howManyGroups = (int) Math.floor(sequenceString.length()/60);
- for (int i=0;i<=howManyGroups;i++){
- String sequenceSegment = sequenceString.substring(i*60,Math.min((i+1)*60, sequenceString.length()));
- if ((!"".equals(sequenceSegment) && (sequenceSegment!=null) && (sequenceSegment.length()>0))){
- out.append(" ").append(60*i+1).append(" ");
- }
- int segmentLength = sequenceSegment.length();
- if (segmentLength>=10){
- out.append(sequenceSegment.substring(0,10)).append(" ");
- if (segmentLength>=20){
- out.append(sequenceSegment.substring(10,20)).append(" ");
- if (segmentLength>=30){
- out.append(sequenceSegment.substring(20,30)).append(" ");
- if (segmentLength>=40){
- out.append(sequenceSegment.substring(30,40)).append(" ");
- if (segmentLength>=50){
- out.append(sequenceSegment.substring(40,50)).append(" ");
- if (segmentLength<=60){
- out.append(sequenceSegment.substring(50,sequenceSegment.length()));
- }
- }else{
- out.append(sequenceSegment.substring(40,sequenceSegment.length()));
- }
- }else{
- out.append(sequenceSegment.substring(30,sequenceSegment.length()));
- }
- }else{
- out.append(sequenceSegment.substring(20,sequenceSegment.length()));
- }
- }else{
- out.append(sequenceSegment.substring(10,sequenceSegment.length()));
- }
- } else if ((!"".equals(sequenceSegment) && (sequenceSegment!=null) && (sequenceSegment.length()>0))){
- out.append(sequenceSegment);
- }
- out.append(newline);
- }
- out.append("//");
- }
- return out.toString();
+ public String print()
+ {
+ StringBuffer out = new StringBuffer();
+ for (SequenceI seq : this.getSeqs())
+ {
+ SequenceFeature[] seqFeatures = seq.getSequenceFeatures();
+ boolean featureLinePrinted = false;
+ for (SequenceFeature sf : seqFeatures)
+ {
+ if (sf.getType().equals("LOCUS"))
+ {
+ out.append(sf.getDescription()).append(newline);
+ }
+ else if (sf.getType().equals("DEFINITION"))
+ {
+ out.append("DEFINITION ").append(sf.getDescription())
+ .append(newline);
+ }
+ else if (sf.getType().equals("VERSION"))
+ {
+ out.append("VERSION ").append(sf.getDescription())
+ .append(newline);
+ }
+ else if (sf.getType().equals("ACCESSION"))
+ {
+ out.append("ACCESSION ").append(sf.getDescription())
+ .append(newline);
+ }
+ else if (sf.getType().equals("DBLINK"))
+ {
+ out.append("DBLINK ").append(sf.getDescription()).append(newline);
+ }
+ else if (sf.getType().equals("KEYWORDS"))
+ {
+ out.append("KEYWORDS ").append(sf.getDescription())
+ .append(newline);
+ }
+ else if (sf.getType().equals("SOURCE"))
+ {
+ out.append("SOURCE ").append(sf.getDescription())
+ .append(newline);
+ }
+ else if (sf.getType().equals("REFERENCE"))
+ {
+ out.append(sf.getDescription()).append(newline);
+ }
+ else if (sf.getType().equals("COMMENT"))
+ {
+ out.append("COMMENT ").append(sf.getDescription())
+ .append(newline);
+ }
+ else if (sf.getType().equals("BASE COUNT"))
+ {
+ out.append("BASE COUNT ").append(sf.getDescription())
+ .append(newline);
+ }
+ else
+ {
+ if (!featureLinePrinted)
+ {
+ out.append("FEATURES Location/Qualifiers").append(
+ newline);
+ featureLinePrinted = true;
+ }
+ out.append(" ").append(sf.getType()).append(" ")
+ .append(sf.getBegin()).append("..").append(sf.getEnd())
+ .append(newline);
+ Hashtable<String, String> qualifiers = sf.otherDetails;
+ if (qualifiers != null)
+ {
+ Enumeration<String> keys = qualifiers.keys();
+ while (keys.hasMoreElements())
+ {
+ String key = keys.nextElement();
+ String value = qualifiers.get(key);
+ if (value != null)
+ {
+ out.append(" /").append(key)
+ .append("=").append(value).append(newline);
+ }
+ }
+ }
+ }
+ }
+ out.append("ORIGIN").append(newline);
+ // We have to divide sequence in groups of 6x10 chars
+ String sequenceString = seq.getSequenceAsString();
+ int howManyGroups = (int) Math.floor(sequenceString.length() / 60);
+ for (int i = 0; i <= howManyGroups; i++)
+ {
+ String sequenceSegment = sequenceString.substring(i * 60,
+ Math.min((i + 1) * 60, sequenceString.length()));
+ if ((!"".equals(sequenceSegment) && (sequenceSegment != null) && (sequenceSegment
+ .length() > 0)))
+ {
+ out.append(" ").append(60 * i + 1).append(" ");
+ }
+ int segmentLength = sequenceSegment.length();
+ if (segmentLength >= 10)
+ {
+ out.append(sequenceSegment.substring(0, 10)).append(" ");
+ if (segmentLength >= 20)
+ {
+ out.append(sequenceSegment.substring(10, 20)).append(" ");
+ if (segmentLength >= 30)
+ {
+ out.append(sequenceSegment.substring(20, 30)).append(" ");
+ if (segmentLength >= 40)
+ {
+ out.append(sequenceSegment.substring(30, 40)).append(" ");
+ if (segmentLength >= 50)
+ {
+ out.append(sequenceSegment.substring(40, 50)).append(" ");
+ if (segmentLength <= 60)
+ {
+ out.append(sequenceSegment.substring(50,
+ sequenceSegment.length()));
+ }
+ }
+ else
+ {
+ out.append(sequenceSegment.substring(40,
+ sequenceSegment.length()));
+ }
+ }
+ else
+ {
+ out.append(sequenceSegment.substring(30,
+ sequenceSegment.length()));
+ }
+ }
+ else
+ {
+ out.append(sequenceSegment.substring(20,
+ sequenceSegment.length()));
+ }
+ }
+ else
+ {
+ out.append(sequenceSegment.substring(10,
+ sequenceSegment.length()));
+ }
+ }
+ else if ((!"".equals(sequenceSegment) && (sequenceSegment != null) && (sequenceSegment
+ .length() > 0)))
+ {
+ out.append(sequenceSegment);
+ }
+ out.append(newline);
+ }
+ out.append("//");
}
+ return out.toString();
+ }
}