From: Jim Procter Date: Mon, 9 Jun 2014 09:42:00 +0000 (+0100) Subject: JAL-1260 source formatting according to utils/eclipse/JalviewCodeStyle.xml X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=96f10f96255b4e9bb88601ce01adbf5faf16544c;p=jalview.git JAL-1260 source formatting according to utils/eclipse/JalviewCodeStyle.xml --- diff --git a/src/jalview/datamodel/DBRefSource.java b/src/jalview/datamodel/DBRefSource.java index b3b3b64..a888abc 100644 --- a/src/jalview/datamodel/DBRefSource.java +++ b/src/jalview/datamodel/DBRefSource.java @@ -74,10 +74,12 @@ public class DBRefSource * GeneDB ID */ public static final String GENEDB = "GeneDB"; + /** * GeneBank */ public static final String GENBANK = "GenBank"; + /** * List of databases whose sequences might have coding regions annotated */ diff --git a/src/jalview/io/DnaUtils.java b/src/jalview/io/DnaUtils.java index acd0bb9..5d5a603 100644 --- a/src/jalview/io/DnaUtils.java +++ b/src/jalview/io/DnaUtils.java @@ -7,55 +7,78 @@ import java.util.ArrayList; import java.util.List; import java.util.Vector; -public class DnaUtils { - - /** - * @param gbf CDS feature data - * @param sequences ORIGIN data - * @return Nucleotid String (sequence) of CDS - */ - public static String getSequence(GenBankFeature gbf, Vector sequences){ - if (!gbf.getType().equals(GenBankFeature.CDS)){ - //If the feature is not a CDS, no sequence is returned - return null; - }else{ - String range = gbf.getQualifier("range"); - if (range.startsWith("join")){ - //TODO - //It's a composed sequence - }else{ - //It's a simple range - String[] positions = range.split(".."); - int initRange = Integer.parseInt(positions[0]); - int endRange = Integer.parseInt(positions[1]); - String sourceSequence = getNucleotidesFromSequenceVector(sequences); - return sourceSequence.substring(initRange, endRange); - } - } - return null; - - } - private static boolean isSequenceInRange(int initRange, int endRange, GenBankSequence gbs){ - return ((initRange>=gbs.getId()) && (endRange>=gbs.getId())); - } - private static String getNucleotidesInRangeFromSequence(int initRange, int endRange, GenBankSequence gbs){ - return ""; - } - public static String getNucleotidesFromSequenceVector(Vector v){ - StringBuffer sb = new StringBuffer(); - for (GenBankSequence gbs:v){ - Vector seqs = gbs.getSequences(); - for (String s:seqs) - sb.append(s); - } - return sb.toString(); - } - /** - * @param args - */ - public static void main(String[] args) { - // TODO Auto-generated method stub - - } +public class DnaUtils +{ + + /** + * @param gbf + * CDS feature data + * @param sequences + * ORIGIN data + * @return Nucleotid String (sequence) of CDS + */ + public static String getSequence(GenBankFeature gbf, + Vector sequences) + { + if (!gbf.getType().equals(GenBankFeature.CDS)) + { + // If the feature is not a CDS, no sequence is returned + return null; + } + else + { + String range = gbf.getQualifier("range"); + if (range.startsWith("join")) + { + // TODO + // It's a composed sequence + } + else + { + // It's a simple range + String[] positions = range.split(".."); + int initRange = Integer.parseInt(positions[0]); + int endRange = Integer.parseInt(positions[1]); + String sourceSequence = getNucleotidesFromSequenceVector(sequences); + return sourceSequence.substring(initRange, endRange); + } + } + return null; + + } + + private static boolean isSequenceInRange(int initRange, int endRange, + GenBankSequence gbs) + { + return ((initRange >= gbs.getId()) && (endRange >= gbs.getId())); + } + + private static String getNucleotidesInRangeFromSequence(int initRange, + int endRange, GenBankSequence gbs) + { + return ""; + } + + public static String getNucleotidesFromSequenceVector( + Vector v) + { + StringBuffer sb = new StringBuffer(); + for (GenBankSequence gbs : v) + { + Vector seqs = gbs.getSequences(); + for (String s : seqs) + sb.append(s); + } + return sb.toString(); + } + + /** + * @param args + */ + public static void main(String[] args) + { + // TODO Auto-generated method stub + + } } diff --git a/src/jalview/io/GenBankFile.java b/src/jalview/io/GenBankFile.java index 4715095..1c17c05 100644 --- a/src/jalview/io/GenBankFile.java +++ b/src/jalview/io/GenBankFile.java @@ -32,842 +32,1108 @@ import java.util.regex.Pattern; import org.apache.james.mime4j.field.ParsedField; -public class GenBankFile extends AlignFile { - private static final Logger log = Logger.getLogger(GenBankFile.class.getName()); - private GenBankVersion version = new GenBankVersion(); - private GenBankLocus locus = new GenBankLocus(); - private GenBankSource source = new GenBankSource(); - private static final Pattern patLocation = Pattern.compile("(\\d+)\\.\\.(\\d+)"); - private static final Pattern patLocationComp = Pattern.compile("(complement)\\((\\d+)\\.\\.(\\d+)\\)"); - private static final Pattern patLocus = Pattern.compile("^LOCUS +([a-z|A-Z|0-9|_]+) +([0-9]+) bp ( {3}|ss\\-|ds\\-|ms\\-)([a-z|A-Z|-|\\s]+) ([a-z| ]{8}) ([A-Z| ]{3}) ([0-9]+-[A-Z]+-[0-9]+)"); - private static final Pattern patQualifierKey = Pattern.compile("/(.*?)="); - private static final Pattern patFeatureKey = Pattern.compile("^\\s{5}([A-Za-z0-9\\_\\']+)\\s+"); - - private String definition; - private String accession; - private String keywords; - private String dblink; - private String baseCount; - - private Vector features; - private Vector comments; - //Items under origin - private Vector sequences; - private Vector references; - - private SequenceI genBankSequence; - - public GenBankFile() { - } - - public GenBankFile(String inFile, String type) throws IOException { - super(inFile, type); - } - - public GenBankFile(FileParse source) throws IOException { - super(source); - } - - public void initData() { - super.initData(); - features = new Vector(); - comments = new Vector(); - sequences = new Vector(); - references = new Vector(); - } - - public void parse() throws IOException { - String line; - boolean featureMode = false; //FEATURES found - boolean seqMode = false; //Parsing Sequences from SOURCE - boolean referenceMode = false; //REFERENCE found - boolean sourceMode = false; //SOURCE found - boolean commentMode = false; //COMMENT found - boolean parsingAuthors = false; //Parsing authors (multiline) - boolean parsingDefinition = false; //Parsing definition (multiline) - boolean parsingKeywords = false; //Parsing keywords (multiline) - boolean parsingDbLink = false; //Parsing DBLINK (multiline) - boolean parsingTitle = false; //Parsing title (multiline) - boolean parsingQualifier = false; //Parsing feature qualifier (multine) - String currentQualifierName = ""; - GenBankReference reference = null; - GenBankFeature feature = null; - List sourceLines = new ArrayList(); - - if (this.isValid()){ - - while ((line = nextLine()) != null) { - // We only process lines if they have contents within - if (line.length() == 0) - continue; - - if (line.startsWith("FEATURES")){ - featureMode = true; - seqMode = false; - referenceMode = false; - sourceMode = false; - commentMode = false; - feature = new GenBankFeature(); - source = parseSource(sourceLines); - } - - - if (seqMode) { - if (!line.startsWith("//")){ - GenBankSequence seq = processSequenceLine(line); - sequences.add(seq); - } - featureMode = false; - referenceMode = false; - sourceMode = false; - } - - if (line.startsWith("ORIGIN")){ - if (feature.getType()!=null) - features.add(feature); - featureMode = false; - referenceMode = false; - sourceMode = false; - seqMode = true; - } - - if (featureMode){ - // Process feature line - if (!line.startsWith("FEATURES") && !line.startsWith("BASE COUNT")){ - //Parse type - if (!line.trim().startsWith("/")){ - Matcher featuresMatch = patFeatureKey.matcher(line); - if (featuresMatch.find()){ - if (feature.getType()!=null) - features.add(feature); //Hay que añadirlo sólo si no se está a mitad de un qualif o una feature - //It's a feature - String type = featuresMatch.group(0); - feature = new GenBankFeature(); - feature.setType(type); - GenBankLocation loc = parserFeatureLocation(feature, line.replace(type,"")); - feature.setLocation(loc); - parsingQualifier = false; - continue; - }else if (parsingQualifier) { //If not a feature, it's another part of a qualifier - String qValue = feature.getQualifier(currentQualifierName); - StringBuffer sb = new StringBuffer().append(qValue).append(ltrim(line)); - feature.updateQualifier(currentQualifierName, sb.toString()); - continue; - } - }else{ - //It's the begining of a qualifier line - Matcher matcher = patQualifierKey.matcher(line); - if (matcher.find()){ - String qName = matcher.group(1); - currentQualifierName = qName.replace("/",""); - line = line.replace(qName,"").replace("/", "").replace("=",""); - feature.addQualifier(currentQualifierName, ltrim(line)); - parsingQualifier = true; - continue; - } - } - } - } - // Process REFERENCE line - if (line.startsWith("REFERENCE")) { - if (!referenceMode){ - //This is line is the REFERENCE line - referenceMode = true; - featureMode = false; - sourceMode = false; - seqMode = false; - }else{ - //We were at referenceMode, then add current reference to the list and create a new one - references.add(reference); - } - reference = new GenBankReference(); - String desc = processReferenceLine(line,"REFERENCE"); - int[] ranges = parseReferenceDescriptor(desc); - reference.setDescriptor(desc); - reference.setOrder(ranges[0]); - reference.setBegin(ranges[1]); - reference.setEnd(ranges[2]); - parsingAuthors = false; - parsingTitle = false; - continue; - } - - if (line.startsWith(" AUTHORS")){ - if (referenceMode){ - reference.setAuthors(processReferenceLine(line,"AUTHORS")); - parsingAuthors = true; - parsingTitle = false; - } - continue; - } - if (line.startsWith(" TITLE")){ - if (referenceMode){ - reference.setTitle(processReferenceLine(line,"TITLE")); - parsingAuthors = false; - parsingTitle = true; - } - continue; - } - if (line.startsWith(" JOURNAL")){ - if (referenceMode){ - reference.setJournal(processReferenceLine(line,"JOURNAL")); - parsingTitle = false; - parsingAuthors = false; - } - continue; - } - if (line.startsWith(" PUBMED")){ - if (referenceMode){ - reference.setPubmed(processReferenceLine(line,"PUBMED")); - parsingTitle = false; - parsingAuthors = false; - } - continue; - } - - if (line.startsWith(" MEDLINE")){ - if (referenceMode){ - reference.setMedline(processReferenceLine(line,"MEDLINE")); - parsingTitle = false; - parsingAuthors = false; - } - continue; - } - if (line.startsWith(" REMARK")){ - if (referenceMode){ - reference.setRemark(processReferenceLine(line,"REMARK")); - parsingTitle = false; - parsingAuthors = false; - } - continue; - } - if (line.startsWith(" CONSRTM")){ - if (referenceMode){ - reference.setConsortia(processReferenceLine(line,"CONSRTM")); - parsingTitle = false; - parsingAuthors = false; - } - continue; - } - - - if (line.startsWith("SOURCE")) { - parsingKeywords = false; - sourceMode = true; - commentMode = false; - if (sourceMode){ - sourceLines.add(line); - } - continue; - } - if (line.indexOf("ORGANISM")!=-1) { - if (sourceMode){ - sourceLines.add(line); - continue; - } - } - - if (line.startsWith("COMMENT")){ - if (reference!=null) - references.add(reference); - commentMode = true; - sourceMode = false; - referenceMode = false; - sourceMode = false; - seqMode = false; - comments.add(processCommentLine(line)); - continue; - } - // Process LOCUS line - if (line.startsWith("LOCUS")) { - locus = parseLocus(line); - continue; - } - // Process BASE COUNT line - if (line.startsWith("BASE COUNT")) { - baseCount = processHeaderLine(line,"BASE COUNT"); - featureMode = false; - continue; - } - // Process DEFINITION line - if (line.startsWith("DEFINITION")) { - definition = processHeaderLine(line,"DEFINITION"); - parsingDefinition = true; - continue; - } - // Process ACCESSION line - if (line.startsWith("ACCESSION")) { - accession = processHeaderLine(line,"ACCESSION"); - parsingDefinition = false; - continue; - } - // Process VERSION line - if (line.startsWith("VERSION")) { - version = parseVersion(line); - //headers.put("VERSION", processHeaderLine(line,"VERSION")); - continue; - } - // Process DBLINK line - if (line.startsWith("DBLINK")) { - dblink = processHeaderLine(line,"DBLINK"); - parsingDbLink = true; - continue; - } - // Process KEYWORDS line - if (line.startsWith("KEYWORDS")) { - keywords = processHeaderLine(line,"KEYWORDS"); - parsingKeywords = true; - parsingDbLink = false; - continue; - } - if (sourceMode){ - sourceLines.add(line); - continue; - } - if (parsingDefinition){ - StringBuffer sb = new StringBuffer().append(definition).append(line); - definition = sb.toString(); - continue; - } - if (referenceMode && parsingAuthors){ - if (reference!=null){ - StringBuffer authors = new StringBuffer().append(reference.getAuthors()).append(line); - reference.setAuthors(authors.toString()); - } - continue; - } - if (referenceMode && parsingTitle){ - if (reference!=null){ - StringBuffer title = new StringBuffer().append(reference.getTitle()).append(line); - reference.setTitle(title.toString()); - } - continue; - } - if (parsingKeywords){ - StringBuffer sb = new StringBuffer().append(keywords).append(line); - keywords = sb.toString(); - continue; - } - if (parsingDbLink){ - StringBuffer sb = new StringBuffer().append(dblink).append(line); - dblink = sb.toString(); - continue; - } - if (commentMode){ - comments.add(line); - } - } - setEntries(); - }else{ - //File is not valid - throw new IOException("GenBankFile is not valid."); - } - } - - protected void setEntries(){ - StringBuffer result = new StringBuffer(); - //Mapping GenBank info into Jalview data model - genBankSequence = new Sequence(accession,DnaUtils.getNucleotidesFromSequenceVector(sequences)); - //Mapping DBRefEntry - DBRefEntry dbRef = new DBRefEntry(); - dbRef.setSource(DBRefSource.GENBANK); - dbRef.setVersion(version == null ? "" : version.toString()); - dbRef.setAccessionId(accession); - // add map to indicate the sequence is a valid coordinate frame for the dbref - dbRef.setMap(new Mapping(null, new int[] - { 1, genBankSequence.getLength() }, new int[] - { 1, genBankSequence.getLength() }, 1, 1)); - genBankSequence.addDBRef(dbRef); - - //add header info as non-positional features - //add LOCUS - SequenceFeature locusF = new SequenceFeature("LOCUS", (locus == null ? "" : locus.toString()), null, 1, genBankSequence.getLength(), DBRefSource.GENBANK); - genBankSequence.addSequenceFeature(locusF); - //add DEFNITION - SequenceFeature defF = new SequenceFeature("DEFINITION", definition, null, 1, genBankSequence.getLength(), DBRefSource.GENBANK); - genBankSequence.addSequenceFeature(defF); - //add ACCESSION - SequenceFeature accessionF = new SequenceFeature("ACCESSION", accession, null, 1, genBankSequence.getLength(), DBRefSource.GENBANK); - genBankSequence.addSequenceFeature(accessionF); - //add VERSION - SequenceFeature versionF = new SequenceFeature("VERSION", (version == null ? "" : version.toString()), null, 1, genBankSequence.getLength(), DBRefSource.GENBANK); - genBankSequence.addSequenceFeature(versionF); - //add DBLINK - SequenceFeature dblinkF = new SequenceFeature("DBLINK", (dblink == null ? "" : dblink.toString()), null, 1, genBankSequence.getLength(), DBRefSource.GENBANK); - genBankSequence.addSequenceFeature(dblinkF); - //add KEYWORDS - SequenceFeature keywordsF = new SequenceFeature("KEYWORDS", keywords, null, 1, genBankSequence.getLength(), DBRefSource.GENBANK); - genBankSequence.addSequenceFeature(keywordsF); - //add SOURCE - SequenceFeature sourceF = new SequenceFeature("SOURCE", (source == null ? "" : source.toString()), null, 1, genBankSequence.getLength(), DBRefSource.GENBANK); - genBankSequence.addSequenceFeature(sourceF); - //add BASE COUNT - SequenceFeature baseCountF = new SequenceFeature("BASE COUNT", (baseCount == null ? "" : baseCount.toString()), null, 1, genBankSequence.getLength(), DBRefSource.GENBANK); - genBankSequence.addSequenceFeature(baseCountF); - - // add literature and database cross references in the file - for (GenBankReference gbRef:references){ - //They are non-positional features - SequenceFeature refFeature = new SequenceFeature("REFERENCE", gbRef.toString(),null,gbRef.getBegin(),gbRef.getEnd(),DBRefSource.GENBANK); - genBankSequence.addSequenceFeature(refFeature); - } - //add COMMENTS - if (comments.size()>0){ - StringBuffer sb = new StringBuffer(); - for (String comment: comments){ - sb.append(comment).append(newline); - } - SequenceFeature commentF = new SequenceFeature("COMMENT", sb.toString(), null, 1, genBankSequence.getLength(), DBRefSource.GENBANK); - genBankSequence.addSequenceFeature(commentF); - } - //Mapping FEATURES - for (GenBankFeature feature:features){ - if (feature.getType()!=null){ - SequenceFeature sf = new SequenceFeature(); - sf.setType(feature.getType()); - sf.setDescription(feature.getType()); - - sf.setBegin(feature.getLocation()==null ? 0 : feature.getLocation().getMinor()); - sf.setEnd(feature.getLocation()==null ? 0 : feature.getLocation().getMajor()); - Enumeration names = feature.getQualifiersNames(); - while (names.hasMoreElements()){ - String qName = names.nextElement(); - String qValue = feature.getQualifier(qName); - sf.setValue(qName, qValue); - } - genBankSequence.addSequenceFeature(sf); - } - } - SequenceI[] parsedSeqs = new SequenceI[1]; - parsedSeqs[0] = genBankSequence; - this.setSeqs(parsedSeqs); - } - private GenBankVersion parseVersion(String line) { - //VERSION U00096.2 GI:48994873 - if (line.trim().equalsIgnoreCase("VERSION")){ - return null; - }else{ - GenBankVersion ver = new GenBankVersion(); - String v = line.substring(11, line.indexOf(" ", 12)).trim(); - ver.setVersion(v); - int posGI = line.indexOf("GI:", 11 + v.length()); - if (posGI > -1) { - ver.setGI(line.substring(posGI)); - } - return ver; - } +public class GenBankFile extends AlignFile +{ + private static final Logger log = Logger.getLogger(GenBankFile.class + .getName()); + + private GenBankVersion version = new GenBankVersion(); + + private GenBankLocus locus = new GenBankLocus(); + + private GenBankSource source = new GenBankSource(); + + private static final Pattern patLocation = Pattern + .compile("(\\d+)\\.\\.(\\d+)"); + + private static final Pattern patLocationComp = Pattern + .compile("(complement)\\((\\d+)\\.\\.(\\d+)\\)"); + + private static final Pattern patLocus = Pattern + .compile("^LOCUS +([a-z|A-Z|0-9|_]+) +([0-9]+) bp ( {3}|ss\\-|ds\\-|ms\\-)([a-z|A-Z|-|\\s]+) ([a-z| ]{8}) ([A-Z| ]{3}) ([0-9]+-[A-Z]+-[0-9]+)"); + + private static final Pattern patQualifierKey = Pattern.compile("/(.*?)="); + + private static final Pattern patFeatureKey = Pattern + .compile("^\\s{5}([A-Za-z0-9\\_\\']+)\\s+"); + + private String definition; + + private String accession; + + private String keywords; + + private String dblink; + + private String baseCount; + + private Vector features; + + private Vector comments; + + // Items under origin + private Vector sequences; + + private Vector references; + + private SequenceI genBankSequence; + + public GenBankFile() + { + } + + public GenBankFile(String inFile, String type) throws IOException + { + super(inFile, type); + } + + public GenBankFile(FileParse source) throws IOException + { + super(source); + } + + public void initData() + { + super.initData(); + features = new Vector(); + comments = new Vector(); + sequences = new Vector(); + references = new Vector(); + } + + public void parse() throws IOException + { + String line; + boolean featureMode = false; // FEATURES found + boolean seqMode = false; // Parsing Sequences from SOURCE + boolean referenceMode = false; // REFERENCE found + boolean sourceMode = false; // SOURCE found + boolean commentMode = false; // COMMENT found + boolean parsingAuthors = false; // Parsing authors (multiline) + boolean parsingDefinition = false; // Parsing definition (multiline) + boolean parsingKeywords = false; // Parsing keywords (multiline) + boolean parsingDbLink = false; // Parsing DBLINK (multiline) + boolean parsingTitle = false; // Parsing title (multiline) + boolean parsingQualifier = false; // Parsing feature qualifier (multine) + String currentQualifierName = ""; + GenBankReference reference = null; + GenBankFeature feature = null; + List sourceLines = new ArrayList(); + + if (this.isValid()) + { + + while ((line = nextLine()) != null) + { + // We only process lines if they have contents within + if (line.length() == 0) + continue; + + if (line.startsWith("FEATURES")) + { + featureMode = true; + seqMode = false; + referenceMode = false; + sourceMode = false; + commentMode = false; + feature = new GenBankFeature(); + source = parseSource(sourceLines); + } + + if (seqMode) + { + if (!line.startsWith("//")) + { + GenBankSequence seq = processSequenceLine(line); + sequences.add(seq); + } + featureMode = false; + referenceMode = false; + sourceMode = false; + } + + if (line.startsWith("ORIGIN")) + { + if (feature.getType() != null) + features.add(feature); + featureMode = false; + referenceMode = false; + sourceMode = false; + seqMode = true; + } + + if (featureMode) + { + // Process feature line + if (!line.startsWith("FEATURES") + && !line.startsWith("BASE COUNT")) + { + // Parse type + if (!line.trim().startsWith("/")) + { + Matcher featuresMatch = patFeatureKey.matcher(line); + if (featuresMatch.find()) + { + if (feature.getType() != null) + features.add(feature); // Hay que a�adirlo s�lo si no se est� + // a mitad de un qualif o una feature + // It's a feature + String type = featuresMatch.group(0); + feature = new GenBankFeature(); + feature.setType(type); + GenBankLocation loc = parserFeatureLocation(feature, + line.replace(type, "")); + feature.setLocation(loc); + parsingQualifier = false; + continue; + } + else if (parsingQualifier) + { // If not a feature, it's another part of a qualifier + String qValue = feature.getQualifier(currentQualifierName); + StringBuffer sb = new StringBuffer().append(qValue).append( + ltrim(line)); + feature.updateQualifier(currentQualifierName, sb.toString()); + continue; + } + } + else + { + // It's the begining of a qualifier line + Matcher matcher = patQualifierKey.matcher(line); + if (matcher.find()) + { + String qName = matcher.group(1); + currentQualifierName = qName.replace("/", ""); + line = line.replace(qName, "").replace("/", "") + .replace("=", ""); + feature.addQualifier(currentQualifierName, ltrim(line)); + parsingQualifier = true; + continue; + } + } + } + } + // Process REFERENCE line + if (line.startsWith("REFERENCE")) + { + if (!referenceMode) + { + // This is line is the REFERENCE line + referenceMode = true; + featureMode = false; + sourceMode = false; + seqMode = false; + } + else + { + // We were at referenceMode, then add current reference to the list + // and create a new one + references.add(reference); + } + reference = new GenBankReference(); + String desc = processReferenceLine(line, "REFERENCE"); + int[] ranges = parseReferenceDescriptor(desc); + reference.setDescriptor(desc); + reference.setOrder(ranges[0]); + reference.setBegin(ranges[1]); + reference.setEnd(ranges[2]); + parsingAuthors = false; + parsingTitle = false; + continue; + } + + if (line.startsWith(" AUTHORS")) + { + if (referenceMode) + { + reference.setAuthors(processReferenceLine(line, "AUTHORS")); + parsingAuthors = true; + parsingTitle = false; + } + continue; + } + if (line.startsWith(" TITLE")) + { + if (referenceMode) + { + reference.setTitle(processReferenceLine(line, "TITLE")); + parsingAuthors = false; + parsingTitle = true; + } + continue; + } + if (line.startsWith(" JOURNAL")) + { + if (referenceMode) + { + reference.setJournal(processReferenceLine(line, "JOURNAL")); + parsingTitle = false; + parsingAuthors = false; + } + continue; + } + if (line.startsWith(" PUBMED")) + { + if (referenceMode) + { + reference.setPubmed(processReferenceLine(line, "PUBMED")); + parsingTitle = false; + parsingAuthors = false; + } + continue; + } + + if (line.startsWith(" MEDLINE")) + { + if (referenceMode) + { + reference.setMedline(processReferenceLine(line, "MEDLINE")); + parsingTitle = false; + parsingAuthors = false; + } + continue; + } + if (line.startsWith(" REMARK")) + { + if (referenceMode) + { + reference.setRemark(processReferenceLine(line, "REMARK")); + parsingTitle = false; + parsingAuthors = false; + } + continue; + } + if (line.startsWith(" CONSRTM")) + { + if (referenceMode) + { + reference.setConsortia(processReferenceLine(line, "CONSRTM")); + parsingTitle = false; + parsingAuthors = false; + } + continue; + } + + if (line.startsWith("SOURCE")) + { + parsingKeywords = false; + sourceMode = true; + commentMode = false; + if (sourceMode) + { + sourceLines.add(line); + } + continue; + } + if (line.indexOf("ORGANISM") != -1) + { + if (sourceMode) + { + sourceLines.add(line); + continue; + } + } + + if (line.startsWith("COMMENT")) + { + if (reference != null) + references.add(reference); + commentMode = true; + sourceMode = false; + referenceMode = false; + sourceMode = false; + seqMode = false; + comments.add(processCommentLine(line)); + continue; + } + // Process LOCUS line + if (line.startsWith("LOCUS")) + { + locus = parseLocus(line); + continue; + } + // Process BASE COUNT line + if (line.startsWith("BASE COUNT")) + { + baseCount = processHeaderLine(line, "BASE COUNT"); + featureMode = false; + continue; + } + // Process DEFINITION line + if (line.startsWith("DEFINITION")) + { + definition = processHeaderLine(line, "DEFINITION"); + parsingDefinition = true; + continue; + } + // Process ACCESSION line + if (line.startsWith("ACCESSION")) + { + accession = processHeaderLine(line, "ACCESSION"); + parsingDefinition = false; + continue; + } + // Process VERSION line + if (line.startsWith("VERSION")) + { + version = parseVersion(line); + // headers.put("VERSION", processHeaderLine(line,"VERSION")); + continue; + } + // Process DBLINK line + if (line.startsWith("DBLINK")) + { + dblink = processHeaderLine(line, "DBLINK"); + parsingDbLink = true; + continue; + } + // Process KEYWORDS line + if (line.startsWith("KEYWORDS")) + { + keywords = processHeaderLine(line, "KEYWORDS"); + parsingKeywords = true; + parsingDbLink = false; + continue; + } + if (sourceMode) + { + sourceLines.add(line); + continue; + } + if (parsingDefinition) + { + StringBuffer sb = new StringBuffer().append(definition).append( + line); + definition = sb.toString(); + continue; + } + if (referenceMode && parsingAuthors) + { + if (reference != null) + { + StringBuffer authors = new StringBuffer().append( + reference.getAuthors()).append(line); + reference.setAuthors(authors.toString()); + } + continue; + } + if (referenceMode && parsingTitle) + { + if (reference != null) + { + StringBuffer title = new StringBuffer().append( + reference.getTitle()).append(line); + reference.setTitle(title.toString()); + } + continue; + } + if (parsingKeywords) + { + StringBuffer sb = new StringBuffer().append(keywords) + .append(line); + keywords = sb.toString(); + continue; + } + if (parsingDbLink) + { + StringBuffer sb = new StringBuffer().append(dblink).append(line); + dblink = sb.toString(); + continue; + } + if (commentMode) + { + comments.add(line); + } + } + setEntries(); } - - private GenBankLocus parseLocus(String line){ - GenBankLocus loc = new GenBankLocus(); - Matcher mat = patLocus.matcher(line); - if (mat.find()) { - String name = mat.group(1); - String len = mat.group(2); - String strand = mat.group(3); - String mtype = mat.group(4); - String linear = mat.group(5); - String division = mat.group(6); - String date = mat.group(7); - - loc.setName(name == null ? "" : name.trim()); - loc.setSequenceLength(len == null ? 0 : Integer.parseInt(len)); - loc.setStrand(strand == null ? "" : strand); - loc.setMoleculeType(mtype == null ? "" : mtype); - loc.setLinearSequence("linear".equals(linear)); - loc.setDivision(division == null ? "" : division); - loc.setModificationDate(date == null ? "" :date); - } - return loc; + else + { + // File is not valid + throw new IOException("GenBankFile is not valid."); } - private GenBankSource parseSource(List lines){ - StringBuffer sb = new StringBuffer(); - for(String line:lines){ - sb.append(line).append(newline); - } - // Source section - GenBankSource sou = new GenBankSource(); - String aux = sb.toString().substring(11); - int fim1 = aux.indexOf("\n"); - if (fim1 > -1) { - sou.setSource(aux.substring(0, fim1)); - int ini2 = aux.indexOf("ORGANISM"); - if (ini2 > -1) { - fim1 = aux.indexOf("\n", ini2 + 10); - if (fim1 > -1) { - sou.setOrganism(aux.substring(ini2 + 10, fim1)); - sou.setTaxonomic(aux.substring(fim1).replaceAll(" ", "").replaceAll("\\s+", "")); - } else { - sou.setOrganism(aux); - } - } - } else { - sou.setSource(aux); - } - return sou; - } - - /** - * Possible situations: - * - * 467 Points to a single base in the presented sequence 340..565 Points to - * a continuous range of bases bounded by and including the starting and - * ending bases <345..500 Indicates that the exact lower boundary point - * of a feature is unknown. The location begins at some base previous to the - * first base specified (which need not be contained in the presented - * sequence) and continues to and includes the ending base <1..888 The - * feature starts before the first sequenced base and continues to and - * includes base 888 1..>888 The feature starts at the first sequenced - * base and continues beyond base 888 102.110 Indicates that the exact - * location is unknown but that it is one of the bases between bases 102 and - * 110, inclusive 123^124 Points to a site between bases 123 and 124 - * join(12..78,134..202) Regions 12 to 78 and 134 to 202 should be joined to - * form one contiguous sequence complement(34..126) Start at the base - * complementary to 126 and finish at the base complementary to base 34 (the - * feature is on the strand complementary to the presented strand) - * complement(join(2691..4571,4918..5163)) Joins regions 2691 to 4571 and - * 4918 to 5163, then complements the joined segments (the feature is on the - * strand complementary to the presented strand) - * join(complement(4918..5163),complement(2691..4571)) Complements regions - * 4918 to 5163 and 2691 to 4571, then joins the complemented segments (the - * feature is on the strand complementary to the presented strand) - * J00194.1:100..202 Points to bases 100 to 202, inclusive, in the entry (in - * this database) with primary accession number 'J00194' - * join(1..100,J00194.1:100..202) Joins region 1..100 of the existing entry - * with the region 100..202 of remote entry J00194 - * - * @param fea - * @param localiza - */ - private GenBankLocation parserFeatureLocation(GenBankFeature fea, String localiza) { - // remove os espaços, quebra de linhas etc - String buf = localiza.replaceAll("\\s", ""); - - // checks if there is a comma present between ranges - // complement(100..110),complement(90..100) - char[] buf2 = buf.toCharArray(); - int abertos = 0; - java.util.List lista = new java.util.ArrayList(); - int pinicial = 0; - for (int i = 0; i < buf2.length; i++) { - if (buf2[i] == '(') { - abertos++; - } else if (buf2[i] == ')') { - abertos--; - } else if (buf2[i] == ',' && abertos == 0) { - lista.add(buf.substring(pinicial, i)); - pinicial = i + 1; + } + + protected void setEntries() + { + StringBuffer result = new StringBuffer(); + // Mapping GenBank info into Jalview data model + genBankSequence = new Sequence(accession, + DnaUtils.getNucleotidesFromSequenceVector(sequences)); + // Mapping DBRefEntry + DBRefEntry dbRef = new DBRefEntry(); + dbRef.setSource(DBRefSource.GENBANK); + dbRef.setVersion(version == null ? "" : version.toString()); + dbRef.setAccessionId(accession); + // add map to indicate the sequence is a valid coordinate frame for the + // dbref + dbRef.setMap(new Mapping(null, new int[] + { 1, genBankSequence.getLength() }, new int[] + { 1, genBankSequence.getLength() }, 1, 1)); + genBankSequence.addDBRef(dbRef); + + // add header info as non-positional features + // add LOCUS + SequenceFeature locusF = new SequenceFeature("LOCUS", + (locus == null ? "" : locus.toString()), null, 1, + genBankSequence.getLength(), DBRefSource.GENBANK); + genBankSequence.addSequenceFeature(locusF); + // add DEFNITION + SequenceFeature defF = new SequenceFeature("DEFINITION", definition, + null, 1, genBankSequence.getLength(), DBRefSource.GENBANK); + genBankSequence.addSequenceFeature(defF); + // add ACCESSION + SequenceFeature accessionF = new SequenceFeature("ACCESSION", + accession, null, 1, genBankSequence.getLength(), + DBRefSource.GENBANK); + genBankSequence.addSequenceFeature(accessionF); + // add VERSION + SequenceFeature versionF = new SequenceFeature("VERSION", + (version == null ? "" : version.toString()), null, 1, + genBankSequence.getLength(), DBRefSource.GENBANK); + genBankSequence.addSequenceFeature(versionF); + // add DBLINK + SequenceFeature dblinkF = new SequenceFeature("DBLINK", + (dblink == null ? "" : dblink.toString()), null, 1, + genBankSequence.getLength(), DBRefSource.GENBANK); + genBankSequence.addSequenceFeature(dblinkF); + // add KEYWORDS + SequenceFeature keywordsF = new SequenceFeature("KEYWORDS", keywords, + null, 1, genBankSequence.getLength(), DBRefSource.GENBANK); + genBankSequence.addSequenceFeature(keywordsF); + // add SOURCE + SequenceFeature sourceF = new SequenceFeature("SOURCE", + (source == null ? "" : source.toString()), null, 1, + genBankSequence.getLength(), DBRefSource.GENBANK); + genBankSequence.addSequenceFeature(sourceF); + // add BASE COUNT + SequenceFeature baseCountF = new SequenceFeature("BASE COUNT", + (baseCount == null ? "" : baseCount.toString()), null, 1, + genBankSequence.getLength(), DBRefSource.GENBANK); + genBankSequence.addSequenceFeature(baseCountF); + + // add literature and database cross references in the file + for (GenBankReference gbRef : references) + { + // They are non-positional features + SequenceFeature refFeature = new SequenceFeature("REFERENCE", + gbRef.toString(), null, gbRef.getBegin(), gbRef.getEnd(), + DBRefSource.GENBANK); + genBankSequence.addSequenceFeature(refFeature); + } + // add COMMENTS + if (comments.size() > 0) + { + StringBuffer sb = new StringBuffer(); + for (String comment : comments) + { + sb.append(comment).append(newline); + } + SequenceFeature commentF = new SequenceFeature("COMMENT", + sb.toString(), null, 1, genBankSequence.getLength(), + DBRefSource.GENBANK); + genBankSequence.addSequenceFeature(commentF); + } + // Mapping FEATURES + for (GenBankFeature feature : features) + { + if (feature.getType() != null) + { + SequenceFeature sf = new SequenceFeature(); + sf.setType(feature.getType()); + sf.setDescription(feature.getType()); + + sf.setBegin(feature.getLocation() == null ? 0 : feature + .getLocation().getMinor()); + sf.setEnd(feature.getLocation() == null ? 0 : feature.getLocation() + .getMajor()); + Enumeration names = feature.getQualifiersNames(); + while (names.hasMoreElements()) + { + String qName = names.nextElement(); + String qValue = feature.getQualifier(qName); + sf.setValue(qName, qValue); + } + genBankSequence.addSequenceFeature(sf); + } + } + SequenceI[] parsedSeqs = new SequenceI[1]; + parsedSeqs[0] = genBankSequence; + this.setSeqs(parsedSeqs); + } + + private GenBankVersion parseVersion(String line) + { + // VERSION U00096.2 GI:48994873 + if (line.trim().equalsIgnoreCase("VERSION")) + { + return null; + } + else + { + GenBankVersion ver = new GenBankVersion(); + String v = line.substring(11, line.indexOf(" ", 12)).trim(); + ver.setVersion(v); + int posGI = line.indexOf("GI:", 11 + v.length()); + if (posGI > -1) + { + ver.setGI(line.substring(posGI)); + } + return ver; + } + } + + private GenBankLocus parseLocus(String line) + { + GenBankLocus loc = new GenBankLocus(); + Matcher mat = patLocus.matcher(line); + if (mat.find()) + { + String name = mat.group(1); + String len = mat.group(2); + String strand = mat.group(3); + String mtype = mat.group(4); + String linear = mat.group(5); + String division = mat.group(6); + String date = mat.group(7); + + loc.setName(name == null ? "" : name.trim()); + loc.setSequenceLength(len == null ? 0 : Integer.parseInt(len)); + loc.setStrand(strand == null ? "" : strand); + loc.setMoleculeType(mtype == null ? "" : mtype); + loc.setLinearSequence("linear".equals(linear)); + loc.setDivision(division == null ? "" : division); + loc.setModificationDate(date == null ? "" : date); + } + return loc; + } + + private GenBankSource parseSource(List lines) + { + StringBuffer sb = new StringBuffer(); + for (String line : lines) + { + sb.append(line).append(newline); + } + // Source section + GenBankSource sou = new GenBankSource(); + String aux = sb.toString().substring(11); + int fim1 = aux.indexOf("\n"); + if (fim1 > -1) + { + sou.setSource(aux.substring(0, fim1)); + int ini2 = aux.indexOf("ORGANISM"); + if (ini2 > -1) + { + fim1 = aux.indexOf("\n", ini2 + 10); + if (fim1 > -1) + { + sou.setOrganism(aux.substring(ini2 + 10, fim1)); + sou.setTaxonomic(aux.substring(fim1) + .replaceAll(" ", "").replaceAll("\\s+", "")); + } + else + { + sou.setOrganism(aux); + } + } + } + else + { + sou.setSource(aux); + } + return sou; + } + + /** + * Possible situations: + * + * 467 Points to a single base in the presented sequence 340..565 Points to a + * continuous range of bases bounded by and including the starting and ending + * bases <345..500 Indicates that the exact lower boundary point of a + * feature is unknown. The location begins at some base previous to the first + * base specified (which need not be contained in the presented sequence) and + * continues to and includes the ending base <1..888 The feature starts + * before the first sequenced base and continues to and includes base 888 + * 1..>888 The feature starts at the first sequenced base and continues + * beyond base 888 102.110 Indicates that the exact location is unknown but + * that it is one of the bases between bases 102 and 110, inclusive 123^124 + * Points to a site between bases 123 and 124 join(12..78,134..202) Regions 12 + * to 78 and 134 to 202 should be joined to form one contiguous sequence + * complement(34..126) Start at the base complementary to 126 and finish at + * the base complementary to base 34 (the feature is on the strand + * complementary to the presented strand) + * complement(join(2691..4571,4918..5163)) Joins regions 2691 to 4571 and 4918 + * to 5163, then complements the joined segments (the feature is on the strand + * complementary to the presented strand) + * join(complement(4918..5163),complement(2691..4571)) Complements regions + * 4918 to 5163 and 2691 to 4571, then joins the complemented segments (the + * feature is on the strand complementary to the presented strand) + * J00194.1:100..202 Points to bases 100 to 202, inclusive, in the entry (in + * this database) with primary accession number 'J00194' + * join(1..100,J00194.1:100..202) Joins region 1..100 of the existing entry + * with the region 100..202 of remote entry J00194 + * + * @param fea + * @param localiza + */ + private GenBankLocation parserFeatureLocation(GenBankFeature fea, + String localiza) + { + // remove os espaços, quebra de linhas etc + String buf = localiza.replaceAll("\\s", ""); + + // checks if there is a comma present between ranges + // complement(100..110),complement(90..100) + char[] buf2 = buf.toCharArray(); + int abertos = 0; + java.util.List lista = new java.util.ArrayList(); + int pinicial = 0; + for (int i = 0; i < buf2.length; i++) + { + if (buf2[i] == '(') + { + abertos++; + } + else if (buf2[i] == ')') + { + abertos--; + } + else if (buf2[i] == ',' && abertos == 0) + { + lista.add(buf.substring(pinicial, i)); + pinicial = i + 1; + } + } + if (lista.size() > 0) + { + lista.add(buf.substring(pinicial)); + GenBankLocations um = new GenBankLocations(); + um.setOperator(GenBankLocations.NONE); + for (String s : lista) + { + um.getUnits().add(parserFeatureLocation(fea, s)); + } + fea.setLocation(um); + return um; + } + + // trata as funcoes: complement(location,location...), + // join(location,location...), order(location,location...) + if (buf.contains("(")) + { + GenBankLocations um = new GenBankLocations(); + int ini = buf.indexOf("("); + int fim = buf.lastIndexOf(")"); + String token = buf.substring(0, ini); + if ("complement".equalsIgnoreCase(token)) + { + String inter = buf.substring(ini + 1, fim); + GenBankLocation interno = parserFeatureLocation(fea, inter); + interno.setComplement(true); + um.setOperator(GenBankLocations.COMPLEMENT); + um.getUnits().add(interno); + fea.setLocation(um); + } + else if ("join".equalsIgnoreCase(token)) + { + String inter = buf.substring(ini + 1, fim); + GenBankLocation interno = parserFeatureLocation(fea, inter); + um.setOperator(GenBankLocations.JOIN); + um.getUnits().add(interno); + fea.setLocation(um); + } + else if ("order".equalsIgnoreCase(token)) + { + String inter = buf.substring(ini + 1, fim); + GenBankLocation interno = parserFeatureLocation(fea, inter); + um.setOperator(GenBankLocations.ORDER); + um.getUnits().add(interno); + fea.setLocation(um); + } + else + { + log.log(Level.WARNING, + "Token desconhecido em location/features - {0}", token); + String inter = buf.substring(ini + 1, fim); + fea.setLocation(parserFeatureLocation(fea, inter)); + } + return fea.getLocation(); + } + else + { + // trata quando tiver uma lista de location + if (buf.contains(",")) + { + String[] partes = buf.split(","); + GenBankLocations um = new GenBankLocations(); + for (String p : partes) + { + um.getUnits().add(parserFeatureLocation(fea, p)); + } + fea.setLocation(um); + return um; + } + else + { + // trata quando tiver range + if (buf.contains("..")) + { + String[] partes = buf.split("\\.\\."); + GenBankLocationRange range = new GenBankLocationRange(); + if (buf.contains(":")) + { + for (int i = 0; i < partes.length; i++) + { + int pos = partes[i].indexOf(":"); + if (pos > 0) + { + String entry = partes[i].substring(0, pos); + partes[i] = partes[i].substring(pos + 1); + range.setEntry(entry); + } } + } + GenBankLocationPoint gp0 = (GenBankLocationPoint) parserFeatureLocation( + fea, partes[0]); + range.setStart(gp0); + GenBankLocationPoint gp1 = (GenBankLocationPoint) parserFeatureLocation( + fea, partes[1]); + range.setEnd(gp1); + fea.setLocation(range); + return range; } - if (lista.size() > 0) { - lista.add(buf.substring(pinicial)); - GenBankLocations um = new GenBankLocations(); - um.setOperator(GenBankLocations.NONE); - for (String s : lista) { - um.getUnits().add(parserFeatureLocation(fea, s)); + else + { + // trata um ponto + // possibilidades consideradas: + // 467 + // 102.110 + // 123^124 + // <345 + // >400 + // 345> + // 400< + // ou uma combinacao dessas + GenBankLocationPoint gp = new GenBankLocationPoint(); + if (buf.contains(":")) + { + int pos = buf.indexOf(":"); + if (pos > 0) + { + String entry = buf.substring(0, pos); + buf = buf.substring(pos + 1); + gp.setEntry(entry); } - fea.setLocation(um); - return um; - } - - // trata as funcoes: complement(location,location...), - // join(location,location...), order(location,location...) - if (buf.contains("(")) { - GenBankLocations um = new GenBankLocations(); - int ini = buf.indexOf("("); - int fim = buf.lastIndexOf(")"); - String token = buf.substring(0, ini); - if ("complement".equalsIgnoreCase(token)) { - String inter = buf.substring(ini + 1, fim); - GenBankLocation interno = parserFeatureLocation(fea, inter); - interno.setComplement(true); - um.setOperator(GenBankLocations.COMPLEMENT); - um.getUnits().add(interno); - fea.setLocation(um); - } else if ("join".equalsIgnoreCase(token)) { - String inter = buf.substring(ini + 1, fim); - GenBankLocation interno = parserFeatureLocation(fea, inter); - um.setOperator(GenBankLocations.JOIN); - um.getUnits().add(interno); - fea.setLocation(um); - } else if ("order".equalsIgnoreCase(token)) { - String inter = buf.substring(ini + 1, fim); - GenBankLocation interno = parserFeatureLocation(fea, inter); - um.setOperator(GenBankLocations.ORDER); - um.getUnits().add(interno); - fea.setLocation(um); - } else { - log.log(Level.WARNING, "Token desconhecido em location/features - {0}", token); - String inter = buf.substring(ini + 1, fim); - fea.setLocation(parserFeatureLocation(fea, inter)); + } + int pos = 0; + // verifica os simb < e > antes do primeiro numero + if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>') + { + gp.setPrefix(buf.charAt(pos)); + pos++; + } + // pega o primeiro numero + int ini = pos; + while (pos < buf.length() && buf.charAt(pos) >= '0' + && buf.charAt(pos) <= '9') + { + pos++; + } + if (buf.subSequence(ini, pos).length() < 1) + { + System.out.println(localiza); + } + int num = Integer.parseInt(buf.substring(ini, pos)); + int num2 = num; + // o primeiro numero pode ser o unico numero + if (pos < buf.length()) + { + // verifica se tem os sinais < e > apos o primeiro numero + if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>') + { + if (buf.contains(".") || buf.contains("^")) + { + gp.setPrefix(buf.charAt(pos)); + } + else + { + gp.setSufix(buf.charAt(pos)); + } + pos++; } - return fea.getLocation(); - } else { - // trata quando tiver uma lista de location - if (buf.contains(",")) { - String[] partes = buf.split(","); - GenBankLocations um = new GenBankLocations(); - for (String p : partes) { - um.getUnits().add( - parserFeatureLocation(fea, p)); - } - fea.setLocation(um); - return um; - } else { - // trata quando tiver range - if (buf.contains("..")) { - String[] partes = buf.split("\\.\\."); - GenBankLocationRange range = new GenBankLocationRange(); - if (buf.contains(":")) { - for (int i = 0; i < partes.length; i++) { - int pos = partes[i].indexOf(":"); - if (pos > 0) { - String entry = partes[i].substring(0, pos); - partes[i] = partes[i].substring(pos + 1); - range.setEntry(entry); - } - } - } - GenBankLocationPoint gp0 = (GenBankLocationPoint) parserFeatureLocation(fea, partes[0]); - range.setStart(gp0); - GenBankLocationPoint gp1 = (GenBankLocationPoint) parserFeatureLocation(fea, partes[1]); - range.setEnd(gp1); - fea.setLocation(range); - return range; - } else { - // trata um ponto - // possibilidades consideradas: - // 467 - // 102.110 - // 123^124 - // <345 - // >400 - // 345> - // 400< - // ou uma combinacao dessas - GenBankLocationPoint gp = new GenBankLocationPoint(); - if (buf.contains(":")) { - int pos = buf.indexOf(":"); - if (pos > 0) { - String entry = buf.substring(0, pos); - buf = buf.substring(pos + 1); - gp.setEntry(entry); - } - } - int pos = 0; - // verifica os simb < e > antes do primeiro numero - if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>') { - gp.setPrefix(buf.charAt(pos)); - pos++; - } - // pega o primeiro numero - int ini = pos; - while (pos < buf.length() && buf.charAt(pos) >= '0' - && buf.charAt(pos) <= '9') { - pos++; - } - if (buf.subSequence(ini, pos).length() < 1) { - System.out.println(localiza); - } - int num = Integer.parseInt(buf.substring(ini, pos)); - int num2 = num; - // o primeiro numero pode ser o unico numero - if (pos < buf.length()) { - // verifica se tem os sinais < e > apos o primeiro numero - if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>') { - if (buf.contains(".") || buf.contains("^")) { - gp.setPrefix(buf.charAt(pos)); - } else { - gp.setSufix(buf.charAt(pos)); - } - pos++; - } - - // verifica a separacao dos numeros . ou ^ - if (pos < buf.length() - && (buf.charAt(pos) == '.' || buf.charAt(pos) == '^')) { - // separação localizada, possibilidade de mais numero - gp.setSymbol(buf.charAt(pos)); - pos++; - - // verifica os simb < e > antes do segundo numero - if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>') { - gp.setSufix(buf.charAt(pos)); - pos++; - } - - // pega o segundo numero - ini = pos; - while (pos < buf.length() && buf.charAt(pos) >= '0' - && buf.charAt(pos) <= '9') { - pos++; - } - num2 = Integer.parseInt(buf.substring(ini, pos)); - - // verifica os simb < e > após o segundo numero - if (pos < buf.length() && (buf.charAt(pos) == '<' || buf.charAt(pos) == '>')) { - gp.setSufix(buf.charAt(pos)); - pos++; - } - } - } - gp.setMin(num); - gp.setMax(num2); - fea.setLocation(gp); - return gp; - } + + // verifica a separacao dos numeros . ou ^ + if (pos < buf.length() + && (buf.charAt(pos) == '.' || buf.charAt(pos) == '^')) + { + // separação localizada, possibilidade de mais numero + gp.setSymbol(buf.charAt(pos)); + pos++; + + // verifica os simb < e > antes do segundo numero + if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>') + { + gp.setSufix(buf.charAt(pos)); + pos++; + } + + // pega o segundo numero + ini = pos; + while (pos < buf.length() && buf.charAt(pos) >= '0' + && buf.charAt(pos) <= '9') + { + pos++; + } + num2 = Integer.parseInt(buf.substring(ini, pos)); + + // verifica os simb < e > após o segundo numero + if (pos < buf.length() + && (buf.charAt(pos) == '<' || buf.charAt(pos) == '>')) + { + gp.setSufix(buf.charAt(pos)); + pos++; + } } + } + gp.setMin(num); + gp.setMax(num2); + fea.setLocation(gp); + return gp; } + } + } + } + + private int[] parseReferenceDescriptor(String descriptor) + { + // 1 (bases 1 to 1609) + int[] resultado = new int[3]; + descriptor = descriptor.replace("(bases", ",").replace("to", ",") + .replace(")", ""); + String[] args = descriptor.split(","); + resultado[0] = Integer.parseInt(args[0].trim()); + resultado[1] = Integer.parseInt(args[1].trim()); + resultado[2] = Integer.parseInt(args[2].trim()); + return resultado; + } + + private String processReferenceLine(String line, String component) + { + int init = line.indexOf(component); + if (init != -1) + { + line = line.replace(component, ""); + } + return line; + } + + private String processHeaderLine(String line, String header) + { + int init = line.indexOf(header); + if (init != -1) + { + line = line.replace(header, ""); } - - private int[] parseReferenceDescriptor(String descriptor){ - // 1 (bases 1 to 1609) - int[] resultado = new int[3]; - descriptor = descriptor.replace("(bases", ",").replace("to", ",").replace(")", ""); - String[] args = descriptor.split(","); - resultado[0] = Integer.parseInt(args[0].trim()); - resultado[1] = Integer.parseInt(args[1].trim()); - resultado[2] = Integer.parseInt(args[2].trim()); - return resultado; + return line; + } + + private GenBankSequence processSequenceLine(String line) + { + GenBankSequence gbs = new GenBankSequence(); + line = ltrim(line); + String[] args = line.split(" "); + gbs.setId(Integer.parseInt(args[0])); + int len = args.length - 1; + Vector seqs = new Vector(); + for (int i = 0; i < len; i++) + seqs.add(args[i + 1]); + gbs.setSequences(seqs); + return gbs; + } + + private String processCommentLine(String line) + { + int init = line.indexOf("COMMENT"); + if (init != -1) + { + line = line.replace("COMMENT", ""); + } + return line; + } + + public String rtrim(String s) + { + int i = s.length() - 1; + while (i >= 0 && Character.isWhitespace(s.charAt(i))) + { + i--; } - private String processReferenceLine(String line, String component){ - int init = line.indexOf(component); - if (init!=-1){ - line = line.replace(component,""); - } - return line; - } - private String processHeaderLine(String line, String header){ - int init = line.indexOf(header); - if (init!=-1){ - line = line.replace(header,""); - } - return line; - } - - private GenBankSequence processSequenceLine(String line) { - GenBankSequence gbs = new GenBankSequence(); - line = ltrim(line); - String[] args = line.split(" "); - gbs.setId(Integer.parseInt(args[0])); - int len = args.length-1; - Vector seqs = new Vector(); - for (int i=0;i= 0 && Character.isWhitespace(s.charAt(i))) { - i--; - } - return s.substring(0,i+1); + return s.substring(0, i + 1); + } + + public String ltrim(String s) + { + int i = 0; + while (i < s.length() && Character.isWhitespace(s.charAt(i))) + { + i++; } + return s.substring(i); + } - public String ltrim(String s) { - int i = 0; - while (i < s.length() && Character.isWhitespace(s.charAt(i))) { - i++; - } - return s.substring(i); - } - - public String print(){ - StringBuffer out = new StringBuffer(); - for (SequenceI seq: this.getSeqs()){ - SequenceFeature[] seqFeatures = seq.getSequenceFeatures(); - boolean featureLinePrinted = false; - for(SequenceFeature sf:seqFeatures){ - if(sf.getType().equals("LOCUS")){ - out.append(sf.getDescription()).append(newline); - }else if (sf.getType().equals("DEFINITION")){ - out.append("DEFINITION ").append(sf.getDescription()).append(newline); - }else if (sf.getType().equals("VERSION")){ - out.append("VERSION ").append(sf.getDescription()).append(newline); - }else if (sf.getType().equals("ACCESSION")){ - out.append("ACCESSION ").append(sf.getDescription()).append(newline); - }else if (sf.getType().equals("DBLINK")){ - out.append("DBLINK ").append(sf.getDescription()).append(newline); - }else if (sf.getType().equals("KEYWORDS")){ - out.append("KEYWORDS ").append(sf.getDescription()).append(newline); - }else if (sf.getType().equals("SOURCE")){ - out.append("SOURCE ").append(sf.getDescription()).append(newline); - }else if (sf.getType().equals("REFERENCE")){ - out.append(sf.getDescription()).append(newline); - }else if (sf.getType().equals("COMMENT")){ - out.append("COMMENT ").append(sf.getDescription()).append(newline); - }else if (sf.getType().equals("BASE COUNT")){ - out.append("BASE COUNT ").append(sf.getDescription()).append(newline); - }else{ - if (!featureLinePrinted){ - out.append("FEATURES Location/Qualifiers").append(newline); - featureLinePrinted = true; - } - out.append(" ").append(sf.getType()).append(" ").append(sf.getBegin()).append("..").append(sf.getEnd()).append(newline); - Hashtable qualifiers = sf.otherDetails; - if (qualifiers!=null){ - Enumeration keys = qualifiers.keys(); - while (keys.hasMoreElements()){ - String key = keys.nextElement(); - String value = qualifiers.get(key); - if (value!=null){ - out.append(" /").append(key).append("=").append(value).append(newline); - } - } - } - } - } - out.append("ORIGIN").append(newline); - //We have to divide sequence in groups of 6x10 chars - String sequenceString = seq.getSequenceAsString(); - int howManyGroups = (int) Math.floor(sequenceString.length()/60); - for (int i=0;i<=howManyGroups;i++){ - String sequenceSegment = sequenceString.substring(i*60,Math.min((i+1)*60, sequenceString.length())); - if ((!"".equals(sequenceSegment) && (sequenceSegment!=null) && (sequenceSegment.length()>0))){ - out.append(" ").append(60*i+1).append(" "); - } - int segmentLength = sequenceSegment.length(); - if (segmentLength>=10){ - out.append(sequenceSegment.substring(0,10)).append(" "); - if (segmentLength>=20){ - out.append(sequenceSegment.substring(10,20)).append(" "); - if (segmentLength>=30){ - out.append(sequenceSegment.substring(20,30)).append(" "); - if (segmentLength>=40){ - out.append(sequenceSegment.substring(30,40)).append(" "); - if (segmentLength>=50){ - out.append(sequenceSegment.substring(40,50)).append(" "); - if (segmentLength<=60){ - out.append(sequenceSegment.substring(50,sequenceSegment.length())); - } - }else{ - out.append(sequenceSegment.substring(40,sequenceSegment.length())); - } - }else{ - out.append(sequenceSegment.substring(30,sequenceSegment.length())); - } - }else{ - out.append(sequenceSegment.substring(20,sequenceSegment.length())); - } - }else{ - out.append(sequenceSegment.substring(10,sequenceSegment.length())); - } - } else if ((!"".equals(sequenceSegment) && (sequenceSegment!=null) && (sequenceSegment.length()>0))){ - out.append(sequenceSegment); - } - out.append(newline); - } - out.append("//"); - } - return out.toString(); + public String print() + { + StringBuffer out = new StringBuffer(); + for (SequenceI seq : this.getSeqs()) + { + SequenceFeature[] seqFeatures = seq.getSequenceFeatures(); + boolean featureLinePrinted = false; + for (SequenceFeature sf : seqFeatures) + { + if (sf.getType().equals("LOCUS")) + { + out.append(sf.getDescription()).append(newline); + } + else if (sf.getType().equals("DEFINITION")) + { + out.append("DEFINITION ").append(sf.getDescription()) + .append(newline); + } + else if (sf.getType().equals("VERSION")) + { + out.append("VERSION ").append(sf.getDescription()) + .append(newline); + } + else if (sf.getType().equals("ACCESSION")) + { + out.append("ACCESSION ").append(sf.getDescription()) + .append(newline); + } + else if (sf.getType().equals("DBLINK")) + { + out.append("DBLINK ").append(sf.getDescription()).append(newline); + } + else if (sf.getType().equals("KEYWORDS")) + { + out.append("KEYWORDS ").append(sf.getDescription()) + .append(newline); + } + else if (sf.getType().equals("SOURCE")) + { + out.append("SOURCE ").append(sf.getDescription()) + .append(newline); + } + else if (sf.getType().equals("REFERENCE")) + { + out.append(sf.getDescription()).append(newline); + } + else if (sf.getType().equals("COMMENT")) + { + out.append("COMMENT ").append(sf.getDescription()) + .append(newline); + } + else if (sf.getType().equals("BASE COUNT")) + { + out.append("BASE COUNT ").append(sf.getDescription()) + .append(newline); + } + else + { + if (!featureLinePrinted) + { + out.append("FEATURES Location/Qualifiers").append( + newline); + featureLinePrinted = true; + } + out.append(" ").append(sf.getType()).append(" ") + .append(sf.getBegin()).append("..").append(sf.getEnd()) + .append(newline); + Hashtable qualifiers = sf.otherDetails; + if (qualifiers != null) + { + Enumeration keys = qualifiers.keys(); + while (keys.hasMoreElements()) + { + String key = keys.nextElement(); + String value = qualifiers.get(key); + if (value != null) + { + out.append(" /").append(key) + .append("=").append(value).append(newline); + } + } + } + } + } + out.append("ORIGIN").append(newline); + // We have to divide sequence in groups of 6x10 chars + String sequenceString = seq.getSequenceAsString(); + int howManyGroups = (int) Math.floor(sequenceString.length() / 60); + for (int i = 0; i <= howManyGroups; i++) + { + String sequenceSegment = sequenceString.substring(i * 60, + Math.min((i + 1) * 60, sequenceString.length())); + if ((!"".equals(sequenceSegment) && (sequenceSegment != null) && (sequenceSegment + .length() > 0))) + { + out.append(" ").append(60 * i + 1).append(" "); + } + int segmentLength = sequenceSegment.length(); + if (segmentLength >= 10) + { + out.append(sequenceSegment.substring(0, 10)).append(" "); + if (segmentLength >= 20) + { + out.append(sequenceSegment.substring(10, 20)).append(" "); + if (segmentLength >= 30) + { + out.append(sequenceSegment.substring(20, 30)).append(" "); + if (segmentLength >= 40) + { + out.append(sequenceSegment.substring(30, 40)).append(" "); + if (segmentLength >= 50) + { + out.append(sequenceSegment.substring(40, 50)).append(" "); + if (segmentLength <= 60) + { + out.append(sequenceSegment.substring(50, + sequenceSegment.length())); + } + } + else + { + out.append(sequenceSegment.substring(40, + sequenceSegment.length())); + } + } + else + { + out.append(sequenceSegment.substring(30, + sequenceSegment.length())); + } + } + else + { + out.append(sequenceSegment.substring(20, + sequenceSegment.length())); + } + } + else + { + out.append(sequenceSegment.substring(10, + sequenceSegment.length())); + } + } + else if ((!"".equals(sequenceSegment) && (sequenceSegment != null) && (sequenceSegment + .length() > 0))) + { + out.append(sequenceSegment); + } + out.append(newline); + } + out.append("//"); } + return out.toString(); + } } diff --git a/src/jalview/io/xdb/genbank/GenBankFeature.java b/src/jalview/io/xdb/genbank/GenBankFeature.java index 0de2e65..ca169af 100644 --- a/src/jalview/io/xdb/genbank/GenBankFeature.java +++ b/src/jalview/io/xdb/genbank/GenBankFeature.java @@ -3,66 +3,93 @@ package jalview.io.xdb.genbank; import java.util.Enumeration; import java.util.Hashtable; -public class GenBankFeature { - public static final String MISC_TYPE = "misc_feature"; - public static final String SOURCE = "source"; - public static final String CDS = "CDS"; - public static final String GENE = "gene"; - public static final String EXON = "exon"; - public static final String INTRON = "intron"; - public static final String PRIM_TRANSCRIPT = "prim_transcript"; - public static final String mRNA = "mRNA"; - public static final String MOBILE_ELEMENT = "mobile_element"; - public static final String VARIATION = "variation"; - - private String type; - private Hashtable qualifiers = new Hashtable(); - private GenBankLocation location = null; - - public GenBankFeature() { - super(); - } - - public GenBankFeature(String type) { - super(); - this.type = type; - } - - public void addQualifier(String key, String value){ - this.qualifiers.put(key, value); - } - public void updateQualifier(String key, String newValue){ - this.qualifiers.remove(key); - this.qualifiers.put(key, newValue); - } - - public String getQualifier(String key){ - return this.qualifiers.get(key); - } - - public String getType() { - return type; - } - - public void setType(String type) { - this.type = type; - } - public Enumeration getQualifiersNames(){ - return this.qualifiers.keys(); - } - public int getQualifiersSize(){ - return this.qualifiers.size(); - } - - public Hashtable getFields() { - return qualifiers; - } - - public GenBankLocation getLocation() { - return location; - } - - public void setLocation(GenBankLocation location) { - this.location = location; - } +public class GenBankFeature +{ + public static final String MISC_TYPE = "misc_feature"; + + public static final String SOURCE = "source"; + + public static final String CDS = "CDS"; + + public static final String GENE = "gene"; + + public static final String EXON = "exon"; + + public static final String INTRON = "intron"; + + public static final String PRIM_TRANSCRIPT = "prim_transcript"; + + public static final String mRNA = "mRNA"; + + public static final String MOBILE_ELEMENT = "mobile_element"; + + public static final String VARIATION = "variation"; + + private String type; + + private Hashtable qualifiers = new Hashtable(); + + private GenBankLocation location = null; + + public GenBankFeature() + { + super(); + } + + public GenBankFeature(String type) + { + super(); + this.type = type; + } + + public void addQualifier(String key, String value) + { + this.qualifiers.put(key, value); + } + + public void updateQualifier(String key, String newValue) + { + this.qualifiers.remove(key); + this.qualifiers.put(key, newValue); + } + + public String getQualifier(String key) + { + return this.qualifiers.get(key); + } + + public String getType() + { + return type; + } + + public void setType(String type) + { + this.type = type; + } + + public Enumeration getQualifiersNames() + { + return this.qualifiers.keys(); + } + + public int getQualifiersSize() + { + return this.qualifiers.size(); + } + + public Hashtable getFields() + { + return qualifiers; + } + + public GenBankLocation getLocation() + { + return location; + } + + public void setLocation(GenBankLocation location) + { + this.location = location; + } } diff --git a/src/jalview/io/xdb/genbank/GenBankLocation.java b/src/jalview/io/xdb/genbank/GenBankLocation.java index 5d0db6c..da82171 100644 --- a/src/jalview/io/xdb/genbank/GenBankLocation.java +++ b/src/jalview/io/xdb/genbank/GenBankLocation.java @@ -113,38 +113,43 @@ package jalview.io.xdb.genbank; * * */ -public abstract class GenBankLocation { - // the location is complement strand? - private boolean complement = false; +public abstract class GenBankLocation +{ + // the location is complement strand? + private boolean complement = false; - public GenBankLocation() { - } + public GenBankLocation() + { + } - /** - * The minor location in genome sequence - * - * @return position - */ - public abstract int getMinor(); + /** + * The minor location in genome sequence + * + * @return position + */ + public abstract int getMinor(); - /** - * The major location in genome sequence - * - * @return position - */ - public abstract int getMajor(); + /** + * The major location in genome sequence + * + * @return position + */ + public abstract int getMajor(); - /** - * @return the complement - */ - public boolean isComplement() { - return complement; - } + /** + * @return the complement + */ + public boolean isComplement() + { + return complement; + } - /** - * @param complement the complement to set - */ - public void setComplement(boolean complement) { - this.complement = complement; - } + /** + * @param complement + * the complement to set + */ + public void setComplement(boolean complement) + { + this.complement = complement; + } } \ No newline at end of file diff --git a/src/jalview/io/xdb/genbank/GenBankLocationPoint.java b/src/jalview/io/xdb/genbank/GenBankLocationPoint.java index 6d3a475..f15528a 100644 --- a/src/jalview/io/xdb/genbank/GenBankLocationPoint.java +++ b/src/jalview/io/xdb/genbank/GenBankLocationPoint.java @@ -3,138 +3,176 @@ package jalview.io.xdb.genbank; /** * */ -public class GenBankLocationPoint extends GenBankLocation { - private String entry; - private char prefix = 0; - private int min = 0; - private char symbol = 0; - private int max = 0; - private char sufix = 0; - - public GenBankLocationPoint() { - } - - public GenBankLocationPoint(int point) { - this.min = point; - this.max = point; - } - - public GenBankLocationPoint(int min, int max) { - this.min = min; - this.max = max; - } - - public int getMinor() { - return this.min; - } - - public int getMajor() { - return this.max; - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - if( prefix != 0 && prefix != ' ' ) { - sb.append(prefix); - } - if( symbol == '.' || symbol == '^' ) { - sb.append( String.format("%d%c%d",min,symbol,max) ); - } else { - if( min != max ) { - sb.append( String.format("%d.%d",min,max) ); - } else { - sb.append( min ); - } - } - if( sufix != 0 && sufix != ' ' ) { - sb.append(sufix); - } - return sb.toString(); - } - - /** - * @return the prefix - */ - public char getPrefix() { - return prefix; - } - - /** - * @param prefix the prefix to set - */ - public void setPrefix(char prefix) { - this.prefix = prefix; - } - - /** - * @return the min - */ - public int getMin() { - return min; - } - - /** - * @param min the min to set - */ - public void setMin(int min) { - this.min = min; - } - - /** - * @return the symbol - */ - public char getSymbol() { - return symbol; - } - - /** - * @param symbol the symbol to set - */ - public void setSymbol(char symbol) { - this.symbol = symbol; - } - - /** - * @return the max - */ - public int getMax() { - return max; - } - - /** - * @param max the max to set - */ - public void setMax(int max) { - this.max = max; - } - - /** - * @return the sufix - */ - public char getSufix() { - return sufix; - } - - /** - * @param sufix the sufix to set - */ - public void setSufix(char sufix) { - this.sufix = sufix; - } - - /** - * @return the entry - */ - public String getEntry() { - return entry; - } - - /** - * @param entry the entry to set - */ - public void setEntry(String entry) { - this.entry = entry; - } +public class GenBankLocationPoint extends GenBankLocation +{ + private String entry; + + private char prefix = 0; + + private int min = 0; + + private char symbol = 0; + + private int max = 0; + + private char sufix = 0; + + public GenBankLocationPoint() + { + } + + public GenBankLocationPoint(int point) + { + this.min = point; + this.max = point; + } + + public GenBankLocationPoint(int min, int max) + { + this.min = min; + this.max = max; + } + + public int getMinor() + { + return this.min; + } + + public int getMajor() + { + return this.max; + } + + @Override + public String toString() + { + StringBuilder sb = new StringBuilder(); + if (prefix != 0 && prefix != ' ') + { + sb.append(prefix); + } + if (symbol == '.' || symbol == '^') + { + sb.append(String.format("%d%c%d", min, symbol, max)); + } + else + { + if (min != max) + { + sb.append(String.format("%d.%d", min, max)); + } + else + { + sb.append(min); + } + } + if (sufix != 0 && sufix != ' ') + { + sb.append(sufix); + } + return sb.toString(); + } + + /** + * @return the prefix + */ + public char getPrefix() + { + return prefix; + } + + /** + * @param prefix + * the prefix to set + */ + public void setPrefix(char prefix) + { + this.prefix = prefix; + } + + /** + * @return the min + */ + public int getMin() + { + return min; + } + + /** + * @param min + * the min to set + */ + public void setMin(int min) + { + this.min = min; + } + + /** + * @return the symbol + */ + public char getSymbol() + { + return symbol; + } + + /** + * @param symbol + * the symbol to set + */ + public void setSymbol(char symbol) + { + this.symbol = symbol; + } + + /** + * @return the max + */ + public int getMax() + { + return max; + } + + /** + * @param max + * the max to set + */ + public void setMax(int max) + { + this.max = max; + } + + /** + * @return the sufix + */ + public char getSufix() + { + return sufix; + } + + /** + * @param sufix + * the sufix to set + */ + public void setSufix(char sufix) + { + this.sufix = sufix; + } + + /** + * @return the entry + */ + public String getEntry() + { + return entry; + } + + /** + * @param entry + * the entry to set + */ + public void setEntry(String entry) + { + this.entry = entry; + } } diff --git a/src/jalview/io/xdb/genbank/GenBankLocationRange.java b/src/jalview/io/xdb/genbank/GenBankLocationRange.java index 552d1f9..c8926ae 100644 --- a/src/jalview/io/xdb/genbank/GenBankLocationRange.java +++ b/src/jalview/io/xdb/genbank/GenBankLocationRange.java @@ -3,84 +3,103 @@ package jalview.io.xdb.genbank; /** * */ -public class GenBankLocationRange extends GenBankLocation { - private String entry = null; - private GenBankLocationPoint start = null; - private GenBankLocationPoint end = null; +public class GenBankLocationRange extends GenBankLocation +{ + private String entry = null; - public GenBankLocationRange() { - } + private GenBankLocationPoint start = null; - @Override - public int getMinor() { - return start == null ? 0 : start.getMinor(); - } + private GenBankLocationPoint end = null; - @Override - public int getMajor() { - return end == null ? 0 : end.getMajor(); - } + public GenBankLocationRange() + { + } - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); -// if( getDirecao() != '5' ) { -// sb.append("complement("); -// } - if( entry != null ) { - sb.append(entry); - sb.append(":"); - } - if( getStart() != null ) { - sb.append( getStart().toString() ); - } - if( getEnd() != null && getStart() != getEnd() && !start.equals(end) ) { - sb.append(".."); - sb.append( getEnd().toString() ); - } - return sb.toString(); - } + @Override + public int getMinor() + { + return start == null ? 0 : start.getMinor(); + } - /** - * @return the entry - */ - public String getEntry() { - return entry; - } + @Override + public int getMajor() + { + return end == null ? 0 : end.getMajor(); + } - /** - * @param entry the entry to set - */ - public void setEntry(String entry) { - this.entry = entry; + @Override + public String toString() + { + StringBuilder sb = new StringBuilder(); + // if( getDirecao() != '5' ) { + // sb.append("complement("); + // } + if (entry != null) + { + sb.append(entry); + sb.append(":"); } - - /** - * @return the start - */ - public GenBankLocationPoint getStart() { - return start; + if (getStart() != null) + { + sb.append(getStart().toString()); } - - /** - * @param start the start to set - */ - public void setStart(GenBankLocationPoint start) { - this.start = start; + if (getEnd() != null && getStart() != getEnd() && !start.equals(end)) + { + sb.append(".."); + sb.append(getEnd().toString()); } + return sb.toString(); + } - /** - * @return the end - */ - public GenBankLocationPoint getEnd() { - return end; - } + /** + * @return the entry + */ + public String getEntry() + { + return entry; + } - /** - * @param end the end to set - */ - public void setEnd(GenBankLocationPoint end) { - this.end = end; - } + /** + * @param entry + * the entry to set + */ + public void setEntry(String entry) + { + this.entry = entry; + } + + /** + * @return the start + */ + public GenBankLocationPoint getStart() + { + return start; + } + + /** + * @param start + * the start to set + */ + public void setStart(GenBankLocationPoint start) + { + this.start = start; + } + + /** + * @return the end + */ + public GenBankLocationPoint getEnd() + { + return end; + } + + /** + * @param end + * the end to set + */ + public void setEnd(GenBankLocationPoint end) + { + this.end = end; + } } diff --git a/src/jalview/io/xdb/genbank/GenBankLocations.java b/src/jalview/io/xdb/genbank/GenBankLocations.java index ae3e47b..7281703 100644 --- a/src/jalview/io/xdb/genbank/GenBankLocations.java +++ b/src/jalview/io/xdb/genbank/GenBankLocations.java @@ -1,98 +1,127 @@ package jalview.io.xdb.genbank; /** - * + * * @author Dieval Guizelini */ -public class GenBankLocations extends GenBankLocation { - public static final int NONE = 1; // default - public static final int COMPLEMENT = 2; - public static final int JOIN = 3; - public static final int ORDER = 4; // conj com ordem desconhecida - private int operator = NONE; - private java.util.List units; - - public GenBankLocations() { - units = new java.util.ArrayList(); - } +public class GenBankLocations extends GenBankLocation +{ + public static final int NONE = 1; // default - @Override - public void setComplement(boolean complement){ - super.setComplement(complement); - this.operator = COMPLEMENT; - if (units != null) { - for (GenBankLocation o : units) { - o.setComplement(complement); - } - } - } + public static final int COMPLEMENT = 2; - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - if (getOperator() == COMPLEMENT) { - sb.append("complement("); - } else if (getOperator() == JOIN) { - sb.append("join("); - } else if (getOperator() == ORDER) { - sb.append("order("); - } - if (units.size() > 0) { - sb.append(units.get(0).toString()); - for (int i = 1; i < units.size(); i++) { - sb.append(","); - sb.append(units.get(i).toString()); - } - } - if (getOperator() != NONE) { - sb.append(")"); - } - return sb.toString(); - } + public static final int JOIN = 3; - /** - * @return the units - */ - public java.util.List getUnits() { - return units; - } + public static final int ORDER = 4; // conj com ordem desconhecida - /** - * @param units the units to set - */ - public void setUnits(java.util.List units) { - this.units = units; - } + private int operator = NONE; + + private java.util.List units; - @Override - public int getMinor() { - if( units.size() > 0 ) { - return units.get(0).getMinor(); - } - return 0; + public GenBankLocations() + { + units = new java.util.ArrayList(); + } + + @Override + public void setComplement(boolean complement) + { + super.setComplement(complement); + this.operator = COMPLEMENT; + if (units != null) + { + for (GenBankLocation o : units) + { + o.setComplement(complement); + } } + } - @Override - public int getMajor() { - int ind = units.size(); - if( ind > 0 ) { - return units.get(ind-1).getMajor(); - } - return 0; + @Override + public String toString() + { + StringBuilder sb = new StringBuilder(); + if (getOperator() == COMPLEMENT) + { + sb.append("complement("); + } + else if (getOperator() == JOIN) + { + sb.append("join("); } + else if (getOperator() == ORDER) + { + sb.append("order("); + } + if (units.size() > 0) + { + sb.append(units.get(0).toString()); + for (int i = 1; i < units.size(); i++) + { + sb.append(","); + sb.append(units.get(i).toString()); + } + } + if (getOperator() != NONE) + { + sb.append(")"); + } + return sb.toString(); + } + + /** + * @return the units + */ + public java.util.List getUnits() + { + return units; + } - /** - * @return the operator - */ - public int getOperator() { - return operator; + /** + * @param units + * the units to set + */ + public void setUnits(java.util.List units) + { + this.units = units; + } + + @Override + public int getMinor() + { + if (units.size() > 0) + { + return units.get(0).getMinor(); } + return 0; + } - /** - * @param operator the operator to set - */ - public void setOperator(int operator) { - this.operator = operator; + @Override + public int getMajor() + { + int ind = units.size(); + if (ind > 0) + { + return units.get(ind - 1).getMajor(); } + return 0; + } + + /** + * @return the operator + */ + public int getOperator() + { + return operator; + } + + /** + * @param operator + * the operator to set + */ + public void setOperator(int operator) + { + this.operator = operator; + } } diff --git a/src/jalview/io/xdb/genbank/GenBankLocus.java b/src/jalview/io/xdb/genbank/GenBankLocus.java index cf6289e..8a44222 100644 --- a/src/jalview/io/xdb/genbank/GenBankLocus.java +++ b/src/jalview/io/xdb/genbank/GenBankLocus.java @@ -1,138 +1,176 @@ package jalview.io.xdb.genbank; /** - * A short mnemonic name for the entry, chosen to suggest the - * sequence's definition. Mandatory keyword/exactly one record. - * - *

The LOCUS field contains a number of different data elements, including locus name, - * sequence length, molecule type, GenBank division, and modification date. Each element - * is described below.

- * + * A short mnemonic name for the entry, chosen to suggest the sequence's + * definition. Mandatory keyword/exactly one record. + * + *

+ * The LOCUS field contains a number of different data elements, including locus + * name, sequence length, molecule type, GenBank division, and modification + * date. Each element is described below. + *

+ * */ -public class GenBankLocus { - private String name; - private int sequenceLength; - private String strand; - private String moleculeType; - private boolean linearSequence; - private String division; - private String modificationDate; - - public GenBankLocus() { - } - - public GenBankLocus(String name, int sequenceLength) { - this.name = name; - this.sequenceLength = sequenceLength; - } - - - /** - * @return the name - */ - public String getName() { - return name; - } - - /** - * @param name the name to set - */ - public void setName(String name) { - this.name = name; - } - - /** - * @return the sequenceLength - */ - public int getSequenceLength() { - return sequenceLength; - } - - /** - * @param sequenceLength the sequenceLength to set - */ - public void setSequenceLength(int sequenceLength) { - this.sequenceLength = sequenceLength; - } - - /** - * @return the strand - */ - public String getStrand() { - return strand; - } - - /** - * @param strand the strand to set - */ - public void setStrand(String strand) { - this.strand = strand; - } - - /** - * @return the moleculeType - */ - public String getMoleculeType() { - return moleculeType; - } - - /** - * @param moleculeType the moleculeType to set - */ - public void setMoleculeType(String moleculeType) { - this.moleculeType = moleculeType; - } - - /** - * @return the linearSequence - */ - public boolean isLinearSequence() { - return linearSequence; - } - - /** - * @param linearSequence the linearSequence to set - */ - public void setLinearSequence(boolean linearSequence) { - this.linearSequence = linearSequence; - } - - /** - * @return the division - */ - public String getDivision() { - return division; - } - - /** - * @param division the division to set - */ - public void setDivision(String division) { - this.division = division; - } - - /** - * @return the modificationDate - */ - public String getModificationDate() { - return modificationDate; - } - - /** - * @param modificationDate the modificationDate to set - */ - public void setModificationDate(String modificationDate) { - this.modificationDate = modificationDate; - } - - @Override - public String toString() { - - return String.format("LOCUS %-16s %11d bp %3s %6s %-8s %3s %s", - this.name, this.sequenceLength, this.strand, - this.moleculeType, linearSequence?"linear ":"circular", - this.division, ((modificationDate == null) || (modificationDate.equals("")) ? "" : modificationDate.toUpperCase()) - ); - } +public class GenBankLocus +{ + private String name; + + private int sequenceLength; + + private String strand; + + private String moleculeType; + + private boolean linearSequence; + + private String division; + + private String modificationDate; + + public GenBankLocus() + { + } + + public GenBankLocus(String name, int sequenceLength) + { + this.name = name; + this.sequenceLength = sequenceLength; + } + + /** + * @return the name + */ + public String getName() + { + return name; + } + + /** + * @param name + * the name to set + */ + public void setName(String name) + { + this.name = name; + } + + /** + * @return the sequenceLength + */ + public int getSequenceLength() + { + return sequenceLength; + } + + /** + * @param sequenceLength + * the sequenceLength to set + */ + public void setSequenceLength(int sequenceLength) + { + this.sequenceLength = sequenceLength; + } + + /** + * @return the strand + */ + public String getStrand() + { + return strand; + } + + /** + * @param strand + * the strand to set + */ + public void setStrand(String strand) + { + this.strand = strand; + } + + /** + * @return the moleculeType + */ + public String getMoleculeType() + { + return moleculeType; + } + + /** + * @param moleculeType + * the moleculeType to set + */ + public void setMoleculeType(String moleculeType) + { + this.moleculeType = moleculeType; + } + + /** + * @return the linearSequence + */ + public boolean isLinearSequence() + { + return linearSequence; + } + + /** + * @param linearSequence + * the linearSequence to set + */ + public void setLinearSequence(boolean linearSequence) + { + this.linearSequence = linearSequence; + } + + /** + * @return the division + */ + public String getDivision() + { + return division; + } + + /** + * @param division + * the division to set + */ + public void setDivision(String division) + { + this.division = division; + } + + /** + * @return the modificationDate + */ + public String getModificationDate() + { + return modificationDate; + } + + /** + * @param modificationDate + * the modificationDate to set + */ + public void setModificationDate(String modificationDate) + { + this.modificationDate = modificationDate; + } + + @Override + public String toString() + { + + return String + .format("LOCUS %-16s %11d bp %3s %6s %-8s %3s %s", + this.name, + this.sequenceLength, + this.strand, + this.moleculeType, + linearSequence ? "linear " : "circular", + this.division, + ((modificationDate == null) + || (modificationDate.equals("")) ? "" + : modificationDate.toUpperCase())); + } } diff --git a/src/jalview/io/xdb/genbank/GenBankReference.java b/src/jalview/io/xdb/genbank/GenBankReference.java index 74f0080..17ad8b4 100644 --- a/src/jalview/io/xdb/genbank/GenBankReference.java +++ b/src/jalview/io/xdb/genbank/GenBankReference.java @@ -1,135 +1,176 @@ package jalview.io.xdb.genbank; -public class GenBankReference { - private int order; - private int begin; - private int end; - private String descriptor; - private String authors; - private String title; - private String journal; - private String pubmed; - private String medline; - private String consortia; - private String remark; - - public GenBankReference() { - super(); - } - - public String getDescriptor() { - return descriptor; - } - - public void setDescriptor(String descriptor) { - this.descriptor = descriptor; - } - - public String getAuthors() { - return authors; - } - - public void setAuthors(String authors) { - this.authors = authors; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } - - public String getJournal() { - return journal; - } - - public void setJournal(String journal) { - this.journal = journal; - } - - public String getPubmed() { - return pubmed; - } - - public void setPubmed(String pubmed) { - this.pubmed = pubmed; - } - - public int getOrder() { - return order; - } - - public void setOrder(int order) { - this.order = order; - } - public int getBegin() { - return begin; - } - - public void setBegin(int begin) { - this.begin = begin; - } - - public int getEnd() { - return end; - } - - public void setEnd(int end) { - this.end = end; - } - - - public String getMedline() { - return medline; - } - - public void setMedline(String medline) { - this.medline = medline; - } - - public String getConsortia() { - return consortia; - } - - public void setConsortia(String consortia) { - this.consortia = consortia; - } - - public String getRemark() { - return remark; - } - - public void setRemark(String remark) { - this.remark = remark; - } - - public String toString(){ -// References has the following format -// REFERENCE 1 (bases 1 to 1976) -// AUTHORS Spritz,R.A., DeRiel,J.K., Forget,B.G. and Weissman,S.M. -// TITLE Complete nucleotide sequence of the human delta-globin gene -// JOURNAL Cell 21 (3), 639-646 (1980) -// PUBMED 7438204 - - StringBuffer buf = new StringBuffer(); - buf.append("REFERENCE ").append(this.getOrder()).append(" (bases ").append(this.getBegin()).append(" to ").append(this.getEnd()).append(")\n"); - if (this.getAuthors()!=null) - buf.append(" AUTHORS ").append(this.getAuthors()).append("\n"); - if (this.getTitle()!=null) - buf.append(" TITLE ").append(this.getTitle()).append("\n"); - if (this.getJournal()!=null) - buf.append(" JOURNAL ").append(this.getJournal()).append("\n"); - if (this.getPubmed()!=null) - buf.append(" PUBMED ").append(this.getPubmed()).append("\n");; - if (this.getMedline()!=null) - buf.append(" MEDLINE ").append(this.getMedline()).append("\n");; - if (this.getRemark()!=null) - buf.append(" REMARK ").append(this.getRemark()).append("\n");; - if (this.getConsortia()!=null) - buf.append(" CONSRTM ").append(this.getConsortia()).append("\n");; - return buf.toString(); - } - +public class GenBankReference +{ + private int order; + + private int begin; + + private int end; + + private String descriptor; + + private String authors; + + private String title; + + private String journal; + + private String pubmed; + + private String medline; + + private String consortia; + + private String remark; + + public GenBankReference() + { + super(); + } + + public String getDescriptor() + { + return descriptor; + } + + public void setDescriptor(String descriptor) + { + this.descriptor = descriptor; + } + + public String getAuthors() + { + return authors; + } + + public void setAuthors(String authors) + { + this.authors = authors; + } + + public String getTitle() + { + return title; + } + + public void setTitle(String title) + { + this.title = title; + } + + public String getJournal() + { + return journal; + } + + public void setJournal(String journal) + { + this.journal = journal; + } + + public String getPubmed() + { + return pubmed; + } + + public void setPubmed(String pubmed) + { + this.pubmed = pubmed; + } + + public int getOrder() + { + return order; + } + + public void setOrder(int order) + { + this.order = order; + } + + public int getBegin() + { + return begin; + } + + public void setBegin(int begin) + { + this.begin = begin; + } + + public int getEnd() + { + return end; + } + + public void setEnd(int end) + { + this.end = end; + } + + public String getMedline() + { + return medline; + } + + public void setMedline(String medline) + { + this.medline = medline; + } + + public String getConsortia() + { + return consortia; + } + + public void setConsortia(String consortia) + { + this.consortia = consortia; + } + + public String getRemark() + { + return remark; + } + + public void setRemark(String remark) + { + this.remark = remark; + } + + public String toString() + { + // References has the following format + // REFERENCE 1 (bases 1 to 1976) + // AUTHORS Spritz,R.A., DeRiel,J.K., Forget,B.G. and Weissman,S.M. + // TITLE Complete nucleotide sequence of the human delta-globin gene + // JOURNAL Cell 21 (3), 639-646 (1980) + // PUBMED 7438204 + + StringBuffer buf = new StringBuffer(); + buf.append("REFERENCE ").append(this.getOrder()).append(" (bases ") + .append(this.getBegin()).append(" to ").append(this.getEnd()) + .append(")\n"); + if (this.getAuthors() != null) + buf.append(" AUTHORS ").append(this.getAuthors()).append("\n"); + if (this.getTitle() != null) + buf.append(" TITLE ").append(this.getTitle()).append("\n"); + if (this.getJournal() != null) + buf.append(" JOURNAL ").append(this.getJournal()).append("\n"); + if (this.getPubmed() != null) + buf.append(" PUBMED ").append(this.getPubmed()).append("\n"); + ; + if (this.getMedline() != null) + buf.append(" MEDLINE ").append(this.getMedline()).append("\n"); + ; + if (this.getRemark() != null) + buf.append(" REMARK ").append(this.getRemark()).append("\n"); + ; + if (this.getConsortia() != null) + buf.append(" CONSRTM ").append(this.getConsortia()).append("\n"); + ; + return buf.toString(); + } + } diff --git a/src/jalview/io/xdb/genbank/GenBankSequence.java b/src/jalview/io/xdb/genbank/GenBankSequence.java index 279601c..6e0797c 100644 --- a/src/jalview/io/xdb/genbank/GenBankSequence.java +++ b/src/jalview/io/xdb/genbank/GenBankSequence.java @@ -1,57 +1,65 @@ package jalview.io.xdb.genbank; import java.util.Vector; + /** - * A line like the following: - * 1 aatgaaggtt catttttcat tctcacaaac taatgaaacc ctgcttatct taaaccaacc - * will be mapped as: - * id: 1 - * sequences: {"aatgaaggtt", "catttttcat", "tctcacaaac", "taatgaaacc", "ctgcttatct", "taaaccaacc"} - * Each sequence has 8 nucleotides long + * A line like the following: 1 aatgaaggtt catttttcat tctcacaaac taatgaaacc + * ctgcttatct taaaccaacc will be mapped as: id: 1 sequences: {"aatgaaggtt", + * "catttttcat", "tctcacaaac", "taatgaaacc", "ctgcttatct", "taaaccaacc"} Each + * sequence has 8 nucleotides long + * * @author darolmar - * + * */ -public class GenBankSequence { - //Initial position - private int id; - //Sequences in that line - private Vector sequences; - - public GenBankSequence() { - super(); - sequences = new Vector(); - } - - public int getId() { - return id; - } - - public void setId(int id) { - this.id = id; - } - - public Vector getSequences() { - return sequences; - } - - public void setSequences(Vector sequences) { - this.sequences = sequences; - } - - public String getSequencesAsString(){ - StringBuffer sb = new StringBuffer(); - for (String seq:sequences) - sb.append(seq).append(" "); - return sb.toString(); - } - - public String toString(){ - StringBuffer sb = new StringBuffer() - .append(" ").append(this.id); - for (String seq:sequences) - sb.append(" ").append(seq); - sb.append("\n"); - return sb.toString(); - } - +public class GenBankSequence +{ + // Initial position + private int id; + + // Sequences in that line + private Vector sequences; + + public GenBankSequence() + { + super(); + sequences = new Vector(); + } + + public int getId() + { + return id; + } + + public void setId(int id) + { + this.id = id; + } + + public Vector getSequences() + { + return sequences; + } + + public void setSequences(Vector sequences) + { + this.sequences = sequences; + } + + public String getSequencesAsString() + { + StringBuffer sb = new StringBuffer(); + for (String seq : sequences) + sb.append(seq).append(" "); + return sb.toString(); + } + + public String toString() + { + StringBuffer sb = new StringBuffer().append(" ").append(this.id); + for (String seq : sequences) + sb.append(" ").append(seq); + sb.append("\n"); + return sb.toString(); + } + } diff --git a/src/jalview/io/xdb/genbank/GenBankSource.java b/src/jalview/io/xdb/genbank/GenBankSource.java index c5ef3c2..dbdba9f 100644 --- a/src/jalview/io/xdb/genbank/GenBankSource.java +++ b/src/jalview/io/xdb/genbank/GenBankSource.java @@ -1,85 +1,112 @@ package jalview.io.xdb.genbank; /** - *

Free-format information including an abbreviated form of the organism - * name, sometimes followed by a molecule type. (See section 3.4.10 of the - * GenBank release notes for more info.)

- *

Entrez Search Field: Organism [ORGN]

- *

Search Tip: For some organisms that have well-established common names, - * such as baker's yeast, mouse, and human, a search for the common name will - * yield the same results as a search for the scientific name, e.g., a search - * for "baker's yeast" in the organism field retrieves the same number of - * documents as "Saccharomyces cerevisiae". This is true because the Organism - * field is connected to the NCBI Taxonomy Database, which contains - * cross-references between common names, scientific names, and synonyms for - * organisms represented in the Sequence databases.

+ *

+ * Free-format information including an abbreviated form of the organism name, + * sometimes followed by a molecule type. (See section 3.4.10 of the GenBank + * release notes for more info.) + *

+ *

+ * Entrez Search Field: Organism [ORGN] + *

+ *

+ * Search Tip: For some organisms that have well-established common names, such + * as baker's yeast, mouse, and human, a search for the common name will yield + * the same results as a search for the scientific name, e.g., a search for + * "baker's yeast" in the organism field retrieves the same number of documents + * as "Saccharomyces cerevisiae". This is true because the Organism field is + * connected to the NCBI Taxonomy Database, which contains cross-references + * between common names, scientific names, and synonyms for organisms + * represented in the Sequence databases. + *

*

Organism

- *

The formal scientific name for the source organism (genus and species, - * where appropriate) and its lineage, based on the phylogenetic classification - * scheme used in the NCBI Taxonomy Database. If the complete lineage of an - * organism is very long, an abbreviated lineage will be shown in the GenBank - * record and the complete lineage will be available in the Taxonomy Database. - * (See also the /db_xref=taxon:nnnn Feature qualifer, below.)

- *

Entrez Search Field: Organism [ORGN]

- *

Search Tip: You can search the Organism field by any node in the taxonomic + *

+ * The formal scientific name for the source organism (genus and species, where + * appropriate) and its lineage, based on the phylogenetic classification scheme + * used in the NCBI Taxonomy Database. If the complete lineage of an organism is + * very long, an abbreviated lineage will be shown in the GenBank record and the + * complete lineage will be available in the Taxonomy Database. (See also the + * /db_xref=taxon:nnnn Feature qualifer, below.) + *

+ *

+ * Entrez Search Field: Organism [ORGN] + *

+ *

+ * Search Tip: You can search the Organism field by any node in the taxonomic * hierarchy, e.g., you can search for the term "Saccharomyces cerevisiae", * "Saccharomycetales", "Ascomycota", etc. to retrieve all the sequences from - * organisms in a particular taxon.

+ * organisms in a particular taxon. + *

* */ -public class GenBankSource { - private String source=""; - private String organism=""; - private String taxonomic=""; +public class GenBankSource +{ + private String source = ""; - public GenBankSource() { - } + private String organism = ""; - @Override - public String toString() { - return String.format("%s\n\t%s\n\t%s", getSource(), getOrganism(), getTaxonomic()); - } + private String taxonomic = ""; - /** - * @return the source - */ - public String getSource() { - return source; - } + public GenBankSource() + { + } - /** - * @param source the source to set - */ - public void setSource(String source) { - this.source = source; - } + @Override + public String toString() + { + return String.format("%s\n\t%s\n\t%s", getSource(), getOrganism(), + getTaxonomic()); + } - /** - * @return the organism - */ - public String getOrganism() { - return organism; - } + /** + * @return the source + */ + public String getSource() + { + return source; + } - /** - * @param organism the organism to set - */ - public void setOrganism(String organism) { - this.organism = organism; - } + /** + * @param source + * the source to set + */ + public void setSource(String source) + { + this.source = source; + } - /** - * @return the taxonomic - */ - public String getTaxonomic() { - return taxonomic; - } + /** + * @return the organism + */ + public String getOrganism() + { + return organism; + } - /** - * @param taxonomic the taxonomic to set - */ - public void setTaxonomic(String taxonomic) { - this.taxonomic = taxonomic; - } + /** + * @param organism + * the organism to set + */ + public void setOrganism(String organism) + { + this.organism = organism; + } + + /** + * @return the taxonomic + */ + public String getTaxonomic() + { + return taxonomic; + } + + /** + * @param taxonomic + * the taxonomic to set + */ + public void setTaxonomic(String taxonomic) + { + this.taxonomic = taxonomic; + } } diff --git a/src/jalview/io/xdb/genbank/GenBankVersion.java b/src/jalview/io/xdb/genbank/GenBankVersion.java index 85a2fd1..2deb5ee 100644 --- a/src/jalview/io/xdb/genbank/GenBankVersion.java +++ b/src/jalview/io/xdb/genbank/GenBankVersion.java @@ -1,89 +1,125 @@ package jalview.io.xdb.genbank; /** - *

A nucleotide sequence identification number that represents a single, + *

+ * A nucleotide sequence identification number that represents a single, * specific sequence in the GenBank database. This identification number uses - * the accession.version format implemented by GenBank/EMBL/DDBJ in - * February 1999.

- *

If there is any change to the sequence data (even a single base), the - * version number will be increased, e.g., U12345.1 → U12345.2, but the - * accession portion will remain stable.

- *

The accession.version system of sequence identifiers runs parallel to - * the GI number system, i.e., when any change is made to a sequence, it - * receives a new GI number AND an increase to its version number.

- *

For more information, see section 1.3.2 of the GenBank 111.0 release - * notes, and section 3.4.7 of the current GenBank release notes.

- *

A Sequence Revision History tool is available to track the various GI - * numbers, version numbers, and update dates for sequences that appeared in - * a specific GenBank record (more information and example).

- *

More details about sequence identification numbers and the difference - * between GI number and version are provided in Sequence Identifiers: - * A Historical Note.

- *

Entrez Search Field: use the default setting of "All Fields"

+ * the accession.version format implemented by GenBank/EMBL/DDBJ in February + * 1999. + *

+ *

+ * If there is any change to the sequence data (even a single base), the version + * number will be increased, e.g., U12345.1 → U12345.2, but the accession + * portion will remain stable. + *

+ *

+ * The accession.version system of sequence identifiers runs parallel to the GI + * number system, i.e., when any change is made to a sequence, it receives a new + * GI number AND an increase to its version number. + *

+ *

+ * For more information, see section 1.3.2 of the GenBank 111.0 release notes, + * and section 3.4.7 of the current GenBank release notes. + *

+ *

+ * A Sequence Revision History tool is available to track the various GI + * numbers, version numbers, and update dates for sequences that appeared in a + * specific GenBank record (more information and example). + *

+ *

+ * More details about sequence identification numbers and the difference between + * GI number and version are provided in Sequence Identifiers: A Historical + * Note. + *

+ *

+ * Entrez Search Field: use the default setting of "All Fields" + *

*

GI

- *

"GenInfo Identifier" sequence identification number, in this case, for - * the nucleotide sequence. If a sequence changes in any way, a new GI number - * will be assigned.

- *

A separate GI number is also assigned to each protein translation within - * a nucleotide sequence record, and a new GI is assigned if the protein - * translation changes in any way (see below).

- *

GI sequence identifiers run parallel to the new accession.version system - * of sequence identifiers. For more information, see the description of Version, - * above, and section 3.4.7 of the current GenBank release notes.

- *

A Sequence Revision History tool is available to track the various GI + *

+ * "GenInfo Identifier" sequence identification number, in this case, for the + * nucleotide sequence. If a sequence changes in any way, a new GI number will + * be assigned. + *

+ *

+ * A separate GI number is also assigned to each protein translation within a + * nucleotide sequence record, and a new GI is assigned if the protein + * translation changes in any way (see below). + *

+ *

+ * GI sequence identifiers run parallel to the new accession.version system of + * sequence identifiers. For more information, see the description of Version, + * above, and section 3.4.7 of the current GenBank release notes. + *

+ *

+ * A Sequence Revision History tool is available to track the various GI * numbers, version numbers, and update dates for sequences that appeared in a - * specific GenBank record (more information and example).

- *

More details about sequence identification numbers and the difference - * between GI number and version are provided in Sequence Identifiers: A - * Historical Note.

- *

Entrez Search Field: use the default setting of "All Fields"

+ * specific GenBank record (more information and example). + *

+ *

+ * More details about sequence identification numbers and the difference between + * GI number and version are provided in Sequence Identifiers: A Historical + * Note. + *

+ *

+ * Entrez Search Field: use the default setting of "All Fields" + *

+ * * @author Dieval Guizelini * @see Entry */ -public class GenBankVersion { - private String version = ""; - private String gi = ""; - - public GenBankVersion() { - } +public class GenBankVersion +{ + private String version = ""; + private String gi = ""; - /** - * @return the version - */ - public String getVersion() { - return version; - } + public GenBankVersion() + { + } - /** - * @param version the version to set - */ - public void setVersion(String version) { - this.version = version; - } + /** + * @return the version + */ + public String getVersion() + { + return version; + } - /** - * @return the gi - */ - public String getGI() { - return gi; - } + /** + * @param version + * the version to set + */ + public void setVersion(String version) + { + this.version = version; + } - /** - * @param gi the gi to set - */ - public void setGI(String gi) { - this.gi = gi; - } + /** + * @return the gi + */ + public String getGI() + { + return gi; + } + /** + * @param gi + * the gi to set + */ + public void setGI(String gi) + { + this.gi = gi; + } - /** - * Version section in GenBank File Format is text with two fields (version and GI). - * - * @return version+" "+gi - */ - @Override - public String toString() { - return String.format("%s %s",version,gi); - } + /** + * Version section in GenBank File Format is text with two fields (version and + * GI). + * + * @return version+" "+gi + */ + @Override + public String toString() + { + return String.format("%s %s", version, gi); + } } diff --git a/test/jalview/io/GenBankTest.java b/test/jalview/io/GenBankTest.java index d3c41da..d2c9705 100644 --- a/test/jalview/io/GenBankTest.java +++ b/test/jalview/io/GenBankTest.java @@ -17,266 +17,301 @@ import java.util.Map; import org.junit.Test; -public class GenBankTest { -// private final static File GENBANK_FILE = new File("test/jalview/io/V00505.gb"); -// private final static File GENBANK_FILE = new File("test/jalview/io/NC_000011.10.gb"); - private final static File GENBANK_FILE = new File("test/jalview/io/M92650.1.gb"); +public class GenBankTest +{ + // private final static File GENBANK_FILE = new + // File("test/jalview/io/V00505.gb"); + // private final static File GENBANK_FILE = new + // File("test/jalview/io/NC_000011.10.gb"); + private final static File GENBANK_FILE = new File( + "test/jalview/io/M92650.1.gb"); - @Test - public void testParsing(){ - testFileIOwithFormat(GENBANK_FILE, "GENBANK"); - } - /** - * test alignment data in given file can be imported, exported and reimported - * with no dataloss - * - * @param f - * - source datafile (IdentifyFile.identify() should work with it) - * @param ioformat - * - label for IO class used to write and read back in the data from - * f - */ - public static void testFileIOwithFormat(File f, String ioformat) - { - System.out.println("Reading file: " + f); - String ff = f.getPath(); - try - { - AppletFormatAdapter rf = new AppletFormatAdapter(); + @Test + public void testParsing() + { + testFileIOwithFormat(GENBANK_FILE, "GENBANK"); + } - Alignment al = rf.readFile(ff, AppletFormatAdapter.FILE, - new IdentifyFile().Identify(ff, AppletFormatAdapter.FILE)); + /** + * test alignment data in given file can be imported, exported and reimported + * with no dataloss + * + * @param f + * - source datafile (IdentifyFile.identify() should work with it) + * @param ioformat + * - label for IO class used to write and read back in the data from + * f + */ + public static void testFileIOwithFormat(File f, String ioformat) + { + System.out.println("Reading file: " + f); + String ff = f.getPath(); + try + { + AppletFormatAdapter rf = new AppletFormatAdapter(); - assertNotNull("Couldn't read supplied alignment data.", al); + Alignment al = rf.readFile(ff, AppletFormatAdapter.FILE, + new IdentifyFile().Identify(ff, AppletFormatAdapter.FILE)); - // make sure dataset is initialised ? not sure about this - for (int i = 0; i < al.getSequencesArray().length; ++i) - { - al.getSequenceAt(i).setDatasetSequence(al.getSequenceAt(i)); - } - String outputfile = rf.formatSequences(ioformat, al, true); - System.out.println("Output file in '"+ioformat+"':\n"+outputfile+"\n< orig_groups=new HashMap(),new_groups=new HashMap(); + assertTrue( + "Alignment dimension mismatch: original contains " + + al.getHeight() + " and generated has " + + al_input.getHeight() + " sequences; original has " + + al.getWidth() + " and generated has " + + al_input.getWidth() + " columns.", + al.getHeight() == al_input.getHeight() + && al.getWidth() == al_input.getWidth()); - if (aa_new != null && aa_original != null) - { - for (int i = 0; i < aa_original.length; i++) - { - if (aa_new.length>i) { - assertTrue("Different alignment annotation at position "+i, - equalss(aa_original[i], aa_new[i])); - // compare graphGroup or graph properties - needed to verify JAL-1299 - assertTrue("Graph type not identical.",aa_original[i].graph==aa_new[i].graph); - assertTrue("Visibility not identical.", aa_original[i].visible==aa_new[i].visible); - assertTrue( - "Threshold line not identical.", - aa_original[i].threshold == null ? aa_new[i].threshold == null - : aa_original[i].threshold - .equals(aa_new[i].threshold)); - // graphGroup may differ, but pattern should be the same - Integer o_ggrp=new Integer(aa_original[i].graphGroup+2),n_ggrp=new Integer(aa_new[i].graphGroup+2); - BitSet orig_g=orig_groups.get(o_ggrp),new_g=new_groups.get(n_ggrp); - if (orig_g==null) { - orig_groups.put(o_ggrp,orig_g= new BitSet()); - } - if (new_g==null) { - new_groups.put(n_ggrp, new_g=new BitSet()); - } - assertTrue("Graph Group pattern differs at annotation "+i, orig_g.equals(new_g)); - orig_g.set(i); new_g.set(i); - } else { - System.err.println("No matching annotation row for "+aa_original[i].toString()); - } - } - } - assertTrue( - "Generated and imported alignment have different annotation sets (" - + aa_new_size + " != " + aa_original_size + ")", - aa_new_size == aa_original_size); + // check Alignment annotation + AlignmentAnnotation[] aa_new = al_input.getAlignmentAnnotation(); + AlignmentAnnotation[] aa_original = al.getAlignmentAnnotation(); - // check sequences, annotation and features - SequenceI[] seq_original = new SequenceI[al.getSequencesArray().length]; - seq_original = al.getSequencesArray(); - SequenceI[] seq_new = new SequenceI[al_input.getSequencesArray().length]; - seq_new = al_input.getSequencesArray(); - SequenceFeature[] sequenceFeatures_original, sequenceFeatures_new; - AlignmentAnnotation annot_original, annot_new; - // - for (int i = 0; i < al.getSequencesArray().length; i++) - { - String name = seq_original[i].getName(); - int start = seq_original[i].getStart(); - int end = seq_original[i].getEnd(); - System.out.println("Check sequence: " + name + "/" + start + "-" - + end); + // note - at moment we do not distinguish between alignment without any + // annotation rows and alignment with no annotation row vector + // we might want to revise this in future + int aa_new_size = (aa_new == null ? 0 : aa_new.length), aa_original_size = (aa_original == null ? 0 + : aa_original.length); + Map orig_groups = new HashMap(), new_groups = new HashMap(); - // search equal sequence - for (int in = 0; in < al_input.getSequencesArray().length; in++) - { - if (name.equals(seq_new[in].getName()) - && start == seq_new[in].getStart() - && end == seq_new[in].getEnd()) - { - String ss_original = seq_original[i].getSequenceAsString(); - String ss_new = seq_new[in].getSequenceAsString(); - assertTrue("The sequences " + name + "/" + start + "-" + end - + " are not equal", ss_original.equals(ss_new)); + if (aa_new != null && aa_original != null) + { + for (int i = 0; i < aa_original.length; i++) + { + if (aa_new.length > i) + { + assertTrue("Different alignment annotation at position " + i, + equalss(aa_original[i], aa_new[i])); + // compare graphGroup or graph properties - needed to verify JAL-1299 + assertTrue("Graph type not identical.", + aa_original[i].graph == aa_new[i].graph); + assertTrue("Visibility not identical.", + aa_original[i].visible == aa_new[i].visible); + assertTrue( + "Threshold line not identical.", + aa_original[i].threshold == null ? aa_new[i].threshold == null + : aa_original[i].threshold + .equals(aa_new[i].threshold)); + // graphGroup may differ, but pattern should be the same + Integer o_ggrp = new Integer(aa_original[i].graphGroup + 2), n_ggrp = new Integer( + aa_new[i].graphGroup + 2); + BitSet orig_g = orig_groups.get(o_ggrp), new_g = new_groups + .get(n_ggrp); + if (orig_g == null) + { + orig_groups.put(o_ggrp, orig_g = new BitSet()); + } + if (new_g == null) + { + new_groups.put(n_ggrp, new_g = new BitSet()); + } + assertTrue("Graph Group pattern differs at annotation " + i, + orig_g.equals(new_g)); + orig_g.set(i); + new_g.set(i); + } + else + { + System.err.println("No matching annotation row for " + + aa_original[i].toString()); + } + } + } + assertTrue( + "Generated and imported alignment have different annotation sets (" + + aa_new_size + " != " + aa_original_size + ")", + aa_new_size == aa_original_size); - assertTrue( - "Sequence Features were not equivalent", - (seq_original[i].getSequenceFeatures() == null && seq_new[in] - .getSequenceFeatures() == null) - || (seq_original[i].getSequenceFeatures() != null && seq_new[in] - .getSequenceFeatures() != null)); - // compare sequence features - if (seq_original[i].getSequenceFeatures() != null - && seq_new[in].getSequenceFeatures() != null) - { - System.out.println("There are feature!!!"); - sequenceFeatures_original = new SequenceFeature[seq_original[i] - .getSequenceFeatures().length]; - sequenceFeatures_original = seq_original[i] - .getSequenceFeatures(); - sequenceFeatures_new = new SequenceFeature[seq_new[in] - .getSequenceFeatures().length]; - sequenceFeatures_new = seq_new[in].getSequenceFeatures(); + // check sequences, annotation and features + SequenceI[] seq_original = new SequenceI[al.getSequencesArray().length]; + seq_original = al.getSequencesArray(); + SequenceI[] seq_new = new SequenceI[al_input.getSequencesArray().length]; + seq_new = al_input.getSequencesArray(); + SequenceFeature[] sequenceFeatures_original, sequenceFeatures_new; + AlignmentAnnotation annot_original, annot_new; + // + for (int i = 0; i < al.getSequencesArray().length; i++) + { + String name = seq_original[i].getName(); + int start = seq_original[i].getStart(); + int end = seq_original[i].getEnd(); + System.out.println("Check sequence: " + name + "/" + start + "-" + + end); - assertTrue("different number of features", seq_original[i] - .getSequenceFeatures().length == seq_new[in] - .getSequenceFeatures().length); + // search equal sequence + for (int in = 0; in < al_input.getSequencesArray().length; in++) + { + if (name.equals(seq_new[in].getName()) + && start == seq_new[in].getStart() + && end == seq_new[in].getEnd()) + { + String ss_original = seq_original[i].getSequenceAsString(); + String ss_new = seq_new[in].getSequenceAsString(); + assertTrue("The sequences " + name + "/" + start + "-" + end + + " are not equal", ss_original.equals(ss_new)); - for (int feat = 0; feat < seq_original[i].getSequenceFeatures().length; feat++) - { - assertTrue("Different features", - sequenceFeatures_original[feat] - .equals(sequenceFeatures_new[feat])); - } - } - // compare alignment annotation - if (al.getSequenceAt(i).getAnnotation() != null - && al_input.getSequenceAt(in).getAnnotation() != null) - { - for (int j = 0; j < al.getSequenceAt(i).getAnnotation().length; j++) - { - if (al.getSequenceAt(i).getAnnotation()[j] != null - && al_input.getSequenceAt(in).getAnnotation()[j] != null) - { - annot_original = al.getSequenceAt(i).getAnnotation()[j]; - annot_new = al_input.getSequenceAt(in).getAnnotation()[j]; - assertTrue("Different annotation elements", - equalss(annot_original, annot_new)); - } - } - } - else if (al.getSequenceAt(i).getAnnotation() == null - && al_input.getSequenceAt(in).getAnnotation() == null) - { - System.out.println("No annotations"); - } - else if (al.getSequenceAt(i).getAnnotation() != null - && al_input.getSequenceAt(in).getAnnotation() == null) - { - assertTrue("Annotations differed between sequences (" - + al.getSequenceAt(i).getName() + ") and (" - + al_input.getSequenceAt(i).getName() + ")", false); - } - break; - } - } - } - } - /* - * compare annotations - */ - private static boolean equalss(AlignmentAnnotation annot_or, - AlignmentAnnotation annot_new) - { - if (annot_or.annotations.length != annot_new.annotations.length) - { - System.err.println("Different lengths for annotation row elements: "+annot_or.annotations.length +"!="+ annot_new.annotations.length); - return false; - } - for (int i = 0; i < annot_or.annotations.length; i++) - { - Annotation an_or=annot_or.annotations[i],an_new=annot_new.annotations[i]; - if (an_or != null - && an_new!= null) - { - if (!an_or.displayCharacter.trim() - .equals(an_new.displayCharacter.trim()) - || !(""+an_or.secondaryStructure).trim().equals((""+an_new.secondaryStructure).trim()) - || ((!an_or.description.equals(an_new.description)) && (an_or.description == null - || an_new.description == null || !an_or.description - .equals(an_new.description)))) - { - System.err.println("Annotation Element Mismatch\nElement "+i+" in original: "+annot_or.annotations[i].toString()+"\nElement "+i+" in new: "+annot_new.annotations[i].toString()); - return false; - } - } - else if (annot_or.annotations[i] == null - && annot_new.annotations[i] == null) - { - continue; - } - else - { - System.err.println("Annotation Element Mismatch\nElement "+i+" in original: "+(annot_or.annotations[i]==null ? "is null" : annot_or.annotations[i].toString())+"\nElement "+i+" in new: "+(annot_new.annotations[i] == null ? "is null" : annot_new.annotations[i].toString())); - return false; - } - } - return true; - } + assertTrue( + "Sequence Features were not equivalent", + (seq_original[i].getSequenceFeatures() == null && seq_new[in] + .getSequenceFeatures() == null) + || (seq_original[i].getSequenceFeatures() != null && seq_new[in] + .getSequenceFeatures() != null)); + // compare sequence features + if (seq_original[i].getSequenceFeatures() != null + && seq_new[in].getSequenceFeatures() != null) + { + System.out.println("There are feature!!!"); + sequenceFeatures_original = new SequenceFeature[seq_original[i] + .getSequenceFeatures().length]; + sequenceFeatures_original = seq_original[i] + .getSequenceFeatures(); + sequenceFeatures_new = new SequenceFeature[seq_new[in] + .getSequenceFeatures().length]; + sequenceFeatures_new = seq_new[in].getSequenceFeatures(); + + assertTrue("different number of features", seq_original[i] + .getSequenceFeatures().length == seq_new[in] + .getSequenceFeatures().length); + + for (int feat = 0; feat < seq_original[i].getSequenceFeatures().length; feat++) + { + assertTrue("Different features", + sequenceFeatures_original[feat] + .equals(sequenceFeatures_new[feat])); + } + } + // compare alignment annotation + if (al.getSequenceAt(i).getAnnotation() != null + && al_input.getSequenceAt(in).getAnnotation() != null) + { + for (int j = 0; j < al.getSequenceAt(i).getAnnotation().length; j++) + { + if (al.getSequenceAt(i).getAnnotation()[j] != null + && al_input.getSequenceAt(in).getAnnotation()[j] != null) + { + annot_original = al.getSequenceAt(i).getAnnotation()[j]; + annot_new = al_input.getSequenceAt(in).getAnnotation()[j]; + assertTrue("Different annotation elements", + equalss(annot_original, annot_new)); + } + } + } + else if (al.getSequenceAt(i).getAnnotation() == null + && al_input.getSequenceAt(in).getAnnotation() == null) + { + System.out.println("No annotations"); + } + else if (al.getSequenceAt(i).getAnnotation() != null + && al_input.getSequenceAt(in).getAnnotation() == null) + { + assertTrue("Annotations differed between sequences (" + + al.getSequenceAt(i).getName() + ") and (" + + al_input.getSequenceAt(i).getName() + ")", false); + } + break; + } + } + } + } + + /* + * compare annotations + */ + private static boolean equalss(AlignmentAnnotation annot_or, + AlignmentAnnotation annot_new) + { + if (annot_or.annotations.length != annot_new.annotations.length) + { + System.err.println("Different lengths for annotation row elements: " + + annot_or.annotations.length + "!=" + + annot_new.annotations.length); + return false; + } + for (int i = 0; i < annot_or.annotations.length; i++) + { + Annotation an_or = annot_or.annotations[i], an_new = annot_new.annotations[i]; + if (an_or != null && an_new != null) + { + if (!an_or.displayCharacter.trim().equals( + an_new.displayCharacter.trim()) + || !("" + an_or.secondaryStructure).trim().equals( + ("" + an_new.secondaryStructure).trim()) + || ((!an_or.description.equals(an_new.description)) && (an_or.description == null + || an_new.description == null || !an_or.description + .equals(an_new.description)))) + { + System.err.println("Annotation Element Mismatch\nElement " + i + + " in original: " + annot_or.annotations[i].toString() + + "\nElement " + i + " in new: " + + annot_new.annotations[i].toString()); + return false; + } + } + else if (annot_or.annotations[i] == null + && annot_new.annotations[i] == null) + { + continue; + } + else + { + System.err.println("Annotation Element Mismatch\nElement " + + i + + " in original: " + + (annot_or.annotations[i] == null ? "is null" + : annot_or.annotations[i].toString()) + + "\nElement " + + i + + " in new: " + + (annot_new.annotations[i] == null ? "is null" + : annot_new.annotations[i].toString())); + return false; + } + } + return true; + } }