* GeneDB ID
*/
public static final String GENEDB = "GeneDB";
+
/**
* GeneBank
*/
public static final String GENBANK = "GenBank";
+
/**
* List of databases whose sequences might have coding regions annotated
*/
import java.util.List;
import java.util.Vector;
-public class DnaUtils {
-
- /**
- * @param gbf CDS feature data
- * @param sequences ORIGIN data
- * @return Nucleotid String (sequence) of CDS
- */
- public static String getSequence(GenBankFeature gbf, Vector<GenBankSequence> sequences){
- if (!gbf.getType().equals(GenBankFeature.CDS)){
- //If the feature is not a CDS, no sequence is returned
- return null;
- }else{
- String range = gbf.getQualifier("range");
- if (range.startsWith("join")){
- //TODO
- //It's a composed sequence
- }else{
- //It's a simple range
- String[] positions = range.split("..");
- int initRange = Integer.parseInt(positions[0]);
- int endRange = Integer.parseInt(positions[1]);
- String sourceSequence = getNucleotidesFromSequenceVector(sequences);
- return sourceSequence.substring(initRange, endRange);
- }
- }
- return null;
-
- }
- private static boolean isSequenceInRange(int initRange, int endRange, GenBankSequence gbs){
- return ((initRange>=gbs.getId()) && (endRange>=gbs.getId()));
- }
- private static String getNucleotidesInRangeFromSequence(int initRange, int endRange, GenBankSequence gbs){
- return "";
- }
- public static String getNucleotidesFromSequenceVector(Vector<GenBankSequence> v){
- StringBuffer sb = new StringBuffer();
- for (GenBankSequence gbs:v){
- Vector<String> seqs = gbs.getSequences();
- for (String s:seqs)
- sb.append(s);
- }
- return sb.toString();
- }
- /**
- * @param args
- */
- public static void main(String[] args) {
- // TODO Auto-generated method stub
-
- }
+public class DnaUtils
+{
+
+  /**
+   * Extracts the nucleotide sequence covered by a CDS feature.
+   * <p>
+   * Only simple locations of the form {@code start..end} are handled. The
+   * bounds are treated as 1-based and inclusive (GenBank base numbering), so
+   * {@code 1..3} yields the first three nucleotides.
+   *
+   * @param gbf
+   *          CDS feature data
+   * @param sequences
+   *          ORIGIN data
+   * @return Nucleotide string (sequence) of the CDS, or null when the feature
+   *         is not a CDS, has no "range" qualifier, is a "join" location (not
+   *         supported yet) or is not of the form start..end
+   * @throws NumberFormatException
+   *           if a bound is not a plain integer (for example "<345")
+   * @throws StringIndexOutOfBoundsException
+   *           if the bounds fall outside the concatenated ORIGIN sequence
+   */
+  public static String getSequence(GenBankFeature gbf,
+          Vector<GenBankSequence> sequences)
+  {
+    // Constant-first comparison: a feature whose type was never set is
+    // simply "not a CDS" instead of a NullPointerException
+    if (!GenBankFeature.CDS.equals(gbf.getType()))
+    {
+      // If the feature is not a CDS, no sequence is returned
+      return null;
+    }
+
+    String range = gbf.getQualifier("range");
+    if (range == null || range.startsWith("join"))
+    {
+      // TODO
+      // It's a composed sequence (or there is no range at all)
+      return null;
+    }
+
+    // It's a simple range. String.split takes a regular expression, so the
+    // dots must be escaped: an unescaped ".." matches any two characters and
+    // always produces an empty array.
+    String[] positions = range.split("\\.\\.");
+    if (positions.length != 2)
+    {
+      // Single base, "a.b", "a^b" and similar forms are not supported
+      return null;
+    }
+    int initRange = Integer.parseInt(positions[0].trim());
+    int endRange = Integer.parseInt(positions[1].trim());
+    String sourceSequence = getNucleotidesFromSequenceVector(sequences);
+    // 1-based inclusive bounds -> 0-based begin index, exclusive end index
+    return sourceSequence.substring(initRange - 1, endRange);
+  }
+
+  /**
+   * Compares both range bounds against the id of the given sequence line.
+   * NOTE(review): both tests use {@code >=}; an upper bound would normally be
+   * checked with {@code <=}. The method is unused in this class, so the
+   * original expression is kept - confirm the intent before relying on it.
+   */
+  private static boolean isSequenceInRange(int initRange, int endRange,
+          GenBankSequence gbs)
+  {
+    return ((initRange >= gbs.getId()) && (endRange >= gbs.getId()));
+  }
+
+  /**
+   * Placeholder: always returns the empty string.
+   */
+  private static String getNucleotidesInRangeFromSequence(int initRange,
+          int endRange, GenBankSequence gbs)
+  {
+    return "";
+  }
+
+  /**
+   * Concatenates, in order, every string returned by
+   * {@code getSequences()} for every element of the vector.
+   *
+   * @param v
+   *          ORIGIN data
+   * @return the concatenation; empty string when the vector is empty
+   */
+  public static String getNucleotidesFromSequenceVector(
+          Vector<GenBankSequence> v)
+  {
+    // Local, single-threaded buffer: StringBuilder avoids needless locking
+    StringBuilder sb = new StringBuilder();
+    for (GenBankSequence gbs : v)
+    {
+      for (String s : gbs.getSequences())
+      {
+        sb.append(s);
+      }
+    }
+    return sb.toString();
+  }
+
+  /**
+   * Empty entry point; does nothing.
+   *
+   * @param args
+   *          ignored
+   */
+  public static void main(String[] args)
+  {
+    // TODO Auto-generated method stub
+
+  }
}
import org.apache.james.mime4j.field.ParsedField;
-public class GenBankFile extends AlignFile {
- private static final Logger log = Logger.getLogger(GenBankFile.class.getName());
- private GenBankVersion version = new GenBankVersion();
- private GenBankLocus locus = new GenBankLocus();
- private GenBankSource source = new GenBankSource();
- private static final Pattern patLocation = Pattern.compile("(\\d+)\\.\\.(\\d+)");
- private static final Pattern patLocationComp = Pattern.compile("(complement)\\((\\d+)\\.\\.(\\d+)\\)");
- private static final Pattern patLocus = Pattern.compile("^LOCUS +([a-z|A-Z|0-9|_]+) +([0-9]+) bp ( {3}|ss\\-|ds\\-|ms\\-)([a-z|A-Z|-|\\s]+) ([a-z| ]{8}) ([A-Z| ]{3}) ([0-9]+-[A-Z]+-[0-9]+)");
- private static final Pattern patQualifierKey = Pattern.compile("/(.*?)=");
- private static final Pattern patFeatureKey = Pattern.compile("^\\s{5}([A-Za-z0-9\\_\\']+)\\s+");
-
- private String definition;
- private String accession;
- private String keywords;
- private String dblink;
- private String baseCount;
-
- private Vector<GenBankFeature> features;
- private Vector<String> comments;
- //Items under origin
- private Vector<GenBankSequence> sequences;
- private Vector<GenBankReference> references;
-
- private SequenceI genBankSequence;
-
- public GenBankFile() {
- }
-
- public GenBankFile(String inFile, String type) throws IOException {
- super(inFile, type);
- }
-
- public GenBankFile(FileParse source) throws IOException {
- super(source);
- }
-
- public void initData() {
- super.initData();
- features = new Vector<GenBankFeature>();
- comments = new Vector<String>();
- sequences = new Vector<GenBankSequence>();
- references = new Vector<GenBankReference>();
- }
-
- public void parse() throws IOException {
- String line;
- boolean featureMode = false; //FEATURES found
- boolean seqMode = false; //Parsing Sequences from SOURCE
- boolean referenceMode = false; //REFERENCE found
- boolean sourceMode = false; //SOURCE found
- boolean commentMode = false; //COMMENT found
- boolean parsingAuthors = false; //Parsing authors (multiline)
- boolean parsingDefinition = false; //Parsing definition (multiline)
- boolean parsingKeywords = false; //Parsing keywords (multiline)
- boolean parsingDbLink = false; //Parsing DBLINK (multiline)
- boolean parsingTitle = false; //Parsing title (multiline)
- boolean parsingQualifier = false; //Parsing feature qualifier (multine)
- String currentQualifierName = "";
- GenBankReference reference = null;
- GenBankFeature feature = null;
- List<String> sourceLines = new ArrayList<String>();
-
- if (this.isValid()){
-
- while ((line = nextLine()) != null) {
- // We only process lines if they have contents within
- if (line.length() == 0)
- continue;
-
- if (line.startsWith("FEATURES")){
- featureMode = true;
- seqMode = false;
- referenceMode = false;
- sourceMode = false;
- commentMode = false;
- feature = new GenBankFeature();
- source = parseSource(sourceLines);
- }
-
-
- if (seqMode) {
- if (!line.startsWith("//")){
- GenBankSequence seq = processSequenceLine(line);
- sequences.add(seq);
- }
- featureMode = false;
- referenceMode = false;
- sourceMode = false;
- }
-
- if (line.startsWith("ORIGIN")){
- if (feature.getType()!=null)
- features.add(feature);
- featureMode = false;
- referenceMode = false;
- sourceMode = false;
- seqMode = true;
- }
-
- if (featureMode){
- // Process feature line
- if (!line.startsWith("FEATURES") && !line.startsWith("BASE COUNT")){
- //Parse type
- if (!line.trim().startsWith("/")){
- Matcher featuresMatch = patFeatureKey.matcher(line);
- if (featuresMatch.find()){
- if (feature.getType()!=null)
- features.add(feature); //Hay que añadirlo sólo si no se está a mitad de un qualif o una feature
- //It's a feature
- String type = featuresMatch.group(0);
- feature = new GenBankFeature();
- feature.setType(type);
- GenBankLocation loc = parserFeatureLocation(feature, line.replace(type,""));
- feature.setLocation(loc);
- parsingQualifier = false;
- continue;
- }else if (parsingQualifier) { //If not a feature, it's another part of a qualifier
- String qValue = feature.getQualifier(currentQualifierName);
- StringBuffer sb = new StringBuffer().append(qValue).append(ltrim(line));
- feature.updateQualifier(currentQualifierName, sb.toString());
- continue;
- }
- }else{
- //It's the begining of a qualifier line
- Matcher matcher = patQualifierKey.matcher(line);
- if (matcher.find()){
- String qName = matcher.group(1);
- currentQualifierName = qName.replace("/","");
- line = line.replace(qName,"").replace("/", "").replace("=","");
- feature.addQualifier(currentQualifierName, ltrim(line));
- parsingQualifier = true;
- continue;
- }
- }
- }
- }
- // Process REFERENCE line
- if (line.startsWith("REFERENCE")) {
- if (!referenceMode){
- //This is line is the REFERENCE line
- referenceMode = true;
- featureMode = false;
- sourceMode = false;
- seqMode = false;
- }else{
- //We were at referenceMode, then add current reference to the list and create a new one
- references.add(reference);
- }
- reference = new GenBankReference();
- String desc = processReferenceLine(line,"REFERENCE");
- int[] ranges = parseReferenceDescriptor(desc);
- reference.setDescriptor(desc);
- reference.setOrder(ranges[0]);
- reference.setBegin(ranges[1]);
- reference.setEnd(ranges[2]);
- parsingAuthors = false;
- parsingTitle = false;
- continue;
- }
-
- if (line.startsWith(" AUTHORS")){
- if (referenceMode){
- reference.setAuthors(processReferenceLine(line,"AUTHORS"));
- parsingAuthors = true;
- parsingTitle = false;
- }
- continue;
- }
- if (line.startsWith(" TITLE")){
- if (referenceMode){
- reference.setTitle(processReferenceLine(line,"TITLE"));
- parsingAuthors = false;
- parsingTitle = true;
- }
- continue;
- }
- if (line.startsWith(" JOURNAL")){
- if (referenceMode){
- reference.setJournal(processReferenceLine(line,"JOURNAL"));
- parsingTitle = false;
- parsingAuthors = false;
- }
- continue;
- }
- if (line.startsWith(" PUBMED")){
- if (referenceMode){
- reference.setPubmed(processReferenceLine(line,"PUBMED"));
- parsingTitle = false;
- parsingAuthors = false;
- }
- continue;
- }
-
- if (line.startsWith(" MEDLINE")){
- if (referenceMode){
- reference.setMedline(processReferenceLine(line,"MEDLINE"));
- parsingTitle = false;
- parsingAuthors = false;
- }
- continue;
- }
- if (line.startsWith(" REMARK")){
- if (referenceMode){
- reference.setRemark(processReferenceLine(line,"REMARK"));
- parsingTitle = false;
- parsingAuthors = false;
- }
- continue;
- }
- if (line.startsWith(" CONSRTM")){
- if (referenceMode){
- reference.setConsortia(processReferenceLine(line,"CONSRTM"));
- parsingTitle = false;
- parsingAuthors = false;
- }
- continue;
- }
-
-
- if (line.startsWith("SOURCE")) {
- parsingKeywords = false;
- sourceMode = true;
- commentMode = false;
- if (sourceMode){
- sourceLines.add(line);
- }
- continue;
- }
- if (line.indexOf("ORGANISM")!=-1) {
- if (sourceMode){
- sourceLines.add(line);
- continue;
- }
- }
-
- if (line.startsWith("COMMENT")){
- if (reference!=null)
- references.add(reference);
- commentMode = true;
- sourceMode = false;
- referenceMode = false;
- sourceMode = false;
- seqMode = false;
- comments.add(processCommentLine(line));
- continue;
- }
- // Process LOCUS line
- if (line.startsWith("LOCUS")) {
- locus = parseLocus(line);
- continue;
- }
- // Process BASE COUNT line
- if (line.startsWith("BASE COUNT")) {
- baseCount = processHeaderLine(line,"BASE COUNT");
- featureMode = false;
- continue;
- }
- // Process DEFINITION line
- if (line.startsWith("DEFINITION")) {
- definition = processHeaderLine(line,"DEFINITION");
- parsingDefinition = true;
- continue;
- }
- // Process ACCESSION line
- if (line.startsWith("ACCESSION")) {
- accession = processHeaderLine(line,"ACCESSION");
- parsingDefinition = false;
- continue;
- }
- // Process VERSION line
- if (line.startsWith("VERSION")) {
- version = parseVersion(line);
- //headers.put("VERSION", processHeaderLine(line,"VERSION"));
- continue;
- }
- // Process DBLINK line
- if (line.startsWith("DBLINK")) {
- dblink = processHeaderLine(line,"DBLINK");
- parsingDbLink = true;
- continue;
- }
- // Process KEYWORDS line
- if (line.startsWith("KEYWORDS")) {
- keywords = processHeaderLine(line,"KEYWORDS");
- parsingKeywords = true;
- parsingDbLink = false;
- continue;
- }
- if (sourceMode){
- sourceLines.add(line);
- continue;
- }
- if (parsingDefinition){
- StringBuffer sb = new StringBuffer().append(definition).append(line);
- definition = sb.toString();
- continue;
- }
- if (referenceMode && parsingAuthors){
- if (reference!=null){
- StringBuffer authors = new StringBuffer().append(reference.getAuthors()).append(line);
- reference.setAuthors(authors.toString());
- }
- continue;
- }
- if (referenceMode && parsingTitle){
- if (reference!=null){
- StringBuffer title = new StringBuffer().append(reference.getTitle()).append(line);
- reference.setTitle(title.toString());
- }
- continue;
- }
- if (parsingKeywords){
- StringBuffer sb = new StringBuffer().append(keywords).append(line);
- keywords = sb.toString();
- continue;
- }
- if (parsingDbLink){
- StringBuffer sb = new StringBuffer().append(dblink).append(line);
- dblink = sb.toString();
- continue;
- }
- if (commentMode){
- comments.add(line);
- }
- }
- setEntries();
- }else{
- //File is not valid
- throw new IOException("GenBankFile is not valid.");
- }
- }
-
- protected void setEntries(){
- StringBuffer result = new StringBuffer();
- //Mapping GenBank info into Jalview data model
- genBankSequence = new Sequence(accession,DnaUtils.getNucleotidesFromSequenceVector(sequences));
- //Mapping DBRefEntry
- DBRefEntry dbRef = new DBRefEntry();
- dbRef.setSource(DBRefSource.GENBANK);
- dbRef.setVersion(version == null ? "" : version.toString());
- dbRef.setAccessionId(accession);
- // add map to indicate the sequence is a valid coordinate frame for the dbref
- dbRef.setMap(new Mapping(null, new int[]
- { 1, genBankSequence.getLength() }, new int[]
- { 1, genBankSequence.getLength() }, 1, 1));
- genBankSequence.addDBRef(dbRef);
-
- //add header info as non-positional features
- //add LOCUS
- SequenceFeature locusF = new SequenceFeature("LOCUS", (locus == null ? "" : locus.toString()), null, 1, genBankSequence.getLength(), DBRefSource.GENBANK);
- genBankSequence.addSequenceFeature(locusF);
- //add DEFNITION
- SequenceFeature defF = new SequenceFeature("DEFINITION", definition, null, 1, genBankSequence.getLength(), DBRefSource.GENBANK);
- genBankSequence.addSequenceFeature(defF);
- //add ACCESSION
- SequenceFeature accessionF = new SequenceFeature("ACCESSION", accession, null, 1, genBankSequence.getLength(), DBRefSource.GENBANK);
- genBankSequence.addSequenceFeature(accessionF);
- //add VERSION
- SequenceFeature versionF = new SequenceFeature("VERSION", (version == null ? "" : version.toString()), null, 1, genBankSequence.getLength(), DBRefSource.GENBANK);
- genBankSequence.addSequenceFeature(versionF);
- //add DBLINK
- SequenceFeature dblinkF = new SequenceFeature("DBLINK", (dblink == null ? "" : dblink.toString()), null, 1, genBankSequence.getLength(), DBRefSource.GENBANK);
- genBankSequence.addSequenceFeature(dblinkF);
- //add KEYWORDS
- SequenceFeature keywordsF = new SequenceFeature("KEYWORDS", keywords, null, 1, genBankSequence.getLength(), DBRefSource.GENBANK);
- genBankSequence.addSequenceFeature(keywordsF);
- //add SOURCE
- SequenceFeature sourceF = new SequenceFeature("SOURCE", (source == null ? "" : source.toString()), null, 1, genBankSequence.getLength(), DBRefSource.GENBANK);
- genBankSequence.addSequenceFeature(sourceF);
- //add BASE COUNT
- SequenceFeature baseCountF = new SequenceFeature("BASE COUNT", (baseCount == null ? "" : baseCount.toString()), null, 1, genBankSequence.getLength(), DBRefSource.GENBANK);
- genBankSequence.addSequenceFeature(baseCountF);
-
- // add literature and database cross references in the file
- for (GenBankReference gbRef:references){
- //They are non-positional features
- SequenceFeature refFeature = new SequenceFeature("REFERENCE", gbRef.toString(),null,gbRef.getBegin(),gbRef.getEnd(),DBRefSource.GENBANK);
- genBankSequence.addSequenceFeature(refFeature);
- }
- //add COMMENTS
- if (comments.size()>0){
- StringBuffer sb = new StringBuffer();
- for (String comment: comments){
- sb.append(comment).append(newline);
- }
- SequenceFeature commentF = new SequenceFeature("COMMENT", sb.toString(), null, 1, genBankSequence.getLength(), DBRefSource.GENBANK);
- genBankSequence.addSequenceFeature(commentF);
- }
- //Mapping FEATURES
- for (GenBankFeature feature:features){
- if (feature.getType()!=null){
- SequenceFeature sf = new SequenceFeature();
- sf.setType(feature.getType());
- sf.setDescription(feature.getType());
-
- sf.setBegin(feature.getLocation()==null ? 0 : feature.getLocation().getMinor());
- sf.setEnd(feature.getLocation()==null ? 0 : feature.getLocation().getMajor());
- Enumeration<String> names = feature.getQualifiersNames();
- while (names.hasMoreElements()){
- String qName = names.nextElement();
- String qValue = feature.getQualifier(qName);
- sf.setValue(qName, qValue);
- }
- genBankSequence.addSequenceFeature(sf);
- }
- }
- SequenceI[] parsedSeqs = new SequenceI[1];
- parsedSeqs[0] = genBankSequence;
- this.setSeqs(parsedSeqs);
- }
- private GenBankVersion parseVersion(String line) {
- //VERSION U00096.2 GI:48994873
- if (line.trim().equalsIgnoreCase("VERSION")){
- return null;
- }else{
- GenBankVersion ver = new GenBankVersion();
- String v = line.substring(11, line.indexOf(" ", 12)).trim();
- ver.setVersion(v);
- int posGI = line.indexOf("GI:", 11 + v.length());
- if (posGI > -1) {
- ver.setGI(line.substring(posGI));
- }
- return ver;
- }
+public class GenBankFile extends AlignFile
+{
+ // Class logger, named after GenBankFile
+ private static final Logger log = Logger.getLogger(GenBankFile.class
+ .getName());
+
+ // VERSION data; replaced in parse() by the result of parseVersion()
+ private GenBankVersion version = new GenBankVersion();
+
+ // LOCUS data; replaced in parse() by the result of parseLocus()
+ private GenBankLocus locus = new GenBankLocus();
+
+ // SOURCE data; replaced in parse() by the result of parseSource()
+ private GenBankSource source = new GenBankSource();
+
+ // Two digit runs separated by ".." (groups 1 and 2)
+ private static final Pattern patLocation = Pattern
+ .compile("(\\d+)\\.\\.(\\d+)");
+
+ // "complement(" digits ".." digits ")" (group 1 is the literal word,
+ // groups 2 and 3 are the digit runs)
+ private static final Pattern patLocationComp = Pattern
+ .compile("(complement)\\((\\d+)\\.\\.(\\d+)\\)");
+
+ // A LOCUS header line, split into seven capture groups
+ private static final Pattern patLocus = Pattern
+ .compile("^LOCUS +([a-z|A-Z|0-9|_]+) +([0-9]+) bp ( {3}|ss\\-|ds\\-|ms\\-)([a-z|A-Z|-|\\s]+) ([a-z| ]{8}) ([A-Z| ]{3}) ([0-9]+-[A-Z]+-[0-9]+)");
+
+ // Shortest text between a "/" and the next "=" (group 1)
+ private static final Pattern patQualifierKey = Pattern.compile("/(.*?)=");
+
+ // Line start, five whitespace characters, a key (group 1), then whitespace
+ private static final Pattern patFeatureKey = Pattern
+ .compile("^\\s{5}([A-Za-z0-9\\_\\']+)\\s+");
+
+ // Text of the DEFINITION line plus its continuation lines
+ private String definition;
+
+ // Text of the ACCESSION line
+ private String accession;
+
+ // Text of the KEYWORDS line plus its continuation lines
+ private String keywords;
+
+ // Text of the DBLINK line plus its continuation lines
+ private String dblink;
+
+ // Text of the BASE COUNT line
+ private String baseCount;
+
+ // Features collected by parse(); created in initData()
+ private Vector<GenBankFeature> features;
+
+ // COMMENT lines collected by parse(); created in initData()
+ private Vector<String> comments;
+
+ // Items under origin
+ private Vector<GenBankSequence> sequences;
+
+ // References collected by parse(); created in initData()
+ private Vector<GenBankReference> references;
+
+ // Sequence object built by setEntries()
+ private SequenceI genBankSequence;
+
+ /**
+ * Creates an instance without reading anything; the body is empty.
+ */
+ public GenBankFile()
+ {
+ }
+
+ /**
+ * Passes both arguments straight to the superclass constructor.
+ *
+ * @param inFile
+ * forwarded unchanged to the superclass
+ * @param type
+ * forwarded unchanged to the superclass
+ * @throws IOException
+ * propagated from the superclass constructor
+ */
+ public GenBankFile(String inFile, String type) throws IOException
+ {
+ super(inFile, type);
+ }
+
+ /**
+ * Passes the given source straight to the superclass constructor.
+ *
+ * @param source
+ * forwarded unchanged to the superclass
+ * @throws IOException
+ * propagated from the superclass constructor
+ */
+ public GenBankFile(FileParse source) throws IOException
+ {
+ super(source);
+ }
+
+  /**
+   * Runs the superclass initialisation and then replaces the four
+   * collections filled by parse() (references, sequences, comments,
+   * features) with fresh, empty vectors.
+   */
+  public void initData()
+  {
+    super.initData();
+    references = new Vector<GenBankReference>();
+    sequences = new Vector<GenBankSequence>();
+    comments = new Vector<String>();
+    features = new Vector<GenBankFeature>();
+  }
+
+ /**
+ * Reads the input line by line through nextLine() and fills this object's
+ * fields from the sections it recognises (LOCUS, DEFINITION, ACCESSION,
+ * VERSION, DBLINK, KEYWORDS, SOURCE, REFERENCE, COMMENT, FEATURES,
+ * BASE COUNT, ORIGIN). Boolean flags remember which section or multi-line
+ * field the current line belongs to. When the input is exhausted,
+ * setEntries() is called.
+ *
+ * @throws IOException
+ * if isValid() returns false
+ */
+ public void parse() throws IOException
+ {
+ String line;
+ boolean featureMode = false; // FEATURES found
+ boolean seqMode = false; // ORIGIN found: parsing sequence lines
+ boolean referenceMode = false; // REFERENCE found
+ boolean sourceMode = false; // SOURCE found
+ boolean commentMode = false; // COMMENT found
+ boolean parsingAuthors = false; // Parsing authors (multiline)
+ boolean parsingDefinition = false; // Parsing definition (multiline)
+ boolean parsingKeywords = false; // Parsing keywords (multiline)
+ boolean parsingDbLink = false; // Parsing DBLINK (multiline)
+ boolean parsingTitle = false; // Parsing title (multiline)
+ boolean parsingQualifier = false; // Parsing feature qualifier (multiline)
+ String currentQualifierName = "";
+ GenBankReference reference = null;
+ GenBankFeature feature = null;
+ List<String> sourceLines = new ArrayList<String>();
+
+ if (this.isValid())
+ {
+
+ while ((line = nextLine()) != null)
+ {
+ // We only process lines if they have contents within
+ if (line.length() == 0)
+ continue;
+
+ // FEATURES header: switch to feature mode and parse the SOURCE lines
+ // gathered so far
+ if (line.startsWith("FEATURES"))
+ {
+ featureMode = true;
+ seqMode = false;
+ referenceMode = false;
+ sourceMode = false;
+ commentMode = false;
+ feature = new GenBankFeature();
+ source = parseSource(sourceLines);
+ }
+
+ // Inside ORIGIN: every line except the "//" terminator is sequence data
+ if (seqMode)
+ {
+ if (!line.startsWith("//"))
+ {
+ GenBankSequence seq = processSequenceLine(line);
+ sequences.add(seq);
+ }
+ featureMode = false;
+ referenceMode = false;
+ sourceMode = false;
+ }
+
+ // ORIGIN header: store the pending feature and switch to sequence mode
+ if (line.startsWith("ORIGIN"))
+ {
+ // NOTE(review): feature is still null here when no FEATURES line
+ // came before ORIGIN, so this call would throw - confirm and guard
+ if (feature.getType() != null)
+ features.add(feature);
+ featureMode = false;
+ referenceMode = false;
+ sourceMode = false;
+ seqMode = true;
+ }
+
+ if (featureMode)
+ {
+ // Process feature line
+ if (!line.startsWith("FEATURES")
+ && !line.startsWith("BASE COUNT"))
+ {
+ // Parse type
+ if (!line.trim().startsWith("/"))
+ {
+ Matcher featuresMatch = patFeatureKey.matcher(line);
+ if (featuresMatch.find())
+ {
+ if (feature.getType() != null)
+ features.add(feature); // It must only be added when we are not
+ // in the middle of a qualifier or a feature
+ // It's a feature
+ // NOTE(review): group(0) is the whole match, including the
+ // surrounding whitespace; group(1) is the bare key - confirm
+ String type = featuresMatch.group(0);
+ feature = new GenBankFeature();
+ feature.setType(type);
+ GenBankLocation loc = parserFeatureLocation(feature,
+ line.replace(type, ""));
+ feature.setLocation(loc);
+ parsingQualifier = false;
+ continue;
+ }
+ else if (parsingQualifier)
+ { // If not a feature, it's another part of a qualifier
+ String qValue = feature.getQualifier(currentQualifierName);
+ StringBuffer sb = new StringBuffer().append(qValue).append(
+ ltrim(line));
+ feature.updateQualifier(currentQualifierName, sb.toString());
+ continue;
+ }
+ }
+ else
+ {
+ // It's the beginning of a qualifier line
+ Matcher matcher = patQualifierKey.matcher(line);
+ if (matcher.find())
+ {
+ String qName = matcher.group(1);
+ currentQualifierName = qName.replace("/", "");
+ line = line.replace(qName, "").replace("/", "")
+ .replace("=", "");
+ feature.addQualifier(currentQualifierName, ltrim(line));
+ parsingQualifier = true;
+ continue;
+ }
+ }
+ }
+ }
+ // Process REFERENCE line
+ // NOTE(review): a reference is only stored when the next REFERENCE or a
+ // COMMENT line arrives; the last one looks lost if neither follows
+ if (line.startsWith("REFERENCE"))
+ {
+ if (!referenceMode)
+ {
+ // This is the first REFERENCE line
+ referenceMode = true;
+ featureMode = false;
+ sourceMode = false;
+ seqMode = false;
+ }
+ else
+ {
+ // We were at referenceMode, then add current reference to the list
+ // and create a new one
+ references.add(reference);
+ }
+ reference = new GenBankReference();
+ String desc = processReferenceLine(line, "REFERENCE");
+ int[] ranges = parseReferenceDescriptor(desc);
+ reference.setDescriptor(desc);
+ reference.setOrder(ranges[0]);
+ reference.setBegin(ranges[1]);
+ reference.setEnd(ranges[2]);
+ parsingAuthors = false;
+ parsingTitle = false;
+ continue;
+ }
+
+ // Sub-keys of a reference; they are ignored outside reference mode
+ if (line.startsWith(" AUTHORS"))
+ {
+ if (referenceMode)
+ {
+ reference.setAuthors(processReferenceLine(line, "AUTHORS"));
+ parsingAuthors = true;
+ parsingTitle = false;
+ }
+ continue;
+ }
+ if (line.startsWith(" TITLE"))
+ {
+ if (referenceMode)
+ {
+ reference.setTitle(processReferenceLine(line, "TITLE"));
+ parsingAuthors = false;
+ parsingTitle = true;
+ }
+ continue;
+ }
+ if (line.startsWith(" JOURNAL"))
+ {
+ if (referenceMode)
+ {
+ reference.setJournal(processReferenceLine(line, "JOURNAL"));
+ parsingTitle = false;
+ parsingAuthors = false;
+ }
+ continue;
+ }
+ if (line.startsWith(" PUBMED"))
+ {
+ if (referenceMode)
+ {
+ reference.setPubmed(processReferenceLine(line, "PUBMED"));
+ parsingTitle = false;
+ parsingAuthors = false;
+ }
+ continue;
+ }
+
+ if (line.startsWith(" MEDLINE"))
+ {
+ if (referenceMode)
+ {
+ reference.setMedline(processReferenceLine(line, "MEDLINE"));
+ parsingTitle = false;
+ parsingAuthors = false;
+ }
+ continue;
+ }
+ if (line.startsWith(" REMARK"))
+ {
+ if (referenceMode)
+ {
+ reference.setRemark(processReferenceLine(line, "REMARK"));
+ parsingTitle = false;
+ parsingAuthors = false;
+ }
+ continue;
+ }
+ if (line.startsWith(" CONSRTM"))
+ {
+ if (referenceMode)
+ {
+ reference.setConsortia(processReferenceLine(line, "CONSRTM"));
+ parsingTitle = false;
+ parsingAuthors = false;
+ }
+ continue;
+ }
+
+ if (line.startsWith("SOURCE"))
+ {
+ parsingKeywords = false;
+ sourceMode = true;
+ commentMode = false;
+ // sourceMode was set just above, so this test is always true
+ if (sourceMode)
+ {
+ sourceLines.add(line);
+ }
+ continue;
+ }
+ if (line.indexOf("ORGANISM") != -1)
+ {
+ if (sourceMode)
+ {
+ sourceLines.add(line);
+ continue;
+ }
+ }
+
+ // COMMENT header: store the pending reference and start collecting
+ // comment lines
+ if (line.startsWith("COMMENT"))
+ {
+ if (reference != null)
+ references.add(reference);
+ commentMode = true;
+ sourceMode = false;
+ referenceMode = false;
+ sourceMode = false;
+ seqMode = false;
+ comments.add(processCommentLine(line));
+ continue;
+ }
+ // Process LOCUS line
+ if (line.startsWith("LOCUS"))
+ {
+ locus = parseLocus(line);
+ continue;
+ }
+ // Process BASE COUNT line
+ if (line.startsWith("BASE COUNT"))
+ {
+ baseCount = processHeaderLine(line, "BASE COUNT");
+ featureMode = false;
+ continue;
+ }
+ // Process DEFINITION line
+ if (line.startsWith("DEFINITION"))
+ {
+ definition = processHeaderLine(line, "DEFINITION");
+ parsingDefinition = true;
+ continue;
+ }
+ // Process ACCESSION line
+ if (line.startsWith("ACCESSION"))
+ {
+ accession = processHeaderLine(line, "ACCESSION");
+ parsingDefinition = false;
+ continue;
+ }
+ // Process VERSION line
+ if (line.startsWith("VERSION"))
+ {
+ version = parseVersion(line);
+ // headers.put("VERSION", processHeaderLine(line,"VERSION"));
+ continue;
+ }
+ // Process DBLINK line
+ if (line.startsWith("DBLINK"))
+ {
+ dblink = processHeaderLine(line, "DBLINK");
+ parsingDbLink = true;
+ continue;
+ }
+ // Process KEYWORDS line
+ if (line.startsWith("KEYWORDS"))
+ {
+ keywords = processHeaderLine(line, "KEYWORDS");
+ parsingKeywords = true;
+ parsingDbLink = false;
+ continue;
+ }
+ // No keyword matched: the line continues whichever section or
+ // multi-line field is currently open (first matching flag wins)
+ if (sourceMode)
+ {
+ sourceLines.add(line);
+ continue;
+ }
+ if (parsingDefinition)
+ {
+ StringBuffer sb = new StringBuffer().append(definition).append(
+ line);
+ definition = sb.toString();
+ continue;
+ }
+ if (referenceMode && parsingAuthors)
+ {
+ if (reference != null)
+ {
+ StringBuffer authors = new StringBuffer().append(
+ reference.getAuthors()).append(line);
+ reference.setAuthors(authors.toString());
+ }
+ continue;
+ }
+ if (referenceMode && parsingTitle)
+ {
+ if (reference != null)
+ {
+ StringBuffer title = new StringBuffer().append(
+ reference.getTitle()).append(line);
+ reference.setTitle(title.toString());
+ }
+ continue;
+ }
+ if (parsingKeywords)
+ {
+ StringBuffer sb = new StringBuffer().append(keywords)
+ .append(line);
+ keywords = sb.toString();
+ continue;
+ }
+ if (parsingDbLink)
+ {
+ StringBuffer sb = new StringBuffer().append(dblink).append(line);
+ dblink = sb.toString();
+ continue;
+ }
+ if (commentMode)
+ {
+ comments.add(line);
+ }
+ }
+ setEntries();
}
-
- private GenBankLocus parseLocus(String line){
- GenBankLocus loc = new GenBankLocus();
- Matcher mat = patLocus.matcher(line);
- if (mat.find()) {
- String name = mat.group(1);
- String len = mat.group(2);
- String strand = mat.group(3);
- String mtype = mat.group(4);
- String linear = mat.group(5);
- String division = mat.group(6);
- String date = mat.group(7);
-
- loc.setName(name == null ? "" : name.trim());
- loc.setSequenceLength(len == null ? 0 : Integer.parseInt(len));
- loc.setStrand(strand == null ? "" : strand);
- loc.setMoleculeType(mtype == null ? "" : mtype);
- loc.setLinearSequence("linear".equals(linear));
- loc.setDivision(division == null ? "" : division);
- loc.setModificationDate(date == null ? "" :date);
- }
- return loc;
+ else
+ {
+ // File is not valid
+ throw new IOException("GenBankFile is not valid.");
}
- private GenBankSource parseSource(List<String> lines){
- StringBuffer sb = new StringBuffer();
- for(String line:lines){
- sb.append(line).append(newline);
- }
- // Source section
- GenBankSource sou = new GenBankSource();
- String aux = sb.toString().substring(11);
- int fim1 = aux.indexOf("\n");
- if (fim1 > -1) {
- sou.setSource(aux.substring(0, fim1));
- int ini2 = aux.indexOf("ORGANISM");
- if (ini2 > -1) {
- fim1 = aux.indexOf("\n", ini2 + 10);
- if (fim1 > -1) {
- sou.setOrganism(aux.substring(ini2 + 10, fim1));
- sou.setTaxonomic(aux.substring(fim1).replaceAll(" ", "").replaceAll("\\s+", ""));
- } else {
- sou.setOrganism(aux);
- }
- }
- } else {
- sou.setSource(aux);
- }
- return sou;
- }
-
- /**
- * Possible situations:
- *
- * 467 Points to a single base in the presented sequence 340..565 Points to
- * a continuous range of bases bounded by and including the starting and
- * ending bases <345..500 Indicates that the exact lower boundary point
- * of a feature is unknown. The location begins at some base previous to the
- * first base specified (which need not be contained in the presented
- * sequence) and continues to and includes the ending base <1..888 The
- * feature starts before the first sequenced base and continues to and
- * includes base 888 1..>888 The feature starts at the first sequenced
- * base and continues beyond base 888 102.110 Indicates that the exact
- * location is unknown but that it is one of the bases between bases 102 and
- * 110, inclusive 123^124 Points to a site between bases 123 and 124
- * join(12..78,134..202) Regions 12 to 78 and 134 to 202 should be joined to
- * form one contiguous sequence complement(34..126) Start at the base
- * complementary to 126 and finish at the base complementary to base 34 (the
- * feature is on the strand complementary to the presented strand)
- * complement(join(2691..4571,4918..5163)) Joins regions 2691 to 4571 and
- * 4918 to 5163, then complements the joined segments (the feature is on the
- * strand complementary to the presented strand)
- * join(complement(4918..5163),complement(2691..4571)) Complements regions
- * 4918 to 5163 and 2691 to 4571, then joins the complemented segments (the
- * feature is on the strand complementary to the presented strand)
- * J00194.1:100..202 Points to bases 100 to 202, inclusive, in the entry (in
- * this database) with primary accession number 'J00194'
- * join(1..100,J00194.1:100..202) Joins region 1..100 of the existing entry
- * with the region 100..202 of remote entry J00194
- *
- * @param fea
- * @param localiza
- */
- private GenBankLocation parserFeatureLocation(GenBankFeature fea, String localiza) {
- // remove os espaços, quebra de linhas etc
- String buf = localiza.replaceAll("\\s", "");
-
- // checks if there is a comma present between ranges
- // complement(100..110),complement(90..100)
- char[] buf2 = buf.toCharArray();
- int abertos = 0;
- java.util.List<String> lista = new java.util.ArrayList<String>();
- int pinicial = 0;
- for (int i = 0; i < buf2.length; i++) {
- if (buf2[i] == '(') {
- abertos++;
- } else if (buf2[i] == ')') {
- abertos--;
- } else if (buf2[i] == ',' && abertos == 0) {
- lista.add(buf.substring(pinicial, i));
- pinicial = i + 1;
+ }
+
+  /**
+   * Maps the parsed GenBank record onto the Jalview data model.
+   *
+   * Builds {@code genBankSequence} from the accession and the nucleotides
+   * concatenated from {@code sequences}, attaches a GenBank database
+   * reference mapped 1:1 onto the whole sequence, records the header
+   * sections, references, comments and annotated features as sequence
+   * features, and finally registers the sequence as the only parsed sequence
+   * of this file.
+   */
+  protected void setEntries()
+  {
+    // Mapping GenBank info into Jalview data model
+    genBankSequence = new Sequence(accession,
+            DnaUtils.getNucleotidesFromSequenceVector(sequences));
+
+    // Mapping DBRefEntry
+    DBRefEntry dbRef = new DBRefEntry();
+    dbRef.setSource(DBRefSource.GENBANK);
+    dbRef.setVersion(version == null ? "" : version.toString());
+    dbRef.setAccessionId(accession);
+    // add map to indicate the sequence is a valid coordinate frame for the
+    // dbref
+    dbRef.setMap(new Mapping(null, new int[]
+    { 1, genBankSequence.getLength() }, new int[]
+    { 1, genBankSequence.getLength() }, 1, 1));
+    genBankSequence.addDBRef(dbRef);
+
+    // add header info as features covering the whole sequence; the order
+    // below is the order in which they are later written out
+    addHeaderFeature("LOCUS", locus == null ? "" : locus.toString());
+    addHeaderFeature("DEFINITION", definition);
+    addHeaderFeature("ACCESSION", accession);
+    addHeaderFeature("VERSION", version == null ? "" : version.toString());
+    addHeaderFeature("DBLINK", dblink == null ? "" : dblink.toString());
+    addHeaderFeature("KEYWORDS", keywords);
+    addHeaderFeature("SOURCE", source == null ? "" : source.toString());
+    addHeaderFeature("BASE COUNT",
+            baseCount == null ? "" : baseCount.toString());
+
+    // add literature and database cross references in the file, each with
+    // the begin/end reported by the reference itself
+    for (GenBankReference gbRef : references)
+    {
+      SequenceFeature refFeature = new SequenceFeature("REFERENCE",
+              gbRef.toString(), null, gbRef.getBegin(), gbRef.getEnd(),
+              DBRefSource.GENBANK);
+      genBankSequence.addSequenceFeature(refFeature);
+    }
+
+    // add COMMENTS: all comment lines are merged into a single feature
+    if (comments.size() > 0)
+    {
+      StringBuffer sb = new StringBuffer();
+      for (String comment : comments)
+      {
+        sb.append(comment).append(newline);
+      }
+      addHeaderFeature("COMMENT", sb.toString());
+    }
+
+    // Mapping FEATURES: features without a type are dropped
+    for (GenBankFeature feature : features)
+    {
+      if (feature.getType() == null)
+      {
+        continue;
+      }
+      SequenceFeature sf = new SequenceFeature();
+      sf.setType(feature.getType());
+      sf.setDescription(feature.getType());
+
+      // a feature without a parsed location is placed at 0..0
+      GenBankLocation location = feature.getLocation();
+      sf.setBegin(location == null ? 0 : location.getMinor());
+      sf.setEnd(location == null ? 0 : location.getMajor());
+
+      // copy every qualifier onto the feature under its own name
+      Enumeration<String> names = feature.getQualifiersNames();
+      while (names.hasMoreElements())
+      {
+        String qName = names.nextElement();
+        sf.setValue(qName, feature.getQualifier(qName));
+      }
+      genBankSequence.addSequenceFeature(sf);
+    }
+
+    SequenceI[] parsedSeqs = new SequenceI[1];
+    parsedSeqs[0] = genBankSequence;
+    this.setSeqs(parsedSeqs);
+  }
+
+  /**
+   * Adds one feature of the given type and description to
+   * {@code genBankSequence}, spanning position 1 to the sequence length, in
+   * the GenBank feature group.
+   *
+   * @param type
+   *          feature type, e.g. "LOCUS"
+   * @param description
+   *          feature description; passed through unchanged (may be null)
+   */
+  private void addHeaderFeature(String type, String description)
+  {
+    genBankSequence.addSequenceFeature(new SequenceFeature(type,
+            description, null, 1, genBankSequence.getLength(),
+            DBRefSource.GENBANK));
+  }
+
+  /**
+   * Parses a VERSION header line such as
+   * {@code VERSION     U00096.2  GI:48994873}.
+   *
+   * The version token is read from column 11 up to the next space found at
+   * or after column 12, or up to the end of the line when nothing follows
+   * it (lines without a GI field). Previously such lines caused a
+   * StringIndexOutOfBoundsException.
+   *
+   * @param line
+   *          the raw VERSION line
+   * @return the parsed version, or null when the line holds only the
+   *         "VERSION" keyword
+   * @throws StringIndexOutOfBoundsException
+   *           if the line is shorter than 11 characters but is not just the
+   *           keyword
+   */
+  private GenBankVersion parseVersion(String line)
+  {
+    if (line.trim().equalsIgnoreCase("VERSION"))
+    {
+      return null;
+    }
+
+    GenBankVersion ver = new GenBankVersion();
+    int end = line.indexOf(" ", 12);
+    if (end == -1)
+    {
+      // no trailing field: the version token runs to the end of the line
+      end = line.length();
+    }
+    String v = line.substring(11, end).trim();
+    ver.setVersion(v);
+
+    // the GI identifier, when present, is stored including its "GI:" prefix
+    int posGI = line.indexOf("GI:", 11 + v.length());
+    if (posGI > -1)
+    {
+      ver.setGI(line.substring(posGI));
+    }
+    return ver;
+  }
+
+  /**
+   * Parses a LOCUS header line with the {@code patLocus} pattern.
+   *
+   * @param line
+   *          the raw LOCUS line
+   * @return a locus populated from the seven pattern groups (name, length,
+   *         strand, molecule type, topology, division, date); an unpopulated
+   *         locus when the pattern does not match
+   */
+  private GenBankLocus parseLocus(String line)
+  {
+    GenBankLocus locus = new GenBankLocus();
+    Matcher m = patLocus.matcher(line);
+    if (!m.find())
+    {
+      return locus;
+    }
+
+    String name = m.group(1);
+    String length = m.group(2);
+    String strand = m.group(3);
+    String moleculeType = m.group(4);
+    String topology = m.group(5);
+    String division = m.group(6);
+    String date = m.group(7);
+
+    // groups that did not participate in the match fall back to "" / 0
+    if (name == null)
+    {
+      locus.setName("");
+    }
+    else
+    {
+      locus.setName(name.trim());
+    }
+    if (length == null)
+    {
+      locus.setSequenceLength(0);
+    }
+    else
+    {
+      locus.setSequenceLength(Integer.parseInt(length));
+    }
+    locus.setStrand(strand != null ? strand : "");
+    locus.setMoleculeType(moleculeType != null ? moleculeType : "");
+    locus.setLinearSequence("linear".equals(topology));
+    locus.setDivision(division != null ? division : "");
+    locus.setModificationDate(date != null ? date : "");
+    return locus;
+  }
+
+  /**
+   * Parses the SOURCE section (SOURCE line, ORGANISM line and the lines
+   * that follow it).
+   *
+   * The lines are joined with {@code newline}, the first 11 characters are
+   * dropped, and the remainder is cut at "\n" characters: the first line is
+   * the source, the text 10 characters after "ORGANISM" up to the next line
+   * break is the organism, and everything after that, with all whitespace
+   * removed, is the taxonomic lineage.
+   *
+   * @param lines
+   *          the raw lines of the SOURCE section
+   * @return the populated source description
+   */
+  private GenBankSource parseSource(List<String> lines)
+  {
+    StringBuffer joined = new StringBuffer();
+    for (String each : lines)
+    {
+      joined.append(each);
+      joined.append(newline);
+    }
+
+    // Source section
+    GenBankSource result = new GenBankSource();
+    String text = joined.toString().substring(11);
+
+    int firstBreak = text.indexOf("\n");
+    if (firstBreak == -1)
+    {
+      // single unterminated line: everything is the source
+      result.setSource(text);
+      return result;
+    }
+    result.setSource(text.substring(0, firstBreak));
+
+    int organismAt = text.indexOf("ORGANISM");
+    if (organismAt == -1)
+    {
+      return result;
+    }
+
+    int valueStart = organismAt + 10;
+    int organismEnd = text.indexOf("\n", valueStart);
+    if (organismEnd == -1)
+    {
+      result.setOrganism(text);
+      return result;
+    }
+
+    result.setOrganism(text.substring(valueStart, organismEnd));
+    String lineage = text.substring(organismEnd);
+    lineage = lineage.replaceAll(" ", "");
+    lineage = lineage.replaceAll("\\s+", "");
+    result.setTaxonomic(lineage);
+    return result;
+  }
+
+ /**
+ * Parses a feature location descriptor. Possible situations:
+ * <ul>
+ * <li>{@code 467} - Points to a single base in the presented sequence</li>
+ * <li>{@code 340..565} - Points to a continuous range of bases bounded by
+ * and including the starting and ending bases</li>
+ * <li>{@code <345..500} - Indicates that the exact lower boundary point of
+ * a feature is unknown. The location begins at some base previous to the
+ * first base specified (which need not be contained in the presented
+ * sequence) and continues to and includes the ending base</li>
+ * <li>{@code <1..888} - The feature starts before the first sequenced base
+ * and continues to and includes base 888</li>
+ * <li>{@code 1..>888} - The feature starts at the first sequenced base and
+ * continues beyond base 888</li>
+ * <li>{@code 102.110} - Indicates that the exact location is unknown but
+ * that it is one of the bases between bases 102 and 110, inclusive</li>
+ * <li>{@code 123^124} - Points to a site between bases 123 and 124</li>
+ * <li>{@code join(12..78,134..202)} - Regions 12 to 78 and 134 to 202
+ * should be joined to form one contiguous sequence</li>
+ * <li>{@code complement(34..126)} - Start at the base complementary to 126
+ * and finish at the base complementary to base 34 (the feature is on the
+ * strand complementary to the presented strand)</li>
+ * <li>{@code complement(join(2691..4571,4918..5163))} - Joins regions 2691
+ * to 4571 and 4918 to 5163, then complements the joined segments (the
+ * feature is on the strand complementary to the presented strand)</li>
+ * <li>{@code join(complement(4918..5163),complement(2691..4571))} -
+ * Complements regions 4918 to 5163 and 2691 to 4571, then joins the
+ * complemented segments (the feature is on the strand complementary to the
+ * presented strand)</li>
+ * <li>{@code J00194.1:100..202} - Points to bases 100 to 202, inclusive, in
+ * the entry (in this database) with primary accession number 'J00194'</li>
+ * <li>{@code join(1..100,J00194.1:100..202)} - Joins region 1..100 of the
+ * existing entry with the region 100..202 of remote entry J00194</li>
+ * </ul>
+ *
+ * @param fea
+ *          the feature whose location is being parsed and set
+ * @param localiza
+ *          the location descriptor text
+ */
+ private GenBankLocation parserFeatureLocation(GenBankFeature fea,
+ String localiza)
+ {
+ // remove os espaços, quebra de linhas etc
+ String buf = localiza.replaceAll("\\s", "");
+
+ // checks if there is a comma present between ranges
+ // complement(100..110),complement(90..100)
+ char[] buf2 = buf.toCharArray();
+ int abertos = 0;
+ java.util.List<String> lista = new java.util.ArrayList<String>();
+ int pinicial = 0;
+ for (int i = 0; i < buf2.length; i++)
+ {
+ if (buf2[i] == '(')
+ {
+ abertos++;
+ }
+ else if (buf2[i] == ')')
+ {
+ abertos--;
+ }
+ else if (buf2[i] == ',' && abertos == 0)
+ {
+ lista.add(buf.substring(pinicial, i));
+ pinicial = i + 1;
+ }
+ }
+ if (lista.size() > 0)
+ {
+ lista.add(buf.substring(pinicial));
+ GenBankLocations um = new GenBankLocations();
+ um.setOperator(GenBankLocations.NONE);
+ for (String s : lista)
+ {
+ um.getUnits().add(parserFeatureLocation(fea, s));
+ }
+ fea.setLocation(um);
+ return um;
+ }
+
+ // trata as funcoes: complement(location,location...),
+ // join(location,location...), order(location,location...)
+ if (buf.contains("("))
+ {
+ GenBankLocations um = new GenBankLocations();
+ int ini = buf.indexOf("(");
+ int fim = buf.lastIndexOf(")");
+ String token = buf.substring(0, ini);
+ if ("complement".equalsIgnoreCase(token))
+ {
+ String inter = buf.substring(ini + 1, fim);
+ GenBankLocation interno = parserFeatureLocation(fea, inter);
+ interno.setComplement(true);
+ um.setOperator(GenBankLocations.COMPLEMENT);
+ um.getUnits().add(interno);
+ fea.setLocation(um);
+ }
+ else if ("join".equalsIgnoreCase(token))
+ {
+ String inter = buf.substring(ini + 1, fim);
+ GenBankLocation interno = parserFeatureLocation(fea, inter);
+ um.setOperator(GenBankLocations.JOIN);
+ um.getUnits().add(interno);
+ fea.setLocation(um);
+ }
+ else if ("order".equalsIgnoreCase(token))
+ {
+ String inter = buf.substring(ini + 1, fim);
+ GenBankLocation interno = parserFeatureLocation(fea, inter);
+ um.setOperator(GenBankLocations.ORDER);
+ um.getUnits().add(interno);
+ fea.setLocation(um);
+ }
+ else
+ {
+ log.log(Level.WARNING,
+ "Token desconhecido em location/features - {0}", token);
+ String inter = buf.substring(ini + 1, fim);
+ fea.setLocation(parserFeatureLocation(fea, inter));
+ }
+ return fea.getLocation();
+ }
+ else
+ {
+ // trata quando tiver uma lista de location
+ if (buf.contains(","))
+ {
+ String[] partes = buf.split(",");
+ GenBankLocations um = new GenBankLocations();
+ for (String p : partes)
+ {
+ um.getUnits().add(parserFeatureLocation(fea, p));
+ }
+ fea.setLocation(um);
+ return um;
+ }
+ else
+ {
+ // trata quando tiver range
+ if (buf.contains(".."))
+ {
+ String[] partes = buf.split("\\.\\.");
+ GenBankLocationRange range = new GenBankLocationRange();
+ if (buf.contains(":"))
+ {
+ for (int i = 0; i < partes.length; i++)
+ {
+ int pos = partes[i].indexOf(":");
+ if (pos > 0)
+ {
+ String entry = partes[i].substring(0, pos);
+ partes[i] = partes[i].substring(pos + 1);
+ range.setEntry(entry);
+ }
}
+ }
+ GenBankLocationPoint gp0 = (GenBankLocationPoint) parserFeatureLocation(
+ fea, partes[0]);
+ range.setStart(gp0);
+ GenBankLocationPoint gp1 = (GenBankLocationPoint) parserFeatureLocation(
+ fea, partes[1]);
+ range.setEnd(gp1);
+ fea.setLocation(range);
+ return range;
}
- if (lista.size() > 0) {
- lista.add(buf.substring(pinicial));
- GenBankLocations um = new GenBankLocations();
- um.setOperator(GenBankLocations.NONE);
- for (String s : lista) {
- um.getUnits().add(parserFeatureLocation(fea, s));
+ else
+ {
+ // trata um ponto
+ // possibilidades consideradas:
+ // 467
+ // 102.110
+ // 123^124
+ // <345
+ // >400
+ // 345>
+ // 400<
+ // ou uma combinacao dessas
+ GenBankLocationPoint gp = new GenBankLocationPoint();
+ if (buf.contains(":"))
+ {
+ int pos = buf.indexOf(":");
+ if (pos > 0)
+ {
+ String entry = buf.substring(0, pos);
+ buf = buf.substring(pos + 1);
+ gp.setEntry(entry);
}
- fea.setLocation(um);
- return um;
- }
-
- // trata as funcoes: complement(location,location...),
- // join(location,location...), order(location,location...)
- if (buf.contains("(")) {
- GenBankLocations um = new GenBankLocations();
- int ini = buf.indexOf("(");
- int fim = buf.lastIndexOf(")");
- String token = buf.substring(0, ini);
- if ("complement".equalsIgnoreCase(token)) {
- String inter = buf.substring(ini + 1, fim);
- GenBankLocation interno = parserFeatureLocation(fea, inter);
- interno.setComplement(true);
- um.setOperator(GenBankLocations.COMPLEMENT);
- um.getUnits().add(interno);
- fea.setLocation(um);
- } else if ("join".equalsIgnoreCase(token)) {
- String inter = buf.substring(ini + 1, fim);
- GenBankLocation interno = parserFeatureLocation(fea, inter);
- um.setOperator(GenBankLocations.JOIN);
- um.getUnits().add(interno);
- fea.setLocation(um);
- } else if ("order".equalsIgnoreCase(token)) {
- String inter = buf.substring(ini + 1, fim);
- GenBankLocation interno = parserFeatureLocation(fea, inter);
- um.setOperator(GenBankLocations.ORDER);
- um.getUnits().add(interno);
- fea.setLocation(um);
- } else {
- log.log(Level.WARNING, "Token desconhecido em location/features - {0}", token);
- String inter = buf.substring(ini + 1, fim);
- fea.setLocation(parserFeatureLocation(fea, inter));
+ }
+ int pos = 0;
+ // verifica os simb < e > antes do primeiro numero
+ if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>')
+ {
+ gp.setPrefix(buf.charAt(pos));
+ pos++;
+ }
+ // pega o primeiro numero
+ int ini = pos;
+ while (pos < buf.length() && buf.charAt(pos) >= '0'
+ && buf.charAt(pos) <= '9')
+ {
+ pos++;
+ }
+ if (buf.subSequence(ini, pos).length() < 1)
+ {
+ System.out.println(localiza);
+ }
+ int num = Integer.parseInt(buf.substring(ini, pos));
+ int num2 = num;
+ // o primeiro numero pode ser o unico numero
+ if (pos < buf.length())
+ {
+ // verifica se tem os sinais < e > apos o primeiro numero
+ if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>')
+ {
+ if (buf.contains(".") || buf.contains("^"))
+ {
+ gp.setPrefix(buf.charAt(pos));
+ }
+ else
+ {
+ gp.setSufix(buf.charAt(pos));
+ }
+ pos++;
}
- return fea.getLocation();
- } else {
- // trata quando tiver uma lista de location
- if (buf.contains(",")) {
- String[] partes = buf.split(",");
- GenBankLocations um = new GenBankLocations();
- for (String p : partes) {
- um.getUnits().add(
- parserFeatureLocation(fea, p));
- }
- fea.setLocation(um);
- return um;
- } else {
- // trata quando tiver range
- if (buf.contains("..")) {
- String[] partes = buf.split("\\.\\.");
- GenBankLocationRange range = new GenBankLocationRange();
- if (buf.contains(":")) {
- for (int i = 0; i < partes.length; i++) {
- int pos = partes[i].indexOf(":");
- if (pos > 0) {
- String entry = partes[i].substring(0, pos);
- partes[i] = partes[i].substring(pos + 1);
- range.setEntry(entry);
- }
- }
- }
- GenBankLocationPoint gp0 = (GenBankLocationPoint) parserFeatureLocation(fea, partes[0]);
- range.setStart(gp0);
- GenBankLocationPoint gp1 = (GenBankLocationPoint) parserFeatureLocation(fea, partes[1]);
- range.setEnd(gp1);
- fea.setLocation(range);
- return range;
- } else {
- // trata um ponto
- // possibilidades consideradas:
- // 467
- // 102.110
- // 123^124
- // <345
- // >400
- // 345>
- // 400<
- // ou uma combinacao dessas
- GenBankLocationPoint gp = new GenBankLocationPoint();
- if (buf.contains(":")) {
- int pos = buf.indexOf(":");
- if (pos > 0) {
- String entry = buf.substring(0, pos);
- buf = buf.substring(pos + 1);
- gp.setEntry(entry);
- }
- }
- int pos = 0;
- // verifica os simb < e > antes do primeiro numero
- if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>') {
- gp.setPrefix(buf.charAt(pos));
- pos++;
- }
- // pega o primeiro numero
- int ini = pos;
- while (pos < buf.length() && buf.charAt(pos) >= '0'
- && buf.charAt(pos) <= '9') {
- pos++;
- }
- if (buf.subSequence(ini, pos).length() < 1) {
- System.out.println(localiza);
- }
- int num = Integer.parseInt(buf.substring(ini, pos));
- int num2 = num;
- // o primeiro numero pode ser o unico numero
- if (pos < buf.length()) {
- // verifica se tem os sinais < e > apos o primeiro numero
- if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>') {
- if (buf.contains(".") || buf.contains("^")) {
- gp.setPrefix(buf.charAt(pos));
- } else {
- gp.setSufix(buf.charAt(pos));
- }
- pos++;
- }
-
- // verifica a separacao dos numeros . ou ^
- if (pos < buf.length()
- && (buf.charAt(pos) == '.' || buf.charAt(pos) == '^')) {
- // separação localizada, possibilidade de mais numero
- gp.setSymbol(buf.charAt(pos));
- pos++;
-
- // verifica os simb < e > antes do segundo numero
- if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>') {
- gp.setSufix(buf.charAt(pos));
- pos++;
- }
-
- // pega o segundo numero
- ini = pos;
- while (pos < buf.length() && buf.charAt(pos) >= '0'
- && buf.charAt(pos) <= '9') {
- pos++;
- }
- num2 = Integer.parseInt(buf.substring(ini, pos));
-
- // verifica os simb < e > após o segundo numero
- if (pos < buf.length() && (buf.charAt(pos) == '<' || buf.charAt(pos) == '>')) {
- gp.setSufix(buf.charAt(pos));
- pos++;
- }
- }
- }
- gp.setMin(num);
- gp.setMax(num2);
- fea.setLocation(gp);
- return gp;
- }
+
+ // verifica a separacao dos numeros . ou ^
+ if (pos < buf.length()
+ && (buf.charAt(pos) == '.' || buf.charAt(pos) == '^'))
+ {
+ // separação localizada, possibilidade de mais numero
+ gp.setSymbol(buf.charAt(pos));
+ pos++;
+
+ // verifica os simb < e > antes do segundo numero
+ if (buf.charAt(pos) == '<' || buf.charAt(pos) == '>')
+ {
+ gp.setSufix(buf.charAt(pos));
+ pos++;
+ }
+
+ // pega o segundo numero
+ ini = pos;
+ while (pos < buf.length() && buf.charAt(pos) >= '0'
+ && buf.charAt(pos) <= '9')
+ {
+ pos++;
+ }
+ num2 = Integer.parseInt(buf.substring(ini, pos));
+
+ // verifica os simb < e > após o segundo numero
+ if (pos < buf.length()
+ && (buf.charAt(pos) == '<' || buf.charAt(pos) == '>'))
+ {
+ gp.setSufix(buf.charAt(pos));
+ pos++;
+ }
}
+ }
+ gp.setMin(num);
+ gp.setMax(num2);
+ fea.setLocation(gp);
+ return gp;
}
+ }
+ }
+ }
+
+  /**
+   * Parses a REFERENCE descriptor of the form {@code 1 (bases 1 to 1609)}.
+   *
+   * "(bases" and "to" are turned into commas and ")" is dropped, so the
+   * text splits into three comma-separated integers.
+   *
+   * @param descriptor
+   *          the text following the REFERENCE keyword
+   * @return a three element array: the first, second and third number found
+   *         in the descriptor, in that order
+   * @throws NumberFormatException
+   *           if one of the three fields is not an integer
+   * @throws ArrayIndexOutOfBoundsException
+   *           if the descriptor yields fewer than three fields
+   */
+  private int[] parseReferenceDescriptor(String descriptor)
+  {
+    int[] numbers = new int[3];
+    String normalised = descriptor.replace("(bases", ",");
+    normalised = normalised.replace("to", ",");
+    normalised = normalised.replace(")", "");
+    String[] fields = normalised.split(",");
+    for (int idx = 0; idx < numbers.length; idx++)
+    {
+      numbers[idx] = Integer.parseInt(fields[idx].trim());
+    }
+    return numbers;
+  }
+
+  /**
+   * Strips every occurrence of a reference component keyword from a line.
+   *
+   * @param line
+   *          the raw line
+   * @param component
+   *          the keyword text to remove
+   * @return the line without the keyword, or the line itself when the
+   *         keyword does not occur
+   */
+  private String processReferenceLine(String line, String component)
+  {
+    boolean present = line.indexOf(component) >= 0;
+    return present ? line.replace(component, "") : line;
+  }
+
+ /**
+ * Strips every occurrence of a header keyword from a line.
+ *
+ * @param line
+ *          the raw header line
+ * @param header
+ *          the header keyword text to remove
+ * @return the line without the keyword, or the line unchanged when the
+ *         keyword does not occur
+ */
+ private String processHeaderLine(String line, String header)
+ {
+ int init = line.indexOf(header);
+ if (init != -1)
+ {
+ line = line.replace(header, "");
}
-
- private int[] parseReferenceDescriptor(String descriptor){
- // 1 (bases 1 to 1609)
- int[] resultado = new int[3];
- descriptor = descriptor.replace("(bases", ",").replace("to", ",").replace(")", "");
- String[] args = descriptor.split(",");
- resultado[0] = Integer.parseInt(args[0].trim());
- resultado[1] = Integer.parseInt(args[1].trim());
- resultado[2] = Integer.parseInt(args[2].trim());
- return resultado;
+ return line;
+ }
+
+  /**
+   * Parses one line of sequence data: a leading number followed by
+   * space-separated blocks of residues.
+   *
+   * @param line
+   *          the raw sequence line; leading whitespace is ignored
+   * @return a sequence entry whose id is the leading number and whose
+   *         sequences are the remaining tokens, in order
+   * @throws NumberFormatException
+   *           if the first token is not an integer
+   */
+  private GenBankSequence processSequenceLine(String line)
+  {
+    GenBankSequence entry = new GenBankSequence();
+    String[] tokens = ltrim(line).split(" ");
+    entry.setId(Integer.parseInt(tokens[0]));
+
+    // everything after the leading number is residue data
+    Vector<String> blocks = new Vector<String>();
+    for (int t = 1; t < tokens.length; t++)
+    {
+      blocks.add(tokens[t]);
+    }
+    entry.setSequences(blocks);
+    return entry;
+  }
+
+  /**
+   * Strips every occurrence of the "COMMENT" keyword from a line.
+   *
+   * @param line
+   *          the raw comment line
+   * @return the line without the keyword, or the line itself when the
+   *         keyword does not occur
+   */
+  private String processCommentLine(String line)
+  {
+    if (line.indexOf("COMMENT") == -1)
+    {
+      return line;
+    }
+    return line.replace("COMMENT", "");
+  }
+
+ /**
+ * Removes trailing whitespace, as defined by
+ * {@link Character#isWhitespace(char)}, from a string.
+ *
+ * @param s
+ *          the string to trim; must not be null
+ * @return the string without trailing whitespace (empty when the string is
+ *         all whitespace)
+ */
+ public String rtrim(String s)
+ {
+ int i = s.length() - 1;
+ while (i >= 0 && Character.isWhitespace(s.charAt(i)))
+ {
+ i--;
}
- private String processReferenceLine(String line, String component){
- int init = line.indexOf(component);
- if (init!=-1){
- line = line.replace(component,"");
- }
- return line;
- }
- private String processHeaderLine(String line, String header){
- int init = line.indexOf(header);
- if (init!=-1){
- line = line.replace(header,"");
- }
- return line;
- }
-
- private GenBankSequence processSequenceLine(String line) {
- GenBankSequence gbs = new GenBankSequence();
- line = ltrim(line);
- String[] args = line.split(" ");
- gbs.setId(Integer.parseInt(args[0]));
- int len = args.length-1;
- Vector<String> seqs = new Vector<String>();
- for (int i=0;i<len;i++)
- seqs.add(args[i+1]);
- gbs.setSequences(seqs);
- return gbs;
- }
-
- private String processCommentLine(String line){
- int init = line.indexOf("COMMENT");
- if (init!=-1){
- line = line.replace("COMMENT","");
- }
- return line;
- }
- public String rtrim(String s) {
- int i = s.length()-1;
- while (i >= 0 && Character.isWhitespace(s.charAt(i))) {
- i--;
- }
- return s.substring(0,i+1);
+ return s.substring(0, i + 1);
+ }
+
+ /**
+ * Removes leading whitespace, as defined by
+ * {@link Character#isWhitespace(char)}, from a string.
+ *
+ * @param s
+ *          the string to trim; must not be null
+ * @return the string without leading whitespace (empty when the string is
+ *         all whitespace)
+ */
+ public String ltrim(String s)
+ {
+ int i = 0;
+ while (i < s.length() && Character.isWhitespace(s.charAt(i)))
+ {
+ i++;
}
+ return s.substring(i);
+ }
- public String ltrim(String s) {
- int i = 0;
- while (i < s.length() && Character.isWhitespace(s.charAt(i))) {
- i++;
- }
- return s.substring(i);
- }
-
- public String print(){
- StringBuffer out = new StringBuffer();
- for (SequenceI seq: this.getSeqs()){
- SequenceFeature[] seqFeatures = seq.getSequenceFeatures();
- boolean featureLinePrinted = false;
- for(SequenceFeature sf:seqFeatures){
- if(sf.getType().equals("LOCUS")){
- out.append(sf.getDescription()).append(newline);
- }else if (sf.getType().equals("DEFINITION")){
- out.append("DEFINITION ").append(sf.getDescription()).append(newline);
- }else if (sf.getType().equals("VERSION")){
- out.append("VERSION ").append(sf.getDescription()).append(newline);
- }else if (sf.getType().equals("ACCESSION")){
- out.append("ACCESSION ").append(sf.getDescription()).append(newline);
- }else if (sf.getType().equals("DBLINK")){
- out.append("DBLINK ").append(sf.getDescription()).append(newline);
- }else if (sf.getType().equals("KEYWORDS")){
- out.append("KEYWORDS ").append(sf.getDescription()).append(newline);
- }else if (sf.getType().equals("SOURCE")){
- out.append("SOURCE ").append(sf.getDescription()).append(newline);
- }else if (sf.getType().equals("REFERENCE")){
- out.append(sf.getDescription()).append(newline);
- }else if (sf.getType().equals("COMMENT")){
- out.append("COMMENT ").append(sf.getDescription()).append(newline);
- }else if (sf.getType().equals("BASE COUNT")){
- out.append("BASE COUNT ").append(sf.getDescription()).append(newline);
- }else{
- if (!featureLinePrinted){
- out.append("FEATURES Location/Qualifiers").append(newline);
- featureLinePrinted = true;
- }
- out.append(" ").append(sf.getType()).append(" ").append(sf.getBegin()).append("..").append(sf.getEnd()).append(newline);
- Hashtable<String,String> qualifiers = sf.otherDetails;
- if (qualifiers!=null){
- Enumeration<String> keys = qualifiers.keys();
- while (keys.hasMoreElements()){
- String key = keys.nextElement();
- String value = qualifiers.get(key);
- if (value!=null){
- out.append(" /").append(key).append("=").append(value).append(newline);
- }
- }
- }
- }
- }
- out.append("ORIGIN").append(newline);
- //We have to divide sequence in groups of 6x10 chars
- String sequenceString = seq.getSequenceAsString();
- int howManyGroups = (int) Math.floor(sequenceString.length()/60);
- for (int i=0;i<=howManyGroups;i++){
- String sequenceSegment = sequenceString.substring(i*60,Math.min((i+1)*60, sequenceString.length()));
- if ((!"".equals(sequenceSegment) && (sequenceSegment!=null) && (sequenceSegment.length()>0))){
- out.append(" ").append(60*i+1).append(" ");
- }
- int segmentLength = sequenceSegment.length();
- if (segmentLength>=10){
- out.append(sequenceSegment.substring(0,10)).append(" ");
- if (segmentLength>=20){
- out.append(sequenceSegment.substring(10,20)).append(" ");
- if (segmentLength>=30){
- out.append(sequenceSegment.substring(20,30)).append(" ");
- if (segmentLength>=40){
- out.append(sequenceSegment.substring(30,40)).append(" ");
- if (segmentLength>=50){
- out.append(sequenceSegment.substring(40,50)).append(" ");
- if (segmentLength<=60){
- out.append(sequenceSegment.substring(50,sequenceSegment.length()));
- }
- }else{
- out.append(sequenceSegment.substring(40,sequenceSegment.length()));
- }
- }else{
- out.append(sequenceSegment.substring(30,sequenceSegment.length()));
- }
- }else{
- out.append(sequenceSegment.substring(20,sequenceSegment.length()));
- }
- }else{
- out.append(sequenceSegment.substring(10,sequenceSegment.length()));
- }
- } else if ((!"".equals(sequenceSegment) && (sequenceSegment!=null) && (sequenceSegment.length()>0))){
- out.append(sequenceSegment);
- }
- out.append(newline);
- }
- out.append("//");
- }
- return out.toString();
+  /**
+   * Renders every sequence of this file as GenBank-style flat text.
+   *
+   * For each sequence the features are written in their stored order:
+   * header-type features (LOCUS, DEFINITION, VERSION, ...) as header lines,
+   * any other feature under a "FEATURES" heading together with its
+   * qualifiers. The residues follow under "ORIGIN" in lines of up to 60
+   * characters split into groups of 10, and the record is closed with "//".
+   *
+   * Unlike the previous implementation, no empty line is written in the
+   * ORIGIN section when the sequence length is a multiple of 60 (including
+   * an empty sequence), and a sequence without features no longer fails.
+   *
+   * @return the flat-file text; no line separator follows the final "//"
+   */
+  public String print()
+  {
+    StringBuffer out = new StringBuffer();
+    for (SequenceI seq : this.getSeqs())
+    {
+      boolean featureLinePrinted = false;
+      SequenceFeature[] seqFeatures = seq.getSequenceFeatures();
+      if (seqFeatures == null)
+      {
+        // nothing to report for this sequence besides its residues
+        seqFeatures = new SequenceFeature[0];
+      }
+      for (SequenceFeature sf : seqFeatures)
+      {
+        String type = sf.getType();
+        if (type.equals("LOCUS") || type.equals("REFERENCE"))
+        {
+          // the description already is the complete line
+          out.append(sf.getDescription()).append(newline);
+        }
+        else if (type.equals("DEFINITION"))
+        {
+          out.append("DEFINITION ").append(sf.getDescription())
+                  .append(newline);
+        }
+        else if (type.equals("VERSION"))
+        {
+          out.append("VERSION ").append(sf.getDescription())
+                  .append(newline);
+        }
+        else if (type.equals("ACCESSION"))
+        {
+          out.append("ACCESSION ").append(sf.getDescription())
+                  .append(newline);
+        }
+        else if (type.equals("DBLINK"))
+        {
+          out.append("DBLINK ").append(sf.getDescription())
+                  .append(newline);
+        }
+        else if (type.equals("KEYWORDS"))
+        {
+          out.append("KEYWORDS ").append(sf.getDescription())
+                  .append(newline);
+        }
+        else if (type.equals("SOURCE"))
+        {
+          out.append("SOURCE ").append(sf.getDescription())
+                  .append(newline);
+        }
+        else if (type.equals("COMMENT"))
+        {
+          out.append("COMMENT ").append(sf.getDescription())
+                  .append(newline);
+        }
+        else if (type.equals("BASE COUNT"))
+        {
+          out.append("BASE COUNT ").append(sf.getDescription())
+                  .append(newline);
+        }
+        else
+        {
+          // annotated feature: the heading is written once, before the
+          // first one
+          if (!featureLinePrinted)
+          {
+            out.append("FEATURES Location/Qualifiers").append(newline);
+            featureLinePrinted = true;
+          }
+          out.append(" ").append(type).append(" ").append(sf.getBegin())
+                  .append("..").append(sf.getEnd()).append(newline);
+          Hashtable<String, String> qualifiers = sf.otherDetails;
+          if (qualifiers != null)
+          {
+            Enumeration<String> keys = qualifiers.keys();
+            while (keys.hasMoreElements())
+            {
+              String key = keys.nextElement();
+              String value = qualifiers.get(key);
+              if (value != null)
+              {
+                out.append(" /").append(key).append("=").append(value)
+                        .append(newline);
+              }
+            }
+          }
+        }
+      }
+
+      out.append("ORIGIN").append(newline);
+      // We have to divide sequence in groups of 6x10 chars
+      String sequenceString = seq.getSequenceAsString();
+      int total = sequenceString.length();
+      for (int start = 0; start < total; start += 60)
+      {
+        String segment = sequenceString.substring(start,
+                Math.min(start + 60, total));
+        // each line starts with the 1-based position of its first residue
+        out.append(" ").append(start + 1).append(" ");
+
+        // up to five complete groups are each followed by a space; whatever
+        // remains (the sixth group or a partial group) closes the line
+        int pos = 0;
+        while (pos < 50 && pos + 10 <= segment.length())
+        {
+          out.append(segment.substring(pos, pos + 10)).append(" ");
+          pos += 10;
+        }
+        out.append(segment.substring(pos));
+        out.append(newline);
+      }
+      out.append("//");
+    }
+    return out.toString();
+  }
}
import java.util.Enumeration;
import java.util.Hashtable;
-public class GenBankFeature {
- public static final String MISC_TYPE = "misc_feature";
- public static final String SOURCE = "source";
- public static final String CDS = "CDS";
- public static final String GENE = "gene";
- public static final String EXON = "exon";
- public static final String INTRON = "intron";
- public static final String PRIM_TRANSCRIPT = "prim_transcript";
- public static final String mRNA = "mRNA";
- public static final String MOBILE_ELEMENT = "mobile_element";
- public static final String VARIATION = "variation";
-
- private String type;
- private Hashtable<String,String> qualifiers = new Hashtable<String,String>();
- private GenBankLocation location = null;
-
- public GenBankFeature() {
- super();
- }
-
- public GenBankFeature(String type) {
- super();
- this.type = type;
- }
-
- public void addQualifier(String key, String value){
- this.qualifiers.put(key, value);
- }
- public void updateQualifier(String key, String newValue){
- this.qualifiers.remove(key);
- this.qualifiers.put(key, newValue);
- }
-
- public String getQualifier(String key){
- return this.qualifiers.get(key);
- }
-
- public String getType() {
- return type;
- }
-
- public void setType(String type) {
- this.type = type;
- }
- public Enumeration<String> getQualifiersNames(){
- return this.qualifiers.keys();
- }
- public int getQualifiersSize(){
- return this.qualifiers.size();
- }
-
- public Hashtable<String, String> getFields() {
- return qualifiers;
- }
-
- public GenBankLocation getLocation() {
- return location;
- }
-
- public void setLocation(GenBankLocation location) {
- this.location = location;
- }
+/**
+ * One feature of a GenBank record: a feature key (its type), a table of
+ * qualifier name/value pairs and an optional location.
+ */
+public class GenBankFeature
+{
+  // Feature keys understood by this package.
+  public static final String MISC_TYPE = "misc_feature";
+
+  public static final String SOURCE = "source";
+
+  public static final String CDS = "CDS";
+
+  public static final String GENE = "gene";
+
+  public static final String EXON = "exon";
+
+  public static final String INTRON = "intron";
+
+  public static final String PRIM_TRANSCRIPT = "prim_transcript";
+
+  public static final String mRNA = "mRNA";
+
+  public static final String MOBILE_ELEMENT = "mobile_element";
+
+  public static final String VARIATION = "variation";
+
+  // Feature key of this feature; null until set.
+  private String type;
+
+  // Qualifier values keyed by qualifier name. Hashtable rejects null keys
+  // and null values with a NullPointerException.
+  private Hashtable<String, String> qualifiers = new Hashtable<String, String>();
+
+  // Location of the feature; null until set.
+  private GenBankLocation location = null;
+
+  /**
+   * Creates a feature with no type, no qualifiers and no location.
+   */
+  public GenBankFeature()
+  {
+    super();
+  }
+
+  /**
+   * Creates a feature of the given type with no qualifiers and no location.
+   *
+   * @param type
+   *          the feature key, e.g. {@link #CDS}
+   */
+  public GenBankFeature(String type)
+  {
+    super();
+    this.type = type;
+  }
+
+  /**
+   * Stores a qualifier, replacing any value already held for the same key.
+   *
+   * @param key
+   *          qualifier name
+   * @param value
+   *          qualifier value
+   * @throws NullPointerException
+   *           if key or value is null
+   */
+  public void addQualifier(String key, String value)
+  {
+    this.qualifiers.put(key, value);
+  }
+
+  /**
+   * Replaces the value of a qualifier, adding the qualifier when it is not
+   * present yet.
+   *
+   * @param key
+   *          qualifier name
+   * @param newValue
+   *          value to store
+   * @throws NullPointerException
+   *           if key or newValue is null; the previously stored value is
+   *           left untouched in that case
+   */
+  public void updateQualifier(String key, String newValue)
+  {
+    // put() already replaces an existing mapping. The former
+    // remove()-then-put() sequence discarded the old value before put()
+    // rejected a null newValue, silently losing the qualifier.
+    this.qualifiers.put(key, newValue);
+  }
+
+  /**
+   * @param key
+   *          qualifier name
+   * @return the stored value, or null when the qualifier is not present
+   */
+  public String getQualifier(String key)
+  {
+    return this.qualifiers.get(key);
+  }
+
+  /**
+   * @return the feature key, or null when none has been set
+   */
+  public String getType()
+  {
+    return type;
+  }
+
+  /**
+   * @param type
+   *          the feature key to set
+   */
+  public void setType(String type)
+  {
+    this.type = type;
+  }
+
+  /**
+   * @return an enumeration over the names of all stored qualifiers
+   */
+  public Enumeration<String> getQualifiersNames()
+  {
+    return this.qualifiers.keys();
+  }
+
+  /**
+   * @return the number of stored qualifiers
+   */
+  public int getQualifiersSize()
+  {
+    return this.qualifiers.size();
+  }
+
+  /**
+   * @return the live qualifier table itself (not a copy); changes made
+   *         through it are visible to this feature
+   */
+  public Hashtable<String, String> getFields()
+  {
+    return qualifiers;
+  }
+
+  /**
+   * @return the location, or null when none has been set
+   */
+  public GenBankLocation getLocation()
+  {
+    return location;
+  }
+
+  /**
+   * @param location
+   *          the location to set
+   */
+  public void setLocation(GenBankLocation location)
+  {
+    this.location = location;
+  }
+}
*
*
*/
-public abstract class GenBankLocation {
- // the location is complement strand?
- private boolean complement = false;
+public abstract class GenBankLocation
+{
+ // the location is complement strand?
+ private boolean complement = false;
- public GenBankLocation() {
- }
+ /**
+ * Creates a location that is not on the complement strand (the complement
+ * flag keeps its default of false).
+ */
+ public GenBankLocation()
+ {
+ }
- /**
- * The minor location in genome sequence
- *
- * @return position
- */
- public abstract int getMinor();
+ /**
+ * The minor (lower) position of this location in the genome sequence.
+ * What exactly is reported is up to the concrete subclass.
+ *
+ * @return position
+ */
+ public abstract int getMinor();
- /**
- * The major location in genome sequence
- *
- * @return position
- */
- public abstract int getMajor();
+ /**
+ * The major (upper) position of this location in the genome sequence.
+ * What exactly is reported is up to the concrete subclass.
+ *
+ * @return position
+ */
+ public abstract int getMajor();
- /**
- * @return the complement
- */
- public boolean isComplement() {
- return complement;
- }
+ /**
+  * Tells whether this location has been flagged as lying on the complement
+  * strand.
+  *
+  * @return the current value of the complement flag
+  */
+ public boolean isComplement()
+ {
+   return this.complement;
+ }
- /**
- * @param complement the complement to set
- */
- public void setComplement(boolean complement) {
- this.complement = complement;
- }
+ /**
+  * Flags this location as lying (or not lying) on the complement strand.
+  *
+  * @param complement
+  *          the new value of the complement flag
+  */
+ public void setComplement(boolean complement)
+ {
+   this.complement = complement;
+ }
}
\ No newline at end of file
/**
*
*/
-public class GenBankLocationPoint extends GenBankLocation {
- private String entry;
- private char prefix = 0;
- private int min = 0;
- private char symbol = 0;
- private int max = 0;
- private char sufix = 0;
-
- public GenBankLocationPoint() {
- }
-
- public GenBankLocationPoint(int point) {
- this.min = point;
- this.max = point;
- }
-
- public GenBankLocationPoint(int min, int max) {
- this.min = min;
- this.max = max;
- }
-
- public int getMinor() {
- return this.min;
- }
-
- public int getMajor() {
- return this.max;
- }
-
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder();
- if( prefix != 0 && prefix != ' ' ) {
- sb.append(prefix);
- }
- if( symbol == '.' || symbol == '^' ) {
- sb.append( String.format("%d%c%d",min,symbol,max) );
- } else {
- if( min != max ) {
- sb.append( String.format("%d.%d",min,max) );
- } else {
- sb.append( min );
- }
- }
- if( sufix != 0 && sufix != ' ' ) {
- sb.append(sufix);
- }
- return sb.toString();
- }
-
- /**
- * @return the prefix
- */
- public char getPrefix() {
- return prefix;
- }
-
- /**
- * @param prefix the prefix to set
- */
- public void setPrefix(char prefix) {
- this.prefix = prefix;
- }
-
- /**
- * @return the min
- */
- public int getMin() {
- return min;
- }
-
- /**
- * @param min the min to set
- */
- public void setMin(int min) {
- this.min = min;
- }
-
- /**
- * @return the symbol
- */
- public char getSymbol() {
- return symbol;
- }
-
- /**
- * @param symbol the symbol to set
- */
- public void setSymbol(char symbol) {
- this.symbol = symbol;
- }
-
- /**
- * @return the max
- */
- public int getMax() {
- return max;
- }
-
- /**
- * @param max the max to set
- */
- public void setMax(int max) {
- this.max = max;
- }
-
- /**
- * @return the sufix
- */
- public char getSufix() {
- return sufix;
- }
-
- /**
- * @param sufix the sufix to set
- */
- public void setSufix(char sufix) {
- this.sufix = sufix;
- }
-
- /**
- * @return the entry
- */
- public String getEntry() {
- return entry;
- }
-
- /**
- * @param entry the entry to set
- */
- public void setEntry(String entry) {
- this.entry = entry;
- }
+public class GenBankLocationPoint extends GenBankLocation
+{
+  // Entry label; stored and returned as-is, not used by toString().
+  private String entry;
+
+  // Character printed before the position; 0 or ' ' means "none".
+  private char prefix = 0;
+
+  // Lower position.
+  private int min = 0;
+
+  // Separator printed between min and max when it is '.' or '^'.
+  private char symbol = 0;
+
+  // Upper position; equal to min for a single-base point.
+  private int max = 0;
+
+  // Character printed after the position; 0 or ' ' means "none".
+  private char sufix = 0;
+
+  /**
+   * Creates a point at position 0 with no prefix, symbol or suffix.
+   */
+  public GenBankLocationPoint()
+  {
+  }
+
+  /**
+   * Creates a single-position point.
+   *
+   * @param point
+   *          used as both the lower and the upper position
+   */
+  public GenBankLocationPoint(int point)
+  {
+    this.min = point;
+    this.max = point;
+  }
+
+  /**
+   * Creates a point that spans two positions.
+   *
+   * @param min
+   *          lower position
+   * @param max
+   *          upper position
+   */
+  public GenBankLocationPoint(int min, int max)
+  {
+    this.min = min;
+    this.max = max;
+  }
+
+  /**
+   * @return the lower position ({@code min})
+   */
+  @Override
+  public int getMinor()
+  {
+    return this.min;
+  }
+
+  /**
+   * @return the upper position ({@code max})
+   */
+  @Override
+  public int getMajor()
+  {
+    return this.max;
+  }
+
+  /**
+   * Renders the point as text: optional prefix, then {@code min},
+   * {@code min<symbol>max} (symbol '.' or '^') or {@code min.max} when the
+   * two positions differ and no symbol is set, then the optional suffix.
+   *
+   * @return the textual form, always written with ASCII digits
+   */
+  @Override
+  public String toString()
+  {
+    StringBuilder sb = new StringBuilder();
+    if (prefix != 0 && prefix != ' ')
+    {
+      sb.append(prefix);
+    }
+    // Plain appends instead of String.format("%d"): Formatter localises
+    // digits for the default locale, which would make the output depend on
+    // the machine and disagree with the min == max branch below.
+    if (symbol == '.' || symbol == '^')
+    {
+      sb.append(min).append(symbol).append(max);
+    }
+    else if (min != max)
+    {
+      sb.append(min).append('.').append(max);
+    }
+    else
+    {
+      sb.append(min);
+    }
+    if (sufix != 0 && sufix != ' ')
+    {
+      sb.append(sufix);
+    }
+    return sb.toString();
+  }
+
+  /**
+   * @return the prefix
+   */
+  public char getPrefix()
+  {
+    return prefix;
+  }
+
+  /**
+   * @param prefix
+   *          the prefix to set
+   */
+  public void setPrefix(char prefix)
+  {
+    this.prefix = prefix;
+  }
+
+  /**
+   * @return the min
+   */
+  public int getMin()
+  {
+    return min;
+  }
+
+  /**
+   * @param min
+   *          the min to set
+   */
+  public void setMin(int min)
+  {
+    this.min = min;
+  }
+
+  /**
+   * @return the symbol
+   */
+  public char getSymbol()
+  {
+    return symbol;
+  }
+
+  /**
+   * @param symbol
+   *          the symbol to set
+   */
+  public void setSymbol(char symbol)
+  {
+    this.symbol = symbol;
+  }
+
+  /**
+   * @return the max
+   */
+  public int getMax()
+  {
+    return max;
+  }
+
+  /**
+   * @param max
+   *          the max to set
+   */
+  public void setMax(int max)
+  {
+    this.max = max;
+  }
+
+  /**
+   * @return the sufix
+   */
+  public char getSufix()
+  {
+    return sufix;
+  }
+
+  /**
+   * @param sufix
+   *          the sufix to set
+   */
+  public void setSufix(char sufix)
+  {
+    this.sufix = sufix;
+  }
+
+  /**
+   * @return the entry
+   */
+  public String getEntry()
+  {
+    return entry;
+  }
+
+  /**
+   * @param entry
+   *          the entry to set
+   */
+  public void setEntry(String entry)
+  {
+    this.entry = entry;
+  }
+}
/**
*
*/
-public class GenBankLocationRange extends GenBankLocation {
- private String entry = null;
- private GenBankLocationPoint start = null;
- private GenBankLocationPoint end = null;
+public class GenBankLocationRange extends GenBankLocation
+{
+ private String entry = null;
- public GenBankLocationRange() {
- }
+ private GenBankLocationPoint start = null;
- @Override
- public int getMinor() {
- return start == null ? 0 : start.getMinor();
- }
+ private GenBankLocationPoint end = null;
- @Override
- public int getMajor() {
- return end == null ? 0 : end.getMajor();
- }
+ /**
+ * Creates an empty range: entry, start and end are all null until set.
+ */
+ public GenBankLocationRange()
+ {
+ }
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder();
-// if( getDirecao() != '5' ) {
-// sb.append("complement(");
-// }
- if( entry != null ) {
- sb.append(entry);
- sb.append(":");
- }
- if( getStart() != null ) {
- sb.append( getStart().toString() );
- }
- if( getEnd() != null && getStart() != getEnd() && !start.equals(end) ) {
- sb.append("..");
- sb.append( getEnd().toString() );
- }
- return sb.toString();
- }
+ /**
+  * Lower position of the range, taken from the start point.
+  *
+  * @return the start point's minor position, or 0 when no start is set
+  */
+ @Override
+ public int getMinor()
+ {
+   if (start == null)
+   {
+     return 0;
+   }
+   return start.getMinor();
+ }
- /**
- * @return the entry
- */
- public String getEntry() {
- return entry;
- }
+ /**
+  * Upper position of the range, taken from the end point.
+  *
+  * @return the end point's major position, or 0 when no end is set
+  */
+ @Override
+ public int getMajor()
+ {
+   if (end == null)
+   {
+     return 0;
+   }
+   return end.getMajor();
+ }
- /**
- * @param entry the entry to set
- */
- public void setEntry(String entry) {
- this.entry = entry;
+ /**
+ * Renders the range as text: an optional "entry:" prefix, the start point
+ * when one is set and, when an end point different from the start is set,
+ * ".." followed by the end point.
+ *
+ * @return the textual form; empty when nothing has been set
+ */
+ @Override
+ public String toString()
+ {
+ StringBuilder sb = new StringBuilder();
+ // if( getDirecao() != '5' ) {
+ // sb.append("complement(");
+ // }
+ if (entry != null)
+ {
+ sb.append(entry);
+ sb.append(":");
}
-
- /**
- * @return the start
- */
- public GenBankLocationPoint getStart() {
- return start;
+ if (getStart() != null)
+ {
+ sb.append(getStart().toString());
}
-
- /**
- * @param start the start to set
- */
- public void setStart(GenBankLocationPoint start) {
- this.start = start;
+ // Compare from the end point, which is known to be non-null here: the
+ // former start.equals(end) threw a NullPointerException for a range that
+ // has an end but no start.
+ if (getEnd() != null && !getEnd().equals(getStart()))
+ {
+ sb.append("..");
+ sb.append(getEnd().toString());
}
+ return sb.toString();
+ }
- /**
- * @return the end
- */
- public GenBankLocationPoint getEnd() {
- return end;
- }
+ /**
+  * Entry label printed in front of the range by {@code toString()}.
+  *
+  * @return the entry label, or null when none is set
+  */
+ public String getEntry()
+ {
+   return this.entry;
+ }
- /**
- * @param end the end to set
- */
- public void setEnd(GenBankLocationPoint end) {
- this.end = end;
- }
+ /**
+  * Sets the entry label printed in front of the range.
+  *
+  * @param entry
+  *          the new entry label; null removes it
+  */
+ public void setEntry(String entry)
+ {
+   this.entry = entry;
+ }
+
+ /**
+  * First point of the range.
+  *
+  * @return the start point, or null when none is set
+  */
+ public GenBankLocationPoint getStart()
+ {
+   return this.start;
+ }
+
+ /**
+  * Sets the first point of the range.
+  *
+  * @param start
+  *          the new start point
+  */
+ public void setStart(GenBankLocationPoint start)
+ {
+   this.start = start;
+ }
+
+ /**
+  * Last point of the range.
+  *
+  * @return the end point, or null when none is set
+  */
+ public GenBankLocationPoint getEnd()
+ {
+   return this.end;
+ }
+
+ /**
+  * Sets the last point of the range.
+  *
+  * @param end
+  *          the new end point
+  */
+ public void setEnd(GenBankLocationPoint end)
+ {
+   this.end = end;
+ }
}
package jalview.io.xdb.genbank;
/**
- *
+ *
* @author Dieval Guizelini
*/
-public class GenBankLocations extends GenBankLocation {
- public static final int NONE = 1; // default
- public static final int COMPLEMENT = 2;
- public static final int JOIN = 3;
- public static final int ORDER = 4; // conj com ordem desconhecida
- private int operator = NONE;
- private java.util.List<GenBankLocation> units;
-
- public GenBankLocations() {
- units = new java.util.ArrayList<GenBankLocation>();
- }
+public class GenBankLocations extends GenBankLocation
+{
+ public static final int NONE = 1; // default
- @Override
- public void setComplement(boolean complement){
- super.setComplement(complement);
- this.operator = COMPLEMENT;
- if (units != null) {
- for (GenBankLocation o : units) {
- o.setComplement(complement);
- }
- }
- }
+ public static final int COMPLEMENT = 2;
- @Override
- public String toString() {
- StringBuilder sb = new StringBuilder();
- if (getOperator() == COMPLEMENT) {
- sb.append("complement(");
- } else if (getOperator() == JOIN) {
- sb.append("join(");
- } else if (getOperator() == ORDER) {
- sb.append("order(");
- }
- if (units.size() > 0) {
- sb.append(units.get(0).toString());
- for (int i = 1; i < units.size(); i++) {
- sb.append(",");
- sb.append(units.get(i).toString());
- }
- }
- if (getOperator() != NONE) {
- sb.append(")");
- }
- return sb.toString();
- }
+ public static final int JOIN = 3;
- /**
- * @return the units
- */
- public java.util.List<GenBankLocation> getUnits() {
- return units;
- }
+ public static final int ORDER = 4; // set of locations whose order is unknown
- /**
- * @param units the units to set
- */
- public void setUnits(java.util.List<GenBankLocation> units) {
- this.units = units;
- }
+ private int operator = NONE;
+
+ private java.util.List<GenBankLocation> units;
- @Override
- public int getMinor() {
- if( units.size() > 0 ) {
- return units.get(0).getMinor();
- }
- return 0;
+ /**
+  * Creates a compound location with an empty list of units.
+  */
+ public GenBankLocations()
+ {
+   this.units = new java.util.ArrayList<GenBankLocation>();
+ }
+
+ /**
+  * Sets the complement flag on this compound location and on every unit it
+  * currently holds. Turning the flag on switches the operator to
+  * {@code COMPLEMENT}; turning it off resets a {@code COMPLEMENT} operator
+  * to {@code NONE} and leaves any other operator alone.
+  *
+  * @param complement
+  *          the new value of the complement flag
+  */
+ @Override
+ public void setComplement(boolean complement)
+ {
+   super.setComplement(complement);
+   // The operator used to be forced to COMPLEMENT unconditionally, so
+   // setComplement(false) made toString() print "complement(...)" for a
+   // location that is not complemented.
+   if (complement)
+   {
+     this.operator = COMPLEMENT;
+   }
+   else if (this.operator == COMPLEMENT)
+   {
+     this.operator = NONE;
+   }
+   if (units != null)
+   {
+     for (GenBankLocation unit : units)
+     {
+       unit.setComplement(complement);
+     }
+   }
+ }
- @Override
- public int getMajor() {
- int ind = units.size();
- if( ind > 0 ) {
- return units.get(ind-1).getMajor();
- }
- return 0;
+ /**
+ * Renders the compound location as text: the units joined by commas,
+ * wrapped in "complement(", "join(" or "order(" plus a closing ")" when
+ * the operator is COMPLEMENT, JOIN or ORDER. With operator NONE the units
+ * are printed without a wrapper.
+ *
+ * NOTE(review): units is dereferenced without a null check here, unlike in
+ * setComplement, so this fails after setUnits(null).
+ *
+ * @return the textual form of this compound location
+ */
+ @Override
+ public String toString()
+ {
+ StringBuilder sb = new StringBuilder();
+ if (getOperator() == COMPLEMENT)
+ {
+ sb.append("complement(");
+ }
+ else if (getOperator() == JOIN)
+ {
+ sb.append("join(");
}
+ else if (getOperator() == ORDER)
+ {
+ sb.append("order(");
+ }
+ if (units.size() > 0)
+ {
+ // first unit without a separator, every following unit after a comma
+ sb.append(units.get(0).toString());
+ for (int i = 1; i < units.size(); i++)
+ {
+ sb.append(",");
+ sb.append(units.get(i).toString());
+ }
+ }
+ if (getOperator() != NONE)
+ {
+ sb.append(")");
+ }
+ return sb.toString();
+ }
+
+ /**
+  * Gives access to the list of locations combined by this object.
+  *
+  * @return the list held by this object itself, not a copy
+  */
+ public java.util.List<GenBankLocation> getUnits()
+ {
+   return this.units;
+ }
- /**
- * @return the operator
- */
- public int getOperator() {
- return operator;
+ /**
+  * Replaces the list of locations combined by this object. The list is
+  * stored as given, without copying.
+  *
+  * @param units
+  *          the new list of units
+  */
+ public void setUnits(java.util.List<GenBankLocation> units)
+ {
+   this.units = units;
+ }
+
+ /**
+ * Minor position of the compound location, taken from the first unit only.
+ *
+ * NOTE(review): the other units are not inspected, so this is the overall
+ * minimum only if the units are stored in ascending order - confirm with
+ * the code that fills the list.
+ *
+ * @return the first unit's minor position, or 0 when there are no units
+ */
+ @Override
+ public int getMinor()
+ {
+ if (units.size() > 0)
+ {
+ return units.get(0).getMinor();
}
+ return 0;
+ }
- /**
- * @param operator the operator to set
- */
- public void setOperator(int operator) {
- this.operator = operator;
+ /**
+ * Major position of the compound location, taken from the last unit only.
+ *
+ * NOTE(review): the other units are not inspected, so this is the overall
+ * maximum only if the units are stored in ascending order - confirm with
+ * the code that fills the list.
+ *
+ * @return the last unit's major position, or 0 when there are no units
+ */
+ @Override
+ public int getMajor()
+ {
+ // ind is the unit count, so the last unit sits at ind - 1
+ int ind = units.size();
+ if (ind > 0)
+ {
+ return units.get(ind - 1).getMajor();
}
+ return 0;
+ }
+
+ /**
+  * Operator applied to the units when the location is printed.
+  *
+  * @return one of the operator codes NONE, COMPLEMENT, JOIN or ORDER,
+  *         or whatever value was last passed to setOperator
+  */
+ public int getOperator()
+ {
+   return this.operator;
+ }
+
+ /**
+  * Sets the operator applied to the units when the location is printed.
+  * The value is stored without validation.
+  *
+  * @param operator
+  *          the new operator code
+  */
+ public void setOperator(int operator)
+ {
+   this.operator = operator;
+ }
}
package jalview.io.xdb.genbank;
/**
- * A short mnemonic name for the entry, chosen to suggest the
- * sequence's definition. Mandatory keyword/exactly one record.
- *
- * <p>The LOCUS field contains a number of different data elements, including locus name,
- * sequence length, molecule type, GenBank division, and modification date. Each element
- * is described below.</p>
- *
+ * A short mnemonic name for the entry, chosen to suggest the sequence's
+ * definition. Mandatory keyword/exactly one record.
+ *
+ * <p>
+ * The LOCUS field contains a number of different data elements, including locus
+ * name, sequence length, molecule type, GenBank division, and modification
+ * date. Each element is described below.
+ * </p>
+ *
*/
-public class GenBankLocus {
- private String name;
- private int sequenceLength;
- private String strand;
- private String moleculeType;
- private boolean linearSequence;
- private String division;
- private String modificationDate;
-
- public GenBankLocus() {
- }
-
- public GenBankLocus(String name, int sequenceLength) {
- this.name = name;
- this.sequenceLength = sequenceLength;
- }
-
-
- /**
- * @return the name
- */
- public String getName() {
- return name;
- }
-
- /**
- * @param name the name to set
- */
- public void setName(String name) {
- this.name = name;
- }
-
- /**
- * @return the sequenceLength
- */
- public int getSequenceLength() {
- return sequenceLength;
- }
-
- /**
- * @param sequenceLength the sequenceLength to set
- */
- public void setSequenceLength(int sequenceLength) {
- this.sequenceLength = sequenceLength;
- }
-
- /**
- * @return the strand
- */
- public String getStrand() {
- return strand;
- }
-
- /**
- * @param strand the strand to set
- */
- public void setStrand(String strand) {
- this.strand = strand;
- }
-
- /**
- * @return the moleculeType
- */
- public String getMoleculeType() {
- return moleculeType;
- }
-
- /**
- * @param moleculeType the moleculeType to set
- */
- public void setMoleculeType(String moleculeType) {
- this.moleculeType = moleculeType;
- }
-
- /**
- * @return the linearSequence
- */
- public boolean isLinearSequence() {
- return linearSequence;
- }
-
- /**
- * @param linearSequence the linearSequence to set
- */
- public void setLinearSequence(boolean linearSequence) {
- this.linearSequence = linearSequence;
- }
-
- /**
- * @return the division
- */
- public String getDivision() {
- return division;
- }
-
- /**
- * @param division the division to set
- */
- public void setDivision(String division) {
- this.division = division;
- }
-
- /**
- * @return the modificationDate
- */
- public String getModificationDate() {
- return modificationDate;
- }
-
- /**
- * @param modificationDate the modificationDate to set
- */
- public void setModificationDate(String modificationDate) {
- this.modificationDate = modificationDate;
- }
-
- @Override
- public String toString() {
-
- return String.format("LOCUS %-16s %11d bp %3s %6s %-8s %3s %s",
- this.name, this.sequenceLength, this.strand,
- this.moleculeType, linearSequence?"linear ":"circular",
- this.division, ((modificationDate == null) || (modificationDate.equals("")) ? "" : modificationDate.toUpperCase())
- );
- }
+public class GenBankLocus
+{
+  // Locus name.
+  private String name;
+
+  // Sequence length, printed followed by "bp".
+  private int sequenceLength;
+
+  private String strand;
+
+  private String moleculeType;
+
+  // true prints "linear", false prints "circular".
+  private boolean linearSequence;
+
+  private String division;
+
+  // Printed upper-cased; null or empty prints nothing.
+  private String modificationDate;
+
+  /**
+   * Creates an empty locus; every text field is null, the length is 0 and
+   * the topology is circular until set.
+   */
+  public GenBankLocus()
+  {
+  }
+
+  /**
+   * Creates a locus with a name and a sequence length.
+   *
+   * @param name
+   *          the locus name
+   * @param sequenceLength
+   *          the sequence length
+   */
+  public GenBankLocus(String name, int sequenceLength)
+  {
+    this.name = name;
+    this.sequenceLength = sequenceLength;
+  }
+
+  /**
+   * @return the name
+   */
+  public String getName()
+  {
+    return name;
+  }
+
+  /**
+   * @param name
+   *          the name to set
+   */
+  public void setName(String name)
+  {
+    this.name = name;
+  }
+
+  /**
+   * @return the sequenceLength
+   */
+  public int getSequenceLength()
+  {
+    return sequenceLength;
+  }
+
+  /**
+   * @param sequenceLength
+   *          the sequenceLength to set
+   */
+  public void setSequenceLength(int sequenceLength)
+  {
+    this.sequenceLength = sequenceLength;
+  }
+
+  /**
+   * @return the strand
+   */
+  public String getStrand()
+  {
+    return strand;
+  }
+
+  /**
+   * @param strand
+   *          the strand to set
+   */
+  public void setStrand(String strand)
+  {
+    this.strand = strand;
+  }
+
+  /**
+   * @return the moleculeType
+   */
+  public String getMoleculeType()
+  {
+    return moleculeType;
+  }
+
+  /**
+   * @param moleculeType
+   *          the moleculeType to set
+   */
+  public void setMoleculeType(String moleculeType)
+  {
+    this.moleculeType = moleculeType;
+  }
+
+  /**
+   * @return the linearSequence
+   */
+  public boolean isLinearSequence()
+  {
+    return linearSequence;
+  }
+
+  /**
+   * @param linearSequence
+   *          the linearSequence to set
+   */
+  public void setLinearSequence(boolean linearSequence)
+  {
+    this.linearSequence = linearSequence;
+  }
+
+  /**
+   * @return the division
+   */
+  public String getDivision()
+  {
+    return division;
+  }
+
+  /**
+   * @param division
+   *          the division to set
+   */
+  public void setDivision(String division)
+  {
+    this.division = division;
+  }
+
+  /**
+   * @return the modificationDate
+   */
+  public String getModificationDate()
+  {
+    return modificationDate;
+  }
+
+  /**
+   * @param modificationDate
+   *          the modificationDate to set
+   */
+  public void setModificationDate(String modificationDate)
+  {
+    this.modificationDate = modificationDate;
+  }
+
+  /**
+   * Renders the LOCUS line: name, length in bp, strand, molecule type,
+   * topology, division and the upper-cased modification date. Text fields
+   * that are still null are printed as "null", except the date, which is
+   * printed as an empty string.
+   *
+   * @return the LOCUS line without a trailing line break
+   */
+  @Override
+  public String toString()
+  {
+    // A fixed locale keeps the line identical on every machine: with the
+    // default locale %d may print non-ASCII digits and toUpperCase() may
+    // apply language-specific case rules.
+    String date = "";
+    if (modificationDate != null && !modificationDate.equals(""))
+    {
+      date = modificationDate.toUpperCase(java.util.Locale.US);
+    }
+    return String.format(java.util.Locale.US,
+            "LOCUS %-16s %11d bp %3s %6s %-8s %3s %s", this.name,
+            this.sequenceLength, this.strand, this.moleculeType,
+            linearSequence ? "linear " : "circular", this.division, date);
+  }
+}
package jalview.io.xdb.genbank;
-public class GenBankReference {
- private int order;
- private int begin;
- private int end;
- private String descriptor;
- private String authors;
- private String title;
- private String journal;
- private String pubmed;
- private String medline;
- private String consortia;
- private String remark;
-
- public GenBankReference() {
- super();
- }
-
- public String getDescriptor() {
- return descriptor;
- }
-
- public void setDescriptor(String descriptor) {
- this.descriptor = descriptor;
- }
-
- public String getAuthors() {
- return authors;
- }
-
- public void setAuthors(String authors) {
- this.authors = authors;
- }
-
- public String getTitle() {
- return title;
- }
-
- public void setTitle(String title) {
- this.title = title;
- }
-
- public String getJournal() {
- return journal;
- }
-
- public void setJournal(String journal) {
- this.journal = journal;
- }
-
- public String getPubmed() {
- return pubmed;
- }
-
- public void setPubmed(String pubmed) {
- this.pubmed = pubmed;
- }
-
- public int getOrder() {
- return order;
- }
-
- public void setOrder(int order) {
- this.order = order;
- }
- public int getBegin() {
- return begin;
- }
-
- public void setBegin(int begin) {
- this.begin = begin;
- }
-
- public int getEnd() {
- return end;
- }
-
- public void setEnd(int end) {
- this.end = end;
- }
-
-
- public String getMedline() {
- return medline;
- }
-
- public void setMedline(String medline) {
- this.medline = medline;
- }
-
- public String getConsortia() {
- return consortia;
- }
-
- public void setConsortia(String consortia) {
- this.consortia = consortia;
- }
-
- public String getRemark() {
- return remark;
- }
-
- public void setRemark(String remark) {
- this.remark = remark;
- }
-
- public String toString(){
-// References has the following format
-// REFERENCE 1 (bases 1 to 1976)
-// AUTHORS Spritz,R.A., DeRiel,J.K., Forget,B.G. and Weissman,S.M.
-// TITLE Complete nucleotide sequence of the human delta-globin gene
-// JOURNAL Cell 21 (3), 639-646 (1980)
-// PUBMED 7438204
-
- StringBuffer buf = new StringBuffer();
- buf.append("REFERENCE ").append(this.getOrder()).append(" (bases ").append(this.getBegin()).append(" to ").append(this.getEnd()).append(")\n");
- if (this.getAuthors()!=null)
- buf.append(" AUTHORS ").append(this.getAuthors()).append("\n");
- if (this.getTitle()!=null)
- buf.append(" TITLE ").append(this.getTitle()).append("\n");
- if (this.getJournal()!=null)
- buf.append(" JOURNAL ").append(this.getJournal()).append("\n");
- if (this.getPubmed()!=null)
- buf.append(" PUBMED ").append(this.getPubmed()).append("\n");;
- if (this.getMedline()!=null)
- buf.append(" MEDLINE ").append(this.getMedline()).append("\n");;
- if (this.getRemark()!=null)
- buf.append(" REMARK ").append(this.getRemark()).append("\n");;
- if (this.getConsortia()!=null)
- buf.append(" CONSRTM ").append(this.getConsortia()).append("\n");;
- return buf.toString();
- }
-
+/**
+ * One REFERENCE entry of a GenBank record: its order number, the base range
+ * it covers and the citation fields printed below the REFERENCE line.
+ */
+public class GenBankReference
+{
+  private int order;
+
+  private int begin;
+
+  private int end;
+
+  // Stored and returned only; toString() does not print it.
+  private String descriptor;
+
+  private String authors;
+
+  private String title;
+
+  private String journal;
+
+  private String pubmed;
+
+  private String medline;
+
+  private String consortia;
+
+  private String remark;
+
+  /**
+   * Creates a reference with order 0, range 0 to 0 and no citation fields.
+   */
+  public GenBankReference()
+  {
+    super();
+  }
+
+  /** @return the descriptor, or null when not set */
+  public String getDescriptor()
+  {
+    return this.descriptor;
+  }
+
+  /** @param descriptor the descriptor to store */
+  public void setDescriptor(String descriptor)
+  {
+    this.descriptor = descriptor;
+  }
+
+  /** @return the AUTHORS text, or null when not set */
+  public String getAuthors()
+  {
+    return this.authors;
+  }
+
+  /** @param authors the AUTHORS text to store */
+  public void setAuthors(String authors)
+  {
+    this.authors = authors;
+  }
+
+  /** @return the TITLE text, or null when not set */
+  public String getTitle()
+  {
+    return this.title;
+  }
+
+  /** @param title the TITLE text to store */
+  public void setTitle(String title)
+  {
+    this.title = title;
+  }
+
+  /** @return the JOURNAL text, or null when not set */
+  public String getJournal()
+  {
+    return this.journal;
+  }
+
+  /** @param journal the JOURNAL text to store */
+  public void setJournal(String journal)
+  {
+    this.journal = journal;
+  }
+
+  /** @return the PUBMED text, or null when not set */
+  public String getPubmed()
+  {
+    return this.pubmed;
+  }
+
+  /** @param pubmed the PUBMED text to store */
+  public void setPubmed(String pubmed)
+  {
+    this.pubmed = pubmed;
+  }
+
+  /** @return the number printed after REFERENCE */
+  public int getOrder()
+  {
+    return this.order;
+  }
+
+  /** @param order the number printed after REFERENCE */
+  public void setOrder(int order)
+  {
+    this.order = order;
+  }
+
+  /** @return the first base of the covered range */
+  public int getBegin()
+  {
+    return this.begin;
+  }
+
+  /** @param begin the first base of the covered range */
+  public void setBegin(int begin)
+  {
+    this.begin = begin;
+  }
+
+  /** @return the last base of the covered range */
+  public int getEnd()
+  {
+    return this.end;
+  }
+
+  /** @param end the last base of the covered range */
+  public void setEnd(int end)
+  {
+    this.end = end;
+  }
+
+  /** @return the MEDLINE text, or null when not set */
+  public String getMedline()
+  {
+    return this.medline;
+  }
+
+  /** @param medline the MEDLINE text to store */
+  public void setMedline(String medline)
+  {
+    this.medline = medline;
+  }
+
+  /** @return the CONSRTM text, or null when not set */
+  public String getConsortia()
+  {
+    return this.consortia;
+  }
+
+  /** @param consortia the CONSRTM text to store */
+  public void setConsortia(String consortia)
+  {
+    this.consortia = consortia;
+  }
+
+  /** @return the REMARK text, or null when not set */
+  public String getRemark()
+  {
+    return this.remark;
+  }
+
+  /** @param remark the REMARK text to store */
+  public void setRemark(String remark)
+  {
+    this.remark = remark;
+  }
+
+  /**
+   * Renders the reference as text. The first line is
+   * {@code REFERENCE <order> (bases <begin> to <end>)}; it is followed by
+   * one line per citation field that is not null, in the order AUTHORS,
+   * TITLE, JOURNAL, PUBMED, MEDLINE, REMARK, CONSRTM. Every line ends with
+   * a line feed.
+   *
+   * @return the textual form of the reference
+   */
+  @Override
+  public String toString()
+  {
+    StringBuilder text = new StringBuilder();
+    text.append("REFERENCE ");
+    text.append(this.getOrder());
+    text.append(" (bases ");
+    text.append(this.getBegin());
+    text.append(" to ");
+    text.append(this.getEnd());
+    text.append(")\n");
+    appendField(text, " AUTHORS ", this.getAuthors());
+    appendField(text, " TITLE ", this.getTitle());
+    appendField(text, " JOURNAL ", this.getJournal());
+    appendField(text, " PUBMED ", this.getPubmed());
+    appendField(text, " MEDLINE ", this.getMedline());
+    appendField(text, " REMARK ", this.getRemark());
+    appendField(text, " CONSRTM ", this.getConsortia());
+    return text.toString();
+  }
+
+  // Appends "<label><value>\n" to text, or nothing when value is null.
+  private static void appendField(StringBuilder text, String label,
+          String value)
+  {
+    if (value != null)
+    {
+      text.append(label).append(value).append("\n");
+    }
+  }
+
+}
package jalview.io.xdb.genbank;
import java.util.Vector;
+
/**
- * A line like the following:
- * 1 aatgaaggtt catttttcat tctcacaaac taatgaaacc ctgcttatct taaaccaacc
- * will be mapped as:
- * id: 1
- * sequences: {"aatgaaggtt", "catttttcat", "tctcacaaac", "taatgaaacc", "ctgcttatct", "taaaccaacc"}
- * Each sequence has 8 nucleotides long
+ * One line of the ORIGIN section. For example, the line
+ * "1 aatgaaggtt catttttcat tctcacaaac taatgaaacc ctgcttatct taaaccaacc"
+ * is mapped as id: 1 and sequences: {"aatgaaggtt", "catttttcat",
+ * "tctcacaaac", "taatgaaacc", "ctgcttatct", "taaaccaacc"}. Each sequence
+ * group in this example is 10 nucleotides long.
+ *
* @author darolmar
- *
+ *
*/
-public class GenBankSequence {
- //Initial position
- private int id;
- //Sequences in that line
- private Vector<String> sequences;
-
- public GenBankSequence() {
- super();
- sequences = new Vector<String>();
- }
-
- public int getId() {
- return id;
- }
-
- public void setId(int id) {
- this.id = id;
- }
-
- public Vector<String> getSequences() {
- return sequences;
- }
-
- public void setSequences(Vector<String> sequences) {
- this.sequences = sequences;
- }
-
- public String getSequencesAsString(){
- StringBuffer sb = new StringBuffer();
- for (String seq:sequences)
- sb.append(seq).append(" ");
- return sb.toString();
- }
-
- public String toString(){
- StringBuffer sb = new StringBuffer()
- .append(" ").append(this.id);
- for (String seq:sequences)
- sb.append(" ").append(seq);
- sb.append("\n");
- return sb.toString();
- }
-
+public class GenBankSequence
+{
+  // Number printed at the start of the line (initial position).
+  private int id;
+
+  // Sequence groups of the line, in the order they appear.
+  private Vector<String> sequences;
+
+  /**
+   * Creates a line with id 0 and an empty list of sequence groups.
+   */
+  public GenBankSequence()
+  {
+    super();
+    this.sequences = new Vector<String>();
+  }
+
+  /** @return the initial position of the line */
+  public int getId()
+  {
+    return this.id;
+  }
+
+  /** @param id the initial position of the line */
+  public void setId(int id)
+  {
+    this.id = id;
+  }
+
+  /** @return the vector of sequence groups held by this object, not a copy */
+  public Vector<String> getSequences()
+  {
+    return this.sequences;
+  }
+
+  /** @param sequences the vector of sequence groups, stored without copying */
+  public void setSequences(Vector<String> sequences)
+  {
+    this.sequences = sequences;
+  }
+
+  /**
+   * Joins the sequence groups into one string.
+   *
+   * @return every group followed by a single space (so a non-empty result
+   *         ends with a space); empty when there are no groups
+   */
+  public String getSequencesAsString()
+  {
+    StringBuilder joined = new StringBuilder();
+    for (String group : sequences)
+    {
+      joined.append(group);
+      joined.append(" ");
+    }
+    return joined.toString();
+  }
+
+  /**
+   * Renders the line as text: a space, the id, then each group preceded by
+   * a space, terminated by a line feed.
+   *
+   * @return the textual form of the line
+   */
+  @Override
+  public String toString()
+  {
+    StringBuilder line = new StringBuilder();
+    line.append(" ");
+    line.append(this.id);
+    for (String group : sequences)
+    {
+      line.append(" ");
+      line.append(group);
+    }
+    line.append("\n");
+    return line.toString();
+  }
+
+}
package jalview.io.xdb.genbank;
/**
- * <p>Free-format information including an abbreviated form of the organism
- * name, sometimes followed by a molecule type. (See section 3.4.10 of the
- * GenBank release notes for more info.)</p>
- * <p>Entrez Search Field: Organism [ORGN] </p>
- * <p>Search Tip: For some organisms that have well-established common names,
- * such as baker's yeast, mouse, and human, a search for the common name will
- * yield the same results as a search for the scientific name, e.g., a search
- * for "baker's yeast" in the organism field retrieves the same number of
- * documents as "Saccharomyces cerevisiae". This is true because the Organism
- * field is connected to the NCBI Taxonomy Database, which contains
- * cross-references between common names, scientific names, and synonyms for
- * organisms represented in the Sequence databases.</p>
+ * <p>
+ * Free-format information including an abbreviated form of the organism name,
+ * sometimes followed by a molecule type. (See section 3.4.10 of the GenBank
+ * release notes for more info.)
+ * </p>
+ * <p>
+ * Entrez Search Field: Organism [ORGN]
+ * </p>
+ * <p>
+ * Search Tip: For some organisms that have well-established common names, such
+ * as baker's yeast, mouse, and human, a search for the common name will yield
+ * the same results as a search for the scientific name, e.g., a search for
+ * "baker's yeast" in the organism field retrieves the same number of documents
+ * as "Saccharomyces cerevisiae". This is true because the Organism field is
+ * connected to the NCBI Taxonomy Database, which contains cross-references
+ * between common names, scientific names, and synonyms for organisms
+ * represented in the Sequence databases.
+ * </p>
* <h1>Organism</h1>
- * <p>The formal scientific name for the source organism (genus and species,
- * where appropriate) and its lineage, based on the phylogenetic classification
- * scheme used in the NCBI Taxonomy Database. If the complete lineage of an
- * organism is very long, an abbreviated lineage will be shown in the GenBank
- * record and the complete lineage will be available in the Taxonomy Database.
- * (See also the /db_xref=taxon:nnnn Feature qualifer, below.)</p>
- * <p>Entrez Search Field: Organism [ORGN] </p>
- * <p>Search Tip: You can search the Organism field by any node in the taxonomic
+ * <p>
+ * The formal scientific name for the source organism (genus and species, where
+ * appropriate) and its lineage, based on the phylogenetic classification scheme
+ * used in the NCBI Taxonomy Database. If the complete lineage of an organism is
+ * very long, an abbreviated lineage will be shown in the GenBank record and the
+ * complete lineage will be available in the Taxonomy Database. (See also the
+ * /db_xref=taxon:nnnn Feature qualifier, below.)
+ * </p>
+ * <p>
+ * Entrez Search Field: Organism [ORGN]
+ * </p>
+ * <p>
+ * Search Tip: You can search the Organism field by any node in the taxonomic
* hierarchy, e.g., you can search for the term "Saccharomyces cerevisiae",
* "Saccharomycetales", "Ascomycota", etc. to retrieve all the sequences from
- * organisms in a particular taxon. </p>
+ * organisms in a particular taxon.
+ * </p>
*
*/
-public class GenBankSource {
- private String source="";
- private String organism="";
- private String taxonomic="";
+public class GenBankSource
+{
+ private String source = "";
- public GenBankSource() {
- }
+ private String organism = "";
- @Override
- public String toString() {
- return String.format("%s\n\t%s\n\t%s", getSource(), getOrganism(), getTaxonomic());
- }
+ private String taxonomic = "";
- /**
- * @return the source
- */
- public String getSource() {
- return source;
- }
+ public GenBankSource()
+ {
+ }
- /**
- * @param source the source to set
- */
- public void setSource(String source) {
- this.source = source;
- }
+ @Override
+ public String toString()
+ {
+ return String.format("%s\n\t%s\n\t%s", getSource(), getOrganism(),
+ getTaxonomic());
+ }
- /**
- * @return the organism
- */
- public String getOrganism() {
- return organism;
- }
+ /**
+ * @return the source
+ */
+ public String getSource()
+ {
+ return source;
+ }
- /**
- * @param organism the organism to set
- */
- public void setOrganism(String organism) {
- this.organism = organism;
- }
+ /**
+ * @param source
+ * the source to set
+ */
+ public void setSource(String source)
+ {
+ this.source = source;
+ }
- /**
- * @return the taxonomic
- */
- public String getTaxonomic() {
- return taxonomic;
- }
+ /**
+ * @return the organism
+ */
+ public String getOrganism()
+ {
+ return organism;
+ }
- /**
- * @param taxonomic the taxonomic to set
- */
- public void setTaxonomic(String taxonomic) {
- this.taxonomic = taxonomic;
- }
+ /**
+ * @param organism
+ * the organism to set
+ */
+ public void setOrganism(String organism)
+ {
+ this.organism = organism;
+ }
+
+ /**
+ * @return the taxonomic
+ */
+ public String getTaxonomic()
+ {
+ return taxonomic;
+ }
+
+ /**
+ * @param taxonomic
+ * the taxonomic to set
+ */
+ public void setTaxonomic(String taxonomic)
+ {
+ this.taxonomic = taxonomic;
+ }
}
package jalview.io.xdb.genbank;
/**
- * <p>A nucleotide sequence identification number that represents a single,
+ * <p>
+ * A nucleotide sequence identification number that represents a single,
* specific sequence in the GenBank database. This identification number uses
- * the accession.version format implemented by GenBank/EMBL/DDBJ in
- * February 1999.</p>
- * <p>If there is any change to the sequence data (even a single base), the
- * version number will be increased, e.g., U12345.1 → U12345.2, but the
- * accession portion will remain stable.</p>
- * <p>The accession.version system of sequence identifiers runs parallel to
- * the GI number system, i.e., when any change is made to a sequence, it
- * receives a new GI number AND an increase to its version number.</p>
- * <p>For more information, see section 1.3.2 of the GenBank 111.0 release
- * notes, and section 3.4.7 of the current GenBank release notes.</p>
- * <p>A Sequence Revision History tool is available to track the various GI
- * numbers, version numbers, and update dates for sequences that appeared in
- * a specific GenBank record (more information and example).</p>
- * <p>More details about sequence identification numbers and the difference
- * between GI number and version are provided in Sequence Identifiers:
- * A Historical Note.</p>
- * <p>Entrez Search Field: use the default setting of "All Fields"</p>
+ * the accession.version format implemented by GenBank/EMBL/DDBJ in February
+ * 1999.
+ * </p>
+ * <p>
+ * If there is any change to the sequence data (even a single base), the version
+ * number will be increased, e.g., U12345.1 → U12345.2, but the accession
+ * portion will remain stable.
+ * </p>
+ * <p>
+ * The accession.version system of sequence identifiers runs parallel to the GI
+ * number system, i.e., when any change is made to a sequence, it receives a new
+ * GI number AND an increase to its version number.
+ * </p>
+ * <p>
+ * For more information, see section 1.3.2 of the GenBank 111.0 release notes,
+ * and section 3.4.7 of the current GenBank release notes.
+ * </p>
+ * <p>
+ * A Sequence Revision History tool is available to track the various GI
+ * numbers, version numbers, and update dates for sequences that appeared in a
+ * specific GenBank record (more information and example).
+ * </p>
+ * <p>
+ * More details about sequence identification numbers and the difference between
+ * GI number and version are provided in Sequence Identifiers: A Historical
+ * Note.
+ * </p>
+ * <p>
+ * Entrez Search Field: use the default setting of "All Fields"
+ * </p>
* <h1>GI</h1>
- * <p>"GenInfo Identifier" sequence identification number, in this case, for
- * the nucleotide sequence. If a sequence changes in any way, a new GI number
- * will be assigned.</p>
- * <p>A separate GI number is also assigned to each protein translation within
- * a nucleotide sequence record, and a new GI is assigned if the protein
- * translation changes in any way (see below).</p>
- * <p>GI sequence identifiers run parallel to the new accession.version system
- * of sequence identifiers. For more information, see the description of Version,
- * above, and section 3.4.7 of the current GenBank release notes.</p>
- * <p>A Sequence Revision History tool is available to track the various GI
+ * <p>
+ * "GenInfo Identifier" sequence identification number, in this case, for the
+ * nucleotide sequence. If a sequence changes in any way, a new GI number will
+ * be assigned.
+ * </p>
+ * <p>
+ * A separate GI number is also assigned to each protein translation within a
+ * nucleotide sequence record, and a new GI is assigned if the protein
+ * translation changes in any way (see below).
+ * </p>
+ * <p>
+ * GI sequence identifiers run parallel to the new accession.version system of
+ * sequence identifiers. For more information, see the description of Version,
+ * above, and section 3.4.7 of the current GenBank release notes.
+ * </p>
+ * <p>
+ * A Sequence Revision History tool is available to track the various GI
* numbers, version numbers, and update dates for sequences that appeared in a
- * specific GenBank record (more information and example).</p>
- * <p>More details about sequence identification numbers and the difference
- * between GI number and version are provided in Sequence Identifiers: A
- * Historical Note.</p>
- * <p>Entrez Search Field: use the default setting of "All Fields"</p>
+ * specific GenBank record (more information and example).
+ * </p>
+ * <p>
+ * More details about sequence identification numbers and the difference between
+ * GI number and version are provided in Sequence Identifiers: A Historical
+ * Note.
+ * </p>
+ * <p>
+ * Entrez Search Field: use the default setting of "All Fields"
+ * </p>
+ *
* @author Dieval Guizelini
* @see Entry
*/
-public class GenBankVersion {
- private String version = "";
- private String gi = "";
-
- public GenBankVersion() {
- }
+public class GenBankVersion
+{
+ private String version = "";
+ private String gi = "";
- /**
- * @return the version
- */
- public String getVersion() {
- return version;
- }
+ public GenBankVersion()
+ {
+ }
- /**
- * @param version the version to set
- */
- public void setVersion(String version) {
- this.version = version;
- }
+ /**
+ * @return the version
+ */
+ public String getVersion()
+ {
+ return version;
+ }
- /**
- * @return the gi
- */
- public String getGI() {
- return gi;
- }
+ /**
+ * @param version
+ * the version to set
+ */
+ public void setVersion(String version)
+ {
+ this.version = version;
+ }
- /**
- * @param gi the gi to set
- */
- public void setGI(String gi) {
- this.gi = gi;
- }
+ /**
+ * @return the gi
+ */
+ public String getGI()
+ {
+ return gi;
+ }
+ /**
+ * @param gi
+ * the gi to set
+ */
+ public void setGI(String gi)
+ {
+ this.gi = gi;
+ }
- /**
- * Version section in GenBank File Format is text with two fields (version and GI).
- *
- * @return version+" "+gi
- */
- @Override
- public String toString() {
- return String.format("%s %s",version,gi);
- }
+ /**
+ * Version section in GenBank File Format is text with two fields (version and
+ * GI).
+ *
+ * @return version+" "+gi
+ */
+ @Override
+ public String toString()
+ {
+ return String.format("%s %s", version, gi);
+ }
}
import org.junit.Test;
-public class GenBankTest {
-// private final static File GENBANK_FILE = new File("test/jalview/io/V00505.gb");
-// private final static File GENBANK_FILE = new File("test/jalview/io/NC_000011.10.gb");
- private final static File GENBANK_FILE = new File("test/jalview/io/M92650.1.gb");
+public class GenBankTest
+{
+ // private final static File GENBANK_FILE = new
+ // File("test/jalview/io/V00505.gb");
+ // private final static File GENBANK_FILE = new
+ // File("test/jalview/io/NC_000011.10.gb");
+ private final static File GENBANK_FILE = new File(
+ "test/jalview/io/M92650.1.gb");
- @Test
- public void testParsing(){
- testFileIOwithFormat(GENBANK_FILE, "GENBANK");
- }
- /**
- * test alignment data in given file can be imported, exported and reimported
- * with no dataloss
- *
- * @param f
- * - source datafile (IdentifyFile.identify() should work with it)
- * @param ioformat
- * - label for IO class used to write and read back in the data from
- * f
- */
- public static void testFileIOwithFormat(File f, String ioformat)
- {
- System.out.println("Reading file: " + f);
- String ff = f.getPath();
- try
- {
- AppletFormatAdapter rf = new AppletFormatAdapter();
+ @Test
+ public void testParsing()
+ {
+ testFileIOwithFormat(GENBANK_FILE, "GENBANK");
+ }
- Alignment al = rf.readFile(ff, AppletFormatAdapter.FILE,
- new IdentifyFile().Identify(ff, AppletFormatAdapter.FILE));
+ /**
+ * test alignment data in given file can be imported, exported and reimported
+ * with no dataloss
+ *
+ * @param f
+ * - source datafile (IdentifyFile.identify() should work with it)
+ * @param ioformat
+ * - label for IO class used to write and read back in the data from
+ * f
+ */
+ public static void testFileIOwithFormat(File f, String ioformat)
+ {
+ System.out.println("Reading file: " + f);
+ String ff = f.getPath();
+ try
+ {
+ AppletFormatAdapter rf = new AppletFormatAdapter();
- assertNotNull("Couldn't read supplied alignment data.", al);
+ Alignment al = rf.readFile(ff, AppletFormatAdapter.FILE,
+ new IdentifyFile().Identify(ff, AppletFormatAdapter.FILE));
- // make sure dataset is initialised ? not sure about this
- for (int i = 0; i < al.getSequencesArray().length; ++i)
- {
- al.getSequenceAt(i).setDatasetSequence(al.getSequenceAt(i));
- }
- String outputfile = rf.formatSequences(ioformat, al, true);
- System.out.println("Output file in '"+ioformat+"':\n"+outputfile+"\n<<EOF\n");
- // test for consistency in io
- Alignment al_input = new AppletFormatAdapter().readFile(outputfile,
- AppletFormatAdapter.PASTE, ioformat);
- assertNotNull("Couldn't parse reimported alignment data.", al_input);
+ assertNotNull("Couldn't read supplied alignment data.", al);
- String identifyoutput = new IdentifyFile().Identify(outputfile,
- AppletFormatAdapter.PASTE);
- assertNotNull("Identify routine failed for outputformat " + ioformat,
- identifyoutput);
- assertTrue(
- "Identify routine could not recognise output generated by '"
- + ioformat + "' writer",
- ioformat.equals(identifyoutput));
- testAlignmentEquivalence(al, al_input);
- } catch (Exception e)
- {
- e.printStackTrace();
- assertTrue("Couln't format the alignment for output file.", false);
- }
- }
- /**
- * assert alignment equivalence
- *
- * @param al
- * 'original'
- * @param al_input
- * 'secondary' or generated alignment from some datapreserving
- * transformation
- */
- public static void testAlignmentEquivalence(AlignmentI al,
- AlignmentI al_input)
- {
- assertNotNull("Original alignment was null", al);
- assertNotNull("Generated alignment was null", al_input);
+ // make sure dataset is initialised ? not sure about this
+ for (int i = 0; i < al.getSequencesArray().length; ++i)
+ {
+ al.getSequenceAt(i).setDatasetSequence(al.getSequenceAt(i));
+ }
+ String outputfile = rf.formatSequences(ioformat, al, true);
+ System.out.println("Output file in '" + ioformat + "':\n"
+ + outputfile + "\n<<EOF\n");
+ // test for consistency in io
+ Alignment al_input = new AppletFormatAdapter().readFile(outputfile,
+ AppletFormatAdapter.PASTE, ioformat);
+ assertNotNull("Couldn't parse reimported alignment data.", al_input);
- assertTrue(
- "Alignment dimension mismatch: original contains "
- + al.getHeight() + " and generated has "
- + al_input.getHeight() + " sequences; original has "
- + al.getWidth() + " and generated has "
- + al_input.getWidth() + " columns.",
- al.getHeight() == al_input.getHeight()
- && al.getWidth() == al_input.getWidth());
+ String identifyoutput = new IdentifyFile().Identify(outputfile,
+ AppletFormatAdapter.PASTE);
+ assertNotNull("Identify routine failed for outputformat " + ioformat,
+ identifyoutput);
+ assertTrue(
+ "Identify routine could not recognise output generated by '"
+ + ioformat + "' writer",
+ ioformat.equals(identifyoutput));
+ testAlignmentEquivalence(al, al_input);
+ } catch (Exception e)
+ {
+ e.printStackTrace();
+ assertTrue("Couln't format the alignment for output file.", false);
+ }
+ }
- // check Alignment annotation
- AlignmentAnnotation[] aa_new = al_input.getAlignmentAnnotation();
- AlignmentAnnotation[] aa_original = al.getAlignmentAnnotation();
+ /**
+ * assert alignment equivalence
+ *
+ * @param al
+ * 'original'
+ * @param al_input
+ * 'secondary' or generated alignment from some datapreserving
+ * transformation
+ */
+ public static void testAlignmentEquivalence(AlignmentI al,
+ AlignmentI al_input)
+ {
+ assertNotNull("Original alignment was null", al);
+ assertNotNull("Generated alignment was null", al_input);
- // note - at moment we do not distinguish between alignment without any
- // annotation rows and alignment with no annotation row vector
- // we might want to revise this in future
- int aa_new_size = (aa_new == null ? 0 : aa_new.length), aa_original_size = (aa_original == null ? 0
- : aa_original.length);
- Map<Integer,java.util.BitSet> orig_groups=new HashMap<Integer,java.util.BitSet>(),new_groups=new HashMap<Integer,java.util.BitSet>();
+ assertTrue(
+ "Alignment dimension mismatch: original contains "
+ + al.getHeight() + " and generated has "
+ + al_input.getHeight() + " sequences; original has "
+ + al.getWidth() + " and generated has "
+ + al_input.getWidth() + " columns.",
+ al.getHeight() == al_input.getHeight()
+ && al.getWidth() == al_input.getWidth());
- if (aa_new != null && aa_original != null)
- {
- for (int i = 0; i < aa_original.length; i++)
- {
- if (aa_new.length>i) {
- assertTrue("Different alignment annotation at position "+i,
- equalss(aa_original[i], aa_new[i]));
- // compare graphGroup or graph properties - needed to verify JAL-1299
- assertTrue("Graph type not identical.",aa_original[i].graph==aa_new[i].graph);
- assertTrue("Visibility not identical.", aa_original[i].visible==aa_new[i].visible);
- assertTrue(
- "Threshold line not identical.",
- aa_original[i].threshold == null ? aa_new[i].threshold == null
- : aa_original[i].threshold
- .equals(aa_new[i].threshold));
- // graphGroup may differ, but pattern should be the same
- Integer o_ggrp=new Integer(aa_original[i].graphGroup+2),n_ggrp=new Integer(aa_new[i].graphGroup+2);
- BitSet orig_g=orig_groups.get(o_ggrp),new_g=new_groups.get(n_ggrp);
- if (orig_g==null) {
- orig_groups.put(o_ggrp,orig_g= new BitSet());
- }
- if (new_g==null) {
- new_groups.put(n_ggrp, new_g=new BitSet());
- }
- assertTrue("Graph Group pattern differs at annotation "+i, orig_g.equals(new_g));
- orig_g.set(i); new_g.set(i);
- } else {
- System.err.println("No matching annotation row for "+aa_original[i].toString());
- }
- }
- }
- assertTrue(
- "Generated and imported alignment have different annotation sets ("
- + aa_new_size + " != " + aa_original_size + ")",
- aa_new_size == aa_original_size);
+ // check Alignment annotation
+ AlignmentAnnotation[] aa_new = al_input.getAlignmentAnnotation();
+ AlignmentAnnotation[] aa_original = al.getAlignmentAnnotation();
- // check sequences, annotation and features
- SequenceI[] seq_original = new SequenceI[al.getSequencesArray().length];
- seq_original = al.getSequencesArray();
- SequenceI[] seq_new = new SequenceI[al_input.getSequencesArray().length];
- seq_new = al_input.getSequencesArray();
- SequenceFeature[] sequenceFeatures_original, sequenceFeatures_new;
- AlignmentAnnotation annot_original, annot_new;
- //
- for (int i = 0; i < al.getSequencesArray().length; i++)
- {
- String name = seq_original[i].getName();
- int start = seq_original[i].getStart();
- int end = seq_original[i].getEnd();
- System.out.println("Check sequence: " + name + "/" + start + "-"
- + end);
+ // note - at moment we do not distinguish between alignment without any
+ // annotation rows and alignment with no annotation row vector
+ // we might want to revise this in future
+ int aa_new_size = (aa_new == null ? 0 : aa_new.length), aa_original_size = (aa_original == null ? 0
+ : aa_original.length);
+ Map<Integer, java.util.BitSet> orig_groups = new HashMap<Integer, java.util.BitSet>(), new_groups = new HashMap<Integer, java.util.BitSet>();
- // search equal sequence
- for (int in = 0; in < al_input.getSequencesArray().length; in++)
- {
- if (name.equals(seq_new[in].getName())
- && start == seq_new[in].getStart()
- && end == seq_new[in].getEnd())
- {
- String ss_original = seq_original[i].getSequenceAsString();
- String ss_new = seq_new[in].getSequenceAsString();
- assertTrue("The sequences " + name + "/" + start + "-" + end
- + " are not equal", ss_original.equals(ss_new));
+ if (aa_new != null && aa_original != null)
+ {
+ for (int i = 0; i < aa_original.length; i++)
+ {
+ if (aa_new.length > i)
+ {
+ assertTrue("Different alignment annotation at position " + i,
+ equalss(aa_original[i], aa_new[i]));
+ // compare graphGroup or graph properties - needed to verify JAL-1299
+ assertTrue("Graph type not identical.",
+ aa_original[i].graph == aa_new[i].graph);
+ assertTrue("Visibility not identical.",
+ aa_original[i].visible == aa_new[i].visible);
+ assertTrue(
+ "Threshold line not identical.",
+ aa_original[i].threshold == null ? aa_new[i].threshold == null
+ : aa_original[i].threshold
+ .equals(aa_new[i].threshold));
+ // graphGroup may differ, but pattern should be the same
+ Integer o_ggrp = new Integer(aa_original[i].graphGroup + 2), n_ggrp = new Integer(
+ aa_new[i].graphGroup + 2);
+ BitSet orig_g = orig_groups.get(o_ggrp), new_g = new_groups
+ .get(n_ggrp);
+ if (orig_g == null)
+ {
+ orig_groups.put(o_ggrp, orig_g = new BitSet());
+ }
+ if (new_g == null)
+ {
+ new_groups.put(n_ggrp, new_g = new BitSet());
+ }
+ assertTrue("Graph Group pattern differs at annotation " + i,
+ orig_g.equals(new_g));
+ orig_g.set(i);
+ new_g.set(i);
+ }
+ else
+ {
+ System.err.println("No matching annotation row for "
+ + aa_original[i].toString());
+ }
+ }
+ }
+ assertTrue(
+ "Generated and imported alignment have different annotation sets ("
+ + aa_new_size + " != " + aa_original_size + ")",
+ aa_new_size == aa_original_size);
- assertTrue(
- "Sequence Features were not equivalent",
- (seq_original[i].getSequenceFeatures() == null && seq_new[in]
- .getSequenceFeatures() == null)
- || (seq_original[i].getSequenceFeatures() != null && seq_new[in]
- .getSequenceFeatures() != null));
- // compare sequence features
- if (seq_original[i].getSequenceFeatures() != null
- && seq_new[in].getSequenceFeatures() != null)
- {
- System.out.println("There are feature!!!");
- sequenceFeatures_original = new SequenceFeature[seq_original[i]
- .getSequenceFeatures().length];
- sequenceFeatures_original = seq_original[i]
- .getSequenceFeatures();
- sequenceFeatures_new = new SequenceFeature[seq_new[in]
- .getSequenceFeatures().length];
- sequenceFeatures_new = seq_new[in].getSequenceFeatures();
+ // check sequences, annotation and features
+ SequenceI[] seq_original = new SequenceI[al.getSequencesArray().length];
+ seq_original = al.getSequencesArray();
+ SequenceI[] seq_new = new SequenceI[al_input.getSequencesArray().length];
+ seq_new = al_input.getSequencesArray();
+ SequenceFeature[] sequenceFeatures_original, sequenceFeatures_new;
+ AlignmentAnnotation annot_original, annot_new;
+ //
+ for (int i = 0; i < al.getSequencesArray().length; i++)
+ {
+ String name = seq_original[i].getName();
+ int start = seq_original[i].getStart();
+ int end = seq_original[i].getEnd();
+ System.out.println("Check sequence: " + name + "/" + start + "-"
+ + end);
- assertTrue("different number of features", seq_original[i]
- .getSequenceFeatures().length == seq_new[in]
- .getSequenceFeatures().length);
+ // search equal sequence
+ for (int in = 0; in < al_input.getSequencesArray().length; in++)
+ {
+ if (name.equals(seq_new[in].getName())
+ && start == seq_new[in].getStart()
+ && end == seq_new[in].getEnd())
+ {
+ String ss_original = seq_original[i].getSequenceAsString();
+ String ss_new = seq_new[in].getSequenceAsString();
+ assertTrue("The sequences " + name + "/" + start + "-" + end
+ + " are not equal", ss_original.equals(ss_new));
- for (int feat = 0; feat < seq_original[i].getSequenceFeatures().length; feat++)
- {
- assertTrue("Different features",
- sequenceFeatures_original[feat]
- .equals(sequenceFeatures_new[feat]));
- }
- }
- // compare alignment annotation
- if (al.getSequenceAt(i).getAnnotation() != null
- && al_input.getSequenceAt(in).getAnnotation() != null)
- {
- for (int j = 0; j < al.getSequenceAt(i).getAnnotation().length; j++)
- {
- if (al.getSequenceAt(i).getAnnotation()[j] != null
- && al_input.getSequenceAt(in).getAnnotation()[j] != null)
- {
- annot_original = al.getSequenceAt(i).getAnnotation()[j];
- annot_new = al_input.getSequenceAt(in).getAnnotation()[j];
- assertTrue("Different annotation elements",
- equalss(annot_original, annot_new));
- }
- }
- }
- else if (al.getSequenceAt(i).getAnnotation() == null
- && al_input.getSequenceAt(in).getAnnotation() == null)
- {
- System.out.println("No annotations");
- }
- else if (al.getSequenceAt(i).getAnnotation() != null
- && al_input.getSequenceAt(in).getAnnotation() == null)
- {
- assertTrue("Annotations differed between sequences ("
- + al.getSequenceAt(i).getName() + ") and ("
- + al_input.getSequenceAt(i).getName() + ")", false);
- }
- break;
- }
- }
- }
- }
- /*
- * compare annotations
- */
- private static boolean equalss(AlignmentAnnotation annot_or,
- AlignmentAnnotation annot_new)
- {
- if (annot_or.annotations.length != annot_new.annotations.length)
- {
- System.err.println("Different lengths for annotation row elements: "+annot_or.annotations.length +"!="+ annot_new.annotations.length);
- return false;
- }
- for (int i = 0; i < annot_or.annotations.length; i++)
- {
- Annotation an_or=annot_or.annotations[i],an_new=annot_new.annotations[i];
- if (an_or != null
- && an_new!= null)
- {
- if (!an_or.displayCharacter.trim()
- .equals(an_new.displayCharacter.trim())
- || !(""+an_or.secondaryStructure).trim().equals((""+an_new.secondaryStructure).trim())
- || ((!an_or.description.equals(an_new.description)) && (an_or.description == null
- || an_new.description == null || !an_or.description
- .equals(an_new.description))))
- {
- System.err.println("Annotation Element Mismatch\nElement "+i+" in original: "+annot_or.annotations[i].toString()+"\nElement "+i+" in new: "+annot_new.annotations[i].toString());
- return false;
- }
- }
- else if (annot_or.annotations[i] == null
- && annot_new.annotations[i] == null)
- {
- continue;
- }
- else
- {
- System.err.println("Annotation Element Mismatch\nElement "+i+" in original: "+(annot_or.annotations[i]==null ? "is null" : annot_or.annotations[i].toString())+"\nElement "+i+" in new: "+(annot_new.annotations[i] == null ? "is null" : annot_new.annotations[i].toString()));
- return false;
- }
- }
- return true;
- }
+ assertTrue(
+ "Sequence Features were not equivalent",
+ (seq_original[i].getSequenceFeatures() == null && seq_new[in]
+ .getSequenceFeatures() == null)
+ || (seq_original[i].getSequenceFeatures() != null && seq_new[in]
+ .getSequenceFeatures() != null));
+ // compare sequence features
+ if (seq_original[i].getSequenceFeatures() != null
+ && seq_new[in].getSequenceFeatures() != null)
+ {
+ System.out.println("There are feature!!!");
+ sequenceFeatures_original = new SequenceFeature[seq_original[i]
+ .getSequenceFeatures().length];
+ sequenceFeatures_original = seq_original[i]
+ .getSequenceFeatures();
+ sequenceFeatures_new = new SequenceFeature[seq_new[in]
+ .getSequenceFeatures().length];
+ sequenceFeatures_new = seq_new[in].getSequenceFeatures();
+
+ assertTrue("different number of features", seq_original[i]
+ .getSequenceFeatures().length == seq_new[in]
+ .getSequenceFeatures().length);
+
+ for (int feat = 0; feat < seq_original[i].getSequenceFeatures().length; feat++)
+ {
+ assertTrue("Different features",
+ sequenceFeatures_original[feat]
+ .equals(sequenceFeatures_new[feat]));
+ }
+ }
+ // compare alignment annotation
+ if (al.getSequenceAt(i).getAnnotation() != null
+ && al_input.getSequenceAt(in).getAnnotation() != null)
+ {
+ for (int j = 0; j < al.getSequenceAt(i).getAnnotation().length; j++)
+ {
+ if (al.getSequenceAt(i).getAnnotation()[j] != null
+ && al_input.getSequenceAt(in).getAnnotation()[j] != null)
+ {
+ annot_original = al.getSequenceAt(i).getAnnotation()[j];
+ annot_new = al_input.getSequenceAt(in).getAnnotation()[j];
+ assertTrue("Different annotation elements",
+ equalss(annot_original, annot_new));
+ }
+ }
+ }
+ else if (al.getSequenceAt(i).getAnnotation() == null
+ && al_input.getSequenceAt(in).getAnnotation() == null)
+ {
+ System.out.println("No annotations");
+ }
+ else if (al.getSequenceAt(i).getAnnotation() != null
+ && al_input.getSequenceAt(in).getAnnotation() == null)
+ {
+ assertTrue("Annotations differed between sequences ("
+ + al.getSequenceAt(i).getName() + ") and ("
+ + al_input.getSequenceAt(i).getName() + ")", false);
+ }
+ break;
+ }
+ }
+ }
+ }
+
+  /**
+   * Compares two annotation rows element by element.
+   * 
+   * Rows match when they have the same number of elements and, at every
+   * position, either both elements are null or both are non-null with the same
+   * trimmed display character, the same secondary structure symbol and the
+   * same description (two null descriptions count as the same). The first
+   * difference found is reported on System.err.
+   * 
+   * @param annot_or
+   *          'original' annotation row
+   * @param annot_new
+   *          annotation row to check against the original
+   * @return true if no difference was found, false otherwise
+   */
+  private static boolean equalss(AlignmentAnnotation annot_or,
+          AlignmentAnnotation annot_new)
+  {
+    if (annot_or.annotations.length != annot_new.annotations.length)
+    {
+      System.err.println("Different lengths for annotation row elements: "
+              + annot_or.annotations.length + "!="
+              + annot_new.annotations.length);
+      return false;
+    }
+    for (int i = 0; i < annot_or.annotations.length; i++)
+    {
+      Annotation an_or = annot_or.annotations[i], an_new = annot_new.annotations[i];
+      if (an_or == null && an_new == null)
+      {
+        // nothing on either side at this position
+        continue;
+      }
+      if (an_or == null || an_new == null)
+      {
+        System.err.println("Annotation Element Mismatch\nElement "
+                + i
+                + " in original: "
+                + (an_or == null ? "is null" : an_or.toString())
+                + "\nElement "
+                + i
+                + " in new: "
+                + (an_new == null ? "is null" : an_new.toString()));
+        return false;
+      }
+      // Null-safe description check: the previous form called equals() on the
+      // original description before testing it for null, so a null description
+      // raised a NullPointerException instead of being compared.
+      boolean descriptionDiffers = (an_or.description == null) ? (an_new.description != null)
+              : !an_or.description.equals(an_new.description);
+      boolean displayDiffers = !an_or.displayCharacter.trim().equals(
+              an_new.displayCharacter.trim());
+      boolean structureDiffers = !("" + an_or.secondaryStructure).trim()
+              .equals(("" + an_new.secondaryStructure).trim());
+      if (displayDiffers || structureDiffers || descriptionDiffers)
+      {
+        System.err.println("Annotation Element Mismatch\nElement " + i
+                + " in original: " + an_or.toString()
+                + "\nElement " + i + " in new: "
+                + an_new.toString());
+        return false;
+      }
+    }
+    return true;
+  }
}