2 // FORESTER -- software libraries and applications
3 // for evolutionary biology research and applications.
5 // Copyright (C) 2008-2009 Christian M. Zmasek
6 // Copyright (C) 2008-2009 Burnham Institute for Medical Research
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 // Contact: phylosoft @ gmail . com
24 // WWW: www.phylosoft.org/forester
26 package org.forester.io.parsers.phyloxml;
28 import java.util.HashSet;
30 import java.util.regex.Pattern;
32 public final class PhyloXmlUtil {
// Matches 1 to 20 non-whitespace characters (by its name, intended for sequence symbols).
34 public final static Pattern SEQUENCE_SYMBOL_PATTERN = Pattern.compile( "\\S{1,20}" );
// Matches 1 to 10 characters drawn from ASCII letters, digits and underscore
// (by its name, intended for taxonomy codes).
// NOTE(review): the identifier is spelled "TAXOMONY" (sic); it is public, so a rename would affect callers.
35 public final static Pattern TAXOMONY_CODE_PATTERN = Pattern.compile( "[a-zA-Z0-9_]{1,10}" );
// Matches one or more of [a-zA-Z0-9_.] followed by one or more non-whitespace characters
// (by its name, intended for literature-reference DOIs).
// NOTE(review): under a whole-string match this accepts any whitespace-free string of
// length >= 2 whose first character is a letter, digit, underscore or dot -- confirm
// that this is the intended strictness.
36 public final static Pattern LIT_REF_DOI_PATTERN = Pattern
37 .compile( "[a-zA-Z0-9_\\.]+\\S+" );
// Set of sequence type names; "rna", "protein" and "dna" are added to it further down in this class.
// NOTE(review): this is a public, modifiable HashSet, so any caller can add or remove entries.
38 public final static Set<String> SEQUENCE_TYPES = new HashSet<String>();
// Set of taxonomy rank names ("domain" ... "other"); populated further down in this class.
// NOTE(review): this is a public, modifiable HashSet, so any caller can add or remove entries.
39 public final static Set<String> TAXONOMY_RANKS = new HashSet<String>();
// Presumably the number of digits doubles are rounded to when writing phyloXML -- verify against the writer.
40 public static final int ROUNDING_DIGITS_FOR_PHYLOXML_DOUBLE_OUTPUT = 9;
// String constants; judging by the names, the "ref" prefix and the datatype used for
// vector-valued properties -- TODO confirm against the code that reads/writes properties.
41 public static final String VECTOR_PROPERTY_REF = "vector:index=";
42 public static final String VECTOR_PROPERTY_TYPE = "xsd:decimal";
// Presumably the provider identifier used for UniProt taxonomy ids -- verify against callers.
43 public static final String UNIPROT_TAX_PROVIDER = "uniprot";
// Populate SEQUENCE_TYPES with the three sequence type names.
45 SEQUENCE_TYPES.add( "rna" );
46 SEQUENCE_TYPES.add( "protein" );
47 SEQUENCE_TYPES.add( "dna" );
// Populate TAXONOMY_RANKS with the rank names, listed roughly from the broadest rank
// ("domain") down to the narrowest ("strain"), followed by the catch-all values
// "unknown" and "other". The set is a HashSet, so this ordering is not retained at runtime.
// NOTE(review): presumably mirrors the rank enumeration of the phyloXML schema -- verify.
48 TAXONOMY_RANKS.add( "domain" );
49 TAXONOMY_RANKS.add( "superkingdom" );
50 TAXONOMY_RANKS.add( "kingdom" );
51 TAXONOMY_RANKS.add( "subkingdom" );
52 TAXONOMY_RANKS.add( "branch" );
53 TAXONOMY_RANKS.add( "infrakingdom" );
54 TAXONOMY_RANKS.add( "superphylum" );
55 TAXONOMY_RANKS.add( "phylum" );
56 TAXONOMY_RANKS.add( "subphylum" );
57 TAXONOMY_RANKS.add( "infraphylum" );
58 TAXONOMY_RANKS.add( "microphylum" );
59 TAXONOMY_RANKS.add( "superdivision" );
60 TAXONOMY_RANKS.add( "division" );
61 TAXONOMY_RANKS.add( "subdivision" );
62 TAXONOMY_RANKS.add( "infradivision" );
63 TAXONOMY_RANKS.add( "superclass" );
64 TAXONOMY_RANKS.add( "class" );
65 TAXONOMY_RANKS.add( "subclass" );
66 TAXONOMY_RANKS.add( "infraclass" );
67 TAXONOMY_RANKS.add( "superlegion" );
68 TAXONOMY_RANKS.add( "legion" );
69 TAXONOMY_RANKS.add( "sublegion" );
70 TAXONOMY_RANKS.add( "infralegion" );
71 TAXONOMY_RANKS.add( "supercohort" );
72 TAXONOMY_RANKS.add( "cohort" );
73 TAXONOMY_RANKS.add( "subcohort" );
74 TAXONOMY_RANKS.add( "infracohort" );
75 TAXONOMY_RANKS.add( "superorder" );
76 TAXONOMY_RANKS.add( "order" );
77 TAXONOMY_RANKS.add( "suborder" );
78 TAXONOMY_RANKS.add( "superfamily" );
79 TAXONOMY_RANKS.add( "family" );
80 TAXONOMY_RANKS.add( "subfamily" );
81 TAXONOMY_RANKS.add( "supertribe" );
82 TAXONOMY_RANKS.add( "tribe" );
83 TAXONOMY_RANKS.add( "subtribe" );
84 TAXONOMY_RANKS.add( "infratribe" );
85 TAXONOMY_RANKS.add( "genus" );
86 TAXONOMY_RANKS.add( "subgenus" );
87 TAXONOMY_RANKS.add( "superspecies" );
88 TAXONOMY_RANKS.add( "species" );
89 TAXONOMY_RANKS.add( "subspecies" );
90 TAXONOMY_RANKS.add( "variety" );
91 TAXONOMY_RANKS.add( "subvariety" );
92 TAXONOMY_RANKS.add( "form" );
93 TAXONOMY_RANKS.add( "subform" );
94 TAXONOMY_RANKS.add( "cultivar" );
95 TAXONOMY_RANKS.add( "strain" );
96 TAXONOMY_RANKS.add( "unknown" );
97 TAXONOMY_RANKS.add( "other" );