package javajs.util;
import java.io.BufferedReader;
import java.util.Hashtable;
import java.util.Map;
import javajs.api.GenericCifDataParser;
import javajs.api.GenericLineReader;
// BH 11/21/16 -- adds support for array grouping [...] - used in 2016-format magCIF files
/**
*
* A CIF 1.0 tokenizer class for dealing with quoted strings in CIF files.
*
* Subclassed by org.jmol.adapters.readers.cif.Cif2DataParser
*
* Greek letters implemented in Jmol 13.3.9 and only for
* titles and space groups. All other mark ups ignored.
*
*
* Regarding the treatment of single quotes vs. primes in
* CIF files, PMR wrote:
*
*
* * There is a formal grammar for CIF
* (see http://www.iucr.org/iucr-top/cif/index.html)
* which confirms this. The textual explanation is
*
*
* 14. Matching single or double quote characters (' or ") may
* be used to bound a string representing a non-simple data value
* provided the string does not extend over more than one line.
*
*
* 15. Because data values are invariably separated from other
* tokens in the file by white space, such a quote-delimited
* character string may contain instances of the character used
* to delimit the string provided they are not followed by white
* space. For example, the data item
*
* _example 'a dog's life'
*
* is legal; the data value is a dog's life.
*
*
* [PMR - the terminating character(s) are quote+whitespace.
* That would mean that:
*
* _example 'Jones' life'
*
* would be an error.]
*
*
* The CIF format was developed in the late 1980s under the aegis of the
* International Union of Crystallography (I am a consultant to the COMCIFs
* committee). It was ratified by the Union and there have been several
* workshops. mmCIF is an extension of CIF which includes a relational
* structure. The formal publications are:
*
*
* Hall, S. R. (1991). "The STAR File: A New Format for Electronic Data
* Transfer and Archiving", J. Chem. Inform. Comp. Sci., 31, 326-333.
* Hall, S. R., Allen, F. H. and Brown, I. D. (1991). "The Crystallographic
* Information File (CIF): A New Standard Archive File for Crystallography",
* Acta Cryst., A47, 655-685.
* Hall, S.R. & Spadaccini, N. (1994). "The STAR File: Detailed
* Specifications," J. Chem. Info. Comp. Sci., 34, 505-508.
*
*/
public class CifDataParser implements GenericCifDataParser {
/**
 * Reports the CIF syntax version handled by this tokenizer.
 *
 * This class is the CIF 1.0 tokenizer, so the answer is always 1. The method
 * is protected and non-final, so a subclass can report a different version.
 *
 * @return 1
 */
protected int getVersion() {
  final int cifVersion = 1;
  return cifVersion;
}
/**
 * The maximum number of columns (data keys) passed to the parser or found in
 * the file for a given loop_ or category.subkey listing.
 *
 * This value is also the fixed length of the internal column data array
 * allocated by this class.
 */
public static final int KEY_MAX = 100;
/**
 * Line source handed to set(); anything that can deliver a line of text.
 * May be null when a BufferedReader is supplied instead.
 */
private GenericLineReader reader;
/**
 * Buffered source handed to set(). May be null when a GenericLineReader is
 * supplied instead.
 */
private BufferedReader br;
/**
 * The current line of input, as obtained from the buffered reader.
 */
protected String line;
/**
 * The working string (buffer) that the tokenizer operates on.
 */
protected String str;
/**
 * Index of the current character within {@code str}.
 */
protected int ich;
/**
 * Length of {@code str}.
 */
protected int cch;
/**
 * Whether we are processing an unquoted value or key.
 */
protected boolean wasUnquoted;
/**
 * Optional token terminator; in CIF 2.0 this could be } or ].
 * Initialized to the NUL character.
 * NOTE(review): NUL presumably means "no terminator" -- confirm against the
 * tokenizing code.
 */
protected char cterm = '\0';
/**
 * String to return for the CIF data values "." and "?". Initialized to a
 * one-character string holding NUL; replaceable through setNullValue().
 */
protected String nullString = "\0";
/**
 * A flag to create and return Java objects, not strings.
 * Used only by Jmol scripting x = getProperty("cifInfo", filename).
 */
protected boolean asObject;
/**
 * Debugging flag passed in through set(); noted by the original author as
 * unused.
 */
protected boolean debugging;
// ---- private processing fields ----

// NOTE(review): presumably a token that was read ahead without being consumed,
// together with the character index that goes with it -- confirm against the
// peek logic elsewhere in this class.
private Object strPeeked;
private int ichPeeked;
/**
 * Number of columns; reported by getColumnCount().
 */
private int columnCount;
/**
 * Column names; read by index through getColumnName(int).
 */
private String[] columnNames;
/**
 * Column values; read by index through getColumnData(int). Allocated once
 * here with a fixed length of KEY_MAX.
 */
private Object[] columnData = new Object[KEY_MAX];
// NOTE(review): presumably flags whether a loop_ block is being processed and
// whether any data has been collected -- confirm against the parsing code.
private boolean isLoop;
private boolean haveData;
/**
 * Comments at the top of a file, including #\#CIF_2.0, for example.
 * Its string form is what getFileHeader() returns.
 */
private SB fileHeader = new SB();
// Starts out true.
// NOTE(review): presumably cleared once the leading comment section of the
// file has been passed -- confirm.
private boolean isHeader = true;
/**
* Set the string value of what is returned for "." and "?"
*
* @param nullString null here returns "." and "?"; default is "\0"
*
*/
public void setNullValue(String nullString) {
this.nullString = nullString;
}
/**
 * A global, static map that contains field information. The assumption is
 * that if we read a set of fields for, say, atom_site, once in a lifetime,
 * then that should be good forever. Those are static lists. Or should be....
 *
 * Being static, this map is shared by every parser instance.
 * NOTE(review): declared with raw Map/Hashtable types; the key and value types
 * are not evident from this declaration -- confirm before adding generics.
 */
private static Map htFields = new Hashtable();
////////////////////////////////////////////////////////////////
// special tokenizer class
////////////////////////////////////////////////////////////////
/**
 * Creates a parser with no input source attached. The no-argument form exists
 * so that the class can be instantiated by reflection; call set() afterwards
 * to supply the reader.
 */
public CifDataParser() {
  // for reflection
  super();
}
/**
 * Returns the value currently stored for one column.
 *
 * No bounds checking is done here beyond what the array access itself does.
 *
 * @param i index into the column data array
 * @return the object held at that index
 */
@Override
public Object getColumnData(final int i) {
  final Object value = this.columnData[i];
  return value;
}
/**
 * Returns the current column count.
 *
 * @return the value of the column counter
 */
@Override
public int getColumnCount() {
  return this.columnCount;
}
/**
 * Returns the name recorded for one column.
 *
 * No bounds or null checking is done here beyond what the array access itself
 * does.
 *
 * @param i index into the column name array
 * @return the name held at that index
 */
@Override
public String getColumnName(final int i) {
  final String name = this.columnNames[i];
  return name;
}
/**
 * Attaches the input source to this Crystallographic Information File (CIF)
 * data parser.
 *
 * set() should be called immediately upon construction.
 *
 * There are two ways to supply input. One of reader or br should be null;
 * per the original author's note, reader is ignored otherwise. This method
 * itself only stores the three arguments.
 *
 * @param reader anything that can deliver a line of text, or null
 * @param br a standard BufferedReader, or null
 * @param debugging debugging flag, stored as given
 * @return this parser, so that the call can be chained
 */
@Override
public CifDataParser set(GenericLineReader reader, BufferedReader br, boolean debugging) {
  this.debugging = debugging;
  this.br = br;
  this.reader = reader;
  return this;
}
/**
 * Returns the contents of the file header buffer as a string.
 *
 * @return the commented-out section at the start of a CIF file
 */
@Override
public String getFileHeader() {
  final String header = this.fileHeader.toString();
  return header;
}
/**
* Parses all CIF data from the reader supplied via set()
* into a standard Map structure and closes the BufferedReader if
* it exists.
*
* @return Hashtable of models Vector of Hashtable data
*/
@Override
public Map getAllCifData() {
line = "";
String key;
Map data = null, data0 = null;
Map allData = new Hashtable();
Lst