unused/javajs/util/CifDataParser.java

   1 package javajs.util;
   2
   3 import java.io.BufferedReader;
   4
   5 import java.util.Hashtable;
   6
   7 import java.util.Map;
   8
   9 import javajs.api.GenericCifDataParser;
  10 import javajs.api.GenericLineReader;
  11
  12
  13 // BH 11/21/16 -- adds support for array grouping [...] - used in 2016-format magCIF files
  14
  15 /**
  16 *
  17 * A CIF 1.0 tokenizer class for dealing with quoted strings in CIF files.
  18 *
  19 * Subclassed by org.jmol.adapters.readers.cif.Cif2DataParser
  20 *
  21 * Greek letters implemented in Jmol 13.3.9 and only for
  22 * titles and space groups. All other mark ups ignored.
  23 *
  24 *<p>
  25 * regarding the treatment of single quotes vs. primes in
  26 * cif file, PMR wrote:
  27 *</p>
  28 *<p>
  29 *   * There is a formal grammar for CIF
  30 * (see http://www.iucr.org/iucr-top/cif/index.html)
  31 * which confirms this. The textual explanation is
  32 *<p />
  33 *<p>
  34 * 14. Matching single or double quote characters (' or ") may
  35 * be used to bound a string representing a non-simple data value
  36 * provided the string does not extend over more than one line.
  37 *<p />
  38 *<p>
  39 * 15. Because data values are invariably separated from other
  40 * tokens in the file by white space, such a quote-delimited
  41 * character string may contain instances of the character used
  42 * to delimit the string provided they are not followed by white
  43 * space. For example, the data item
  44 *<code>
  45 *  _example  'a dog's life'
  46 *</code>
  47 * is legal; the data value is a dog's life.
  48 *</p>
  49 *<p>
  50 * [PMR - the terminating character(s) are quote+whitespace.
  51 * That would mean that:
  52 *<code>
  53 *  _example 'Jones' life'
  54 *</code>
  55 * would be an error
  56 *</p>
  57 *<p>
  58 * The CIF format was developed in the late 1980s under the aegis of the
  59 * International Union of Crystallography (I am a consultant to the COMCIFs
  60 * committee). It was ratified by the Union and there have been several
  61 * workshops. mmCIF is an extension of CIF which includes a relational
  62 * structure. The formal publications are:
  63 *</p>
  64 *<p>
  65 * Hall, S. R. (1991). "The STAR File: A New Format for Electronic Data
  66 * Transfer and Archiving", J. Chem. Inform. Comp. Sci., 31, 326-333.
  67 * Hall, S. R., Allen, F. H. and Brown, I. D. (1991). "The Crystallographic
  68 * Information File (CIF): A New Standard Archive File for Crystallography",
  69 * Acta Cryst., A47, 655-685.
  70 * Hall, S.R. & Spadaccini, N. (1994). "The STAR File: Detailed
  71 * Specifications," J. Chem. Info. Comp. Sci., 34, 505-508.
  72 *</p>
  73 */
  74
  75 public class CifDataParser implements GenericCifDataParser {
  76
  77   protected int getVersion() {
  78     return 1;
  79   }
  80
  81   /**
  82    * The maximum number of columns (data keys) passed to the parser or found in the file
  83    * for a given loop_ or category.subkey listing.
  84    *
  85    */
  86   public static final int KEY_MAX = 100;
  87
  88   private GenericLineReader reader;
  89   private BufferedReader br;
  90
  91   /**
  92    * from buffered reader
  93    */
  94   protected String line;
  95
  96   /**
  97    * working string (buffer)
  98    *
  99    */
 100   protected String str;
 101
 102   /**
 103    * pointer to current character on str
 104    */
 105   protected int ich;
 106
 107   /**
 108    * length of str
 109    *
 110    */
 111   protected int cch;
 112
 113   /**
 114    * whether we are processing an unquoted value or key
 115    */
 116   protected boolean wasUnquoted;
 117
 118   /**
 119    * optional token terminator; in CIF 2.0 could be } or ]
 120    */
 121   protected char cterm = '\0';
 122
 123   /**
 124    * string to return for CIF data value . and ?
 125    */
 126   protected String nullString = "\0";
 127
 128   /**
 129    * A flag to create and return Java objects, not strings.
 130    * Used only by Jmol scripting x = getProperty("cifInfo", filename).
 131    */
 132   protected boolean asObject;
 133
 134
 135   /**
 136    * debugging flag passed from reader; unused
 137    *
 138    */
 139   protected boolean debugging;
 140
 141
 142   /**
 143    * private processing fields
 144    *
 145    */
 146   private Object strPeeked;
 147   private int ichPeeked;
 148   private int columnCount;
 149   private String[] columnNames;
 150   private Object[] columnData = new Object[KEY_MAX];
 151   private boolean isLoop;
 152   private boolean haveData;
 153
 154   /**
 155    * comments at the top of a file, including #\#CIF_2.0, for example
 156    */
 157   private SB fileHeader = new SB();
 158   private boolean isHeader = true;
 159
 160
 161   /**
 162    * Set the string value of what is returned for "." and "?"
 163    *
 164    * @param nullString null here returns "." and "?"; default is "\0"
 165    *
 166    */
 167   public void setNullValue(String nullString) {
 168     this.nullString  = nullString;
 169   }
 170
 171   /**
 172    * A global, static map that contains field information. The assumption is that
 173    * if we read a set of fields for, say, atom_site, once in a lifetime, then
 174    * that should be good forever. Those are static lists. Or should be....
 175    */
 176   private static Map<String, Integer> htFields = new Hashtable<String, Integer>();
 177
 178   ////////////////////////////////////////////////////////////////
 179   // special tokenizer class
 180   ////////////////////////////////////////////////////////////////
 181
 182   public CifDataParser() {
 183     // for reflection
 184   }
 185
 186   @Override
 187   public Object getColumnData(int i) {
 188     return columnData[i];
 189   }
 190
 191   @Override
 192   public int getColumnCount() {
 193     return columnCount;
 194   }
 195
 196   @Override
 197   public String getColumnName(int i) {
 198     return columnNames[i];
 199   }
 200
 201   /**
 202    * A Chemical Information File data parser.
 203    *
 204    * set() should be called immediately upon construction.
 205    *
 206    * Two options; one of reader or br should be null, or reader will be
 207    * ignored. Just simpler this way...
 208    *
 209    * @param reader  Anything that can deliver a line of text or null
 210    * @param br      A standard BufferedReader.
 211    * @param debugging
 212    *
 213    */
 214   @Override
 215   public CifDataParser set(GenericLineReader reader, BufferedReader br, boolean debugging) {
 216     this.reader = reader;
 217     this.br = br;
 218     this.debugging = debugging;
 219     return this;
 220   }
 221
 222
 223   /**
 224    *
 225    * @return commented-out section at the start of a CIF file.
 226    *
 227    */
 228   @Override
 229   public String getFileHeader() {
 230     return fileHeader.toString();
 231   }
 232
 233
 234   /**
 235    * Parses all CIF data for a reader defined in the constructor
 236    * into a standard Map structure and close the BufferedReader if
 237    * it exists.
 238    *
 239    * @return Hashtable of models Vector of Hashtable data
 240    */
 241   @Override
 242   public Map<String, Object> getAllCifData() {
 243     line = "";
 244     String key;
 245     Map<String, Object> data = null, data0 = null;
 246     Map<String, Object> allData = new Hashtable<String, Object>();
 247     Lst<Map<String, Object>> models = new  Lst<Map<String,Object>>();
 248     allData.put("models", models);
 249     asObject = (getVersion() >= 2);
 250     nullString = null;
 251     Lst<Map<String, Object>> saveFrames = new Lst<Map<String, Object>>();
 252     try {
 253       while ((key = getNextToken()) != null) {
 254         if (key.startsWith("global_") || key.startsWith("data_")) {
 255           models.addLast(data0 = data = new Hashtable<String, Object>());
 256           data.put("name", key);
 257           continue;
 258         }
 259         if (key.startsWith("loop_")) {
 260           getAllCifLoopData(data);
 261           continue;
 262         }
 263         if (key.startsWith("save_")) {
 264           if (key.equals("save_")) {
 265             int n = saveFrames.size();
 266             if (n == 0) {
 267               System.out.println("CIF ERROR ? save_ without corresponding save_xxxx");
 268               data = data0;
 269             } else {
 270               data = saveFrames.removeItemAt(n - 1);
 271             }
 272           } else {
 273             saveFrames.addLast(data);
 274             Map<String, Object> d = data;
 275             data = new Hashtable<String, Object>();
 276             d.put(key, data);
 277           }
 278           continue;
 279         }
 280         if (key.charAt(0) != '_') {
 281           System.out.println("CIF ERROR ? should be an underscore: " + key);
 282         } else {
 283           Object value = (asObject ? getNextTokenObject() : getNextToken());
 284           if (value == null) {
 285             System.out.println("CIF ERROR ? end of file; data missing: " + key);
 286           } else {
 287             data.put(fixKey(key), value);
 288           }
 289         }
 290       }
 291     } catch (Exception e) {
 292       // ?
 293     }
 294     asObject = false;
 295     try {
 296       if (br != null)
 297         br.close();
 298     } catch (Exception e) {
 299       // ?
 300     }
 301     nullString = "\0";
 302     return allData;
 303   }
 304
 305   /**
 306    * create our own list of keywords and for each one create a list
 307    * of data associated with that keyword. For example, a list of all
 308    * x coordinates, then a list of all y coordinates, etc.
 309    *
 310    * @param data
 311    * @throws Exception
 312    */
 313   @SuppressWarnings("unchecked")
 314   private void getAllCifLoopData(Map<String, Object> data) throws Exception {
 315     String key;
 316     Lst<String> keyWords = new  Lst<String>();
 317     Object o;
 318     while ((o = peekToken()) != null && o instanceof String &&  ((String) o).charAt(0) == '_') {
 319       key = fixKey((String) getTokenPeeked());
 320       keyWords.addLast(key);
 321       data.put(key, new  Lst<String>());
 322     }
 323     columnCount = keyWords.size();
 324     if (columnCount == 0)
 325       return;
 326     isLoop = true;
 327     while (getData())
 328       for (int i = 0; i < columnCount; i++)
 329         ((Lst<Object>)data.get(keyWords.get(i))).addLast(columnData[i]);
 330     isLoop = false;
 331   }
 332
 333   @Override
 334   public String readLine() {
 335     try {
 336       line = (reader == null ? br.readLine() : reader.readNextLine());
 337       if (line == null)
 338         return null;
 339       if (isHeader) {
 340         if (line.startsWith("#"))
 341           fileHeader.append(line).appendC('\n');
 342         else
 343           isHeader = false;
 344       }
 345       return line;
 346     } catch (Exception e) {
 347       return null;
 348     }
 349   }
 350
 351   /**
 352    * The work horse; a general reader for loop data. Fills colunnData with
 353    * fieldCount fields.
 354    *
 355    * @return false if EOF
 356    * @throws Exception
 357    */
 358   @Override
 359   public boolean getData() throws Exception {
 360     // line is already present, and we leave with the next line to parse
 361     if (isLoop) {
 362       for (int i = 0; i < columnCount; ++i)
 363         if ((columnData[i] = getNextDataToken()) == null)
 364           return false;
 365     } else if (haveData) {
 366       haveData = false;
 367     } else {
 368       return false;
 369     }
 370     return (columnCount > 0);
 371   }
 372
 373   /**
 374    *
 375    * Skips all associated loop data. (Skips to next control word.)
 376    *
 377    * @throws Exception
 378    */
 379   @Override
 380   public String skipLoop(boolean doReport) throws Exception {
 381     String str;
 382     SB ret = (doReport ? new SB() : null);
 383     int n = 0;
 384     while ((str = (String) peekToken()) != null && str.charAt(0) == '_') {
 385       if (ret != null)
 386         ret.append(str).append("\n");
 387       getTokenPeeked();
 388       n++;
 389     }
 390     if (n == 0)
 391       n = columnCount; // end-of-label-section skip
 392     int m = 0;
 393     while ((str = (String) getNextDataToken()) != null) {
 394       if (ret == null)
 395         continue;
 396       ret.append(str).append(" ");
 397       if ((++m % n) == 0)
 398         ret.append("\n");
 399     }
 400     return (ret == null ? null : ret.toString());
 401   }
 402
 403   /**
 404    * Get a token as a String value (for the reader)
 405    *
 406    * @return the next token of any kind, or null
 407    * @throws Exception
 408    */
 409   @Override
 410   public String getNextToken() throws Exception {
 411     wasUnquoted = true;
 412     return (String) getNextTokenProtected();
 413   }
 414
 415   /**
 416    * Get the token as a Java Object
 417    *
 418    * @return the next token of any kind, or null
 419    * @throws Exception
 420    */
 421   public Object getNextTokenObject() throws Exception {
 422     wasUnquoted = true;
 423     return getNextTokenProtected();
 424   }
 425
 426   /**
 427    * Just makes sure
 428    * @return String from buffer.
 429    * @throws Exception
 430    */
 431   protected Object getNextTokenProtected() throws Exception {
 432     return (getNextLine() ? nextStrToken() : null);
 433   }
 434
 435   /**
 436    *
 437    * first checks to see if the next token is an unquoted
 438    * control code, and if so, returns null
 439    *
 440    * @return next data token or null
 441    * @throws Exception
 442    */
 443   @Override
 444   public Object getNextDataToken() throws Exception {
 445     Object o = peekToken();
 446     if (o == null)
 447       return null;
 448     if (wasUnquoted && o instanceof String) {
 449       String str = (String) o;
 450       if (str.charAt(0) == '_' || str.startsWith("loop_")
 451           || str.startsWith("data_")
 452           || str.startsWith("save_")
 453           || str.startsWith("stop_")
 454           || str.startsWith("global_"))
 455         return null;
 456     }
 457     return getTokenPeeked();
 458   }
 459
 460   /**
 461    * Just look at the next token. Saves it for retrieval
 462    * using getTokenPeeked()
 463    *
 464    * @return next token or null if EOF
 465    * @throws Exception
 466    */
 467   @Override
 468   public Object peekToken() throws Exception {
 469     if (!getNextLine())
 470       return null;
 471     int ich = this.ich;
 472     strPeeked = nextStrToken();
 473     ichPeeked= this.ich;
 474     this.ich = ich;
 475     return strPeeked;
 476   }
 477
 478   /**
 479    * grab a new line if necessary and prepare it
 480    * if it starts with ";"
 481    *
 482    * @return updated this.str
 483    * @throws Exception
 484    */
 485   private boolean getNextLine() throws Exception {
 486     while (!strHasMoreTokens())
 487       if (prepareNextLine() == null)
 488         return false;
 489     return true;
 490   }
 491
 492   /**
 493    *
 494    * @return the token last acquired; may be null
 495    */
 496   @Override
 497   public Object getTokenPeeked() {
 498     ich = ichPeeked;
 499     return strPeeked;
 500   }
 501
 502   /**
 503    * Used especially for data that might be multi-line data that
 504    * might have unwanted white space at start or end.
 505    *
 506    * @param str
 507    * @return str without any leading/trailing white space, and no '\n'
 508    */
 509   @Override
 510   public String fullTrim(String str) {
 511     int pt0 = -1;
 512     int pt1 = str.length();
 513     while (++pt0 < pt1 && PT.isWhitespace(str.charAt(pt0))) {
 514     }
 515     while (--pt1 > pt0 && PT.isWhitespace(str.charAt(pt1))) {
 516     }
 517     return str.substring(pt0, pt1 + 1);
 518   }
 519
 520   private final static String grABC =
 521       "ABX\u0394E\u03A6\u0393H"   // ABCDEFGH
 522       + "I_K\u039BMNO\u03A0"      // I_KLMNOP
 523       + "\u0398P\u03A3TY_\u03A9\u039E\u03A5Z"; // QRSTU_WXYZ
 524   private final static String grabc =
 525       "\u03B1\u03B2\u03C7\u03A4\u03A5\u03C6\u03B3\u03B7" // abcdefgh
 526       + "\u03B9_\u03BA\u03BB\u03BC\u03BD\u03BF\u03C0"    // i_klmnop
 527       + "\u03B8\u03C1\u03C3\u03C4\u03C5_\u03C9\u03BE\u03C5\u03B6"; // qrstu_wxyz
 528
 529   /**
 530    * Only translating the basic Greek set here, not all the other stuff. See
 531    * http://www.iucr.org/resources/cif/spec/version1.1/semantics#markup
 532    *
 533    * @param data
 534    * @return cleaned string
 535    */
 536   @Override
 537   public String toUnicode(String data) {
 538     int pt;
 539     try {
 540       while ((pt = data.indexOf('\\')) >= 0) {
 541         int c = data.charAt(pt + 1);
 542         String ch = (c >= 65 && c <= 90 ? grABC.substring(c - 65, c - 64)
 543             : c >= 97 && c <= 122 ? grabc.substring(c - 97, c - 96) : "_");
 544         data = data.substring(0, pt) + ch + data.substring(pt + 2);
 545       }
 546     } catch (Exception e) {
 547       // ignore
 548     }
 549
 550     return data;
 551   }
 552
 553   /**
 554    * Process a data block, with or without a loop_.
 555    *
 556    * Passed an array of field names, this method fills two int[] arrays. The
 557    * first, key2col, maps desired key values to actual order of appearance
 558    * (column number) in the file; the second, col2key, is a reverse loop-up for
 559    * that, mapping column numbers to desired field indices.
 560    *
 561    * When called within a loop_ context, this.columnData will be created but not filled.
 562    *
 563    * Alternatively, if fields is null, then this.fieldNames is
 564    * filled, in order, with key data, and both key2col and col2key will be
 565    * simply 0,1,2,... This array is used in cases such as matrices for which
 566    * there are simply too many possibilities to list, and the key name itself
 567    * contains information that we need.
 568    *
 569    * When not a loop_ context, keys are expected to be in the mmCIF form
 570    * category.subkey and will be unique within a data block (see
 571    * http://mmcif.wwpdb.org/docs/tutorials/mechanics/pdbx-mmcif-syntax.html).
 572    * Keys and data will be read for all data in the same category, filling this.columnData.
 573    *
 574    *
 575    * In this way, the calling class does not need to enumerate all possible
 576    * category names, but instead can focus on just those of interest.
 577    *
 578    *
 579    * @param fields
 580    *        list of normalized field names, such as
 581    *        "_pdbx_struct_assembly_gen_assembly_id" (with "_" instead of ".")
 582    * @param key
 583    *        null to indicate a loop_ construct, otherwise the initial category.subkey
 584    *        found
 585    * @param data
 586    *        when not loop_ the initial data read, otherwise ignored
 587    * @param key2col
 588    *        map of desired keys to actual columns
 589    * @param col2key
 590    *        map of actual columns to desired keys
 591    * @throws Exception
 592    */
 593   @Override
 594   public void parseDataBlockParameters(String[] fields, String key,
 595                                  String data, int[] key2col, int[] col2key) throws Exception {
 596     isLoop = (key == null);
 597     Object o;
 598     String s;
 599     if (fields == null) {
 600       // for reading full list of keys, as for matrices
 601       columnNames = new String[KEY_MAX];
 602     } else {
 603       if (!htFields.containsKey(fields[0]))
 604         for (int i = fields.length; --i >= 0;)
 605           htFields.put(fields[i], Integer.valueOf(i));
 606       for (int i = fields.length; --i >= 0;)
 607         key2col[i] = NONE;
 608     }
 609     columnCount = 0;
 610     int pt, i;
 611     if (isLoop) {
 612       while (true) {
 613         o = peekToken();
 614         if (o == null) {
 615           // we are PREMATURELY done; reset
 616           columnCount = 0;
 617           break;
 618         }
 619         // end of the loop is a new token not starting with underscore
 620         if (!(o instanceof String) || ((String) o).charAt(0) != '_')
 621           break;
 622
 623         pt = columnCount++;
 624         s = fixKey((String) getTokenPeeked());
 625         if (fields == null) {
 626           // just make a linear model, saving the list
 627           columnNames[col2key[pt] = key2col[pt] = pt] = s;
 628           continue;
 629         }
 630         Integer iField = htFields.get(s);
 631         i = (iField == null ? NONE : iField.intValue());
 632         if ((col2key[pt] = i) != NONE)
 633           key2col[i] = pt;
 634       }
 635     } else {
 636       pt = key.indexOf(".");
 637       String str0 = (pt < 0 ? key : key.substring(0, pt + 1));
 638       while (true) {
 639         // end of the loop is a new token not starting with underscore
 640         pt = columnCount++;
 641         if (key == null) {
 642           key = (String) getTokenPeeked();
 643           data = getNextToken();
 644         }
 645         Integer iField = htFields.get(fixKey(key));
 646         i = (iField == null ? NONE : iField.intValue());
 647         if ((col2key[pt] = i) != NONE)
 648           columnData[key2col[i] = pt] = data;
 649         if ((o = peekToken()) == null || !(o instanceof String) ||  !((String) o).startsWith(str0))
 650           break;
 651         key = null;
 652       }
 653       haveData = (columnCount > 0);
 654     }
 655   }
 656
 657   @Override
 658   public String fixKey(String key) {
 659     // PRELIMINARY -- BilBao _magnetic
 660     // PRELIMINARY -- Jana2006
 661     return (
 662         key.startsWith("_magnetic") ? key.substring(9)
 663             : key.startsWith("_jana") ? key.substring(5)
 664             : key).replace('.', '_').toLowerCase();
 665   }
 666
 667   //////////////////// private methods ////////////////////
 668
 669
 670   /**
 671    * sets global str and line to be parsed from the beginning
 672    *
 673    * \1 .... \1  indicates an embedded fully escaped data object
 674    *
 675    * @param str new data string
 676    * @return str
 677    */
 678   protected String setString(String str) {
 679     this.str = line = str;
 680     cch = (str == null ? 0 : str.length());
 681     ich = 0;
 682     return str;
 683   }
 684
 685   /*
 686    * http://www.iucr.org/resources/cif/spec/version1.1/cifsyntax
 687    *
 688    * 17. The special sequence of end-of-line followed
 689    * immediately by a semicolon in column one (denoted "<eol>;")
 690    * may also be used as a delimiter at the beginning and end
 691    * of a character string comprising a data value. The complete
 692    * bounded string is called a text field, and may be used to
 693    * convey multi-line values. The end-of-line associated with
 694    * the closing semicolon does not form part of the data value.
 695    * Within a multi-line text field, leading white space within
 696    * text lines must be retained as part of the data value; trailing
 697    * white space on a line may however be elided.
 698    *
 699    * 18. A text field delimited by the <eol>; digraph may not
 700    * include a semicolon at the start of a line of text as
 701    * part of its value.
 702    *
 703    * 20. For example, the data value foo may be expressed
 704    * equivalently as an unquoted string foo, as a quoted
 705    * string 'foo' or as a text field
 706    *
 707    *;foo
 708    *;
 709    *
 710    * By contrast the value of the text field
 711    *
 712    *; foo
 713    *  bar
 714    *;
 715    *
 716    * is  foo<eol>  bar (where <eol> represents an end-of-line);
 717    * the embedded space characters are significant.
 718    *
 719    *
 720    * I (BH) note, however, that we sometimes have:
 721    *
 722    * _some_name
 723    * ;
 724    * the name here
 725    * ;
 726    *
 727    * so this should actually be
 728    *
 729    * ;the name here
 730    * ;
 731    *
 732    * for this, we use fullTrim();
 733    *
 734    */
 735
 736   /**
 737    *
 738    * sets the string for parsing to be from the next line
 739    * when the token buffer is empty, and if ';' is at the
 740    * beginning of that line, extends the string to include
 741    * that full multiline string. Uses \1 to indicate that
 742    * this is a special quotation.
 743    *
 744    *
 745    *
 746    * @return  the next line or null if EOF
 747    * @throws Exception
 748    */
 749   protected String prepareNextLine() throws Exception {
 750     setString(readLine());
 751     if (line == null || line.length() == 0)
 752       return line;
 753     if (line.charAt(0) == ';')
 754       return preprocessString();
 755     if (str.startsWith("###non-st#"))
 756         ich = 10;
 757     return line;
 758  }
 759
 760   /**
 761    * Preprocess the string on a line starting with a semicolon
 762    * to produce a string with a \1 ... \1 segment
 763    * that will be picked up in the next round
 764    *
 765    * @return escaped part with attached extra data
 766    * @throws Exception
 767    */
 768   protected String preprocessString() throws Exception {
 769     return setString(preprocessSemiString());
 770   }
 771
 772   /**
 773    * Encapsulate a multi-line ; .... ;  string with \1 ... \1
 774    *
 775    * CIF 1.0 and CIF 2.0
 776    *
 777    * @return ecapsulated string
 778    * @throws Exception
 779    */
 780   protected String preprocessSemiString() throws Exception {
 781     ich = 1;
 782     String str = '\1' + line.substring(1) + '\n';
 783     while (readLine() != null) {
 784       if (line.startsWith(";")) {
 785         // remove trailing <eol> only, and attach rest of next line
 786         str = str.substring(0, str.length() - 1)
 787           + '\1' + line.substring(1);
 788         break;
 789       }
 790       str += line + '\n';
 791     }
 792     return str;
 793   }
 794
 795   /**
 796    * @return TRUE if there are more tokens in the line buffer
 797    *
 798    */
 799   private boolean strHasMoreTokens() {
 800     if (str == null)
 801       return false;
 802     char ch = '#';
 803     while (ich < cch && ((ch = str.charAt(ich)) == ' ' || ch == '\t'))
 804       ++ich;
 805     return (ich < cch && ch != '#');
 806   }
 807
 808   /**
 809    * assume that hasMoreTokens() has been called and that ich is pointing at a
 810    * non-white character. Also sets boolean wasUnQuoted, because we need to know
 811    * if we should be checking for a control keyword. 'loop_' is different from
 812    * just loop_ without the quotes.
 813    *
 814    * @return null if no more tokens, "\0" if '.' or '?', or next token
 815    */
 816   private Object nextStrToken() {
 817     if (ich == cch)
 818       return null;
 819     char ch = str.charAt(ich);
 820     if (isQuote(ch)) {
 821       wasUnquoted = false;
 822       return getQuotedStringOrObject(ch);
 823     }
 824     int ichStart = ich;
 825     wasUnquoted = true;
 826     while (ich < cch && !isTerminator(ch = str.charAt(ich)))
 827       ++ich;
 828     if (ich == ichStart + 1)
 829       if (nullString != null
 830           && (str.charAt(ichStart) == '.' || str.charAt(ichStart) == '?'))
 831         return nullString;
 832     String s = str.substring(ichStart, ich);
 833     return unquoted(s);
 834   }
 835
 836   /**
 837    * In CIF 2.0, this method turns a String into an Integer or Float
 838    * In CIF 1.0 (here) just return the unchanged value.
 839    * @param s unquoted string
 840    * @return unchanged value
 841    */
 842   protected Object unquoted(String s) {
 843     return s;
 844   }
 845
 846   /**
 847    * The token terminator is space or tab in CIF 1.0,
 848    * but it can be quoted strings in CIF 2.0.
 849    *
 850    * @param c
 851    * @return true if this character is a terminator
 852    */
 853   protected boolean isTerminator(char c) {
 854     return  c == ' ' || c == '\t' || c == cterm ;
 855   }
 856
 857   /**
 858    * CIF 1.0 only; we handle various quote types here
 859    * @param ch
 860    * @return true if this character is a (starting) quote
 861    */
 862   protected boolean isQuote(char ch) {
 863     switch (ch) {
 864     case '\'':
 865     case '\"':
 866     case '\1':
 867       return  true;
 868     }
 869     return false;
 870   }
 871
 872   /**
 873    * CIF 1.0 only.
 874    *
 875    *
 876    * @param ch current character being pointed to
 877    * @return a String data object
 878    */
 879   protected Object getQuotedStringOrObject(char ch) {
 880     int ichStart = ich;
 881     char chClosingQuote = ch;
 882     boolean wasQuote = false;
 883     while (++ich < cch) {
 884       ch = str.charAt(ich);
 885       // CIF 1.0 rules require that the closing ' or ""  be followed by space or tab or EOL
 886       if (wasQuote && (ch == ' ' || ch == '\t'))
 887         break;
 888       wasQuote = (ch == chClosingQuote);
 889     }
 890     int pt1 = ichStart + 1;
 891     int pt2 = ich - 1;
 892     if (ich == cch && !wasQuote) {
 893       // reached the end of the string without finding closing '
 894       // so take the whole thing. Probably a bad CIF file.
 895       pt1--;
 896       pt2++;
 897     } else {
 898       // throw away the last white character
 899       ++ich;
 900     }
 901     return str.substring(pt1, pt2);
 902   }
 903
 904
 905 }