Merge branch 'merge/JAL-3285_mchmmer_with_211_develop' into alpha/JAL-3362_Jalview_21...
[jalview.git] / test / jalview / io / HMMFileTest.java
diff --git a/test/jalview/io/HMMFileTest.java b/test/jalview/io/HMMFileTest.java
new file mode 100644 (file)
index 0000000..cf74f55
--- /dev/null
@@ -0,0 +1,531 @@
+package jalview.io;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertNotNull;
+import static org.testng.Assert.assertNull;
+import static org.testng.Assert.assertTrue;
+
+import jalview.datamodel.HMMNode;
+import jalview.datamodel.HiddenMarkovModel;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Scanner;
+
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+import junit.extensions.PA;
+
+public class HMMFileTest {
+
+  HMMFile fn3;
+
+  HMMFile pKinase;
+
+  HMMFile made1;
+
+  @BeforeClass(alwaysRun = true)
+  public void setUp() throws IOException
+  {
+    fn3 = new HMMFile("test/jalview/io/test_fn3_hmm.txt",
+            DataSourceType.FILE);
+
+    pKinase = new HMMFile("test/jalview/io/test_PKinase_hmm.txt",
+            DataSourceType.FILE);
+
+    made1 = new HMMFile("test/jalview/io/test_MADE1_hmm.txt",
+            DataSourceType.FILE);
+  }
+
+  @Test(groups = "Functional")
+  public void testParse() throws IOException
+  {
+    HiddenMarkovModel hmm = pKinase.getHMM();
+    assertEquals(hmm.getName(), "Pkinase");
+    assertEquals(hmm.getProperty(HMMFile.ACCESSION_NUMBER), "PF00069.17");
+    assertEquals(hmm.getProperty(HMMFile.DESCRIPTION),
+            "Protein kinase domain");
+    assertEquals(hmm.getLength(), 260);
+    assertNull(hmm.getProperty(HMMFile.MAX_LENGTH));
+    assertEquals(hmm.getAlphabetType(), "amino");
+    assertFalse(hmm.getBooleanProperty(HMMFile.REFERENCE_ANNOTATION));
+    assertFalse(hmm.getBooleanProperty(HMMFile.MASKED_VALUE));
+    assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_RESIDUE));
+    assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_STRUCTURE));
+    assertTrue(hmm.getBooleanProperty(HMMFile.MAP));
+    assertEquals(hmm.getProperty(HMMFile.DATE), "Thu Jun 16 11:44:06 2011");
+    assertNull(hmm.getProperty(HMMFile.COMMAND_LOG));
+    assertEquals(hmm.getProperty(HMMFile.NUMBER_OF_SEQUENCES), "54");
+    assertEquals(hmm.getProperty(HMMFile.EFF_NUMBER_OF_SEQUENCES),
+            "3.358521");
+    assertEquals(hmm.getProperty(HMMFile.CHECK_SUM), "3106786190");
+    assertEquals(hmm.getProperty(HMMFile.GATHERING_THRESHOLD),
+            "70.30 70.30");
+    assertEquals(hmm.getProperty(HMMFile.TRUSTED_CUTOFF), "70.30 70.30");
+    assertEquals(hmm.getProperty(HMMFile.NOISE_CUTOFF), "70.20 70.20");
+  
+    assertEquals(hmm.getSymbols(), "ACDEFGHIKLMNPQRSTVWY");
+  
+    assertEquals(hmm.getMatchEmissionProbability(0, 'Y'), 0.16102, 0.001d);
+    assertEquals(hmm.getMatchEmissionProbability(11, 'P'), 0.0130, 0.001d);
+    assertEquals(hmm.getMatchEmissionProbability(24, 'I'), 0.02583, 0.001d);
+    assertEquals(hmm.getMatchEmissionProbability(83, 'C'), 0.008549,
+            0.001d);
+    assertEquals(hmm.getMatchEmissionProbability(332, 'E'), 0.07998,
+            0.001d);
+    assertEquals(hmm.getMatchEmissionProbability(381, 'D'), 0.014465,
+            0.001d);
+    assertEquals(hmm.getMatchEmissionProbability(475, 'Y'), 0.02213,
+            0.001d);
+  
+    assertEquals(hmm.getInsertEmissionProbability(1, 'C'), 0.012, 0.001d);
+    assertEquals(hmm.getInsertEmissionProbability(14, 'H'), 0.02411,
+            0.001d);
+    assertEquals(hmm.getInsertEmissionProbability(23, 'L'), 0.06764,
+            0.001d);
+    assertEquals(hmm.getInsertEmissionProbability(90, 'D'), 0.0623, 0.001d);
+    assertEquals(hmm.getInsertEmissionProbability(374, 'T'), 0.0623,
+            0.001d);
+    assertEquals(hmm.getInsertEmissionProbability(470, 'P'), 0.0647,
+            0.001d);
+  
+    assertEquals(hmm.getStateTransitionProbability(2, 6), 0.3848, 0.001d);
+    assertEquals(hmm.getStateTransitionProbability(38, 3), 0.5382, 0.001d);
+    assertEquals(hmm.getStateTransitionProbability(305, 3), 0.2916, 0.001d);
+    assertEquals(hmm.getStateTransitionProbability(380, 0), 0.99, 0.001d);
+    assertEquals(hmm.getStateTransitionProbability(453, 1), 0.0066, 0.001d);
+  
+    assertEquals(hmm.getNodeMapPosition(3), 3);
+    assertEquals(hmm.getReferenceAnnotation(7), '-');
+    assertEquals(hmm.getConsensusResidue(23), 't');
+    assertEquals(hmm.getMaskedValue(30), '-');
+    assertEquals(hmm.getConsensusStructure(56), 'S');
+  
+    assertEquals(hmm.getNodeMapPosition(78), 136);
+    assertEquals(hmm.getReferenceAnnotation(93), '-');
+    assertEquals(hmm.getConsensusResidue(145), 'a');
+    assertEquals(hmm.getMaskedValue(183), '-');
+    assertEquals(hmm.getConsensusStructure(240), 'H');
+  }
+
+  /**
+   * Test that Jalview can parse an HMM file even with a bunch of 'mandatory'
+   * fields missing (including no MAP annotation or // terminator line)
+   * 
+   * @throws IOException
+   */
+  @Test(groups = "Functional")
+  public void testParse_minimalFile() throws IOException
+  {
+    /*
+     * ALPH is absent, alphabet inferred from HMM header line
+     * Optional COMPO line is absent
+     * first line after HMM is a guide line for readability
+     * next line is BEGIN node insert emissions
+     * next line is BEGIN node transitions
+     * next line is first sequence node match emissions 1.1 1.2 1.3
+     * next line is first sequence node insert emissions 1.4 1.5 1.6
+     * last line is first sequence node transitions
+     */
+    //@formatter:off
+    String hmmData = 
+            "HMMER3\n" +
+            "HMM P M J\n" +
+            // both spec and parser require a line after the HMM line
+            " m->m m->i m->d i->m i->i d->m d->d\n" +
+            " 0.1 0.2 0.3\n" + 
+            " 0.4 0.5 0.6 0.7 0.8 0.9 0.95\n" + 
+            " 1 1.1 1.2 1.3 - - - - -\n" + 
+            " 1.4 1.5 1.6\n" + 
+            " 1.7 1.8 1.9 2.0 2.1 2.2 2.3\n" +
+            " 2 1.01 1.02 1.03 - - - - -\n" + 
+            " 1.04 1.05 1.06\n" + 
+            " 1.7 1.8 1.9 2.0 2.1 2.2 2.3\n";
+    //@formatter:on
+    HMMFile parser = new HMMFile(hmmData, DataSourceType.PASTE);
+    HiddenMarkovModel hmm = parser.getHMM();
+    assertNotNull(hmm);
+    assertEquals(hmm.getSymbols(), "PMJ");
+    // no LENG property: this should return node count excluding BEGIN node
+    assertEquals(hmm.getLength(), 2);
+
+    // node 1 (implicitly mapped to column 0)
+    double prob = hmm.getMatchEmissionProbability(0, 'p');
+    assertEquals(prob, Math.pow(Math.E, -1.1));
+    prob = hmm.getInsertEmissionProbability(0, 'J');
+    assertEquals(prob, Math.pow(Math.E, -1.6));
+
+    // node 2 (implicitly mapped to column 1)
+    prob = hmm.getMatchEmissionProbability(1, 'M');
+    assertEquals(prob, Math.pow(Math.E, -1.02));
+    prob = hmm.getInsertEmissionProbability(1, 'm');
+    assertEquals(prob, Math.pow(Math.E, -1.05));
+  }
+  
+  @Test(groups = "Functional")
+  public void testParseHeaderLines_amino() throws IOException
+  {
+    FileReader fr = new FileReader(
+            new File("test/jalview/io/test_fn3_hmm.txt"));
+    BufferedReader br = new BufferedReader(fr);
+    HiddenMarkovModel hmm = new HiddenMarkovModel();
+    HMMFile testee = new HMMFile();
+    PA.setValue(testee, "hmm", hmm);
+    testee.parseHeaderLines(br);
+    br.close();
+    fr.close();
+  
+    assertEquals(hmm.getName(), "fn3");
+    assertEquals(hmm.getProperty(HMMFile.ACCESSION_NUMBER), "PF00041.13");
+    assertEquals(hmm.getProperty(HMMFile.DESCRIPTION),
+            "Fibronectin type III domain");
+    assertEquals(hmm.getProperty(HMMFile.LENGTH), "86");
+    assertNull(hmm.getProperty(HMMFile.MAX_LENGTH));
+    assertEquals(hmm.getAlphabetType(), "amino");
+    assertFalse(hmm.getBooleanProperty(HMMFile.REFERENCE_ANNOTATION));
+    assertFalse(hmm.getBooleanProperty(HMMFile.MASKED_VALUE));
+    assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_RESIDUE));
+    assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_STRUCTURE));
+
+    assertTrue(hmm.getBooleanProperty(HMMFile.MAP));
+    assertEquals(hmm.getProperty(HMMFile.DATE), "Fri Jun 20 08:22:31 2014");
+    assertNull(hmm.getProperty(HMMFile.COMMAND_LOG));
+    assertEquals(hmm.getProperty(HMMFile.NUMBER_OF_SEQUENCES), "106");
+    assertEquals(hmm.getProperty(HMMFile.EFF_NUMBER_OF_SEQUENCES),
+            "11.415833");
+    assertEquals(hmm.getProperty(HMMFile.CHECK_SUM), "3564431818");
+    assertEquals(hmm.getProperty(HMMFile.GATHERING_THRESHOLD), "8.00 7.20");
+    assertEquals(hmm.getProperty(HMMFile.TRUSTED_CUTOFF), "8.00 7.20");
+    assertEquals(hmm.getProperty(HMMFile.NOISE_CUTOFF), "7.90 7.90");
+    assertEquals(hmm.getViterbi(), "-9.7737  0.71847");
+    assertEquals(hmm.getMSV(), "-9.4043  0.71847");
+    assertEquals(hmm.getForward(), "-3.8341  0.71847");
+  }
+  
+  @Test(groups = "Functional")
+  public void testParseHeaderLines_dna() throws IOException
+  {
+    FileReader fr = new FileReader(
+            new File("test/jalview/io/test_MADE1_hmm.txt"));
+    BufferedReader br = new BufferedReader(fr);
+    HiddenMarkovModel hmm = new HiddenMarkovModel();
+    HMMFile testee = new HMMFile();
+    PA.setValue(testee, "hmm", hmm);
+    testee.parseHeaderLines(br);
+    br.close();
+    fr.close();
+  
+    assertEquals(hmm.getName(), "MADE1");
+    assertEquals(hmm.getProperty(HMMFile.ACCESSION_NUMBER),
+            "DF0000629.2");
+    assertEquals(hmm.getProperty(HMMFile.DESCRIPTION),
+            "MADE1 (MAriner Derived Element 1), a TcMar-Mariner DNA transposon");
+    assertEquals(hmm.getProperty(HMMFile.LENGTH), "80");
+    assertEquals(hmm.getProperty(HMMFile.MAX_LENGTH), "426");
+    assertEquals(hmm.getAlphabetType(), "DNA");
+    assertTrue(hmm.getBooleanProperty(HMMFile.REFERENCE_ANNOTATION));
+    assertFalse(hmm.getBooleanProperty(HMMFile.MASKED_VALUE));
+    assertTrue(hmm.getBooleanProperty(HMMFile.CONSENSUS_RESIDUE));
+    assertFalse(hmm.getBooleanProperty(HMMFile.CONSENSUS_STRUCTURE));
+    assertTrue(hmm.getBooleanProperty(HMMFile.MAP));
+    assertEquals(hmm.getProperty(HMMFile.DATE), "Tue Feb 19 20:33:41 2013");
+    assertNull(hmm.getProperty(HMMFile.COMMAND_LOG));
+    assertEquals(hmm.getProperty(HMMFile.NUMBER_OF_SEQUENCES), "1997");
+    assertEquals(hmm.getProperty(HMMFile.EFF_NUMBER_OF_SEQUENCES), "3.911818");
+    assertEquals(hmm.getProperty(HMMFile.CHECK_SUM), "3015610723");
+    assertEquals(hmm.getProperty(HMMFile.GATHERING_THRESHOLD),
+            "2.324 4.234");
+    assertEquals(hmm.getProperty(HMMFile.TRUSTED_CUTOFF), "2.343 1.212");
+    assertEquals(hmm.getProperty(HMMFile.NOISE_CUTOFF), "2.354 5.456");
+    assertEquals(hmm.getViterbi(), "-9.3632  0.71858");
+    assertEquals(hmm.getMSV(), "-8.5786  0.71858");
+    assertEquals(hmm.getForward(), "-3.4823  0.71858");
+  }
+  
+  @Test(groups = "Functional")
+  public void testFillList() throws IOException
+  {
+    Scanner scanner1 = new Scanner("1.3 2.4 5.3 3.9 9.8 4.7 4.3 2.3 6.9");
+    ArrayList<Double> filledArray = new ArrayList<>();
+  
+    filledArray.add(0.27253);
+    filledArray.add(0.0907);
+    filledArray.add(0.00499);
+    filledArray.add(0.02024);
+    filledArray.add(0.00005);
+    filledArray.add(0.00909);
+    filledArray.add(0.01357);
+    filledArray.add(0.10026);
+    filledArray.add(0.001);
+  
+    double[] testList = HMMFile.parseDoubles(scanner1, 9);
+
+    for (int i = 0; i < 9; i++)
+    {
+      assertEquals(testList[i], filledArray.get(i), 0.001d);
+    }
+
+    filledArray.clear();
+    scanner1.close();
+  
+    Scanner scanner2 = new Scanner(
+            "1.346 5.554 35.345 5.64 1.4");
+    filledArray.add(0.2603);
+    filledArray.add(0.00387);
+    filledArray.add(0d);
+    filledArray.add(0.00355);
+    filledArray.add(0.2466);
+  
+    testList = HMMFile.parseDoubles(scanner2, 5);
+
+    for (int i = 0; i < 5; i++)
+    {
+      assertEquals(testList[i], filledArray.get(i), 0.001d);
+    }
+  }
+  
+  @Test(groups = "Functional")
+  public void testParseModel() throws IOException
+  {
+    FileReader fr = new FileReader(
+            new File("test/jalview/io/test_MADE1_hmm.txt"));
+    BufferedReader br = new BufferedReader(fr);
+    HiddenMarkovModel testHMM = new HiddenMarkovModel();
+    String line = null;
+    do
+    {
+      line = br.readLine(); // skip header lines up to HMM plus one
+    } while (!line.startsWith("HMM "));
+    br.readLine();
+
+    made1.parseModel(br);
+    testHMM = made1.getHMM();
+
+    br.close();
+    fr.close();
+  
+    assertEquals(testHMM.getMatchEmissionProbability(1, 'C'), 0.09267,
+            0.001d);
+    assertEquals(testHMM.getMatchEmissionProbability(25, 'G'), 0.07327,
+            0.001d);
+    assertEquals(testHMM.getMatchEmissionProbability(1092, 'C'), 0.04184,
+            0.001d);
+    assertEquals(testHMM.getMatchEmissionProbability(1107, 'G'), 0.07,
+            0.001d);
+  
+    assertEquals(testHMM.getInsertEmissionProbability(0, 'G'), 0.25,
+            0.001d);
+    assertEquals(testHMM.getInsertEmissionProbability(247, 'T'), 0.2776,
+            0.001d);
+    assertEquals(testHMM.getInsertEmissionProbability(1096, 'T'), 0.25,
+            0.001d);
+    assertEquals(testHMM.getInsertEmissionProbability(1111, 'T'), 0.25,
+            0.001d);
+
+    assertEquals(testHMM.getStateTransitionProbability(1, 0), 0.9634,
+            0.001d);
+    assertEquals(testHMM.getStateTransitionProbability(5, 1), 0.0203,
+            0.001d);
+    assertEquals(testHMM.getStateTransitionProbability(14, 3), 0.2515,
+            0.001d);
+    assertEquals(testHMM.getStateTransitionProbability(65, 4), 0.78808,
+            0.001d);
+    assertEquals(testHMM.getStateTransitionProbability(1080, 2), 0.01845,
+            0.001d);
+    assertEquals(testHMM.getStateTransitionProbability(1111, 6),
+            Double.NEGATIVE_INFINITY);
+  }
+  
+  @Test(groups = "Functional")
+  public void testParseAnnotations()
+  {
+    HMMFile testFile = new HMMFile();
+    HiddenMarkovModel hmm = new HiddenMarkovModel();
+    PA.setValue(testFile, "hmm", hmm);
+
+    List<HMMNode> nodes = new ArrayList<>();
+    nodes.add(new HMMNode()); // BEGIN node
+  
+    hmm.setProperty(HMMFile.CONSENSUS_RESIDUE, "yes");
+    hmm.setProperty(HMMFile.MAP, "yes");
+    hmm.setProperty(HMMFile.REFERENCE_ANNOTATION, "yes");
+    hmm.setProperty(HMMFile.CONSENSUS_STRUCTURE, "yes");
+    hmm.setProperty(HMMFile.MASKED_VALUE, "yes");
+    Scanner scanner = new Scanner("1345 t t t t");
+    HMMNode node = new HMMNode();
+    nodes.add(node);
+    testFile.parseAnnotations(scanner, node);
+  
+    hmm.setProperty(HMMFile.CONSENSUS_RESIDUE, "yes");
+    hmm.setProperty(HMMFile.MAP, "no");
+    hmm.setProperty(HMMFile.REFERENCE_ANNOTATION, "yes");
+    hmm.setProperty(HMMFile.CONSENSUS_STRUCTURE, "no");
+    hmm.setProperty(HMMFile.MASKED_VALUE, "no");
+    Scanner scanner2 = new Scanner("- y x - -");
+    node = new HMMNode();
+    nodes.add(node);
+    testFile.parseAnnotations(scanner2, node);
+
+    hmm.setNodes(nodes);
+
+    assertEquals(hmm.getNodeMapPosition(1), 1345);
+    assertEquals(hmm.getConsensusResidue(1), 't');
+    assertEquals(hmm.getReferenceAnnotation(1), 't');
+    assertEquals(hmm.getMaskedValue(1), 't');
+    assertEquals(hmm.getConsensusStructure(1), 't');
+  
+    scanner.close();
+  }
+  
+  /**
+   * tests to see if file produced by the output matches the file from the input
+   * 
+   * @throws IOException
+   */
+  @Test(groups = "Functional")
+  public void testPrint_roundTrip() throws IOException
+  {
+    String output = pKinase.print();
+    HMMFile pKinaseClone = new HMMFile(
+            new FileParse(output, DataSourceType.PASTE));
+    HiddenMarkovModel pKinaseHMM = pKinase.getHMM();
+    HiddenMarkovModel pKinaseCloneHMM = pKinaseClone.getHMM();
+  
+    checkModelsMatch(pKinaseHMM, pKinaseCloneHMM);
+  }
+
+  /**
+   * A helper method to check two HMM models have the same values
+   * 
+   * @param model1
+   * @param model2
+   */
+  protected void checkModelsMatch(HiddenMarkovModel model1,
+          HiddenMarkovModel model2)
+  {
+    assertEquals(model1.getLength(), model2.getLength());
+
+    for (int i = 0; i < model1.getLength(); i++)
+    {
+      String msg = "For Node" + i;
+      assertEquals(model1.getNode(i).getMatchEmissions(),
+              model2.getNode(i).getMatchEmissions(), msg);
+      assertEquals(model1.getNode(i).getInsertEmissions(),
+              model2.getNode(i).getInsertEmissions(), msg);
+      assertEquals(model1.getNode(i).getStateTransitions(),
+              model2.getNode(i).getStateTransitions(), msg);
+  
+      if (i > 0)
+      {
+        assertEquals(model1.getNodeMapPosition(i),
+                model2.getNodeMapPosition(i), msg);
+        assertEquals(model1.getReferenceAnnotation(i),
+                model2.getReferenceAnnotation(i), msg);
+        assertEquals(model1.getConsensusResidue(i),
+                model2.getConsensusResidue(i), msg);
+      }
+    }
+  }
+  
+  @Test(groups = "Functional")
+  public void testAppendProperties() throws FileNotFoundException
+  {
+    StringBuilder sb = new StringBuilder();
+    fn3.appendProperties(sb);
+
+    Scanner testScanner = new Scanner(sb.toString());
+  
+    String[] expected = new String[] { "HMMER3/f [3.1b1 | May 2013]",
+        "NAME  fn3", "ACC   PF00041.13",
+        "DESC  Fibronectin type III domain", "LENG  86", "ALPH  amino",
+        "RF    no", "MM    no", "CONS  yes", "CS    yes", "MAP   yes",
+        "DATE  Fri Jun 20 08:22:31 2014", "NSEQ  106", "EFFN  11.415833",
+        "CKSUM 3564431818", "GA    8.00 7.20", "TC    8.00 7.20",
+        "NC    7.90 7.90", "STATS LOCAL MSV       -9.4043  0.71847",
+        "STATS LOCAL VITERBI   -9.7737  0.71847",
+        "STATS LOCAL FORWARD   -3.8341  0.71847" };
+  
+    for (String value : expected)
+    {
+      assertEquals(testScanner.nextLine(), value);
+    }
+  
+    testScanner.close();
+  }
+  
+  @Test(groups = "Functional")
+  public void testAppendModelAsString() throws FileNotFoundException
+  {
+    StringBuilder sb = new StringBuilder();
+    fn3.appendModelAsString(sb);
+    String string = sb.toString();
+
+    assertEquals(findValue(2, 2, 2, string), "4.42225");
+    assertEquals(findValue(12, 14, 1, string), "2.79307");
+    assertEquals(findValue(6, 24, 3, string), "0.48576");
+    assertEquals(findValue(19, 33, 2, string), "4.58477");
+    assertEquals(findValue(20, 64, 2, string), "3.61505");
+    assertEquals(findValue(3, 72, 3, string), "6.81068");
+    assertEquals(findValue(10, 80, 2, string), "2.69355");
+    assertEquals(findValue(16, 65, 1, string), "2.81003");
+    assertEquals(findValue(14, 3, 1, string), "2.69012");
+    assertEquals(findValue(11, 32, 1, string), "4.34805");
+  }
+  
+  /**
+   * A helper method to find a token in the model string
+   * 
+   * @param symbolIndex
+   *          index of symbol being searched. First symbol has index 1.
+   * @param nodeIndex
+   *          index of node being searched. Begin node has index 0. First node
+   *          has index 1.
+   * @param line
+   *          index of line being searched in node. First line has index 1.
+   * @param model
+   *          string model being searched
+   * @return value at specified position
+   */
+  private String findValue(int symbolIndex, int nodeIndex, int line,
+          String model)
+  {
+    String value = "";
+    Scanner scanner = new Scanner(model);
+    scanner.nextLine();
+    scanner.nextLine();
+  
+    for (int lineIndex = 0; lineIndex < line - 1; lineIndex++)
+    {
+      scanner.nextLine();
+    }
+    for (int node = 0; node < nodeIndex; node++)
+    {
+      scanner.nextLine();
+      scanner.nextLine();
+      scanner.nextLine();
+    }
+  
+    for (int symbol = 0; symbol < symbolIndex; symbol++)
+    {
+      value = scanner.next();
+      if ("COMPO".equals(value))
+      {
+        scanner.next();
+      }
+      else if (value.length() < 7)
+      {
+        scanner.next();
+      }
+    }
+    scanner.close();
+    return value;
+  }
+}
+