--- /dev/null
+10 705
+Cow ATGGCATATCCCATACAACTAGGATTCCAAGATGCAACATCACCAATCATAGAAGAACTA
+Carp ATGGCACACCCAACGCAACTAGGTTTCAAGGACGCGGCCATACCCGTTATAGAGGAACTT
+Chicken ATGGCCAACCACTCCCAACTAGGCTTTCAAGACGCCTCATCCCCCATCATAGAAGAGCTC
+Human ATGGCACATGCAGCGCAAGTAGGTCTACAAGACGCTACTTCCCCTATCATAGAAGAGCTT
+Loach ATGGCACATCCCACACAATTAGGATTCCAAGACGCGGCCTCACCCGTAATAGAAGAACTT
+Mouse ATGGCCTACCCATTCCAACTTGGTCTACAAGACGCCACATCCCCTATTATAGAAGAGCTA
+Rat ATGGCTTACCCATTTCAACTTGGCTTACAAGACGCTACATCACCTATCATAGAAGAACTT
+Seal ATGGCATACCCCCTACAAATAGGCCTACAAGATGCAACCTCTCCCATTATAGAGGAGTTA
+Whale ATGGCATATCCATTCCAACTAGGTTTCCAAGATGCAGCATCACCCATCATAGAAGAGCTC
+Frog ATGGCACACCCATCACAATTAGGTTTTCAAGACGCAGCCTCTCCAATTATAGAAGAATTA
+
+CTTCACTTTCATGACCACACGCTAATAATTGTCTTCTTAATTAGCTCATTAGTACTTTAC
+CTTCACTTCCACGACCACGCATTAATAATTGTGCTCCTAATTAGCACTTTAGTTTTATAT
+GTTGAATTCCACGACCACGCCCTGATAGTCGCACTAGCAATTTGCAGCTTAGTACTCTAC
+ATCACCTTTCATGATCACGCCCTCATAATCATTTTCCTTATCTGCTTCCTAGTCCTGTAT
+CTTCACTTCCATGACCATGCCCTAATAATTGTATTTTTGATTAGCGCCCTAGTACTTTAT
+ATAAATTTCCATGATCACACACTAATAATTGTTTTCCTAATTAGCTCCTTAGTCCTCTAT
+ACAAACTTTCATGACCACACCCTAATAATTGTATTCCTCATCAGCTCCCTAGTACTTTAT
+CTACACTTCCATGACCACACATTAATAATTGTGTTCCTAATTAGCTCATTAGTACTCTAC
+CTACACTTTCACGATCATACACTAATAATCGTTTTTCTAATTAGCTCTTTAGTTCTCTAC
+CTTCACTTCCACGACCATACCCTCATAGCCGTTTTTCTTATTAGTACGCTAGTTCTTTAC
+
+ATTATTTCACTAATACTAACGACAAAGCTGACCCATACAAGCACGATAGATGCACAAGAA
+ATTATTACTGCAATGGTATCAACTAAACTTACTAATAAATATATTCTAGACTCCCAAGAA
+CTTCTAACTCTTATACTTATAGAAAAACTATCA---TCAAACACCGTAGATGCCCAAGAA
+GCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACATCTCAGACGCTCAGGAA
+GTTATTATTACAACCGTCTCAACAAAACTCACTAACATATATATTTTGGACTCACAAGAA
+ATCATCTCGCTAATATTAACAACAAAACTAACACATACAAGCACAATAGATGCACAAGAA
+ATTATTTCACTAATACTAACAACAAAACTAACACACACAAGCACAATAGACGCCCAAGAA
+ATTATCTCACTTATACTAACCACGAAACTCACCCACACAAGTACAATAGACGCACAAGAA
+ATTATTACCCTAATGCTTACAACCAAATTAACACATACTAGTACAATAGACGCCCAAGAA
+ATTATTACTATTATAATAACTACTAAACTAACTAATACAAACCTAATGGACGCACAAGAG
+
+GTAGAGACAATCTGAACCATTCTGCCCGCCATCATCTTAATTCTAATTGCTCTTCCTTCT
+ATCGAAATCGTATGAACCATTCTACCAGCCGTCATTTTAGTACTAATCGCCCTGCCCTCC
+GTTGAACTAATCTGAACCATCCTACCCGCTATTGTCCTAGTCCTGCTTGCCCTCCCCTCC
+ATAGAAACCGTCTGAACTATCCTGCCCGCCATCATCCTAGTCCTCATCGCCCTCCCATCC
+ATTGAAATCGTATGAACTGTGCTCCCTGCCCTAATCCTCATTTTAATCGCCCTCCCCTCA
+GTTGAAACCATTTGAACTATTCTACCAGCTGTAATCCTTATCATAATTGCTCTCCCCTCT
+GTAGAAACAATTTGAACAATTCTCCCAGCTGTCATTCTTATTCTAATTGCCCTTCCCTCC
+GTGGAAACGGTGTGAACGATCCTACCCGCTATCATTTTAATTCTCATTGCCCTACCATCA
+GTAGAAACTGTCTGAACTATCCTCCCAGCCATTATCTTAATTTTAATTGCCTTGCCTTCA
+ATCGAAATAGTGTGAACTATTATACCAGCTATTAGCCTCATCATAATTGCCCTTCCATCC
+
+TTACGAATTCTATACATAATAGATGAAATCAATAACCCATCTCTTACAGTAAAAACCATA
+CTACGCATCCTGTACCTTATAGACGAAATTAACGACCCTCACCTGACAATTAAAGCAATA
+CTCCAAATCCTCTACATAATAGACGAAATCGACGAACCTGATCTCACCCTAAAAGCCATC
+CTACGCATCCTTTACATAACAGACGAGGTCAACGATCCCTCCCTTACCATCAAATCAATT
+CTACGAATTCTATATCTTATAGACGAGATTAATGACCCCCACCTAACAATTAAGGCCATG
+CTACGCATTCTATATATAATAGACGAAATCAACAACCCCGTATTAACCGTTAAAACCATA
+CTACGAATTCTATACATAATAGACGAGATTAATAACCCAGTTCTAACAGTAAAAACTATA
+TTACGAATCCTCTACATAATGGACGAGATCAATAACCCTTCCTTGACCGTAAAAACTATA
+TTACGGATCCTTTACATAATAGACGAAGTCAATAACCCCTCCCTCACTGTAAAAACAATA
+CTTCGTATCCTATATTTAATAGATGAAGTTAATGATCCACACTTAACAATTAAAGCAATC
+
+GGACATCAGTGATACTGAAGCTATGAGTATACAGATTATGAGGACTTAAGCTTCGACTCC
+GGACACCAATGATACTGAAGTTACGAGTATACAGACTATGAAAATCTAGGATTCGACTCC
+GGACACCAATGATACTGAACCTATGAATACACAGACTTCAAGGACCTCTCATTTGACTCC
+GGCCACCAATGGTACTGAACCTACGAGTACACCGACTACGGCGGACTAATCTTCAACTCC
+GGGCACCAATGATACTGAAGCTACGAGTATACTGATTATGAAAACTTAAGTTTTGACTCC
+GGGCACCAATGATACTGAAGCTACGAATATACTGACTATGAAGACCTATGCTTTGATTCA
+GGACACCAATGATACTGAAGCTATGAATATACTGACTATGAAGACCTATGCTTTGACTCC
+GGACATCAGTGATACTGAAGCTATGAGTACACAGACTACGAAGACCTGAACTTTGACTCA
+GGTCACCAATGATATTGAAGCTATGAGTATACCGACTACGAAGACCTAAGCTTCGACTCC
+GGCCACCAATGATACTGAAGCTACGAATATACTAACTATGAGGATCTCTCATTTGACTCT
+
+TACATAATTCCAACATCAGAATTAAAGCCAGGGGAGCTACGACTATTAGAAGTCGATAAT
+TATATAGTACCAACCCAAGACCTTGCCCCCGGACAATTCCGACTTCTGGAAACAGACCAC
+TACATAACCCCAACAACAGACCTCCCCCTAGGCCACTTCCGCCTACTAGAAGTCGACCAT
+TACATACTTCCCCCATTATTCCTAGAACCAGGCGACCTGCGACTCCTTGACGTTGACAAT
+TACATAATCCCCACCCAGGACCTAACCCCTGGACAATTCCGGCTACTAGAGACAGACCAC
+TATATAATCCCAACAAACGACCTAAAACCTGGTGAACTACGACTGCTAGAAGTTGATAAC
+TACATAATCCCAACCAATGACCTAAAACCAGGTGAACTTCGTCTATTAGAAGTTGATAAT
+TATATGATCCCCACACAAGAACTAAAGCCCGGAGAACTACGACTGCTAGAAGTAGACAAT
+TATATAATCCCAACATCAGACCTAAAGCCAGGAGAACTACGATTATTAGAAGTAGATAAC
+TATATAATTCCAACTAATGACCTTACCCCTGGACAATTCCGGCTGCTAGAAGTTGATAAT
+
+CGAGTTGTACTACCAATAGAAATAACAATCCGAATGTTAGTCTCCTCTGAAGACGTATTA
+CGAATAGTTGTTCCAATAGAATCCCCAGTCCGTGTCCTAGTATCTGCTGAAGACGTGCTA
+CGCATTGTAATCCCCATAGAATCCCCCATTCGAGTAATCATCACCGCTGATGACGTCCTC
+CGAGTAGTACTCCCGATTGAAGCCCCCATTCGTATAATAATTACATCACAAGACGTCTTG
+CGAATGGTTGTTCCCATAGAATCCCCTATTCGCATTCTTGTTTCCGCCGAAGATGTACTA
+CGAGTCGTTCTGCCAATAGAACTTCCAATCCGTATATTAATTTCATCTGAAGACGTCCTC
+CGGGTAGTCTTACCAATAGAACTTCCAATTCGTATACTAATCTCATCCGAAGACGTCCTG
+CGAGTAGTCCTCCCAATAGAAATAACAATCCGCATACTAATCTCATCAGAAGATGTACTC
+CGAGTTGTCTTACCTATAGAAATAACAATCCGAATATTAGTCTCATCAGAAGACGTACTC
+CGAATAGTAGTCCCAATAGAATCTCCAACCCGACTTTTAGTTACAGCCGAAGACGTCCTC
+
+CACTCATGAGCTGTGCCCTCTCTAGGACTAAAAACAGACGCAATCCCAGGCCGTCTAAAC
+CATTCTTGAGCTGTTCCATCCCTTGGCGTAAAAATGGACGCAGTCCCAGGACGACTAAAT
+CACTCATGAGCCGTACCCGCCCTCGGGGTAAAAACAGACGCAATCCCTGGACGACTAAAT
+CACTCATGAGCTGTCCCCACATTAGGCTTAAAAACAGATGCAATTCCCGGACGTCTAAAC
+CACTCCTGGGCCCTTCCAGCCATGGGGGTAAAGATAGACGCGGTCCCAGGACGCCTTAAC
+CACTCATGAGCAGTCCCCTCCCTAGGACTTAAAACTGATGCCATCCCAGGCCGACTAAAT
+CACTCATGAGCCATCCCTTCACTAGGGTTAAAAACCGACGCAATCCCCGGCCGCCTAAAC
+CACTCATGAGCCGTACCGTCCCTAGGACTAAAAACTGATGCTATCCCAGGACGACTAAAC
+CACTCATGGGCCGTACCCTCCTTGGGCCTAAAAACAGATGCAATCCCAGGACGCCTAAAC
+CACTCGTGAGCTGTACCCTCCTTGGGTGTCAAAACAGATGCAATCCCAGGACGACTTCAT
+
+CAAACAACCCTTATATCGTCCCGTCCAGGCTTATATTACGGTCAATGCTCAGAAATTTGC
+CAAGCCGCCTTTATTGCCTCACGCCCAGGGGTCTTTTACGGACAATGCTCTGAAATTTGT
+CAAACCTCCTTCATCACCACTCGACCAGGAGTGTTTTACGGACAATGCTCAGAAATCTGC
+CAAACCACTTTCACCGCTACACGACCGGGGGTATACTACGGTCAATGCTCTGAAATCTGT
+CAAACCGCCTTTATTGCCTCCCGCCCCGGGGTATTCTATGGGCAATGCTCAGAAATCTGT
+CAAGCAACAGTAACATCAAACCGACCAGGGTTATTCTATGGCCAATGCTCTGAAATTTGT
+CAAGCTACAGTCACATCAAACCGACCAGGTCTATTCTATGGCCAATGCTCTGAAATTTGC
+CAAACAACCCTAATAACCATACGACCAGGACTGTACTACGGTCAATGCTCAGAAATCTGT
+CAAACAACCTTAATATCAACACGACCAGGCCTATTTTATGGACAATGCTCAGAGATCTGC
+CAAACATCATTTATTGCTACTCGTCCGGGAGTATTTTACGGACAATGTTCAGAAATTTGC
+
+GGGTCAAACCACAGTTTCATACCCATTGTCCTTGAGTTAGTCCCACTAAAGTACTTTGAA
+GGAGCTAATCACAGCTTTATACCAATTGTAGTTGAAGCAGTACCTCTCGAACACTTCGAA
+GGAGCTAACCACAGCTACATACCCATTGTAGTAGAGTCTACCCCCCTAAAACACTTTGAA
+GGAGCAAACCACAGTTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAA
+GGAGCAAACCACAGCTTTATACCCATCGTAGTAGAAGCGGTCCCACTATCTCACTTCGAA
+GGATCTAACCATAGCTTTATGCCCATTGTCCTAGAAATGGTTCCACTAAAATATTTCGAA
+GGCTCAAATCACAGCTTCATACCCATTGTACTAGAAATAGTGCCTCTAAAATATTTCGAA
+GGTTCAAACCACAGCTTCATACCTATTGTCCTCGAATTGGTCCCACTATCCCACTTCGAG
+GGCTCAAACCACAGTTTCATACCAATTGTCCTAGAACTAGTACCCCTAGAAGTCTTTGAA
+GGAGCAAACCACAGCTTTATACCAATTGTAGTTGAAGCAGTACCGCTAACCGACTTTGAA
+
+AAATGATCTGCGTCAATATTA---------------------TAA
+AACTGATCCTCATTAATACTAGAAGACGCCTCGCTAGGAAGCTAA
+GCCTGATCCTCACTA------------------CTGTCATCTTAA
+ATA---------------------GGGCCCGTATTTACCCTATAG
+AACTGGTCCACCCTTATACTAAAAGACGCCTCACTAGGAAGCTAA
+AACTGATCTGCTTCAATAATT---------------------TAA
+AACTGATCAGCTTCTATAATT---------------------TAA
+AAATGATCTACCTCAATGCTT---------------------TAA
+AAATGATCTGTATCAATACTA---------------------TAA
+AACTGATCTTCATCAATACTA---GAAGCATCACTA------AGA
--- /dev/null
+10 705
+Cow ATGGCATATCCCATACAACTAGGATTCCAAGATGCAACATCACCAATCATAGAAGAACTACTTCACTTTCATGACCACACGCTAATAATTGTCTTCTTAATTAGCTCATTAGTACTTTACATTATTTCACTAATACTAACGACAAAGCTGACCCATACAAGCACGATAGATGCACAAGAAGTAGAGACAATCTGAACCATTCTGCCCGCCATCATCTTAATTCTAATTGCTCTTCCTTCTTTACGAATTCTATACATAATAGATGAAATCAATAACCCATCTCTTACAGTAAAAACCATAGGACATCAGTGATACTGAAGCTATGAGTATACAGATTATGAGGACTTAAGCTTCGACTCCTACATAATTCCAACATCAGAATTAAAGCCAGGGGAGCTACGACTATTAGAAGTCGATAATCGAGTTGTACTACCAATAGAAATAACAATCCGAATGTTAGTCTCCTCTGAAGACGTATTACACTCATGAGCTGTGCCCTCTCTAGGACTAAAAACAGACGCAATCCCAGGCCGTCTAAACCAAACAACCCTTATATCGTCCCGTCCAGGCTTATATTACGGTCAATGCTCAGAAATTTGCGGGTCAAACCACAGTTTCATACCCATTGTCCTTGAGTTAGTCCCACTAAAGTACTTTGAAAAATGATCTGCGTCAATATTA---------------------TAA
+Carp ATGGCACACCCAACGCAACTAGGTTTCAAGGACGCGGCCATACCCGTTATAGAGGAACTTCTTCACTTCCACGACCACGCATTAATAATTGTGCTCCTAATTAGCACTTTAGTTTTATATATTATTACTGCAATGGTATCAACTAAACTTACTAATAAATATATTCTAGACTCCCAAGAAATCGAAATCGTATGAACCATTCTACCAGCCGTCATTTTAGTACTAATCGCCCTGCCCTCCCTACGCATCCTGTACCTTATAGACGAAATTAACGACCCTCACCTGACAATTAAAGCAATAGGACACCAATGATACTGAAGTTACGAGTATACAGACTATGAAAATCTAGGATTCGACTCCTATATAGTACCAACCCAAGACCTTGCCCCCGGACAATTCCGACTTCTGGAAACAGACCACCGAATAGTTGTTCCAATAGAATCCCCAGTCCGTGTCCTAGTATCTGCTGAAGACGTGCTACATTCTTGAGCTGTTCCATCCCTTGGCGTAAAAATGGACGCAGTCCCAGGACGACTAAATCAAGCCGCCTTTATTGCCTCACGCCCAGGGGTCTTTTACGGACAATGCTCTGAAATTTGTGGAGCTAATCACAGCTTTATACCAATTGTAGTTGAAGCAGTACCTCTCGAACACTTCGAAAACTGATCCTCATTAATACTAGAAGACGCCTCGCTAGGAAGCTAA
+Chicken ATGGCCAACCACTCCCAACTAGGCTTTCAAGACGCCTCATCCCCCATCATAGAAGAGCTCGTTGAATTCCACGACCACGCCCTGATAGTCGCACTAGCAATTTGCAGCTTAGTACTCTACCTTCTAACTCTTATACTTATAGAAAAACTATCA---TCAAACACCGTAGATGCCCAAGAAGTTGAACTAATCTGAACCATCCTACCCGCTATTGTCCTAGTCCTGCTTGCCCTCCCCTCCCTCCAAATCCTCTACATAATAGACGAAATCGACGAACCTGATCTCACCCTAAAAGCCATCGGACACCAATGATACTGAACCTATGAATACACAGACTTCAAGGACCTCTCATTTGACTCCTACATAACCCCAACAACAGACCTCCCCCTAGGCCACTTCCGCCTACTAGAAGTCGACCATCGCATTGTAATCCCCATAGAATCCCCCATTCGAGTAATCATCACCGCTGATGACGTCCTCCACTCATGAGCCGTACCCGCCCTCGGGGTAAAAACAGACGCAATCCCTGGACGACTAAATCAAACCTCCTTCATCACCACTCGACCAGGAGTGTTTTACGGACAATGCTCAGAAATCTGCGGAGCTAACCACAGCTACATACCCATTGTAGTAGAGTCTACCCCCCTAAAACACTTTGAAGCCTGATCCTCACTA------------------CTGTCATCTTAA
+Human ATGGCACATGCAGCGCAAGTAGGTCTACAAGACGCTACTTCCCCTATCATAGAAGAGCTTATCACCTTTCATGATCACGCCCTCATAATCATTTTCCTTATCTGCTTCCTAGTCCTGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACATCTCAGACGCTCAGGAAATAGAAACCGTCTGAACTATCCTGCCCGCCATCATCCTAGTCCTCATCGCCCTCCCATCCCTACGCATCCTTTACATAACAGACGAGGTCAACGATCCCTCCCTTACCATCAAATCAATTGGCCACCAATGGTACTGAACCTACGAGTACACCGACTACGGCGGACTAATCTTCAACTCCTACATACTTCCCCCATTATTCCTAGAACCAGGCGACCTGCGACTCCTTGACGTTGACAATCGAGTAGTACTCCCGATTGAAGCCCCCATTCGTATAATAATTACATCACAAGACGTCTTGCACTCATGAGCTGTCCCCACATTAGGCTTAAAAACAGATGCAATTCCCGGACGTCTAAACCAAACCACTTTCACCGCTACACGACCGGGGGTATACTACGGTCAATGCTCTGAAATCTGTGGAGCAAACCACAGTTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAAATA---------------------GGGCCCGTATTTACCCTATAG
+Loach ATGGCACATCCCACACAATTAGGATTCCAAGACGCGGCCTCACCCGTAATAGAAGAACTTCTTCACTTCCATGACCATGCCCTAATAATTGTATTTTTGATTAGCGCCCTAGTACTTTATGTTATTATTACAACCGTCTCAACAAAACTCACTAACATATATATTTTGGACTCACAAGAAATTGAAATCGTATGAACTGTGCTCCCTGCCCTAATCCTCATTTTAATCGCCCTCCCCTCACTACGAATTCTATATCTTATAGACGAGATTAATGACCCCCACCTAACAATTAAGGCCATGGGGCACCAATGATACTGAAGCTACGAGTATACTGATTATGAAAACTTAAGTTTTGACTCCTACATAATCCCCACCCAGGACCTAACCCCTGGACAATTCCGGCTACTAGAGACAGACCACCGAATGGTTGTTCCCATAGAATCCCCTATTCGCATTCTTGTTTCCGCCGAAGATGTACTACACTCCTGGGCCCTTCCAGCCATGGGGGTAAAGATAGACGCGGTCCCAGGACGCCTTAACCAAACCGCCTTTATTGCCTCCCGCCCCGGGGTATTCTATGGGCAATGCTCAGAAATCTGTGGAGCAAACCACAGCTTTATACCCATCGTAGTAGAAGCGGTCCCACTATCTCACTTCGAAAACTGGTCCACCCTTATACTAAAAGACGCCTCACTAGGAAGCTAA
+Mouse ATGGCCTACCCATTCCAACTTGGTCTACAAGACGCCACATCCCCTATTATAGAAGAGCTAATAAATTTCCATGATCACACACTAATAATTGTTTTCCTAATTAGCTCCTTAGTCCTCTATATCATCTCGCTAATATTAACAACAAAACTAACACATACAAGCACAATAGATGCACAAGAAGTTGAAACCATTTGAACTATTCTACCAGCTGTAATCCTTATCATAATTGCTCTCCCCTCTCTACGCATTCTATATATAATAGACGAAATCAACAACCCCGTATTAACCGTTAAAACCATAGGGCACCAATGATACTGAAGCTACGAATATACTGACTATGAAGACCTATGCTTTGATTCATATATAATCCCAACAAACGACCTAAAACCTGGTGAACTACGACTGCTAGAAGTTGATAACCGAGTCGTTCTGCCAATAGAACTTCCAATCCGTATATTAATTTCATCTGAAGACGTCCTCCACTCATGAGCAGTCCCCTCCCTAGGACTTAAAACTGATGCCATCCCAGGCCGACTAAATCAAGCAACAGTAACATCAAACCGACCAGGGTTATTCTATGGCCAATGCTCTGAAATTTGTGGATCTAACCATAGCTTTATGCCCATTGTCCTAGAAATGGTTCCACTAAAATATTTCGAAAACTGATCTGCTTCAATAATT---------------------TAA
+Rat ATGGCTTACCCATTTCAACTTGGCTTACAAGACGCTACATCACCTATCATAGAAGAACTTACAAACTTTCATGACCACACCCTAATAATTGTATTCCTCATCAGCTCCCTAGTACTTTATATTATTTCACTAATACTAACAACAAAACTAACACACACAAGCACAATAGACGCCCAAGAAGTAGAAACAATTTGAACAATTCTCCCAGCTGTCATTCTTATTCTAATTGCCCTTCCCTCCCTACGAATTCTATACATAATAGACGAGATTAATAACCCAGTTCTAACAGTAAAAACTATAGGACACCAATGATACTGAAGCTATGAATATACTGACTATGAAGACCTATGCTTTGACTCCTACATAATCCCAACCAATGACCTAAAACCAGGTGAACTTCGTCTATTAGAAGTTGATAATCGGGTAGTCTTACCAATAGAACTTCCAATTCGTATACTAATCTCATCCGAAGACGTCCTGCACTCATGAGCCATCCCTTCACTAGGGTTAAAAACCGACGCAATCCCCGGCCGCCTAAACCAAGCTACAGTCACATCAAACCGACCAGGTCTATTCTATGGCCAATGCTCTGAAATTTGCGGCTCAAATCACAGCTTCATACCCATTGTACTAGAAATAGTGCCTCTAAAATATTTCGAAAACTGATCAGCTTCTATAATT---------------------TAA
+Seal ATGGCATACCCCCTACAAATAGGCCTACAAGATGCAACCTCTCCCATTATAGAGGAGTTACTACACTTCCATGACCACACATTAATAATTGTGTTCCTAATTAGCTCATTAGTACTCTACATTATCTCACTTATACTAACCACGAAACTCACCCACACAAGTACAATAGACGCACAAGAAGTGGAAACGGTGTGAACGATCCTACCCGCTATCATTTTAATTCTCATTGCCCTACCATCATTACGAATCCTCTACATAATGGACGAGATCAATAACCCTTCCTTGACCGTAAAAACTATAGGACATCAGTGATACTGAAGCTATGAGTACACAGACTACGAAGACCTGAACTTTGACTCATATATGATCCCCACACAAGAACTAAAGCCCGGAGAACTACGACTGCTAGAAGTAGACAATCGAGTAGTCCTCCCAATAGAAATAACAATCCGCATACTAATCTCATCAGAAGATGTACTCCACTCATGAGCCGTACCGTCCCTAGGACTAAAAACTGATGCTATCCCAGGACGACTAAACCAAACAACCCTAATAACCATACGACCAGGACTGTACTACGGTCAATGCTCAGAAATCTGTGGTTCAAACCACAGCTTCATACCTATTGTCCTCGAATTGGTCCCACTATCCCACTTCGAGAAATGATCTACCTCAATGCTT---------------------TAA
+Whale ATGGCATATCCATTCCAACTAGGTTTCCAAGATGCAGCATCACCCATCATAGAAGAGCTCCTACACTTTCACGATCATACACTAATAATCGTTTTTCTAATTAGCTCTTTAGTTCTCTACATTATTACCCTAATGCTTACAACCAAATTAACACATACTAGTACAATAGACGCCCAAGAAGTAGAAACTGTCTGAACTATCCTCCCAGCCATTATCTTAATTTTAATTGCCTTGCCTTCATTACGGATCCTTTACATAATAGACGAAGTCAATAACCCCTCCCTCACTGTAAAAACAATAGGTCACCAATGATATTGAAGCTATGAGTATACCGACTACGAAGACCTAAGCTTCGACTCCTATATAATCCCAACATCAGACCTAAAGCCAGGAGAACTACGATTATTAGAAGTAGATAACCGAGTTGTCTTACCTATAGAAATAACAATCCGAATATTAGTCTCATCAGAAGACGTACTCCACTCATGGGCCGTACCCTCCTTGGGCCTAAAAACAGATGCAATCCCAGGACGCCTAAACCAAACAACCTTAATATCAACACGACCAGGCCTATTTTATGGACAATGCTCAGAGATCTGCGGCTCAAACCACAGTTTCATACCAATTGTCCTAGAACTAGTACCCCTAGAAGTCTTTGAAAAATGATCTGTATCAATACTA---------------------TAA
+Frog ATGGCACACCCATCACAATTAGGTTTTCAAGACGCAGCCTCTCCAATTATAGAAGAATTACTTCACTTCCACGACCATACCCTCATAGCCGTTTTTCTTATTAGTACGCTAGTTCTTTACATTATTACTATTATAATAACTACTAAACTAACTAATACAAACCTAATGGACGCACAAGAGATCGAAATAGTGTGAACTATTATACCAGCTATTAGCCTCATCATAATTGCCCTTCCATCCCTTCGTATCCTATATTTAATAGATGAAGTTAATGATCCACACTTAACAATTAAAGCAATCGGCCACCAATGATACTGAAGCTACGAATATACTAACTATGAGGATCTCTCATTTGACTCTTATATAATTCCAACTAATGACCTTACCCCTGGACAATTCCGGCTGCTAGAAGTTGATAATCGAATAGTAGTCCCAATAGAATCTCCAACCCGACTTTTAGTTACAGCCGAAGACGTCCTCCACTCGTGAGCTGTACCCTCCTTGGGTGTCAAAACAGATGCAATCCCAGGACGACTTCATCAAACATCATTTATTGCTACTCGTCCGGGAGTATTTTACGGACAATGTTCAGAAATTTGCGGAGCAAACCACAGCTTTATACCAATTGTAGTTGAAGCAGTACCGCTAACCGACTTTGAAAACTGATCTTCATCAATACTA---GAAGCATCACTA------AGA
/*
* Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2)
* Copyright (C) 2014 The Jalview Authors
- *
+ *
* This file is part of Jalview.
- *
+ *
* Jalview is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
+ * modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
- *
- * Jalview is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
- *
+ *
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see <http://www.gnu.org/licenses/>.
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.io;
+import jalview.api.AlignViewportI;
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentAnnotation;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.AlignmentView;
+import jalview.util.MessageManager;
+
import java.io.File;
import java.io.InputStream;
import java.util.List;
-import jalview.api.AlignViewportI;
-import jalview.datamodel.*;
-import jalview.util.MessageManager;
-
/**
* A low level class for alignment and feature IO with alignment formatting
* methods used by both applet and application for generating flat alignment
* files. It also holds the lists of magic format names that the applet and
* application will allow the user to read or write files with.
- *
+ *
* @author $author$
* @version $Revision$
*/
* List of valid format strings used in the isValidFormat method
*/
public static final String[] READABLE_FORMATS = new String[]
- { "BLC", "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "STH",
- "PDB", "JnetFile", "RNAML" }; // , "SimpleBLAST" };
+ { "BLC", "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "STH",
+ "PDB", "JnetFile", "RNAML", PhylipFile.FILE_DESC }; // , "SimpleBLAST" };
/**
* List of valid format strings for use by callers of the formatSequences
* method
*/
public static final String[] WRITEABLE_FORMATS = new String[]
- { "BLC", "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "AMSA", "STH" };
+ { "BLC", "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "AMSA",
+ "STH", PhylipFile.FILE_DESC };
/**
* List of extensions corresponding to file format types in WRITABLE_FNAMES
* that are writable by the application.
*/
public static final String[] WRITABLE_EXTENSIONS = new String[]
- { "fa, fasta, mfa, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa",
- "jvp", "sto,stk", "jar" };
+ { "fa, fasta, mfa, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa",
+ "jvp", "sto,stk", "jar", PhylipFile.FILE_EXT };
/**
* List of writable formats by the application. Order must correspond with the
* WRITABLE_EXTENSIONS list of formats.
*/
public static final String[] WRITABLE_FNAMES = new String[]
- { "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Jalview",
- "STH", "Jalview" };
+ { "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Jalview",
+ "STH", "Jalview", PhylipFile.FILE_DESC };
/**
* List of readable format file extensions by application in order
* corresponding to READABLE_FNAMES
*/
public static final String[] READABLE_EXTENSIONS = new String[]
- { "fa, fasta, mfa, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa",
- "jar,jvp", "sto,stk", "xml,rnaml" }; // ".blast"
+ { "fa, fasta, mfa, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa",
+ "jar,jvp", "sto,stk", "xml,rnaml", PhylipFile.FILE_EXT }; // ".blast"
/**
* List of readable formats by application in order corresponding to
* READABLE_EXTENSIONS
*/
public static final String[] READABLE_FNAMES = new String[]
- { "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Jalview",
- "Stockholm", "RNAML" };// ,
+ { "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Jalview",
+ "Stockholm", "RNAML", PhylipFile.FILE_DESC };// ,
// "SimpleBLAST"
// };
+ prettyPrint(READABLE_FORMATS);
/**
- *
+ *
* @param els
* @return grammatically correct(ish) list consisting of els elements.
*/
/**
* check that this format is valid for reading
- *
+ *
* @param format
* a format string to be compared with READABLE_FORMATS
* @return true if format is readable
/**
* validate format is valid for IO
- *
+ *
* @param format
* a format string to be compared with either READABLE_FORMATS or
* WRITEABLE_FORMATS
boolean valid = false;
String[] format_list = (forwriting) ? WRITEABLE_FORMATS
: READABLE_FORMATS;
- for (int i = 0; i < format_list.length; i++)
+ for (String element : format_list)
{
- if (format_list[i].equalsIgnoreCase(format))
+ if (element.equalsIgnoreCase(format))
{
return true;
}
/**
* Constructs the correct filetype parser for a characterised datasource
- *
+ *
* @param inFile
* data/data location
* @param type
* type of datasource
* @param format
* File format of data provided by datasource
- *
+ *
* @return DOCUMENT ME!
*/
public Alignment readFile(String inFile, String type, String format)
{
afile = new SimpleBlastFile(inFile, type);
}
+ else if (format.equals(PhylipFile.FILE_DESC))
+ {
+ afile = new PhylipFile(inFile, type);
+ }
else if (format.equals("RNAML"))
{
afile = new RnamlFile(inFile, type);
/**
* Constructs the correct filetype parser for an already open datasource
- *
+ *
* @param source
* an existing datasource
* @param format
* File format of data that will be provided by datasource
- *
+ *
* @return DOCUMENT ME!
*/
public AlignmentI readFromFile(FileParse source, String format)
{
afile = new SimpleBlastFile(source);
}
-
+ else if (format.equals(PhylipFile.FILE_DESC))
+ {
+ afile = new PhylipFile(source);
+ }
Alignment al = new Alignment(afile.getSeqsAsArray());
afile.addAnnotations(al);
/**
- * create an alignment flatfile from a Jalview alignment view
+ * create an alignment flatfile from a Jalview alignment view
* @param format
* @param jvsuffix
* @param av
aselview.addAnnotation(aa);
}
}
-
+
return formatSequences(format, aselview, jvsuffix);
}
-
+
/**
* Construct an output class for an alignment in a particular filetype TODO:
* allow caller to detect errors and warnings encountered when generating
* output
- *
+ *
* @param format
* string name of alignment format
* @param alignment
* @param jvsuffix
* passed to AlnFile class controls whether /START-END is added to
* sequence names
- *
+ *
* @return alignment flat file contents
*/
public String formatSequences(String format, AlignmentI alignment,
{
afile = new AMSAFile(alignment);
}
+ else if (format.equalsIgnoreCase(PhylipFile.FILE_DESC))
+ {
+ afile = new PhylipFile();
+ }
else if (format.equalsIgnoreCase("RNAML"))
{
afile = new RnamlFile();
} catch (Exception e)
{
System.err
- .println("Couln't format the alignment for output as a FASTA file.");
+ .println("Couln't format the alignment for output as a FASTA file.");
e.printStackTrace(System.err);
}
}
}
System.out.println("Read took " + (t1 / 1000.0) + " seconds.");
System.out
- .println("Difference between free memory now and before is "
- + (memf / (1024.0 * 1024.0) * 1.0) + " MB");
+ .println("Difference between free memory now and before is "
+ + (memf / (1024.0 * 1024.0) * 1.0) + " MB");
} catch (Exception e)
{
System.err.println("Exception when dealing with " + i
/**
* try to discover how to access the given file as a valid datasource that
* will be identified as the given type.
- *
+ *
* @param file
* @param format
* @return protocol that yields the data parsable as the given type
} catch (Exception ex)
{
System.err
- .println("Exception checking resources: " + file + " " + ex);
+ .println("Exception checking resources: " + file + " " + ex);
}
if (file.indexOf("://") > -1)
if (debug)
{
System.out
- .println("File deemed not accessible via " + protocol);
+ .println("File deemed not accessible via " + protocol);
}
fp.close();
return null;
public void setNewlineString(String nl)
{
- newline = nl;
+ newline = nl;
}
public String getNewlineString()
/*
* Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2)
* Copyright (C) 2014 The Jalview Authors
- *
+ *
* This file is part of Jalview.
- *
+ *
* Jalview is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
+ * modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
- *
- * Jalview is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
- *
+ *
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see <http://www.gnu.org/licenses/>.
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.io;
-import java.io.*;
+import java.io.IOException;
/**
* DOCUMENT ME!
- *
+ *
* @author $author$
* @version $Revision$
*/
{
/**
* Identify a datasource's file content.
- *
+ *
* @note Do not use this method for stream sources - create a FileParse object
* instead.
- *
+ *
* @param file
* DOCUMENT ME!
* @param protocol
emessage = e.getMessage();
}
if (parser != null)
+ {
return parser.errormessage;
+ }
return emessage;
}
/**
* Identify contents of source, closing it or resetting source to start
* afterwards.
- *
+ *
* @param source
* @param closeSource
* @return filetype string
else
{
reply = "FASTA"; // possibly a bad choice - may be recognised as
- // PIR
+ // PIR
}
// otherwise can still possibly be a PIR file
}
reply = "PDB";
break;
}
+ else if (data.matches("\\s*\\d+\\s+\\d+\\s*"))
+ {
+ reply = PhylipFile.FILE_DESC;
+ break;
+ }
+
/*
* // TODO comment out SimpleBLAST identification for Jalview 2.4.1 else
* if (!lineswereskipped && data.indexOf("BLAST")<4) { reply =
* "SimpleBLAST"; break;
- *
+ *
* } // end comments for Jalview 2.4.1
*/
else if (!lineswereskipped && data.charAt(0) != '*'
&& data.charAt(0) != ' '
&& data.indexOf(":") < data.indexOf(",")) // &&
- // data.indexOf(",")<data.indexOf(",",
- // data.indexOf(",")))
+ // data.indexOf(",")<data.indexOf(",",
+ // data.indexOf(",")))
{
// file looks like a concise JNet file
reply = "JnetFile";
if (length == 0)
{
System.err
- .println("File Identification failed! - Empty file was read.");
+ .println("File Identification failed! - Empty file was read.");
return "EMPTY DATA FILE";
}
return reply;
/*
* Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2)
* Copyright (C) 2014 The Jalview Authors
- *
+ *
* This file is part of Jalview.
- *
+ *
* Jalview is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
+ * modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
- *
- * Jalview is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
- *
+ *
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see <http://www.gnu.org/licenses/>.
* The Jalview Authors are detailed in the 'AUTHORS' file.
import jalview.util.MessageManager;
-import java.io.*;
-import java.util.*;
-
-import java.awt.*;
-import java.awt.event.*;
-import javax.swing.*;
+import java.awt.Component;
+import java.awt.Dimension;
+import java.awt.EventQueue;
+import java.awt.HeadlessException;
+import java.awt.event.MouseAdapter;
+import java.awt.event.MouseEvent;
+import java.io.File;
+import java.util.StringTokenizer;
+import java.util.Vector;
+
+import javax.swing.DefaultListCellRenderer;
+import javax.swing.JFileChooser;
+import javax.swing.JList;
+import javax.swing.JOptionPane;
+import javax.swing.JPanel;
+import javax.swing.JScrollPane;
/**
* Enhanced file chooser dialog box.
- *
+ *
* NOTE: bug on Windows systems when filechooser opened on directory to view
* files with colons in title.
- *
+ *
* @author AMW
- *
+ *
*/
public class JalviewFileChooser extends JFileChooser
{
setAccessory(new RecentlyOpened());
}
+ @Override
public void setFileFilter(javax.swing.filechooser.FileFilter filter)
{
super.setFileFilter(filter);
EventQueue.invokeLater(new Thread()
{
+ @Override
public void run()
{
String currentName = ui.getFileName();
{
format = "PFAM";
}
+ else if (format.toUpperCase().startsWith(PhylipFile.FILE_DESC))
+ {
+ format = PhylipFile.FILE_DESC;
+ }
return format;
}
+ @Override
public int showSaveDialog(Component parent) throws HeadlessException
{
this.setAccessory(null);
list.addMouseListener(new MouseAdapter()
{
+ @Override
public void mousePressed(MouseEvent evt)
{
recentListSelectionChanged(list.getSelectedValue());
javax.swing.SwingUtilities.invokeLater(new Runnable()
{
+ @Override
public void run()
{
scroller.getHorizontalScrollBar().setValue(
--- /dev/null
+/**
+ *
+ */
+package jalview.io;
+
+import jalview.datamodel.Alignment;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceI;
+
+import java.io.IOException;
+
+/**
+ * <p>
+ * Parser and exporter for PHYLIP file format, as defined <a
+ * href="http://evolution.genetics.washington.edu/phylip/doc/main.html">in the
+ * documentation</a>. The parser imports PHYLIP files in both sequential and
+ * interleaved format, and (currently) exports in interleaved format (using 60
+ * characters per matrix for the sequence).
+ * <p>
+ *
+ * <p>
+ * The following assumptions have been made for input
+ * <ul>
+ * <li>Sequences are expressed as letters, not real numbers with decimal points
+ * separated by blanks (which is a valid option according to the specification)</li>
+ * </ul>
+ *
+ * The following assumptions have been made for output
+ * <ul>
+ * <li>Interleaved format is used, with each matrix consisting of 60 characters;
+ * </li>
+ * <li>a blank line is added between each matrix;</li>
+ * <li>no spacing is added between the sequence characters.</li>
+ * </ul>
+ *
+ *
+ * </p>
+ *
+ * @author David Corsar
+ *
+ *
+ */
+public class PhylipFile extends AlignFile
+{
+
+ // Define file extension and description to save repeating it elsewhere
+ public static final String FILE_EXT = "phy";
+
+ public static final String FILE_DESC = "PHYLIP";
+
+ /**
+ *
+ * @see {@link AlignFile#AlignFile()}
+ */
+ public PhylipFile()
+ {
+ super();
+ }
+
+ /**
+ *
+ * @param source
+ * @throws IOException
+ */
+ public PhylipFile(FileParse source) throws IOException
+ {
+ super(source);
+ }
+
+ /**
+ * @param inFile
+ * @param type
+ * @throws IOException
+ * @see {@link AlignFile#AlignFile(FileParse)}
+ */
+ public PhylipFile(String inFile, String type) throws IOException
+ {
+ super(inFile, type);
+ }
+
+ /**
+ * Parses the input source
+ *
+ * @see {@link AlignFile#parse()}
+ */
+ @Override
+ public void parse() throws IOException
+ {
+ try
+ {
+ // First line should contain number of species and number of
+ // characters, separated by blanks
+ String line = nextLine();
+ String[] lineElements = line.trim().split("\\s+");
+ if (lineElements.length < 2)
+ {
+ throw new IOException(
+ "First line must contain the number of specifies and number of characters");
+ }
+
+ int numberSpecies = Integer.parseInt(lineElements[0]), numberCharacters = Integer
+ .parseInt(lineElements[1]);
+
+ if (numberSpecies <= 0)
+ {
+ // there are no sequences in this file so exit a nothing to
+ // parse
+ return;
+ }
+
+ SequenceI[] sequenceElements = new Sequence[numberSpecies];
+ StringBuffer[] sequences = new StringBuffer[numberSpecies];
+
+ // if file is in sequential format there is only one data matrix,
+ // else there are multiple
+
+ // read the first data matrix
+ for (int i = 0; i < numberSpecies; i++)
+ {
+ line = nextLine();
+ // lines start with the name - a maximum of 10 characters
+ // if less, then padded out or terminated with a tab
+ String potentialName = line.substring(0, 10);
+ int tabIndex = potentialName.indexOf('\t');
+ if (tabIndex == -1)
+ {
+ sequenceElements[i] = parseId(validateName(potentialName));
+ sequences[i] = new StringBuffer(
+ removeWhitespace(line.substring(10)));
+ }
+ else
+ {
+ sequenceElements[i] = parseId(validateName(potentialName
+ .substring(0, tabIndex)));
+ sequences[i] = new StringBuffer(
+ removeWhitespace(line.substring(tabIndex)));
+ }
+ }
+
+ // determine if interleaved
+ if ((sequences[0]).length() != numberCharacters)
+ {
+ // interleaved file, so have to read the remainder
+ int i = 0;
+ for (line = nextLine(); line != null; line = nextLine())
+ {
+ // ignore blank lines, as defined by the specification
+ if (line.length() > 0)
+ {
+ sequences[i++].append(removeWhitespace(line));
+ }
+ // reached end of matrix, so get ready for the next one
+ if (i == sequences.length)
+ {
+ i = 0;
+ }
+ }
+ }
+
+ // file parsed completely, now store sequences
+ for (int i = 0; i < numberSpecies; i++)
+ {
+ // first check sequence is the expected length
+ if (sequences[i].length() != numberCharacters)
+ {
+ throw new IOException(sequenceElements[i].getName()
+ + " sequence is incorrect length - should be "
+ + numberCharacters + " but is " + sequences[i].length());
+ }
+ sequenceElements[i].setSequence(sequences[i].toString());
+ seqs.add(sequenceElements[i]);
+ }
+
+ // create an alignment based on the sequences
+ Alignment a = new Alignment(sequenceElements);
+ // add annotations - although comments say addAnnotations
+ // is used by AppletFormatAdapter, it doesn't say other
+ // classes should/can not use it
+ addAnnotations(a);
+
+ } catch (IOException e)
+ {
+ System.err.println("Exception parsing PHYLIP file " + e);
+ e.printStackTrace(System.err);
+ throw e;
+ }
+
+ }
+
+ /**
+ * Removes any whitespace from txt, used to strip and spaces added to
+ * sequences to improve human readability
+ *
+ * @param txt
+ * @return
+ */
+ private String removeWhitespace(String txt)
+ {
+ return txt.replaceAll("\\s*", "");
+ }
+
+ /**
+ * According to the specification, the name cannot have parentheses, square
+ * brackets, colon, semicolon, comma
+ *
+ * @param name
+ * @return
+ * @throws IOException
+ */
+ private String validateName(String name) throws IOException
+ {
+ char[] invalidCharacters = new char[]
+ { '(', ')', '[', ']', ':', ';', ',' };
+ for (char c : invalidCharacters)
+ {
+ if (name.indexOf(c) > -1)
+ {
+ throw new IOException("Species name contains illegal character "
+ + c);
+ }
+ }
+ return name;
+ }
+
+ /**
+ * <p>
+ * Prints the seqs in interleaved format, with each matrix consisting of 60
+ * characters; a blank line is added between each matrix; no spacing is added
+ * between the sequence characters.
+ * </p>
+ *
+ *
+ * @see {@link AlignFile#print()}
+ */
+ @Override
+ public String print()
+ {
+
+ StringBuffer sb = new StringBuffer(Integer.toString(seqs.size()));
+ sb.append(" ");
+ // if there are no sequences, then define the number of characters as 0
+ sb.append(
+ (seqs.size() > 0) ? Integer
+ .toString(seqs.get(0).getSequence().length) : "0")
+ .append(newline);
+
+ // Due to how IO is handled, there doesn't appear to be a way to store
+ // if the original file was sequential or interleaved; if there is, then
+ // use that to set the value of the following variable
+ boolean sequential = false;
+
+ // maximum number of columns for each row of interleaved format
+ int numInterleavedColumns = 60;
+
+ int sequenceLength = 0;
+ for (SequenceI s : seqs)
+ {
+
+ // ensure name is only 10 characters
+ String name = s.getName();
+ if (name.length() > 10)
+ {
+ name = name.substring(0, 10);
+ }
+ else
+ {
+ // add padding 10 characters
+ name = String.format("%1$-" + 10 + "s", s.getName());
+ }
+ sb.append(name);
+
+ // sequential has the entire sequence following the name
+ if (sequential)
+ {
+ sb.append(s.getSequence());
+ }
+ else
+ {
+ // Jalview ensures all sequences are of same length so no need
+ // to keep track of min/max length
+ sequenceLength = s.getSequence().length;
+ // interleaved breaks the sequence into chunks for
+ // interleavedColumns characters
+ sb.append(s.getSequence(0,
+ Math.min(numInterleavedColumns, sequenceLength)));
+ }
+ sb.append(newline);
+ }
+
+ // add the remaining matrixes if interleaved and there is something to
+ // add
+ if (!sequential && sequenceLength > numInterleavedColumns)
+ {
+ // determine number of remaining matrixes
+ int numMatrics = sequenceLength / numInterleavedColumns;
+ if ((sequenceLength % numInterleavedColumns) > 0)
+ {
+ numMatrics++;
+ }
+
+ // start i = 1 as first matrix has already been printed
+ for (int i = 1; i < numMatrics; i++)
+ {
+ // add blank line to separate this matrix from previous
+ sb.append(newline);
+ int start = i * numInterleavedColumns;
+ for (SequenceI s : seqs)
+ {
+ sb.append(
+ s.getSequence(start, Math.min(start
+ + numInterleavedColumns, sequenceLength)))
+ .append(newline);
+ }
+ }
+
+ }
+
+ return sb.toString();
+ }
+}
\ No newline at end of file
--- /dev/null
+package jalview.io;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import jalview.datamodel.Alignment;
+import jalview.datamodel.SequenceI;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.junit.Test;
+
+/**
+ * Test file for {@link PhylipFile}.
+ *
+ * Tests use example data obtained from <a
+ * href="http://www.molecularevolution.org/resources/fileformats"
+ * >molecularrevolution.org<a>.
+ *
+ * @author David Corsar
+ *
+ */
+public class PhylipFileTests
+{
+
+ // interleaved file from
+ // http://www.molecularevolution.org/molevolfiles/fileformats/dna.phy.dat
+ // sequential file is the interleave file converted into sequential format
+
+ static String sequentialFile = "examples/dna_sequential.phy",
+ interleavedFile = "examples/dna_interleaved.phy";
+
+ /**
+ * Creates a name:sequence map for the data in the above files
+ *
+ * @return
+ */
+ private static Map<String, String> getTestData()
+ {
+ Map<String, String> data = new HashMap<String, String>();
+ data.put(
+ "Cow",
+ "ATGGCATATCCCATACAACTAGGATTCCAAGATGCAACATCACCAATCATAGAAGAACTACTTCACTTTCATGACCACACGCTAATAATTGTCTTCTTAATTAGCTCATTAGTACTTTACATTATTTCACTAATACTAACGACAAAGCTGACCCATACAAGCACGATAGATGCACAAGAAGTAGAGACAATCTGAACCATTCTGCCCGCCATCATCTTAATTCTAATTGCTCTTCCTTCTTTACGAATTCTATACATAATAGATGAAATCAATAACCCATCTCTTACAGTAAAAACCATAGGACATCAGTGATACTGAAGCTATGAGTATACAGATTATGAGGACTTAAGCTTCGACTCCTACATAATTCCAACATCAGAATTAAAGCCAGGGGAGCTACGACTATTAGAAGTCGATAATCGAGTTGTACTACCAATAGAAATAACAATCCGAATGTTAGTCTCCTCTGAAGACGTATTACACTCATGAGCTGTGCCCTCTCTAGGACTAAAAACAGACGCAATCCCAGGCCGTCTAAACCAAACAACCCTTATATCGTCCCGTCCAGGCTTATATTACGGTCAATGCTCAGAAATTTGCGGGTCAAACCACAGTTTCATACCCATTGTCCTTGAGTTAGTCCCACTAAAGTACTTTGAAAAATGATCTGCGTCAATATTA---------------------TAA");
+ data.put(
+ "Carp",
+ "ATGGCACACCCAACGCAACTAGGTTTCAAGGACGCGGCCATACCCGTTATAGAGGAACTTCTTCACTTCCACGACCACGCATTAATAATTGTGCTCCTAATTAGCACTTTAGTTTTATATATTATTACTGCAATGGTATCAACTAAACTTACTAATAAATATATTCTAGACTCCCAAGAAATCGAAATCGTATGAACCATTCTACCAGCCGTCATTTTAGTACTAATCGCCCTGCCCTCCCTACGCATCCTGTACCTTATAGACGAAATTAACGACCCTCACCTGACAATTAAAGCAATAGGACACCAATGATACTGAAGTTACGAGTATACAGACTATGAAAATCTAGGATTCGACTCCTATATAGTACCAACCCAAGACCTTGCCCCCGGACAATTCCGACTTCTGGAAACAGACCACCGAATAGTTGTTCCAATAGAATCCCCAGTCCGTGTCCTAGTATCTGCTGAAGACGTGCTACATTCTTGAGCTGTTCCATCCCTTGGCGTAAAAATGGACGCAGTCCCAGGACGACTAAATCAAGCCGCCTTTATTGCCTCACGCCCAGGGGTCTTTTACGGACAATGCTCTGAAATTTGTGGAGCTAATCACAGCTTTATACCAATTGTAGTTGAAGCAGTACCTCTCGAACACTTCGAAAACTGATCCTCATTAATACTAGAAGACGCCTCGCTAGGAAGCTAA");
+ data.put(
+ "Chicken",
+ "ATGGCCAACCACTCCCAACTAGGCTTTCAAGACGCCTCATCCCCCATCATAGAAGAGCTCGTTGAATTCCACGACCACGCCCTGATAGTCGCACTAGCAATTTGCAGCTTAGTACTCTACCTTCTAACTCTTATACTTATAGAAAAACTATCA---TCAAACACCGTAGATGCCCAAGAAGTTGAACTAATCTGAACCATCCTACCCGCTATTGTCCTAGTCCTGCTTGCCCTCCCCTCCCTCCAAATCCTCTACATAATAGACGAAATCGACGAACCTGATCTCACCCTAAAAGCCATCGGACACCAATGATACTGAACCTATGAATACACAGACTTCAAGGACCTCTCATTTGACTCCTACATAACCCCAACAACAGACCTCCCCCTAGGCCACTTCCGCCTACTAGAAGTCGACCATCGCATTGTAATCCCCATAGAATCCCCCATTCGAGTAATCATCACCGCTGATGACGTCCTCCACTCATGAGCCGTACCCGCCCTCGGGGTAAAAACAGACGCAATCCCTGGACGACTAAATCAAACCTCCTTCATCACCACTCGACCAGGAGTGTTTTACGGACAATGCTCAGAAATCTGCGGAGCTAACCACAGCTACATACCCATTGTAGTAGAGTCTACCCCCCTAAAACACTTTGAAGCCTGATCCTCACTA------------------CTGTCATCTTAA");
+ data.put(
+ "Human",
+ "ATGGCACATGCAGCGCAAGTAGGTCTACAAGACGCTACTTCCCCTATCATAGAAGAGCTTATCACCTTTCATGATCACGCCCTCATAATCATTTTCCTTATCTGCTTCCTAGTCCTGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACATCTCAGACGCTCAGGAAATAGAAACCGTCTGAACTATCCTGCCCGCCATCATCCTAGTCCTCATCGCCCTCCCATCCCTACGCATCCTTTACATAACAGACGAGGTCAACGATCCCTCCCTTACCATCAAATCAATTGGCCACCAATGGTACTGAACCTACGAGTACACCGACTACGGCGGACTAATCTTCAACTCCTACATACTTCCCCCATTATTCCTAGAACCAGGCGACCTGCGACTCCTTGACGTTGACAATCGAGTAGTACTCCCGATTGAAGCCCCCATTCGTATAATAATTACATCACAAGACGTCTTGCACTCATGAGCTGTCCCCACATTAGGCTTAAAAACAGATGCAATTCCCGGACGTCTAAACCAAACCACTTTCACCGCTACACGACCGGGGGTATACTACGGTCAATGCTCTGAAATCTGTGGAGCAAACCACAGTTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAAATA---------------------GGGCCCGTATTTACCCTATAG");
+ data.put(
+ "Loach",
+ "ATGGCACATCCCACACAATTAGGATTCCAAGACGCGGCCTCACCCGTAATAGAAGAACTTCTTCACTTCCATGACCATGCCCTAATAATTGTATTTTTGATTAGCGCCCTAGTACTTTATGTTATTATTACAACCGTCTCAACAAAACTCACTAACATATATATTTTGGACTCACAAGAAATTGAAATCGTATGAACTGTGCTCCCTGCCCTAATCCTCATTTTAATCGCCCTCCCCTCACTACGAATTCTATATCTTATAGACGAGATTAATGACCCCCACCTAACAATTAAGGCCATGGGGCACCAATGATACTGAAGCTACGAGTATACTGATTATGAAAACTTAAGTTTTGACTCCTACATAATCCCCACCCAGGACCTAACCCCTGGACAATTCCGGCTACTAGAGACAGACCACCGAATGGTTGTTCCCATAGAATCCCCTATTCGCATTCTTGTTTCCGCCGAAGATGTACTACACTCCTGGGCCCTTCCAGCCATGGGGGTAAAGATAGACGCGGTCCCAGGACGCCTTAACCAAACCGCCTTTATTGCCTCCCGCCCCGGGGTATTCTATGGGCAATGCTCAGAAATCTGTGGAGCAAACCACAGCTTTATACCCATCGTAGTAGAAGCGGTCCCACTATCTCACTTCGAAAACTGGTCCACCCTTATACTAAAAGACGCCTCACTAGGAAGCTAA");
+ data.put(
+ "Mouse",
+ "ATGGCCTACCCATTCCAACTTGGTCTACAAGACGCCACATCCCCTATTATAGAAGAGCTAATAAATTTCCATGATCACACACTAATAATTGTTTTCCTAATTAGCTCCTTAGTCCTCTATATCATCTCGCTAATATTAACAACAAAACTAACACATACAAGCACAATAGATGCACAAGAAGTTGAAACCATTTGAACTATTCTACCAGCTGTAATCCTTATCATAATTGCTCTCCCCTCTCTACGCATTCTATATATAATAGACGAAATCAACAACCCCGTATTAACCGTTAAAACCATAGGGCACCAATGATACTGAAGCTACGAATATACTGACTATGAAGACCTATGCTTTGATTCATATATAATCCCAACAAACGACCTAAAACCTGGTGAACTACGACTGCTAGAAGTTGATAACCGAGTCGTTCTGCCAATAGAACTTCCAATCCGTATATTAATTTCATCTGAAGACGTCCTCCACTCATGAGCAGTCCCCTCCCTAGGACTTAAAACTGATGCCATCCCAGGCCGACTAAATCAAGCAACAGTAACATCAAACCGACCAGGGTTATTCTATGGCCAATGCTCTGAAATTTGTGGATCTAACCATAGCTTTATGCCCATTGTCCTAGAAATGGTTCCACTAAAATATTTCGAAAACTGATCTGCTTCAATAATT---------------------TAA");
+ data.put(
+ "Rat",
+ "ATGGCTTACCCATTTCAACTTGGCTTACAAGACGCTACATCACCTATCATAGAAGAACTTACAAACTTTCATGACCACACCCTAATAATTGTATTCCTCATCAGCTCCCTAGTACTTTATATTATTTCACTAATACTAACAACAAAACTAACACACACAAGCACAATAGACGCCCAAGAAGTAGAAACAATTTGAACAATTCTCCCAGCTGTCATTCTTATTCTAATTGCCCTTCCCTCCCTACGAATTCTATACATAATAGACGAGATTAATAACCCAGTTCTAACAGTAAAAACTATAGGACACCAATGATACTGAAGCTATGAATATACTGACTATGAAGACCTATGCTTTGACTCCTACATAATCCCAACCAATGACCTAAAACCAGGTGAACTTCGTCTATTAGAAGTTGATAATCGGGTAGTCTTACCAATAGAACTTCCAATTCGTATACTAATCTCATCCGAAGACGTCCTGCACTCATGAGCCATCCCTTCACTAGGGTTAAAAACCGACGCAATCCCCGGCCGCCTAAACCAAGCTACAGTCACATCAAACCGACCAGGTCTATTCTATGGCCAATGCTCTGAAATTTGCGGCTCAAATCACAGCTTCATACCCATTGTACTAGAAATAGTGCCTCTAAAATATTTCGAAAACTGATCAGCTTCTATAATT---------------------TAA");
+ data.put(
+ "Seal",
+ "ATGGCATACCCCCTACAAATAGGCCTACAAGATGCAACCTCTCCCATTATAGAGGAGTTACTACACTTCCATGACCACACATTAATAATTGTGTTCCTAATTAGCTCATTAGTACTCTACATTATCTCACTTATACTAACCACGAAACTCACCCACACAAGTACAATAGACGCACAAGAAGTGGAAACGGTGTGAACGATCCTACCCGCTATCATTTTAATTCTCATTGCCCTACCATCATTACGAATCCTCTACATAATGGACGAGATCAATAACCCTTCCTTGACCGTAAAAACTATAGGACATCAGTGATACTGAAGCTATGAGTACACAGACTACGAAGACCTGAACTTTGACTCATATATGATCCCCACACAAGAACTAAAGCCCGGAGAACTACGACTGCTAGAAGTAGACAATCGAGTAGTCCTCCCAATAGAAATAACAATCCGCATACTAATCTCATCAGAAGATGTACTCCACTCATGAGCCGTACCGTCCCTAGGACTAAAAACTGATGCTATCCCAGGACGACTAAACCAAACAACCCTAATAACCATACGACCAGGACTGTACTACGGTCAATGCTCAGAAATCTGTGGTTCAAACCACAGCTTCATACCTATTGTCCTCGAATTGGTCCCACTATCCCACTTCGAGAAATGATCTACCTCAATGCTT---------------------TAA");
+ data.put(
+ "Whale",
+ "ATGGCATATCCATTCCAACTAGGTTTCCAAGATGCAGCATCACCCATCATAGAAGAGCTCCTACACTTTCACGATCATACACTAATAATCGTTTTTCTAATTAGCTCTTTAGTTCTCTACATTATTACCCTAATGCTTACAACCAAATTAACACATACTAGTACAATAGACGCCCAAGAAGTAGAAACTGTCTGAACTATCCTCCCAGCCATTATCTTAATTTTAATTGCCTTGCCTTCATTACGGATCCTTTACATAATAGACGAAGTCAATAACCCCTCCCTCACTGTAAAAACAATAGGTCACCAATGATATTGAAGCTATGAGTATACCGACTACGAAGACCTAAGCTTCGACTCCTATATAATCCCAACATCAGACCTAAAGCCAGGAGAACTACGATTATTAGAAGTAGATAACCGAGTTGTCTTACCTATAGAAATAACAATCCGAATATTAGTCTCATCAGAAGACGTACTCCACTCATGGGCCGTACCCTCCTTGGGCCTAAAAACAGATGCAATCCCAGGACGCCTAAACCAAACAACCTTAATATCAACACGACCAGGCCTATTTTATGGACAATGCTCAGAGATCTGCGGCTCAAACCACAGTTTCATACCAATTGTCCTAGAACTAGTACCCCTAGAAGTCTTTGAAAAATGATCTGTATCAATACTA---------------------TAA");
+ data.put(
+ "Frog",
+ "ATGGCACACCCATCACAATTAGGTTTTCAAGACGCAGCCTCTCCAATTATAGAAGAATTACTTCACTTCCACGACCATACCCTCATAGCCGTTTTTCTTATTAGTACGCTAGTTCTTTACATTATTACTATTATAATAACTACTAAACTAACTAATACAAACCTAATGGACGCACAAGAGATCGAAATAGTGTGAACTATTATACCAGCTATTAGCCTCATCATAATTGCCCTTCCATCCCTTCGTATCCTATATTTAATAGATGAAGTTAATGATCCACACTTAACAATTAAAGCAATCGGCCACCAATGATACTGAAGCTACGAATATACTAACTATGAGGATCTCTCATTTGACTCTTATATAATTCCAACTAATGACCTTACCCCTGGACAATTCCGGCTGCTAGAAGTTGATAATCGAATAGTAGTCCCAATAGAATCTCCAACCCGACTTTTAGTTACAGCCGAAGACGTCCTCCACTCGTGAGCTGTACCCTCCTTGGGTGTCAAAACAGATGCAATCCCAGGACGACTTCATCAAACATCATTTATTGCTACTCGTCCGGGAGTATTTTACGGACAATGTTCAGAAATTTGCGGAGCAAACCACAGCTTTATACCAATTGTAGTTGAAGCAGTACCGCTAACCGACTTTGAAAACTGATCTTCATCAATACTA---GAAGCATCACTA------AGA");
+ return data;
+ }
+
+ /**
+ * Tests sequential format file is read correctly by comparing read sequence
+ * with that in the test data.
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testSequentialDataExtraction() throws Exception
+ {
+ testDataExtraction(sequentialFile);
+ }
+
+ /**
+ * Tests interleaved format file is read correctly by comparing read sequence
+ * with that in the test data.
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testInterleavedDataExtraction() throws Exception
+ {
+ testDataExtraction(interleavedFile);
+ }
+
+ /**
+ * Tests a PHYLIP file is read correctly by comparing read sequence with that
+ * in the test data.
+ *
+ * @throws Exception
+ */
+ private void testDataExtraction(String file) throws IOException
+ {
+ AppletFormatAdapter rf = new AppletFormatAdapter();
+ Alignment al = rf.readFile(file, AppletFormatAdapter.FILE,
+ PhylipFile.FILE_DESC);
+ assertNotNull("Couldn't read supplied alignment data.", al);
+
+ Map<String, String> data = PhylipFileTests.getTestData();
+ for (SequenceI s : al.getSequencesArray())
+ {
+ assertTrue(s.getName() + " sequence did not match test data.", data
+ .get(s.getName()).equals(s.getSequenceAsString()));
+ }
+ }
+
+ /**
+ * Tests sequential format file reading and writing without data lose using
+ * similar approach to {@link StockholmFileTest}
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testSequentialIO() throws Exception
+ {
+ testIO(sequentialFile);
+ }
+
+ /**
+ * Tests interleaved format file reading and writing without data lose using
+ * similar approach to {@link StockholmFileTest}
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testInterleavedIO() throws Exception
+ {
+ testIO(interleavedFile);
+ }
+
+ /**
+ * Uses {@link StockholmFileTest} to test read/write/read
+ *
+ * @param file
+ * @throws IOException
+ */
+ public void testIO(String file) throws IOException
+ {
+ AppletFormatAdapter rf = new AppletFormatAdapter();
+ Alignment al = rf.readFile(file, AppletFormatAdapter.FILE,
+ PhylipFile.FILE_DESC);
+ assertNotNull("Couldn't read supplied alignment data.", al);
+
+ String outputfile = rf.formatSequences(PhylipFile.FILE_DESC, al, true);
+
+ Alignment al_input = new AppletFormatAdapter().readFile(outputfile,
+ AppletFormatAdapter.PASTE, PhylipFile.FILE_DESC);
+ assertNotNull("Couldn't parse reimported alignment data.", al_input);
+
+ StockholmFileTest.testAlignmentEquivalence(al, al_input);
+
+ }
+}
\ No newline at end of file