From: Jim Procter Date: Mon, 6 Oct 2014 16:29:00 +0000 (+0100) Subject: Merge branch 'Release_2_8_2_Branch' into Release_2_8_2_Branch_i18n X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=d518f19c8d411d365085962d4ec8dadc78e773f9;hp=14dc1751bb77650f5621d1a77953e94806d0c9d7;p=jalview.git Merge branch 'Release_2_8_2_Branch' into Release_2_8_2_Branch_i18n --- diff --git a/examples/dna_interleaved.phy b/examples/dna_interleaved.phy new file mode 100644 index 0000000..745f399 --- /dev/null +++ b/examples/dna_interleaved.phy @@ -0,0 +1,132 @@ +10 705 +Cow ATGGCATATCCCATACAACTAGGATTCCAAGATGCAACATCACCAATCATAGAAGAACTA +Carp ATGGCACACCCAACGCAACTAGGTTTCAAGGACGCGGCCATACCCGTTATAGAGGAACTT +Chicken ATGGCCAACCACTCCCAACTAGGCTTTCAAGACGCCTCATCCCCCATCATAGAAGAGCTC +Human ATGGCACATGCAGCGCAAGTAGGTCTACAAGACGCTACTTCCCCTATCATAGAAGAGCTT +Loach ATGGCACATCCCACACAATTAGGATTCCAAGACGCGGCCTCACCCGTAATAGAAGAACTT +Mouse ATGGCCTACCCATTCCAACTTGGTCTACAAGACGCCACATCCCCTATTATAGAAGAGCTA +Rat ATGGCTTACCCATTTCAACTTGGCTTACAAGACGCTACATCACCTATCATAGAAGAACTT +Seal ATGGCATACCCCCTACAAATAGGCCTACAAGATGCAACCTCTCCCATTATAGAGGAGTTA +Whale ATGGCATATCCATTCCAACTAGGTTTCCAAGATGCAGCATCACCCATCATAGAAGAGCTC +Frog ATGGCACACCCATCACAATTAGGTTTTCAAGACGCAGCCTCTCCAATTATAGAAGAATTA + +CTTCACTTTCATGACCACACGCTAATAATTGTCTTCTTAATTAGCTCATTAGTACTTTAC +CTTCACTTCCACGACCACGCATTAATAATTGTGCTCCTAATTAGCACTTTAGTTTTATAT +GTTGAATTCCACGACCACGCCCTGATAGTCGCACTAGCAATTTGCAGCTTAGTACTCTAC +ATCACCTTTCATGATCACGCCCTCATAATCATTTTCCTTATCTGCTTCCTAGTCCTGTAT +CTTCACTTCCATGACCATGCCCTAATAATTGTATTTTTGATTAGCGCCCTAGTACTTTAT +ATAAATTTCCATGATCACACACTAATAATTGTTTTCCTAATTAGCTCCTTAGTCCTCTAT +ACAAACTTTCATGACCACACCCTAATAATTGTATTCCTCATCAGCTCCCTAGTACTTTAT +CTACACTTCCATGACCACACATTAATAATTGTGTTCCTAATTAGCTCATTAGTACTCTAC +CTACACTTTCACGATCATACACTAATAATCGTTTTTCTAATTAGCTCTTTAGTTCTCTAC +CTTCACTTCCACGACCATACCCTCATAGCCGTTTTTCTTATTAGTACGCTAGTTCTTTAC + +ATTATTTCACTAATACTAACGACAAAGCTGACCCATACAAGCACGATAGATGCACAAGAA +ATTATTACTGCAATGGTATCAACTAAACTTACTAATAAATATATTCTAGACTCCCAAGAA 
+CTTCTAACTCTTATACTTATAGAAAAACTATCA---TCAAACACCGTAGATGCCCAAGAA +GCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACATCTCAGACGCTCAGGAA +GTTATTATTACAACCGTCTCAACAAAACTCACTAACATATATATTTTGGACTCACAAGAA +ATCATCTCGCTAATATTAACAACAAAACTAACACATACAAGCACAATAGATGCACAAGAA +ATTATTTCACTAATACTAACAACAAAACTAACACACACAAGCACAATAGACGCCCAAGAA +ATTATCTCACTTATACTAACCACGAAACTCACCCACACAAGTACAATAGACGCACAAGAA +ATTATTACCCTAATGCTTACAACCAAATTAACACATACTAGTACAATAGACGCCCAAGAA +ATTATTACTATTATAATAACTACTAAACTAACTAATACAAACCTAATGGACGCACAAGAG + +GTAGAGACAATCTGAACCATTCTGCCCGCCATCATCTTAATTCTAATTGCTCTTCCTTCT +ATCGAAATCGTATGAACCATTCTACCAGCCGTCATTTTAGTACTAATCGCCCTGCCCTCC +GTTGAACTAATCTGAACCATCCTACCCGCTATTGTCCTAGTCCTGCTTGCCCTCCCCTCC +ATAGAAACCGTCTGAACTATCCTGCCCGCCATCATCCTAGTCCTCATCGCCCTCCCATCC +ATTGAAATCGTATGAACTGTGCTCCCTGCCCTAATCCTCATTTTAATCGCCCTCCCCTCA +GTTGAAACCATTTGAACTATTCTACCAGCTGTAATCCTTATCATAATTGCTCTCCCCTCT +GTAGAAACAATTTGAACAATTCTCCCAGCTGTCATTCTTATTCTAATTGCCCTTCCCTCC +GTGGAAACGGTGTGAACGATCCTACCCGCTATCATTTTAATTCTCATTGCCCTACCATCA +GTAGAAACTGTCTGAACTATCCTCCCAGCCATTATCTTAATTTTAATTGCCTTGCCTTCA +ATCGAAATAGTGTGAACTATTATACCAGCTATTAGCCTCATCATAATTGCCCTTCCATCC + +TTACGAATTCTATACATAATAGATGAAATCAATAACCCATCTCTTACAGTAAAAACCATA +CTACGCATCCTGTACCTTATAGACGAAATTAACGACCCTCACCTGACAATTAAAGCAATA +CTCCAAATCCTCTACATAATAGACGAAATCGACGAACCTGATCTCACCCTAAAAGCCATC +CTACGCATCCTTTACATAACAGACGAGGTCAACGATCCCTCCCTTACCATCAAATCAATT +CTACGAATTCTATATCTTATAGACGAGATTAATGACCCCCACCTAACAATTAAGGCCATG +CTACGCATTCTATATATAATAGACGAAATCAACAACCCCGTATTAACCGTTAAAACCATA +CTACGAATTCTATACATAATAGACGAGATTAATAACCCAGTTCTAACAGTAAAAACTATA +TTACGAATCCTCTACATAATGGACGAGATCAATAACCCTTCCTTGACCGTAAAAACTATA +TTACGGATCCTTTACATAATAGACGAAGTCAATAACCCCTCCCTCACTGTAAAAACAATA +CTTCGTATCCTATATTTAATAGATGAAGTTAATGATCCACACTTAACAATTAAAGCAATC + +GGACATCAGTGATACTGAAGCTATGAGTATACAGATTATGAGGACTTAAGCTTCGACTCC +GGACACCAATGATACTGAAGTTACGAGTATACAGACTATGAAAATCTAGGATTCGACTCC +GGACACCAATGATACTGAACCTATGAATACACAGACTTCAAGGACCTCTCATTTGACTCC +GGCCACCAATGGTACTGAACCTACGAGTACACCGACTACGGCGGACTAATCTTCAACTCC 
+GGGCACCAATGATACTGAAGCTACGAGTATACTGATTATGAAAACTTAAGTTTTGACTCC +GGGCACCAATGATACTGAAGCTACGAATATACTGACTATGAAGACCTATGCTTTGATTCA +GGACACCAATGATACTGAAGCTATGAATATACTGACTATGAAGACCTATGCTTTGACTCC +GGACATCAGTGATACTGAAGCTATGAGTACACAGACTACGAAGACCTGAACTTTGACTCA +GGTCACCAATGATATTGAAGCTATGAGTATACCGACTACGAAGACCTAAGCTTCGACTCC +GGCCACCAATGATACTGAAGCTACGAATATACTAACTATGAGGATCTCTCATTTGACTCT + +TACATAATTCCAACATCAGAATTAAAGCCAGGGGAGCTACGACTATTAGAAGTCGATAAT +TATATAGTACCAACCCAAGACCTTGCCCCCGGACAATTCCGACTTCTGGAAACAGACCAC +TACATAACCCCAACAACAGACCTCCCCCTAGGCCACTTCCGCCTACTAGAAGTCGACCAT +TACATACTTCCCCCATTATTCCTAGAACCAGGCGACCTGCGACTCCTTGACGTTGACAAT +TACATAATCCCCACCCAGGACCTAACCCCTGGACAATTCCGGCTACTAGAGACAGACCAC +TATATAATCCCAACAAACGACCTAAAACCTGGTGAACTACGACTGCTAGAAGTTGATAAC +TACATAATCCCAACCAATGACCTAAAACCAGGTGAACTTCGTCTATTAGAAGTTGATAAT +TATATGATCCCCACACAAGAACTAAAGCCCGGAGAACTACGACTGCTAGAAGTAGACAAT +TATATAATCCCAACATCAGACCTAAAGCCAGGAGAACTACGATTATTAGAAGTAGATAAC +TATATAATTCCAACTAATGACCTTACCCCTGGACAATTCCGGCTGCTAGAAGTTGATAAT + +CGAGTTGTACTACCAATAGAAATAACAATCCGAATGTTAGTCTCCTCTGAAGACGTATTA +CGAATAGTTGTTCCAATAGAATCCCCAGTCCGTGTCCTAGTATCTGCTGAAGACGTGCTA +CGCATTGTAATCCCCATAGAATCCCCCATTCGAGTAATCATCACCGCTGATGACGTCCTC +CGAGTAGTACTCCCGATTGAAGCCCCCATTCGTATAATAATTACATCACAAGACGTCTTG +CGAATGGTTGTTCCCATAGAATCCCCTATTCGCATTCTTGTTTCCGCCGAAGATGTACTA +CGAGTCGTTCTGCCAATAGAACTTCCAATCCGTATATTAATTTCATCTGAAGACGTCCTC +CGGGTAGTCTTACCAATAGAACTTCCAATTCGTATACTAATCTCATCCGAAGACGTCCTG +CGAGTAGTCCTCCCAATAGAAATAACAATCCGCATACTAATCTCATCAGAAGATGTACTC +CGAGTTGTCTTACCTATAGAAATAACAATCCGAATATTAGTCTCATCAGAAGACGTACTC +CGAATAGTAGTCCCAATAGAATCTCCAACCCGACTTTTAGTTACAGCCGAAGACGTCCTC + +CACTCATGAGCTGTGCCCTCTCTAGGACTAAAAACAGACGCAATCCCAGGCCGTCTAAAC +CATTCTTGAGCTGTTCCATCCCTTGGCGTAAAAATGGACGCAGTCCCAGGACGACTAAAT +CACTCATGAGCCGTACCCGCCCTCGGGGTAAAAACAGACGCAATCCCTGGACGACTAAAT +CACTCATGAGCTGTCCCCACATTAGGCTTAAAAACAGATGCAATTCCCGGACGTCTAAAC +CACTCCTGGGCCCTTCCAGCCATGGGGGTAAAGATAGACGCGGTCCCAGGACGCCTTAAC +CACTCATGAGCAGTCCCCTCCCTAGGACTTAAAACTGATGCCATCCCAGGCCGACTAAAT 
+CACTCATGAGCCATCCCTTCACTAGGGTTAAAAACCGACGCAATCCCCGGCCGCCTAAAC +CACTCATGAGCCGTACCGTCCCTAGGACTAAAAACTGATGCTATCCCAGGACGACTAAAC +CACTCATGGGCCGTACCCTCCTTGGGCCTAAAAACAGATGCAATCCCAGGACGCCTAAAC +CACTCGTGAGCTGTACCCTCCTTGGGTGTCAAAACAGATGCAATCCCAGGACGACTTCAT + +CAAACAACCCTTATATCGTCCCGTCCAGGCTTATATTACGGTCAATGCTCAGAAATTTGC +CAAGCCGCCTTTATTGCCTCACGCCCAGGGGTCTTTTACGGACAATGCTCTGAAATTTGT +CAAACCTCCTTCATCACCACTCGACCAGGAGTGTTTTACGGACAATGCTCAGAAATCTGC +CAAACCACTTTCACCGCTACACGACCGGGGGTATACTACGGTCAATGCTCTGAAATCTGT +CAAACCGCCTTTATTGCCTCCCGCCCCGGGGTATTCTATGGGCAATGCTCAGAAATCTGT +CAAGCAACAGTAACATCAAACCGACCAGGGTTATTCTATGGCCAATGCTCTGAAATTTGT +CAAGCTACAGTCACATCAAACCGACCAGGTCTATTCTATGGCCAATGCTCTGAAATTTGC +CAAACAACCCTAATAACCATACGACCAGGACTGTACTACGGTCAATGCTCAGAAATCTGT +CAAACAACCTTAATATCAACACGACCAGGCCTATTTTATGGACAATGCTCAGAGATCTGC +CAAACATCATTTATTGCTACTCGTCCGGGAGTATTTTACGGACAATGTTCAGAAATTTGC + +GGGTCAAACCACAGTTTCATACCCATTGTCCTTGAGTTAGTCCCACTAAAGTACTTTGAA +GGAGCTAATCACAGCTTTATACCAATTGTAGTTGAAGCAGTACCTCTCGAACACTTCGAA +GGAGCTAACCACAGCTACATACCCATTGTAGTAGAGTCTACCCCCCTAAAACACTTTGAA +GGAGCAAACCACAGTTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAA +GGAGCAAACCACAGCTTTATACCCATCGTAGTAGAAGCGGTCCCACTATCTCACTTCGAA +GGATCTAACCATAGCTTTATGCCCATTGTCCTAGAAATGGTTCCACTAAAATATTTCGAA +GGCTCAAATCACAGCTTCATACCCATTGTACTAGAAATAGTGCCTCTAAAATATTTCGAA +GGTTCAAACCACAGCTTCATACCTATTGTCCTCGAATTGGTCCCACTATCCCACTTCGAG +GGCTCAAACCACAGTTTCATACCAATTGTCCTAGAACTAGTACCCCTAGAAGTCTTTGAA +GGAGCAAACCACAGCTTTATACCAATTGTAGTTGAAGCAGTACCGCTAACCGACTTTGAA + +AAATGATCTGCGTCAATATTA---------------------TAA +AACTGATCCTCATTAATACTAGAAGACGCCTCGCTAGGAAGCTAA +GCCTGATCCTCACTA------------------CTGTCATCTTAA +ATA---------------------GGGCCCGTATTTACCCTATAG +AACTGGTCCACCCTTATACTAAAAGACGCCTCACTAGGAAGCTAA +AACTGATCTGCTTCAATAATT---------------------TAA +AACTGATCAGCTTCTATAATT---------------------TAA +AAATGATCTACCTCAATGCTT---------------------TAA +AAATGATCTGTATCAATACTA---------------------TAA +AACTGATCTTCATCAATACTA---GAAGCATCACTA------AGA diff --git 
a/examples/dna_sequential.phy b/examples/dna_sequential.phy new file mode 100644 index 0000000..99dd34c --- /dev/null +++ b/examples/dna_sequential.phy @@ -0,0 +1,11 @@ +10 705 +Cow ATGGCATATCCCATACAACTAGGATTCCAAGATGCAACATCACCAATCATAGAAGAACTACTTCACTTTCATGACCACACGCTAATAATTGTCTTCTTAATTAGCTCATTAGTACTTTACATTATTTCACTAATACTAACGACAAAGCTGACCCATACAAGCACGATAGATGCACAAGAAGTAGAGACAATCTGAACCATTCTGCCCGCCATCATCTTAATTCTAATTGCTCTTCCTTCTTTACGAATTCTATACATAATAGATGAAATCAATAACCCATCTCTTACAGTAAAAACCATAGGACATCAGTGATACTGAAGCTATGAGTATACAGATTATGAGGACTTAAGCTTCGACTCCTACATAATTCCAACATCAGAATTAAAGCCAGGGGAGCTACGACTATTAGAAGTCGATAATCGAGTTGTACTACCAATAGAAATAACAATCCGAATGTTAGTCTCCTCTGAAGACGTATTACACTCATGAGCTGTGCCCTCTCTAGGACTAAAAACAGACGCAATCCCAGGCCGTCTAAACCAAACAACCCTTATATCGTCCCGTCCAGGCTTATATTACGGTCAATGCTCAGAAATTTGCGGGTCAAACCACAGTTTCATACCCATTGTCCTTGAGTTAGTCCCACTAAAGTACTTTGAAAAATGATCTGCGTCAATATTA---------------------TAA +Carp ATGGCACACCCAACGCAACTAGGTTTCAAGGACGCGGCCATACCCGTTATAGAGGAACTTCTTCACTTCCACGACCACGCATTAATAATTGTGCTCCTAATTAGCACTTTAGTTTTATATATTATTACTGCAATGGTATCAACTAAACTTACTAATAAATATATTCTAGACTCCCAAGAAATCGAAATCGTATGAACCATTCTACCAGCCGTCATTTTAGTACTAATCGCCCTGCCCTCCCTACGCATCCTGTACCTTATAGACGAAATTAACGACCCTCACCTGACAATTAAAGCAATAGGACACCAATGATACTGAAGTTACGAGTATACAGACTATGAAAATCTAGGATTCGACTCCTATATAGTACCAACCCAAGACCTTGCCCCCGGACAATTCCGACTTCTGGAAACAGACCACCGAATAGTTGTTCCAATAGAATCCCCAGTCCGTGTCCTAGTATCTGCTGAAGACGTGCTACATTCTTGAGCTGTTCCATCCCTTGGCGTAAAAATGGACGCAGTCCCAGGACGACTAAATCAAGCCGCCTTTATTGCCTCACGCCCAGGGGTCTTTTACGGACAATGCTCTGAAATTTGTGGAGCTAATCACAGCTTTATACCAATTGTAGTTGAAGCAGTACCTCTCGAACACTTCGAAAACTGATCCTCATTAATACTAGAAGACGCCTCGCTAGGAAGCTAA +Chicken 
ATGGCCAACCACTCCCAACTAGGCTTTCAAGACGCCTCATCCCCCATCATAGAAGAGCTCGTTGAATTCCACGACCACGCCCTGATAGTCGCACTAGCAATTTGCAGCTTAGTACTCTACCTTCTAACTCTTATACTTATAGAAAAACTATCA---TCAAACACCGTAGATGCCCAAGAAGTTGAACTAATCTGAACCATCCTACCCGCTATTGTCCTAGTCCTGCTTGCCCTCCCCTCCCTCCAAATCCTCTACATAATAGACGAAATCGACGAACCTGATCTCACCCTAAAAGCCATCGGACACCAATGATACTGAACCTATGAATACACAGACTTCAAGGACCTCTCATTTGACTCCTACATAACCCCAACAACAGACCTCCCCCTAGGCCACTTCCGCCTACTAGAAGTCGACCATCGCATTGTAATCCCCATAGAATCCCCCATTCGAGTAATCATCACCGCTGATGACGTCCTCCACTCATGAGCCGTACCCGCCCTCGGGGTAAAAACAGACGCAATCCCTGGACGACTAAATCAAACCTCCTTCATCACCACTCGACCAGGAGTGTTTTACGGACAATGCTCAGAAATCTGCGGAGCTAACCACAGCTACATACCCATTGTAGTAGAGTCTACCCCCCTAAAACACTTTGAAGCCTGATCCTCACTA------------------CTGTCATCTTAA +Human ATGGCACATGCAGCGCAAGTAGGTCTACAAGACGCTACTTCCCCTATCATAGAAGAGCTTATCACCTTTCATGATCACGCCCTCATAATCATTTTCCTTATCTGCTTCCTAGTCCTGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACATCTCAGACGCTCAGGAAATAGAAACCGTCTGAACTATCCTGCCCGCCATCATCCTAGTCCTCATCGCCCTCCCATCCCTACGCATCCTTTACATAACAGACGAGGTCAACGATCCCTCCCTTACCATCAAATCAATTGGCCACCAATGGTACTGAACCTACGAGTACACCGACTACGGCGGACTAATCTTCAACTCCTACATACTTCCCCCATTATTCCTAGAACCAGGCGACCTGCGACTCCTTGACGTTGACAATCGAGTAGTACTCCCGATTGAAGCCCCCATTCGTATAATAATTACATCACAAGACGTCTTGCACTCATGAGCTGTCCCCACATTAGGCTTAAAAACAGATGCAATTCCCGGACGTCTAAACCAAACCACTTTCACCGCTACACGACCGGGGGTATACTACGGTCAATGCTCTGAAATCTGTGGAGCAAACCACAGTTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAAATA---------------------GGGCCCGTATTTACCCTATAG +Loach 
ATGGCACATCCCACACAATTAGGATTCCAAGACGCGGCCTCACCCGTAATAGAAGAACTTCTTCACTTCCATGACCATGCCCTAATAATTGTATTTTTGATTAGCGCCCTAGTACTTTATGTTATTATTACAACCGTCTCAACAAAACTCACTAACATATATATTTTGGACTCACAAGAAATTGAAATCGTATGAACTGTGCTCCCTGCCCTAATCCTCATTTTAATCGCCCTCCCCTCACTACGAATTCTATATCTTATAGACGAGATTAATGACCCCCACCTAACAATTAAGGCCATGGGGCACCAATGATACTGAAGCTACGAGTATACTGATTATGAAAACTTAAGTTTTGACTCCTACATAATCCCCACCCAGGACCTAACCCCTGGACAATTCCGGCTACTAGAGACAGACCACCGAATGGTTGTTCCCATAGAATCCCCTATTCGCATTCTTGTTTCCGCCGAAGATGTACTACACTCCTGGGCCCTTCCAGCCATGGGGGTAAAGATAGACGCGGTCCCAGGACGCCTTAACCAAACCGCCTTTATTGCCTCCCGCCCCGGGGTATTCTATGGGCAATGCTCAGAAATCTGTGGAGCAAACCACAGCTTTATACCCATCGTAGTAGAAGCGGTCCCACTATCTCACTTCGAAAACTGGTCCACCCTTATACTAAAAGACGCCTCACTAGGAAGCTAA +Mouse ATGGCCTACCCATTCCAACTTGGTCTACAAGACGCCACATCCCCTATTATAGAAGAGCTAATAAATTTCCATGATCACACACTAATAATTGTTTTCCTAATTAGCTCCTTAGTCCTCTATATCATCTCGCTAATATTAACAACAAAACTAACACATACAAGCACAATAGATGCACAAGAAGTTGAAACCATTTGAACTATTCTACCAGCTGTAATCCTTATCATAATTGCTCTCCCCTCTCTACGCATTCTATATATAATAGACGAAATCAACAACCCCGTATTAACCGTTAAAACCATAGGGCACCAATGATACTGAAGCTACGAATATACTGACTATGAAGACCTATGCTTTGATTCATATATAATCCCAACAAACGACCTAAAACCTGGTGAACTACGACTGCTAGAAGTTGATAACCGAGTCGTTCTGCCAATAGAACTTCCAATCCGTATATTAATTTCATCTGAAGACGTCCTCCACTCATGAGCAGTCCCCTCCCTAGGACTTAAAACTGATGCCATCCCAGGCCGACTAAATCAAGCAACAGTAACATCAAACCGACCAGGGTTATTCTATGGCCAATGCTCTGAAATTTGTGGATCTAACCATAGCTTTATGCCCATTGTCCTAGAAATGGTTCCACTAAAATATTTCGAAAACTGATCTGCTTCAATAATT---------------------TAA +Rat 
ATGGCTTACCCATTTCAACTTGGCTTACAAGACGCTACATCACCTATCATAGAAGAACTTACAAACTTTCATGACCACACCCTAATAATTGTATTCCTCATCAGCTCCCTAGTACTTTATATTATTTCACTAATACTAACAACAAAACTAACACACACAAGCACAATAGACGCCCAAGAAGTAGAAACAATTTGAACAATTCTCCCAGCTGTCATTCTTATTCTAATTGCCCTTCCCTCCCTACGAATTCTATACATAATAGACGAGATTAATAACCCAGTTCTAACAGTAAAAACTATAGGACACCAATGATACTGAAGCTATGAATATACTGACTATGAAGACCTATGCTTTGACTCCTACATAATCCCAACCAATGACCTAAAACCAGGTGAACTTCGTCTATTAGAAGTTGATAATCGGGTAGTCTTACCAATAGAACTTCCAATTCGTATACTAATCTCATCCGAAGACGTCCTGCACTCATGAGCCATCCCTTCACTAGGGTTAAAAACCGACGCAATCCCCGGCCGCCTAAACCAAGCTACAGTCACATCAAACCGACCAGGTCTATTCTATGGCCAATGCTCTGAAATTTGCGGCTCAAATCACAGCTTCATACCCATTGTACTAGAAATAGTGCCTCTAAAATATTTCGAAAACTGATCAGCTTCTATAATT---------------------TAA +Seal ATGGCATACCCCCTACAAATAGGCCTACAAGATGCAACCTCTCCCATTATAGAGGAGTTACTACACTTCCATGACCACACATTAATAATTGTGTTCCTAATTAGCTCATTAGTACTCTACATTATCTCACTTATACTAACCACGAAACTCACCCACACAAGTACAATAGACGCACAAGAAGTGGAAACGGTGTGAACGATCCTACCCGCTATCATTTTAATTCTCATTGCCCTACCATCATTACGAATCCTCTACATAATGGACGAGATCAATAACCCTTCCTTGACCGTAAAAACTATAGGACATCAGTGATACTGAAGCTATGAGTACACAGACTACGAAGACCTGAACTTTGACTCATATATGATCCCCACACAAGAACTAAAGCCCGGAGAACTACGACTGCTAGAAGTAGACAATCGAGTAGTCCTCCCAATAGAAATAACAATCCGCATACTAATCTCATCAGAAGATGTACTCCACTCATGAGCCGTACCGTCCCTAGGACTAAAAACTGATGCTATCCCAGGACGACTAAACCAAACAACCCTAATAACCATACGACCAGGACTGTACTACGGTCAATGCTCAGAAATCTGTGGTTCAAACCACAGCTTCATACCTATTGTCCTCGAATTGGTCCCACTATCCCACTTCGAGAAATGATCTACCTCAATGCTT---------------------TAA +Whale 
ATGGCATATCCATTCCAACTAGGTTTCCAAGATGCAGCATCACCCATCATAGAAGAGCTCCTACACTTTCACGATCATACACTAATAATCGTTTTTCTAATTAGCTCTTTAGTTCTCTACATTATTACCCTAATGCTTACAACCAAATTAACACATACTAGTACAATAGACGCCCAAGAAGTAGAAACTGTCTGAACTATCCTCCCAGCCATTATCTTAATTTTAATTGCCTTGCCTTCATTACGGATCCTTTACATAATAGACGAAGTCAATAACCCCTCCCTCACTGTAAAAACAATAGGTCACCAATGATATTGAAGCTATGAGTATACCGACTACGAAGACCTAAGCTTCGACTCCTATATAATCCCAACATCAGACCTAAAGCCAGGAGAACTACGATTATTAGAAGTAGATAACCGAGTTGTCTTACCTATAGAAATAACAATCCGAATATTAGTCTCATCAGAAGACGTACTCCACTCATGGGCCGTACCCTCCTTGGGCCTAAAAACAGATGCAATCCCAGGACGCCTAAACCAAACAACCTTAATATCAACACGACCAGGCCTATTTTATGGACAATGCTCAGAGATCTGCGGCTCAAACCACAGTTTCATACCAATTGTCCTAGAACTAGTACCCCTAGAAGTCTTTGAAAAATGATCTGTATCAATACTA---------------------TAA +Frog ATGGCACACCCATCACAATTAGGTTTTCAAGACGCAGCCTCTCCAATTATAGAAGAATTACTTCACTTCCACGACCATACCCTCATAGCCGTTTTTCTTATTAGTACGCTAGTTCTTTACATTATTACTATTATAATAACTACTAAACTAACTAATACAAACCTAATGGACGCACAAGAGATCGAAATAGTGTGAACTATTATACCAGCTATTAGCCTCATCATAATTGCCCTTCCATCCCTTCGTATCCTATATTTAATAGATGAAGTTAATGATCCACACTTAACAATTAAAGCAATCGGCCACCAATGATACTGAAGCTACGAATATACTAACTATGAGGATCTCTCATTTGACTCTTATATAATTCCAACTAATGACCTTACCCCTGGACAATTCCGGCTGCTAGAAGTTGATAATCGAATAGTAGTCCCAATAGAATCTCCAACCCGACTTTTAGTTACAGCCGAAGACGTCCTCCACTCGTGAGCTGTACCCTCCTTGGGTGTCAAAACAGATGCAATCCCAGGACGACTTCATCAAACATCATTTATTGCTACTCGTCCGGGAGTATTTTACGGACAATGTTCAGAAATTTGCGGAGCAAACCACAGCTTTATACCAATTGTAGTTGAAGCAGTACCGCTAACCGACTTTGAAAACTGATCTTCATCAATACTA---GAAGCATCACTA------AGA diff --git a/src/jalview/io/AppletFormatAdapter.java b/src/jalview/io/AppletFormatAdapter.java index ce15f0e..ed49d5e 100755 --- a/src/jalview/io/AppletFormatAdapter.java +++ b/src/jalview/io/AppletFormatAdapter.java @@ -1,39 +1,42 @@ /* * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2) * Copyright (C) 2014 The Jalview Authors - * + * * This file is part of Jalview. 
- * + * * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License + * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. - * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with Jalview. If not, see . * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.io; +import jalview.api.AlignViewportI; +import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentAnnotation; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.AlignmentView; +import jalview.util.MessageManager; + import java.io.File; import java.io.InputStream; import java.util.List; -import jalview.api.AlignViewportI; -import jalview.datamodel.*; -import jalview.util.MessageManager; - /** * A low level class for alignment and feature IO with alignment formatting * methods used by both applet and application for generating flat alignment * files. It also holds the lists of magic format names that the applet and * application will allow the user to read or write files with. 
- * + * * @author $author$ * @version $Revision$ */ @@ -43,47 +46,48 @@ public class AppletFormatAdapter * List of valid format strings used in the isValidFormat method */ public static final String[] READABLE_FORMATS = new String[] - { "BLC", "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "STH", - "PDB", "JnetFile", "RNAML" }; // , "SimpleBLAST" }; + { "BLC", "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "STH", + "PDB", "JnetFile", "RNAML", PhylipFile.FILE_DESC }; // , "SimpleBLAST" }; /** * List of valid format strings for use by callers of the formatSequences * method */ public static final String[] WRITEABLE_FORMATS = new String[] - { "BLC", "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "AMSA", "STH" }; + { "BLC", "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "AMSA", + "STH", PhylipFile.FILE_DESC }; /** * List of extensions corresponding to file format types in WRITABLE_FNAMES * that are writable by the application. */ public static final String[] WRITABLE_EXTENSIONS = new String[] - { "fa, fasta, mfa, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa", - "jvp", "sto,stk", "jar" }; + { "fa, fasta, mfa, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa", + "jvp", "sto,stk", "jar", PhylipFile.FILE_EXT }; /** * List of writable formats by the application. Order must correspond with the * WRITABLE_EXTENSIONS list of formats. 
*/ public static final String[] WRITABLE_FNAMES = new String[] - { "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Jalview", - "STH", "Jalview" }; + { "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Jalview", + "STH", "Jalview", PhylipFile.FILE_DESC }; /** * List of readable format file extensions by application in order * corresponding to READABLE_FNAMES */ public static final String[] READABLE_EXTENSIONS = new String[] - { "fa, fasta, mfa, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa", - "jar,jvp", "sto,stk", "xml,rnaml" }; // ".blast" + { "fa, fasta, mfa, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa", + "jar,jvp", "sto,stk", "xml,rnaml", PhylipFile.FILE_EXT }; // ".blast" /** * List of readable formats by application in order corresponding to * READABLE_EXTENSIONS */ public static final String[] READABLE_FNAMES = new String[] - { "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Jalview", - "Stockholm", "RNAML" };// , + { "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Jalview", + "Stockholm", "RNAML", PhylipFile.FILE_DESC };// , // "SimpleBLAST" // }; @@ -95,7 +99,7 @@ public class AppletFormatAdapter + prettyPrint(READABLE_FORMATS); /** - * + * * @param els * @return grammatically correct(ish) list consisting of els elements. */ @@ -140,7 +144,7 @@ public class AppletFormatAdapter /** * check that this format is valid for reading - * + * * @param format * a format string to be compared with READABLE_FORMATS * @return true if format is readable @@ -152,7 +156,7 @@ public class AppletFormatAdapter /** * validate format is valid for IO - * + * * @param format * a format string to be compared with either READABLE_FORMATS or * WRITEABLE_FORMATS @@ -166,9 +170,9 @@ public class AppletFormatAdapter boolean valid = false; String[] format_list = (forwriting) ? 
WRITEABLE_FORMATS : READABLE_FORMATS; - for (int i = 0; i < format_list.length; i++) + for (String element : format_list) { - if (format_list[i].equalsIgnoreCase(format)) + if (element.equalsIgnoreCase(format)) { return true; } @@ -179,14 +183,14 @@ public class AppletFormatAdapter /** * Constructs the correct filetype parser for a characterised datasource - * + * * @param inFile * data/data location * @param type * type of datasource * @param format * File format of data provided by datasource - * + * * @return DOCUMENT ME! */ public Alignment readFile(String inFile, String type, String format) @@ -244,6 +248,10 @@ public class AppletFormatAdapter { afile = new SimpleBlastFile(inFile, type); } + else if (format.equals(PhylipFile.FILE_DESC)) + { + afile = new PhylipFile(inFile, type); + } else if (format.equals("RNAML")) { afile = new RnamlFile(inFile, type); @@ -295,12 +303,12 @@ public class AppletFormatAdapter /** * Constructs the correct filetype parser for an already open datasource - * + * * @param source * an existing datasource * @param format * File format of data that will be provided by datasource - * + * * @return DOCUMENT ME! 
*/ public AlignmentI readFromFile(FileParse source, String format) @@ -363,7 +371,10 @@ public class AppletFormatAdapter { afile = new SimpleBlastFile(source); } - + else if (format.equals(PhylipFile.FILE_DESC)) + { + afile = new PhylipFile(source); + } Alignment al = new Alignment(afile.getSeqsAsArray()); afile.addAnnotations(al); @@ -410,7 +421,7 @@ public class AppletFormatAdapter /** - * create an alignment flatfile from a Jalview alignment view + * create an alignment flatfile from a Jalview alignment view * @param format * @param jvsuffix * @param av @@ -433,15 +444,15 @@ public class AppletFormatAdapter aselview.addAnnotation(aa); } } - + return formatSequences(format, aselview, jvsuffix); } - + /** * Construct an output class for an alignment in a particular filetype TODO: * allow caller to detect errors and warnings encountered when generating * output - * + * * @param format * string name of alignment format * @param alignment @@ -449,7 +460,7 @@ public class AppletFormatAdapter * @param jvsuffix * passed to AlnFile class controls whether /START-END is added to * sequence names - * + * * @return alignment flat file contents */ public String formatSequences(String format, AlignmentI alignment, @@ -495,6 +506,10 @@ public class AppletFormatAdapter { afile = new AMSAFile(alignment); } + else if (format.equalsIgnoreCase(PhylipFile.FILE_DESC)) + { + afile = new PhylipFile(); + } else if (format.equalsIgnoreCase("RNAML")) { afile = new RnamlFile(); @@ -570,7 +585,7 @@ public class AppletFormatAdapter } catch (Exception e) { System.err - .println("Couln't format the alignment for output as a FASTA file."); + .println("Couln't format the alignment for output as a FASTA file."); e.printStackTrace(System.err); } } @@ -580,8 +595,8 @@ public class AppletFormatAdapter } System.out.println("Read took " + (t1 / 1000.0) + " seconds."); System.out - .println("Difference between free memory now and before is " - + (memf / (1024.0 * 1024.0) * 1.0) + " MB"); + 
.println("Difference between free memory now and before is " + + (memf / (1024.0 * 1024.0) * 1.0) + " MB"); } catch (Exception e) { System.err.println("Exception when dealing with " + i @@ -600,7 +615,7 @@ public class AppletFormatAdapter /** * try to discover how to access the given file as a valid datasource that * will be identified as the given type. - * + * * @param file * @param format * @return protocol that yields the data parsable as the given type @@ -647,7 +662,7 @@ public class AppletFormatAdapter } catch (Exception ex) { System.err - .println("Exception checking resources: " + file + " " + ex); + .println("Exception checking resources: " + file + " " + ex); } if (file.indexOf("://") > -1) @@ -749,7 +764,7 @@ public class AppletFormatAdapter if (debug) { System.out - .println("File deemed not accessible via " + protocol); + .println("File deemed not accessible via " + protocol); } fp.close(); return null; diff --git a/src/jalview/io/FileParse.java b/src/jalview/io/FileParse.java index 4b8caec..8e9a49c 100755 --- a/src/jalview/io/FileParse.java +++ b/src/jalview/io/FileParse.java @@ -52,7 +52,7 @@ public class FileParse public void setNewlineString(String nl) { - newline = nl; + newline = nl; } public String getNewlineString() diff --git a/src/jalview/io/IdentifyFile.java b/src/jalview/io/IdentifyFile.java index 08d4dca..9c7478b 100755 --- a/src/jalview/io/IdentifyFile.java +++ b/src/jalview/io/IdentifyFile.java @@ -1,30 +1,30 @@ /* * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2) * Copyright (C) 2014 The Jalview Authors - * + * * This file is part of Jalview. - * + * * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License + * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. 
- * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with Jalview. If not, see . * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.io; -import java.io.*; +import java.io.IOException; /** * DOCUMENT ME! - * + * * @author $author$ * @version $Revision$ */ @@ -32,10 +32,10 @@ public class IdentifyFile { /** * Identify a datasource's file content. - * + * * @note Do not use this method for stream sources - create a FileParse object * instead. - * + * * @param file * DOCUMENT ME! * @param protocol @@ -60,7 +60,9 @@ public class IdentifyFile emessage = e.getMessage(); } if (parser != null) + { return parser.errormessage; + } return emessage; } @@ -73,7 +75,7 @@ public class IdentifyFile /** * Identify contents of source, closing it or resetting source to start * afterwards. 
- * + * * @param source * @param closeSource * @return filetype string @@ -207,7 +209,7 @@ public class IdentifyFile else { reply = "FASTA"; // possibly a bad choice - may be recognised as - // PIR + // PIR } // otherwise can still possibly be a PIR file } @@ -267,18 +269,24 @@ public class IdentifyFile reply = "PDB"; break; } + else if (data.matches("\\s*\\d+\\s+\\d+\\s*")) + { + reply = PhylipFile.FILE_DESC; + break; + } + /* * // TODO comment out SimpleBLAST identification for Jalview 2.4.1 else * if (!lineswereskipped && data.indexOf("BLAST")<4) { reply = * "SimpleBLAST"; break; - * + * * } // end comments for Jalview 2.4.1 */ else if (!lineswereskipped && data.charAt(0) != '*' && data.charAt(0) != ' ' && data.indexOf(":") < data.indexOf(",")) // && - // data.indexOf(","). * The Jalview Authors are detailed in the 'AUTHORS' file. @@ -23,21 +23,31 @@ package jalview.io; import jalview.util.MessageManager; -import java.io.*; -import java.util.*; - -import java.awt.*; -import java.awt.event.*; -import javax.swing.*; +import java.awt.Component; +import java.awt.Dimension; +import java.awt.EventQueue; +import java.awt.HeadlessException; +import java.awt.event.MouseAdapter; +import java.awt.event.MouseEvent; +import java.io.File; +import java.util.StringTokenizer; +import java.util.Vector; + +import javax.swing.DefaultListCellRenderer; +import javax.swing.JFileChooser; +import javax.swing.JList; +import javax.swing.JOptionPane; +import javax.swing.JPanel; +import javax.swing.JScrollPane; /** * Enhanced file chooser dialog box. - * + * * NOTE: bug on Windows systems when filechooser opened on directory to view * files with colons in title. 
- * + * * @author AMW - * + * */ public class JalviewFileChooser extends JFileChooser { @@ -104,6 +114,7 @@ public class JalviewFileChooser extends JFileChooser setAccessory(new RecentlyOpened()); } + @Override public void setFileFilter(javax.swing.filechooser.FileFilter filter) { super.setFileFilter(filter); @@ -122,6 +133,7 @@ public class JalviewFileChooser extends JFileChooser EventQueue.invokeLater(new Thread() { + @Override public void run() { String currentName = ui.getFileName(); @@ -176,10 +188,15 @@ public class JalviewFileChooser extends JFileChooser { format = "PFAM"; } + else if (format.toUpperCase().startsWith(PhylipFile.FILE_DESC)) + { + format = PhylipFile.FILE_DESC; + } return format; } + @Override public int showSaveDialog(Component parent) throws HeadlessException { this.setAccessory(null); @@ -265,6 +282,7 @@ public class JalviewFileChooser extends JFileChooser list.addMouseListener(new MouseAdapter() { + @Override public void mousePressed(MouseEvent evt) { recentListSelectionChanged(list.getSelectedValue()); @@ -279,6 +297,7 @@ public class JalviewFileChooser extends JFileChooser javax.swing.SwingUtilities.invokeLater(new Runnable() { + @Override public void run() { scroller.getHorizontalScrollBar().setValue( diff --git a/src/jalview/io/PhylipFile.java b/src/jalview/io/PhylipFile.java new file mode 100644 index 0000000..ce65eea --- /dev/null +++ b/src/jalview/io/PhylipFile.java @@ -0,0 +1,320 @@ +/** + * + */ +package jalview.io; + +import jalview.datamodel.Alignment; +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceI; + +import java.io.IOException; + +/** + *

+ * Parser and exporter for PHYLIP file format, as defined in the + * documentation. The parser imports PHYLIP files in both sequential and + * interleaved format, and (currently) exports in interleaved format (using 60 + * characters per matrix for the sequence). + *

+ * + *

+ * The following assumptions have been made for input + *

    + *
  • Sequences are expressed as letters, not real numbers with decimal points + * separated by blanks (which is a valid option according to the specification)
  • + *
+ * + * The following assumptions have been made for output + *
    + *
  • Interleaved format is used, with each matrix consisting of 60 characters; + *
  • + *
  • a blank line is added between each matrix;
  • + *
  • no spacing is added between the sequence characters.
  • + *
+ * + * + *

+ * + * @author David Corsar + * + * + */ +public class PhylipFile extends AlignFile +{ + + // Define file extension and description to save repeating it elsewhere + public static final String FILE_EXT = "phy"; + + public static final String FILE_DESC = "PHYLIP"; + + /** + * + * @see {@link AlignFile#AlignFile()} + */ + public PhylipFile() + { + super(); + } + + /** + * + * @param source + * @throws IOException + */ + public PhylipFile(FileParse source) throws IOException + { + super(source); + } + + /** + * @param inFile + * @param type + * @throws IOException + * @see {@link AlignFile#AlignFile(FileParse)} + */ + public PhylipFile(String inFile, String type) throws IOException + { + super(inFile, type); + } + + /** + * Parses the input source + * + * @see {@link AlignFile#parse()} + */ + @Override + public void parse() throws IOException + { + try + { + // First line should contain number of species and number of + // characters, separated by blanks + String line = nextLine(); + String[] lineElements = line.trim().split("\\s+"); + if (lineElements.length < 2) + { + throw new IOException( + "First line must contain the number of specifies and number of characters"); + } + + int numberSpecies = Integer.parseInt(lineElements[0]), numberCharacters = Integer + .parseInt(lineElements[1]); + + if (numberSpecies <= 0) + { + // there are no sequences in this file so exit as there is nothing to + // parse + return; + } + + SequenceI[] sequenceElements = new Sequence[numberSpecies]; + StringBuffer[] sequences = new StringBuffer[numberSpecies]; + + // if file is in sequential format there is only one data matrix, + // else there are multiple + + // read the first data matrix + for (int i = 0; i < numberSpecies; i++) + { + line = nextLine(); + // lines start with the name - a maximum of 10 characters + // if less, then padded out or terminated with a tab + String potentialName = line.substring(0, 10); + int tabIndex = potentialName.indexOf('\t'); + if (tabIndex == -1) + { +
sequenceElements[i] = parseId(validateName(potentialName)); + sequences[i] = new StringBuffer( + removeWhitespace(line.substring(10))); + } + else + { + sequenceElements[i] = parseId(validateName(potentialName + .substring(0, tabIndex))); + sequences[i] = new StringBuffer( + removeWhitespace(line.substring(tabIndex))); + } + } + + // determine if interleaved + if ((sequences[0]).length() != numberCharacters) + { + // interleaved file, so have to read the remainder + int i = 0; + for (line = nextLine(); line != null; line = nextLine()) + { + // ignore blank lines, as defined by the specification + if (line.length() > 0) + { + sequences[i++].append(removeWhitespace(line)); + } + // reached end of matrix, so get ready for the next one + if (i == sequences.length) + { + i = 0; + } + } + } + + // file parsed completely, now store sequences + for (int i = 0; i < numberSpecies; i++) + { + // first check sequence is the expected length + if (sequences[i].length() != numberCharacters) + { + throw new IOException(sequenceElements[i].getName() + + " sequence is incorrect length - should be " + + numberCharacters + " but is " + sequences[i].length()); + } + sequenceElements[i].setSequence(sequences[i].toString()); + seqs.add(sequenceElements[i]); + } + + // create an alignment based on the sequences + Alignment a = new Alignment(sequenceElements); + // add annotations - although comments say addAnnotations + // is used by AppletFormatAdapter, it doesn't say other + // classes should/can not use it + addAnnotations(a); + + } catch (IOException e) + { + System.err.println("Exception parsing PHYLIP file " + e); + e.printStackTrace(System.err); + throw e; + } + + } + + /** + * Removes any whitespace from txt, used to strip and spaces added to + * sequences to improve human readability + * + * @param txt + * @return + */ + private String removeWhitespace(String txt) + { + return txt.replaceAll("\\s*", ""); + } + + /** + * According to the specification, the name cannot have 
parentheses, square + * brackets, colon, semicolon, comma + * + * @param name + * @return + * @throws IOException + */ + private String validateName(String name) throws IOException + { + char[] invalidCharacters = new char[] + { '(', ')', '[', ']', ':', ';', ',' }; + for (char c : invalidCharacters) + { + if (name.indexOf(c) > -1) + { + throw new IOException("Species name contains illegal character " + + c); + } + } + return name; + } + + /** + *

+ * Prints the seqs in interleaved format, with each matrix consisting of 60 + * characters; a blank line is added between each matrix; no spacing is added + * between the sequence characters. + *

+ * + * + * @see {@link AlignFile#print()} + */ + @Override + public String print() + { + + StringBuffer sb = new StringBuffer(Integer.toString(seqs.size())); + sb.append(" "); + // if there are no sequences, then define the number of characters as 0 + sb.append( + (seqs.size() > 0) ? Integer + .toString(seqs.get(0).getSequence().length) : "0") + .append(newline); + + // Due to how IO is handled, there doesn't appear to be a way to store + // if the original file was sequential or interleaved; if there is, then + // use that to set the value of the following variable + boolean sequential = false; + + // maximum number of columns for each row of interleaved format + int numInterleavedColumns = 60; + + int sequenceLength = 0; + for (SequenceI s : seqs) + { + + // ensure name is only 10 characters + String name = s.getName(); + if (name.length() > 10) + { + name = name.substring(0, 10); + } + else + { + // add padding 10 characters + name = String.format("%1$-" + 10 + "s", s.getName()); + } + sb.append(name); + + // sequential has the entire sequence following the name + if (sequential) + { + sb.append(s.getSequence()); + } + else + { + // Jalview ensures all sequences are of same length so no need + // to keep track of min/max length + sequenceLength = s.getSequence().length; + // interleaved breaks the sequence into chunks for + // interleavedColumns characters + sb.append(s.getSequence(0, + Math.min(numInterleavedColumns, sequenceLength))); + } + sb.append(newline); + } + + // add the remaining matrixes if interleaved and there is something to + // add + if (!sequential && sequenceLength > numInterleavedColumns) + { + // determine number of remaining matrixes + int numMatrics = sequenceLength / numInterleavedColumns; + if ((sequenceLength % numInterleavedColumns) > 0) + { + numMatrics++; + } + + // start i = 1 as first matrix has already been printed + for (int i = 1; i < numMatrics; i++) + { + // add blank line to separate this matrix from previous + 
sb.append(newline); + int start = i * numInterleavedColumns; + for (SequenceI s : seqs) + { + sb.append( + s.getSequence(start, Math.min(start + + numInterleavedColumns, sequenceLength))) + .append(newline); + } + } + + } + + return sb.toString(); + } +} \ No newline at end of file diff --git a/test/jalview/io/PhylipFileTests.java b/test/jalview/io/PhylipFileTests.java new file mode 100644 index 0000000..70e3f2c --- /dev/null +++ b/test/jalview/io/PhylipFileTests.java @@ -0,0 +1,166 @@ +package jalview.io; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import jalview.datamodel.Alignment; +import jalview.datamodel.SequenceI; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.junit.Test; + +/** + * Test file for {@link PhylipFile}. + * + * Tests use example data obtained from molecularrevolution.org. + * + * @author David Corsar + * + */ +public class PhylipFileTests +{ + + // interleaved file from + // http://www.molecularevolution.org/molevolfiles/fileformats/dna.phy.dat + // sequential file is the interleave file converted into sequential format + + static String sequentialFile = "examples/dna_sequential.phy", + interleavedFile = "examples/dna_interleaved.phy"; + + /** + * Creates a name:sequence map for the data in the above files + * + * @return + */ + private static Map getTestData() + { + Map data = new HashMap(); + data.put( + "Cow", + 
"ATGGCATATCCCATACAACTAGGATTCCAAGATGCAACATCACCAATCATAGAAGAACTACTTCACTTTCATGACCACACGCTAATAATTGTCTTCTTAATTAGCTCATTAGTACTTTACATTATTTCACTAATACTAACGACAAAGCTGACCCATACAAGCACGATAGATGCACAAGAAGTAGAGACAATCTGAACCATTCTGCCCGCCATCATCTTAATTCTAATTGCTCTTCCTTCTTTACGAATTCTATACATAATAGATGAAATCAATAACCCATCTCTTACAGTAAAAACCATAGGACATCAGTGATACTGAAGCTATGAGTATACAGATTATGAGGACTTAAGCTTCGACTCCTACATAATTCCAACATCAGAATTAAAGCCAGGGGAGCTACGACTATTAGAAGTCGATAATCGAGTTGTACTACCAATAGAAATAACAATCCGAATGTTAGTCTCCTCTGAAGACGTATTACACTCATGAGCTGTGCCCTCTCTAGGACTAAAAACAGACGCAATCCCAGGCCGTCTAAACCAAACAACCCTTATATCGTCCCGTCCAGGCTTATATTACGGTCAATGCTCAGAAATTTGCGGGTCAAACCACAGTTTCATACCCATTGTCCTTGAGTTAGTCCCACTAAAGTACTTTGAAAAATGATCTGCGTCAATATTA---------------------TAA"); + data.put( + "Carp", + "ATGGCACACCCAACGCAACTAGGTTTCAAGGACGCGGCCATACCCGTTATAGAGGAACTTCTTCACTTCCACGACCACGCATTAATAATTGTGCTCCTAATTAGCACTTTAGTTTTATATATTATTACTGCAATGGTATCAACTAAACTTACTAATAAATATATTCTAGACTCCCAAGAAATCGAAATCGTATGAACCATTCTACCAGCCGTCATTTTAGTACTAATCGCCCTGCCCTCCCTACGCATCCTGTACCTTATAGACGAAATTAACGACCCTCACCTGACAATTAAAGCAATAGGACACCAATGATACTGAAGTTACGAGTATACAGACTATGAAAATCTAGGATTCGACTCCTATATAGTACCAACCCAAGACCTTGCCCCCGGACAATTCCGACTTCTGGAAACAGACCACCGAATAGTTGTTCCAATAGAATCCCCAGTCCGTGTCCTAGTATCTGCTGAAGACGTGCTACATTCTTGAGCTGTTCCATCCCTTGGCGTAAAAATGGACGCAGTCCCAGGACGACTAAATCAAGCCGCCTTTATTGCCTCACGCCCAGGGGTCTTTTACGGACAATGCTCTGAAATTTGTGGAGCTAATCACAGCTTTATACCAATTGTAGTTGAAGCAGTACCTCTCGAACACTTCGAAAACTGATCCTCATTAATACTAGAAGACGCCTCGCTAGGAAGCTAA"); + data.put( + "Chicken", + 
"ATGGCCAACCACTCCCAACTAGGCTTTCAAGACGCCTCATCCCCCATCATAGAAGAGCTCGTTGAATTCCACGACCACGCCCTGATAGTCGCACTAGCAATTTGCAGCTTAGTACTCTACCTTCTAACTCTTATACTTATAGAAAAACTATCA---TCAAACACCGTAGATGCCCAAGAAGTTGAACTAATCTGAACCATCCTACCCGCTATTGTCCTAGTCCTGCTTGCCCTCCCCTCCCTCCAAATCCTCTACATAATAGACGAAATCGACGAACCTGATCTCACCCTAAAAGCCATCGGACACCAATGATACTGAACCTATGAATACACAGACTTCAAGGACCTCTCATTTGACTCCTACATAACCCCAACAACAGACCTCCCCCTAGGCCACTTCCGCCTACTAGAAGTCGACCATCGCATTGTAATCCCCATAGAATCCCCCATTCGAGTAATCATCACCGCTGATGACGTCCTCCACTCATGAGCCGTACCCGCCCTCGGGGTAAAAACAGACGCAATCCCTGGACGACTAAATCAAACCTCCTTCATCACCACTCGACCAGGAGTGTTTTACGGACAATGCTCAGAAATCTGCGGAGCTAACCACAGCTACATACCCATTGTAGTAGAGTCTACCCCCCTAAAACACTTTGAAGCCTGATCCTCACTA------------------CTGTCATCTTAA"); + data.put( + "Human", + "ATGGCACATGCAGCGCAAGTAGGTCTACAAGACGCTACTTCCCCTATCATAGAAGAGCTTATCACCTTTCATGATCACGCCCTCATAATCATTTTCCTTATCTGCTTCCTAGTCCTGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACATCTCAGACGCTCAGGAAATAGAAACCGTCTGAACTATCCTGCCCGCCATCATCCTAGTCCTCATCGCCCTCCCATCCCTACGCATCCTTTACATAACAGACGAGGTCAACGATCCCTCCCTTACCATCAAATCAATTGGCCACCAATGGTACTGAACCTACGAGTACACCGACTACGGCGGACTAATCTTCAACTCCTACATACTTCCCCCATTATTCCTAGAACCAGGCGACCTGCGACTCCTTGACGTTGACAATCGAGTAGTACTCCCGATTGAAGCCCCCATTCGTATAATAATTACATCACAAGACGTCTTGCACTCATGAGCTGTCCCCACATTAGGCTTAAAAACAGATGCAATTCCCGGACGTCTAAACCAAACCACTTTCACCGCTACACGACCGGGGGTATACTACGGTCAATGCTCTGAAATCTGTGGAGCAAACCACAGTTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAAATA---------------------GGGCCCGTATTTACCCTATAG"); + data.put( + "Loach", + 
"ATGGCACATCCCACACAATTAGGATTCCAAGACGCGGCCTCACCCGTAATAGAAGAACTTCTTCACTTCCATGACCATGCCCTAATAATTGTATTTTTGATTAGCGCCCTAGTACTTTATGTTATTATTACAACCGTCTCAACAAAACTCACTAACATATATATTTTGGACTCACAAGAAATTGAAATCGTATGAACTGTGCTCCCTGCCCTAATCCTCATTTTAATCGCCCTCCCCTCACTACGAATTCTATATCTTATAGACGAGATTAATGACCCCCACCTAACAATTAAGGCCATGGGGCACCAATGATACTGAAGCTACGAGTATACTGATTATGAAAACTTAAGTTTTGACTCCTACATAATCCCCACCCAGGACCTAACCCCTGGACAATTCCGGCTACTAGAGACAGACCACCGAATGGTTGTTCCCATAGAATCCCCTATTCGCATTCTTGTTTCCGCCGAAGATGTACTACACTCCTGGGCCCTTCCAGCCATGGGGGTAAAGATAGACGCGGTCCCAGGACGCCTTAACCAAACCGCCTTTATTGCCTCCCGCCCCGGGGTATTCTATGGGCAATGCTCAGAAATCTGTGGAGCAAACCACAGCTTTATACCCATCGTAGTAGAAGCGGTCCCACTATCTCACTTCGAAAACTGGTCCACCCTTATACTAAAAGACGCCTCACTAGGAAGCTAA"); + data.put( + "Mouse", + "ATGGCCTACCCATTCCAACTTGGTCTACAAGACGCCACATCCCCTATTATAGAAGAGCTAATAAATTTCCATGATCACACACTAATAATTGTTTTCCTAATTAGCTCCTTAGTCCTCTATATCATCTCGCTAATATTAACAACAAAACTAACACATACAAGCACAATAGATGCACAAGAAGTTGAAACCATTTGAACTATTCTACCAGCTGTAATCCTTATCATAATTGCTCTCCCCTCTCTACGCATTCTATATATAATAGACGAAATCAACAACCCCGTATTAACCGTTAAAACCATAGGGCACCAATGATACTGAAGCTACGAATATACTGACTATGAAGACCTATGCTTTGATTCATATATAATCCCAACAAACGACCTAAAACCTGGTGAACTACGACTGCTAGAAGTTGATAACCGAGTCGTTCTGCCAATAGAACTTCCAATCCGTATATTAATTTCATCTGAAGACGTCCTCCACTCATGAGCAGTCCCCTCCCTAGGACTTAAAACTGATGCCATCCCAGGCCGACTAAATCAAGCAACAGTAACATCAAACCGACCAGGGTTATTCTATGGCCAATGCTCTGAAATTTGTGGATCTAACCATAGCTTTATGCCCATTGTCCTAGAAATGGTTCCACTAAAATATTTCGAAAACTGATCTGCTTCAATAATT---------------------TAA"); + data.put( + "Rat", + 
"ATGGCTTACCCATTTCAACTTGGCTTACAAGACGCTACATCACCTATCATAGAAGAACTTACAAACTTTCATGACCACACCCTAATAATTGTATTCCTCATCAGCTCCCTAGTACTTTATATTATTTCACTAATACTAACAACAAAACTAACACACACAAGCACAATAGACGCCCAAGAAGTAGAAACAATTTGAACAATTCTCCCAGCTGTCATTCTTATTCTAATTGCCCTTCCCTCCCTACGAATTCTATACATAATAGACGAGATTAATAACCCAGTTCTAACAGTAAAAACTATAGGACACCAATGATACTGAAGCTATGAATATACTGACTATGAAGACCTATGCTTTGACTCCTACATAATCCCAACCAATGACCTAAAACCAGGTGAACTTCGTCTATTAGAAGTTGATAATCGGGTAGTCTTACCAATAGAACTTCCAATTCGTATACTAATCTCATCCGAAGACGTCCTGCACTCATGAGCCATCCCTTCACTAGGGTTAAAAACCGACGCAATCCCCGGCCGCCTAAACCAAGCTACAGTCACATCAAACCGACCAGGTCTATTCTATGGCCAATGCTCTGAAATTTGCGGCTCAAATCACAGCTTCATACCCATTGTACTAGAAATAGTGCCTCTAAAATATTTCGAAAACTGATCAGCTTCTATAATT---------------------TAA"); + data.put( + "Seal", + "ATGGCATACCCCCTACAAATAGGCCTACAAGATGCAACCTCTCCCATTATAGAGGAGTTACTACACTTCCATGACCACACATTAATAATTGTGTTCCTAATTAGCTCATTAGTACTCTACATTATCTCACTTATACTAACCACGAAACTCACCCACACAAGTACAATAGACGCACAAGAAGTGGAAACGGTGTGAACGATCCTACCCGCTATCATTTTAATTCTCATTGCCCTACCATCATTACGAATCCTCTACATAATGGACGAGATCAATAACCCTTCCTTGACCGTAAAAACTATAGGACATCAGTGATACTGAAGCTATGAGTACACAGACTACGAAGACCTGAACTTTGACTCATATATGATCCCCACACAAGAACTAAAGCCCGGAGAACTACGACTGCTAGAAGTAGACAATCGAGTAGTCCTCCCAATAGAAATAACAATCCGCATACTAATCTCATCAGAAGATGTACTCCACTCATGAGCCGTACCGTCCCTAGGACTAAAAACTGATGCTATCCCAGGACGACTAAACCAAACAACCCTAATAACCATACGACCAGGACTGTACTACGGTCAATGCTCAGAAATCTGTGGTTCAAACCACAGCTTCATACCTATTGTCCTCGAATTGGTCCCACTATCCCACTTCGAGAAATGATCTACCTCAATGCTT---------------------TAA"); + data.put( + "Whale", + 
"ATGGCATATCCATTCCAACTAGGTTTCCAAGATGCAGCATCACCCATCATAGAAGAGCTCCTACACTTTCACGATCATACACTAATAATCGTTTTTCTAATTAGCTCTTTAGTTCTCTACATTATTACCCTAATGCTTACAACCAAATTAACACATACTAGTACAATAGACGCCCAAGAAGTAGAAACTGTCTGAACTATCCTCCCAGCCATTATCTTAATTTTAATTGCCTTGCCTTCATTACGGATCCTTTACATAATAGACGAAGTCAATAACCCCTCCCTCACTGTAAAAACAATAGGTCACCAATGATATTGAAGCTATGAGTATACCGACTACGAAGACCTAAGCTTCGACTCCTATATAATCCCAACATCAGACCTAAAGCCAGGAGAACTACGATTATTAGAAGTAGATAACCGAGTTGTCTTACCTATAGAAATAACAATCCGAATATTAGTCTCATCAGAAGACGTACTCCACTCATGGGCCGTACCCTCCTTGGGCCTAAAAACAGATGCAATCCCAGGACGCCTAAACCAAACAACCTTAATATCAACACGACCAGGCCTATTTTATGGACAATGCTCAGAGATCTGCGGCTCAAACCACAGTTTCATACCAATTGTCCTAGAACTAGTACCCCTAGAAGTCTTTGAAAAATGATCTGTATCAATACTA---------------------TAA"); + data.put( + "Frog", + "ATGGCACACCCATCACAATTAGGTTTTCAAGACGCAGCCTCTCCAATTATAGAAGAATTACTTCACTTCCACGACCATACCCTCATAGCCGTTTTTCTTATTAGTACGCTAGTTCTTTACATTATTACTATTATAATAACTACTAAACTAACTAATACAAACCTAATGGACGCACAAGAGATCGAAATAGTGTGAACTATTATACCAGCTATTAGCCTCATCATAATTGCCCTTCCATCCCTTCGTATCCTATATTTAATAGATGAAGTTAATGATCCACACTTAACAATTAAAGCAATCGGCCACCAATGATACTGAAGCTACGAATATACTAACTATGAGGATCTCTCATTTGACTCTTATATAATTCCAACTAATGACCTTACCCCTGGACAATTCCGGCTGCTAGAAGTTGATAATCGAATAGTAGTCCCAATAGAATCTCCAACCCGACTTTTAGTTACAGCCGAAGACGTCCTCCACTCGTGAGCTGTACCCTCCTTGGGTGTCAAAACAGATGCAATCCCAGGACGACTTCATCAAACATCATTTATTGCTACTCGTCCGGGAGTATTTTACGGACAATGTTCAGAAATTTGCGGAGCAAACCACAGCTTTATACCAATTGTAGTTGAAGCAGTACCGCTAACCGACTTTGAAAACTGATCTTCATCAATACTA---GAAGCATCACTA------AGA"); + return data; + } + + /** + * Tests sequential format file is read correctly by comparing read sequence + * with that in the test data. + * + * @throws Exception + */ + @Test + public void testSequentialDataExtraction() throws Exception + { + testDataExtraction(sequentialFile); + } + + /** + * Tests interleaved format file is read correctly by comparing read sequence + * with that in the test data. 
+ * + * @throws Exception + */ + @Test + public void testInterleavedDataExtraction() throws Exception + { + testDataExtraction(interleavedFile); + } + + /** + * Tests a PHYLIP file is read correctly by comparing read sequence with that + * in the test data. + * + * @throws Exception + */ + private void testDataExtraction(String file) throws IOException + { + AppletFormatAdapter rf = new AppletFormatAdapter(); + Alignment al = rf.readFile(file, AppletFormatAdapter.FILE, + PhylipFile.FILE_DESC); + assertNotNull("Couldn't read supplied alignment data.", al); + + Map data = PhylipFileTests.getTestData(); + for (SequenceI s : al.getSequencesArray()) + { + assertTrue(s.getName() + " sequence did not match test data.", data + .get(s.getName()).equals(s.getSequenceAsString())); + } + } + + /** + * Tests sequential format file reading and writing without data lose using + * similar approach to {@link StockholmFileTest} + * + * @throws Exception + */ + @Test + public void testSequentialIO() throws Exception + { + testIO(sequentialFile); + } + + /** + * Tests interleaved format file reading and writing without data lose using + * similar approach to {@link StockholmFileTest} + * + * @throws Exception + */ + @Test + public void testInterleavedIO() throws Exception + { + testIO(interleavedFile); + } + + /** + * Uses {@link StockholmFileTest} to test read/write/read + * + * @param file + * @throws IOException + */ + public void testIO(String file) throws IOException + { + AppletFormatAdapter rf = new AppletFormatAdapter(); + Alignment al = rf.readFile(file, AppletFormatAdapter.FILE, + PhylipFile.FILE_DESC); + assertNotNull("Couldn't read supplied alignment data.", al); + + String outputfile = rf.formatSequences(PhylipFile.FILE_DESC, al, true); + + Alignment al_input = new AppletFormatAdapter().readFile(outputfile, + AppletFormatAdapter.PASTE, PhylipFile.FILE_DESC); + assertNotNull("Couldn't parse reimported alignment data.", al_input); + + 
StockholmFileTest.testAlignmentEquivalence(al, al_input); + + } +} \ No newline at end of file