From 77abb3fac2965a8966410cd77cd749c7c1dc6453 Mon Sep 17 00:00:00 2001 From: Charles Ofoegbu Date: Fri, 3 Oct 2014 16:33:03 +0100 Subject: [PATCH] Integration of David Corsars Phylip File support --- examples/dna_interleaved.phy | 132 +++++++++++++ examples/dna_sequential.phy | 11 ++ nbbuild.xml | 100 ---------- nbproject/genfiles.properties | 25 --- nbproject/project.properties | 145 -------------- nbproject/project.xml | 31 --- src/jalview/io/AppletFormatAdapter.java | 103 +++++----- src/jalview/io/FileParse.java | 2 +- src/jalview/io/IdentifyFile.java | 44 +++-- src/jalview/io/JalviewFileChooser.java | 53 +++-- src/jalview/io/PhylipFile.java | 320 +++++++++++++++++++++++++++++++ test/jalview/io/PhylipFileTests.java | 166 ++++++++++++++++ 12 files changed, 751 insertions(+), 381 deletions(-) create mode 100644 examples/dna_interleaved.phy create mode 100644 examples/dna_sequential.phy delete mode 100644 nbbuild.xml delete mode 100644 nbproject/genfiles.properties delete mode 100644 nbproject/project.properties delete mode 100644 nbproject/project.xml create mode 100644 src/jalview/io/PhylipFile.java create mode 100644 test/jalview/io/PhylipFileTests.java diff --git a/examples/dna_interleaved.phy b/examples/dna_interleaved.phy new file mode 100644 index 0000000..745f399 --- /dev/null +++ b/examples/dna_interleaved.phy @@ -0,0 +1,132 @@ +10 705 +Cow ATGGCATATCCCATACAACTAGGATTCCAAGATGCAACATCACCAATCATAGAAGAACTA +Carp ATGGCACACCCAACGCAACTAGGTTTCAAGGACGCGGCCATACCCGTTATAGAGGAACTT +Chicken ATGGCCAACCACTCCCAACTAGGCTTTCAAGACGCCTCATCCCCCATCATAGAAGAGCTC +Human ATGGCACATGCAGCGCAAGTAGGTCTACAAGACGCTACTTCCCCTATCATAGAAGAGCTT +Loach ATGGCACATCCCACACAATTAGGATTCCAAGACGCGGCCTCACCCGTAATAGAAGAACTT +Mouse ATGGCCTACCCATTCCAACTTGGTCTACAAGACGCCACATCCCCTATTATAGAAGAGCTA +Rat ATGGCTTACCCATTTCAACTTGGCTTACAAGACGCTACATCACCTATCATAGAAGAACTT +Seal ATGGCATACCCCCTACAAATAGGCCTACAAGATGCAACCTCTCCCATTATAGAGGAGTTA +Whale ATGGCATATCCATTCCAACTAGGTTTCCAAGATGCAGCATCACCCATCATAGAAGAGCTC +Frog 
ATGGCACACCCATCACAATTAGGTTTTCAAGACGCAGCCTCTCCAATTATAGAAGAATTA + +CTTCACTTTCATGACCACACGCTAATAATTGTCTTCTTAATTAGCTCATTAGTACTTTAC +CTTCACTTCCACGACCACGCATTAATAATTGTGCTCCTAATTAGCACTTTAGTTTTATAT +GTTGAATTCCACGACCACGCCCTGATAGTCGCACTAGCAATTTGCAGCTTAGTACTCTAC +ATCACCTTTCATGATCACGCCCTCATAATCATTTTCCTTATCTGCTTCCTAGTCCTGTAT +CTTCACTTCCATGACCATGCCCTAATAATTGTATTTTTGATTAGCGCCCTAGTACTTTAT +ATAAATTTCCATGATCACACACTAATAATTGTTTTCCTAATTAGCTCCTTAGTCCTCTAT +ACAAACTTTCATGACCACACCCTAATAATTGTATTCCTCATCAGCTCCCTAGTACTTTAT +CTACACTTCCATGACCACACATTAATAATTGTGTTCCTAATTAGCTCATTAGTACTCTAC +CTACACTTTCACGATCATACACTAATAATCGTTTTTCTAATTAGCTCTTTAGTTCTCTAC +CTTCACTTCCACGACCATACCCTCATAGCCGTTTTTCTTATTAGTACGCTAGTTCTTTAC + +ATTATTTCACTAATACTAACGACAAAGCTGACCCATACAAGCACGATAGATGCACAAGAA +ATTATTACTGCAATGGTATCAACTAAACTTACTAATAAATATATTCTAGACTCCCAAGAA +CTTCTAACTCTTATACTTATAGAAAAACTATCA---TCAAACACCGTAGATGCCCAAGAA +GCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACATCTCAGACGCTCAGGAA +GTTATTATTACAACCGTCTCAACAAAACTCACTAACATATATATTTTGGACTCACAAGAA +ATCATCTCGCTAATATTAACAACAAAACTAACACATACAAGCACAATAGATGCACAAGAA +ATTATTTCACTAATACTAACAACAAAACTAACACACACAAGCACAATAGACGCCCAAGAA +ATTATCTCACTTATACTAACCACGAAACTCACCCACACAAGTACAATAGACGCACAAGAA +ATTATTACCCTAATGCTTACAACCAAATTAACACATACTAGTACAATAGACGCCCAAGAA +ATTATTACTATTATAATAACTACTAAACTAACTAATACAAACCTAATGGACGCACAAGAG + +GTAGAGACAATCTGAACCATTCTGCCCGCCATCATCTTAATTCTAATTGCTCTTCCTTCT +ATCGAAATCGTATGAACCATTCTACCAGCCGTCATTTTAGTACTAATCGCCCTGCCCTCC +GTTGAACTAATCTGAACCATCCTACCCGCTATTGTCCTAGTCCTGCTTGCCCTCCCCTCC +ATAGAAACCGTCTGAACTATCCTGCCCGCCATCATCCTAGTCCTCATCGCCCTCCCATCC +ATTGAAATCGTATGAACTGTGCTCCCTGCCCTAATCCTCATTTTAATCGCCCTCCCCTCA +GTTGAAACCATTTGAACTATTCTACCAGCTGTAATCCTTATCATAATTGCTCTCCCCTCT +GTAGAAACAATTTGAACAATTCTCCCAGCTGTCATTCTTATTCTAATTGCCCTTCCCTCC +GTGGAAACGGTGTGAACGATCCTACCCGCTATCATTTTAATTCTCATTGCCCTACCATCA +GTAGAAACTGTCTGAACTATCCTCCCAGCCATTATCTTAATTTTAATTGCCTTGCCTTCA +ATCGAAATAGTGTGAACTATTATACCAGCTATTAGCCTCATCATAATTGCCCTTCCATCC + +TTACGAATTCTATACATAATAGATGAAATCAATAACCCATCTCTTACAGTAAAAACCATA 
+CTACGCATCCTGTACCTTATAGACGAAATTAACGACCCTCACCTGACAATTAAAGCAATA +CTCCAAATCCTCTACATAATAGACGAAATCGACGAACCTGATCTCACCCTAAAAGCCATC +CTACGCATCCTTTACATAACAGACGAGGTCAACGATCCCTCCCTTACCATCAAATCAATT +CTACGAATTCTATATCTTATAGACGAGATTAATGACCCCCACCTAACAATTAAGGCCATG +CTACGCATTCTATATATAATAGACGAAATCAACAACCCCGTATTAACCGTTAAAACCATA +CTACGAATTCTATACATAATAGACGAGATTAATAACCCAGTTCTAACAGTAAAAACTATA +TTACGAATCCTCTACATAATGGACGAGATCAATAACCCTTCCTTGACCGTAAAAACTATA +TTACGGATCCTTTACATAATAGACGAAGTCAATAACCCCTCCCTCACTGTAAAAACAATA +CTTCGTATCCTATATTTAATAGATGAAGTTAATGATCCACACTTAACAATTAAAGCAATC + +GGACATCAGTGATACTGAAGCTATGAGTATACAGATTATGAGGACTTAAGCTTCGACTCC +GGACACCAATGATACTGAAGTTACGAGTATACAGACTATGAAAATCTAGGATTCGACTCC +GGACACCAATGATACTGAACCTATGAATACACAGACTTCAAGGACCTCTCATTTGACTCC +GGCCACCAATGGTACTGAACCTACGAGTACACCGACTACGGCGGACTAATCTTCAACTCC +GGGCACCAATGATACTGAAGCTACGAGTATACTGATTATGAAAACTTAAGTTTTGACTCC +GGGCACCAATGATACTGAAGCTACGAATATACTGACTATGAAGACCTATGCTTTGATTCA +GGACACCAATGATACTGAAGCTATGAATATACTGACTATGAAGACCTATGCTTTGACTCC +GGACATCAGTGATACTGAAGCTATGAGTACACAGACTACGAAGACCTGAACTTTGACTCA +GGTCACCAATGATATTGAAGCTATGAGTATACCGACTACGAAGACCTAAGCTTCGACTCC +GGCCACCAATGATACTGAAGCTACGAATATACTAACTATGAGGATCTCTCATTTGACTCT + +TACATAATTCCAACATCAGAATTAAAGCCAGGGGAGCTACGACTATTAGAAGTCGATAAT +TATATAGTACCAACCCAAGACCTTGCCCCCGGACAATTCCGACTTCTGGAAACAGACCAC +TACATAACCCCAACAACAGACCTCCCCCTAGGCCACTTCCGCCTACTAGAAGTCGACCAT +TACATACTTCCCCCATTATTCCTAGAACCAGGCGACCTGCGACTCCTTGACGTTGACAAT +TACATAATCCCCACCCAGGACCTAACCCCTGGACAATTCCGGCTACTAGAGACAGACCAC +TATATAATCCCAACAAACGACCTAAAACCTGGTGAACTACGACTGCTAGAAGTTGATAAC +TACATAATCCCAACCAATGACCTAAAACCAGGTGAACTTCGTCTATTAGAAGTTGATAAT +TATATGATCCCCACACAAGAACTAAAGCCCGGAGAACTACGACTGCTAGAAGTAGACAAT +TATATAATCCCAACATCAGACCTAAAGCCAGGAGAACTACGATTATTAGAAGTAGATAAC +TATATAATTCCAACTAATGACCTTACCCCTGGACAATTCCGGCTGCTAGAAGTTGATAAT + +CGAGTTGTACTACCAATAGAAATAACAATCCGAATGTTAGTCTCCTCTGAAGACGTATTA +CGAATAGTTGTTCCAATAGAATCCCCAGTCCGTGTCCTAGTATCTGCTGAAGACGTGCTA +CGCATTGTAATCCCCATAGAATCCCCCATTCGAGTAATCATCACCGCTGATGACGTCCTC 
+CGAGTAGTACTCCCGATTGAAGCCCCCATTCGTATAATAATTACATCACAAGACGTCTTG +CGAATGGTTGTTCCCATAGAATCCCCTATTCGCATTCTTGTTTCCGCCGAAGATGTACTA +CGAGTCGTTCTGCCAATAGAACTTCCAATCCGTATATTAATTTCATCTGAAGACGTCCTC +CGGGTAGTCTTACCAATAGAACTTCCAATTCGTATACTAATCTCATCCGAAGACGTCCTG +CGAGTAGTCCTCCCAATAGAAATAACAATCCGCATACTAATCTCATCAGAAGATGTACTC +CGAGTTGTCTTACCTATAGAAATAACAATCCGAATATTAGTCTCATCAGAAGACGTACTC +CGAATAGTAGTCCCAATAGAATCTCCAACCCGACTTTTAGTTACAGCCGAAGACGTCCTC + +CACTCATGAGCTGTGCCCTCTCTAGGACTAAAAACAGACGCAATCCCAGGCCGTCTAAAC +CATTCTTGAGCTGTTCCATCCCTTGGCGTAAAAATGGACGCAGTCCCAGGACGACTAAAT +CACTCATGAGCCGTACCCGCCCTCGGGGTAAAAACAGACGCAATCCCTGGACGACTAAAT +CACTCATGAGCTGTCCCCACATTAGGCTTAAAAACAGATGCAATTCCCGGACGTCTAAAC +CACTCCTGGGCCCTTCCAGCCATGGGGGTAAAGATAGACGCGGTCCCAGGACGCCTTAAC +CACTCATGAGCAGTCCCCTCCCTAGGACTTAAAACTGATGCCATCCCAGGCCGACTAAAT +CACTCATGAGCCATCCCTTCACTAGGGTTAAAAACCGACGCAATCCCCGGCCGCCTAAAC +CACTCATGAGCCGTACCGTCCCTAGGACTAAAAACTGATGCTATCCCAGGACGACTAAAC +CACTCATGGGCCGTACCCTCCTTGGGCCTAAAAACAGATGCAATCCCAGGACGCCTAAAC +CACTCGTGAGCTGTACCCTCCTTGGGTGTCAAAACAGATGCAATCCCAGGACGACTTCAT + +CAAACAACCCTTATATCGTCCCGTCCAGGCTTATATTACGGTCAATGCTCAGAAATTTGC +CAAGCCGCCTTTATTGCCTCACGCCCAGGGGTCTTTTACGGACAATGCTCTGAAATTTGT +CAAACCTCCTTCATCACCACTCGACCAGGAGTGTTTTACGGACAATGCTCAGAAATCTGC +CAAACCACTTTCACCGCTACACGACCGGGGGTATACTACGGTCAATGCTCTGAAATCTGT +CAAACCGCCTTTATTGCCTCCCGCCCCGGGGTATTCTATGGGCAATGCTCAGAAATCTGT +CAAGCAACAGTAACATCAAACCGACCAGGGTTATTCTATGGCCAATGCTCTGAAATTTGT +CAAGCTACAGTCACATCAAACCGACCAGGTCTATTCTATGGCCAATGCTCTGAAATTTGC +CAAACAACCCTAATAACCATACGACCAGGACTGTACTACGGTCAATGCTCAGAAATCTGT +CAAACAACCTTAATATCAACACGACCAGGCCTATTTTATGGACAATGCTCAGAGATCTGC +CAAACATCATTTATTGCTACTCGTCCGGGAGTATTTTACGGACAATGTTCAGAAATTTGC + +GGGTCAAACCACAGTTTCATACCCATTGTCCTTGAGTTAGTCCCACTAAAGTACTTTGAA +GGAGCTAATCACAGCTTTATACCAATTGTAGTTGAAGCAGTACCTCTCGAACACTTCGAA +GGAGCTAACCACAGCTACATACCCATTGTAGTAGAGTCTACCCCCCTAAAACACTTTGAA +GGAGCAAACCACAGTTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAA +GGAGCAAACCACAGCTTTATACCCATCGTAGTAGAAGCGGTCCCACTATCTCACTTCGAA 
+GGATCTAACCATAGCTTTATGCCCATTGTCCTAGAAATGGTTCCACTAAAATATTTCGAA +GGCTCAAATCACAGCTTCATACCCATTGTACTAGAAATAGTGCCTCTAAAATATTTCGAA +GGTTCAAACCACAGCTTCATACCTATTGTCCTCGAATTGGTCCCACTATCCCACTTCGAG +GGCTCAAACCACAGTTTCATACCAATTGTCCTAGAACTAGTACCCCTAGAAGTCTTTGAA +GGAGCAAACCACAGCTTTATACCAATTGTAGTTGAAGCAGTACCGCTAACCGACTTTGAA + +AAATGATCTGCGTCAATATTA---------------------TAA +AACTGATCCTCATTAATACTAGAAGACGCCTCGCTAGGAAGCTAA +GCCTGATCCTCACTA------------------CTGTCATCTTAA +ATA---------------------GGGCCCGTATTTACCCTATAG +AACTGGTCCACCCTTATACTAAAAGACGCCTCACTAGGAAGCTAA +AACTGATCTGCTTCAATAATT---------------------TAA +AACTGATCAGCTTCTATAATT---------------------TAA +AAATGATCTACCTCAATGCTT---------------------TAA +AAATGATCTGTATCAATACTA---------------------TAA +AACTGATCTTCATCAATACTA---GAAGCATCACTA------AGA diff --git a/examples/dna_sequential.phy b/examples/dna_sequential.phy new file mode 100644 index 0000000..99dd34c --- /dev/null +++ b/examples/dna_sequential.phy @@ -0,0 +1,11 @@ +10 705 +Cow ATGGCATATCCCATACAACTAGGATTCCAAGATGCAACATCACCAATCATAGAAGAACTACTTCACTTTCATGACCACACGCTAATAATTGTCTTCTTAATTAGCTCATTAGTACTTTACATTATTTCACTAATACTAACGACAAAGCTGACCCATACAAGCACGATAGATGCACAAGAAGTAGAGACAATCTGAACCATTCTGCCCGCCATCATCTTAATTCTAATTGCTCTTCCTTCTTTACGAATTCTATACATAATAGATGAAATCAATAACCCATCTCTTACAGTAAAAACCATAGGACATCAGTGATACTGAAGCTATGAGTATACAGATTATGAGGACTTAAGCTTCGACTCCTACATAATTCCAACATCAGAATTAAAGCCAGGGGAGCTACGACTATTAGAAGTCGATAATCGAGTTGTACTACCAATAGAAATAACAATCCGAATGTTAGTCTCCTCTGAAGACGTATTACACTCATGAGCTGTGCCCTCTCTAGGACTAAAAACAGACGCAATCCCAGGCCGTCTAAACCAAACAACCCTTATATCGTCCCGTCCAGGCTTATATTACGGTCAATGCTCAGAAATTTGCGGGTCAAACCACAGTTTCATACCCATTGTCCTTGAGTTAGTCCCACTAAAGTACTTTGAAAAATGATCTGCGTCAATATTA---------------------TAA +Carp 
ATGGCACACCCAACGCAACTAGGTTTCAAGGACGCGGCCATACCCGTTATAGAGGAACTTCTTCACTTCCACGACCACGCATTAATAATTGTGCTCCTAATTAGCACTTTAGTTTTATATATTATTACTGCAATGGTATCAACTAAACTTACTAATAAATATATTCTAGACTCCCAAGAAATCGAAATCGTATGAACCATTCTACCAGCCGTCATTTTAGTACTAATCGCCCTGCCCTCCCTACGCATCCTGTACCTTATAGACGAAATTAACGACCCTCACCTGACAATTAAAGCAATAGGACACCAATGATACTGAAGTTACGAGTATACAGACTATGAAAATCTAGGATTCGACTCCTATATAGTACCAACCCAAGACCTTGCCCCCGGACAATTCCGACTTCTGGAAACAGACCACCGAATAGTTGTTCCAATAGAATCCCCAGTCCGTGTCCTAGTATCTGCTGAAGACGTGCTACATTCTTGAGCTGTTCCATCCCTTGGCGTAAAAATGGACGCAGTCCCAGGACGACTAAATCAAGCCGCCTTTATTGCCTCACGCCCAGGGGTCTTTTACGGACAATGCTCTGAAATTTGTGGAGCTAATCACAGCTTTATACCAATTGTAGTTGAAGCAGTACCTCTCGAACACTTCGAAAACTGATCCTCATTAATACTAGAAGACGCCTCGCTAGGAAGCTAA +Chicken ATGGCCAACCACTCCCAACTAGGCTTTCAAGACGCCTCATCCCCCATCATAGAAGAGCTCGTTGAATTCCACGACCACGCCCTGATAGTCGCACTAGCAATTTGCAGCTTAGTACTCTACCTTCTAACTCTTATACTTATAGAAAAACTATCA---TCAAACACCGTAGATGCCCAAGAAGTTGAACTAATCTGAACCATCCTACCCGCTATTGTCCTAGTCCTGCTTGCCCTCCCCTCCCTCCAAATCCTCTACATAATAGACGAAATCGACGAACCTGATCTCACCCTAAAAGCCATCGGACACCAATGATACTGAACCTATGAATACACAGACTTCAAGGACCTCTCATTTGACTCCTACATAACCCCAACAACAGACCTCCCCCTAGGCCACTTCCGCCTACTAGAAGTCGACCATCGCATTGTAATCCCCATAGAATCCCCCATTCGAGTAATCATCACCGCTGATGACGTCCTCCACTCATGAGCCGTACCCGCCCTCGGGGTAAAAACAGACGCAATCCCTGGACGACTAAATCAAACCTCCTTCATCACCACTCGACCAGGAGTGTTTTACGGACAATGCTCAGAAATCTGCGGAGCTAACCACAGCTACATACCCATTGTAGTAGAGTCTACCCCCCTAAAACACTTTGAAGCCTGATCCTCACTA------------------CTGTCATCTTAA +Human 
ATGGCACATGCAGCGCAAGTAGGTCTACAAGACGCTACTTCCCCTATCATAGAAGAGCTTATCACCTTTCATGATCACGCCCTCATAATCATTTTCCTTATCTGCTTCCTAGTCCTGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACATCTCAGACGCTCAGGAAATAGAAACCGTCTGAACTATCCTGCCCGCCATCATCCTAGTCCTCATCGCCCTCCCATCCCTACGCATCCTTTACATAACAGACGAGGTCAACGATCCCTCCCTTACCATCAAATCAATTGGCCACCAATGGTACTGAACCTACGAGTACACCGACTACGGCGGACTAATCTTCAACTCCTACATACTTCCCCCATTATTCCTAGAACCAGGCGACCTGCGACTCCTTGACGTTGACAATCGAGTAGTACTCCCGATTGAAGCCCCCATTCGTATAATAATTACATCACAAGACGTCTTGCACTCATGAGCTGTCCCCACATTAGGCTTAAAAACAGATGCAATTCCCGGACGTCTAAACCAAACCACTTTCACCGCTACACGACCGGGGGTATACTACGGTCAATGCTCTGAAATCTGTGGAGCAAACCACAGTTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAAATA---------------------GGGCCCGTATTTACCCTATAG +Loach ATGGCACATCCCACACAATTAGGATTCCAAGACGCGGCCTCACCCGTAATAGAAGAACTTCTTCACTTCCATGACCATGCCCTAATAATTGTATTTTTGATTAGCGCCCTAGTACTTTATGTTATTATTACAACCGTCTCAACAAAACTCACTAACATATATATTTTGGACTCACAAGAAATTGAAATCGTATGAACTGTGCTCCCTGCCCTAATCCTCATTTTAATCGCCCTCCCCTCACTACGAATTCTATATCTTATAGACGAGATTAATGACCCCCACCTAACAATTAAGGCCATGGGGCACCAATGATACTGAAGCTACGAGTATACTGATTATGAAAACTTAAGTTTTGACTCCTACATAATCCCCACCCAGGACCTAACCCCTGGACAATTCCGGCTACTAGAGACAGACCACCGAATGGTTGTTCCCATAGAATCCCCTATTCGCATTCTTGTTTCCGCCGAAGATGTACTACACTCCTGGGCCCTTCCAGCCATGGGGGTAAAGATAGACGCGGTCCCAGGACGCCTTAACCAAACCGCCTTTATTGCCTCCCGCCCCGGGGTATTCTATGGGCAATGCTCAGAAATCTGTGGAGCAAACCACAGCTTTATACCCATCGTAGTAGAAGCGGTCCCACTATCTCACTTCGAAAACTGGTCCACCCTTATACTAAAAGACGCCTCACTAGGAAGCTAA +Mouse 
ATGGCCTACCCATTCCAACTTGGTCTACAAGACGCCACATCCCCTATTATAGAAGAGCTAATAAATTTCCATGATCACACACTAATAATTGTTTTCCTAATTAGCTCCTTAGTCCTCTATATCATCTCGCTAATATTAACAACAAAACTAACACATACAAGCACAATAGATGCACAAGAAGTTGAAACCATTTGAACTATTCTACCAGCTGTAATCCTTATCATAATTGCTCTCCCCTCTCTACGCATTCTATATATAATAGACGAAATCAACAACCCCGTATTAACCGTTAAAACCATAGGGCACCAATGATACTGAAGCTACGAATATACTGACTATGAAGACCTATGCTTTGATTCATATATAATCCCAACAAACGACCTAAAACCTGGTGAACTACGACTGCTAGAAGTTGATAACCGAGTCGTTCTGCCAATAGAACTTCCAATCCGTATATTAATTTCATCTGAAGACGTCCTCCACTCATGAGCAGTCCCCTCCCTAGGACTTAAAACTGATGCCATCCCAGGCCGACTAAATCAAGCAACAGTAACATCAAACCGACCAGGGTTATTCTATGGCCAATGCTCTGAAATTTGTGGATCTAACCATAGCTTTATGCCCATTGTCCTAGAAATGGTTCCACTAAAATATTTCGAAAACTGATCTGCTTCAATAATT---------------------TAA +Rat ATGGCTTACCCATTTCAACTTGGCTTACAAGACGCTACATCACCTATCATAGAAGAACTTACAAACTTTCATGACCACACCCTAATAATTGTATTCCTCATCAGCTCCCTAGTACTTTATATTATTTCACTAATACTAACAACAAAACTAACACACACAAGCACAATAGACGCCCAAGAAGTAGAAACAATTTGAACAATTCTCCCAGCTGTCATTCTTATTCTAATTGCCCTTCCCTCCCTACGAATTCTATACATAATAGACGAGATTAATAACCCAGTTCTAACAGTAAAAACTATAGGACACCAATGATACTGAAGCTATGAATATACTGACTATGAAGACCTATGCTTTGACTCCTACATAATCCCAACCAATGACCTAAAACCAGGTGAACTTCGTCTATTAGAAGTTGATAATCGGGTAGTCTTACCAATAGAACTTCCAATTCGTATACTAATCTCATCCGAAGACGTCCTGCACTCATGAGCCATCCCTTCACTAGGGTTAAAAACCGACGCAATCCCCGGCCGCCTAAACCAAGCTACAGTCACATCAAACCGACCAGGTCTATTCTATGGCCAATGCTCTGAAATTTGCGGCTCAAATCACAGCTTCATACCCATTGTACTAGAAATAGTGCCTCTAAAATATTTCGAAAACTGATCAGCTTCTATAATT---------------------TAA +Seal 
ATGGCATACCCCCTACAAATAGGCCTACAAGATGCAACCTCTCCCATTATAGAGGAGTTACTACACTTCCATGACCACACATTAATAATTGTGTTCCTAATTAGCTCATTAGTACTCTACATTATCTCACTTATACTAACCACGAAACTCACCCACACAAGTACAATAGACGCACAAGAAGTGGAAACGGTGTGAACGATCCTACCCGCTATCATTTTAATTCTCATTGCCCTACCATCATTACGAATCCTCTACATAATGGACGAGATCAATAACCCTTCCTTGACCGTAAAAACTATAGGACATCAGTGATACTGAAGCTATGAGTACACAGACTACGAAGACCTGAACTTTGACTCATATATGATCCCCACACAAGAACTAAAGCCCGGAGAACTACGACTGCTAGAAGTAGACAATCGAGTAGTCCTCCCAATAGAAATAACAATCCGCATACTAATCTCATCAGAAGATGTACTCCACTCATGAGCCGTACCGTCCCTAGGACTAAAAACTGATGCTATCCCAGGACGACTAAACCAAACAACCCTAATAACCATACGACCAGGACTGTACTACGGTCAATGCTCAGAAATCTGTGGTTCAAACCACAGCTTCATACCTATTGTCCTCGAATTGGTCCCACTATCCCACTTCGAGAAATGATCTACCTCAATGCTT---------------------TAA +Whale ATGGCATATCCATTCCAACTAGGTTTCCAAGATGCAGCATCACCCATCATAGAAGAGCTCCTACACTTTCACGATCATACACTAATAATCGTTTTTCTAATTAGCTCTTTAGTTCTCTACATTATTACCCTAATGCTTACAACCAAATTAACACATACTAGTACAATAGACGCCCAAGAAGTAGAAACTGTCTGAACTATCCTCCCAGCCATTATCTTAATTTTAATTGCCTTGCCTTCATTACGGATCCTTTACATAATAGACGAAGTCAATAACCCCTCCCTCACTGTAAAAACAATAGGTCACCAATGATATTGAAGCTATGAGTATACCGACTACGAAGACCTAAGCTTCGACTCCTATATAATCCCAACATCAGACCTAAAGCCAGGAGAACTACGATTATTAGAAGTAGATAACCGAGTTGTCTTACCTATAGAAATAACAATCCGAATATTAGTCTCATCAGAAGACGTACTCCACTCATGGGCCGTACCCTCCTTGGGCCTAAAAACAGATGCAATCCCAGGACGCCTAAACCAAACAACCTTAATATCAACACGACCAGGCCTATTTTATGGACAATGCTCAGAGATCTGCGGCTCAAACCACAGTTTCATACCAATTGTCCTAGAACTAGTACCCCTAGAAGTCTTTGAAAAATGATCTGTATCAATACTA---------------------TAA +Frog 
ATGGCACACCCATCACAATTAGGTTTTCAAGACGCAGCCTCTCCAATTATAGAAGAATTACTTCACTTCCACGACCATACCCTCATAGCCGTTTTTCTTATTAGTACGCTAGTTCTTTACATTATTACTATTATAATAACTACTAAACTAACTAATACAAACCTAATGGACGCACAAGAGATCGAAATAGTGTGAACTATTATACCAGCTATTAGCCTCATCATAATTGCCCTTCCATCCCTTCGTATCCTATATTTAATAGATGAAGTTAATGATCCACACTTAACAATTAAAGCAATCGGCCACCAATGATACTGAAGCTACGAATATACTAACTATGAGGATCTCTCATTTGACTCTTATATAATTCCAACTAATGACCTTACCCCTGGACAATTCCGGCTGCTAGAAGTTGATAATCGAATAGTAGTCCCAATAGAATCTCCAACCCGACTTTTAGTTACAGCCGAAGACGTCCTCCACTCGTGAGCTGTACCCTCCTTGGGTGTCAAAACAGATGCAATCCCAGGACGACTTCATCAAACATCATTTATTGCTACTCGTCCGGGAGTATTTTACGGACAATGTTCAGAAATTTGCGGAGCAAACCACAGCTTTATACCAATTGTAGTTGAAGCAGTACCGCTAACCGACTTTGAAAACTGATCTTCATCAATACTA---GAAGCATCACTA------AGA diff --git a/nbbuild.xml b/nbbuild.xml deleted file mode 100644 index 1fbf3c9..0000000 --- a/nbbuild.xml +++ /dev/null @@ -1,100 +0,0 @@ - - - - - - - - - - - - Builds, tests, and runs the project jalview. - - - - - - - - - - diff --git a/nbproject/genfiles.properties b/nbproject/genfiles.properties deleted file mode 100644 index 158e222..0000000 --- a/nbproject/genfiles.properties +++ /dev/null @@ -1,25 +0,0 @@ -############################################################################### -# Jalview - A Sequence Alignment Editor and Viewer (Version 2.8) -# Copyright (C) 2012 J Procter, AM Waterhouse, LM Lui, J Engelhardt, G Barton, M Clamp, S Searle -# -# This file is part of Jalview. -# -# Jalview is free software: you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. -# -# Jalview is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty -# of MERCHANTABILITY or FITNESS FOR A PARTICULAR -# PURPOSE. See the GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along with Jalview. If not, see . 
-############################################################################### -nbbuild.xml.data.CRC32=f5d3c6c2 -nbbuild.xml.script.CRC32=307ee084 -nbbuild.xml.stylesheet.CRC32=28e38971@1.43.1.45 -# This file is used by a NetBeans-based IDE to track changes in generated files such as build-impl.xml. -# Do not edit this file. You may delete it but then the IDE will never regenerate such files for you. -nbproject/build-impl.xml.data.CRC32=f5d3c6c2 -nbproject/build-impl.xml.script.CRC32=7a2a3bf9 -nbproject/build-impl.xml.stylesheet.CRC32=0c01fd8e@1.43.1.45 diff --git a/nbproject/project.properties b/nbproject/project.properties deleted file mode 100644 index b505d62..0000000 --- a/nbproject/project.properties +++ /dev/null @@ -1,145 +0,0 @@ -############################################################################### -# Jalview - A Sequence Alignment Editor and Viewer (Version 2.8) -# Copyright (C) 2012 J Procter, AM Waterhouse, LM Lui, J Engelhardt, G Barton, M Clamp, S Searle -# -# This file is part of Jalview. -# -# Jalview is free software: you can redistribute it and/or -# modify it under the terms of the GNU General Public License -# as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. -# -# Jalview is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty -# of MERCHANTABILITY or FITNESS FOR A PARTICULAR -# PURPOSE. See the GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along with Jalview. If not, see . 
-############################################################################### -annotation.processing.enabled=true -annotation.processing.enabled.in.editor=false -annotation.processing.run.all.processors=true -annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output -application.title=jalview -application.vendor=Barton Group -build.classes.dir=${build.dir}/classes -build.classes.excludes=**/*.java,**/*.form -# This directory is removed when the project is cleaned: -build.dir=build -build.generated.dir=${build.dir}/generated -build.generated.sources.dir=${build.dir}/generated-sources -# Only compile against the classpath explicitly listed here: -build.sysclasspath=ignore -build.test.classes.dir=${build.dir}/test/classes -build.test.results.dir=${build.dir}/test/results -buildfile=nbbuild.xml -# Uncomment to specify the preferred debugger connection transport: -#debug.transport=dt_socket -debug.classpath=\ - ${run.classpath} -debug.test.classpath=\ - ${run.test.classpath} -# This directory is removed when the project is cleaned: -dist.dir=dist -dist.jar=${dist.dir}/jalview.jar -dist.javadoc.dir=${dist.dir}/javadoc -endorsed.classpath= -excludes= -file.reference.activation.jar=lib/activation.jar -file.reference.apache-mime4j-0.6.jar=lib/apache-mime4j-0.6.jar -file.reference.axis.jar=lib/axis.jar -file.reference.castor-1.1-cycle-xml.jar=lib/castor-1.1-cycle-xml.jar -file.reference.commons-codec-1.3.jar=lib/commons-codec-1.3.jar -file.reference.commons-discovery.jar=lib/commons-discovery.jar -file.reference.commons-logging-1.1.1.jar=lib/commons-logging-1.1.1.jar -file.reference.commons-logging.jar=lib/commons-logging.jar -file.reference.httpclient-4.0.3.jar=lib/httpclient-4.0.3.jar -file.reference.httpcore-4.0.1.jar=lib/httpcore-4.0.1.jar -file.reference.httpmime-4.0.3.jar=lib/httpmime-4.0.3.jar -file.reference.jalview-src=src -file.reference.jaxrpc.jar=lib/jaxrpc.jar -file.reference.JGoogleAnalytics_0.3.jar=lib/JGoogleAnalytics_0.3.jar 
-file.reference.jhall.jar=lib/jhall.jar -file.reference.Jmol-12.2.4.jar=lib/Jmol-12.2.4.jar -file.reference.JmolApplet-12.2.4.jar=appletlib/JmolApplet-12.2.4.jar -file.reference.log4j-1.2.8.jar=lib/log4j-1.2.8.jar -file.reference.mail.jar=lib/mail.jar -file.reference.min-jaba-client.jar=lib/min-jaba-client-2.0.jar -file.reference.regex.jar=lib/regex.jar -file.reference.saaj.jar=lib/saaj.jar -file.reference.vamsas-client.jar=lib/vamsas-client.jar -file.reference.wsdl4j.jar=lib/wsdl4j.jar -file.reference.xercesImpl.jar=lib/xercesImpl.jar -file.reference.xml-apis.jar=lib/xml-apis.jar -file.reference.miglayout-4.0-swing.jar=lib/miglayout-4.0-swing.jar -file.reference.varna-3.9-dev.jar=lib/VARNAv3.9.jar -includes=** -jar.compress=false -javac.classpath=\ - ${libs.plugin.jar.classpath}:\ - ${file.reference.activation.jar}:\ - ${file.reference.apache-mime4j-0.6.jar}:\ - ${file.reference.axis.jar}:\ - ${file.reference.castor-1.1-cycle-xml.jar}:\ - ${file.reference.commons-codec-1.3.jar}:\ - ${file.reference.commons-discovery.jar}:\ - ${file.reference.commons-logging-1.1.1.jar}:\ - ${file.reference.commons-logging.jar}:\ - ${file.reference.regex.jar}:\ - ${file.reference.saaj.jar}:\ - ${file.reference.httpclient-4.0.3.jar}:\ - ${file.reference.httpcore-4.0.1.jar}:\ - ${file.reference.httpmime-4.0.3.jar}:\ - ${file.reference.jaxrpc.jar}:\ - ${file.reference.JGoogleAnalytics_0.3.jar}:\ - ${file.reference.jhall.jar}:\ - ${file.reference.Jmol-12.2.4.jar}:\ - ${file.reference.miglayout-4.0-swing.jar}:\ - ${file.reference.log4j-1.2.8.jar}:\ - ${file.reference.mail.jar}:\ - ${file.reference.min-jaba-client.jar}:\ - ${file.reference.vamsas-client.jar}:\ - ${file.reference.xml-apis.jar}:\ - ${file.reference.xercesImpl.jar}:\ - ${file.reference.wsdl4j.jar}:\ - ${file.reference.JmolApplet-12.2.4.jar} \ - ${file.reference.varna-3.9-dev.jar} -# Space-separated list of extra javac options -javac.compilerargs= -javac.deprecation=false -javac.processorpath=\ - ${javac.classpath} 
-javac.source=1.6 -javac.target=1.6 -javac.test.classpath=\ - ${javac.classpath}:\ - ${build.classes.dir} -javac.test.processorpath=\ - ${javac.test.classpath} -javadoc.additionalparam= -javadoc.author=false -javadoc.encoding=${source.encoding} -javadoc.noindex=false -javadoc.nonavbar=false -javadoc.notree=false -javadoc.private=false -javadoc.splitindex=true -javadoc.use=true -javadoc.version=false -javadoc.windowtitle= -main.class=jalview.bin.Jalview -manifest.file=manifest.mf -meta.inf.dir=${src.dir}/META-INF -mkdist.disabled=false -platform.active=default_platform -run.classpath=\ - ${javac.classpath}:\ - ${build.classes.dir} -# Space-separated list of JVM arguments used when running the project -# (you may also define separate properties like run-sys-prop.name=value instead of -Dname=value -# or test-sys-prop.name=value to set system properties for unit tests): -run.jvmargs= -run.test.classpath=\ - ${javac.test.classpath}:\ - ${build.test.classes.dir} -source.encoding=UTF-8 -src.dir=${file.reference.jalview-src} diff --git a/nbproject/project.xml b/nbproject/project.xml deleted file mode 100644 index 6c49988..0000000 --- a/nbproject/project.xml +++ /dev/null @@ -1,31 +0,0 @@ - - - - org.netbeans.modules.java.j2seproject - - - jalview - - - - - - - diff --git a/src/jalview/io/AppletFormatAdapter.java b/src/jalview/io/AppletFormatAdapter.java index 3f9a425..d43e7c1 100755 --- a/src/jalview/io/AppletFormatAdapter.java +++ b/src/jalview/io/AppletFormatAdapter.java @@ -1,38 +1,41 @@ /* * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2) * Copyright (C) 2014 The Jalview Authors - * + * * This file is part of Jalview. - * + * * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License + * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. 
- * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with Jalview. If not, see . * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.io; +import jalview.api.AlignViewportI; +import jalview.datamodel.Alignment; +import jalview.datamodel.AlignmentAnnotation; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.AlignmentView; + import java.io.File; import java.io.InputStream; import java.util.List; -import jalview.api.AlignViewportI; -import jalview.datamodel.*; - /** * A low level class for alignment and feature IO with alignment formatting * methods used by both applet and application for generating flat alignment * files. It also holds the lists of magic format names that the applet and * application will allow the user to read or write files with. 
- * + * * @author $author$ * @version $Revision$ */ @@ -42,47 +45,48 @@ public class AppletFormatAdapter * List of valid format strings used in the isValidFormat method */ public static final String[] READABLE_FORMATS = new String[] - { "BLC", "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "STH", - "PDB", "JnetFile", "RNAML" }; // , "SimpleBLAST" }; + { "BLC", "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "STH", + "PDB", "JnetFile", "RNAML", PhylipFile.FILE_DESC }; // , "SimpleBLAST" }; /** * List of valid format strings for use by callers of the formatSequences * method */ public static final String[] WRITEABLE_FORMATS = new String[] - { "BLC", "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "AMSA", "STH" }; + { "BLC", "CLUSTAL", "FASTA", "MSF", "PileUp", "PIR", "PFAM", "AMSA", + "STH", PhylipFile.FILE_DESC }; /** * List of extensions corresponding to file format types in WRITABLE_FNAMES * that are writable by the application. */ public static final String[] WRITABLE_EXTENSIONS = new String[] - { "fa, fasta, mfa, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa", - "jvp", "sto,stk", "jar" }; + { "fa, fasta, mfa, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa", + "jvp", "sto,stk", "jar", PhylipFile.FILE_EXT }; /** * List of writable formats by the application. Order must correspond with the * WRITABLE_EXTENSIONS list of formats. 
*/ public static final String[] WRITABLE_FNAMES = new String[] - { "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Jalview", - "STH", "Jalview" }; + { "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Jalview", + "STH", "Jalview", PhylipFile.FILE_DESC }; /** * List of readable format file extensions by application in order * corresponding to READABLE_FNAMES */ public static final String[] READABLE_EXTENSIONS = new String[] - { "fa, fasta, mfa, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa", - "jar,jvp", "sto,stk", "xml,rnaml" }; // ".blast" + { "fa, fasta, mfa, fastq", "aln", "pfam", "msf", "pir", "blc", "amsa", + "jar,jvp", "sto,stk", "xml,rnaml", PhylipFile.FILE_EXT }; // ".blast" /** * List of readable formats by application in order corresponding to * READABLE_EXTENSIONS */ public static final String[] READABLE_FNAMES = new String[] - { "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Jalview", - "Stockholm", "RNAML" };// , + { "Fasta", "Clustal", "PFAM", "MSF", "PIR", "BLC", "AMSA", "Jalview", + "Stockholm", "RNAML", PhylipFile.FILE_DESC };// , // "SimpleBLAST" // }; @@ -94,7 +98,7 @@ public class AppletFormatAdapter + prettyPrint(READABLE_FORMATS); /** - * + * * @param els * @return grammatically correct(ish) list consisting of els elements. */ @@ -139,7 +143,7 @@ public class AppletFormatAdapter /** * check that this format is valid for reading - * + * * @param format * a format string to be compared with READABLE_FORMATS * @return true if format is readable @@ -151,7 +155,7 @@ public class AppletFormatAdapter /** * validate format is valid for IO - * + * * @param format * a format string to be compared with either READABLE_FORMATS or * WRITEABLE_FORMATS @@ -165,9 +169,9 @@ public class AppletFormatAdapter boolean valid = false; String[] format_list = (forwriting) ? 
WRITEABLE_FORMATS : READABLE_FORMATS; - for (int i = 0; i < format_list.length; i++) + for (String element : format_list) { - if (format_list[i].equalsIgnoreCase(format)) + if (element.equalsIgnoreCase(format)) { return true; } @@ -178,14 +182,14 @@ public class AppletFormatAdapter /** * Constructs the correct filetype parser for a characterised datasource - * + * * @param inFile * data/data location * @param type * type of datasource * @param format * File format of data provided by datasource - * + * * @return DOCUMENT ME! */ public Alignment readFile(String inFile, String type, String format) @@ -243,6 +247,10 @@ public class AppletFormatAdapter { afile = new SimpleBlastFile(inFile, type); } + else if (format.equals(PhylipFile.FILE_DESC)) + { + afile = new PhylipFile(inFile, type); + } else if (format.equals("RNAML")) { afile = new RnamlFile(inFile, type); @@ -294,12 +302,12 @@ public class AppletFormatAdapter /** * Constructs the correct filetype parser for an already open datasource - * + * * @param source * an existing datasource * @param format * File format of data that will be provided by datasource - * + * * @return DOCUMENT ME! 
*/ public AlignmentI readFromFile(FileParse source, String format) @@ -362,7 +370,10 @@ public class AppletFormatAdapter { afile = new SimpleBlastFile(source); } - + else if (format.equals(PhylipFile.FILE_DESC)) + { + afile = new PhylipFile(source); + } Alignment al = new Alignment(afile.getSeqsAsArray()); afile.addAnnotations(al); @@ -409,7 +420,7 @@ public class AppletFormatAdapter /** - * create an alignment flatfile from a Jalview alignment view + * create an alignment flatfile from a Jalview alignment view * @param format * @param jvsuffix * @param av @@ -432,15 +443,15 @@ public class AppletFormatAdapter aselview.addAnnotation(aa); } } - + return formatSequences(format, aselview, jvsuffix); } - + /** * Construct an output class for an alignment in a particular filetype TODO: * allow caller to detect errors and warnings encountered when generating * output - * + * * @param format * string name of alignment format * @param alignment @@ -448,7 +459,7 @@ public class AppletFormatAdapter * @param jvsuffix * passed to AlnFile class controls whether /START-END is added to * sequence names - * + * * @return alignment flat file contents */ public String formatSequences(String format, AlignmentI alignment, @@ -494,6 +505,10 @@ public class AppletFormatAdapter { afile = new AMSAFile(alignment); } + else if (format.equalsIgnoreCase(PhylipFile.FILE_DESC)) + { + afile = new PhylipFile(); + } else if (format.equalsIgnoreCase("RNAML")) { afile = new RnamlFile(); @@ -570,7 +585,7 @@ public class AppletFormatAdapter } catch (Exception e) { System.err - .println("Couln't format the alignment for output as a FASTA file."); + .println("Couln't format the alignment for output as a FASTA file."); e.printStackTrace(System.err); } } @@ -580,8 +595,8 @@ public class AppletFormatAdapter } System.out.println("Read took " + (t1 / 1000.0) + " seconds."); System.out - .println("Difference between free memory now and before is " - + (memf / (1024.0 * 1024.0) * 1.0) + " MB"); + 
.println("Difference between free memory now and before is " + + (memf / (1024.0 * 1024.0) * 1.0) + " MB"); } catch (Exception e) { System.err.println("Exception when dealing with " + i @@ -600,7 +615,7 @@ public class AppletFormatAdapter /** * try to discover how to access the given file as a valid datasource that * will be identified as the given type. - * + * * @param file * @param format * @return protocol that yields the data parsable as the given type @@ -647,7 +662,7 @@ public class AppletFormatAdapter } catch (Exception ex) { System.err - .println("Exception checking resources: " + file + " " + ex); + .println("Exception checking resources: " + file + " " + ex); } if (file.indexOf("://") > -1) @@ -749,7 +764,7 @@ public class AppletFormatAdapter if (debug) { System.out - .println("File deemed not accessible via " + protocol); + .println("File deemed not accessible via " + protocol); } fp.close(); return null; diff --git a/src/jalview/io/FileParse.java b/src/jalview/io/FileParse.java index d20b468..e8e9f2e 100755 --- a/src/jalview/io/FileParse.java +++ b/src/jalview/io/FileParse.java @@ -50,7 +50,7 @@ public class FileParse public void setNewlineString(String nl) { - newline = nl; + newline = nl; } public String getNewlineString() diff --git a/src/jalview/io/IdentifyFile.java b/src/jalview/io/IdentifyFile.java index 08d4dca..9c7478b 100755 --- a/src/jalview/io/IdentifyFile.java +++ b/src/jalview/io/IdentifyFile.java @@ -1,30 +1,30 @@ /* * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2) * Copyright (C) 2014 The Jalview Authors - * + * * This file is part of Jalview. - * + * * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License + * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. 
- * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with Jalview. If not, see . * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.io; -import java.io.*; +import java.io.IOException; /** * DOCUMENT ME! - * + * * @author $author$ * @version $Revision$ */ @@ -32,10 +32,10 @@ public class IdentifyFile { /** * Identify a datasource's file content. - * + * * @note Do not use this method for stream sources - create a FileParse object * instead. - * + * * @param file * DOCUMENT ME! * @param protocol @@ -60,7 +60,9 @@ public class IdentifyFile emessage = e.getMessage(); } if (parser != null) + { return parser.errormessage; + } return emessage; } @@ -73,7 +75,7 @@ public class IdentifyFile /** * Identify contents of source, closing it or resetting source to start * afterwards. 
- * + * * @param source * @param closeSource * @return filetype string @@ -207,7 +209,7 @@ public class IdentifyFile else { reply = "FASTA"; // possibly a bad choice - may be recognised as - // PIR + // PIR } // otherwise can still possibly be a PIR file } @@ -267,18 +269,24 @@ public class IdentifyFile reply = "PDB"; break; } + else if (data.matches("\\s*\\d+\\s+\\d+\\s*")) + { + reply = PhylipFile.FILE_DESC; + break; + } + /* * // TODO comment out SimpleBLAST identification for Jalview 2.4.1 else * if (!lineswereskipped && data.indexOf("BLAST")<4) { reply = * "SimpleBLAST"; break; - * + * * } // end comments for Jalview 2.4.1 */ else if (!lineswereskipped && data.charAt(0) != '*' && data.charAt(0) != ' ' && data.indexOf(":") < data.indexOf(",")) // && - // data.indexOf(","). * The Jalview Authors are detailed in the 'AUTHORS' file. @@ -23,21 +23,31 @@ package jalview.io; import jalview.util.MessageManager; -import java.io.*; -import java.util.*; - -import java.awt.*; -import java.awt.event.*; -import javax.swing.*; +import java.awt.Component; +import java.awt.Dimension; +import java.awt.EventQueue; +import java.awt.HeadlessException; +import java.awt.event.MouseAdapter; +import java.awt.event.MouseEvent; +import java.io.File; +import java.util.StringTokenizer; +import java.util.Vector; + +import javax.swing.DefaultListCellRenderer; +import javax.swing.JFileChooser; +import javax.swing.JList; +import javax.swing.JOptionPane; +import javax.swing.JPanel; +import javax.swing.JScrollPane; /** * Enhanced file chooser dialog box. - * + * * NOTE: bug on Windows systems when filechooser opened on directory to view * files with colons in title. 
- * + * * @author AMW - * + * */ public class JalviewFileChooser extends JFileChooser { @@ -104,6 +114,7 @@ public class JalviewFileChooser extends JFileChooser setAccessory(new RecentlyOpened()); } + @Override public void setFileFilter(javax.swing.filechooser.FileFilter filter) { super.setFileFilter(filter); @@ -122,6 +133,7 @@ public class JalviewFileChooser extends JFileChooser EventQueue.invokeLater(new Thread() { + @Override public void run() { String currentName = ui.getFileName(); @@ -176,10 +188,15 @@ public class JalviewFileChooser extends JFileChooser { format = "PFAM"; } + else if (format.toUpperCase().startsWith(PhylipFile.FILE_DESC)) + { + format = PhylipFile.FILE_DESC; + } return format; } + @Override public int showSaveDialog(Component parent) throws HeadlessException { this.setAccessory(null); @@ -265,6 +282,7 @@ public class JalviewFileChooser extends JFileChooser list.addMouseListener(new MouseAdapter() { + @Override public void mousePressed(MouseEvent evt) { recentListSelectionChanged(list.getSelectedValue()); @@ -279,6 +297,7 @@ public class JalviewFileChooser extends JFileChooser javax.swing.SwingUtilities.invokeLater(new Runnable() { + @Override public void run() { scroller.getHorizontalScrollBar().setValue( diff --git a/src/jalview/io/PhylipFile.java b/src/jalview/io/PhylipFile.java new file mode 100644 index 0000000..ce65eea --- /dev/null +++ b/src/jalview/io/PhylipFile.java @@ -0,0 +1,320 @@ +/** + * + */ +package jalview.io; + +import jalview.datamodel.Alignment; +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceI; + +import java.io.IOException; + +/** + *

<p> + * Parser and exporter for PHYLIP file format, as defined in the + * documentation. The parser imports PHYLIP files in both sequential and + * interleaved format, and (currently) exports in interleaved format (using 60 + * characters per matrix for the sequence). + *
</p>
+ * + *
<p>
+ * The following assumptions have been made for input + *
<ul>
+ *
<li>Sequences are expressed as letters, not real numbers with decimal points + * separated by blanks (which is a valid option according to the specification)
</li> + * </ul>
+ * + * The following assumptions have been made for output + *
<ul> + *
<li>Interleaved format is used, with each matrix consisting of 60 characters; + *
</li> + *
<li>a blank line is added between each matrix;
</li> + *
<li>no spacing is added between the sequence characters.
</li> + * </ul>
+ * </p> + * + *

+ * + * @author David Corsar + * + * + */ +public class PhylipFile extends AlignFile +{ + + // Define file extension and description to save repeating it elsewhere + public static final String FILE_EXT = "phy"; + + public static final String FILE_DESC = "PHYLIP"; + + /** + * + * @see {@link AlignFile#AlignFile()} + */ + public PhylipFile() + { + super(); + } + + /** + * + * @param source + * @throws IOException + */ + public PhylipFile(FileParse source) throws IOException + { + super(source); + } + + /** + * @param inFile + * @param type + * @throws IOException + * @see {@link AlignFile#AlignFile(FileParse)} + */ + public PhylipFile(String inFile, String type) throws IOException + { + super(inFile, type); + } + + /** + * Parses the input source + * + * @see {@link AlignFile#parse()} + */ + @Override + public void parse() throws IOException + { + try + { + // First line should contain number of species and number of + // characters, separated by blanks + String line = nextLine(); + String[] lineElements = line.trim().split("\\s+"); + if (lineElements.length < 2) + { + throw new IOException( + "First line must contain the number of specifies and number of characters"); + } + + int numberSpecies = Integer.parseInt(lineElements[0]), numberCharacters = Integer + .parseInt(lineElements[1]); + + if (numberSpecies <= 0) + { + // there are no sequences in this file, so exit as there is nothing to + // parse + return; + } + + SequenceI[] sequenceElements = new Sequence[numberSpecies]; + StringBuffer[] sequences = new StringBuffer[numberSpecies]; + + // if file is in sequential format there is only one data matrix, + // else there are multiple + + // read the first data matrix + for (int i = 0; i < numberSpecies; i++) + { + line = nextLine(); + // lines start with the name - a maximum of 10 characters + // if less, then padded out or terminated with a tab + String potentialName = line.substring(0, 10); + int tabIndex = potentialName.indexOf('\t'); + if (tabIndex == -1) + { +
sequenceElements[i] = parseId(validateName(potentialName)); + sequences[i] = new StringBuffer( + removeWhitespace(line.substring(10))); + } + else + { + sequenceElements[i] = parseId(validateName(potentialName + .substring(0, tabIndex))); + sequences[i] = new StringBuffer( + removeWhitespace(line.substring(tabIndex))); + } + } + + // determine if interleaved + if ((sequences[0]).length() != numberCharacters) + { + // interleaved file, so have to read the remainder + int i = 0; + for (line = nextLine(); line != null; line = nextLine()) + { + // ignore blank lines, as defined by the specification + if (line.length() > 0) + { + sequences[i++].append(removeWhitespace(line)); + } + // reached end of matrix, so get ready for the next one + if (i == sequences.length) + { + i = 0; + } + } + } + + // file parsed completely, now store sequences + for (int i = 0; i < numberSpecies; i++) + { + // first check sequence is the expected length + if (sequences[i].length() != numberCharacters) + { + throw new IOException(sequenceElements[i].getName() + + " sequence is incorrect length - should be " + + numberCharacters + " but is " + sequences[i].length()); + } + sequenceElements[i].setSequence(sequences[i].toString()); + seqs.add(sequenceElements[i]); + } + + // create an alignment based on the sequences + Alignment a = new Alignment(sequenceElements); + // add annotations - although comments say addAnnotations + // is used by AppletFormatAdapter, it doesn't say other + // classes should/can not use it + addAnnotations(a); + + } catch (IOException e) + { + System.err.println("Exception parsing PHYLIP file " + e); + e.printStackTrace(System.err); + throw e; + } + + } + + /** + * Removes any whitespace from txt, used to strip any spaces added to + * sequences to improve human readability + * + * @param txt + * @return + */ + private String removeWhitespace(String txt) + { + return txt.replaceAll("\\s*", ""); + } + + /** + * According to the specification, the name cannot have
parentheses, square + * brackets, colon, semicolon, comma + * + * @param name + * @return + * @throws IOException + */ + private String validateName(String name) throws IOException + { + char[] invalidCharacters = new char[] + { '(', ')', '[', ']', ':', ';', ',' }; + for (char c : invalidCharacters) + { + if (name.indexOf(c) > -1) + { + throw new IOException("Species name contains illegal character " + + c); + } + } + return name; + } + + /** + *

<p> + * Prints the seqs in interleaved format, with each matrix consisting of 60 + * characters; a blank line is added between each matrix; no spacing is added + * between the sequence characters. + *
</p>
+ * + * + * @see {@link AlignFile#print()} + */ + @Override + public String print() + { + + StringBuffer sb = new StringBuffer(Integer.toString(seqs.size())); + sb.append(" "); + // if there are no sequences, then define the number of characters as 0 + sb.append( + (seqs.size() > 0) ? Integer + .toString(seqs.get(0).getSequence().length) : "0") + .append(newline); + + // Due to how IO is handled, there doesn't appear to be a way to store + // if the original file was sequential or interleaved; if there is, then + // use that to set the value of the following variable + boolean sequential = false; + + // maximum number of columns for each row of interleaved format + int numInterleavedColumns = 60; + + int sequenceLength = 0; + for (SequenceI s : seqs) + { + + // ensure name is only 10 characters + String name = s.getName(); + if (name.length() > 10) + { + name = name.substring(0, 10); + } + else + { + // pad the name out to 10 characters + name = String.format("%1$-" + 10 + "s", s.getName()); + } + sb.append(name); + + // sequential has the entire sequence following the name + if (sequential) + { + sb.append(s.getSequence()); + } + else + { + // Jalview ensures all sequences are of same length so no need + // to keep track of min/max length + sequenceLength = s.getSequence().length; + // interleaved breaks the sequence into chunks of + // numInterleavedColumns characters + sb.append(s.getSequence(0, + Math.min(numInterleavedColumns, sequenceLength))); + } + sb.append(newline); + } + + // add the remaining matrixes if interleaved and there is something to + // add + if (!sequential && sequenceLength > numInterleavedColumns) + { + // determine total number of matrixes (including the first) + int numMatrics = sequenceLength / numInterleavedColumns; + if ((sequenceLength % numInterleavedColumns) > 0) + { + numMatrics++; + } + + // start i = 1 as first matrix has already been printed + for (int i = 1; i < numMatrics; i++) + { + // add blank line to separate this matrix from previous +
sb.append(newline); + int start = i * numInterleavedColumns; + for (SequenceI s : seqs) + { + sb.append( + s.getSequence(start, Math.min(start + + numInterleavedColumns, sequenceLength))) + .append(newline); + } + } + + } + + return sb.toString(); + } +} \ No newline at end of file diff --git a/test/jalview/io/PhylipFileTests.java b/test/jalview/io/PhylipFileTests.java new file mode 100644 index 0000000..70e3f2c --- /dev/null +++ b/test/jalview/io/PhylipFileTests.java @@ -0,0 +1,166 @@ +package jalview.io; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import jalview.datamodel.Alignment; +import jalview.datamodel.SequenceI; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.junit.Test; + +/** + * Test file for {@link PhylipFile}. + * + * Tests use example data obtained from molecularevolution.org. + * + * @author David Corsar + * + */ +public class PhylipFileTests +{ + + // interleaved file from + // http://www.molecularevolution.org/molevolfiles/fileformats/dna.phy.dat + // sequential file is the interleaved file converted into sequential format + + static String sequentialFile = "examples/dna_sequential.phy", + interleavedFile = "examples/dna_interleaved.phy"; + + /** + * Creates a name:sequence map for the data in the above files + * + * @return + */ + private static Map getTestData() + { + Map data = new HashMap(); + data.put( + "Cow", +
"ATGGCATATCCCATACAACTAGGATTCCAAGATGCAACATCACCAATCATAGAAGAACTACTTCACTTTCATGACCACACGCTAATAATTGTCTTCTTAATTAGCTCATTAGTACTTTACATTATTTCACTAATACTAACGACAAAGCTGACCCATACAAGCACGATAGATGCACAAGAAGTAGAGACAATCTGAACCATTCTGCCCGCCATCATCTTAATTCTAATTGCTCTTCCTTCTTTACGAATTCTATACATAATAGATGAAATCAATAACCCATCTCTTACAGTAAAAACCATAGGACATCAGTGATACTGAAGCTATGAGTATACAGATTATGAGGACTTAAGCTTCGACTCCTACATAATTCCAACATCAGAATTAAAGCCAGGGGAGCTACGACTATTAGAAGTCGATAATCGAGTTGTACTACCAATAGAAATAACAATCCGAATGTTAGTCTCCTCTGAAGACGTATTACACTCATGAGCTGTGCCCTCTCTAGGACTAAAAACAGACGCAATCCCAGGCCGTCTAAACCAAACAACCCTTATATCGTCCCGTCCAGGCTTATATTACGGTCAATGCTCAGAAATTTGCGGGTCAAACCACAGTTTCATACCCATTGTCCTTGAGTTAGTCCCACTAAAGTACTTTGAAAAATGATCTGCGTCAATATTA---------------------TAA"); + data.put( + "Carp", + "ATGGCACACCCAACGCAACTAGGTTTCAAGGACGCGGCCATACCCGTTATAGAGGAACTTCTTCACTTCCACGACCACGCATTAATAATTGTGCTCCTAATTAGCACTTTAGTTTTATATATTATTACTGCAATGGTATCAACTAAACTTACTAATAAATATATTCTAGACTCCCAAGAAATCGAAATCGTATGAACCATTCTACCAGCCGTCATTTTAGTACTAATCGCCCTGCCCTCCCTACGCATCCTGTACCTTATAGACGAAATTAACGACCCTCACCTGACAATTAAAGCAATAGGACACCAATGATACTGAAGTTACGAGTATACAGACTATGAAAATCTAGGATTCGACTCCTATATAGTACCAACCCAAGACCTTGCCCCCGGACAATTCCGACTTCTGGAAACAGACCACCGAATAGTTGTTCCAATAGAATCCCCAGTCCGTGTCCTAGTATCTGCTGAAGACGTGCTACATTCTTGAGCTGTTCCATCCCTTGGCGTAAAAATGGACGCAGTCCCAGGACGACTAAATCAAGCCGCCTTTATTGCCTCACGCCCAGGGGTCTTTTACGGACAATGCTCTGAAATTTGTGGAGCTAATCACAGCTTTATACCAATTGTAGTTGAAGCAGTACCTCTCGAACACTTCGAAAACTGATCCTCATTAATACTAGAAGACGCCTCGCTAGGAAGCTAA"); + data.put( + "Chicken", + 
"ATGGCCAACCACTCCCAACTAGGCTTTCAAGACGCCTCATCCCCCATCATAGAAGAGCTCGTTGAATTCCACGACCACGCCCTGATAGTCGCACTAGCAATTTGCAGCTTAGTACTCTACCTTCTAACTCTTATACTTATAGAAAAACTATCA---TCAAACACCGTAGATGCCCAAGAAGTTGAACTAATCTGAACCATCCTACCCGCTATTGTCCTAGTCCTGCTTGCCCTCCCCTCCCTCCAAATCCTCTACATAATAGACGAAATCGACGAACCTGATCTCACCCTAAAAGCCATCGGACACCAATGATACTGAACCTATGAATACACAGACTTCAAGGACCTCTCATTTGACTCCTACATAACCCCAACAACAGACCTCCCCCTAGGCCACTTCCGCCTACTAGAAGTCGACCATCGCATTGTAATCCCCATAGAATCCCCCATTCGAGTAATCATCACCGCTGATGACGTCCTCCACTCATGAGCCGTACCCGCCCTCGGGGTAAAAACAGACGCAATCCCTGGACGACTAAATCAAACCTCCTTCATCACCACTCGACCAGGAGTGTTTTACGGACAATGCTCAGAAATCTGCGGAGCTAACCACAGCTACATACCCATTGTAGTAGAGTCTACCCCCCTAAAACACTTTGAAGCCTGATCCTCACTA------------------CTGTCATCTTAA"); + data.put( + "Human", + "ATGGCACATGCAGCGCAAGTAGGTCTACAAGACGCTACTTCCCCTATCATAGAAGAGCTTATCACCTTTCATGATCACGCCCTCATAATCATTTTCCTTATCTGCTTCCTAGTCCTGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACATCTCAGACGCTCAGGAAATAGAAACCGTCTGAACTATCCTGCCCGCCATCATCCTAGTCCTCATCGCCCTCCCATCCCTACGCATCCTTTACATAACAGACGAGGTCAACGATCCCTCCCTTACCATCAAATCAATTGGCCACCAATGGTACTGAACCTACGAGTACACCGACTACGGCGGACTAATCTTCAACTCCTACATACTTCCCCCATTATTCCTAGAACCAGGCGACCTGCGACTCCTTGACGTTGACAATCGAGTAGTACTCCCGATTGAAGCCCCCATTCGTATAATAATTACATCACAAGACGTCTTGCACTCATGAGCTGTCCCCACATTAGGCTTAAAAACAGATGCAATTCCCGGACGTCTAAACCAAACCACTTTCACCGCTACACGACCGGGGGTATACTACGGTCAATGCTCTGAAATCTGTGGAGCAAACCACAGTTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAAATA---------------------GGGCCCGTATTTACCCTATAG"); + data.put( + "Loach", + 
"ATGGCACATCCCACACAATTAGGATTCCAAGACGCGGCCTCACCCGTAATAGAAGAACTTCTTCACTTCCATGACCATGCCCTAATAATTGTATTTTTGATTAGCGCCCTAGTACTTTATGTTATTATTACAACCGTCTCAACAAAACTCACTAACATATATATTTTGGACTCACAAGAAATTGAAATCGTATGAACTGTGCTCCCTGCCCTAATCCTCATTTTAATCGCCCTCCCCTCACTACGAATTCTATATCTTATAGACGAGATTAATGACCCCCACCTAACAATTAAGGCCATGGGGCACCAATGATACTGAAGCTACGAGTATACTGATTATGAAAACTTAAGTTTTGACTCCTACATAATCCCCACCCAGGACCTAACCCCTGGACAATTCCGGCTACTAGAGACAGACCACCGAATGGTTGTTCCCATAGAATCCCCTATTCGCATTCTTGTTTCCGCCGAAGATGTACTACACTCCTGGGCCCTTCCAGCCATGGGGGTAAAGATAGACGCGGTCCCAGGACGCCTTAACCAAACCGCCTTTATTGCCTCCCGCCCCGGGGTATTCTATGGGCAATGCTCAGAAATCTGTGGAGCAAACCACAGCTTTATACCCATCGTAGTAGAAGCGGTCCCACTATCTCACTTCGAAAACTGGTCCACCCTTATACTAAAAGACGCCTCACTAGGAAGCTAA"); + data.put( + "Mouse", + "ATGGCCTACCCATTCCAACTTGGTCTACAAGACGCCACATCCCCTATTATAGAAGAGCTAATAAATTTCCATGATCACACACTAATAATTGTTTTCCTAATTAGCTCCTTAGTCCTCTATATCATCTCGCTAATATTAACAACAAAACTAACACATACAAGCACAATAGATGCACAAGAAGTTGAAACCATTTGAACTATTCTACCAGCTGTAATCCTTATCATAATTGCTCTCCCCTCTCTACGCATTCTATATATAATAGACGAAATCAACAACCCCGTATTAACCGTTAAAACCATAGGGCACCAATGATACTGAAGCTACGAATATACTGACTATGAAGACCTATGCTTTGATTCATATATAATCCCAACAAACGACCTAAAACCTGGTGAACTACGACTGCTAGAAGTTGATAACCGAGTCGTTCTGCCAATAGAACTTCCAATCCGTATATTAATTTCATCTGAAGACGTCCTCCACTCATGAGCAGTCCCCTCCCTAGGACTTAAAACTGATGCCATCCCAGGCCGACTAAATCAAGCAACAGTAACATCAAACCGACCAGGGTTATTCTATGGCCAATGCTCTGAAATTTGTGGATCTAACCATAGCTTTATGCCCATTGTCCTAGAAATGGTTCCACTAAAATATTTCGAAAACTGATCTGCTTCAATAATT---------------------TAA"); + data.put( + "Rat", + 
"ATGGCTTACCCATTTCAACTTGGCTTACAAGACGCTACATCACCTATCATAGAAGAACTTACAAACTTTCATGACCACACCCTAATAATTGTATTCCTCATCAGCTCCCTAGTACTTTATATTATTTCACTAATACTAACAACAAAACTAACACACACAAGCACAATAGACGCCCAAGAAGTAGAAACAATTTGAACAATTCTCCCAGCTGTCATTCTTATTCTAATTGCCCTTCCCTCCCTACGAATTCTATACATAATAGACGAGATTAATAACCCAGTTCTAACAGTAAAAACTATAGGACACCAATGATACTGAAGCTATGAATATACTGACTATGAAGACCTATGCTTTGACTCCTACATAATCCCAACCAATGACCTAAAACCAGGTGAACTTCGTCTATTAGAAGTTGATAATCGGGTAGTCTTACCAATAGAACTTCCAATTCGTATACTAATCTCATCCGAAGACGTCCTGCACTCATGAGCCATCCCTTCACTAGGGTTAAAAACCGACGCAATCCCCGGCCGCCTAAACCAAGCTACAGTCACATCAAACCGACCAGGTCTATTCTATGGCCAATGCTCTGAAATTTGCGGCTCAAATCACAGCTTCATACCCATTGTACTAGAAATAGTGCCTCTAAAATATTTCGAAAACTGATCAGCTTCTATAATT---------------------TAA"); + data.put( + "Seal", + "ATGGCATACCCCCTACAAATAGGCCTACAAGATGCAACCTCTCCCATTATAGAGGAGTTACTACACTTCCATGACCACACATTAATAATTGTGTTCCTAATTAGCTCATTAGTACTCTACATTATCTCACTTATACTAACCACGAAACTCACCCACACAAGTACAATAGACGCACAAGAAGTGGAAACGGTGTGAACGATCCTACCCGCTATCATTTTAATTCTCATTGCCCTACCATCATTACGAATCCTCTACATAATGGACGAGATCAATAACCCTTCCTTGACCGTAAAAACTATAGGACATCAGTGATACTGAAGCTATGAGTACACAGACTACGAAGACCTGAACTTTGACTCATATATGATCCCCACACAAGAACTAAAGCCCGGAGAACTACGACTGCTAGAAGTAGACAATCGAGTAGTCCTCCCAATAGAAATAACAATCCGCATACTAATCTCATCAGAAGATGTACTCCACTCATGAGCCGTACCGTCCCTAGGACTAAAAACTGATGCTATCCCAGGACGACTAAACCAAACAACCCTAATAACCATACGACCAGGACTGTACTACGGTCAATGCTCAGAAATCTGTGGTTCAAACCACAGCTTCATACCTATTGTCCTCGAATTGGTCCCACTATCCCACTTCGAGAAATGATCTACCTCAATGCTT---------------------TAA"); + data.put( + "Whale", + 
"ATGGCATATCCATTCCAACTAGGTTTCCAAGATGCAGCATCACCCATCATAGAAGAGCTCCTACACTTTCACGATCATACACTAATAATCGTTTTTCTAATTAGCTCTTTAGTTCTCTACATTATTACCCTAATGCTTACAACCAAATTAACACATACTAGTACAATAGACGCCCAAGAAGTAGAAACTGTCTGAACTATCCTCCCAGCCATTATCTTAATTTTAATTGCCTTGCCTTCATTACGGATCCTTTACATAATAGACGAAGTCAATAACCCCTCCCTCACTGTAAAAACAATAGGTCACCAATGATATTGAAGCTATGAGTATACCGACTACGAAGACCTAAGCTTCGACTCCTATATAATCCCAACATCAGACCTAAAGCCAGGAGAACTACGATTATTAGAAGTAGATAACCGAGTTGTCTTACCTATAGAAATAACAATCCGAATATTAGTCTCATCAGAAGACGTACTCCACTCATGGGCCGTACCCTCCTTGGGCCTAAAAACAGATGCAATCCCAGGACGCCTAAACCAAACAACCTTAATATCAACACGACCAGGCCTATTTTATGGACAATGCTCAGAGATCTGCGGCTCAAACCACAGTTTCATACCAATTGTCCTAGAACTAGTACCCCTAGAAGTCTTTGAAAAATGATCTGTATCAATACTA---------------------TAA"); + data.put( + "Frog", + "ATGGCACACCCATCACAATTAGGTTTTCAAGACGCAGCCTCTCCAATTATAGAAGAATTACTTCACTTCCACGACCATACCCTCATAGCCGTTTTTCTTATTAGTACGCTAGTTCTTTACATTATTACTATTATAATAACTACTAAACTAACTAATACAAACCTAATGGACGCACAAGAGATCGAAATAGTGTGAACTATTATACCAGCTATTAGCCTCATCATAATTGCCCTTCCATCCCTTCGTATCCTATATTTAATAGATGAAGTTAATGATCCACACTTAACAATTAAAGCAATCGGCCACCAATGATACTGAAGCTACGAATATACTAACTATGAGGATCTCTCATTTGACTCTTATATAATTCCAACTAATGACCTTACCCCTGGACAATTCCGGCTGCTAGAAGTTGATAATCGAATAGTAGTCCCAATAGAATCTCCAACCCGACTTTTAGTTACAGCCGAAGACGTCCTCCACTCGTGAGCTGTACCCTCCTTGGGTGTCAAAACAGATGCAATCCCAGGACGACTTCATCAAACATCATTTATTGCTACTCGTCCGGGAGTATTTTACGGACAATGTTCAGAAATTTGCGGAGCAAACCACAGCTTTATACCAATTGTAGTTGAAGCAGTACCGCTAACCGACTTTGAAAACTGATCTTCATCAATACTA---GAAGCATCACTA------AGA"); + return data; + } + + /** + * Tests sequential format file is read correctly by comparing read sequence + * with that in the test data. + * + * @throws Exception + */ + @Test + public void testSequentialDataExtraction() throws Exception + { + testDataExtraction(sequentialFile); + } + + /** + * Tests interleaved format file is read correctly by comparing read sequence + * with that in the test data. 
+ * + * @throws Exception + */ + @Test + public void testInterleavedDataExtraction() throws Exception + { + testDataExtraction(interleavedFile); + } + + /** + * Tests a PHYLIP file is read correctly by comparing read sequence with that + * in the test data. + * + * @throws IOException + */ + private void testDataExtraction(String file) throws IOException + { + AppletFormatAdapter rf = new AppletFormatAdapter(); + Alignment al = rf.readFile(file, AppletFormatAdapter.FILE, + PhylipFile.FILE_DESC); + assertNotNull("Couldn't read supplied alignment data.", al); + + Map data = PhylipFileTests.getTestData(); + for (SequenceI s : al.getSequencesArray()) + { + assertTrue(s.getName() + " sequence did not match test data.", data + .get(s.getName()).equals(s.getSequenceAsString())); + } + } + + /** + * Tests sequential format file reading and writing without data loss using + * similar approach to {@link StockholmFileTest} + * + * @throws Exception + */ + @Test + public void testSequentialIO() throws Exception + { + testIO(sequentialFile); + } + + /** + * Tests interleaved format file reading and writing without data loss using + * similar approach to {@link StockholmFileTest} + * + * @throws Exception + */ + @Test + public void testInterleavedIO() throws Exception + { + testIO(interleavedFile); + } + + /** + * Uses {@link StockholmFileTest} to test read/write/read + * + * @param file + * @throws IOException + */ + public void testIO(String file) throws IOException + { + AppletFormatAdapter rf = new AppletFormatAdapter(); + Alignment al = rf.readFile(file, AppletFormatAdapter.FILE, + PhylipFile.FILE_DESC); + assertNotNull("Couldn't read supplied alignment data.", al); + + String outputfile = rf.formatSequences(PhylipFile.FILE_DESC, al, true); + + Alignment al_input = new AppletFormatAdapter().readFile(outputfile, + AppletFormatAdapter.PASTE, PhylipFile.FILE_DESC); + assertNotNull("Couldn't parse reimported alignment data.", al_input); + +
StockholmFileTest.testAlignmentEquivalence(al, al_input); + + } +} \ No newline at end of file -- 1.7.10.2