From: Jim Procter Date: Fri, 5 Jun 2015 15:28:56 +0000 (+0100) Subject: Merge branch 'develop' into features/JAL-653_gffalignments X-Git-Tag: Release_2_10_0~296^2~157 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=09d81d7aa46a618d636a8ca3fda75d4252650564;hp=43b415c68805f3dec44a6ddc666d52220483c120;p=jalview.git Merge branch 'develop' into features/JAL-653_gffalignments --- diff --git a/examples/testdata/exonerateoutput.fullgff b/examples/testdata/exonerateoutput.fullgff new file mode 100644 index 0000000..9ab732e --- /dev/null +++ b/examples/testdata/exonerateoutput.fullgff @@ -0,0 +1,261 @@ +Command line: [exonerate --model protein2genome Input_Sequences63/dcsA.fas NewSequencedGenome/A_Ellipt_clc_pe_contigs.fa --bestn 1 --showtargetgff] +Hostname: [ningal.cluster.lifesci.dundee.ac.uk] + +C4 Alignment: +------------ + Query: DDB_G0269124 + Target: contig_1146 [revcomp] + Model: protein2genome:local + Raw score: 3652 + Query range: 142 -> 1059 + Target range: 11269 -> 8533 + + 143 : SerProSerSerGluTyrGlyThrThrSerGlyGlyGlnArgPheAspThrLeuValAsp : 162 + ||||||!:!! !||| !.! ! ! !!.! !! !!.!|||!!:||||||:!!||| + SerProAsnMetGluLeuAlaArgAspLeuAlaGlnProHisPheGluThrLeuIleAsp + 11269 : TCGCCCAACATGGAGCTGGCGCGCGACCTCGCCCAGCCGCACTTTGAGACGCTGATCGAC : 11212 + + 163 : ProAspIleSerLeuAlaGluMetGluGluLysMetArgGlnHisLysValTyrGlnGlu : 182 + ||||||!!:!!!! !!.!|||!!:||||||||||||||||||||||||!.!:!!! ||| + ProAspMetThrProGlyGluIleGluGluLysMetArgGlnHisLysAlaHisLeuGlu + 11211 : CCCGACATGACGCCCGGCGAGATCGAGGAGAAGATGCGCCAGCACAAGGCGCACCTCGAG : 11152 + + 183 : GlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnGlnLysGlnLysAspLysGluLeuSer : 202 + ..!|||:!!.....!..!:!!! |||:!!|||..!:!! !! ! ! + ------------MetGlnLysSerSerSerGluLeuLysLysLysSerGlnMetGlnLeu + 11151 : ------------ATGCAAAAGTCCTCGTCGGAACTCAAGAAGAAGTCCCAAATGCAACTC : 11104 + + 203 : SerGlnLysLysLysProSerSerMetGlnLeuSerLysLysLysHisValAlaLysGlu : 222 + !.!||| ! :!!:!! !..!..!:!: !!.!||| ::: :!! !:!!!!: + LysGlnAspGlnGlnLysGlnGlnValValAlaLysLysProArgSerIleLeuGlnAsp + 11103 : AAGCAGGATCAGCAGAAACAACAAGTCGTCGCAAAGAAGCCCCGTTCGATCCTCCAGGAC : 11044 + + 223 : AspSerGluThrLeuGluThrIleIleGlyGluGluLysLysGluValValPheGluVal : 242 + |||! !||||||! !||||||:!!.!!!.!||||||:::||||||||||||||||||||| + AspMetGluThrSerGluThrLeuPheAlaGluGluArgLysGluValValPheGluVal + 11043 : GACATGGAGACGTCGGAGACCCTTTTCGCCGAGGAACGCAAGGAGGTCGTCTTTGAGGTG : 10984 + + 243 : LysProTyrPheSerHisAlaIleLeuGlnAlaThrMetAlaValPheLeuIleTrpAsn : 262 + :::|||||||||||||||:!!||||||||||||||||||||||||||||||||||||||| + ArgProTyrPheSerHisSerIleLeuGlnAlaThrMetAlaValPheLeuIleTrpAsn + 10983 : CGTCCCTACTTCTCGCACTCTATCCTCCAGGCGACGATGGCCGTCTTCCTCATCTGGAAC : 10924 + + 263 : IlePheTyrPheAlaTyrArgAlaGlyTrpThrMetAsnArgThrAspTyrIle<->Thr : 281 + ||||||||||||||||||||| !|||||||||||||||! ! !:!! !:!! ..! + IlePheTyrPheAlaTyrArgMetGlyTrpThrMetAsnThrGlnAsnGlyValTyrVal + 10923 : ATCTTTTACTTTGCCTACCGTATGGGCTGGACCATGAACACCCAGAACGGCGTCTACGTG : 10864 + + 282 : PheSerTyrSerIleLeuPheIleIleValGluPheIleSerPheLeuGlySerAlaLeu : 301 + .!!!!!||||||:!:||||||:!!|||||||||||||||||||||||||||||||||||| + LeuCysTyrSerValLeuPheLeuIleValGluPheIleSerPheLeuGlySerAlaLeu + 10863 : CTCTGCTACTCGGTGCTCTTCCTCATCGTCGAGTTCATCTCTTTCCTCGGCTCCGCGCTC : 10804 + + 302 : HisLeuAsnAsnPheThrAsnProCysThrPheValLeuValValThrLeuGluGlnIle : 321 + |||||||||||||||||||||||||||||||||:!!|||||||||||||||||||||||| + HisLeuAsnAsnPheThrAsnProCysThrPheIleLeuValValThrLeuGluGlnIle + 10803 : CATCTCAACAACTTTACCAATCCGTGCACCTTTATCCTGGTGGTCACGCTGGAGCAGATC : 10744 + + 322 : LeuAlaLysArgArgLysLysHisProThrValMetMetTyrValCysThrTyrLysGlu : 341 + ||||||:::||||||||| !||||||||||||||||||:!!||||||||||||||| + LeuAlaArgArgArgLysProPheProThrValMetMetTyrIleCysThrTyrLysGlu + 10743 : CTCGCGCGCCGTCGCAAGCCCTTCCCCACCGTCATGATGTACATCTGTACCTACAAGGAG : 10684 + + 342 : ProProSerIleValSerArgThrPheArgThrAlaIleSerMetAspTyrProSerGlu : 361 + |||||||||||||||||||||||||||||||||||||||:!!||||||||||||:!!||| + ProProSerIleValSerArgThrPheArgThrAlaIleAlaMetAspTyrProAlaGlu + 10683 : CCGCCCTCGATCGTCTCGCGCACGTTCCGCACCGCCATCGCCATGGACTACCCCGCCGAG : 10624 + + 362 : AsnLeuTrpIleGlyLeuLeuAspAspSerValAsnTyrArgGluSerArgGlyTrpAla : 381 + ||||||||||||||||||||||||||||||:!!|||!:!||||||||||||||||||:!! + AsnLeuTrpIleGlyLeuLeuAspAspSerIleAsnPheArgGluSerArgGlyTrpSer + 10623 : AACCTCTGGATCGGCCTGCTCGACGACTCGATCAACTTCCGCGAGTCGCGCGGCTGGTCG : 10564 + + 382 : HisLeuGlnSerValGluLysAsnPheLeuTyrValLeuLeuGlnLysAlaValTyrSer : 401 + ||||||||||||||||||||||||||||||!:! !|||||||||::::!!||||||:!! + HisLeuGlnSerValGluLysAsnPheLeuPheGlnLeuLeuGlnArgSerValTyrAla + 10563 : CACCTCCAATCGGTCGAGAAGAACTTCCTCTTCCAGCTGCTCCAGCGCTCCGTGTACGCC : 10504 + + 402 : ValHisAsnIleArgProProValThrSerGlnHisGluAspProHisGlyIleLeuAsn : 421 + |||||||||||| !|||||||||.!!..!||| !|||||||||:!!|||||||||..! + ValHisAsnIleAlaProProValAlaGlnGlnAlaGluAspProTyrGlyIleLeuGly + 10503 : GTGCACAACATCGCGCCGCCCGTCGCGCAGCAGGCCGAGGACCCGTACGGCATCCTCGGC : 10444 + + 422 : GluThrSerSerLysIleGluSerSerThrLysGluValIleGluAlaGluValGlnTrp : 441 + |||||||||..!:::||||||!.!!!!||||||||||||:!!|||||||||||||||||| + GluThrSerGluArgIleGluLysThrThrLysGluValValGluAlaGluValGlnTrp + 10443 : GAGACGTCCGAGCGCATCGAAAAGACCACGAAAGAGGTCGTCGAGGCCGAGGTGCAGTGG : 10384 + + 442 : PheIleGluTyrPheLeuLeuAsnSerTrpPheGlyValGlyGlnGluIleProArgAsp : 461 + ||||||||||||||||||||||||||||||||||||:!!! !!::||| ! !! !!!: + PheIleGluTyrPheLeuLeuAsnSerTrpPheGlyIleAspArgGluProGluIleGlu + 10383 : TTCATCGAGTACTTCCTCCTGAACAGCTGGTTCGGCATCGACCGCGAGCCCGAGATCGAG : 10324 + + 462 : AlaAspAspAlaGluArgAlaLeuIleAlaLysLeuArgAspAspAsnPheSerProTyr : 481 + !!..!|||||||||||| !!!!|||:!!! !||||||!!:|||||||||||| !!||| + ProSerAspAlaGluArgAsnPheIleSerMetLeuArgGluAspAsnPheSerAlaTyr + 10323 : CCCTCCGACGCCGAACGCAACTTTATCTCGATGCTGCGCGAGGACAACTTCTCGGCGTAC : 10264 + + 482 : ArgThrPheThrLysSerGluSerGluLysIleSerAsnPheThrIleAspSerLeuGln : 501 + ||||||.!!||| ! ..!||| !!||| |||! !!..|||:!!! !|||:!!|||||| + ArgThrIleThrAspGlnGluArgGluLeuIleTyrThrPheSerSerAspAlaLeuGln + 10263 : CGCACCATCACCGACCAGGAGCGCGAGCTCATCTACACGTTCTCGAGCGACGCGCTCCAG : 10204 + + 502 : SerLeuTrpHisGlySerAlaPhePheArgProLeuIleArgSerIleLeuLeuLysLys : 521 + |||:!!|||||||||||| !!.!.!:!|||||||||:!:|||!:! !|||!!!:!!::: + SerIleTrpHisGlySerProMetTyrArgProLeuValArgAsnAlaLeuPheGlnArg + 10203 : TCGATCTGGCACGGCTCGCCCATGTACCGCCCGCTGGTGCGCAACGCCCTGTTCCAGCGC : 10144 + + 522 : AspTyrValArgAsnPheValSerGluLeuAsnAsnGlnHisArgLeuArgPheLeuAsn : 541 + !||||||!:!:!!|||:!!:!!|||! !||| ..!||||||||||||||||||||| + ArgTyrValLysAspPheIleAlaGluHisAsnAlaSerHisArgLeuArgPheLeuAsn + 10143 : CGCTACGTCAAGGACTTTATCGCCGAGCACAACGCGTCGCACCGTCTGCGCTTCCTCAAC : 10084 + + 542 : ThrGluAlaLeuAlaMetAlaGlnTyrGlnValLeuMetMetGlyArgGlnGluLeuPro : 561 + ..!!!:|||:!! !||||||||||||:!!|||! !||||||||||||||||||:!!||| + ValAspAlaIleAsnMetAlaGlnTyrLysValHisMetMetGlyArgGlnGluValPro + 10083 : GTCGACGCGATCAACATGGCGCAGTACAAGGTGCACATGATGGGCCGCCAGGAGGTGCCC : 10024 + + 562 : TrpAspGluIleSerSerGlyAsnValArgIleAspPheAspThrCysAspGlyProIle : 581 + !::|||!!::!:|||:!!|||||||||||||||||||||||| !! !||| !!:!! + PheAspAspValSerAlaGlyAsnValArgIleAspPheAspPro---ThrGlySerVal + 10023 : TTCGACGACGTGTCCGCGGGCAACGTGCGCATCGACTTTGACCCG---ACCGGCTCGGTC : 9967 + + 582 : ValSerProLysCysThrTyrLeuArgArgArgLysProProIleProHisAsnLysAla : 601 + |||!!!|||:::|||||||||||||||||||||||||||||||||||||||||||||||| + ValThrProArgCysThrTyrLeuArgArgArgLysProProIleProHisAsnLysAla + 9966 : GTCACGCCGCGCTGCACCTACCTGCGCCGCCGCAAGCCGCCCATCCCGCACAACAAGGCC : 9907 + + 602 : GlyAsnIleAsnAsnAlaLeuPheAsnGluSerThrLysAlaAspTyrGluPheLeuGly : 621 + |||||||||||||||!.!|||||||||||||||! ! ! |||||||||||||||:!!||| + GlyAsnIleAsnAsnGlyLeuPheAsnGluSerIleHisAlaAspTyrGluPheMetGly + 9906 : GGCAACATCAACAACGGCCTCTTCAACGAGTCGATCCACGCCGACTACGAGTTCATGGGC : 9847 + + 622 : LeuLeuAspAlaAspGlnGlnProHisProAspPheLeuLysArgValLeuProTyrPhe : 641 + ||||||||||||||||||||||||||||||||||||||||||||||||:!!||||||||| + LeuLeuAspAlaAspGlnGlnProHisProAspPheLeuLysArgValMetProTyrPhe + 9846 : CTGCTCGATGCCGACCAGCAGCCGCACCCCGACTTCCTCAAGCGCGTCATGCCCTACTTC : 9787 + + 642 : TyrSerAspGluGlyGlnAspLeuAlaPheValGlnThrProGlnPhePheSerAsnIle : 661 + !:!||||||!!:|||!!.!!::!!|||||||||||||||||||||||||||||||||||| + PheSerAspAspGlyHisGluValAlaPheValGlnThrProGlnPhePheSerAsnIle + 9786 : TTCAGCGACGACGGCCACGAGGTCGCCTTTGTCCAGACGCCGCAGTTCTTCTCCAACATC : 9727 + + 662 : TyrProValAspAspProLeuGlyHisArgAsnMetGluPheTyrGlyProValMetGlu : 681 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| + TyrProValAspAspProLeuGlyHisArgAsnMetGluPheTyrGlyProValMetGlu + 9726 : TACCCCGTCGACGACCCGCTCGGCCACAGAAACATGGAGTTCTACGGTCCCGTAATGGAG : 9667 + + 682 : GlyArgSerAlaAsnAsnAlaCysProPheValGlyThrAsnAlaIlePheArgArgGln : 701 + |||||||||.!!|||..!|||||||||||||||||||||||||||||||||||||||:!! + GlyArgSerThrAsnGlyAlaCysProPheValGlyThrAsnAlaIlePheArgArgLys + 9666 : GGTCGCTCCACCAACGGCGCCTGCCCCTTCGTCGGAACCAACGCCATCTTCCGTCGCAAG : 9607 + + 702 : ProLeuTyrAspIleGlyGlyIleMetTyrAsnSerValThrGluAspMetTyrThrGly : 721 + |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| + ProLeuTyrAspIleGlyGlyIleMetTyrAsnSerValThrGluAspMetTyrThrGly + 9606 : CCCCTCTACGACATTGGCGGCATCATGTACAACTCTGTCACTGAGGATATGTACACGGGA : 9547 + + 722 : MetLysLeuGlnValSerGlyTyrLysSerTrpTyrHisAsnGluValLeuValValGly : 741 + |||||||||||||||||||||!:!|||||||||||||||||||||||||||||||||||| + MetLysLeuGlnValSerGlyPheLysSerTrpTyrHisAsnGluValLeuValValGly + 9546 : ATGAAGCTCCAGGTCTCGGGATTCAAGTCGTGGTACCACAACGAGGTGCTCGTCGTCGGT : 9487 + + 742 : ThrAlaProValAspLeuLysGluThrLeuGluGlnArgLysArgTrpAlaGlnGlyAla : 761 + |||||||||||||||:!!|||||||||||||||||||||||||||||||||||||||||| + ThrAlaProValAspIleLysGluThrLeuGluGlnArgLysArgTrpAlaGlnGlyAla + 9486 : ACCGCGCCCGTCGATATCAAGGAAACGCTCGAGCAGAGAAAGCGTTGGGCGCAGGGCGCC : 9427 + + 762 : ValGluIlePheSerLeuThrProTrpGlyTyrIleArgGlyLysLeuGlyTrpArgLys : 781 + ||||||||||||||||||||||||||||||||||||||| !|||||||||||||||||| + ValGluIlePheSerLeuThrProTrpGlyTyrIleArgLysLysLeuGlyTrpArgLys + 9426 : GTCGAAATCTTCTCGCTCACGCCGTGGGGCTACATCCGCAAGAAGCTCGGCTGGAGAAAG : 9367 + + 782 : MetLeuTyrAsnLeuAspSerCysIleTyrProPheLeuSerProThrAlaPhePheTyr : 801 + |||||||||||||||||||||||||||||||||||||||||||||||||||.!!|||||| + MetLeuTyrAsnLeuAspSerCysIleTyrProPheLeuSerProThrAlaIlePheTyr + 9366 : ATGCTCTACAACCTCGACTCGTGCATCTACCCGTTCCTCTCGCCGACTGCCATCTTCTAC : 9307 + + 802 : GlyAlaSerProLeuIleMetSerIleTrpThrValProIleValValLysAspProIle : 821 + ||| !:!!||||||||||||!!!:!:|||||||||||||||||||||! :!!|||||| + GlyLeuAlaProLeuIleMetCysLeuTrpThrValProIleValValThrAsnProIle + 9306 : GGTCTGGCGCCGCTGATCATGTGTCTGTGGACCGTGCCCATCGTCGTCACCAACCCCATC : 9247 + + 822 : IlePheIleLeuValGlyMetIleProValMetValLeuProArgValIleGlnTyrMet : 841 + |||||||||||||||||||||||||||||||||:!!||||||||||||!!:||||||||| + IlePheIleLeuValGlyMetIleProValMetIleLeuProArgValMetGlnTyrMet + 9246 : ATCTTCATCCTCGTCGGTATGATCCCCGTCATGATCCTGCCGCGTGTCATGCAGTACATG : 9187 + + 842 : IleLeuArgAlaLysArgProTyrGluAlaGlyLysSerGlyProSerLeuTrpValGlu : 861 + ||||||||||||! !||||||!:!|||||||||||||||||||||||||||||||||||| + IleLeuArgAlaThrArgProPheGluAlaGlyLysSerGlyProSerLeuTrpValGlu + 9186 : ATCCTCCGCGCCACGCGTCCCTTCGAGGCCGGAAAGTCCGGCCCCTCGCTCTGGGTCGAA : 9127 + + 862 : AlaThrAspLeuTrpArgAlaGluGlnThrPhePheGlyPheAlaGlyThrTyrIleSer : 881 + ||||||||||||||||||||||||||||||||||||!.!||||||||||||||||||||| + AlaThrAspLeuTrpArgAlaGluGlnThrPhePheAlaPheAlaGlyThrTyrIleSer + 9126 : GCCACCGATCTCTGGCGTGCCGAACAGACCTTCTTTGCGTTCGCCGGAACCTACATCTCT : 9067 + + 882 : SerTrpArgGluGlySerAlaSerIleValLysLeuLeuLysAlaArgLysIleSerArg : 901 + :!!|||!:!! ||||||||||||:!!|||:::|||:!!||||||||||||||||||||| + AlaTrpLysAlaGlySerAlaSerValValArgLeuIleLysAlaArgLysIleSerArg + 9066 : GCGTGGAAGGCCGGCTCCGCGTCGGTCGTCCGTCTCATCAAGGCGCGCAAGATCTCGCGT : 9007 + + 902 : HisLysLeuAlaMetTrpAsnTrpLysArgAspPheValLysLysProValValCysGlu : 921 + ||||||||||||||||||||||||||||||!!:|||!.!||||||||||||:!! !||| + HisLysLeuAlaMetTrpAsnTrpLysArgGluPheAlaLysLysProValIleValGlu + 9006 : CACAAACTCGCCATGTGGAACTGGAAGCGTGAGTTTGCCAAGAAGCCCGTCATCGTCGAG : 8947 + + 922 : ValPheArgGlnThrLysLeuValAsnGluAsnAspAsnAlaGlnGluSerSerGlyLys : 941 + !!:!||||||:!!|||||||||:!!.!. !!!: .!!:!!||| !!.!||| ! + ArgTyrArgGlnSerLysLeuValHisHisAlaGlu---ThrGluGluHisLysGlyPro + 8946 : CGCTACCGCCAGTCGAAGCTGGTGCACCACGCCGAG---ACCGAGGAGCACAAGGGCCCG : 8890 + + 942 : HisLysAlaGluGlnSerPheArgThrSerAsnLysGluSerAspThrIleLysAsnSer : 961 + !.!|||||||||||||||||||||:!!||||||||||||||||||||||||||||||||| + ArgLysAlaGluGlnSerPheArgSerSerAsnLysGluSerAspThrIleLysAsnSer + 8889 : CGCAAGGCCGAGCAGTCGTTCCGTTCCTCCAACAAGGAGTCCGACACCATCAAGAACTCG : 8830 + + 962 : ArgLeuPheLeuProAsnIleIleLeuPheValValAsnIleLeuAlaMetMetSerAla : 981 + ||||||||| !||||||:!!|||:!!|||! !!.!||||||||||||!!::!! !.!! + ArgLeuPheAlaProAsnLeuIleMetPheGlyAlaAsnIleLeuAlaIleLeuLeuThr + 8829 : CGTCTCTTTGCGCCGAATCTCATCATGTTTGGCGCCAACATCCTCGCCATCCTGCTGACC : 8770 + + 982 : ValLeuArgPheAsnCysPheGlnAsnAspMetTrpLeuLeuValValValAlaGlyPhe : 1001 + :!!||| !||||||||||||! |||||||||||||||:!!:!!||||||||||||||| + LeuLeuSerPheAsnCysPheLeuAsnAspMetTrpLeuMetIleValValAlaGlyPhe + 8769 : CTGCTCTCGTTCAACTGCTTCCTCAACGACATGTGGCTGATGATTGTCGTCGCCGGTTTC : 8710 + + 1002 : SerPheSerThrLeuTrpHisLeuTrpSerPheIleProMetAlaLeuArgGlnSerGlu : 1021 + :!!|||||||||! !||||||||||||||||||||||||||||||||||||||||||||| + AlaPheSerThrCysTrpHisLeuTrpSerPheIleProMetAlaLeuArgGlnSerGlu + 8709 : GCCTTCTCCACGTGCTGGCATCTCTGGTCGTTCATCCCTATGGCCCTCAGACAGTCCGAG : 8650 + + 1022 : LysGlnTrpProTyrAlaSerSerTyrHisAlaHisAsnIleValLeuPheLeuValLeu : 1041 + ||||||||||||||||||||||||||||||||||||||||||:!!:!!||||||:!!||| + LysGlnTrpProTyrAlaSerSerTyrHisAlaHisAsnIleLeuIlePheLeuIleLeu + 8649 : AAGCAGTGGCCCTACGCCTCCTCGTACCACGCGCACAACATTCTCATCTTTCTCATTCTC : 8590 + + 1042 : GlyPheLeuValLeuLeuPheValAspValLysValCysIleProArgValGly : 1059 + |||||||||||||||||||||..! ! ||| ||||||||||||||||||||| + GlyPheLeuValLeuLeuPheThrLysValAlaValCysIleProArgValGly + 8589 : GGTTTCCTGGTGCTCCTGTTCACCAAGGTCGCTGTCTGTATTCCTCGTGTCGGA : 8534 + +vulgar: DDB_G0269124 142 1059 . contig_1146 11269 8533 - 3652 M 40 120 G 4 0 M 94 282 G 0 3 M 296 888 G 1 0 M 356 1068 G 1 0 M 125 375 +# --- START OF GFF DUMP --- +# +# +##gff-version 2 +##source-version exonerate:protein2genome:local 2.2.0 +##date 2015-01-16 +##type DNA +# +# +# seqname source feature start end score strand frame attributes +# +contig_1146 exonerate:protein2genome:local gene 8534 11269 3652 - . gene_id 0 ; sequence DDB_G0269124 ; gene_orientation . +contig_1146 exonerate:protein2genome:local cds 8534 11269 . - . +contig_1146 exonerate:protein2genome:local exon 8534 11269 . - . insertions 3 ; deletions 6 +contig_1146 exonerate:protein2genome:local similarity 8534 11269 3652 - . alignment_id 0 ; Query DDB_G0269124 ; Align 11270 143 120 ; Align 11150 187 282 ; Align 10865 281 888 ; Align 9977 578 1068 ; Align 8909 935 375 +# --- END OF GFF DUMP --- +# +-- completed exonerate analysis diff --git a/examples/testdata/exonerateoutput.gff b/examples/testdata/exonerateoutput.gff new file mode 100644 index 0000000..3ea68dc --- /dev/null +++ b/examples/testdata/exonerateoutput.gff @@ -0,0 +1,13 @@ +##gff-version 2 +##source-version exonerate:protein2genome:local 2.2.0 +##date 2015-01-16 +##type DNA +# +# +# seqname source feature start end score strand frame attributes +# +contig_1146 exonerate:protein2genome:local gene 8534 11269 3652 - . gene_id 0 ; sequence DDB_G0269124 ; gene_orientation . +contig_1146 exonerate:protein2genome:local cds 8534 11269 . - . +contig_1146 exonerate:protein2genome:local exon 8534 11269 . - . insertions 3 ; deletions 6 +contig_1146 exonerate:protein2genome:local similarity 8534 11269 3652 - . alignment_id 0 ; Query DDB_G0269124 ; Align 11270 143 120 ; Align 11150 187 282 ; Align 10865 281 888 ; Align 9977 578 1068 ; Align 8909 935 375 +# --- END OF GFF DUMP --- diff --git a/examples/testdata/exonerateseqs.fa b/examples/testdata/exonerateseqs.fa new file mode 100644 index 0000000..e759925 --- /dev/null +++ b/examples/testdata/exonerateseqs.fa @@ -0,0 +1,183 @@ +>DDB_G0280897 +MTDKINNLINQWLKWDKNEITRKEIEQLKENNNEKELLVRLEERIQFGTAGLRGAMRAGF +SCMNDLTVTQASQGLCEYVIETIEQSKSKGIVIGYDGRHNSYIFAKITAATFKSKGFKVY +LFSHIVPTPYVSFAVPNLKAAIGVMITASHNPKNDNGYKVYWETGCQINTPHDKGISKKI +DENLEPWSNVDATSDIKYGNGDDGESMIDPLSVITELYNKNIKEYSVGSKIELANEPIVY +TAMHGVGGVYAKKAFETFQLKPFIPVAQQIEPDAEFPTVTYPNPEEGKGALKLSIETAEA +NNSRLILANDPDADRLAVAEKLADGSWKVFNGNEIGVLLADWAWTNRSTLTKGGSTLENN +KYFMINTAVSSAMLKTMSEKEGFIHQECLTGFKWIGNAAYNAINNNDGTTFLFGYEEAIG +FQYGDVSFDKDGVRAAAIFAEFALSLYKKGSSVQDHLESMYKRYGYHISKNRYFFCYEPS +KMVSIFNKIRNDGKYLTKLGDDDDEQFTITRIRDLTTGYDNGYPDCKARLPVSSSTQMIT +FYFKNGGIATLRGSGTEPKLKYYVEMIGEVKSNVESTLTKAVELVINQLLKPIENQLEPP +KDD +>PPL_06716 +MSNIKELAESWLKWDKNAETRKEIQSLLESDNQSELKSRLEQRIAFGTAGLRGPMKAGFS +CMNDLTVIQASQGLCIYVEQTLSNSKNSGIVVGYDGRHHSKEFARLTAATFASRGFKVYL +FSKIVPTPYVVILYLISNYMDCYVHQAFAVPELKASVGVMITASHNPKDDNGYKVYWDNG +CQINTPHDIRIAMQIDLNLEPWNIDVNELLNGSLVSDPLDTITKSYFGKIAKYSVKNEVK +LATSEKIVYTAMHGVGGEYAKMAFETFGLPAFIPVDQQIQPDPEFPTVAFPNPEEGKGAL +KLSIETAERNNSRLILANDPDADRLAVAERQPDGQWKVFNGNEIGVLFADWAWQNARRAD +STTPAERFCMINTAVSSSMLKTMANKDGYRHEECLTGFKWVGNKARELMDKGYNFLFAYE +EAIGFMYGDVSLDKDGVRCAPIFAELALTCYQAGKSCQDHLEELYKRYGYHISKNRYFFC +YDPKKMVAIFDKIRNYGQFPTNCGDFYITRVRDLTVGYDSGYPDHKARLPVSSSTQMITF +YFENGGIATLRGSGTEPKLKYYVEMIGSDRQLVESTLSQLVEQVINQFLRPVENELTPPK +DD +>DFA_03821 +MTDINQLAQNWLKWDRNPKTHKEIEQLVEAKDENELRARLENRIAFGTAGIVSTTIVQSH +MNIGPMKAGFANMNDLTVIQASQGLSIYVQETISQAQSKGVVVGYDGRYNSEVFAKLTAA +TFASKGFKVYLFSKIVPTPFVAFAVPELGASVGVMVTASHNPKDDNGYKVYWDNGCQINT +PHDKGIAKQIDLNLEPWTINIDKLLSSELVNDPLETISNAYFSKIYSYSVKNRSTPLELA +NEKVVYTAMHGVGGDYVKKAFETFKLPPYVEVAQQIKPDPAFPTVAFPNPEEGKGALKLS +IETAESVNSRLILANDPDADRLAVAEKLKDGSWKVFNGNEIGILLADWAWTNAKINHPDV +PAEKFFMINTAVSSAMLKTMAKKEGYICEETLTGFKWVGNKAKEMIDQGYKFLFAYEEAI +GFMYGDVSLDKDGVRCAPIFAEYALNLYANGSSCQDHLDHLMQRYGYHISKNRYFFCYEP +SKMVRIFNDIRKSNNGQFPDKCGPYEIIRIRDLTVDYDTAYPDNKARLPVSTSTQMITFY +FKNGAIATLRGSGTEPKLKYYVEMIGDNKQEVESTLQQVVQQVIDNFLQPVVNQLTPPKD +D +>DLA_10096 +MDIYTLANKWLEWDKNEKNRKEIQHFVDEKNEQELRERLENRIQFGTAGLRGPMKAGFAN +MNDLTVIQASQGLALYVKETIDSALTKGVVVGYDGRHNSQTFARLTAATFLSKGFKVYLF +SKLVPTPFVAFAVPELGASCGVMITASHNPKDDNGYKVYWDNGCQINTPHDKGISKLIDE +NLVPWTMNLDDLNKSDLVSDPLERVSKSYFTKISKYSVVKSGATIKQEKVVYTPMHGVGG +DYAAEAFKVFDLHPFIPVELQIKPDAEFPTVAFPNPEEGKGALKLAIETAESNQSRLILA +NDPDADRLAVAEKQSSDGSWKVFNGNEIGVLFADWAWRKERALFSEGYNCKPSEYTMIST +AVSSAMLSTMAKKEGFQHEEVLTGFKWVGNAAKQAMDRGQKFLFAYEEAIGFMYGDVSLD +KDGVRGASIFAELAFDLYQQGSSCQEHLESLYKKYGYHISNNRYFFCYDPKKMVRIFNEI +RGNNREYVKELGEFKVERIRDLTTGYDTAFPPEFKAQLPTSSSTQMITFYFTNGSIATLR +GSGTEPKLKYYVESIGSDKLQVQQTLTKLVSLVIEKLLRPKENELTPPKESVGSERLLAL +LSEVMSTSMKIQVKYNESITEYNIIKGVKLLTQIDVLCQIFKVDANPDRFVLNYRESNLI +LSEDNLSKLFSNEISSCSSQSQNGSNGELSSLYSSFGENSSNNNNNSTLKFELILAPIYQ +VDSVLEHLNNSNLIKKRII +>DPU1265769 +MSMIRSISGVRGVIGQSWTPTLVSNHIIGFTQLLESEKYYNQKQKKIVVGRDSRVSGPWI +EMIVNGSLISMGYQVIHIDIAATPTVQYMVEKTKSSGGIVITSSHNPVEWNGLKFVGPDG +LFIAPVECEVLFSLADNPSSFKFPNYDKLGSVVCNTTANKEHIEAIFKLPFISVDKIKEK +KFKVCLDSVNGAGGPIMSYLLTELGCEVIGINLEPTGLFAHTPEPVPANLGQLCELVKTH +KADFGIAVDPDVDRCVFIDDKGVPLGEEYTLAMAVELLLGDCGRRGNVCKNLSSSRAIDD +ICKKYDSQVICAPVGEIQVAKKMQQVNAVIGGEGNGGVMLPDIHIGRDAPVAATLALQLL +ANRNAASISEFKRTTLPTYEIVKLKAGIEGLDPDAILAEYTKQYENKEGVVINQEDGLKI +DSADWWVHLRKSNTEHIIRVISEAKNTKEATDIATKFINEIESKRK +>440792448 +MASRVSGRMRKISDETQQMVNAWLSVDWDPESREHVKGLVAAGKEEELVAHLGRRISFGT +AGLRGKMKWGFAFMNAVTVTQASQGLCAYLRTVHPCLTDLRERGVIVGHDGRYNSRMFAR +LTAAVFLSRKIKVHLFRDDVPTPLVAFGVRHLKCAAGVMVTASHNPKEDNGYKVYWANSA +QITAPHDAQIARAIEANFSIWDRMPDDKAIDEHPLCLDPTTDVCAAYLAAARHWSFRTPQ +QNAAAQLRVVYTAMHGVGGQSVERIFDAFGLPPVIAVREQHDPDPDFTTVEFPNPEEANG +CSLRLAMSTADREGAPLILANDPDADRLAVAERQRDSGEWRILDGNEIALLLADWLWRNY +TERHPEVDRAKIVMLNSTVSSKALAAMAAKEGFHYRETLTGFKWLGNLADELVRAGYTFL +FAYEVEIGFMIGDMSLDTDGVRAAPVFVEMANHLYERGLTLSDHLDNLYHKYGYYKMAVG +YYFCHDPRLMDQIFNEIRNDGLYISTCGDHKVQYVRDLTTGFDNSQPHNRAVLPVSSAAH +MITFTFENECVATFRGSGTEPKLKYYIEVANASNEQLATDLLDSMKQEIIDRFLQPSQNG +LRPPAAAEDAHNSPHNSGNSPEQMAPARIARDVIHKEIQALQNLEATLGRDFEKVVEIIE +SRGSGRVIFTGVGKSGIIAQKISASFSSLGISSFFVHATEAAHGDLGVITAEDVIIAISN +SGNTPELIFIIPSLRVLAGKIIGITSNKDSLLARYSDASIITGKIMEADQHKIAPTASTI +VCLAIGDALAVTLSARMKFTLPEFGLRHPGGVLGEKVLGKVFQEFAMKGQGRFLRFWKRM +TNEERDKLRRDFERIDLAELSRIYLQCRSKAEKGAIDPHSLEPLPSHTWVKLHESDPAAV +AAWRDAGLRALREGKIGVVLMAGGQATRLGMTMPKGFLDLNLPSHKSLYQLHAEKLLRLQ +DEVRQTFGGGGGDEEVQQQQQQIQIPFYVMTSPEALQQTHQFFIKHQFFGLCPKQVFFFK +QRSLPCVAPSGEIIMDTKCSVVFSPDGHGGLFVALKDAKAYEDMKRRGVEYVFAFGVDNP +LCEVADPAYMGYCIQRNVKMGYKVVDRRDPQETAGVVCVRDGVINCVEYSELPESVAELR +DEQSGELVYNAANMLNLFFTLRFMRKIADNPSLMEYHLAKKRIPFVNDNGVRTEPLVPNG +WKFEKYLVDCTPYANNSVAVMFVKREEEFAPIKNGWNSEVDSPRSARRLLAAHYRRRIER +AGGKLAADDPDKMVEVSPLVTDRKLAQLLQDKHLVTGPAVLQ +>ENY64621.1 +MALNNYIKKTEMDYLYEQAALWLKWDKTPETRKEIEDLVASKNEEELKKRFCKRIEFGTA +GLRGKMCAGFNCMNNLIVQQASQGLALAVEELVQNAHEKGVVIGYDGRYHSKEFAAITAK +VFISKGFKTYLFSTLCPTPWTAFAVGYLKTACGVMVTASHNPKADNGYKVYWENGCQIIE +PIDANIASKIHSNLEPWDLSNVDISKVIDPLADVSAEYYKQMMLTIPHFECPEQPKVKYV +YTAMHGVGSKYVQDAFKTAKLPQPILVPLQNEPDPEFPTVPFPNPEEGKGALKCSIEVAE +ANGATVIIANDPDADRLSVAVKSGNGWRQFTGNEMANLIADWTYNKYIVSGDKTPAFMVR +STVSSSFISKMGEVEGFDTYETLTGFKWIGNKAKEIVDTQHKKLLMAYEEAIGFVIGNMS +YDKDGVRAAVCFAAMALEYAEQGFNLEDRLNMLYEKYGYFASNNKYYFCYDPKLMEKIFN +KMRNNGQYYWKFGKYAVKSIRDLTVGIDTAQPDKKPLLPVSASTQMITYTFENGCKATLR +GSGTEPKLKYYIELPGKKGVKAEDVIAELMDLSHELLQASLEPEKNGLIPPKAE +>Ppo014092.000 +MSISPSVQELVGKWLQWDKNPQNIKEIKDLVAANNEAELKNRLATRIAFGTAGLRGPMRA +GFSCMNDLTVIQASQGLCKYLQQMVSDIKTRGIVVGYDGRHHSKEFAEWTAATFLSQGIT +VYLFTRLVPTPFVSYATPLLRCAAGIMITASHNPKDDNGYKVYWDNGCQINVPHDKGISD +CIEQNLTPWDINKAELLKSELVKDPTETVASAYLKEIKAKCCFHHDENSQKIPVTYTAMH +GVGSEWVARAFEVFGLAPYVPVAPQISADPEFPTVAFPNPEEGKGALKLSMEAADKAGST +LILATDPDADRLAVAEKLPSGSWKIFTGNEIGALLAYWAWLKYKERNPKVDPSKCVVINS +TVSSKLLKALADKEGLKYDETLTGFKWIGGQAAIRIKEGYTFIFGFEEAIGFLFGDVNLD +KDGVRAAAVFAEMNIQLHKQGITVVQQLEKIYKLYGYFITRNRYFFCYDPAKMERIFNAI +RNYNNSGTYPTSCGPFKIKNTRDLTTGYDDSQTDKKAILPVSKSTQMITFFFENGGVVTL +RGSGTEPKLKYYTELSGSDPEKVKSTLDEMVQAIIDTCLKPVENQLQPPSDE +>ADB0001102_3 +MSTTTSINKLAQDWLKWDKNPKTRAEIQELVEQNDVKELTARLENRIAFGTAGLRGPMKA +GFSCMNDLTVIQASQGLCLYVIDTIPNAIKSGVVIGYDGRYNSKEFAKYTAATFLSKGYK +VYLFSKVVPTPYVAFAVTDLKASIGVMITASHNPKDDNGYKVYWENGCQINTPHDKGIAK +LIDLNLEPWEINVDQLLSGPLVEDPLDRIVSSYNTKIAQYSVASHVKFANEKIIYTAMHG +VGGEYTKMAFEAFKLPPFIPVAQQYQPDPAFPTVTFPNPEEGKGALKLSIETAEANGSRL +ILANDPDADRLAVAERLKDGTWKVFNGNEIGVLLADWAWQNARRSHPDTPAEKFFMINTA +VSSAMLKTMAKKDGYRCEETLTGFKWVGNRAREVMDAEGLHFLFAYEEAIGFLYGDVSLD +KDGVRCAAIFAELALSYYANGSSCEDHLESLYKRYGYHISRNRYFFCYEPPKMVAIFNKI +RNNRNFPTKCGRFEIERVRDLTIDYDDGFPDKKARLPVSTSTQMITFYFKNGAIATLRGS +GTEPKLKYYVEMIGQDKAHVQQELAELVQCIINEFLRPVENELTPPKDD +>Carpum +MTQSTCITSMVINNYLSIYIFIYTINDYLKRSLFVLCLVAKMSHHKVAITHPISSYNSII +NELAQNWLRWDKNKETRKEIEQLVEQKNEKELYDCLAKRIAFGTADNEIMMLLTHTLHTG +LRGQMKAGFSNMNDLTVIQASQGLCKYVKETIPEAQKKGVVVGYDCRHHSETFARLTAAT +FASQGFTVYLYSKMVPTPFVAFGVTDLKACVGVMVTASHNPKEDNGYKVYWENGCQINSP +HDKGISQQIELNLEPWTIDVNSLLEKVDDPLERVTKSYMDQISKYSVRGSVDMATENVVY +TAMHGVGGVFVKDAFAAFGLAPYIPVPAQVGPDAEFPTVTLPNPEEGKGALKLSIETAEA +NNSRLIVANDPDADRLAAAEKLKDGSWKVFNGNEIGVLFADWAWQNARRQHGGDSINPSE +YFMVTTAVSSSMLRTMATKEGYGYDETLTGFKWVGNKARDLIDQGKKFLFAYEESIGYMY +GEVSLDKDGVRGAAVFTEMALSCYARGTSCQEHLESLYVKYGYHLSKNRYYFCYDPSKMV +SIFNRIRNNGEFPKTCGPFEITRIRDLTVDYDNGYEDKKARLPVSSSTQMITFYFKNGAI +ATLRGSGTEPKLKYYVEMIGDDKEQVKATLDQVHDQVIQQFLRPTENQLSPPSDE +>Cephalum +MTTDIYQIAQNWLRWDRNPKTHKEISQLVQDKNESELKARLESRIAFGTAGLRGPMKAGF +SCMNDLTVIQASQGLCMYVKQTLAPDAERKGIVVGYDGRYNSEVFAKLTAATFVSQGFKV +HLFSRLVPTPFVAFAVPFLKACVGVMITASHNPKDDNGYKVYWDNGCQINTPHDKGIAKQ +IELNLEPWNVFYKEYFDRIERYTVRHNKQMAREKIVYSAMHGVGGEYTKRAFEVFALDPF +IAVKEQFHPDPAFPTVTFPNPEEGKGALKLSIETAEANNNWAWKNGKPYYEKGLGSFPND +QYFMINTAVSSAMLKTMAMKEGFTYEEVLTGFKWVGNAAQNLIEKGKHLLFAYEEAIGFM +YGDVSLDKDGVRCAPIFAELAQHLYSKGSSCQDHLEELYKRYGYHISKNRYFFCYDPLKM +EKIFNRIRNGGQYPTKCGDFEITRIRDLTTGYDTGYPPENKAQLPTSTSTQMITFYFKNG +GIATLRGSGTEPKLKYYVEMIGDDKENVELILQSMVDQVINQFLRPIENELIPPKD +>Violaceum +MVINPFYPYYLYFCYSPGISYQGVKINKTKLEQSTLTTINQWLNGNYDEQTKKNIQNLLD +QESYTELTDAFYRNLEFGTGGLRGIMGAGSNRINKYTIGTATQGLSNYLLKKYPGEKIKV +AIAHDSRNNSDQFAKITADVFSANGIYVYFFKELRPTPELSFAIRELGCRSGVMLTASHN +PKEYNGYKAYGADGGQFTAPDDRLVMDEVAKITSIDEVKFTRIDANIELIGEEIDQLYLD +KITALSVSPEAISRQKDLKIVYSPIHGTGITLVPKALAQFGFDNVTIVEEQSKPDGNFPT +VVYPNPEEKEAMTLALKKAQEIDADLVLATDPDADRVGIAVKNNNNEWILLNGNQTGSLL +VHYVLTAWEEKGKIDGNQYIVKTVVTSNLIEAIAKAKKVDCYNTLTGFKWIGQLITSLQG +KKTFVVGGEESYGYSVGELVRDKDAVISCAFIAEMTAYYKDKGSSLYNALIDMYVTHGLY +KEELVSLTKKGKTGAEEIKAMMEKFRNNPPASLGGSKVSTLKDYELGTETDLNTGKISKL +SLPKSDVLQFVTEDGSIVSARPSGTEPKIKFYCSVNATLSQASEFDKTDEKLGLKINALM +EDLQK +>Deminut +MTDIYQIAQNWLKWDRNPKTHKEISTLVEKKDEAELRARLETRIAFGTAGLRGPMKAGFS +CMNDLTVIQASQGLSLYVKKTLAGSESKGAVVGYDGRYNSEVFAKLTAATFASQGFKVYL +FSKVVPTPYVAFAVPELGASVGVMVTASHNPKDDNGYKVYWDNGCQINTPHDKHISELIE +SNLEPWNVCIYITLQINIDKLLSGVIDPLQVVTSSYMSKIEKYSVKHLPQPLKLATEQKI +VYTAMHGVGAEYAKLAFEAFSLPPFIPVTQQVTPDPAFPTVAFPNPEEGKGALKLAIETA +EANKSRIILANDPDADRLAVAEKQPEYVFLFYLISNNGTWKVFNGNEIGILFADWAWQNC +RRVYPDVPADQFFMINTAVSSAMLKSMAKKDGYIHEETLTGFKWVGNKARELLDQNKRFL +FAYEEAIGFMYGDVSLDKDGVRCAAIFAELALYQYANGSSCQRHLDSLYERYGYHISKNR +YFFCYEPPKMVAIFNAIRNNKNYPTKCGEFEIERIRDLTDDYDNGYPDNKARLPISKSTQ +MITFFFKNGAIATLRGSGTEPKLKYYVEMIGDNKSEVEAILAKVVTAVIDNFLRPVENQL +TPPKDD +>Ellipt +MADLDKLVEDWMRWDKNTKTRDEVQKMVAQGDKKALAAALQNRIAFGTAGLRGPMKAGFA +NMNDLTVIQASQGLCIYVSATIADAAKKGVVVGYDGRHNSLQFARLTAATFRSKGFKVYL +FSTVVPTPYVAFSVPELGACVGVMVTASHNPKDDNGYKIDVEKLLKEDGVEDPLEKITAS +YMSKVADYSIKSHPATKDIVMSDDKIVYTAMHGVGGEYTRRSFKAFSLPEFIPVVQQFHP +DPEFPTVTFPNPEEGKGALKLAIETAEKNNSRLILANDPDADRLAVAERQPDGTWKVFNG +NEIGVLFADWAWKNARARDPTTPASEFFMVNTAVSSAMLKTMAKTEGYTYEETLTGFKWV +GNKAKEAIDKGGRFLFAYEEAIGFMYGDVSLDKDGVRTAPIFAQMALSLYAKGLSCVDHL +EQLMKTYGYHISRNRYFFCYEPPKMVAIFDKIRNNGNFPKHCGPFEIVRVRDLTVDYDDA +YEDKKARLPVSTSTQMITFYFKNGAIATLRGSGTEPKLKYYVEMIGDKSAKKEDVEKTLA +EVVKQVIDNFLRPVENELTPPKDD +>Lepto +MASSERLQQLIQDWLKWDKNPTTLSEIQELVKKNDEKELRARLENRIAFGTAGMFLLGPM +KAGFSCMNDLTVIQASQGLCIYVSDTIPNALNSGVVVGYDGRYNSKEFAKYTAATFLSKG +YKVYLFSKVVPTPYVAFAVTELKAAIGVMITASHNPKDDNGYKVYWDNGCQINTPHDKGI +AKQIQLNLEPWNVCAFFLDINANELLSGSSVVDPLDTIVNSYNSKITSYSVGNSGVKLAN +EKIVYTAMHGVGGEYTKLAFEAFKLPPFVPVPQQYTPDPAFPTVAFPNPEEGKGALKLSI +ETAEANGSRLILANDPDADRLAVAERNTNGTWKVFNGNEIGVLLADWAWQNARRAHPDTP +ANRYFMINTAVSSAMLKTMAKHEGYRCDETLTGFKWVGNQARKVIDEEKLNFLFAYEEAI +GFMYGDVSLDKDGVRCAPIFAEMALSYYAQGHSCEDHLETLYKRYGYHISRNRYFFCYEP +PKMVAIFDRIRNGRNFPTKCGRFEIERVRDLTVDYDDAYPDKKARLPVSTSTQMITFWFK +NGGIATLRGSGTEPKLKYYVEMIGQDKQVVEKELAELVDAVIQQFLRPVENELTPPKDD diff --git a/src/jalview/analysis/SequenceIdMatcher.java b/src/jalview/analysis/SequenceIdMatcher.java index e6a4853..5b812dd 100755 --- a/src/jalview/analysis/SequenceIdMatcher.java +++ b/src/jalview/analysis/SequenceIdMatcher.java @@ -24,7 +24,9 @@ import jalview.datamodel.DBRefEntry; import jalview.datamodel.SequenceI; import java.util.ArrayList; -import java.util.Hashtable; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; import java.util.Vector; /** @@ -35,17 +37,27 @@ import java.util.Vector; */ public class SequenceIdMatcher { - private Hashtable names; + private HashMap names; - public SequenceIdMatcher(SequenceI[] seqs) + public SequenceIdMatcher(List seqs) { - names = new Hashtable(); - for (int i = 0; i < seqs.length; i++) + names = new HashMap(); + addAll(seqs); + } + + /** + * add more sequences to this matcher - also used by the constructor + * + * @param seqs + */ + public void addAll(List seqs) + { + for (SequenceI seq : seqs) { // TODO: deal with ID collisions - SequenceI should be appended to list // associated with this key. - names.put(new SeqIdName(seqs[i].getDisplayId(true)), seqs[i]); - SequenceI dbseq = seqs[i]; + names.put(new SeqIdName(seq.getDisplayId(true)), seq); + SequenceI dbseq = seq; while (dbseq.getDatasetSequence()!=null) { dbseq = dbseq.getDatasetSequence(); @@ -58,9 +70,9 @@ public class SequenceIdMatcher for (int r = 0; r < dbr.length; r++) { sid = new SeqIdName(dbr[r].getAccessionId()); - if (!names.contains(sid)) + if (!names.containsKey(sid)) { - names.put(sid, seqs[i]); + names.put(sid, seq); } } } @@ -68,19 +80,30 @@ public class SequenceIdMatcher } /** + * convenience method to make a matcher from concrete array + * + * @param sequences + */ + public SequenceIdMatcher(SequenceI[] sequences) + { + this(Arrays.asList(sequences)); + } + + /** * returns the closest SequenceI in matches to SeqIdName and returns all the * matches to the names hash. * * @param candName * SeqIdName * @param matches - * Vector of SequenceI objects + * List of SequenceI objects * @return SequenceI closest SequenceI to SeqIdName */ - private SequenceI pickbestMatch(SeqIdName candName, Vector matches) + private SequenceI pickbestMatch(SeqIdName candName, + List matches) { - SequenceI[] st = pickbestMatches(candName, matches); - return st == null || st.length == 0 ? null : st[0]; + List st = pickbestMatches(candName, matches); + return st == null || st.size() == 0 ? null : st.get(0); } /** @@ -94,16 +117,15 @@ public class SequenceIdMatcher * @return Object[] { SequenceI closest SequenceI to SeqIdName, SequenceI[] * ties } */ - private SequenceI[] pickbestMatches(SeqIdName candName, Vector matches) + private List pickbestMatches(SeqIdName candName, + List matches) { - ArrayList best = new ArrayList(); - SequenceI match = null; + ArrayList best = new ArrayList(); if (candName == null || matches == null || matches.size() == 0) { return null; } - match = (SequenceI) matches.elementAt(0); - matches.removeElementAt(0); + SequenceI match = matches.remove(0); best.add(match); names.put(new SeqIdName(match.getName()), match); int matchlen = match.getName().length(); @@ -111,8 +133,7 @@ public class SequenceIdMatcher while (matches.size() > 0) { // look through for a better one. - SequenceI cand = (SequenceI) matches.elementAt(0); - matches.remove(0); + SequenceI cand = matches.remove(0); names.put(new SeqIdName(cand.getName()), cand); int q, w, candlen = cand.getName().length(); // keep the one with an id 'closer' to the given seqnam string @@ -136,7 +157,7 @@ public class SequenceIdMatcher return null; } ; - return (SequenceI[]) best.toArray(new SequenceI[0]); + return best; } /** @@ -163,12 +184,18 @@ public class SequenceIdMatcher * * @param seqnam * string to query Matcher with. + * @return a new array or (possibly) null */ public SequenceI[] findAllIdMatches(String seqnam) { SeqIdName nam = new SeqIdName(seqnam); - return findAllIdMatches(nam); + List m = findAllIdMatches(nam); + if (m!=null) + { + return m.toArray(new SequenceI[m.size()]); + } + return null; } /** @@ -233,15 +260,15 @@ public class SequenceIdMatcher * SeqIdName * @return SequenceI[] */ - private SequenceI[] findAllIdMatches( + private List findAllIdMatches( jalview.analysis.SequenceIdMatcher.SeqIdName nam) { - Vector matches = new Vector(); + ArrayList matches = new ArrayList(); while (names.containsKey(nam)) { - matches.addElement(names.remove(nam)); + matches.add(names.remove(nam)); } - SequenceI[] r = pickbestMatches(nam, matches); + List r = pickbestMatches(nam, matches); return r; } diff --git a/src/jalview/api/AlignViewControllerGuiI.java b/src/jalview/api/AlignViewControllerGuiI.java index f768451..4896b35 100644 --- a/src/jalview/api/AlignViewControllerGuiI.java +++ b/src/jalview/api/AlignViewControllerGuiI.java @@ -47,4 +47,19 @@ public interface AlignViewControllerGuiI void setMenusForViewport(); void changeColour(ColourSchemeI cs); + + /** + * trigger an update of the UI in response to a model data change, and if + * necessary enable the display of sequence feature annotation on the view. + * + * @param enableIfNecessary + */ + void refreshFeatureUI(boolean enableIfNecessary); + + /** + * get the Feature Settings control panel for the alignment view if one exists + * + * @return + */ + FeatureSettingsControllerI getFeatureSettingsUI(); } diff --git a/src/jalview/api/AlignViewControllerI.java b/src/jalview/api/AlignViewControllerI.java index 235a656..fef9f14 100644 --- a/src/jalview/api/AlignViewControllerI.java +++ b/src/jalview/api/AlignViewControllerI.java @@ -76,4 +76,17 @@ public interface AlignViewControllerI */ void sortAlignmentByFeatureDensity(String[] typ); + /** + * add a features file of some kind to the current view + * + * @param file + * @param protocol + * @param relaxedIdMatching + * if true, try harder to match up IDs with local sequence data + * @return true if parsing resulted in something being imported to the view or + * dataset + */ + public boolean parseFeaturesFile(String file, String protocol, + boolean relaxedIdMatching); + } diff --git a/src/jalview/api/FeatureSettingsControllerI.java b/src/jalview/api/FeatureSettingsControllerI.java index c718e36..2bcd667 100644 --- a/src/jalview/api/FeatureSettingsControllerI.java +++ b/src/jalview/api/FeatureSettingsControllerI.java @@ -2,5 +2,7 @@ package jalview.api; public interface FeatureSettingsControllerI { + + void discoverAllFeatureData(); } diff --git a/src/jalview/appletgui/AlignFrame.java b/src/jalview/appletgui/AlignFrame.java index 478aa09..77747d1 100644 --- a/src/jalview/appletgui/AlignFrame.java +++ b/src/jalview/appletgui/AlignFrame.java @@ -26,6 +26,7 @@ import jalview.api.AlignViewControllerGuiI; import jalview.api.AlignViewControllerI; import jalview.api.AlignViewportI; import jalview.api.FeatureRenderer; +import jalview.api.FeatureSettingsControllerI; import jalview.api.SequenceStructureBinding; import jalview.bin.JalviewLite; import jalview.commands.CommandI; @@ -4178,7 +4179,7 @@ public class AlignFrame extends EmbmenuFrame implements ActionListener, { this.splitFrame = sf; } - + // may not need this @Override public void setShowSeqFeatures(boolean b) { @@ -4193,4 +4194,20 @@ public class AlignFrame extends EmbmenuFrame implements ActionListener, // setMenusFromViewport(viewport); } + @Override + public void refreshFeatureUI(boolean enableIfNecessary) + { + if (enableIfNecessary) + { + sequenceFeatures.setState(true); + alignPanel.av.setShowSequenceFeatures(true); + } + } + + @Override + public FeatureSettingsControllerI getFeatureSettingsUI() + { + return alignPanel.av.featureSettings; + } + } diff --git a/src/jalview/appletgui/FeatureSettings.java b/src/jalview/appletgui/FeatureSettings.java index 8ce3e62..2b79256 100755 --- a/src/jalview/appletgui/FeatureSettings.java +++ b/src/jalview/appletgui/FeatureSettings.java @@ -20,21 +20,51 @@ */ package jalview.appletgui; -import java.util.*; -import java.util.List; -import java.awt.*; -import java.awt.event.*; - -import jalview.analysis.AlignmentSorter; -import jalview.commands.OrderCommand; -import jalview.datamodel.*; +import jalview.api.FeatureSettingsControllerI; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.SequenceFeature; import jalview.schemes.AnnotationColourGradient; import jalview.schemes.GraduatedColor; import jalview.util.MessageManager; +import java.awt.BorderLayout; +import java.awt.Button; +import java.awt.Checkbox; +import java.awt.Color; +import java.awt.Component; +import java.awt.Dimension; +import java.awt.Font; +import java.awt.FontMetrics; +import java.awt.Frame; +import java.awt.Graphics; +import java.awt.GridLayout; +import java.awt.Image; +import java.awt.Label; +import java.awt.MenuItem; +import java.awt.Panel; +import java.awt.PopupMenu; +import java.awt.ScrollPane; +import java.awt.Scrollbar; +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; +import java.awt.event.AdjustmentEvent; +import java.awt.event.AdjustmentListener; +import java.awt.event.InputEvent; +import java.awt.event.ItemEvent; +import java.awt.event.ItemListener; +import java.awt.event.MouseEvent; +import java.awt.event.MouseListener; +import java.awt.event.MouseMotionListener; +import java.awt.event.WindowAdapter; +import java.awt.event.WindowEvent; +import java.util.Enumeration; +import java.util.Hashtable; +import java.util.List; +import java.util.Vector; + public class FeatureSettings extends Panel implements ItemListener, MouseListener, MouseMotionListener, ActionListener, - AdjustmentListener + AdjustmentListener, FeatureSettingsControllerI { FeatureRenderer fr; @@ -84,7 +114,7 @@ public class FeatureSettings extends Panel implements ItemListener, fr.findAllFeatures(true); // was default - now true to make all visible } - setTableData(); + discoverAllFeatureData(); this.setLayout(new BorderLayout()); scrollPane = new ScrollPane(); @@ -249,7 +279,8 @@ public class FeatureSettings extends Panel implements ItemListener, men.show(this.featurePanel, x, y); } - public void setTableData() + @Override + public void discoverAllFeatureData() { if (fr.getAllFeatureColours()!=null && fr.getAllFeatureColours().size()>0) { @@ -276,7 +307,7 @@ public class FeatureSettings extends Panel implements ItemListener, } // TODO: JAL-964 - smoothly incorporate new group entries if panel already // displayed and new groups present - for (String group:(List)fr.getFeatureGroups()) + for (String group:fr.getFeatureGroups()) { boolean vis = fr.checkGroupVisibility(group, false); Checkbox check = new MyCheckbox(group, vis, @@ -648,7 +679,7 @@ public class FeatureSettings extends Panel implements ItemListener, public void adjustmentValueChanged(AdjustmentEvent evt) { - fr.setTransparency((float) (100 - transparency.getValue()) / 100f); + fr.setTransparency((100 - transparency.getValue()) / 100f); ap.seqPanel.seqCanvas.repaint(); } diff --git a/src/jalview/controller/AlignViewController.java b/src/jalview/controller/AlignViewController.java index 972b6ab..3be32bb 100644 --- a/src/jalview/controller/AlignViewController.java +++ b/src/jalview/controller/AlignViewController.java @@ -33,6 +33,7 @@ import jalview.datamodel.SequenceCollectionI; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; +import jalview.io.FeaturesFile; import jalview.util.MessageManager; import java.awt.Color; @@ -379,4 +380,38 @@ public class AlignViewController implements AlignViewControllerI { sortBy(typ, "Sort by Feature Score", AlignmentSorter.FEATURE_SCORE); } + + @Override + public boolean parseFeaturesFile(String file, String protocol, + boolean relaxedIdMatching) + { + boolean featuresFile = false; + try + { + featuresFile = new FeaturesFile(file, protocol).parse(viewport + .getAlignment().getDataset(), alignPanel.getFeatureRenderer() + .getFeatureColours(), false, relaxedIdMatching); + } catch (Exception ex) + { + ex.printStackTrace(); + } + + if (featuresFile) + { + avcg.refreshFeatureUI(true); + if (alignPanel.getFeatureRenderer() != null) + { + // update the min/max ranges where necessary + alignPanel.getFeatureRenderer().findAllFeatures(true); + } + if (avcg.getFeatureSettingsUI() != null) + { + avcg.getFeatureSettingsUI().discoverAllFeatureData(); + } + alignPanel.paintAlignment(true); + } + + return featuresFile; + + } } diff --git a/src/jalview/controller/FeatureSettingsController.java b/src/jalview/controller/FeatureSettingsController.java index ebf4958..7dd1399 100644 --- a/src/jalview/controller/FeatureSettingsController.java +++ b/src/jalview/controller/FeatureSettingsController.java @@ -3,7 +3,8 @@ package jalview.controller; import jalview.api.FeatureRenderer; import jalview.api.FeatureSettingsModelI; -public class FeatureSettingsController implements jalview.api.FeatureSettingsControllerI +public class FeatureSettingsController // implements + // jalview.api.FeatureSettingsControllerI { FeatureSettingsControllerGuiI settingUI; FeatureRenderer fr; diff --git a/src/jalview/datamodel/ASequence.java b/src/jalview/datamodel/ASequence.java new file mode 100755 index 0000000..238fd38 --- /dev/null +++ b/src/jalview/datamodel/ASequence.java @@ -0,0 +1,16 @@ +/** + * + */ +package jalview.datamodel; + +/** + * Metadata for a sequence that may or may not be physically present in Jalview + * at the moment + * + * @author jprocter + * + */ +public class ASequence implements ASequenceI +{ + +} diff --git a/src/jalview/datamodel/ASequenceI.java b/src/jalview/datamodel/ASequenceI.java new file mode 100755 index 0000000..481b1e3 --- /dev/null +++ b/src/jalview/datamodel/ASequenceI.java @@ -0,0 +1,12 @@ +package jalview.datamodel; + +/** + * interfaces to access the basic metadata for a concrete or virtual sequence + * + * @author jprocter + * + */ +public interface ASequenceI +{ + +} diff --git a/src/jalview/datamodel/AlignedCodon.java b/src/jalview/datamodel/AlignedCodon.java index 0daa3fb..12eb470 100644 --- a/src/jalview/datamodel/AlignedCodon.java +++ b/src/jalview/datamodel/AlignedCodon.java @@ -7,6 +7,8 @@ package jalview.datamodel; * * Example: in "G-AT-C-GA" the aligned codons are (0, 2, 3) and (5, 7, 8). * + * JBPComment: Is this useful anywhere other than jalview.analysis.Dna ? + * * @author gmcarstairs * */ diff --git a/src/jalview/datamodel/AlignedCodonFrame.java b/src/jalview/datamodel/AlignedCodonFrame.java index d0b2731..eb977bc 100644 --- a/src/jalview/datamodel/AlignedCodonFrame.java +++ b/src/jalview/datamodel/AlignedCodonFrame.java @@ -33,12 +33,12 @@ import jalview.util.MappingUtils; public class AlignedCodonFrame { - /* + /** * tied array of na Sequence objects. */ private SequenceI[] dnaSeqs = null; - /* + /** * tied array of Mappings to protein sequence Objects and SequenceI[] * aaSeqs=null; MapLists where each maps from the corresponding dnaSeqs * element to corresponding aaSeqs element diff --git a/src/jalview/datamodel/Sequence.java b/src/jalview/datamodel/Sequence.java index cab1ac7..257c894 100755 --- a/src/jalview/datamodel/Sequence.java +++ b/src/jalview/datamodel/Sequence.java @@ -37,7 +37,7 @@ import jalview.util.StringUtils; * @author $author$ * @version $Revision$ */ -public class Sequence implements SequenceI +public class Sequence extends ASequence implements SequenceI { SequenceI datasetSequence; diff --git a/src/jalview/datamodel/SequenceDummy.java b/src/jalview/datamodel/SequenceDummy.java new file mode 100644 index 0000000..80b0072 --- /dev/null +++ b/src/jalview/datamodel/SequenceDummy.java @@ -0,0 +1,9 @@ +package jalview.datamodel; + +public class SequenceDummy extends Sequence implements SequenceI +{ + public SequenceDummy(String sequenceId) + { + super(sequenceId, "THISAPLACEHOLDER"); + } +} diff --git a/src/jalview/datamodel/SequenceFeature.java b/src/jalview/datamodel/SequenceFeature.java index 28ab82c..1b6498f 100755 --- a/src/jalview/datamodel/SequenceFeature.java +++ b/src/jalview/datamodel/SequenceFeature.java @@ -302,4 +302,23 @@ public class SequenceFeature return begin; } + public int getStrand() + { + String str; + if (otherDetails == null + || (str = otherDetails.get("STRAND").toString()) == null) + { + return 0; + } + if (str.equals("-")) + { + return -1; + } + if (str.equals("+")) + { + return 1; + } + return 0; + } + } diff --git a/src/jalview/datamodel/SequenceI.java b/src/jalview/datamodel/SequenceI.java index 38ae372..f69c8b6 100755 --- a/src/jalview/datamodel/SequenceI.java +++ b/src/jalview/datamodel/SequenceI.java @@ -26,12 +26,13 @@ import java.util.Vector; import fr.orsay.lri.varna.models.rna.RNA; /** - * DOCUMENT ME! + * Methods for manipulating a sequence, its metadata and related annotation in + * an alignment or dataset. * * @author $author$ * @version $Revision$ */ -public interface SequenceI +public interface SequenceI extends ASequenceI { /** * Set the display name for the sequence diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java index 553ddd2..90b9f6a 100644 --- a/src/jalview/gui/AlignFrame.java +++ b/src/jalview/gui/AlignFrame.java @@ -32,7 +32,6 @@ import jalview.api.AlignViewControllerGuiI; import jalview.api.AlignViewControllerI; import jalview.api.AlignViewportI; import jalview.api.AlignmentViewPanel; -import jalview.api.SplitContainerI; import jalview.api.ViewStyleI; import jalview.api.analysis.ScoreModelI; import jalview.bin.Cache; @@ -60,7 +59,6 @@ import jalview.gui.ViewSelectionMenu.ViewSetProvider; import jalview.io.AlignmentProperties; import jalview.io.AnnotationFile; import jalview.io.BioJsHTMLOutput; -import jalview.io.FeaturesFile; import jalview.io.FileLoader; import jalview.io.FormatAdapter; import jalview.io.HtmlSvgOutput; @@ -3067,6 +3065,12 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, public FeatureSettings featureSettings; @Override + public FeatureSettingsControllerI getFeatureSettingsUI() + { + return featureSettings; + } + + @Override public void featureSettings_actionPerformed(ActionEvent e) { if (featureSettings != null) @@ -4916,41 +4920,29 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, * contents or path to retrieve file * @param type * access mode of file (see jalview.io.AlignFile) - * @return true if features file was parsed corectly. + * @return true if features file was parsed correctly. */ public boolean parseFeaturesFile(String file, String type) { - boolean featuresFile = false; - try - { - featuresFile = new FeaturesFile(file, type).parse(viewport - .getAlignment().getDataset(), alignPanel.getSeqPanel().seqCanvas - .getFeatureRenderer().getFeatureColours(), false, - jalview.bin.Cache.getDefault("RELAXEDSEQIDMATCHING", false)); - } catch (Exception ex) - { - ex.printStackTrace(); - } + return avc.parseFeaturesFile(file, type, + jalview.bin.Cache.getDefault("RELAXEDSEQIDMATCHING", false)); + + } - if (featuresFile) + @Override + public void refreshFeatureUI(boolean enableIfNecessary) + { + // note - currently this is only still here rather than in the controller + // because of the featureSettings hard reference that is yet to be + // abstracted + if (enableIfNecessary) { viewport.setShowSequenceFeatures(true); showSeqFeatures.setSelected(true); - if (alignPanel.getSeqPanel().seqCanvas.fr != null) - { - // update the min/max ranges where necessary - alignPanel.getSeqPanel().seqCanvas.fr.findAllFeatures(true); - } - if (featureSettings != null) - { - featureSettings.setTableData(); - } - alignPanel.paintAlignment(true); } - return featuresFile; - } + } @Override public void dragEnter(DropTargetDragEvent evt) { diff --git a/src/jalview/gui/FeatureSettings.java b/src/jalview/gui/FeatureSettings.java index 918fc02..940a216 100644 --- a/src/jalview/gui/FeatureSettings.java +++ b/src/jalview/gui/FeatureSettings.java @@ -20,6 +20,7 @@ */ package jalview.gui; +import jalview.api.FeatureSettingsControllerI; import jalview.bin.Cache; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; @@ -88,7 +89,8 @@ import javax.swing.table.AbstractTableModel; import javax.swing.table.TableCellEditor; import javax.swing.table.TableCellRenderer; -public class FeatureSettings extends JPanel +public class FeatureSettings extends JPanel implements + FeatureSettingsControllerI { DasSourceBrowser dassourceBrowser; @@ -227,7 +229,7 @@ public class FeatureSettings extends JPanel fr.findAllFeatures(true); // display everything! } - setTableData(); + discoverAllFeatureData(); final PropertyChangeListener change; final FeatureSettings fs = this; fr.addPropertyChangeListener(change = new PropertyChangeListener() @@ -421,7 +423,8 @@ public class FeatureSettings extends JPanel */ Hashtable typeWidth = null; - synchronized public void setTableData() + @Override + synchronized public void discoverAllFeatureData() { Vector allFeatures = new Vector(); Vector allGroups = new Vector(); diff --git a/src/jalview/io/FeaturesFile.java b/src/jalview/io/FeaturesFile.java index 260b32d..88a8611 100755 --- a/src/jalview/io/FeaturesFile.java +++ b/src/jalview/io/FeaturesFile.java @@ -20,13 +20,28 @@ */ package jalview.io; -import java.io.*; -import java.util.*; - import jalview.analysis.SequenceIdMatcher; -import jalview.datamodel.*; -import jalview.schemes.*; +import jalview.datamodel.AlignedCodonFrame; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.SequenceDummy; +import jalview.datamodel.SequenceFeature; +import jalview.datamodel.SequenceI; +import jalview.schemes.AnnotationColourGradient; +import jalview.schemes.GraduatedColor; +import jalview.schemes.UserColourScheme; import jalview.util.Format; +import jalview.util.MapList; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Hashtable; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.StringTokenizer; +import java.util.Vector; /** * Parse and create Jalview Features files Detects GFF format features files and @@ -154,6 +169,10 @@ public class FeaturesFile extends AlignFile try { SequenceI seq = null; + /** + * keep track of any sequences we try to create from the data if it is a GFF3 file + */ + ArrayList newseqs = new ArrayList(); String type, desc, token = null; int index, start, end; @@ -427,7 +446,7 @@ public class FeaturesFile extends AlignFile // Still possible this is an old Jalview file, // which does not have type colours at the beginning seqId = token = st.nextToken(); - seq = findName(align, seqId, relaxedIdmatching); + seq = findName(align, seqId, relaxedIdmatching, newseqs); if (seq != null) { desc = st.nextToken(); @@ -496,9 +515,11 @@ public class FeaturesFile extends AlignFile if (st.hasMoreTokens()) { StringBuffer attributes = new StringBuffer(); + boolean sep = false; while (st.hasMoreTokens()) { - attributes.append("\t" + st.nextElement()); + attributes.append((sep ? "\t" : "") + st.nextElement()); + sep = true; } // TODO validate and split GFF2 attributes field ? parse out // ([A-Za-z][A-Za-z0-9_]*) ; and add as @@ -506,10 +527,15 @@ public class FeaturesFile extends AlignFile sf.setValue("ATTRIBUTES", attributes.toString()); } - seq.addSequenceFeature(sf); - while ((seq = align.findName(seq, seqId, true)) != null) + if (processOrAddSeqFeature(align, newseqs, seq, sf, GFFFile, + relaxedIdmatching)) { - seq.addSequenceFeature(new SequenceFeature(sf)); + // check whether we should add the sequence feature to any other + // sequences in the alignment with the same or similar + while ((seq = align.findName(seq, seqId, true)) != null) + { + seq.addSequenceFeature(new SequenceFeature(sf)); + } } break; } @@ -536,7 +562,7 @@ public class FeaturesFile extends AlignFile if (!token.equals("ID_NOT_SPECIFIED")) { - seq = findName(align, seqId = token, relaxedIdmatching); + seq = findName(align, seqId = token, relaxedIdmatching, null); st.nextToken(); } else @@ -609,6 +635,9 @@ public class FeaturesFile extends AlignFile resetMatcher(); } catch (Exception ex) { + // should report somewhere useful for UI if necessary + warningMessage = ((warningMessage == null) ? "" : warningMessage) + + "Parsing error at\n" + line; System.out.println("Error parsing feature file: " + ex + "\n" + line); ex.printStackTrace(System.err); resetMatcher(); @@ -618,6 +647,280 @@ public class FeaturesFile extends AlignFile return true; } + + /** + * take a sequence feature and examine its attributes to decide how it should + * be added to a sequence + * + * @param seq + * - the destination sequence constructed or discovered in the + * current context + * @param sf + * - the base feature with ATTRIBUTES property containing any + * additional attributes + * @param gFFFile + * - true if we are processing a GFF annotation file + * @return true if sf was actually added to the sequence, false if it was + * processed in another way + */ + public boolean processOrAddSeqFeature(AlignmentI align, List newseqs, SequenceI seq, SequenceFeature sf, + boolean gFFFile, boolean relaxedIdMatching) + { + String attr = (String) sf.getValue("ATTRIBUTES"); + boolean add = true; + if (gFFFile && attr != null) + { + int nattr=8; + + for (String attset : attr.split("\t")) + { + if (attset==null || attset.trim().length()==0) + { + continue; + } + nattr++; + Map> set = new HashMap>(); + // normally, only expect one column - 9 - in this field + // the attributes (Gff3) or groups (gff2) field + for (String pair : attset.trim().split(";")) + { + pair = pair.trim(); + if (pair.length() == 0) + { + continue; + } + + // expect either space seperated (gff2) or '=' separated (gff3) + // key/value pairs here + + int eqpos = pair.indexOf('='),sppos = pair.indexOf(' '); + String key = null, value = null; + + if (sppos > -1 && (eqpos == -1 || sppos < eqpos)) + { + key = pair.substring(0, sppos); + value = pair.substring(sppos + 1); + } else { + if (eqpos > -1 && (sppos == -1 || eqpos < sppos)) + { + key = pair.substring(0, eqpos); + value = pair.substring(eqpos + 1); + } else + { + key = pair; + } + } + if (key != null) + { + List vals = set.get(key); + if (vals == null) + { + vals = new ArrayList(); + set.put(key, vals); + } + if (value != null) + { + vals.add(value.trim()); + } + } + } + try + { + add &= processGffKey(set, nattr, seq, sf, align, newseqs, + relaxedIdMatching); // process decides if + // feature is actually + // added + } catch (InvalidGFF3FieldException ivfe) + { + System.err.println(ivfe); + } + } + } + if (add) + { + seq.addSequenceFeature(sf); + } + return add; + } + + public class InvalidGFF3FieldException extends Exception + { + String field, value; + + public InvalidGFF3FieldException(String field, + Map> set, String message) + { + super(message + " (Field was " + field + " and value was " + + set.get(field).toString()); + this.field = field; + this.value = set.get(field).toString(); + } + + } + + /** + * take a set of keys for a feature and interpret them + * + * @param set + * @param nattr + * @param seq + * @param sf + * @return + */ + public boolean processGffKey(Map> set, int nattr, + SequenceI seq, SequenceFeature sf, AlignmentI align, + List newseqs, boolean relaxedIdMatching) + throws InvalidGFF3FieldException + { + String attr; + // decide how to interpret according to type + if (sf.getType().equals("similarity")) + { + int strand = sf.getStrand(); + // exonerate cdna/protein map + // look for fields + List querySeq = findNames(align, newseqs, + relaxedIdMatching, set.get(attr="Query")); + if (querySeq==null || querySeq.size()!=1) + { + throw new InvalidGFF3FieldException( attr, set, + "Expecting exactly one sequence in Query field (got " + + set.get(attr) + ")"); + } + if (set.containsKey(attr="Align")) + { + // process the align maps and create cdna/protein maps + // ideally, the query sequences are in the alignment, but maybe not... + + AlignedCodonFrame alco = new AlignedCodonFrame(); + MapList codonmapping = constructCodonMappingFromAlign(set, attr, + strand); + + // add codon mapping, and hope! + alco.addMap(seq, querySeq.get(0), codonmapping); + align.addCodonFrame(alco); + // everything that's needed to be done is done + // no features to create here ! + return false; + } + + } + return true; + } + + private MapList constructCodonMappingFromAlign( + Map> set, + String attr, int strand) throws InvalidGFF3FieldException + { + if (strand == 0) + { + throw new InvalidGFF3FieldException(attr, set, + "Invalid strand for a codon mapping (cannot be 0)"); + } + List fromrange = new ArrayList(), torange = new ArrayList(); + int lastppos = 0, lastpframe = 0; + for (String range : set.get(attr)) + { + List ints = new ArrayList(); + StringTokenizer st = new StringTokenizer(range, " "); + while (st.hasMoreTokens()) + { + String num = st.nextToken(); + try + { + ints.add(new Integer(num)); + } catch (NumberFormatException nfe) + { + throw new InvalidGFF3FieldException(attr, set, + "Invalid number in field " + num); + } + } + // Align positionInRef positionInQuery LengthInRef + // contig_1146 exonerate:protein2genome:local similarity 8534 11269 + // 3652 - . alignment_id 0 ; + // Query DDB_G0269124 + // Align 11270 143 120 + // corresponds to : 120 bases align at pos 143 in protein to 11270 on + // dna in strand direction + // Align 11150 187 282 + // corresponds to : 282 bases align at pos 187 in protein to 11150 on + // dna in strand direction + // + // Align 10865 281 888 + // Align 9977 578 1068 + // Align 8909 935 375 + // + if (ints.size() != 3) + { + throw new InvalidGFF3FieldException(attr, set, + "Invalid number of fields for this attribute (" + + ints.size() + ")"); + } + fromrange.add(new Integer(ints.get(0).intValue())); + fromrange.add(new Integer(ints.get(0).intValue() + strand + * ints.get(2).intValue())); + // how are intron/exon boundaries that do not align in codons + // represented + if (ints.get(1).equals(lastppos) && lastpframe > 0) + { + // extend existing to map + lastppos += ints.get(2) / 3; + lastpframe = ints.get(2) % 3; + torange.set(torange.size() - 1, new Integer(lastppos)); + } + else + { + // new to map range + torange.add(ints.get(1)); + lastppos = ints.get(1) + ints.get(2) / 3; + lastpframe = ints.get(2) % 3; + torange.add(new Integer(lastppos)); + } + } + // from and to ranges must end up being a series of start/end intervals + if (fromrange.size() % 2 == 1) + { + throw new InvalidGFF3FieldException(attr, set, + "Couldn't parse the DNA alignment range correctly"); + } + if (torange.size() % 2 == 1) + { + throw new InvalidGFF3FieldException(attr, set, + "Couldn't parse the protein alignment range correctly"); + } + // finally, build the map + int[] frommap = new int[fromrange.size()], tomap = new int[torange + .size()]; + int p = 0; + for (Integer ip : fromrange) + { + frommap[p++] = ip.intValue(); + } + p = 0; + for (Integer ip : torange) + { + tomap[p++] = ip.intValue(); + } + + return new MapList(frommap, tomap, 3, 1); + } + + private List findNames(AlignmentI align, + List newseqs, boolean relaxedIdMatching, + List list) + { + List found = new ArrayList(); + for (String seqId : list) + { + SequenceI seq = findName(align, seqId, relaxedIdMatching, newseqs); + if (seq != null) + { + found.add(seq); + } + } + return found; + } + private AlignmentI lastmatchedAl = null; private SequenceIdMatcher matcher = null; @@ -632,7 +935,7 @@ public class FeaturesFile extends AlignFile } private SequenceI findName(AlignmentI align, String seqId, - boolean relaxedIdMatching) + boolean relaxedIdMatching, List newseqs) { SequenceI match = null; if (relaxedIdMatching) @@ -641,16 +944,29 @@ public class FeaturesFile extends AlignFile { matcher = new SequenceIdMatcher( (lastmatchedAl = align).getSequencesArray()); + if (newseqs != null) + { + matcher.addAll(newseqs); + } } match = matcher.findIdMatch(seqId); } else { match = align.findName(seqId, true); + + } + if (match==null && newseqs!=null) + { + match = new SequenceDummy(seqId); + if (relaxedIdMatching) + { + matcher.addAll(Arrays.asList(new SequenceI[] + { match })); + } } return match; } - public void parseDescriptionHTML(SequenceFeature sf, boolean removeHTML) { if (sf.getDescription() == null) diff --git a/src/jalview/io/FileLoader.java b/src/jalview/io/FileLoader.java index 6329f58..aa76da5 100755 --- a/src/jalview/io/FileLoader.java +++ b/src/jalview/io/FileLoader.java @@ -384,7 +384,9 @@ public class FileLoader implements Runnable final String errorMessage = "Couldn't load file " + title + "\n" + error; - if (raiseGUI) + // TODO: refactor FileLoader to be independent of Desktop / Applet GUI + // bits ? + if (raiseGUI && Desktop.desktop != null) { javax.swing.SwingUtilities.invokeLater(new Runnable() { diff --git a/src/jalview/viewmodel/seqfeatures/FeatureSettingsModel.java b/src/jalview/viewmodel/seqfeatures/FeatureSettingsModel.java index 57d57da..15d1da7 100644 --- a/src/jalview/viewmodel/seqfeatures/FeatureSettingsModel.java +++ b/src/jalview/viewmodel/seqfeatures/FeatureSettingsModel.java @@ -2,7 +2,7 @@ package jalview.viewmodel.seqfeatures; import jalview.api.FeatureSettingsModelI; -public class FeatureSettingsModel implements FeatureSettingsModelI +public abstract class FeatureSettingsModel implements FeatureSettingsModelI { } diff --git a/src/jalview/ws/DasSequenceFeatureFetcher.java b/src/jalview/ws/DasSequenceFeatureFetcher.java index e22aa12..3db663d 100644 --- a/src/jalview/ws/DasSequenceFeatureFetcher.java +++ b/src/jalview/ws/DasSequenceFeatureFetcher.java @@ -587,7 +587,7 @@ public class DasSequenceFeatureFetcher if (af != null && af.featureSettings != null) { - af.featureSettings.setTableData(); + af.featureSettings.discoverAllFeatureData(); } if (getFeatSettings() != null) diff --git a/test/jalview/io/Gff3tests.java b/test/jalview/io/Gff3tests.java new file mode 100644 index 0000000..cbb7f24 --- /dev/null +++ b/test/jalview/io/Gff3tests.java @@ -0,0 +1,48 @@ +package jalview.io; + +import jalview.gui.AlignFrame; + +import org.junit.Assert; +import org.junit.Test; + +public class Gff3tests +{ + + private static String exonerateSeqs = "examples/testdata/exonerateseqs.fa", + exonerateOutput = "examples/testdata/exonerateoutput.gff"; + + @Test + public void testExonerateImport() + { + // exonerate does not tag sequences after features, so we have a more + // conventional annotation import test here + + FileLoader loader = new FileLoader(false); + + AlignFrame af = loader.LoadFileWaitTillLoaded(exonerateSeqs, + FormatAdapter.FILE); + + Assert.assertEquals("Unexpected number of DNA protein associations", 0, + af.getViewport().getAlignment().getCodonFrames().size()); + + af.loadJalviewDataFile(exonerateOutput, FormatAdapter.FILE, null, null); + + Assert.assertNotEquals("Expected at least one DNA protein association", + 0, af.getViewport().getAlignment().getDataset() + .getCodonFrames().size()); + + + } + // @Test + // public final void testPrintGFFFormatSequenceIArrayMapOfStringObject() + // { + // fail("Not yet implemented"); + // } + // + // @Test + // public final void testAlignFileBooleanStringString() + // { + // fail("Not yet implemented"); + // } + +}