From f417c6f24712def4fab766afa021ef56fcd13f4b Mon Sep 17 00:00:00 2001 From: Jim Procter Date: Thu, 8 Oct 2015 15:47:39 +0100 Subject: [PATCH] JAL-1684 updated fragment alignment for example and added aligned protein and cDNA --- examples/estrogenReceptorCdna_aln.fa | 320 ++++++++++++++++++++++++++++++ examples/estrogenReceptorCdna_frag.fa | 36 +++- examples/estrogenReceptorProtein_aln.fa | 112 +++++++++++ examples/estrogenReceptorProtein_frag.fa | 24 ++- 4 files changed, 476 insertions(+), 16 deletions(-) create mode 100644 examples/estrogenReceptorCdna_aln.fa create mode 100644 examples/estrogenReceptorProtein_aln.fa diff --git a/examples/estrogenReceptorCdna_aln.fa b/examples/estrogenReceptorCdna_aln.fa new file mode 100644 index 0000000..2f121db --- /dev/null +++ b/examples/estrogenReceptorCdna_aln.fa @@ -0,0 +1,320 @@ +>EMBLCDS|ADZ17331/1-1593 Homo sapiens (human) estrogen nuclear receptor beta variant a +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------atggatataaaaaactcaccatctagccttaattctccttcctcctacaactgcagtcaatccatc +------------------ttacccctggagcacggctccatatacataccttcctcctatgtagacagccac +catgaatatccagccatgacattctatagccctgctgtgatgaattacagcattcccagcaatgtcactaac +ttggaaggtgggcctggtcggcagacc---------------------acaagccca--------------- +---------------aatgtgttgtggccaacacctgggcacctttctcctttagtggtccatcgccagtta +tcacatctgtat------------------------------------------------------------ +------------------------------------------------------------------------ +gcggaacctcaaaagagtccctggtgtgaagcaagatcgctagaacacaccttacctgtaaacagagagaca +ctgaaa------------------------------aggaaggttagtgggaac------------------ +---------------------------cgttgcgccagccctgttactggtcca------------------ +------------------------------------------------------------------------ +---------------------------------------------------------ggttcaaagagggat +gctcacttc--------------------------------------------------------------- +------------------------------------tgcgctgtctgcagcgattacgcatcgggatatcac +tatggagtctggtcgtgtgaaggatgtaaggccttttttaaaagaagcattcaaggacataatgattatatt +tgtccagctacaaatcagtgtacaatcgataaaaaccggcgcaagagctgccaggcctgccgacttcggaag +tgttacgaagtgggaatggtgaagtgtggctcccggagagagagatgt---gggtaccgccttgtgcggaga +cagagaagtgccgacgagcagctgcactgtgccggcaaggccaagagaagtggcggccac------------ +---------------------------gcgccccgagtgcgggagctgctgctggacgccctgagccccgag +cagctagtgctcaccctcctggaggct---gagccgccccatgtgctgatcagc------------------ +------------------------------------------------------------------cgc--- +cccagtgcgcccttcaccgaggcctccatgatgatgtccctgaccaagttggccgacaaggagttggtacac +atgatcagctgggccaagaagattcccggctttgtggagctcagcctgttcgaccaagtgcggctcttggag +agctgttggatggaggtgttaatgatggggctgatgtggcgctcaattgac------caccccggcaagctc +atctttgctccagatcttgttctggacagggatgaggggaaatgcgtagaaggaattctggaaatctttgac +atgctcctggcaact---------acttcaaggtttcgagagttaaaactccaacacaaagaatatctctgt +gtcaaggccatgatcctgctcaattccagtatgtaccctctggtcacagcgacc---caggatgctgacagc +agccggaagctggctcacttgctgaacgccgtgaccgatgctttggtttgggtgattgccaagagcggcatc +tcctcccagcagcaatccatgcgcctggctaacctcctgatgctcctgtcccacgtcaggcatgcgagtaac +aagggcatggaacatctgctcaac------atgaagtgcaaaaatgtg---------gtcccagtgtatgac +ctgctgctggagatgctgaatgcccacgtgcttcgcgggtgcaagtcctccatcacggggtccgagtgcagc +ccggcagaggacagtaaaagcaaagagggctcccagaacccacagtctcagtga +>EMBLCDS|AAK93056/1-1455 Drosophila melanogaster (fruit fly) GH28308p +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +---------------------------------atgtccgacggcgtcagcatcttgcacatcaaacaggag +gtggacactccatcggcgtcctgctttagtcccagctccaagtcaacggccacgcagagtggcacaaacggc +ctgaaa------------------------------------------tcctcgccc--------------- +---------------tcggtttcgccggaaaggcagctctgcagctcgacgacctctctatcctgcgatttg +cacaatgtatcc------------------------------------------------------------ +---------------------------------------------------------ttaagcaatgatggc +gatagtctgaaaggaagtggtacaagtggcggcaatggcggaggaggaggtggtggtacgagtggtggaaat +gcgacc------------------------------------------------------------------ +------------------------------aatgcgagtgccggagctggatcg------------------ +------------------------------------------------------------------------ +---------------------------------------------------------ggatccgtcagggac +gagctccgccgattg--------------------------------------------------------- +------------------------------------tgtttggtttgtggcgatgtggccagtggattccac +tatggtgtggcgagttgtgaggcttgcaaagcgttctttaaacgcaccatccaaggcaacatcgagtacacg +tgtccggcgaacaacgagtgtgagattaacaagcggagacgcaaggcctgccaagcgtgtcgcttccagaaa +tgtctactaatgggcatgctcaaggagggtgtgcgcttggatcgagttcgtggaggacggcagaagtaccga +aggaatcctgtatcaaactcttaccagactatgcagctgctataccaatccaacaccacctcgctg------ +---------------------------tgcgatgtcaagatactggaggtgctcaattcatatgagccggat +gccttgagcgtccaaacg------------ccgccgccgcaagtccacacgactagcataactaatgatgag +gcctcatcctcctcgggcagcataaaactggagtccagcgttgttacgcccaatgggacttgcattttccaa +aacaacaacaacaatgatcccaatgagatactaagcgtccttagtgatatttacgacaaggaattggtcagc +gtcattggctgggccaagcagatacctggctttatagatctgccacttaacgaccagatgaagcttctccag +gtgtcgtgggcagagatcctgacgctccagctgaccttccggtccctaccg------ttcaatggcaagtta +tgcttcgccacggatgtctggatggatgaacatttggccaaggagtgc---ggttacacggagttctactac +cactgcgtccagatc---------gcacagcgcatggaaagaatatcgccacgaagggaggagtactacttg +ctaaaggcgctcctgctggccaactgcgacattctgctg------------------gatgatcagagttcc +ctgcgcgcatttcgtgatacgattcttaattctctaaacgatgtggtctacttgctgcgtcattcgtcggcc +gtgtcgcatcagcaa---------------caattgctgcttttgctgccttcgctgcggcaggcggatgat +atcctgcgaagattttggcgtgga------attgcacgcgatgaagtc---------attaccatgaagaaa +ctgttcctcgagatgctcgag--------------------------------------------------- +---------------------------------------ccgctggccaggtga +>EMBLCDS|BAA89539/1-2133 Anguilla japonica (Japanese eel) progesterone receptor +------------------------------------------------------------------------ +------------------------------------------------------------atggacaacaat +caccaagacaagatggaaagtctatacacgccagccagagcatcaccaactcctgatgcagaatcgattaaa +agagccaggaatttgattaaaacatactcggagtcttttgggagttatgtggaggagatagttcgagacgac +tcgaataacatacaatctttgagcagcgtccctctcttgatgcgtaattttggaaacatggacaccctaacc +tgcgcacctggctcgggtagtgacagtgagatttggaaagactttgttgttcccgggaactctgtggtaagc +aaagacacctgtggtcatgttgaaatatccactaaagccgaaaatttgtcttgggctgccgcgcccttaagt +agagaagaaaccctcgcgaaaggaactgttacggtcccagcgactgtgcctaaagaaagttttaccgcaaca +---------------tcaaacacttcttcagccagtggaatctctattaaagatgaacaacaatctttgctc +aaaatggaaccacagtcttctgacttttgtccttatacagcaaatataccgaaattgaatccatcttatctg +accaatactgcgagtacgaaacaacttggatatggc------------gaacagccg--------------- +---------------gacacttcagcgcactcctctccacctgctcagaagattgttttagatactgctcga +tactcggccgat------------------------------------------------------------ +------------------------------------------ttatgttcggataaccctttaccgcaagcg +acaaatatcaaaacagatccttgtagtagtttctcttctttcgttggagaagggatccttactagggcatct +atgggctactcacagcaagcgattcagacattgccggtgcacaagagtgaaccg------------------ +---------------------------ttcaggttgtctgcttcgagcgcgcccgcggattctccgttttgg +tgccaatcc--------------------------------------------------------------- +------------------------------------------------------acgggtccttctgaggat +catcatctgcagattgactatctatctcccgctggactccacagcacatgcaaatacagttccacgaacgcg +tacagctcctatttaggtgtgctgccccagagggtgtgcgtcatctgtggggatgaagcatcaggctgtcac +tatggtgtcctcacctgtggcagctgtaaggtgttctttaagagggcagtggaaggccatcataactacttg +tgtgctggacggaatgactgcatcgtggacaagatccgtaggaaaaactgtcctgcttgtcgcctcagaaag +tgctaccaggcgggaatgatactgggaggtcggaagctgaagaagttg---ggggctctgaaggcagcaggg +ctgacccaggccctggtggcccactcactgactcctcggaggctctctggtgacagccaggccctg------ +---------------------------atgcccctgggctgccttccaggggtccgggagctgcacctttcc +ccacagatcatcagcgtgctggagagcattgagcctgaggtggtgtactctggt------------------ +------------------------------------------------------------------tatgac +aactcccagcctgacatgcccaatatgctgctcaacagcctcaaccgcttatgtgagaggcagctgctgagg +attgtcaagtggtccaagtctttaccaggttttcgcagtttacacatcaatgaccaaatggctctgatccag +tactcctggatgagcttaatggtattttctttgggttggcggtctttccaaaatgtcaccagtgattacctg +tactttgcacctgacctcattctcaacgaagagtatatgaggaggtct------------ccaatatttgac +ctgtgcatggccatgcagttcatccctcaagagtttgccaatctccaggtgaccaaggaggagtttctgtgc +atgaaggtcttgctgttgctcaac------acagtgcctctg---------------gaggggttgaagagc +cagccacagtttgatgagatgaggcagaattacatccatgaactcaccaaggccattcacctgcgagagaat +ggtgtggtcgcctgctcccagcgtttctaccacctgaccaagctgatggaccacatgcatgacattgtgaag +aagctccacctgtactgcctgagcactttcattcaggcagatgccatgcgg------gtagagttccccgag +atgatgtcagaagtcatcgcctcccagctg------------------------------------------ +------------cctcgggtgctcgctggaatggtgaaaccccttctttttcacaccaaatga +>EMBLCDS|AHW56473/1-1590 Homo sapiens (human) partial estrogen receptor 2 isoform A +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------atggatataaaaaactcaccatctagccttaattctccttcctcctacaactgcagtcaatccatc +------------------ttacccctggagcacggctccatatacataccttcctcctatgtagacagccac +catgaatatccagccatgacattctatagccctgctgtgatgaattacagcattcccagcaatgtcactaac +ttggaaggtgggcctggtcggcagacc---------------------acaagccca--------------- +---------------aatgtgttgtggccaacacctgggcacctttctcctttagtggtccatcgccagtta +tcacatctgtat------------------------------------------------------------ +------------------------------------------------------------------------ +gcggaacctcaaaagagtccctggtgtgaagcaagatcgctagaacacaccttacctgtaaacagagagaca +ctgaaa------------------------------aggaaggttagtgggaac------------------ +---------------------------cgttgcgccagccctgttactggtcca------------------ +------------------------------------------------------------------------ +---------------------------------------------------------ggttcaaagagggat +gctcacttc--------------------------------------------------------------- +------------------------------------tgcgctgtctgcagcgattacgcatcgggatatcac +tatggagtctggtcgtgtgaaggatgtaaggccttttttaaaagaagcattcaaggacataatgattatatt +tgtccagctacaaatcagtgtacaatcgataaaaaccggcgcaagagctgccaggcctgccgacttcggaag +tgttacgaagtgggaatggtgaagtgtggctcccggagagagagatgt---gggtaccgccttgtgcggaga +cagagaagtgccgacgagcagctgcactgtgccggcaaggccaagagaagtggcggccac------------ +---------------------------gcgccccgagtgcgggagctgctgctggacgccctgagccccgag +cagctagtgctcaccctcctggaggct---gagccgccccatgtgctgatcagc------------------ +------------------------------------------------------------------cgc--- +cccagtgcgcccttcaccgaggcctccatgatgatgtccctgaccaagttggccgacaaggagttggtacac +atgatcagctgggccaagaagattcccggctttgtggagctcagcctgttcgaccaagtgcggctcttggag +agctgttggatggaggtgttaatgatggggctgatgtggcgctcaattgac------caccccggcaagctc +atctttgctccagatcttgttctggacagggatgaggggaaatgcgtagaaggaattctggaaatctttgac +atgctcctggcaact---------acttcaaggtttcgagagttaaaactccaacacaaagaatatctctgt +gtcaaggccatgatcctgctcaattccagtatgtaccctctggtcacagcgacc---caggatgctgacagc +agccggaagctggctcacttgctgaacgccgtgaccgatgctttggtttgggtgattgccaagagcggcatc +tcctcccagcagcaatccatgcgcctggctaacctcctgatgctcctgtcccacgtcaggcatgcgagtaac +aagggcatggaacatctgctcaac------atgaagtgcaaaaatgtg---------gtcccagtgtatgac +ctgctgctggagatgctgaatgcccacgtgcttcgcgggtgcaagtcctccatcacggggtccgagtgcagc +ccggcagaggacagtaaaagcaaagagggctcccagaacccacagtctcag +>EMBLCDS|BAA75464/1-2547 Anguilla japonica (Japanese eel) androgen receptor alpha +atggagattccagttgggttaggaggagtttcagatgccacaaacgccgtttttcgcggaccttaccaaaac +gttttccacagccttcaagtggcatttcagagtcacggtgccgtctccaggagcttagattttccaaataca +aagtacggttttttacaaaacagacatttctgtgaaatgcgtcaggagaacaagcagccgccatgcaaagga +ctcggcctattttacgggaaccatcgtaattcggacactgggacaaacgaagacgacatcgcttgtttttcc +agacagtccgacgctgaagccagacctggtattttttctgaaagctcattggatactggagacgagattact +tgcaaactccagtcagacaaccaaggggtaagagcgagcggtcctctcctaccgggctctagcggctgcaat +tcgggacaaaagtcctcccttgcttgtacgtcccaacaaagggagacaacatctcaaagtgacacctgcgca +ggagagagctgctcggaacatcaagcaactaccatttcggaaactgcgcgcgaattgtgcaacgccgtttcc +gtgtcgctgggcttgaatttagatcttaatgatatgaatgacctaagttcaaaccaaatatcgtctaccgaa +agtgacacaagtcaagccatctacttatttgaatcttcacctgggtatactggggtcggactgaacgccttg +gtaagagactgtaaatgtcagagtgcacgcgaagggacatcgacacaacagtacgaccgcggggcaatgttt +aagataaaccgtgtaaatgacttgccgcttcagccagcacccccgcgacacaccagcattagcgatgctaaa +tgggacatggaagcaggtttgtgtgcgcagatggagcacaaagactctgaaaagtgcgcgaatatggatggt +gcacactccacttctgtcttctcccagttcgaccaactgttgccagtaaacgcgtcgcactacagtcagaac +gtttcggtcagagtggaaccacaaagtgatttctctccgattttgtacaaatcacctggtattcagaaaaat +gccgaaaagtacaatgtccaatatgatgccacaattaaatcagaagatgggaaaacgacatctgaacgggaa +tggggttttcagtacaggtacaatgaaagctgcagcacaccgtcagcacctcctagacattgtgcacatcag +aacagggccggaccgtacaaccagttcttttttaatccatttgaatatgcgaaaagaggtgttgtctcaagg +gaaggatattctctcgaacatgggttcccaaacaatctcgctcggacaccctactctggttccttgaaaaac +gaactaggagatcgtctgagtgggccataccctgacgtcagttacaggtacgagggcgagcgggagaacgtg +ttccccgtggagttcttctttccgccgcagaggacctgcctgatctgcggggacgaggcctcgggctgtcac +tatggagccctcacctgcggcagctgcaaggtgttcttcaagagggccgcggaagggaaacagaagtacctg +tgcgccagcatcaatgattgcaccattgataaacttcgaaggaagaactgcccctcttgccgtctcaaaagg +tgctttgctgccggaatgaccctt---------------------------ggagcgcggaagctgaagaag +atcgggcaaatgagggcccccgaggatggccaggggcagggcccggcggaagcggagctgagcgtc------ +---------------------------tcccccaagtacgacctg------------ggcttccacacccag +tccatgttcctcaacatcctggaggccatcgagccggaggtggtgaacgccggg------------------ +------------------------------------------------------------------cacgac +tatggccagccggactctgcggccagcctgctgaccagcctcaacgagctcggagaacggcaactcgtcaag +gtcgtcaagtgggccaagggcatgccaggttttcggagtctgtacgtggatgaccagatgacagtcatccag +cactcctggatggcagtgatggtgttcgctctgggctggaggtcatttaagaatgtgaagtccaggatgctt +tactttgctcctgaccttgttttcaacgagcaccgaatgcaggtgtcc------------accatgtatgaa +cactgcatccggatgaagaacttctcccaggagtttgctatgctgcaggtctcccaggaagagttcctgtgc +atgaaagctctgcttctcttcagc------accatccccgtt---------------gaagggctgaagggg +cagaatttctttgacgagctgcggaggagctacattaacgagctggaccggctggttagcttcaggagcaag +tccagc------tgttccgagaggttccagcagctcacccgcctcctggactccctccaacctgttctgaag +aagctccaccagtttacgttcgaccttttcgtccagtcccagaacctctccaaccaagtttgctttcccgag +atgatctcagagatcatatccgtgcacgtg------------------------------------------ +------------ccaaagattctcgctggcacggtgaagccaatcctcttccacaagtag +>EMBLCDS|AAA17402/1-1032 Serinus canaria (common canary) partial androgen receptor +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------gaagcctccgggtgccac +tacggagccctgacgtgtgggagctgcaaagtgttcttcaaacgggcagctgaaggtaaacagaagtacctc +tgtgccagccgcaacgactgcaccatcgacaagttccggcggaaaaactgcccctcctgccgcctgcgcaag +tgctacgaggccgggatgacgctt---------------------------ggagcccgcaagctgaagaaa +ctgggtaacctgaaggcacaggacgacatagagggagccagctcgtccagcccaacggaggagcaa------ +---------------------------gctcccaagctggtgatgacacgcattgatggctacgagtgccag +cccatcttcctcaacgtcctggaggccatcgagcctggggtggtgtgtgctggc------------------ +------------------------------------------------------------------catgac +aacagccagcctgactccttctccaacctgctgaccagcctgaatgagcttggggagaggcagctggtctac +gtggtcaaatgggcaaaggctctgccaggatttcgcaacctgcatgtggatgaccagatgtcaataatccag +tactcttggatgggcctgatggtgtttgctatggggtggagatccttcaccaacgtcaattccaggatgctt +tactttgctccagacctggtcttcaatgagtaccgcatgcacaaatcc------------aggatgtacagc +cagtgcatcaggatgcggcacctctcccaggaattcgggtggcttcagatcacaccccaggggttcctctgt +atgaaggctctcctcttcttcagt------attattccagtg---------------gatggcctgaagaac +cagaagctcttcgatgagctccgcatgaattacatcaaggaacttgaccgtatcattgcctgcaagaggaag +aaccccacctcatgctcccggaggttttaccagctcaccaaggtcctggactccgtgactccgattgccaag +gacctgcatcagtttacatttgatctcctaatcaaggcacacatggtgagc------gtggactacccagaa +atgatggctgagatcatctctgtgcaggtt------------------------------------------ +------------cccaagatcctgtct +>EMBLCDS|AAL37553/1-1455 Drosophila melanogaster (fruit fly) estrogen-related receptor +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +---------------------------------atgtccgacggcgtcagcatcttgcacatcaaacaggag +gtggacactccatcggcgtcctgctttagtcccagctccaagtcaacggccacgcagagtggcacaaacggc +ctgaaa------------------------------------------tcctcgccc--------------- +---------------tcggtttcgccggaaaggcagctctgcagctcgacgacctctctatcctgcgatttg +cacaatgtatcc------------------------------------------------------------ +---------------------------------------------------------ttaagcaatgatggc +gatagtctgaaaggaagtggtacaagtggcggcaatggcggaggaggaggtggtggtacgagtggtggaaat +gcgacc------------------------------------------------------------------ +------------------------------aatgcgagtgccggagctggatcg------------------ +------------------------------------------------------------------------ +---------------------------------------------------------ggatccgtcagggac +gagctccgccgattg--------------------------------------------------------- +------------------------------------tgtttggtttgtggcgatgtggccagtggattccac +tatggtgtggcgagttgtgaggcttgcaaagcgttctttaaacgcaccatccaaggcaacatcgagtacacg +tgtccggcgaacaacgagtgtgagattaacaagcggagacgcaaggcctgccaagcgtgtcgcttccagaaa +tgtctactaatgggcatgctcaaggagggtgtgcgcttggatcgagttcgtggaggacggcagaagtaccga +aggaatcctgtatcaaactcttaccagactatgcagctgctataccaatccaacaccacctcgctg------ +---------------------------tgcgatgtcaagatactggaggtgctcaattcatatgagccggat +gccttgagcgtccaaacg------------ccgccgccgcaagtccacacgactagcataactaatgatgag +gcctcatcctcctcgggcagcataaaactggagtccagcgttgttacgcccaatgggacttgcattttccaa +aacaacaacaacaatgatcccaatgagatactaagcgtccttagtgatatttacgacaaggaattggtcagc +gtcattggctgggccaagcagatacctggctttatagatctgccacttaacgaccagatgaagcttctccag +gtgtcgtgggcagagatcctgacgctccagctgaccttccggtccctaccg------ttcaatggcaagtta +tgcttcgccacggatgtctggatggatgaacatttggccaaggagtgc---ggttacacggagttctactac +cactgcgtccagatc---------gcacagcgcatggaaagaatatcgccacgaagggaggagtactacttg +ctaaaggcgctcctgctggccaactgcgacattctgctg------------------gatgatcagagttcc +ctgcgcgcatttcgtgatacgattcttaattctctaaacgatgtggtctacttgctgcgtcattcgtcggcc +gtgtcgcatcagcaa---------------caattgctgcttttgctgccttcgctgcggcaggcggatgat +atcctgcgaagattttggcgtgga------attgcacgcgatgaagtc---------attaccatgaagaaa +ctgttcctcgagatgctcgag--------------------------------------------------- +---------------------------------------ccgctggccaggtga +>EMBLCDS|AAK20929/1-1662 Petromyzon marinus (sea lamprey) partial estrogen receptor +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +---------------------------------------------------------gcacgaggcttcagc +gaggcacatggctacgagtactccggggcctcgctctaccagccactgcctccctcgtgcacagagttctca +---------------------------------attggagctcatcaacaacaacaacatcagcaccagcat +caccagcaccagcatcagcagcaccaccaccagcagcagcagcagcagccacagccgcagcagaatggagtt +ttgggcgaggggcagagttcccatctctcttatcttccgccctcgaccgagctgccc--------------- +---------------cagtacgtgccctccagccccagcgcgccctacagcatggagctcggggcagggcgt +cctcacggctacgac--------------------------------------------------------- +------------------------------------------ccagggccacagagcctctacaggggcggt +gtggagagcagcgcccccccgtacagcgagcagcagcaggtggtgggcggcggcggagccatgtcggccatg +ggcttg---------------------------------------acagagcca------------------ +---------------------------cgccacgtcagctccggatcgctaccc------------------ +------------------------------------------------------------------------ +---------------------------------------------------------agcagcacgaggccc +gagcgcagcacccagttc------------------------------------------------------ +------------------------------------tgtgccgtgtgcagcgactatgcctcggggtaccac +tacggcgtgtggtcctgcgagggctgcaaagccttcttcaagcgcagcacgcaaggccacaatgactacatg +tgcccggccaccaaccagtgcaccatcgacaggaaccgtcgcaagagctgtcaggcttgccgcctgcgtaag +tgctacgaagtgggcatggtcaaa---ggcgttcgcaaggaccgcaaa---ggctttcgaggggtcaagcac +aaacgtaagcgccccatcccccaaaagaatgggggagaaggaggtgccggcggcggccaagacgtgagcgag +acgaggcctcagggtgagaggccctccgggccgagggaccgggagagcgccgtcagctcactcgaggctgac +caggtgatctcggctcttctggaggct---gagccacccaccgtactgtcctcg------------------ +------------------------------------------------------------------tatgac +cccgacaagcctgtgacggaggcctcgctcatggctgctctcaccagcctggctgaccgagagctcgtgcac +atgatcacctgggctaagaagattccaggattcacggccatcgggctgagtgaccaggtgcagctgctggag +tgctgctggctggagatcctaatcgtggggctcatctggaggtctatcgat------cgccctggtcagctc +cactttgctccaaacctcatcctaggaagggaggacgcgcgcaatgtggagggcatgctggacatgttcgac +atgctgctcgtcacc---------gtgagtcgcttccgtgagctgcatctccgccgggaggaatacgtctgc +ctcaaggccatgatcctcctcaactcgggggtgtttttctgcctctccaattccgccggggagcagacgaat +gtgcagctcatccagcagatcctcgagaaggtgatggacgccctgggcagcaccatcggccacattgaggcg +tccccgccgcaacactcgcgtcgcctctcccagctgctcctgctgctttcacagatccggcacattagcaac +aagggcatcgagcatctcaacagc------atgaagcgtaagaatgtg---------atcccgctatacgac +ctgctccttgagctgctggacgctcacagcctgcag---------------------aatactggcttacgg +acgtcgcccccaccgcaggatttcagggcaaccctcgtgccg diff --git a/examples/estrogenReceptorCdna_frag.fa b/examples/estrogenReceptorCdna_frag.fa index da5f5c8..c44766d 100644 --- a/examples/estrogenReceptorCdna_frag.fa +++ b/examples/estrogenReceptorCdna_frag.fa @@ -1,12 +1,32 @@ ->EMBLCDS|BAA89539/1111-1224 Anguilla japonica (Japanese eel) progesterone receptor +>EMBLCDS|ADZ17331/532-675 Homo sapiens (human) estrogen nuclear receptor beta variant a +caaggacataatgattatatttgtccagctacaaatcagtgtacaatcgataaaaaccggcgcaagagctgc +caggcctgccgacttcggaagtgttacgaagtgggaatggtgaagtgtggctcccggagagagagatgt--- +ggg +>EMBLCDS|AAK93056/430-576 Drosophila melanogaster (fruit fly) GH28308p +caaggcaacatcgagtacacgtgtccggcgaacaacgagtgtgagattaacaagcggagacgcaaggcctgc +caagcgtgtcgcttccagaaatgtctactaatgggcatgctcaaggagggtgtgcgcttggatcgagttcgt +gga +>EMBLCDS|BAA89539/1111-1254 Anguilla japonica (Japanese eel) progesterone receptor gaaggccatcataactacttgtgtgctggacggaatgactgcatcgtggacaagatccgtaggaaaaactgt -cctgcttgtcgcctcagaaagtgctaccaggcgggaatgata--- ->EMBLCDS|BAA75464/1564-1677 Anguilla japonica (Japanese eel) androgen receptor alpha +cctgcttgtcgcctcagaaagtgctaccaggcgggaatgatactgggaggtcggaagctgaagaagttg--- +ggg +>EMBLCDS|AHW56473/532-675 Homo sapiens (human) partial estrogen receptor 2 isoform A +caaggacataatgattatatttgtccagctacaaatcagtgtacaatcgataaaaaccggcgcaagagctgc +caggcctgccgacttcggaagtgttacgaagtgggaatggtgaagtgtggctcccggagagagagatgt--- +ggg +>EMBLCDS|BAA75464/1564-1683 Anguilla japonica (Japanese eel) androgen receptor alpha gaagggaaacagaagtacctgtgcgccagcatcaatgattgcaccattgataaacttcgaaggaagaactgc -ccctcttgccgtctcaaaaggtgctttgctgccggaatgacc--- ->EMBLCDS|AAA17402/70-183 Serinus canaria (common canary) partial androgen receptor +ccctcttgccgtctcaaaaggtgctttgctgccggaatgaccctt--------------------------- +gga +>EMBLCDS|AAA17402/70-189 Serinus canaria (common canary) partial androgen receptor gaaggtaaacagaagtacctctgtgccagccgcaacgactgcaccatcgacaagttccggcggaaaaactgc -ccctcctgccgcctgcgcaagtgctacgaggccgggatgacg--- ->EMBLCDS|AAK20929/592-708 Petromyzon marinus (sea lamprey) partial estrogen receptor +ccctcctgccgcctgcgcaagtgctacgaggccgggatgacgctt--------------------------- +gga +>EMBLCDS|AAL37553/430-576 Drosophila melanogaster (fruit fly) estrogen-related receptor +caaggcaacatcgagtacacgtgtccggcgaacaacgagtgtgagattaacaagcggagacgcaaggcctgc +caagcgtgtcgcttccagaaatgtctactaatgggcatgctcaaggagggtgtgcgcttggatcgagttcgt +gga +>EMBLCDS|AAK20929/592-732 Petromyzon marinus (sea lamprey) partial estrogen receptor caaggccacaatgactacatgtgcccggccaccaaccagtgcaccatcgacaggaaccgtcgcaagagctgt -caggcttgccgcctgcgtaagtgctacgaagtgggcatggtcaaa +caggcttgccgcctgcgtaagtgctacgaagtgggcatggtcaaa---ggcgttcgcaaggaccgcaaa--- +ggc diff --git a/examples/estrogenReceptorProtein_aln.fa b/examples/estrogenReceptorProtein_aln.fa new file mode 100644 index 0000000..353c68c --- /dev/null +++ b/examples/estrogenReceptorProtein_aln.fa @@ -0,0 +1,112 @@ +>UNIPROT|Q7LCB3/1-530 estrogen nuclear receptor beta variant a +------------------------------------------------------------------------ +------------------------------------------------------------------------ +--------------------------MDIKNSPSSLNSPSSYNCSQSI------LPLEHGSIYIPSSYVDSH +HEYPAMTFYSPAVMNYSIPSNVTNLEGGPGRQT-------TSP----------NVLWPTPGHLSPLVVHRQL +SHLY--------------------------------------------AEPQKSPWCEARSLEHTLPVNRET +LK----------RKVSGN---------------RCASPVTGP------------------------------ +-------------------GSKRDAHF---------------------------------CAVCSDYASGYH +YGVWSCEGCKAFFKRSIQGHNDYICPATNQCTIDKNRRKSCQACRLRKCYEVGMVKCGSRRERC-GYRLVRR +QRSADEQLHCAGKAKRSGGH-------------APRVRELLLDALSPEQLVLTLLEA-EPPHVLIS------ +----------------------R-PSAPFTEASMMMSLTKLADKELVHMISWAKKIPGFVELSLFDQVRLLE +SCWMEVLMMGLMWRSID--HPGKLIFAPDLVLDRDEGKCVEGILEIFDMLLAT---TSRFRELKLQHKEYLC +VKAMILLNSSMYPLVTAT-QDADSSRKLAHLLNAVTDALVWVIAKSGISSQQQSMRLANLLMLLSHVRHASN +KGMEHLLN--MKCKNV---VPVYDLLLEMLNAHVLRGCKSSITGSECSPAEDSKSKEGSQNPQSQ--- +>UNIPROT|Q9VSE9/1-484 GH28308p +------------------------------------------------------------------------ +------------------------------------------------------------------------ +-----------------------------------------------------------MSDGVSILHIKQE +VDTPSASCFSPSSKSTATQSGTNGLK--------------SSP----------SVSPERQLCSSTTSLSCDL +HNVS---------------------------------------LSNDGDSLKGSGTSGGNGGGGGGGTSGGN +AT--------------------------------NASAGAGS------------------------------ +-------------------GSVRDELRRL-------------------------------CLVCGDVASGFH +YGVASCEACKAFFKRTIQGNIEYTCPANNECEINKRRRKACQACRFQKCLLMGMLKEGVRLDRVRGGRQKYR +RNPVSNSYQTMQLLYQSNTTSL-----------CDVKILEVLNSYEPDALSVQT----PPPQVHTTSITNDE +ASSSSGSIKLESSVVTPNGTCIFQNNNNNDPNEILSVLSDIYDKELVSVIGWAKQIPGFIDLPLNDQMKLLQ +VSWAEILTLQLTFRSLP--FNGKLCFATDVWMDEHLAKEC-GYTEFYYHCVQI---AQRMERISPRREEYYL +LKALLLANCDILL------DDQSSLRAFRDTILNSLNDVVYLLRHSSAVSHQQ-----QLLLLLPSLRQADD +ILRRFWRG--IARDEV---ITMKKLFLEMLE------------------------------PLAR--- +>UNIPROT|Q9IBD5/1-710 progesterone receptor +--------------------------------------------MDNNHQDKMESLYTPARASPTPDAESIK +RARNLIKTYSESFGSYVEEIVRDDSNNIQSLSSVPLLMRNFGNMDTLTCAPGSGSDSEIWKDFVVPGNSVVS +KDTCGHVEISTKAENLSWAAAPLSREETLAKGTVTVPATVPKESFTAT-----SNTSSASGISIKDEQQSLL +KMEPQSSDFCPYTANIPKLNPSYLTNTASTKQLGYG----EQP----------DTSAHSSPPAQKIVLDTAR +YSAD----------------------------------LCSDNPLPQATNIKTDPCSSFSSFVGEGILTRAS +MGYSQQAIQTLPVHKSEP---------------FRLSASSAPADSPFWCQS--------------------- +------------------TGPSEDHHLQIDYLSPAGLHSTCKYSSTNAYSSYLGVLPQRVCVICGDEASGCH +YGVLTCGSCKVFFKRAVEGHHNYLCAGRNDCIVDKIRRKNCPACRLRKCYQAGMILGGRKLKKL-GALKAAG +LTQALVAHSLTPRRLSGDSQAL-----------MPLGCLPGVRELHLSPQIISVLESIEPEVVYSG------ +----------------------YDNSQPDMPNMLLNSLNRLCERQLLRIVKWSKSLPGFRSLHINDQMALIQ +YSWMSLMVFSLGWRSFQNVTSDYLYFAPDLILNEEYMRRS----PIFDLCMAMQFIPQEFANLQVTKEEFLC +MKVLLLLN--TVPL-----EGLKSQPQFDEMRQNYIHELTKAIHLRENGVVACSQRFYHLTKLMDHMHDIVK +KLHLYCLSTFIQADAMR--VEFPEMMSEVIASQL------------------PRVLAGMVKPLLFHTK +>UNIPROT|Q7LCB3/1-530 estrogen receptor 2 isoform A +------------------------------------------------------------------------ +------------------------------------------------------------------------ +--------------------------MDIKNSPSSLNSPSSYNCSQSI------LPLEHGSIYIPSSYVDSH +HEYPAMTFYSPAVMNYSIPSNVTNLEGGPGRQT-------TSP----------NVLWPTPGHLSPLVVHRQL +SHLY--------------------------------------------AEPQKSPWCEARSLEHTLPVNRET +LK----------RKVSGN---------------RCASPVTGP------------------------------ +-------------------GSKRDAHF---------------------------------CAVCSDYASGYH +YGVWSCEGCKAFFKRSIQGHNDYICPATNQCTIDKNRRKSCQACRLRKCYEVGMVKCGSRRERC-GYRLVRR +QRSADEQLHCAGKAKRSGGH-------------APRVRELLLDALSPEQLVLTLLEA-EPPHVLIS------ +----------------------R-PSAPFTEASMMMSLTKLADKELVHMISWAKKIPGFVELSLFDQVRLLE +SCWMEVLMMGLMWRSID--HPGKLIFAPDLVLDRDEGKCVEGILEIFDMLLAT---TSRFRELKLQHKEYLC +VKAMILLNSSMYPLVTAT-QDADSSRKLAHLLNAVTDALVWVIAKSGISSQQQSMRLANLLMLLSHVRHASN +KGMEHLLN--MKCKNV---VPVYDLLLEMLNAHVLRGCKSSITGSECSPAEDSKSKEGSQNPQSQ--- +>UNIPROT|Q9YGV9/1-848 androgen receptor alpha +MEIPVGLGGVSDATNAVFRGPYQNVFHSLQVAFQSHGAVSRSLDFPNTKYGFLQNRHFCEMRQENKQPPCKG +LGLFYGNHRNSDTGTNEDDIACFSRQSDAEARPGIFSESSLDTGDEITCKLQSDNQGVRASGPLLPGSSGCN +SGQKSSLACTSQQRETTSQSDTCAGESCSEHQATTISETARELCNAVSVSLGLNLDLNDMNDLSSNQISSTE +SDTSQAIYLFESSPGYTGVGLNALVRDCKCQSAREGTSTQQYDRGAMFKINRVNDLPLQPAPPRHTSISDAK +WDMEAGLCAQMEHKDSEKCANMDGAHSTSVFSQFDQLLPVNASHYSQNVSVRVEPQSDFSPILYKSPGIQKN +AEKYNVQYDATIKSEDGKTTSEREWGFQYRYNESCSTPSAPPRHCAHQNRAGPYNQFFFNPFEYAKRGVVSR +EGYSLEHGFPNNLARTPYSGSLKNELGDRLSGPYPDVSYRYEGERENVFPVEFFFPPQRTCLICGDEASGCH +YGALTCGSCKVFFKRAAEGKQKYLCASINDCTIDKLRRKNCPSCRLKRCFAAGMTL---------GARKLKK +IGQMRAPEDGQGQGPAEAELSV-----------SPKYDL----GFHTQSMFLNILEAIEPEVVNAG------ +----------------------HDYGQPDSAASLLTSLNELGERQLVKVVKWAKGMPGFRSLYVDDQMTVIQ +HSWMAVMVFALGWRSFKNVKSRMLYFAPDLVFNEHRMQVS----TMYEHCIRMKNFSQEFAMLQVSQEEFLC +MKALLLFS--TIPV-----EGLKGQNFFDELRRSYINELDRLVSFRSKSS--CSERFQQLTRLLDSLQPVLK +KLHQFTFDLFVQSQNLSNQVCFPEMISEIISVHV------------------PKILAGTVKPILFHK- +>UNIPROT|Q91445/1-344 androgen receptor +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------------ +------------------------------------------------------------------EASGCH +YGALTCGSCKVFFKRAAEGKQKYLCASRNDCTIDKFRRKNCPSCRLRKCYEAGMTL---------GARKLKK +LGNLKAQDDIEGASSSSPTEEQ-----------APKLVMTRIDGYECQPIFLNVLEAIEPGVVCAG------ +----------------------HDNSQPDSFSNLLTSLNELGERQLVYVVKWAKALPGFRNLHVDDQMSIIQ +YSWMGLMVFAMGWRSFTNVNSRMLYFAPDLVFNEYRMHKS----RMYSQCIRMRHLSQEFGWLQITPQGFLC +MKALLFFS--IIPV-----DGLKNQKLFDELRMNYIKELDRIIACKRKNPTSCSRRFYQLTKVLDSVTPIAK +DLHQFTFDLLIKAHMVS--VDYPEMMAEIISVQV------------------PKILS----------- +>UNIPROT|Q9VSE9/1-484 estrogen-related receptor +------------------------------------------------------------------------ +------------------------------------------------------------------------ +-----------------------------------------------------------MSDGVSILHIKQE +VDTPSASCFSPSSKSTATQSGTNGLK--------------SSP----------SVSPERQLCSSTTSLSCDL +HNVS---------------------------------------LSNDGDSLKGSGTSGGNGGGGGGGTSGGN +AT--------------------------------NASAGAGS------------------------------ +-------------------GSVRDELRRL-------------------------------CLVCGDVASGFH +YGVASCEACKAFFKRTIQGNIEYTCPANNECEINKRRRKACQACRFQKCLLMGMLKEGVRLDRVRGGRQKYR +RNPVSNSYQTMQLLYQSNTTSL-----------CDVKILEVLNSYEPDALSVQT----PPPQVHTTSITNDE +ASSSSGSIKLESSVVTPNGTCIFQNNNNNDPNEILSVLSDIYDKELVSVIGWAKQIPGFIDLPLNDQMKLLQ +VSWAEILTLQLTFRSLP--FNGKLCFATDVWMDEHLAKEC-GYTEFYYHCVQI---AQRMERISPRREEYYL +LKALLLANCDILL------DDQSSLRAFRDTILNSLNDVVYLLRHSSAVSHQQ-----QLLLLLPSLRQADD +ILRRFWRG--IARDEV---ITMKKLFLEMLE------------------------------PLAR--- +>UNIPROT|Q90ZM8/1-554 estrogen receptor +------------------------------------------------------------------------ +------------------------------------------------------------------------ +-------------------ARGFSEAHGYEYSGASLYQPLPPSCTEFS-----------IGAHQQQQHQHQH +HQHQHQQHHHQQQQQQPQPQQNGVLGEGQSSHLSYLPPSTELP----------QYVPSSPSAPYSMELGAGR +PHGYD---------------------------------PGPQSLYRGGVESSAPPYSEQQQVVGGGGAMSAM +GL-------------TEP---------------RHVSSGSLP------------------------------ +-------------------SSTRPERSTQF------------------------------CAVCSDYASGYH +YGVWSCEGCKAFFKRSTQGHNDYMCPATNQCTIDRNRRKSCQACRLRKCYEVGMVK-GVRKDRK-GFRGVKH +KRKRPIPQKNGGEGGAGGGQDVSETRPQGERPSGPRDRESAVSSLEADQVISALLEA-EPPTVLSS------ +----------------------YDPDKPVTEASLMAALTSLADRELVHMITWAKKIPGFTAIGLSDQVQLLE +CCWLEILIVGLIWRSID--RPGQLHFAPNLILGREDARNVEGMLDMFDMLLVT---VSRFRELHLRREEYVC +LKAMILLNSGVFFCLSNSAGEQTNVQLIQQILEKVMDALGSTIGHIEASPPQHSRRLSQLLLLLSQIRHISN +KGIEHLNS--MKRKNV---IPLYDLLLELLDAHSLQ-------NTGLRTSPPPQDFRATLVP------ diff --git a/examples/estrogenReceptorProtein_frag.fa b/examples/estrogenReceptorProtein_frag.fa index c9e31aa..efd89bf 100644 --- a/examples/estrogenReceptorProtein_frag.fa +++ b/examples/estrogenReceptorProtein_frag.fa @@ -1,8 +1,16 @@ ->UNIPROT|Q9IBD5/371-408 -EGHHNYLCAGRNDCIVDKIRRKNCPACRLRKCYQAGMI- ->UNIPROT|Q9YGV9/522-559 -EGKQKYLCASINDCTIDKLRRKNCPSCRLKRCFAAGMT- ->UNIPROT|Q90ZM7/97-134 -EGQHNYLCAGRNDCIIDKIRRKNCPACRLRKCIQAGMT- ->UNIPROT|Q90ZM8/198-236 -QGHNDYMCPATNQCTIDRNRRKSCQACRLRKCYEVGMVK +>UNIPROT|Q7LCB3/178-225 estrogen nuclear receptor beta variant a +QGHNDYICPATNQCTIDKNRRKSCQACRLRKCYEVGMVKCGSRRERC-G +>UNIPROT|Q9VSE9/144-192 GH28308p +QGNIEYTCPANNECEINKRRRKACQACRFQKCLLMGMLKEGVRLDRVRG +>UNIPROT|Q9IBD5/371-418 progesterone receptor +EGHHNYLCAGRNDCIVDKIRRKNCPACRLRKCYQAGMILGGRKLKKL-G +>UNIPROT|Q7LCB3/178-225 estrogen receptor 2 isoform A +QGHNDYICPATNQCTIDKNRRKSCQACRLRKCYEVGMVKCGSRRERC-G +>UNIPROT|Q9YGV9/522-561 androgen receptor alpha +EGKQKYLCASINDCTIDKLRRKNCPSCRLKRCFAAGMTL---------G +>UNIPROT|Q91445/24-63 androgen receptor +EGKQKYLCASRNDCTIDKFRRKNCPSCRLRKCYEAGMTL---------G +>UNIPROT|Q9VSE9/144-192 estrogen-related receptor +QGNIEYTCPANNECEINKRRRKACQACRFQKCLLMGMLKEGVRLDRVRG +>UNIPROT|Q90ZM8/198-244 estrogen receptor +QGHNDYMCPATNQCTIDRNRRKSCQACRLRKCYEVGMVK-GVRKDRK-G -- 1.7.10.2