From: Jim Procter Date: Thu, 4 May 2017 13:09:44 +0000 (+0100) Subject: Merge branch 'features/JAL-2393customMatrices' into develop X-Git-Tag: Release_2_10_2~3^2~105^2~2 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=e67e5f3a5b922e8a7729a0e9e9b174f46b11456c;hp=8356850ec2f6043a65d3d892f9ebd405f23893e2;p=jalview.git Merge branch 'features/JAL-2393customMatrices' into develop --- diff --git a/resources/lang/Messages.properties b/resources/lang/Messages.properties index 922f482..e63752d 100644 --- a/resources/lang/Messages.properties +++ b/resources/lang/Messages.properties @@ -80,7 +80,8 @@ action.scale_left = Scale Left action.scale_right = Scale Right action.by_tree_order = By Tree Order action.sort = Sort -action.calculate_tree = Calculate Tree +action.calculate_tree = Calculate Tree... +action.calculate_tree_pca = Calculate Tree or PCA... action.help = Help action.by_annotation = By Annotation... action.invert_sequence_selection = Invert Sequence Selection @@ -170,6 +171,7 @@ label.redo_command = Redo {0} label.principal_component_analysis = Principal Component Analysis label.average_distance_identity = Average Distance Using % Identity label.neighbour_joining_identity = Neighbour Joining Using % Identity +label.choose_calculation = Choose Calculation label.treecalc_title = {0} Using {1} label.tree_calc_av = Average Distance label.tree_calc_nj = Neighbour Joining @@ -177,6 +179,8 @@ label.select_score_model = Select score model label.score_model_pid = % Identity label.score_model_blosum62 = BLOSUM62 label.score_model_pam250 = PAM 250 +label.score_model_smithwatermanscore = Score between two sequences aligned with Smith-Waterman with default Peptide/Nucleotide matrix +label.score_model_sequencefeaturesimilarity = Distance measure of average number of features not shared at sequence positions label.score_model_conservation = Physicochemical property conservation label.score_model_enhconservation = Physicochemical property conservation 
label.status_bar = Status bar @@ -333,6 +337,7 @@ label.colour_residues_above_occurrence = Colour residues above % occurrence label.set_this_label_text = set this label text label.sequences_from = Sequences from {0} label.successfully_loaded_file = Successfully loaded file {0} +label.successfully_loaded_matrix = Successfully loaded score matrix {0} label.successfully_saved_to_file_in_format = Successfully saved to file: {0} in {1} format. label.copied_sequences_to_clipboard = Copied {0} sequences to clipboard. label.check_file_matches_sequence_ids_alignment = Check that the file matches sequence IDs in the alignment. @@ -379,11 +384,10 @@ label.invalid_selection = Invalid Selection label.principal_component_analysis_must_take_least_four_input_sequences = Principal component analysis must take\nat least 4 input sequences. label.sequence_selection_insufficient = Sequence selection insufficient label.you_need_more_two_sequences_selected_build_tree = You need to have more than two sequences selected to build a tree! +label.you_need_more_than_n_sequences = You need to have more than {0} sequences label.not_enough_sequences = Not enough sequences label.selected_region_to_tree_may_only_contain_residues_or_gaps = The selected region to create a tree may\nonly contain residues or gaps.\nTry using the Pad function in the edit menu,\nor one of the multiple sequence alignment web services. label.sequences_selection_not_aligned = Sequences in selection are not aligned -label.sequences_must_be_aligned_before_creating_tree = The sequences must be aligned before creating a tree.\nTry using the Pad function in the edit menu,\n or one of the multiple sequence alignment web services. 
-label.sequences_not_aligned = Sequences not aligned label.problem_reading_tree_file = Problem reading tree file label.possible_problem_with_tree_file = Possible problem with tree file label.select_at_least_three_bases_in_at_least_one_sequence_to_cDNA_translation = Please select at least three bases in at least one sequence in order to perform a cDNA translation. @@ -712,7 +716,6 @@ label.set_as_default = Set as Default label.show_labels = Show labels action.background_colour = Background Colour... label.associate_nodes_with = Associate Nodes With -label.jalview_pca_calculation = Jalview PCA Calculation label.link_name = Link Name label.pdb_file = PDB file label.colour_with_jmol = Colour with Jmol @@ -855,7 +858,6 @@ label.couldnt_save_project = Couldn't save project label.error_whilst_saving_current_state_to = Error whilst saving current state to {0} label.error_whilst_loading_project_from = Error whilst loading project from {0} label.couldnt_load_project = Couldn't load project -label.pca_sequences_not_aligned = The sequences must be aligned before calculating PCA.\nTry using the Pad function in the edit menu,\nor one of the multiple sequence alignment web services. label.invalid_name_preset_exists = Invalid name - preset already exists. label.invalid_name = Invalid name label.set_proxy_settings = Please set up your proxy settings in the 'Connections' tab of the Preferences window @@ -899,6 +901,7 @@ label.choose_filename_for_param_file = Choose a filename for this parameter file label.save_as_html = Save as HTML label.recently_opened = Recently Opened label.blasting_for_unidentified_sequence_jobs_running = BLASTing for unidentified sequences - {0} jobs running. 
+label.tree = Tree label.tree_from = Tree from {0} label.webservice_job_title = {0} using {1} label.select_visible_region_of = selected {0} region of {1} diff --git a/resources/lang/Messages_es.properties b/resources/lang/Messages_es.properties index e6e1872..6ddbb44 100644 --- a/resources/lang/Messages_es.properties +++ b/resources/lang/Messages_es.properties @@ -78,7 +78,8 @@ action.scale_left = Escala izquierda action.scale_right = Escala derecha action.by_tree_order = Por orden del árbol action.sort = Ordenar -action.calculate_tree = Calcular árbol +action.calculate_tree = Calcular árbol... +action.calculate_tree_pca = Calcular árbol o ACP... action.help = Ayuda action.by_annotation = Por anotación... action.invert_sequence_selection = Invertir selección de secuencias @@ -167,6 +168,7 @@ label.redo_command = Rehacer {0} label.principal_component_analysis = Análisis del Componente Principal label.average_distance_identity = Distancia Media Usando % de Identidad label.neighbour_joining_identity = Unir vecinos utilizando % de Identidad +label.choose_calculation = Elegir el cálculo label.treecalc_title = {0} utilizando {1} label.tree_calc_av = Distancia media label.tree_calc_nj = Unir vecinos @@ -174,6 +176,8 @@ label.select_score_model = Selecciones modelo de puntuaci label.score_model_pid = % Identidad label.score_model_blosum62 = BLOSUM62 label.score_model_pam250 = PAM 250 +label.score_model_smithwatermanscore = Puntuación entre secuencias alineadas por Smith-Waterman con matriz por defecto proteica / nucleotídica +label.score_model_sequencefeaturesimilarity = Medida de distancia por cuenta promedia de características no compartidas at sequence positions label.score_model_conservation = Conservación de las propiedades físico-químicas label.score_model_enhconservation = Conservación de las propiedades físico-químicas label.status_bar = Barra de estado @@ -302,6 +306,7 @@ label.colour_residues_above_occurrence = Residuos de color por encima del % de a 
label.set_this_label_text = fijar como etiqueta label.sequences_from = Secuencias de {0} label.successfully_loaded_file = Fichero cargado exitosamente {0} +label.successfully_loaded_matrix = Matriz cargada exitosamente {0} label.successfully_saved_to_file_in_format = Guardado exitosamente en el fichero: {0} en formato {1}. label.copied_sequences_to_clipboard = Copiadas {0} secuencias en el portapapeles. label.check_file_matches_sequence_ids_alignment = Comprobar que el fichero coincide con el ID de la secuencia en el alineamiento. @@ -350,8 +355,6 @@ label.you_need_more_two_sequences_selected_build_tree = necesitas seleccionar m label.not_enough_sequences = No suficientes secuencias label.selected_region_to_tree_may_only_contain_residues_or_gaps = La regi\u00F3n seleccionada para construir un \u00E1rbol puede\ncontener s\u00F3lo residuos o espacios.\nPrueba usando la funci\u00F3n Pad en el men\u00FA de edici\u00F3n,\n o uno de los m\u00FAltiples servicios web de alineamiento de secuencias. label.sequences_selection_not_aligned = Las secuencias seleccionadas no están alineadas -label.sequences_must_be_aligned_before_creating_tree = Las secuencias deben estar alineadas antes de crear el \u00E1rbol.\nPrueba usando la funci\u00F3n Pad en el men\u00FA de editar,\n o uno de los m\u00FAltiples servicios web de alineamiento de secuencias. -label.sequences_not_aligned = Secuencias no alineadas label.problem_reading_tree_file = Problema al leer el fichero del árbol label.possible_problem_with_tree_file = Posible problema con el fichero del árbol label.select_at_least_three_bases_in_at_least_one_sequence_to_cDNA_translation = Por favor seleccionar al menos tres bases de al menos una secuencia para poder realizar la traducción de cDNA. @@ -414,7 +417,7 @@ label.colour_by_annotation = Color por anotaci label.selection_output_command = Seleccionar salida - {0} label.annotation_for_displayid =

Anotación para {0}

label.pdb_sequence_mapping = PDB - Mapeado de secuencia -label.pca_details = detalles de la PCA +label.pca_details = detalles de la ACP label.redundancy_threshold_selection = Selección del umbral de redundancia label.user_defined_colours = Colores definidos del usuario label.jalviewLite_release = JalviewLite - versión {0} @@ -435,7 +438,7 @@ label.label = Etiqueta label.no_features_added_to_this_alignment = No hay funciones asociadas a este alineamiento!! label.features_can_be_added_from_searches_1 = (Las funciones pueden ser añadidas de búsquedas o label.features_can_be_added_from_searches_2 = de ficheros de funciones Jalview / GFF) -label.calculating_pca= Calculando PCA +label.calculating_pca= Calculando ACP label.jalview_cannot_open_file = Jalview no puede abrir el fichero label.jalview_applet = Aplicación Jalview label.loading_data = Cargando datos @@ -657,11 +660,9 @@ label.add_local_source = A label.set_as_default = Establecer por defecto label.show_labels = Mostrar etiquetas label.associate_nodes_with = Asociar nodos con -label.jalview_pca_calculation = Cálculo del PCA por Jalview label.link_name = Nombre del enalce label.pdb_file = Fichero PDB label.colour_with_jmol = Colorear con Jmol -label.align_structures = Alinear estructuras label.jmol = Jmol label.sort_alignment_by_tree = Ordenar alineamiento por árbol label.mark_unlinked_leaves = Marcar las hojas como no enlazadas @@ -781,7 +782,6 @@ label.couldnt_save_project = No es posible guardar el proyecto label.error_whilst_saving_current_state_to = Error mientras se guardaba el estado a {0} label.error_whilst_loading_project_from = Error cargando el proyecto desde {0} label.couldnt_load_project = No es posible cargar el proyecto -label.pca_sequences_not_aligned = Las secuencias deben estar alineadas antes de calcular el PCA.\nPruebe a utilizar la funci\u00F3n de rellenar huecos en el men\u00FA Editar,\no cualquiera de los servicios web de alineamiento m\u00FAltiple. 
label.invalid_name_preset_exists = Nombre no válido - esta preconfiguración ya existe. label.invalid_name = Nombre no válido label.set_proxy_settings = Por favor, configure su proxy en la pestaña 'Conexiones' de la ventana de Preferencia @@ -825,6 +825,7 @@ label.choose_filename_for_param_file = Escoja un nombre de fichero para este fic label.save_as_html = Guardar como HTML label.recently_opened = Abiertos recientemente label.blasting_for_unidentified_sequence_jobs_running = Ejecutando BLAST de las secuencias no indentificadas - {0} trabajos en marcha. +label.tree = Árbol label.tree_from = Árbol de {0} label.webservice_job_title = {0} usando {1} label.select_visible_region_of = seleccionada {0} región de {1} @@ -942,8 +943,8 @@ label.submission_params = Env label.empty_alignment_job = Trabajo de alineamiento vacío label.add_new_sbrs_service = Añadir un nuevo SBRS label.edit_sbrs_entry = Editar entrada SBRS -label.pca_recalculating = Recalculando PCA -label.pca_calculating = Calculando PCA +label.pca_recalculating = Recalculando ACP +label.pca_calculating = Calculando ACP label.select_foreground_colour = Escoger color del primer plano label.select_colour_for_text = Seleccione el color del texto label.adjunst_foreground_text_colour_threshold = Ajustar el umbral del color del texto en primer plano @@ -1230,7 +1231,7 @@ label.select_all=Seleccionar Todos label.alpha_helix=Hélice Alfa label.chimera_help=Ayuda para Chimera label.find_tip=Buscar alineamiento, selección o IDs de secuencia para una subsecuencia (sin huecos) -label.structure_viewer=Visualizador de estructura for defecto +label.structure_viewer=Visualizador de estructura por defecto label.embbed_biojson=Incrustar BioJSON al exportar HTML label.transparency_tip=Ajustar la transparencia a "ver a través" los colores de las características. 
label.choose_annotations=Escoja anotaciones diff --git a/resources/scoreModel/blosum62.scm b/resources/scoreModel/blosum62.scm new file mode 100644 index 0000000..b0e927d --- /dev/null +++ b/resources/scoreModel/blosum62.scm @@ -0,0 +1,34 @@ +ScoreMatrix BLOSUM62 +# +# The BLOSUM62 substitution matrix, as at https://www.ncbi.nlm.nih.gov/Class/FieldGuide/BLOSUM62.txt +# The first line declares a ScoreMatrix with the name BLOSUM62 (shown in menus) +# +# Scores are not symbol case sensitive, unless column(s) are provided for lower case characters +# The 'guide symbol' at the start of each row of score values is optional +# Values may be integer or floating point, delimited by tab, space, comma or combinations +# + A R N D C Q E G H I L K M F P S T W Y V B Z X * +A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4 +R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4 +N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4 +D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4 +C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4 +Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4 +E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 +G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4 +H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4 +I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4 +L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4 +K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4 +M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4 +F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4 +P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4 +S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4 +T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4 +W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4 
+Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4 +V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4 +B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4 +Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4 +X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4 +* -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1 diff --git a/resources/scoreModel/blosum80.scm b/resources/scoreModel/blosum80.scm new file mode 100644 index 0000000..8153d3b --- /dev/null +++ b/resources/scoreModel/blosum80.scm @@ -0,0 +1,32 @@ +# +# Source: http://www.genome.jp/dbget-bin/www_bget?aaindex:HENS920103 +# +H HENS920103 +D BLOSUM80 substitution matrix (Henikoff-Henikoff, 1992) +R PMID:1438297 +A Henikoff, S. and Henikoff, J.G. +T Amino acid substitution matrices from protein blocks +J Proc. Natl. Acad. Sci. USA 89, 10915-10919 (1992) +* matrix in 1/3 Bit Units +M rows = ARNDCQEGHILKMFPSTWYV, cols = ARNDCQEGHILKMFPSTWYV + 7. + -3. 9. + -3. -1. 9. + -3. -3. 2. 10. + -1. -6. -5. -7. 13. + -2. 1. 0. -1. -5. 9. + -2. -1. -1. 2. -7. 3. 8. + 0. -4. -1. -3. -6. -4. -4. 9. + -3. 0. 1. -2. -7. 1. 0. -4. 12. + -3. -5. -6. -7. -2. -5. -6. -7. -6. 7. + -3. -4. -6. -7. -3. -4. -6. -7. -5. 2. 6. + -1. 3. 0. -2. -6. 2. 1. -3. -1. -5. -4. 8. + -2. -3. -4. -6. -3. -1. -4. -5. -4. 2. 3. -3. 9. + -4. -5. -6. -6. -4. -5. -6. -6. -2. -1. 0. -5. 0. 10. + -1. -3. -4. -3. -6. -3. -2. -5. -4. -5. -5. -2. -4. -6. 12. + 2. -2. 1. -1. -2. -1. -1. -1. -2. -4. -4. -1. -3. -4. -2. 7. + 0. -2. 0. -2. -2. -1. -2. -3. -3. -2. -3. -1. -1. -4. -3. 2. 8. + -5. -5. -7. -8. -5. -4. -6. -6. -4. -5. -4. -6. -3. 0. -7. -6. -5. 16. + -4. -4. -4. -6. -5. -3. -5. -6. 3. -3. -2. -4. -3. 4. -6. -3. -3. 3. 11. + -1. -4. -5. -6. -2. -4. -4. -6. -5. 4. 1. -4. 1. -2. -4. -3. 0. -5. -3. 7. 
+// diff --git a/resources/scoreModel/dna.scm b/resources/scoreModel/dna.scm new file mode 100644 index 0000000..0d7cbc1 --- /dev/null +++ b/resources/scoreModel/dna.scm @@ -0,0 +1,27 @@ +ScoreMatrix DNA +# +# A DNA substitution matrix. +# This is an ad-hoc matrix which, in addition to penalising mutations between the common +# nucleotides (ACGT), includes T/U equivalence in order to allow both DNA and/or RNA. +# In addition, it encodes weak equivalence between R and Y with AG and CTU, respectively, +# and N is allowed to match any other base weakly. +# This matrix also includes I (Inosine) and X (Xanthine), but encodes them to weakly match +# any of (ACGTU), and unfavourably match each other. +# +# The first line declares a ScoreMatrix with the name DNA (shown in menus) +# Scores are not case sensitive, unless column(s) are provided for lower case characters +# +# Values may be integer or floating point, delimited by tab, space, comma or combinations +# + A C G T U I X R Y N - +A 10 -8 -8 -8 -8 1 1 1 -8 1 1 +C -8 10 -8 -8 -8 1 1 -8 1 1 1 +G -8 -8 10 -8 -8 1 1 1 -8 1 1 +T -8 -8 -8 10 10 1 1 -8 1 1 1 +U -8 -8 -8 10 10 1 1 -8 1 1 1 +I 1 1 1 1 1 10 0 0 0 1 1 +X 1 1 1 1 1 0 10 0 0 1 1 +R 1 -8 1 -8 -8 0 0 10 -8 1 1 +Y -8 1 -8 1 1 0 0 -8 10 1 1 +N 1 1 1 1 1 1 1 1 1 10 1 +- 1 1 1 1 1 1 1 1 1 1 1 diff --git a/resources/scoreModel/pam250.scm b/resources/scoreModel/pam250.scm new file mode 100644 index 0000000..898c723 --- /dev/null +++ b/resources/scoreModel/pam250.scm @@ -0,0 +1,32 @@ +ScoreMatrix PAM250 +# +# The PAM250 substitution matrix +# The first line declares a ScoreMatrix with the name PAM250 (shown in menus) +# Scores are not case sensitive, unless column(s) are provided for lower case characters +# Values may be integer or floating point, delimited by tab, space, comma or combinations +# + A R N D C Q E G H I L K M F P S T W Y V B Z X * +A 2 -2 0 0 -2 0 0 1 -1 -1 -2 -1 -1 -3 1 1 1 -6 -3 0 0 0 0 -8 +R -2 6 0 -1 -4 1 -1 -3 2 -2 -3 3 0 -4 0 0 -1 2 -4 -2 -1 0 -1 -8 
+N 0 0 2 2 -4 1 1 0 2 -2 -3 1 -2 -3 0 1 0 -4 -2 -2 2 1 0 -8 +D 0 -1 2 4 -5 2 3 1 1 -2 -4 0 -3 -6 -1 0 0 -7 -4 -2 3 3 -1 -8 +C -2 -4 -4 -5 12 -5 -5 -3 -3 -2 -6 -5 -5 -4 -3 0 -2 -8 0 -2 -4 -5 -3 -8 +Q 0 1 1 2 -5 4 2 -1 3 -2 -2 1 -1 -5 0 -1 -1 -5 -4 -2 1 3 -1 -8 +E 0 -1 1 3 -5 2 4 0 1 -2 -3 0 -2 -5 -1 0 0 -7 -4 -2 3 3 -1 -8 +G 1 -3 0 1 -3 -1 0 5 -2 -3 -4 -2 -3 -5 0 1 0 -7 -5 -1 0 0 -1 -8 +H -1 2 2 1 -3 3 1 -2 6 -2 -2 0 -2 -2 0 -1 -1 -3 0 -2 1 2 -1 -8 +I -1 -2 -2 -2 -2 -2 -2 -3 -2 5 2 -2 2 1 -2 -1 0 -5 -1 4 -2 -2 -1 -8 +L -2 -3 -3 -4 -6 -2 -3 -4 -2 2 6 -3 4 2 -3 -3 -2 -2 -1 2 -3 -3 -1 -8 +K -1 3 1 0 -5 1 0 -2 0 -2 -3 5 0 -5 -1 0 0 -3 -4 -2 1 0 -1 -8 +M -1 0 -2 -3 -5 -1 -2 -3 -2 2 4 0 6 0 -2 -2 -1 -4 -2 2 -2 -2 -1 -8 +F -3 -4 -3 -6 -4 -5 -5 -5 -2 1 2 -5 0 9 -5 -3 -3 0 7 -1 -4 -5 -2 -8 +P 1 0 0 -1 -3 0 -1 0 0 -2 -3 -1 -2 -5 6 1 0 -6 -5 -1 -1 0 -1 -8 +S 1 0 1 0 0 -1 0 1 -1 -1 -3 0 -2 -3 1 2 1 -2 -3 -1 0 0 0 -8 +T 1 -1 0 0 -2 -1 0 0 -1 0 -2 0 -1 -3 0 1 3 -5 -3 0 0 -1 0 -8 +W -6 2 -4 -7 -8 -5 -7 -7 -3 -5 -2 -3 -4 0 -6 -2 -5 17 0 -6 -5 -6 -4 -8 +Y -3 -4 -2 -4 0 -4 -4 -5 0 -1 -1 -4 -2 7 -5 -3 -3 0 10 -2 -3 -4 -2 -8 +V 0 -2 -2 -2 -2 -2 -2 -1 -2 4 2 -2 2 -1 -1 -1 0 -6 -2 4 -2 -2 -1 -8 +B 0 -1 2 3 -4 1 3 0 1 -2 -3 1 -2 -4 -1 0 0 -5 -3 -2 3 2 -1 -8 +Z 0 0 1 3 -5 3 3 0 2 -2 -3 0 -2 -5 0 0 -1 -6 -4 -2 2 3 -1 -8 +X 0 -1 0 -1 -3 -1 -1 -1 -1 -1 -1 -1 -1 -2 -1 0 0 -4 -2 -1 -1 -1 -1 -8 +* -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 1 diff --git a/src/MCview/AppletPDBCanvas.java b/src/MCview/AppletPDBCanvas.java index 3ae0650..c454203 100644 --- a/src/MCview/AppletPDBCanvas.java +++ b/src/MCview/AppletPDBCanvas.java @@ -178,7 +178,7 @@ public class AppletPDBCanvas extends Panel implements MouseListener, colourBySequence(); - int max = -10; + float max = -10; int maxchain = -1; int pdbstart = 0; int pdbend = 0; diff --git a/src/MCview/PDBCanvas.java b/src/MCview/PDBCanvas.java index 08bca8c..ff1211a 100644 --- a/src/MCview/PDBCanvas.java +++ 
b/src/MCview/PDBCanvas.java @@ -177,7 +177,7 @@ public class PDBCanvas extends JPanel implements MouseListener, colourBySequence(); - int max = -10; + float max = -10; int maxchain = -1; int pdbstart = 0; int pdbend = 0; diff --git a/src/jalview/analysis/AlignSeq.java b/src/jalview/analysis/AlignSeq.java index 86bf721..07f43da 100755 --- a/src/jalview/analysis/AlignSeq.java +++ b/src/jalview/analysis/AlignSeq.java @@ -20,13 +20,15 @@ */ package jalview.analysis; +import jalview.analysis.scoremodels.PIDModel; +import jalview.analysis.scoremodels.ScoreMatrix; +import jalview.analysis.scoremodels.ScoreModels; +import jalview.analysis.scoremodels.SimilarityParams; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.Mapping; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceI; -import jalview.schemes.ResidueProperties; -import jalview.schemes.ScoreMatrix; import jalview.util.Comparison; import jalview.util.Format; import jalview.util.MapList; @@ -53,17 +55,11 @@ public class AlignSeq private static final String NEWLINE = System.lineSeparator(); - static String[] dna = { "A", "C", "G", "T", "-" }; + float[][] score; - // "C", "T", "A", "G", "-"}; - static String[] pep = { "A", "R", "N", "D", "C", "Q", "E", "G", "H", "I", - "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V", "B", "Z", "X", "-" }; + float[][] E; - int[][] score; - - int[][] E; - - int[][] F; + float[][] F; int[][] traceback; @@ -106,7 +102,7 @@ public class AlignSeq int count; /** DOCUMENT ME!! 
*/ - public int maxscore; + public float maxscore; float pid; @@ -116,31 +112,24 @@ public class AlignSeq int gapExtend = 20; - int[][] lookup = ResidueProperties.getBLOSUM62(); - - String[] intToStr = pep; - - int defInt = 23; - StringBuffer output = new StringBuffer(); - String type; + String type; // AlignSeq.PEP or AlignSeq.DNA + + private ScoreMatrix scoreMatrix; - private int[] charToInt; + private static final int GAP_INDEX = -1; /** * Creates a new AlignSeq object. * - * @param s1 - * DOCUMENT ME! - * @param s2 - * DOCUMENT ME! - * @param type - * DOCUMENT ME! + * @param s1 first sequence for alignment + * @param s2 second sequence for alignment + * @param type molecule type, either AlignSeq.PEP or AlignSeq.DNA */ public AlignSeq(SequenceI s1, SequenceI s2, String type) { - SeqInit(s1, s1.getSequenceAsString(), s2, s2.getSequenceAsString(), + seqInit(s1, s1.getSequenceAsString(), s2, s2.getSequenceAsString(), type); } @@ -157,7 +146,7 @@ public class AlignSeq public AlignSeq(SequenceI s1, String string1, SequenceI s2, String string2, String type) { - SeqInit(s1, string1.toUpperCase(), s2, string2.toUpperCase(), type); + seqInit(s1, string1.toUpperCase(), s2, string2.toUpperCase(), type); } /** @@ -165,7 +154,7 @@ public class AlignSeq * * @return DOCUMENT ME! */ - public int getMaxScore() + public float getMaxScore() { return maxscore; } @@ -261,26 +250,6 @@ public class AlignSeq } /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public SequenceI getS1() - { - return s1; - } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! 
- */ - public SequenceI getS2() - { - return s2; - } - - /** * * @return aligned instance of Seq 1 */ @@ -322,36 +291,13 @@ public class AlignSeq * @param type * DNA or PEPTIDE */ - public void SeqInit(SequenceI s1, String string1, SequenceI s2, + public void seqInit(SequenceI s1, String string1, SequenceI s2, String string2, String type) { this.s1 = s1; this.s2 = s2; setDefaultParams(type); - SeqInit(string1, string2); - } - - /** - * Construct score matrix for sequences with custom substitution matrix - * - * @param s1 - * - sequence 1 - * @param string1 - * - string to use for s1 - * @param s2 - * - sequence 2 - * @param string2 - * - string to use for s2 - * @param scoreMatrix - * - substitution matrix to use for alignment - */ - public void SeqInit(SequenceI s1, String string1, SequenceI s2, - String string2, ScoreMatrix scoreMatrix) - { - this.s1 = s1; - this.s2 = s2; - setType(scoreMatrix.isDNA() ? AlignSeq.DNA : AlignSeq.PEP); - lookup = scoreMatrix.getMatrix(); + seqInit(string1, string2); } /** @@ -361,7 +307,7 @@ public class AlignSeq * @param string1 * @param string2 */ - private void SeqInit(String string1, String string2) + private void seqInit(String string1, String string2) { s1str = extractGaps(jalview.util.Comparison.GapChars, string1); s2str = extractGaps(jalview.util.Comparison.GapChars, string2); @@ -374,84 +320,31 @@ public class AlignSeq return; } - // System.out.println("lookuip " + rt.freeMemory() + " "+ rt.totalMemory()); - seq1 = new int[s1str.length()]; - - // System.out.println("seq1 " + rt.freeMemory() +" " + rt.totalMemory()); - seq2 = new int[s2str.length()]; - - // System.out.println("seq2 " + rt.freeMemory() + " " + rt.totalMemory()); - score = new int[s1str.length()][s2str.length()]; + score = new float[s1str.length()][s2str.length()]; - // System.out.println("score " + rt.freeMemory() + " " + rt.totalMemory()); - E = new int[s1str.length()][s2str.length()]; + E = new float[s1str.length()][s2str.length()]; - // 
System.out.println("E " + rt.freeMemory() + " " + rt.totalMemory()); - F = new int[s1str.length()][s2str.length()]; + F = new float[s1str.length()][s2str.length()]; traceback = new int[s1str.length()][s2str.length()]; - // System.out.println("F " + rt.freeMemory() + " " + rt.totalMemory()); - seq1 = stringToInt(s1str, type); - - // System.out.println("seq1 " + rt.freeMemory() + " " + rt.totalMemory()); - seq2 = stringToInt(s2str, type); - - // System.out.println("Seq2 " + rt.freeMemory() + " " + rt.totalMemory()); - // long tstart = System.currentTimeMillis(); - // calcScoreMatrix(); - // long tend = System.currentTimeMillis(); - // System.out.println("Time take to calculate score matrix = " + - // (tend-tstart) + " ms"); - // printScoreMatrix(score); - // System.out.println(); - // printScoreMatrix(traceback); - // System.out.println(); - // printScoreMatrix(E); - // System.out.println(); - // /printScoreMatrix(F); - // System.out.println(); - // tstart = System.currentTimeMillis(); - // traceAlignment(); - // tend = System.currentTimeMillis(); - // System.out.println("Time take to traceback alignment = " + (tend-tstart) - // + " ms"); - } - - private void setDefaultParams(String type) - { - setType(type); + seq1 = indexEncode(s1str); - if (type.equals(AlignSeq.PEP)) - { - lookup = ResidueProperties.getDefaultPeptideMatrix(); - } - else if (type.equals(AlignSeq.DNA)) - { - lookup = ResidueProperties.getDefaultDnaMatrix(); - } + seq2 = indexEncode(s2str); } - private void setType(String type2) + private void setDefaultParams(String moleculeType) { - this.type = type2; - if (type.equals(AlignSeq.PEP)) - { - intToStr = pep; - charToInt = ResidueProperties.aaIndex; - defInt = ResidueProperties.maxProteinIndex; - } - else if (type.equals(AlignSeq.DNA)) - { - intToStr = dna; - charToInt = ResidueProperties.nucleotideIndex; - defInt = ResidueProperties.maxNucleotideIndex; - } - else + if (!PEP.equals(moleculeType) && !DNA.equals(moleculeType)) { output.append("Wrong type 
= dna or pep only"); throw new Error(MessageManager.formatMessage( - "error.unknown_type_dna_or_pep", new String[] { type2 })); + "error.unknown_type_dna_or_pep", + new String[] { moleculeType })); } + + type = moleculeType; + scoreMatrix = ScoreModels.getInstance().getDefaultModel( + PEP.equals(type)); } /** @@ -460,7 +353,7 @@ public class AlignSeq public void traceAlignment() { // Find the maximum score along the rhs or bottom row - int max = -9999; + float max = -Float.MAX_VALUE; for (int i = 0; i < seq1.length; i++) { @@ -494,21 +387,17 @@ public class AlignSeq aseq1 = new int[seq1.length + seq2.length]; aseq2 = new int[seq1.length + seq2.length]; + StringBuilder sb1 = new StringBuilder(aseq1.length); + StringBuilder sb2 = new StringBuilder(aseq2.length); + count = (seq1.length + seq2.length) - 1; - while ((i > 0) && (j > 0)) + while (i > 0 && j > 0) { - if ((aseq1[count] != defInt) && (i >= 0)) - { - aseq1[count] = seq1[i]; - astr1 = s1str.charAt(i) + astr1; - } - - if ((aseq2[count] != defInt) && (j > 0)) - { - aseq2[count] = seq2[j]; - astr2 = s2str.charAt(j) + astr2; - } + aseq1[count] = seq1[i]; + sb1.append(s1str.charAt(i)); + aseq2[count] = seq2[j]; + sb2.append(s2str.charAt(j)); trace = findTrace(i, j); @@ -520,14 +409,14 @@ public class AlignSeq else if (trace == 1) { j--; - aseq1[count] = defInt; - astr1 = "-" + astr1.substring(1); + aseq1[count] = GAP_INDEX; + sb1.replace(sb1.length() - 1, sb1.length(), "-"); } else if (trace == -1) { i--; - aseq2[count] = defInt; - astr2 = "-" + astr2.substring(1); + aseq2[count] = GAP_INDEX; + sb2.replace(sb2.length() - 1, sb2.length(), "-"); } count--; @@ -536,17 +425,24 @@ public class AlignSeq seq1start = i + 1; seq2start = j + 1; - if (aseq1[count] != defInt) + if (aseq1[count] != GAP_INDEX) { aseq1[count] = seq1[i]; - astr1 = s1str.charAt(i) + astr1; + sb1.append(s1str.charAt(i)); } - if (aseq2[count] != defInt) + if (aseq2[count] != GAP_INDEX) { aseq2[count] = seq2[j]; - astr2 = s2str.charAt(j) + astr2; + 
sb2.append(s2str.charAt(j)); } + + /* + * we built the character strings backwards, so now + * reverse them to convert to sequence strings + */ + astr1 = sb1.reverse().toString(); + astr2 = sb2.reverse().toString(); } /** @@ -599,6 +495,8 @@ public class AlignSeq .append(String.valueOf(s2str.length())).append(")") .append(NEWLINE).append(NEWLINE); + ScoreMatrix pam250 = ScoreModels.getInstance().getPam250(); + for (int j = 0; j < nochunks; j++) { // Print the first aligned sequence @@ -615,25 +513,27 @@ public class AlignSeq output.append(NEWLINE); output.append(new Format("%" + (maxid) + "s").form(" ")).append(" "); - // Print out the matching chars + /* + * Print out the match symbols: + * | for exact match (ignoring case) + * . if PAM250 score is positive + * else a space + */ for (int i = 0; i < len; i++) { if ((i + (j * len)) < astr1.length()) { - boolean sameChar = Comparison.isSameResidue( - astr1.charAt(i + (j * len)), astr2.charAt(i + (j * len)), - false); - if (sameChar - && !jalview.util.Comparison.isGap(astr1.charAt(i - + (j * len)))) + char c1 = astr1.charAt(i + (j * len)); + char c2 = astr2.charAt(i + (j * len)); + boolean sameChar = Comparison.isSameResidue(c1, c2, false); + if (sameChar && !Comparison.isGap(c1)) { pid++; output.append("|"); } else if (type.equals("pep")) { - if (ResidueProperties.getPAM250(astr1.charAt(i + (j * len)), - astr2.charAt(i + (j * len))) > 0) + if (pam250.getPairwiseScore(c1, c2) > 0) { output.append("."); } @@ -678,46 +578,6 @@ public class AlignSeq /** * DOCUMENT ME! * - * @param mat - * DOCUMENT ME! 
- */ - public void printScoreMatrix(int[][] mat) - { - int n = seq1.length; - int m = seq2.length; - - for (int i = 0; i < n; i++) - { - // Print the top sequence - if (i == 0) - { - Format.print(System.out, "%8s", s2str.substring(0, 1)); - - for (int jj = 1; jj < m; jj++) - { - Format.print(System.out, "%5s", s2str.substring(jj, jj + 1)); - } - - System.out.println(); - } - - for (int j = 0; j < m; j++) - { - if (j == 0) - { - Format.print(System.out, "%3s", s1str.substring(i, i + 1)); - } - - Format.print(System.out, "%3d ", mat[i][j] / 10); - } - - System.out.println(); - } - } - - /** - * DOCUMENT ME! - * * @param i * DOCUMENT ME! * @param j @@ -728,7 +588,10 @@ public class AlignSeq public int findTrace(int i, int j) { int t = 0; - int max = score[i - 1][j - 1] + (lookup[seq1[i]][seq2[j]] * 10); + // float pairwiseScore = lookup[seq1[i]][seq2[j]]; + float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(i), + s2str.charAt(j)); + float max = score[i - 1][j - 1] + (pairwiseScore * 10); if (F[i][j] > max) { @@ -772,7 +635,8 @@ public class AlignSeq int m = seq2.length; // top left hand element - score[0][0] = lookup[seq1[0]][seq2[0]] * 10; + score[0][0] = scoreMatrix.getPairwiseScore(s1str.charAt(0), + s2str.charAt(0)) * 10; E[0][0] = -gapExtend; F[0][0] = 0; @@ -783,7 +647,9 @@ public class AlignSeq E[0][j] = max(score[0][j - 1] - gapOpen, E[0][j - 1] - gapExtend); F[0][j] = -gapExtend; - score[0][j] = max(lookup[seq1[0]][seq2[j]] * 10, -gapOpen, -gapExtend); + float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(0), + s2str.charAt(j)); + score[0][j] = max(pairwiseScore * 10, -gapOpen, -gapExtend); traceback[0][j] = 1; } @@ -794,7 +660,9 @@ public class AlignSeq E[i][0] = -gapOpen; F[i][0] = max(score[i - 1][0] - gapOpen, F[i - 1][0] - gapExtend); - score[i][0] = max(lookup[seq1[i]][seq2[0]] * 10, E[i][0], F[i][0]); + float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(i), + s2str.charAt(0)); + score[i][0] = max(pairwiseScore * 
10, E[i][0], F[i][0]); traceback[i][0] = -1; } @@ -806,8 +674,10 @@ public class AlignSeq E[i][j] = max(score[i][j - 1] - gapOpen, E[i][j - 1] - gapExtend); F[i][j] = max(score[i - 1][j] - gapOpen, F[i - 1][j] - gapExtend); + float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(i), + s2str.charAt(j)); score[i][j] = max(score[i - 1][j - 1] - + (lookup[seq1[i]][seq2[j]] * 10), E[i][j], F[i][j]); + + (pairwiseScore * 10), E[i][j], F[i][j]); traceback[i][j] = findTrace(i, j); } } @@ -843,27 +713,27 @@ public class AlignSeq /** * DOCUMENT ME! * - * @param i1 + * @param f1 * DOCUMENT ME! - * @param i2 + * @param f2 * DOCUMENT ME! - * @param i3 + * @param f3 * DOCUMENT ME! * * @return DOCUMENT ME! */ - public int max(int i1, int i2, int i3) + private static float max(float f1, float f2, float f3) { - int max = i1; + float max = f1; - if (i2 > i1) + if (f2 > f1) { - max = i2; + max = f2; } - if (i3 > max) + if (f3 > max) { - max = i3; + max = f3; } return max; @@ -872,65 +742,44 @@ public class AlignSeq /** * DOCUMENT ME! * - * @param i1 + * @param f1 * DOCUMENT ME! - * @param i2 + * @param f2 * DOCUMENT ME! * * @return DOCUMENT ME! */ - public int max(int i1, int i2) + private static float max(float f1, float f2) { - int max = i1; + float max = f1; - if (i2 > i1) + if (f2 > f1) { - max = i2; + max = f2; } return max; } /** - * DOCUMENT ME! + * Converts the character string to an array of integers which are the + * corresponding indices to the characters in the score matrix * * @param s - * DOCUMENT ME! - * @param type - * DOCUMENT ME! * - * @return DOCUMENT ME! + * @return */ - public int[] stringToInt(String s, String type) + int[] indexEncode(String s) { - int[] seq1 = new int[s.length()]; + int[] encoded = new int[s.length()]; for (int i = 0; i < s.length(); i++) { - // String ss = s.substring(i, i + 1).toUpperCase(); char c = s.charAt(i); - if ('a' <= c && c <= 'z') - { - // TO UPPERCASE !!! 
- c -= ('a' - 'A'); - } - - try - { - seq1[i] = charToInt[c]; // set accordingly from setType - if (seq1[i] < 0 || seq1[i] > defInt) // set from setType: 23 for - // peptides, or 4 for NA. - { - seq1[i] = defInt; - } - - } catch (Exception e) - { - seq1[i] = defInt; - } + encoded[i] = scoreMatrix.getMatrixIndex(c); } - return seq1; + return encoded; } /** @@ -950,7 +799,7 @@ public class AlignSeq public static void displayMatrix(Graphics g, int[][] mat, int n, int m, int psize) { - // TODO method dosen't seem to be referenced anywhere delete?? + // TODO method doesn't seem to be referenced anywhere delete?? int max = -1000; int min = 1000; @@ -1113,7 +962,7 @@ public class AlignSeq { SequenceI bestm = null; AlignSeq bestaseq = null; - int bestscore = 0; + float bestscore = 0; for (SequenceI msq : al.getSequences()) { AlignSeq aseq = doGlobalNWAlignment(msq, sq, dnaOrProtein); @@ -1124,8 +973,8 @@ public class AlignSeq bestm = msq; } } - System.out.println("Best Score for " + (matches.size() + 1) + " :" - + bestscore); + // System.out.println("Best Score for " + (matches.size() + 1) + " :" + // + bestscore); matches.add(bestm); aligns.add(bestaseq); al.deleteSequence(bestm); @@ -1214,6 +1063,8 @@ public class AlignSeq // long start = System.currentTimeMillis(); + SimilarityParams pidParams = new SimilarityParams(true, true, true, + true); float pid; String seqi, seqj; for (int i = 0; i < height; i++) @@ -1254,7 +1105,7 @@ public class AlignSeq seqj = ug; } } - pid = Comparison.PID(seqi, seqj); + pid = (float) PIDModel.computePID(seqi, seqj, pidParams); // use real sequence length rather than string length if (lngth[j] < lngth[i]) diff --git a/src/jalview/analysis/AlignmentSorter.java b/src/jalview/analysis/AlignmentSorter.java index 59cdccf..6c46a3e 100755 --- a/src/jalview/analysis/AlignmentSorter.java +++ b/src/jalview/analysis/AlignmentSorter.java @@ -20,6 +20,8 @@ */ package jalview.analysis; +import jalview.analysis.scoremodels.PIDModel; +import 
jalview.analysis.scoremodels.SimilarityParams; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.AlignmentI; import jalview.datamodel.AlignmentOrder; @@ -27,7 +29,6 @@ import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; import jalview.datamodel.SequenceNode; -import jalview.util.Comparison; import jalview.util.MessageManager; import jalview.util.QuickSort; @@ -66,7 +67,7 @@ public class AlignmentSorter static boolean sortOrderAscending = true; - static NJTree lastTree = null; + static TreeModel lastTree = null; static boolean sortTreeAscending = true; @@ -87,46 +88,29 @@ public class AlignmentSorter private static boolean sortLengthAscending; /** - * Sort by Percentage Identity w.r.t. s + * Sorts sequences in the alignment by Percentage Identity with the given + * reference sequence, sorting the highest identity to the top * * @param align * AlignmentI * @param s * SequenceI - * @param tosort - * sequences from align that are to be sorted. - */ - public static void sortByPID(AlignmentI align, SequenceI s, - SequenceI[] tosort) - { - sortByPID(align, s, tosort, 0, -1); - } - - /** - * Sort by Percentage Identity w.r.t. s - * - * @param align - * AlignmentI - * @param s - * SequenceI - * @param tosort - * sequences from align that are to be sorted. 
- * @param start - * start column (0 for beginning * @param end */ - public static void sortByPID(AlignmentI align, SequenceI s, - SequenceI[] tosort, int start, int end) + public static void sortByPID(AlignmentI align, SequenceI s) { int nSeq = align.getHeight(); float[] scores = new float[nSeq]; SequenceI[] seqs = new SequenceI[nSeq]; + String refSeq = s.getSequenceAsString(); + SimilarityParams pidParams = new SimilarityParams(true, true, true, + true); for (int i = 0; i < nSeq; i++) { - scores[i] = Comparison.PID(align.getSequenceAt(i) - .getSequenceAsString(), s.getSequenceAsString()); + scores[i] = (float) PIDModel.computePID(align.getSequenceAt(i) + .getSequenceAsString(), refSeq, pidParams); seqs[i] = align.getSequenceAt(i); } @@ -447,7 +431,7 @@ public class AlignmentSorter * @return DOCUMENT ME! */ private static List getOrderByTree(AlignmentI align, - NJTree tree) + TreeModel tree) { int nSeq = align.getHeight(); @@ -487,7 +471,7 @@ public class AlignmentSorter * @param tree * tree which has */ - public static void sortByTree(AlignmentI align, NJTree tree) + public static void sortByTree(AlignmentI align, TreeModel tree) { List tmp = getOrderByTree(align, tree); diff --git a/src/jalview/analysis/AverageDistanceTree.java b/src/jalview/analysis/AverageDistanceTree.java new file mode 100644 index 0000000..907109e --- /dev/null +++ b/src/jalview/analysis/AverageDistanceTree.java @@ -0,0 +1,121 @@ +package jalview.analysis; + +import jalview.api.analysis.ScoreModelI; +import jalview.api.analysis.SimilarityParamsI; +import jalview.datamodel.SequenceNode; +import jalview.viewmodel.AlignmentViewport; + +/** + * This class implements distance calculations used in constructing a Average + * Distance tree (also known as UPGMA) + */ +public class AverageDistanceTree extends TreeBuilder +{ + /** + * Constructor + * + * @param av + * @param sm + * @param scoreParameters + */ + public AverageDistanceTree(AlignmentViewport av, ScoreModelI sm, + SimilarityParamsI 
scoreParameters) + { + super(av, sm, scoreParameters); + } + + /** + * Calculates and saves the distance between the combination of cluster(i) and + * cluster(j) and all other clusters. An average of the distances from + * cluster(i) and cluster(j) is calculated, weighted by the sizes of each + * cluster. + * + * @param i + * @param j + */ + @Override + protected void findClusterDistance(int i, int j) + { + int noi = clusters.elementAt(i).cardinality(); + int noj = clusters.elementAt(j).cardinality(); + + // New distances from cluster i to others + double[] newdist = new double[noseqs]; + + for (int l = 0; l < noseqs; l++) + { + if ((l != i) && (l != j)) + { + newdist[l] = ((distances.getValue(i, l) * noi) + (distances + .getValue(j, l) * noj)) / (noi + noj); + } + else + { + newdist[l] = 0; + } + } + + for (int ii = 0; ii < noseqs; ii++) + { + distances.setValue(i, ii, newdist[ii]); + distances.setValue(ii, i, newdist[ii]); + } + } + + /** + * {@inheritDoc} + */ + @Override + protected double findMinDistance() + { + double min = Double.MAX_VALUE; + + for (int i = 0; i < (noseqs - 1); i++) + { + for (int j = i + 1; j < noseqs; j++) + { + if (!done.get(i) && !done.get(j)) + { + if (distances.getValue(i, j) < min) + { + mini = i; + minj = j; + + min = distances.getValue(i, j); + } + } + } + } + return min; + } + + /** + * {@inheritDoc} + */ + @Override + protected void findNewDistances(SequenceNode nodei, SequenceNode nodej, + double dist) + { + double ih = 0; + double jh = 0; + + SequenceNode sni = nodei; + SequenceNode snj = nodej; + + while (sni != null) + { + ih = ih + sni.dist; + sni = (SequenceNode) sni.left(); + } + + while (snj != null) + { + jh = jh + snj.dist; + snj = (SequenceNode) snj.left(); + } + + nodei.dist = ((dist / 2) - ih); + nodej.dist = ((dist / 2) - jh); + } + +} diff --git a/src/jalview/analysis/Conservation.java b/src/jalview/analysis/Conservation.java index 565924b..2b5a8f6 100755 --- a/src/jalview/analysis/Conservation.java +++ 
b/src/jalview/analysis/Conservation.java @@ -20,6 +20,8 @@ */ package jalview.analysis; +import jalview.analysis.scoremodels.ScoreMatrix; +import jalview.analysis.scoremodels.ScoreModels; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.Annotation; import jalview.datamodel.ResidueCount; @@ -50,14 +52,19 @@ public class Conservation private static final int TOUPPERCASE = 'a' - 'A'; + private static final int GAP_INDEX = -1; + SequenceI[] sequences; int start; int end; - Vector seqNums; // vector of int vectors where first is sequence - // checksum + /* + * a list whose i'th element is an array whose first entry is the checksum + * of the i'th sequence, followed by residues encoded to score matrix index + */ + Vector seqNums; int maxLength = 0; // used by quality calcs @@ -70,17 +77,17 @@ public class Conservation */ Map[] total; - boolean canonicaliseAa = true; // if true then conservation calculation will - - // map all symbols to canonical aa numbering - // rather than consider conservation of that - // symbol + /* + * if true then conservation calculation will map all symbols to canonical aa + * numbering rather than consider conservation of that symbol + */ + boolean canonicaliseAa = true; - /** Stores calculated quality values */ private Vector quality; - /** Stores maximum and minimum values of quality values */ - private double[] qualityRange = new double[2]; + private double qualityMinimum; + + private double qualityMaximum; private Sequence consSequence; @@ -91,8 +98,16 @@ public class Conservation private String name = ""; + /* + * an array, for each column, of counts of symbols (by score matrix index) + */ private int[][] cons2; + /* + * gap counts for each column + */ + private int[] cons2GapCounts; + private String[] consSymbs; /** @@ -162,27 +177,29 @@ public class Conservation } /** - * Translate sequence i into a numerical representation and store it in the - * i'th position of the seqNums array. 
+ * Translate sequence i into score matrix indices and store it in the i'th + * position of the seqNums array. * * @param i + * @param sm */ - private void calcSeqNum(int i) + private void calcSeqNum(int i, ScoreMatrix sm) { - String sq = null; // for dumb jbuilder not-inited exception warning - int[] sqnum = null; - int sSize = sequences.length; if ((i > -1) && (i < sSize)) { - sq = sequences[i].getSequenceAsString(); + String sq = sequences[i].getSequenceAsString(); if (seqNums.size() <= i) { seqNums.addElement(new int[sq.length() + 1]); } + /* + * the first entry in the array is the sequence's hashcode, + * following entries are matrix indices of sequence characters + */ if (sq.hashCode() != seqNums.elementAt(i)[0]) { int j; @@ -195,14 +212,26 @@ public class Conservation maxLength = len; } - sqnum = new int[len + 1]; // better to always make a new array - + int[] sqnum = new int[len + 1]; // better to always make a new array - // sequence can change its length sqnum[0] = sq.hashCode(); for (j = 1; j <= len; j++) { - sqnum[j] = jalview.schemes.ResidueProperties.aaIndex[sq - .charAt(j - 1)]; + // sqnum[j] = ResidueProperties.aaIndex[sq.charAt(j - 1)]; + char residue = sq.charAt(j - 1); + if (Comparison.isGap(residue)) + { + sqnum[j] = GAP_INDEX; + } + else + { + sqnum[j] = sm.getMatrixIndex(residue); + if (sqnum[j] == -1) + { + sqnum[j] = GAP_INDEX; + } + } } seqNums.setElementAt(sqnum, i); @@ -527,137 +556,133 @@ public class Conservation // From Alignment.java in jalview118 public void findQuality() { - findQuality(0, maxLength - 1); + findQuality(0, maxLength - 1, ScoreModels.getInstance().getBlosum62()); } /** * DOCUMENT ME! + * + * @param sm */ - private void percentIdentity2() + private void percentIdentity(ScoreMatrix sm) { seqNums = new Vector(); - // calcSeqNum(s); int i = 0, iSize = sequences.length; // Do we need to calculate this again? 
for (i = 0; i < iSize; i++) { - calcSeqNum(i); + calcSeqNum(i, sm); } if ((cons2 == null) || seqNumsChanged) { + // FIXME remove magic number 24 without changing calc + // sm.getSize() returns 25 so doesn't quite do it... cons2 = new int[maxLength][24]; + cons2GapCounts = new int[maxLength]; - // Initialize the array - for (int j = 0; j < 24; j++) - { - for (i = 0; i < maxLength; i++) - { - cons2[i][j] = 0; - } - } - - int[] sqnum; int j = 0; while (j < sequences.length) { - sqnum = seqNums.elementAt(j); + int[] sqnum = seqNums.elementAt(j); for (i = 1; i < sqnum.length; i++) { - cons2[i - 1][sqnum[i]]++; + int index = sqnum[i]; + if (index == GAP_INDEX) + { + cons2GapCounts[i - 1]++; + } + else + { + cons2[i - 1][index]++; + } } + // TODO should this start from sqnum.length? for (i = sqnum.length - 1; i < maxLength; i++) { - cons2[i][23]++; // gap count + cons2GapCounts[i]++; } - j++; } - - // unnecessary ? - - /* - * for (int i=start; i <= end; i++) { int max = -1000; int maxi = -1; int - * maxj = -1; - * - * for (int j=0;j<24;j++) { if (cons2[i][j] > max) { max = cons2[i][j]; - * maxi = i; maxj = j; } } } - */ } } /** - * Calculates the quality of the set of sequences + * Calculates the quality of the set of sequences over the given inclusive + * column range, using the specified substitution score matrix * - * @param startRes - * Start residue - * @param endRes - * End residue + * @param startCol + * @param endCol + * @param scoreMatrix */ - public void findQuality(int startRes, int endRes) + protected void findQuality(int startCol, int endCol, ScoreMatrix scoreMatrix) { quality = new Vector(); - double max = -10000; - int[][] BLOSUM62 = ResidueProperties.getBLOSUM62(); + double max = -Double.MAX_VALUE; + float[][] scores = scoreMatrix.getMatrix(); - // Loop over columns // JBPNote Profiling info - // long ts = System.currentTimeMillis(); - // long te = System.currentTimeMillis(); - percentIdentity2(); + percentIdentity(scoreMatrix); int size = seqNums.size(); 
int[] lengths = new int[size]; - double tot, bigtot, sr, tmp; - double[] x, xx; - int l, j, i, ii, i2, k, seqNum; - for (l = 0; l < size; l++) + for (int l = 0; l < size; l++) { lengths[l] = seqNums.elementAt(l).length - 1; } - for (j = startRes; j <= endRes; j++) + final int symbolCount = scoreMatrix.getSize(); + + for (int j = startCol; j <= endCol; j++) { - bigtot = 0; + double bigtot = 0; // First Xr = depends on column only - x = new double[24]; + double[] x = new double[symbolCount]; - for (ii = 0; ii < 24; ii++) + for (int ii = 0; ii < symbolCount; ii++) { x[ii] = 0; - for (i2 = 0; i2 < 24; i2++) + /* + * todo JAL-728 currently assuming last symbol in matrix is * for gap + * (which we ignore as counted separately); true for BLOSUM62 but may + * not be once alternative matrices are supported + */ + for (int i2 = 0; i2 < symbolCount - 1; i2++) { - x[ii] += (((double) cons2[j][i2] * BLOSUM62[ii][i2]) + 4); + x[ii] += (((double) cons2[j][i2] * scores[ii][i2]) + 4D); } + x[ii] += 4D + cons2GapCounts[j] * scoreMatrix.getMinimumScore(); x[ii] /= size; } // Now calculate D for each position and sum - for (k = 0; k < size; k++) + for (int k = 0; k < size; k++) { - tot = 0; - xx = new double[24]; - seqNum = (j < lengths[k]) ? seqNums.elementAt(k)[j + 1] : 23; // Sequence, - // or gap - // at the - // end - - // This is a loop over r - for (i = 0; i < 23; i++) - { - sr = 0; + double tot = 0; + double[] xx = new double[symbolCount]; + // sequence character index, or implied gap if sequence too short + int seqNum = (j < lengths[k]) ? 
seqNums.elementAt(k)[j + 1] + : GAP_INDEX; - sr = (double) BLOSUM62[i][seqNum] + 4; + for (int i = 0; i < symbolCount - 1; i++) + { + double sr = 4D; + if (seqNum == GAP_INDEX) + { + sr += scoreMatrix.getMinimumScore(); + } + else + { + sr += scores[i][seqNum]; + } - // Calculate X with another loop over residues - // System.out.println("Xi " + i + " " + x[i] + " " + sr); xx[i] = x[i] - sr; tot += (xx[i] * xx[i]); @@ -666,24 +691,18 @@ public class Conservation bigtot += Math.sqrt(tot); } - // This is the quality for one column - if (max < bigtot) - { - max = bigtot; - } + max = Math.max(max, bigtot); - // bigtot = bigtot * (size-cons2[j][23])/size; quality.addElement(new Double(bigtot)); - - // Need to normalize by gaps } - double newmax = -10000; + double newmax = -Double.MAX_VALUE; - for (j = startRes; j <= endRes; j++) + for (int j = startCol; j <= endCol; j++) { - tmp = quality.elementAt(j).doubleValue(); - tmp = ((max - tmp) * (size - cons2[j][23])) / size; + double tmp = quality.elementAt(j).doubleValue(); + // tmp = ((max - tmp) * (size - cons2[j][23])) / size; + tmp = ((max - tmp) * (size - cons2GapCounts[j])) / size; // System.out.println(tmp+ " " + j); quality.setElementAt(new Double(tmp), j); @@ -694,9 +713,8 @@ public class Conservation } } - // System.out.println("Quality " + s); - qualityRange[0] = 0D; - qualityRange[1] = newmax; + qualityMinimum = 0D; + qualityMaximum = newmax; } /** @@ -746,14 +764,14 @@ public class Conservation if (quality2 != null) { - quality2.graphMax = (float) qualityRange[1]; + quality2.graphMax = (float) qualityMaximum; if (quality2.annotations != null && quality2.annotations.length < alWidth) { quality2.annotations = new Annotation[alWidth]; } - qmin = (float) qualityRange[0]; - qmax = (float) qualityRange[1]; + qmin = (float) qualityMinimum; + qmax = (float) qualityMaximum; } for (int i = istart; i < alWidth; i++) diff --git a/src/jalview/analysis/NJTree.java b/src/jalview/analysis/NJTree.java index e0e50fb..487e85e 
100644 --- a/src/jalview/analysis/NJTree.java +++ b/src/jalview/analysis/NJTree.java @@ -21,1312 +21,116 @@ package jalview.analysis; import jalview.api.analysis.ScoreModelI; -import jalview.datamodel.AlignmentView; -import jalview.datamodel.BinaryNode; -import jalview.datamodel.CigarArray; -import jalview.datamodel.NodeTransformI; -import jalview.datamodel.SeqCigar; -import jalview.datamodel.Sequence; -import jalview.datamodel.SequenceI; +import jalview.api.analysis.SimilarityParamsI; import jalview.datamodel.SequenceNode; -import jalview.io.NewickFile; -import jalview.schemes.ResidueProperties; - -import java.util.Enumeration; -import java.util.List; -import java.util.Vector; +import jalview.viewmodel.AlignmentViewport; /** - * DOCUMENT ME! - * - * @author $author$ - * @version $Revision$ + * This class implements distance calculations used in constructing a Neighbour + * Joining tree */ -public class NJTree +public class NJTree extends TreeBuilder { - Vector cluster; - - SequenceI[] sequence; - - // SequenceData is a string representation of what the user - // sees. The display may contain hidden columns. - public AlignmentView seqData = null; - - int[] done; - - int noseqs; - - int noClus; - - float[][] distance; - - int mini; - - int minj; - - float ri; - - float rj; - - Vector groups = new Vector(); - - SequenceNode maxdist; - - SequenceNode top; - - float maxDistValue; - - float maxheight; - - int ycount; - - Vector node; - - String type; - - String pwtype; - - Object found = null; - - boolean hasDistances = true; // normal case for jalview trees - - boolean hasBootstrap = false; // normal case for jalview trees - - private boolean hasRootDistance = true; - - /** - * Create a new NJTree object with leaves associated with sequences in seqs, - * and original alignment data represented by Cigar strings. 
- * - * @param seqs - * SequenceI[] - * @param odata - * Cigar[] - * @param treefile - * NewickFile - */ - public NJTree(SequenceI[] seqs, AlignmentView odata, NewickFile treefile) - { - this(seqs, treefile); - if (odata != null) - { - seqData = odata; - } - /* - * sequenceString = new String[odata.length]; char gapChar = - * jalview.util.Comparison.GapChars.charAt(0); for (int i = 0; i < - * odata.length; i++) { SequenceI oseq_aligned = odata[i].getSeq(gapChar); - * sequenceString[i] = oseq_aligned.getSequence(); } - */ - } - - /** - * Creates a new NJTree object from a tree from an external source - * - * @param seqs - * SequenceI which should be associated with leafs of treefile - * @param treefile - * A parsed tree - */ - public NJTree(SequenceI[] seqs, NewickFile treefile) - { - this.sequence = seqs; - top = treefile.getTree(); - - /** - * There is no dependent alignment to be recovered from an imported tree. - * - * if (sequenceString == null) { sequenceString = new String[seqs.length]; - * for (int i = 0; i < seqs.length; i++) { sequenceString[i] = - * seqs[i].getSequence(); } } - */ - - hasDistances = treefile.HasDistances(); - hasBootstrap = treefile.HasBootstrap(); - hasRootDistance = treefile.HasRootDistance(); - - maxheight = findHeight(top); - - SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs); - - Vector leaves = findLeaves(top); - - int i = 0; - int namesleft = seqs.length; - - SequenceNode j; - SequenceI nam; - String realnam; - Vector one2many = new Vector(); - int countOne2Many = 0; - while (i < leaves.size()) - { - j = leaves.elementAt(i++); - realnam = j.getName(); - nam = null; - - if (namesleft > -1) - { - nam = algnIds.findIdMatch(realnam); - } - - if (nam != null) - { - j.setElement(nam); - if (one2many.contains(nam)) - { - countOne2Many++; - // if (jalview.bin.Cache.log.isDebugEnabled()) - // jalview.bin.Cache.log.debug("One 2 many relationship for - // "+nam.getName()); - } - else - { - one2many.addElement(nam); - namesleft--; - } - 
} - else - { - j.setElement(new Sequence(realnam, "THISISAPLACEHLDER")); - j.setPlaceholder(true); - } - } - // if (jalview.bin.Cache.log.isDebugEnabled() && countOne2Many>0) { - // jalview.bin.Cache.log.debug("There were "+countOne2Many+" alignment - // sequence ids (out of "+one2many.size()+" unique ids) linked to two or - // more leaves."); - // } - // one2many.clear(); - } - /** - * Creates a new NJTree object. + * Constructor given a viewport, tree type and score model * - * @param sequence - * DOCUMENT ME! - * @param type - * DOCUMENT ME! - * @param pwtype - * DOCUMENT ME! - * @param start - * DOCUMENT ME! - * @param end - * DOCUMENT ME! + * @param av + * the current alignment viewport + * @param sm + * a distance or similarity score model to use to compute the tree + * @param scoreParameters */ - public NJTree(SequenceI[] sequence, AlignmentView seqData, String type, - String pwtype, ScoreModelI sm, int start, int end) + public NJTree(AlignmentViewport av, ScoreModelI sm, + SimilarityParamsI scoreParameters) { - this.sequence = sequence; - this.node = new Vector(); - this.type = type; - this.pwtype = pwtype; - if (seqData != null) - { - this.seqData = seqData; - } - else - { - SeqCigar[] seqs = new SeqCigar[sequence.length]; - for (int i = 0; i < sequence.length; i++) - { - seqs[i] = new SeqCigar(sequence[i], start, end); - } - CigarArray sdata = new CigarArray(seqs); - sdata.addOperation(CigarArray.M, end - start + 1); - this.seqData = new AlignmentView(sdata, start); - } - // System.err.println("Made seqData");// dbg - if (!(type.equals("NJ"))) - { - type = "AV"; - } - - if (sm == null && !(pwtype.equals("PID"))) - { - if (ResidueProperties.getScoreMatrix(pwtype) == null) - { - pwtype = "BLOSUM62"; - } - } - - int i = 0; - - done = new int[sequence.length]; - - while ((i < sequence.length) && (sequence[i] != null)) - { - done[i] = 0; - i++; - } - - noseqs = i++; - - distance = findDistances(sm); - // System.err.println("Made distances");// dbg - 
makeLeaves(); - // System.err.println("Made leaves");// dbg - - noClus = cluster.size(); - - cluster(); - // System.err.println("Made clusters");// dbg - + super(av, sm, scoreParameters); } /** - * Generate a string representation of the Tree - * - * @return Newick File with all tree data available + * {@inheritDoc} */ @Override - public String toString() - { - jalview.io.NewickFile fout = new jalview.io.NewickFile(getTopNode()); - - return fout.print(isHasBootstrap(), isHasDistances(), - isHasRootDistance()); // output all data available for tree - } - - /** - * - * used when the alignment associated to a tree has changed. - * - * @param list - * Sequence set to be associated with tree nodes - */ - public void UpdatePlaceHolders(List list) + protected double findMinDistance() { - Vector leaves = findLeaves(top); + double min = Double.MAX_VALUE; - int sz = leaves.size(); - SequenceIdMatcher seqmatcher = null; - int i = 0; - - while (i < sz) + for (int i = 0; i < (noseqs - 1); i++) { - SequenceNode leaf = leaves.elementAt(i++); - - if (list.contains(leaf.element())) - { - leaf.setPlaceholder(false); - } - else + for (int j = i + 1; j < noseqs; j++) { - if (seqmatcher == null) + if (!done.get(i) && !done.get(j)) { - // Only create this the first time we need it - SequenceI[] seqs = new SequenceI[list.size()]; + double tmp = distances.getValue(i, j) + - (findr(i, j) + findr(j, i)); - for (int j = 0; j < seqs.length; j++) + if (tmp < min) { - seqs[j] = list.get(j); - } - - seqmatcher = new SequenceIdMatcher(seqs); - } - - SequenceI nam = seqmatcher.findIdMatch(leaf.getName()); + mini = i; + minj = j; - if (nam != null) - { - if (!leaf.isPlaceholder()) - { - // remapping the node to a new sequenceI - should remove any refs to - // old one. - // TODO - make many sequenceI to one leaf mappings possible! 
- // (JBPNote) - } - leaf.setPlaceholder(false); - leaf.setElement(nam); - } - else - { - if (!leaf.isPlaceholder()) - { - // Construct a new placeholder sequence object for this leaf - leaf.setElement(new Sequence(leaf.getName(), - "THISISAPLACEHLDER")); + min = tmp; } - leaf.setPlaceholder(true); - - } - } - } - } - - /** - * rename any nodes according to their associated sequence. This will modify - * the tree's metadata! (ie the original NewickFile or newly generated - * BinaryTree's label data) - */ - public void renameAssociatedNodes() - { - applyToNodes(new NodeTransformI() - { - - @Override - public void transform(BinaryNode nd) - { - Object el = nd.element(); - if (el != null && el instanceof SequenceI) - { - nd.setName(((SequenceI) el).getName()); } } - }); - } - - /** - * DOCUMENT ME! - */ - public void cluster() - { - while (noClus > 2) - { - if (type.equals("NJ")) - { - findMinNJDistance(); - } - else - { - findMinDistance(); - } - - Cluster c = joinClusters(mini, minj); - - done[minj] = 1; - - cluster.setElementAt(null, minj); - cluster.setElementAt(c, mini); - - noClus--; - } - - boolean onefound = false; - - int one = -1; - int two = -1; - - for (int i = 0; i < noseqs; i++) - { - if (done[i] != 1) - { - if (onefound == false) - { - two = i; - onefound = true; - } - else - { - one = i; - } - } - } - - joinClusters(one, two); - top = (node.elementAt(one)); - - reCount(top); - findHeight(top); - findMaxDist(top); - } - - /** - * DOCUMENT ME! - * - * @param i - * DOCUMENT ME! - * @param j - * DOCUMENT ME! - * - * @return DOCUMENT ME! 
- */ - public Cluster joinClusters(int i, int j) - { - float dist = distance[i][j]; - - int noi = cluster.elementAt(i).value.length; - int noj = cluster.elementAt(j).value.length; - - int[] value = new int[noi + noj]; - - for (int ii = 0; ii < noi; ii++) - { - value[ii] = cluster.elementAt(i).value[ii]; - } - - for (int ii = noi; ii < (noi + noj); ii++) - { - value[ii] = cluster.elementAt(j).value[ii - noi]; - } - - Cluster c = new Cluster(value); - - ri = findr(i, j); - rj = findr(j, i); - - if (type.equals("NJ")) - { - findClusterNJDistance(i, j); - } - else - { - findClusterDistance(i, j); - } - - SequenceNode sn = new SequenceNode(); - - sn.setLeft((node.elementAt(i))); - sn.setRight((node.elementAt(j))); - - SequenceNode tmpi = (node.elementAt(i)); - SequenceNode tmpj = (node.elementAt(j)); - - if (type.equals("NJ")) - { - findNewNJDistances(tmpi, tmpj, dist); - } - else - { - findNewDistances(tmpi, tmpj, dist); - } - - tmpi.setParent(sn); - tmpj.setParent(sn); - - node.setElementAt(sn, i); - - return c; - } - - /** - * DOCUMENT ME! - * - * @param tmpi - * DOCUMENT ME! - * @param tmpj - * DOCUMENT ME! - * @param dist - * DOCUMENT ME! - */ - public void findNewNJDistances(SequenceNode tmpi, SequenceNode tmpj, - float dist) - { - - tmpi.dist = ((dist + ri) - rj) / 2; - tmpj.dist = (dist - tmpi.dist); - - if (tmpi.dist < 0) - { - tmpi.dist = 0; - } - - if (tmpj.dist < 0) - { - tmpj.dist = 0; - } - } - - /** - * DOCUMENT ME! - * - * @param tmpi - * DOCUMENT ME! - * @param tmpj - * DOCUMENT ME! - * @param dist - * DOCUMENT ME! 
- */ - public void findNewDistances(SequenceNode tmpi, SequenceNode tmpj, - float dist) - { - float ih = 0; - float jh = 0; - - SequenceNode sni = tmpi; - SequenceNode snj = tmpj; - - while (sni != null) - { - ih = ih + sni.dist; - sni = (SequenceNode) sni.left(); - } - - while (snj != null) - { - jh = jh + snj.dist; - snj = (SequenceNode) snj.left(); } - tmpi.dist = ((dist / 2) - ih); - tmpj.dist = ((dist / 2) - jh); + return min; } /** - * DOCUMENT ME! - * - * @param i - * DOCUMENT ME! - * @param j - * DOCUMENT ME! + * {@inheritDoc} */ - public void findClusterDistance(int i, int j) + @Override + protected void findNewDistances(SequenceNode nodei, SequenceNode nodej, + double dist) { - int noi = cluster.elementAt(i).value.length; - int noj = cluster.elementAt(j).value.length; + nodei.dist = ((dist + ri) - rj) / 2; + nodej.dist = (dist - nodei.dist); - // New distances from cluster to others - float[] newdist = new float[noseqs]; - - for (int l = 0; l < noseqs; l++) + if (nodei.dist < 0) { - if ((l != i) && (l != j)) - { - newdist[l] = ((distance[i][l] * noi) + (distance[j][l] * noj)) - / (noi + noj); - } - else - { - newdist[l] = 0; - } + nodei.dist = 0; } - for (int ii = 0; ii < noseqs; ii++) + if (nodej.dist < 0) { - distance[i][ii] = newdist[ii]; - distance[ii][i] = newdist[ii]; + nodej.dist = 0; } } /** - * DOCUMENT ME! + * Calculates and saves the distance between the combination of cluster(i) and + * cluster(j) and all other clusters. The new distance to cluster k is + * calculated as the average of the distances from i to k and from j to k, + * less half the distance from i to j. * * @param i - * DOCUMENT ME! * @param j - * DOCUMENT ME! 
*/ - public void findClusterNJDistance(int i, int j) + @Override + protected + void findClusterDistance(int i, int j) { - - // New distances from cluster to others - float[] newdist = new float[noseqs]; - + // New distances from cluster i to others + double[] newdist = new double[noseqs]; + + double ijDistance = distances.getValue(i, j); for (int l = 0; l < noseqs; l++) { if ((l != i) && (l != j)) { - newdist[l] = ((distance[i][l] + distance[j][l]) - distance[i][j]) / 2; + newdist[l] = (distances.getValue(i, l) + distances.getValue(j, l) - ijDistance) / 2; } else { newdist[l] = 0; } } - + for (int ii = 0; ii < noseqs; ii++) { - distance[i][ii] = newdist[ii]; - distance[ii][i] = newdist[ii]; - } - } - - /** - * DOCUMENT ME! - * - * @param i - * DOCUMENT ME! - * @param j - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public float findr(int i, int j) - { - float tmp = 1; - - for (int k = 0; k < noseqs; k++) - { - if ((k != i) && (k != j) && (done[k] != 1)) - { - tmp = tmp + distance[i][k]; - } - } - - if (noClus > 2) - { - tmp = tmp / (noClus - 2); + distances.setValue(i, ii, newdist[ii]); + distances.setValue(ii, i, newdist[ii]); } - - return tmp; - } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public float findMinNJDistance() - { - float min = 100000; - - for (int i = 0; i < (noseqs - 1); i++) - { - for (int j = i + 1; j < noseqs; j++) - { - if ((done[i] != 1) && (done[j] != 1)) - { - float tmp = distance[i][j] - (findr(i, j) + findr(j, i)); - - if (tmp < min) - { - mini = i; - minj = j; - - min = tmp; - } - } - } - } - - return min; - } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! 
- */ - public float findMinDistance() - { - float min = 100000; - - for (int i = 0; i < (noseqs - 1); i++) - { - for (int j = i + 1; j < noseqs; j++) - { - if ((done[i] != 1) && (done[j] != 1)) - { - if (distance[i][j] < min) - { - mini = i; - minj = j; - - min = distance[i][j]; - } - } - } - } - - return min; - } - - /** - * Calculate a distance matrix given the sequence input data and score model - * - * @return similarity matrix used to compute tree - */ - public float[][] findDistances(ScoreModelI _pwmatrix) - { - - float[][] dist = new float[noseqs][noseqs]; - if (_pwmatrix == null) - { - // Resolve substitution model - _pwmatrix = ResidueProperties.getScoreModel(pwtype); - if (_pwmatrix == null) - { - _pwmatrix = ResidueProperties.getScoreMatrix("BLOSUM62"); - } - } - dist = _pwmatrix.findDistances(seqData); - return dist; - - } - - /** - * DOCUMENT ME! - */ - public void makeLeaves() - { - cluster = new Vector(); - - for (int i = 0; i < noseqs; i++) - { - SequenceNode sn = new SequenceNode(); - - sn.setElement(sequence[i]); - sn.setName(sequence[i].getName()); - node.addElement(sn); - - int[] value = new int[1]; - value[0] = i; - - Cluster c = new Cluster(value); - cluster.addElement(c); - } - } - - /** - * Search for leaf nodes below (or at) the given node - * - * @param nd - * root node to search from - * - * @return - */ - public Vector findLeaves(SequenceNode nd) - { - Vector leaves = new Vector(); - findLeaves(nd, leaves); - return leaves; - } - - /** - * Search for leaf nodes. - * - * @param nd - * root node to search from - * @param leaves - * Vector of leaves to add leaf node objects too. - * - * @return Vector of leaf nodes on binary tree - */ - Vector findLeaves(SequenceNode nd, - Vector leaves) - { - if (nd == null) - { - return leaves; - } - - if ((nd.left() == null) && (nd.right() == null)) // Interior node - // detection - { - leaves.addElement(nd); - - return leaves; - } - else - { - /* - * TODO: Identify internal nodes... 
if (node.isSequenceLabel()) { - * leaves.addElement(node); } - */ - findLeaves((SequenceNode) nd.left(), leaves); - findLeaves((SequenceNode) nd.right(), leaves); - } - - return leaves; - } - - /** - * Find the leaf node with a particular ycount - * - * @param nd - * initial point on tree to search from - * @param count - * value to search for - * - * @return null or the node with ycound=count - */ - public Object findLeaf(SequenceNode nd, int count) - { - found = _findLeaf(nd, count); - - return found; - } - - /* - * #see findLeaf(SequenceNode node, count) - */ - public Object _findLeaf(SequenceNode nd, int count) - { - if (nd == null) - { - return null; - } - - if (nd.ycount == count) - { - found = nd.element(); - - return found; - } - else - { - _findLeaf((SequenceNode) nd.left(), count); - _findLeaf((SequenceNode) nd.right(), count); - } - - return found; - } - - /** - * printNode is mainly for debugging purposes. - * - * @param nd - * SequenceNode - */ - public void printNode(SequenceNode nd) - { - if (nd == null) - { - return; - } - - if ((nd.left() == null) && (nd.right() == null)) - { - System.out.println("Leaf = " + ((SequenceI) nd.element()).getName()); - System.out.println("Dist " + nd.dist); - System.out.println("Boot " + nd.getBootstrap()); - } - else - { - System.out.println("Dist " + nd.dist); - printNode((SequenceNode) nd.left()); - printNode((SequenceNode) nd.right()); - } - } - - /** - * DOCUMENT ME! - * - * @param nd - * DOCUMENT ME! - */ - public void findMaxDist(SequenceNode nd) - { - if (nd == null) - { - return; - } - - if ((nd.left() == null) && (nd.right() == null)) - { - float dist = nd.dist; - - if (dist > maxDistValue) - { - maxdist = nd; - maxDistValue = dist; - } - } - else - { - findMaxDist((SequenceNode) nd.left()); - findMaxDist((SequenceNode) nd.right()); - } - } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public Vector getGroups() - { - return groups; - } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! 
- */ - public float getMaxHeight() - { - return maxheight; - } - - /** - * DOCUMENT ME! - * - * @param nd - * DOCUMENT ME! - * @param threshold - * DOCUMENT ME! - */ - public void groupNodes(SequenceNode nd, float threshold) - { - if (nd == null) - { - return; - } - - if ((nd.height / maxheight) > threshold) - { - groups.addElement(nd); - } - else - { - groupNodes((SequenceNode) nd.left(), threshold); - groupNodes((SequenceNode) nd.right(), threshold); - } - } - - /** - * DOCUMENT ME! - * - * @param nd - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public float findHeight(SequenceNode nd) - { - if (nd == null) - { - return maxheight; - } - - if ((nd.left() == null) && (nd.right() == null)) - { - nd.height = ((SequenceNode) nd.parent()).height + nd.dist; - - if (nd.height > maxheight) - { - return nd.height; - } - else - { - return maxheight; - } - } - else - { - if (nd.parent() != null) - { - nd.height = ((SequenceNode) nd.parent()).height + nd.dist; - } - else - { - maxheight = 0; - nd.height = (float) 0.0; - } - - maxheight = findHeight((SequenceNode) (nd.left())); - maxheight = findHeight((SequenceNode) (nd.right())); - } - - return maxheight; - } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! 
- */ - public SequenceNode reRoot() - { - if (maxdist != null) - { - ycount = 0; - - float tmpdist = maxdist.dist; - - // New top - SequenceNode sn = new SequenceNode(); - sn.setParent(null); - - // New right hand of top - SequenceNode snr = (SequenceNode) maxdist.parent(); - changeDirection(snr, maxdist); - System.out.println("Printing reversed tree"); - printN(snr); - snr.dist = tmpdist / 2; - maxdist.dist = tmpdist / 2; - - snr.setParent(sn); - maxdist.setParent(sn); - - sn.setRight(snr); - sn.setLeft(maxdist); - - top = sn; - - ycount = 0; - reCount(top); - findHeight(top); - } - - return top; - } - - /** - * - * @return true if original sequence data can be recovered - */ - public boolean hasOriginalSequenceData() - { - return seqData != null; - } - - /** - * Returns original alignment data used for calculation - or null where not - * available. - * - * @return null or cut'n'pasteable alignment - */ - public String printOriginalSequenceData(char gapChar) - { - if (seqData == null) - { - return null; - } - - StringBuffer sb = new StringBuffer(); - String[] seqdatas = seqData.getSequenceStrings(gapChar); - for (int i = 0; i < seqdatas.length; i++) - { - sb.append(new jalview.util.Format("%-" + 15 + "s").form(sequence[i] - .getName())); - sb.append(" " + seqdatas[i] + "\n"); - } - return sb.toString(); - } - - /** - * DOCUMENT ME! - * - * @param nd - * DOCUMENT ME! - */ - public void printN(SequenceNode nd) - { - if (nd == null) - { - return; - } - - if ((nd.left() != null) && (nd.right() != null)) - { - printN((SequenceNode) nd.left()); - printN((SequenceNode) nd.right()); - } - else - { - System.out.println(" name = " + ((SequenceI) nd.element()).getName()); - } - - System.out.println(" dist = " + nd.dist + " " + nd.count + " " - + nd.height); - } - - /** - * DOCUMENT ME! - * - * @param nd - * DOCUMENT ME! 
- */ - public void reCount(SequenceNode nd) - { - ycount = 0; - _lycount = 0; - // _lylimit = this.node.size(); - _reCount(nd); - } - - private long _lycount = 0, _lylimit = 0; - - /** - * DOCUMENT ME! - * - * @param nd - * DOCUMENT ME! - */ - public void _reCount(SequenceNode nd) - { - // if (_lycount<_lylimit) - // { - // System.err.println("Warning: depth of _recount greater than number of nodes."); - // } - if (nd == null) - { - return; - } - _lycount++; - - if ((nd.left() != null) && (nd.right() != null)) - { - - _reCount((SequenceNode) nd.left()); - _reCount((SequenceNode) nd.right()); - - SequenceNode l = (SequenceNode) nd.left(); - SequenceNode r = (SequenceNode) nd.right(); - - nd.count = l.count + r.count; - nd.ycount = (l.ycount + r.ycount) / 2; - } - else - { - nd.count = 1; - nd.ycount = ycount++; - } - _lycount--; - } - - /** - * DOCUMENT ME! - * - * @param nd - * DOCUMENT ME! - */ - public void swapNodes(SequenceNode nd) - { - if (nd == null) - { - return; - } - - SequenceNode tmp = (SequenceNode) nd.left(); - - nd.setLeft(nd.right()); - nd.setRight(tmp); - } - - /** - * DOCUMENT ME! - * - * @param nd - * DOCUMENT ME! - * @param dir - * DOCUMENT ME! - */ - public void changeDirection(SequenceNode nd, SequenceNode dir) - { - if (nd == null) - { - return; - } - - if (nd.parent() != top) - { - changeDirection((SequenceNode) nd.parent(), nd); - - SequenceNode tmp = (SequenceNode) nd.parent(); - - if (dir == nd.left()) - { - nd.setParent(dir); - nd.setLeft(tmp); - } - else if (dir == nd.right()) - { - nd.setParent(dir); - nd.setRight(tmp); - } - } - else - { - if (dir == nd.left()) - { - nd.setParent(nd.left()); - - if (top.left() == nd) - { - nd.setRight(top.right()); - } - else - { - nd.setRight(top.left()); - } - } - else - { - nd.setParent(nd.right()); - - if (top.left() == nd) - { - nd.setLeft(top.right()); - } - else - { - nd.setLeft(top.left()); - } - } - } - } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! 
- */ - public SequenceNode getMaxDist() - { - return maxdist; - } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public SequenceNode getTopNode() - { - return top; - } - - /** - * - * @return true if tree has real distances - */ - public boolean isHasDistances() - { - return hasDistances; - } - - /** - * - * @return true if tree has real bootstrap values - */ - public boolean isHasBootstrap() - { - return hasBootstrap; - } - - public boolean isHasRootDistance() - { - return hasRootDistance; - } - - /** - * apply the given transform to all the nodes in the tree. - * - * @param nodeTransformI - */ - public void applyToNodes(NodeTransformI nodeTransformI) - { - for (Enumeration nodes = node.elements(); nodes - .hasMoreElements(); nodeTransformI.transform(nodes - .nextElement())) - { - ; - } - } -} - -/** - * DOCUMENT ME! - * - * @author $author$ - * @version $Revision$ - */ -class Cluster -{ - int[] value; - - /** - * Creates a new Cluster object. - * - * @param value - * DOCUMENT ME! - */ - public Cluster(int[] value) - { - this.value = value; } } diff --git a/src/jalview/analysis/PCA.java b/src/jalview/analysis/PCA.java index 9babaee..3ec7995 100755 --- a/src/jalview/analysis/PCA.java +++ b/src/jalview/analysis/PCA.java @@ -20,9 +20,10 @@ */ package jalview.analysis; +import jalview.api.analysis.ScoreModelI; +import jalview.api.analysis.SimilarityParamsI; +import jalview.datamodel.AlignmentView; import jalview.math.MatrixI; -import jalview.schemes.ResidueProperties; -import jalview.schemes.ScoreMatrix; import java.io.PrintStream; @@ -31,8 +32,6 @@ import java.io.PrintStream; */ public class PCA implements Runnable { - boolean jvCalcMode = true; - MatrixI symm; double[] eigenvalue; @@ -41,55 +40,19 @@ public class PCA implements Runnable StringBuilder details = new StringBuilder(1024); - private String[] seqs; - - private ScoreMatrix scoreMatrix; + final private AlignmentView seqs; - /** - * Creates a new PCA object. 
By default, uses blosum62 matrix to generate - * sequence similarity matrices - * - * @param s - * Set of amino acid sequences to perform PCA on - */ - public PCA(String[] s) - { - this(s, false); - } - - /** - * Creates a new PCA object. By default, uses blosum62 matrix to generate - * sequence similarity matrices - * - * @param s - * Set of sequences to perform PCA on - * @param nucleotides - * if true, uses standard DNA/RNA matrix for sequence similarity - * calculation. - */ - public PCA(String[] s, boolean nucleotides) - { - this(s, nucleotides, null); - } + private ScoreModelI scoreModel; + + private SimilarityParamsI similarityParams; - public PCA(String[] s, boolean nucleotides, String s_m) + public PCA(AlignmentView s, ScoreModelI sm, SimilarityParamsI options) { this.seqs = s; - - scoreMatrix = null; - String sm = s_m; - if (sm != null) - { - scoreMatrix = ResidueProperties.getScoreMatrix(sm); - } - if (scoreMatrix == null) - { - // either we were given a non-existent score matrix or a scoremodel that - // isn't based on a pairwise symbol score matrix - scoreMatrix = ResidueProperties - .getScoreMatrix(sm = (nucleotides ? "DNA" : "BLOSUM62")); - } - details.append("PCA calculation using " + sm + this.similarityParams = options; + this.scoreModel = sm; + + details.append("PCA calculation using " + sm.getName() + " sequence similarity matrix\n========\n\n"); } @@ -206,11 +169,7 @@ public class PCA implements Runnable // long now = System.currentTimeMillis(); try { - details.append("PCA Calculation Mode is " - + (jvCalcMode ? 
"Jalview variant" : "Original SeqSpace") - + "\n"); - - eigenvector = scoreMatrix.computePairwiseScores(seqs); + eigenvector = scoreModel.findSimilarities(seqs, similarityParams); details.append(" --- OrigT * Orig ---- \n"); eigenvector.print(ps, "%8.2f"); @@ -252,11 +211,6 @@ public class PCA implements Runnable // + (System.currentTimeMillis() - now) + "ms")); } - public void setJvCalcMode(boolean calcMode) - { - this.jvCalcMode = calcMode; - } - /** * Answers the N dimensions of the NxN PCA matrix. This is the number of * sequences involved in the pairwise score calculation. @@ -266,6 +220,6 @@ public class PCA implements Runnable public int getHeight() { // TODO can any of seqs[] be null? - return seqs.length; + return seqs.getSequences().length; } } diff --git a/src/jalview/analysis/TreeBuilder.java b/src/jalview/analysis/TreeBuilder.java new file mode 100644 index 0000000..effef9a --- /dev/null +++ b/src/jalview/analysis/TreeBuilder.java @@ -0,0 +1,460 @@ +package jalview.analysis; + +import jalview.api.analysis.ScoreModelI; +import jalview.api.analysis.SimilarityParamsI; +import jalview.datamodel.AlignmentView; +import jalview.datamodel.CigarArray; +import jalview.datamodel.SeqCigar; +import jalview.datamodel.SequenceI; +import jalview.datamodel.SequenceNode; +import jalview.math.MatrixI; +import jalview.viewmodel.AlignmentViewport; + +import java.util.BitSet; +import java.util.Vector; + +public abstract class TreeBuilder +{ + public static final String AVERAGE_DISTANCE = "AV"; + + public static final String NEIGHBOUR_JOINING = "NJ"; + + protected Vector clusters; + + protected SequenceI[] sequences; + + public AlignmentView seqData; + + protected BitSet done; + + protected int noseqs; + + int noClus; + + protected MatrixI distances; + + protected int mini; + + protected int minj; + + protected double ri; + + protected double rj; + + SequenceNode maxdist; + + SequenceNode top; + + double maxDistValue; + + double maxheight; + + int ycount; + + Vector node; + 
+ private AlignmentView seqStrings; + + /** + * Constructor + * + * @param av + * @param sm + * @param scoreParameters + */ + public TreeBuilder(AlignmentViewport av, ScoreModelI sm, + SimilarityParamsI scoreParameters) + { + int start, end; + boolean selview = av.getSelectionGroup() != null + && av.getSelectionGroup().getSize() > 1; + seqStrings = av.getAlignmentView(selview); + if (!selview) + { + start = 0; + end = av.getAlignment().getWidth(); + this.sequences = av.getAlignment().getSequencesArray(); + } + else + { + start = av.getSelectionGroup().getStartRes(); + end = av.getSelectionGroup().getEndRes() + 1; + this.sequences = av.getSelectionGroup().getSequencesInOrder( + av.getAlignment()); + } + + init(seqStrings, start, end); + + computeTree(sm, scoreParameters); + } + + public SequenceI[] getSequences() + { + return sequences; + } + + /** + * DOCUMENT ME! + * + * @param nd + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + double findHeight(SequenceNode nd) + { + if (nd == null) + { + return maxheight; + } + + if ((nd.left() == null) && (nd.right() == null)) + { + nd.height = ((SequenceNode) nd.parent()).height + nd.dist; + + if (nd.height > maxheight) + { + return nd.height; + } + else + { + return maxheight; + } + } + else + { + if (nd.parent() != null) + { + nd.height = ((SequenceNode) nd.parent()).height + nd.dist; + } + else + { + maxheight = 0; + nd.height = (float) 0.0; + } + + maxheight = findHeight((SequenceNode) (nd.left())); + maxheight = findHeight((SequenceNode) (nd.right())); + } + + return maxheight; + } + + /** + * DOCUMENT ME! + * + * @param nd + * DOCUMENT ME! + */ + void reCount(SequenceNode nd) + { + ycount = 0; + // _lycount = 0; + // _lylimit = this.node.size(); + _reCount(nd); + } + + /** + * DOCUMENT ME! + * + * @param nd + * DOCUMENT ME! 
+ */ + void _reCount(SequenceNode nd) + { + // if (_lycount<_lylimit) + // { + // System.err.println("Warning: depth of _recount greater than number of nodes."); + // } + if (nd == null) + { + return; + } + // _lycount++; + + if ((nd.left() != null) && (nd.right() != null)) + { + + _reCount((SequenceNode) nd.left()); + _reCount((SequenceNode) nd.right()); + + SequenceNode l = (SequenceNode) nd.left(); + SequenceNode r = (SequenceNode) nd.right(); + + nd.count = l.count + r.count; + nd.ycount = (l.ycount + r.ycount) / 2; + } + else + { + nd.count = 1; + nd.ycount = ycount++; + } + // _lycount--; + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public SequenceNode getTopNode() + { + return top; + } + + /** + * + * @return true if tree has real distances + */ + public boolean hasDistances() + { + return true; + } + + /** + * + * @return true if tree has real bootstrap values + */ + public boolean hasBootstrap() + { + return false; + } + + public boolean hasRootDistance() + { + return true; + } + + /** + * Form clusters by grouping sub-clusters, starting from one sequence per + * cluster, and finishing when only two clusters remain + */ + void cluster() + { + while (noClus > 2) + { + findMinDistance(); + + joinClusters(mini, minj); + + noClus--; + } + + int rightChild = done.nextClearBit(0); + int leftChild = done.nextClearBit(rightChild + 1); + + joinClusters(leftChild, rightChild); + top = (node.elementAt(leftChild)); + + reCount(top); + findHeight(top); + findMaxDist(top); + } + + /** + * Returns the minimum distance between two clusters, and also sets the + * indices of the clusters in fields mini and minj + * + * @return + */ + protected abstract double findMinDistance(); + + /** + * Calculates the tree using the given score model and parameters, and the + * configured tree type + *

+ * If the score model computes pairwise distance scores, then these are used + * directly to derive the tree + *

+ * If the score model computes similarity scores, then the range of the scores + * is reversed to give a distance measure, and this is used to derive the tree + * + * @param sm + * @param scoreOptions + */ + protected void computeTree(ScoreModelI sm, SimilarityParamsI scoreOptions) + { + distances = sm.findDistances(seqData, scoreOptions); + + makeLeaves(); + + noClus = clusters.size(); + + cluster(); + } + + /** + * Finds the node, at or below the given node, with the maximum distance, and + * saves the node and the distance value + * + * @param nd + */ + void findMaxDist(SequenceNode nd) + { + if (nd == null) + { + return; + } + + if ((nd.left() == null) && (nd.right() == null)) + { + double dist = nd.dist; + + if (dist > maxDistValue) + { + maxdist = nd; + maxDistValue = dist; + } + } + else + { + findMaxDist((SequenceNode) nd.left()); + findMaxDist((SequenceNode) nd.right()); + } + } + + /** + * Calculates and returns r, whatever that is + * + * @param i + * @param j + * + * @return + */ + protected double findr(int i, int j) + { + double tmp = 1; + + for (int k = 0; k < noseqs; k++) + { + if ((k != i) && (k != j) && (!done.get(k))) + { + tmp = tmp + distances.getValue(i, k); + } + } + + if (noClus > 2) + { + tmp = tmp / (noClus - 2); + } + + return tmp; + } + + protected void init(AlignmentView seqView, int start, int end) + { + this.node = new Vector(); + if (seqView != null) + { + this.seqData = seqView; + } + else + { + SeqCigar[] seqs = new SeqCigar[sequences.length]; + for (int i = 0; i < sequences.length; i++) + { + seqs[i] = new SeqCigar(sequences[i], start, end); + } + CigarArray sdata = new CigarArray(seqs); + sdata.addOperation(CigarArray.M, end - start + 1); + this.seqData = new AlignmentView(sdata, start); + } + + /* + * count the non-null sequences + */ + noseqs = 0; + + done = new BitSet(); + + for (SequenceI seq : sequences) + { + if (seq != null) + { + noseqs++; + } + } + } + + /** + * Merges cluster(j) to cluster(i) and recalculates cluster 
and node distances + * + * @param i + * @param j + */ + void joinClusters(final int i, final int j) + { + double dist = distances.getValue(i, j); + + ri = findr(i, j); + rj = findr(j, i); + + findClusterDistance(i, j); + + SequenceNode sn = new SequenceNode(); + + sn.setLeft((node.elementAt(i))); + sn.setRight((node.elementAt(j))); + + SequenceNode tmpi = (node.elementAt(i)); + SequenceNode tmpj = (node.elementAt(j)); + + findNewDistances(tmpi, tmpj, dist); + + tmpi.setParent(sn); + tmpj.setParent(sn); + + node.setElementAt(sn, i); + + /* + * move the members of cluster(j) to cluster(i) + * and mark cluster j as out of the game + */ + clusters.get(i).or(clusters.get(j)); + clusters.get(j).clear(); + done.set(j); + } + + /* + * Computes and stores new distances for nodei and nodej, given the previous + * distance between them + */ + protected abstract void findNewDistances(SequenceNode nodei, + SequenceNode nodej, double previousDistance); + + /** + * Calculates and saves the distance between the combination of cluster(i) and + * cluster(j) and all other clusters. The form of the calculation depends on + * the tree clustering method being used. 
+ * + * @param i + * @param j + */ + protected abstract void findClusterDistance(int i, int j); + + /** + * Start by making a cluster for each individual sequence + */ + void makeLeaves() + { + clusters = new Vector(); + + for (int i = 0; i < noseqs; i++) + { + SequenceNode sn = new SequenceNode(); + + sn.setElement(sequences[i]); + sn.setName(sequences[i].getName()); + node.addElement(sn); + BitSet bs = new BitSet(); + bs.set(i); + clusters.addElement(bs); + } + } + + public AlignmentView getOriginalData() + { + return seqStrings; + } + +} diff --git a/src/jalview/analysis/TreeModel.java b/src/jalview/analysis/TreeModel.java new file mode 100644 index 0000000..5a41802 --- /dev/null +++ b/src/jalview/analysis/TreeModel.java @@ -0,0 +1,673 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. 
+ */ +package jalview.analysis; + +import jalview.datamodel.AlignmentView; +import jalview.datamodel.BinaryNode; +import jalview.datamodel.NodeTransformI; +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceI; +import jalview.datamodel.SequenceNode; +import jalview.io.NewickFile; + +import java.util.ArrayList; +import java.util.Enumeration; +import java.util.List; +import java.util.Vector; + +/** + * A model of a tree, either computed by Jalview or loaded from a file or other + * resource or service + */ +public class TreeModel +{ + + SequenceI[] sequences; + + /* + * SequenceData is a string representation of what the user + * sees. The display may contain hidden columns. + */ + private AlignmentView seqData; + + int noseqs; + + SequenceNode top; + + double maxDistValue; + + double maxheight; + + int ycount; + + Vector node; + + boolean hasDistances = true; // normal case for jalview trees + + boolean hasBootstrap = false; // normal case for jalview trees + + private boolean hasRootDistance = true; + + /** + * Create a new TreeModel object with leaves associated with sequences in + * seqs, and (optionally) original alignment data represented by Cigar strings + * + * @param seqs + * SequenceI[] + * @param odata + * Cigar[] + * @param treefile + * NewickFile + */ + public TreeModel(SequenceI[] seqs, AlignmentView odata, NewickFile treefile) + { + this(seqs, treefile.getTree(), treefile.HasDistances(), treefile + .HasBootstrap(), treefile.HasRootDistance()); + seqData = odata; + + associateLeavesToSequences(seqs); + } + + /** + * Constructor given a calculated tree + * + * @param tree + */ + public TreeModel(TreeBuilder tree) + { + this(tree.getSequences(), tree.getTopNode(), tree.hasDistances(), tree + .hasBootstrap(), tree.hasRootDistance()); + seqData = tree.getOriginalData(); + } + + /** + * Constructor given sequences, root node and tree property flags + * + * @param seqs + * @param root + * @param hasDist + * @param hasBoot + * @param 
hasRootDist + */ + public TreeModel(SequenceI[] seqs, SequenceNode root, boolean hasDist, + boolean hasBoot, boolean hasRootDist) + { + this.sequences = seqs; + top = root; + + hasDistances = hasDist; + hasBootstrap = hasBoot; + hasRootDistance = hasRootDist; + + maxheight = findHeight(top); + } + + /** + * @param seqs + */ + public void associateLeavesToSequences(SequenceI[] seqs) + { + SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs); + + Vector leaves = findLeaves(top); + + int i = 0; + int namesleft = seqs.length; + + SequenceNode j; + SequenceI nam; + String realnam; + Vector one2many = new Vector(); + // int countOne2Many = 0; + while (i < leaves.size()) + { + j = leaves.elementAt(i++); + realnam = j.getName(); + nam = null; + + if (namesleft > -1) + { + nam = algnIds.findIdMatch(realnam); + } + + if (nam != null) + { + j.setElement(nam); + if (one2many.contains(nam)) + { + // countOne2Many++; + // if (jalview.bin.Cache.log.isDebugEnabled()) + // jalview.bin.Cache.log.debug("One 2 many relationship for + // "+nam.getName()); + } + else + { + one2many.addElement(nam); + namesleft--; + } + } + else + { + j.setElement(new Sequence(realnam, "THISISAPLACEHLDER")); + j.setPlaceholder(true); + } + } + // if (jalview.bin.Cache.log.isDebugEnabled() && countOne2Many>0) { + // jalview.bin.Cache.log.debug("There were "+countOne2Many+" alignment + // sequence ids (out of "+one2many.size()+" unique ids) linked to two or + // more leaves."); + // } + // one2many.clear(); + } + + /** + * Generate a string representation of the Tree + * + * @return Newick File with all tree data available + */ + public String print() + { + NewickFile fout = new NewickFile(getTopNode()); + + return fout.print(hasBootstrap(), hasDistances(), + hasRootDistance()); // output all data available for tree + } + + /** + * + * used when the alignment associated to a tree has changed. 
+ * + * @param list + * Sequence set to be associated with tree nodes + */ + public void updatePlaceHolders(List list) + { + Vector leaves = findLeaves(top); + + int sz = leaves.size(); + SequenceIdMatcher seqmatcher = null; + int i = 0; + + while (i < sz) + { + SequenceNode leaf = leaves.elementAt(i++); + + if (list.contains(leaf.element())) + { + leaf.setPlaceholder(false); + } + else + { + if (seqmatcher == null) + { + // Only create this the first time we need it + SequenceI[] seqs = new SequenceI[list.size()]; + + for (int j = 0; j < seqs.length; j++) + { + seqs[j] = list.get(j); + } + + seqmatcher = new SequenceIdMatcher(seqs); + } + + SequenceI nam = seqmatcher.findIdMatch(leaf.getName()); + + if (nam != null) + { + if (!leaf.isPlaceholder()) + { + // remapping the node to a new sequenceI - should remove any refs to + // old one. + // TODO - make many sequenceI to one leaf mappings possible! + // (JBPNote) + } + leaf.setPlaceholder(false); + leaf.setElement(nam); + } + else + { + if (!leaf.isPlaceholder()) + { + // Construct a new placeholder sequence object for this leaf + leaf.setElement(new Sequence(leaf.getName(), + "THISISAPLACEHLDER")); + } + leaf.setPlaceholder(true); + + } + } + } + } + + /** + * rename any nodes according to their associated sequence. This will modify + * the tree's metadata! (ie the original NewickFile or newly generated + * BinaryTree's label data) + */ + public void renameAssociatedNodes() + { + applyToNodes(new NodeTransformI() + { + + @Override + public void transform(BinaryNode nd) + { + Object el = nd.element(); + if (el != null && el instanceof SequenceI) + { + nd.setName(((SequenceI) el).getName()); + } + } + }); + } + + /** + * Search for leaf nodes below (or at) the given node + * + * @param nd + * root node to search from + * + * @return + */ + public Vector findLeaves(SequenceNode nd) + { + Vector leaves = new Vector(); + findLeaves(nd, leaves); + return leaves; + } + + /** + * Search for leaf nodes. 
+ * + * @param nd + * root node to search from + * @param leaves + * Vector of leaves to add leaf node objects too. + * + * @return Vector of leaf nodes on binary tree + */ + Vector findLeaves(SequenceNode nd, + Vector leaves) + { + if (nd == null) + { + return leaves; + } + + if ((nd.left() == null) && (nd.right() == null)) // Interior node + // detection + { + leaves.addElement(nd); + + return leaves; + } + else + { + /* + * TODO: Identify internal nodes... if (node.isSequenceLabel()) { + * leaves.addElement(node); } + */ + findLeaves((SequenceNode) nd.left(), leaves); + findLeaves((SequenceNode) nd.right(), leaves); + } + + return leaves; + } + + /** + * printNode is mainly for debugging purposes. + * + * @param nd + * SequenceNode + */ + void printNode(SequenceNode nd) + { + if (nd == null) + { + return; + } + + if ((nd.left() == null) && (nd.right() == null)) + { + System.out.println("Leaf = " + ((SequenceI) nd.element()).getName()); + System.out.println("Dist " + nd.dist); + System.out.println("Boot " + nd.getBootstrap()); + } + else + { + System.out.println("Dist " + nd.dist); + printNode((SequenceNode) nd.left()); + printNode((SequenceNode) nd.right()); + } + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public double getMaxHeight() + { + return maxheight; + } + + /** + * Makes a list of groups, where each group is represented by a node whose + * height (distance from the root node), as a fraction of the height of the + * whole tree, is greater than the given threshold. This corresponds to + * selecting the nodes immediately to the right of a vertical line + * partitioning the tree (if the tree is drawn with root to the left). Each + * such node represents a group that contains all of the sequences linked to + * the child leaf nodes. 
+ * + * @param threshold + * @see #getGroups() + */ + public List groupNodes(float threshold) + { + List groups = new ArrayList(); + _groupNodes(groups, getTopNode(), threshold); + return groups; + } + + protected void _groupNodes(List groups, SequenceNode nd, + float threshold) + { + if (nd == null) + { + return; + } + + if ((nd.height / maxheight) > threshold) + { + groups.add(nd); + } + else + { + _groupNodes(groups, (SequenceNode) nd.left(), threshold); + _groupNodes(groups, (SequenceNode) nd.right(), threshold); + } + } + + /** + * DOCUMENT ME! + * + * @param nd + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public double findHeight(SequenceNode nd) + { + if (nd == null) + { + return maxheight; + } + + if ((nd.left() == null) && (nd.right() == null)) + { + nd.height = ((SequenceNode) nd.parent()).height + nd.dist; + + if (nd.height > maxheight) + { + return nd.height; + } + else + { + return maxheight; + } + } + else + { + if (nd.parent() != null) + { + nd.height = ((SequenceNode) nd.parent()).height + nd.dist; + } + else + { + maxheight = 0; + nd.height = (float) 0.0; + } + + maxheight = findHeight((SequenceNode) (nd.left())); + maxheight = findHeight((SequenceNode) (nd.right())); + } + + return maxheight; + } + + /** + * DOCUMENT ME! + * + * @param nd + * DOCUMENT ME! + */ + void printN(SequenceNode nd) + { + if (nd == null) + { + return; + } + + if ((nd.left() != null) && (nd.right() != null)) + { + printN((SequenceNode) nd.left()); + printN((SequenceNode) nd.right()); + } + else + { + System.out.println(" name = " + ((SequenceI) nd.element()).getName()); + } + + System.out.println(" dist = " + nd.dist + " " + nd.count + " " + + nd.height); + } + + /** + * DOCUMENT ME! + * + * @param nd + * DOCUMENT ME! + */ + public void reCount(SequenceNode nd) + { + ycount = 0; + // _lycount = 0; + // _lylimit = this.node.size(); + _reCount(nd); + } + + // private long _lycount = 0, _lylimit = 0; + + /** + * DOCUMENT ME! + * + * @param nd + * DOCUMENT ME! 
+   */
+  void _reCount(SequenceNode nd)
+  {
+    // if (_lycount<_lylimit)
+    // {
+    // System.err.println("Warning: depth of _recount greater than number of nodes.");
+    // }
+    if (nd == null)
+    {
+      return;
+    }
+    // _lycount++;
+
+    if ((nd.left() != null) && (nd.right() != null))
+    {
+
+      _reCount((SequenceNode) nd.left());
+      _reCount((SequenceNode) nd.right());
+
+      SequenceNode l = (SequenceNode) nd.left();
+      SequenceNode r = (SequenceNode) nd.right();
+
+      nd.count = l.count + r.count;
+      nd.ycount = (l.ycount + r.ycount) / 2;
+    }
+    else
+    {
+      nd.count = 1;
+      nd.ycount = ycount++;
+    }
+    // _lycount--;
+  }
+
+  /**
+   * Swaps the left and right children of the given node. Only the two child
+   * links of nd are exchanged; nothing beneath them is touched.
+   * 
+   * @param nd
+   *          node whose children are exchanged; ignored if null
+   */
+  public void swapNodes(SequenceNode nd)
+  {
+    if (nd == null)
+    {
+      return;
+    }
+
+    SequenceNode tmp = (SequenceNode) nd.left();
+
+    nd.setLeft(nd.right());
+    nd.setRight(tmp);
+  }
+
+  /**
+   * Reverses the parent/child links along the path from nd up to the child
+   * of the top node. Ancestors are processed first (the method recurses on
+   * nd's parent, passing nd as dir). For a node whose parent is not top, the
+   * child slot that holds dir is overwritten with the node's former parent,
+   * and dir becomes its parent; if dir is neither child, the node is left
+   * unchanged. For the node directly beneath top, dir (or, when dir is not the
+   * left child, the right child) becomes its parent, and the other child slot
+   * is overwritten with top's other child.
+   * 
+   * NOTE(review): in the beneath-top case it is the slot NOT holding dir that
+   * is overwritten, unlike the general case - confirm this is intended.
+   * 
+   * @param nd
+   *          node whose links are redirected; ignored if null
+   * @param dir
+   *          the child of nd that is to become nd's parent
+   */
+  void changeDirection(SequenceNode nd, SequenceNode dir)
+  {
+    if (nd == null)
+    {
+      return;
+    }
+
+    if (nd.parent() != top)
+    {
+      changeDirection((SequenceNode) nd.parent(), nd);
+
+      SequenceNode tmp = (SequenceNode) nd.parent();
+
+      if (dir == nd.left())
+      {
+        nd.setParent(dir);
+        nd.setLeft(tmp);
+      }
+      else if (dir == nd.right())
+      {
+        nd.setParent(dir);
+        nd.setRight(tmp);
+      }
+    }
+    else
+    {
+      if (dir == nd.left())
+      {
+        nd.setParent(nd.left());
+
+        if (top.left() == nd)
+        {
+          nd.setRight(top.right());
+        }
+        else
+        {
+          nd.setRight(top.left());
+        }
+      }
+      else
+      {
+        nd.setParent(nd.right());
+
+        if (top.left() == nd)
+        {
+          nd.setLeft(top.right());
+        }
+        else
+        {
+          nd.setLeft(top.left());
+        }
+      }
+    }
+  }
+
+  /**
+   * Answers the top node of the tree.
+   * 
+   * @return the node held in the top field
+ */ + public SequenceNode getTopNode() + { + return top; + } + + /** + * + * @return true if tree has real distances + */ + public boolean hasDistances() + { + return hasDistances; + } + + /** + * + * @return true if tree has real bootstrap values + */ + public boolean hasBootstrap() + { + return hasBootstrap; + } + + public boolean hasRootDistance() + { + return hasRootDistance; + } + + /** + * apply the given transform to all the nodes in the tree. + * + * @param nodeTransformI + */ + public void applyToNodes(NodeTransformI nodeTransformI) + { + for (Enumeration nodes = node.elements(); nodes + .hasMoreElements(); nodeTransformI.transform(nodes + .nextElement())) + { + ; + } + } + + public AlignmentView getOriginalData() + { + return seqData; + } +} diff --git a/src/jalview/analysis/scoremodels/DistanceScoreModel.java b/src/jalview/analysis/scoremodels/DistanceScoreModel.java new file mode 100644 index 0000000..0dd7617 --- /dev/null +++ b/src/jalview/analysis/scoremodels/DistanceScoreModel.java @@ -0,0 +1,40 @@ +package jalview.analysis.scoremodels; + +import jalview.api.analysis.ScoreModelI; +import jalview.api.analysis.SimilarityParamsI; +import jalview.datamodel.AlignmentView; +import jalview.math.MatrixI; + +public abstract class DistanceScoreModel implements ScoreModelI +{ + /** + * A similarity score is calculated by first computing a distance score, and + * then reversing the min-max range of the score values + */ + @Override + public MatrixI findSimilarities(AlignmentView seqData, + SimilarityParamsI options) + { + MatrixI distances = findDistances(seqData, options); + + MatrixI similarities = distanceToSimilarity(distances); + + return similarities; + } + + /** + * Converts distance scores to similarity scores, by reversing the range of + * score values so that max becomes min and vice versa. The input matrix is + * not modified. 
+ * + * @param distances + */ + public static MatrixI distanceToSimilarity(MatrixI distances) + { + MatrixI similarities = distances.copy(); + + similarities.reverseRange(false); + + return similarities; + } +} diff --git a/src/jalview/analysis/scoremodels/FeatureDistanceModel.java b/src/jalview/analysis/scoremodels/FeatureDistanceModel.java new file mode 100644 index 0000000..f88180a --- /dev/null +++ b/src/jalview/analysis/scoremodels/FeatureDistanceModel.java @@ -0,0 +1,222 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. 
+ */ +package jalview.analysis.scoremodels; + +import jalview.api.AlignmentViewPanel; +import jalview.api.FeatureRenderer; +import jalview.api.analysis.SimilarityParamsI; +import jalview.api.analysis.ViewBasedAnalysisI; +import jalview.datamodel.AlignmentView; +import jalview.datamodel.SeqCigar; +import jalview.datamodel.SequenceFeature; +import jalview.math.Matrix; +import jalview.math.MatrixI; +import jalview.util.SetUtils; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class FeatureDistanceModel extends DistanceScoreModel implements + ViewBasedAnalysisI +{ + private static final String NAME = "Sequence Feature Similarity"; + + private String description; + + FeatureRenderer fr; + + /** + * Constructor + */ + public FeatureDistanceModel() + { + } + + @Override + public boolean configureFromAlignmentView(AlignmentViewPanel view) + + { + fr = view.cloneFeatureRenderer(); + return true; + } + + /** + * Calculates a distance measure [i][j] between each pair of sequences as the + * average number of features they have but do not share. That is, find the + * features each sequence pair has at each column, ignore feature types they + * have in common, and count the rest. The totals are normalised by the number + * of columns processed. + *

+ * The parameters argument provides settings for treatment of gap-residue + * aligned positions, and whether the score is over the longer or shorter of + * each pair of sequences + * + * @param seqData + * @param params + */ + @Override + public MatrixI findDistances(AlignmentView seqData, + SimilarityParamsI params) + { + SeqCigar[] seqs = seqData.getSequences(); + int noseqs = seqs.length; + int cpwidth = 0;// = seqData.getWidth(); + double[][] distances = new double[noseqs][noseqs]; + List dft = null; + if (fr != null) + { + dft = fr.getDisplayedFeatureTypes(); + } + if (dft == null || dft.isEmpty()) + { + return new Matrix(distances); + } + + // need to get real position for view position + int[] viscont = seqData.getVisibleContigs(); + + /* + * scan each column, compute and add to each distance[i, j] + * the number of feature types that seqi and seqj do not share + */ + for (int vc = 0; vc < viscont.length; vc += 2) + { + for (int cpos = viscont[vc]; cpos <= viscont[vc + 1]; cpos++) + { + cpwidth++; + + /* + * first record feature types in this column for each sequence + */ + Map> sfap = findFeatureTypesAtColumn( + seqs, cpos); + + /* + * count feature types on either i'th or j'th sequence but not both + * and add this 'distance' measure to the total for [i, j] for j > i + */ + for (int i = 0; i < (noseqs - 1); i++) + { + for (int j = i + 1; j < noseqs; j++) + { + SeqCigar sc1 = seqs[i]; + SeqCigar sc2 = seqs[j]; + Set set1 = sfap.get(sc1); + Set set2 = sfap.get(sc2); + boolean gap1 = set1 == null; + boolean gap2 = set2 == null; + + /* + * gap-gap always scores zero + * residue-residue is always scored + * include gap-residue score if params say to do so + */ + if ((!gap1 && !gap2) || params.includeGaps()) + { + int seqDistance = SetUtils.countDisjunction(set1, set2); + distances[i][j] += seqDistance; + } + } + } + } + } + + /* + * normalise the distance scores (summed over columns) by the + * number of visible columns used in the calculation + * and fill in 
the bottom half of the matrix + */ + // TODO JAL-2424 cpwidth may be out by 1 - affects scores but not tree shape + for (int i = 0; i < noseqs; i++) + { + for (int j = i + 1; j < noseqs; j++) + { + distances[i][j] /= cpwidth; + distances[j][i] = distances[i][j]; + } + } + return new Matrix(distances); + } + + /** + * Builds and returns a map containing a (possibly empty) list (one per + * SeqCigar) of visible feature types at the given column position. The map + * has no entry for sequences which are gapped at the column position. + * + * @param seqs + * @param columnPosition + * @return + */ + protected Map> findFeatureTypesAtColumn( + SeqCigar[] seqs, int columnPosition) + { + Map> sfap = new HashMap>(); + for (SeqCigar seq : seqs) + { + int spos = seq.findPosition(columnPosition); + if (spos != -1) + { + Set types = new HashSet(); + List sfs = fr.findFeaturesAtRes(seq.getRefSeq(), + spos); + for (SequenceFeature sf : sfs) + { + types.add(sf.getType()); + } + sfap.put(seq, types); + } + } + return sfap; + } + + @Override + public String getName() + { + return NAME; + } + + @Override + public String getDescription() + { + return description; + } + + @Override + public boolean isDNA() + { + return true; + } + + @Override + public boolean isProtein() + { + return true; + } + + @Override + public String toString() + { + return "Score between sequences based on hamming distance between binary vectors marking features displayed at each column"; + } +} diff --git a/src/jalview/analysis/scoremodels/FeatureScoreModel.java b/src/jalview/analysis/scoremodels/FeatureScoreModel.java deleted file mode 100644 index 7c81912..0000000 --- a/src/jalview/analysis/scoremodels/FeatureScoreModel.java +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) - * Copyright (C) $$Year-Rel$$ The Jalview Authors - * - * This file is part of Jalview. 
- * - * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 - * of the License, or (at your option) any later version. - * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Jalview. If not, see . - * The Jalview Authors are detailed in the 'AUTHORS' file. - */ -package jalview.analysis.scoremodels; - -import jalview.api.analysis.ScoreModelI; -import jalview.api.analysis.ViewBasedAnalysisI; -import jalview.datamodel.AlignmentView; -import jalview.datamodel.SeqCigar; -import jalview.datamodel.SequenceFeature; - -import java.util.ArrayList; -import java.util.Hashtable; -import java.util.List; - -public class FeatureScoreModel implements ScoreModelI, ViewBasedAnalysisI -{ - jalview.api.FeatureRenderer fr; - - @Override - public boolean configureFromAlignmentView( - jalview.api.AlignmentViewPanel view) - { - fr = view.cloneFeatureRenderer(); - return true; - } - - @Override - public float[][] findDistances(AlignmentView seqData) - { - int nofeats = 0; - List dft = fr.getDisplayedFeatureTypes(); - nofeats = dft.size(); - SeqCigar[] seqs = seqData.getSequences(); - int noseqs = seqs.length; - int cpwidth = 0;// = seqData.getWidth(); - float[][] distance = new float[noseqs][noseqs]; - if (nofeats == 0) - { - for (float[] d : distance) - { - for (int i = 0; i < d.length; d[i++] = 0f) - { - ; - } - } - return distance; - } - // need to get real position for view position - int[] viscont = seqData.getVisibleContigs(); - for (int vc = 0; vc < viscont.length; vc += 2) - { - - for (int cpos = viscont[vc]; cpos <= viscont[vc + 1]; cpos++) - { - cpwidth++; - // get 
visible features at cpos under view's display settings and - // compare them - List> sfap = new ArrayList>(); - for (int i = 0; i < noseqs; i++) - { - Hashtable types = new Hashtable(); - int spos = seqs[i].findPosition(cpos); - if (spos != -1) - { - List sfs = fr.findFeaturesAtRes( - seqs[i].getRefSeq(), spos); - for (SequenceFeature sf : sfs) - { - types.put(sf.getType(), sf); - } - } - sfap.add(types); - } - for (int i = 0; i < (noseqs - 1); i++) - { - if (cpos == 0) - { - distance[i][i] = 0f; - } - for (int j = i + 1; j < noseqs; j++) - { - int sfcommon = 0; - // compare the two lists of features... - Hashtable fi = sfap.get(i), fk, fj = sfap - .get(j); - if (fi.size() > fj.size()) - { - fk = fj; - } - else - { - fk = fi; - fi = fj; - } - for (String k : fi.keySet()) - { - SequenceFeature sfj = fk.get(k); - if (sfj != null) - { - sfcommon++; - } - } - distance[i][j] += (fi.size() + fk.size() - 2f * sfcommon); - distance[j][i] += distance[i][j]; - } - } - } - } - for (int i = 0; i < noseqs; i++) - { - for (int j = i + 1; j < noseqs; j++) - { - distance[i][j] /= cpwidth; - distance[j][i] = distance[i][j]; - } - } - return distance; - } - - @Override - public String getName() - { - return "Sequence Feature Similarity"; - } - - @Override - public boolean isDNA() - { - return true; - } - - @Override - public boolean isProtein() - { - return true; - } - - @Override - public String toString() - { - return "Score between sequences based on hamming distance between binary vectors marking features displayed at each column"; - } -} diff --git a/src/jalview/analysis/scoremodels/PIDModel.java b/src/jalview/analysis/scoremodels/PIDModel.java new file mode 100644 index 0000000..985918b --- /dev/null +++ b/src/jalview/analysis/scoremodels/PIDModel.java @@ -0,0 +1,234 @@ +package jalview.analysis.scoremodels; + +import jalview.api.analysis.PairwiseScoreModelI; +import jalview.api.analysis.SimilarityParamsI; +import jalview.datamodel.AlignmentView; +import jalview.math.Matrix; 
+import jalview.math.MatrixI; +import jalview.util.Comparison; + +/** + * A class to provide sequence pairwise similarity based on residue identity. + * Instances of this class are immutable and thread-safe. + */ +public class PIDModel extends SimilarityScoreModel implements + PairwiseScoreModelI +{ + private static final String NAME = "PID"; + + /** + * Constructor + */ + public PIDModel() + { + } + + @Override + public String getName() + { + return NAME; + } + + /** + * Answers null for description. If a display name is needed, use getName() or + * an internationalized string built from the name. + */ + @Override + public String getDescription() + { + return null; + } + + @Override + public boolean isDNA() + { + return true; + } + + @Override + public boolean isProtein() + { + return true; + } + + /** + * Answers 1 if c and d are the same residue (ignoring case), and not gap + * characters. Answers 0 for non-matching or gap characters. + */ + @Override + public float getPairwiseScore(char c, char d) + { + c = toUpper(c); + d = toUpper(d); + if (c == d && !Comparison.isGap(c)) + { + return 1f; + } + return 0f; + } + + /** + * @param c + */ + protected static char toUpper(char c) + { + if ('a' <= c && c <= 'z') + { + c += 'A' - 'a'; + } + return c; + } + + /** + * Computes similarity scores based on pairwise percentage identity of + * sequences. For consistency with Jalview 2.10.1's SeqSpace mode PCA + * calculation, the percentage scores are rescaled to the width of the + * sequences (as if counts of identical residues). This method is thread-safe. 
+ */ + @Override + public MatrixI findSimilarities(AlignmentView seqData, + SimilarityParamsI options) + { + String[] seqs = seqData.getSequenceStrings(Comparison.GAP_DASH); + + MatrixI result = findSimilarities(seqs, options); + + result.multiply(seqData.getWidth() / 100d); + + return result; + } + + /** + * A distance score is computed in the usual way (by reversing the range of + * the similarity score results), and then rescaled to percentage values + * (reversing the rescaling to count values done in findSimilarities). This + * method is thread-safe. + */ + @Override + public MatrixI findDistances(AlignmentView seqData, + SimilarityParamsI options) + { + MatrixI result = super.findDistances(seqData, options); + + if (seqData.getWidth() != 0) + { + result.multiply(100d / seqData.getWidth()); + } + + return result; + } + + /** + * Compute percentage identity scores, using the gap treatment and + * normalisation specified by the options parameter + * + * @param seqs + * @param options + * @return + */ + protected MatrixI findSimilarities(String[] seqs, + SimilarityParamsI options) + { + // TODO reuse code in ScoreMatrix instead somehow + double[][] values = new double[seqs.length][]; + for (int row = 0; row < seqs.length; row++) + { + values[row] = new double[seqs.length]; + for (int col = 0; col < seqs.length; col++) + { + double total = computePID(seqs[row], seqs[col], options); + values[row][col] = total; + } + } + return new Matrix(values); + } + + /** + * Computes a percentage identity for two sequences, using the algorithm + * choices specified by the options parameter + * + * @param seq1 + * @param seq2 + * @param options + * @return + */ + public static double computePID(String seq1, String seq2, + SimilarityParamsI options) + { + int len1 = seq1.length(); + int len2 = seq2.length(); + int width = Math.max(len1, len2); + int total = 0; + int divideBy = 0; + + for (int i = 0; i < width; i++) + { + if (i >= len1 || i >= len2) + { + /* + * off the end of one 
sequence; stop if we are only matching + * on the shorter sequence length, else treat as trailing gap + */ + if (options.denominateByShortestLength()) + { + break; + } + if (options.includeGaps()) + { + divideBy++; + } + if (options.matchGaps()) + { + total++; + } + continue; + } + char c1 = seq1.charAt(i); + char c2 = seq2.charAt(i); + boolean gap1 = Comparison.isGap(c1); + boolean gap2 = Comparison.isGap(c2); + + if (gap1 && gap2) + { + /* + * gap-gap: include if options say so, if so + * have to score as identity; else ignore + */ + if (options.includeGappedColumns()) + { + divideBy++; + total++; + } + continue; + } + + if (gap1 || gap2) + { + /* + * gap-residue: include if options say so, + * count as match if options say so + */ + if (options.includeGaps()) + { + divideBy++; + } + if (options.matchGaps()) + { + total++; + } + continue; + } + + /* + * remaining case is gap-residue + */ + if (toUpper(c1) == toUpper(c2)) + { + total++; + } + divideBy++; + } + + return divideBy == 0 ? 0D : 100D * total / divideBy; + } +} diff --git a/src/jalview/analysis/scoremodels/PIDScoreModel.java b/src/jalview/analysis/scoremodels/PIDScoreModel.java deleted file mode 100644 index 0f7a67a..0000000 --- a/src/jalview/analysis/scoremodels/PIDScoreModel.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) - * Copyright (C) $$Year-Rel$$ The Jalview Authors - * - * This file is part of Jalview. - * - * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 - * of the License, or (at your option) any later version. - * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with Jalview. If not, see . - * The Jalview Authors are detailed in the 'AUTHORS' file. - */ -package jalview.analysis.scoremodels; - -import jalview.api.analysis.ScoreModelI; -import jalview.datamodel.AlignmentView; -import jalview.util.Comparison; - -public class PIDScoreModel implements ScoreModelI -{ - - @Override - public float[][] findDistances(AlignmentView seqData) - { - String[] sequenceString = seqData - .getSequenceStrings(Comparison.GapChars.charAt(0)); - int noseqs = sequenceString.length; - float[][] distance = new float[noseqs][noseqs]; - for (int i = 0; i < (noseqs - 1); i++) - { - for (int j = i; j < noseqs; j++) - { - if (j == i) - { - distance[i][i] = 0; - } - else - { - distance[i][j] = 100 - Comparison.PID(sequenceString[i], - sequenceString[j]); - - distance[j][i] = distance[i][j]; - } - } - } - return distance; - } - - @Override - public String getName() - { - return "PID"; - } - - @Override - public boolean isDNA() - { - return true; - } - - @Override - public boolean isProtein() - { - return true; - } - -} diff --git a/src/jalview/analysis/scoremodels/PairwiseSeqScoreModel.java b/src/jalview/analysis/scoremodels/PairwiseSeqScoreModel.java deleted file mode 100644 index 2ff2518..0000000 --- a/src/jalview/analysis/scoremodels/PairwiseSeqScoreModel.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) - * Copyright (C) $$Year-Rel$$ The Jalview Authors - * - * This file is part of Jalview. - * - * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 - * of the License, or (at your option) any later version. 
- * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Jalview. If not, see . - * The Jalview Authors are detailed in the 'AUTHORS' file. - */ -package jalview.analysis.scoremodels; - -import jalview.api.analysis.ScoreModelI; -import jalview.datamodel.AlignmentView; -import jalview.util.Comparison; - -public abstract class PairwiseSeqScoreModel implements ScoreModelI -{ - abstract public int getPairwiseScore(char c, char d); - - public float[][] findDistances(AlignmentView seqData) - { - String[] sequenceString = seqData - .getSequenceStrings(Comparison.GapChars.charAt(0)); - int noseqs = sequenceString.length; - float[][] distance = new float[noseqs][noseqs]; - - int maxscore = 0; - int end = sequenceString[0].length(); - for (int i = 0; i < (noseqs - 1); i++) - { - for (int j = i; j < noseqs; j++) - { - int score = 0; - - for (int k = 0; k < end; k++) - { - try - { - score += getPairwiseScore(sequenceString[i].charAt(k), - sequenceString[j].charAt(k)); - } catch (Exception ex) - { - System.err.println("err creating " + getName() + " tree"); - ex.printStackTrace(); - } - } - - distance[i][j] = (float) score; - - if (score > maxscore) - { - maxscore = score; - } - } - } - - for (int i = 0; i < (noseqs - 1); i++) - { - for (int j = i; j < noseqs; j++) - { - distance[i][j] = (float) maxscore - distance[i][j]; - distance[j][i] = distance[i][j]; - } - } - return distance; - } - - abstract public int[][] getMatrix(); -} diff --git a/src/jalview/analysis/scoremodels/ScoreMatrix.java b/src/jalview/analysis/scoremodels/ScoreMatrix.java new file mode 100644 index 0000000..9bec6e4 --- /dev/null +++ b/src/jalview/analysis/scoremodels/ScoreMatrix.java @@ -0,0 +1,585 @@ +/* + * Jalview - A Sequence 
Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ +package jalview.analysis.scoremodels; + +import jalview.api.analysis.PairwiseScoreModelI; +import jalview.api.analysis.SimilarityParamsI; +import jalview.datamodel.AlignmentView; +import jalview.math.Matrix; +import jalview.math.MatrixI; +import jalview.util.Comparison; + +import java.util.Arrays; + +/** + * A class that models a substitution score matrix for any given alphabet of + * symbols. Instances of this class are immutable and thread-safe. 
+ */ +public class ScoreMatrix extends SimilarityScoreModel implements + PairwiseScoreModelI +{ + private static final char GAP_CHARACTER = Comparison.GAP_DASH; + + /* + * an arbitrary score to assign for identity of an unknown symbol + * (this is the value on the diagonal in the * column of the NCBI matrix) + * (though a case could be made for using the minimum diagonal value) + */ + private static final int UNKNOWN_IDENTITY_SCORE = 1; + + /* + * Jalview 2.10.1 treated gaps as X (peptide) or N (nucleotide) + * for pairwise scoring; 2.10.2 uses gap score (last column) in + * score matrix (JAL-2397) + * Set this flag to true (via Groovy) for 2.10.1 behaviour + */ + private static boolean scoreGapAsAny = false; + + public static final short UNMAPPED = (short) -1; + + private static final String BAD_ASCII_ERROR = "Unexpected character %s in getPairwiseScore"; + + private static final int MAX_ASCII = 127; + + /* + * the name of the model as shown in menus + * each score model in use should have a unique name + */ + private String name; + + /* + * a description for the model as shown in tooltips + */ + private String description; + + /* + * the characters that the model provides scores for + */ + private char[] symbols; + + /* + * the score matrix; both dimensions must equal the number of symbols + * matrix[i][j] is the substitution score for replacing symbols[i] with symbols[j] + */ + private float[][] matrix; + + /* + * quick lookup to convert from an ascii character value to the index + * of the corresponding symbol in the score matrix + */ + private short[] symbolIndex; + + /* + * true for Protein Score matrix, false for dna score matrix + */ + private boolean peptide; + + private float minValue; + + private float maxValue; + + /** + * Constructor given a name, symbol alphabet, and matrix of scores for pairs + * of symbols. The matrix should be square and of the same size as the + * alphabet, for example 20x20 for a 20 symbol alphabet. 
+ * + * @param theName + * Unique, human readable name for the matrix + * @param alphabet + * the symbols to which scores apply + * @param values + * Pairwise scores indexed according to the symbol alphabet + */ + public ScoreMatrix(String theName, char[] alphabet, float[][] values) + { + this(theName, null, alphabet, values); + } + + /** + * Constructor given a name, description, symbol alphabet, and matrix of + * scores for pairs of symbols. The matrix should be square and of the same + * size as the alphabet, for example 20x20 for a 20 symbol alphabet. + * + * @param theName + * Unique, human readable name for the matrix + * @param theDescription + * descriptive display name suitable for use in menus + * @param alphabet + * the symbols to which scores apply + * @param values + * Pairwise scores indexed according to the symbol alphabet + */ + public ScoreMatrix(String theName, String theDescription, + char[] alphabet, float[][] values) + { + if (alphabet.length != values.length) + { + throw new IllegalArgumentException( + "score matrix size must match alphabet size"); + } + for (float[] row : values) + { + if (row.length != alphabet.length) + { + throw new IllegalArgumentException( + "score matrix size must be square"); + } + } + + this.matrix = values; + this.name = theName; + this.description = theDescription; + this.symbols = alphabet; + + symbolIndex = buildSymbolIndex(alphabet); + + findMinMax(); + + /* + * crude heuristic for now... 
+ */ + peptide = alphabet.length >= 20; + } + + /** + * Record the minimum and maximum score values + */ + protected void findMinMax() + { + float min = Float.MAX_VALUE; + float max = -Float.MAX_VALUE; + if (matrix != null) + { + for (float[] row : matrix) + { + if (row != null) + { + for (float f : row) + { + min = Math.min(min, f); + max = Math.max(max, f); + } + } + } + } + minValue = min; + maxValue = max; + } + + /** + * Returns an array A where A[i] is the position in the alphabet array of the + * character whose value is i. For example if the alphabet is { 'A', 'D', 'X' + * } then A['D'] = A[68] = 1. + *

+ * Unmapped characters (not in the alphabet) get an index of -1. + *

+ * Mappings are added automatically for lower case symbols (for non case + * sensitive scoring), unless they are explicitly present in the alphabet (are + * scored separately in the score matrix). + *

+ * the gap character (space, dash or dot) included in the alphabet (if any) is + * recorded in a field + * + * @param alphabet + * @return + */ + short[] buildSymbolIndex(char[] alphabet) + { + short[] index = new short[MAX_ASCII + 1]; + Arrays.fill(index, UNMAPPED); + short pos = 0; + for (char c : alphabet) + { + if (c <= MAX_ASCII) + { + index[c] = pos; + } + + /* + * also map lower-case character (unless separately mapped) + */ + if (c >= 'A' && c <= 'Z') + { + short lowerCase = (short) (c + ('a' - 'A')); + if (index[lowerCase] == UNMAPPED) + { + index[lowerCase] = pos; + } + } + pos++; + } + return index; + } + + @Override + public String getName() + { + return name; + } + + @Override + public String getDescription() + { + return description; + } + + @Override + public boolean isDNA() + { + return !peptide; + } + + @Override + public boolean isProtein() + { + return peptide; + } + + /** + * Returns a copy of the score matrix as used in getPairwiseScore. If using + * this matrix directly, callers must also call + * getMatrixIndex in order to get the matrix index for each + * character (symbol). + * + * @return + * @see #getMatrixIndex(char) + */ + public float[][] getMatrix() + { + float[][] v = new float[matrix.length][matrix.length]; + for (int i = 0; i < matrix.length; i++) + { + v[i] = Arrays.copyOf(matrix[i], matrix[i].length); + } + return v; + } + + /** + * Answers the matrix index for a given character, or -1 if unmapped in the + * matrix. Use this method only if using getMatrix in order to + * compute scores directly (without symbol lookup) for efficiency. + * + * @param c + * @return + * @see #getMatrix() + */ + public int getMatrixIndex(char c) + { + if (c < symbolIndex.length) + { + return symbolIndex[c]; + } + else + { + return UNMAPPED; + } + } + + /** + * Returns the pairwise score for substituting c with d. If either c or d is + * an unexpected character, returns 1 for identity (c == d), else the minimum + * score value in the matrix. 
+ */ + @Override + public float getPairwiseScore(char c, char d) + { + if (c >= symbolIndex.length) + { + System.err.println(String.format(BAD_ASCII_ERROR, c)); + return 0; + } + if (d >= symbolIndex.length) + { + System.err.println(String.format(BAD_ASCII_ERROR, d)); + return 0; + } + + int cIndex = symbolIndex[c]; + int dIndex = symbolIndex[d]; + if (cIndex != UNMAPPED && dIndex != UNMAPPED) + { + return matrix[cIndex][dIndex]; + } + + /* + * one or both symbols not found in the matrix + * currently scoring as 1 (for identity) or the minimum + * matrix score value (otherwise) + * (a case could be made for using minimum row/column value instead) + */ + return c == d ? UNKNOWN_IDENTITY_SCORE : getMinimumScore(); + } + + /** + * pretty print the matrix + */ + @Override + public String toString() + { + return outputMatrix(false); + } + + /** + * Print the score matrix, optionally formatted as html, with the alphabet + * symbols as column headings and at the start of each row. + *

+ * The non-html format should give an output which can be parsed as a score + * matrix file + * + * @param html + * @return + */ + public String outputMatrix(boolean html) + { + StringBuilder sb = new StringBuilder(512); + + /* + * heading row with alphabet + */ + if (html) + { + sb.append(""); + sb.append(html ? "" : ""); + } + else + { + sb.append("ScoreMatrix ").append(getName()).append("\n"); + } + for (char sym : symbols) + { + if (html) + { + sb.append(""); + } + else + { + sb.append("\t").append(sym); + } + } + sb.append(html ? "\n" : "\n"); + + /* + * table of scores + */ + for (char c1 : symbols) + { + if (html) + { + sb.append("" : ""); + for (char c2 : symbols) + { + sb.append(html ? "" : ""); + } + sb.append(html ? "\n" : "\n"); + } + if (html) + { + sb.append("
 ").append(sym).append(" 
"); + } + sb.append(c1).append(html ? "" : "\t") + .append(matrix[symbolIndex[c1]][symbolIndex[c2]]) + .append(html ? "
"); + } + return sb.toString(); + } + + /** + * Answers the number of symbols coded for (also equal to the number of rows + * and columns of the score matrix) + * + * @return + */ + public int getSize() + { + return symbols.length; + } + + /** + * Computes an NxN matrix where N is the number of sequences, and entry [i, j] + * is sequence[i] pairwise multiplied with sequence[j], as a sum of scores + * computed using the current score matrix. For example + *

+ * This method is thread-safe. + */ + @Override + public MatrixI findSimilarities(AlignmentView seqstrings, + SimilarityParamsI options) + { + char gapChar = scoreGapAsAny ? (seqstrings.isNa() ? 'N' : 'X') + : GAP_CHARACTER; + String[] seqs = seqstrings.getSequenceStrings(gapChar); + return findSimilarities(seqs, options); + } + + /** + * Computes pairwise similarities of a set of sequences using the given + * parameters + * + * @param seqs + * @param params + * @return + */ + protected MatrixI findSimilarities(String[] seqs, SimilarityParamsI params) + { + double[][] values = new double[seqs.length][]; + for (int row = 0; row < seqs.length; row++) + { + values[row] = new double[seqs.length]; + for (int col = 0; col < seqs.length; col++) + { + double total = computeSimilarity(seqs[row], seqs[col], params); + values[row][col] = total; + } + } + return new Matrix(values); + } + + /** + * Calculates the pairwise similarity of two strings using the given + * calculation parameters + * + * @param seq1 + * @param seq2 + * @param params + * @return + */ + protected double computeSimilarity(String seq1, String seq2, + SimilarityParamsI params) + { + int len1 = seq1.length(); + int len2 = seq2.length(); + double total = 0; + + int width = Math.max(len1, len2); + for (int i = 0; i < width; i++) + { + if (i >= len1 || i >= len2) + { + /* + * off the end of one sequence; stop if we are only matching + * on the shorter sequence length, else treat as trailing gap + */ + if (params.denominateByShortestLength()) + { + break; + } + } + + char c1 = i >= len1 ? GAP_CHARACTER : seq1.charAt(i); + char c2 = i >= len2 ? 
GAP_CHARACTER : seq2.charAt(i); + boolean gap1 = Comparison.isGap(c1); + boolean gap2 = Comparison.isGap(c2); + + if (gap1 && gap2) + { + /* + * gap-gap: include if options say so, else ignore + */ + if (!params.includeGappedColumns()) + { + continue; + } + } + else if (gap1 || gap2) + { + /* + * gap-residue: score if options say so + */ + if (!params.includeGaps()) + { + continue; + } + } + float score = getPairwiseScore(c1, c2); + total += score; + } + return total; + } + + /** + * Answers a hashcode computed from the symbol alphabet and the matrix score + * values + */ + @Override + public int hashCode() + { + int hs = Arrays.hashCode(symbols); + for (float[] row : matrix) + { + hs = hs * 31 + Arrays.hashCode(row); + } + return hs; + } + + /** + * Answers true if the argument is a ScoreMatrix with the same symbol alphabet + * and score values, else false + */ + @Override + public boolean equals(Object obj) + { + if (!(obj instanceof ScoreMatrix)) + { + return false; + } + ScoreMatrix sm = (ScoreMatrix) obj; + if (Arrays.equals(symbols, sm.symbols) + && Arrays.deepEquals(matrix, sm.matrix)) + { + return true; + } + return false; + } + + /** + * Returns the alphabet the matrix scores for, as a string of characters + * + * @return + */ + String getSymbols() + { + return new String(symbols); + } + + public float getMinimumScore() + { + return minValue; + } + + public float getMaximumScore() + { + return maxValue; + } +} diff --git a/src/jalview/analysis/scoremodels/ScoreModels.java b/src/jalview/analysis/scoremodels/ScoreModels.java new file mode 100644 index 0000000..7146383 --- /dev/null +++ b/src/jalview/analysis/scoremodels/ScoreModels.java @@ -0,0 +1,155 @@ +package jalview.analysis.scoremodels; + +import jalview.api.AlignmentViewPanel; +import jalview.api.analysis.ScoreModelI; +import jalview.api.analysis.ViewBasedAnalysisI; +import jalview.io.DataSourceType; +import jalview.io.FileParse; +import jalview.io.ScoreMatrixFile; + +import java.io.IOException; 
+import java.util.LinkedHashMap; +import java.util.Map; + +/** + * A class that can register and serve instances of ScoreModelI + */ +public class ScoreModels +{ + private final ScoreMatrix BLOSUM62; + + private final ScoreMatrix PAM250; + + private final ScoreMatrix DNA; + + private static ScoreModels instance = new ScoreModels(); + + private Map models; + + public static ScoreModels getInstance() + { + return instance; + } + + /** + * Private constructor to enforce use of singleton. Registers Jalview's + * "built-in" score models: + * + */ + private ScoreModels() + { + /* + * using LinkedHashMap keeps models ordered as added + */ + models = new LinkedHashMap(); + BLOSUM62 = loadScoreMatrix("scoreModel/blosum62.scm"); + PAM250 = loadScoreMatrix("scoreModel/pam250.scm"); + registerScoreModel(new PIDModel()); + DNA = loadScoreMatrix("scoreModel/dna.scm"); + registerScoreModel(new FeatureDistanceModel()); + } + + /** + * Tries to load a score matrix from the given resource file, and if + * successful, registers it. + * + * @param string + * @return + */ + ScoreMatrix loadScoreMatrix(String resourcePath) + { + try + { + /* + * delegate parsing to ScoreMatrixFile + */ + FileParse fp = new FileParse(resourcePath, DataSourceType.CLASSLOADER); + ScoreMatrix sm = new ScoreMatrixFile(fp).parseMatrix(); + registerScoreModel(sm); + return sm; + } catch (IOException e) + { + System.err.println("Error reading " + resourcePath + ": " + + e.getMessage()); + } + return null; + } + + /** + * Answers an iterable set of the registered score models. Currently these are + * returned in the order in which they were registered. + * + * @return + */ + public Iterable getModels() + { + return models.values(); + } + + /** + * Returns an instance of a score model for the given name. If the model is of + * 'view dependent' type (e.g. feature similarity), instantiates a new + * instance configured for the given view. Otherwise returns a cached instance + * of the score model. 
+ * + * @param name + * @param avp + * @return + */ + public ScoreModelI getScoreModel(String name, AlignmentViewPanel avp) + { + ScoreModelI model = models.get(name); + if (model instanceof ViewBasedAnalysisI) + { + try + { + model = model.getClass().newInstance(); + ((ViewBasedAnalysisI) model).configureFromAlignmentView(avp); + } catch (IllegalAccessException | InstantiationException e) + { + System.err.println("Error creating score model " + name + ": " + + e.getMessage()); + return null; + } + } + return model; + } + + public void registerScoreModel(ScoreModelI sm) + { + ScoreModelI sm2 = models.get(sm.getName()); + if (sm2 != null) + { + System.err.println("Warning: replacing score model " + sm2.getName()); + } + models.put(sm.getName(), sm); + } + + /** + * Returns the default peptide or nucleotide score model, currently BLOSUM62 + * or DNA + * + * @param forPeptide + * @return + */ + public ScoreMatrix getDefaultModel(boolean forPeptide) + { + return forPeptide ? BLOSUM62 : DNA; + } + + public ScoreMatrix getBlosum62() + { + return BLOSUM62; + } + + public ScoreMatrix getPam250() + { + return PAM250; + } +} diff --git a/src/jalview/analysis/scoremodels/SimilarityParams.java b/src/jalview/analysis/scoremodels/SimilarityParams.java new file mode 100644 index 0000000..e5751ca --- /dev/null +++ b/src/jalview/analysis/scoremodels/SimilarityParams.java @@ -0,0 +1,130 @@ +package jalview.analysis.scoremodels; + +import jalview.api.analysis.SimilarityParamsI; + +/** + * A class to hold parameters that configure the pairwise similarity + * calculation. Based on the paper + * + *
+ * Quantification of the variation in percentage identity for protein sequence alignments
+ * Raghava, GP and Barton, GJ
+ * BMC Bioinformatics. 2006 Sep 19;7:415
+ * 
+ * + * @see https://www.ncbi.nlm.nih.gov/pubmed/16984632 + */ +public class SimilarityParams implements SimilarityParamsI +{ + /** + * Based on Jalview's Comparison.PID method, which includes gaps and counts + * them as matching; it counts over the length of the shorter sequence + */ + public static final SimilarityParamsI Jalview = new SimilarityParams( + true, true, true, true); + + /** + * 'SeqSpace' mode PCA calculation includes gaps but does not count them as + * matching; it uses the longest sequence length + */ + public static final SimilarityParamsI SeqSpace = new SimilarityParams( + true, false, true, true); + + /** + * as described in the Raghava-Barton paper + *
    + *
  • ignores gap-gap
  • + *
  • does not score gap-residue
  • + *
  • includes gap-residue in lengths
  • + *
  • matches on longer of two sequences
  • + *
+ */ + public static final SimilarityParamsI PID1 = new SimilarityParams(false, + false, true, false); + + /** + * as described in the Raghava-Barton paper + *
    + *
  • ignores gap-gap
  • + *
  • ignores gap-residue
  • + *
  • matches on longer of two sequences
  • + *
+ */ + public static final SimilarityParamsI PID2 = new SimilarityParams(false, + false, false, false); + + /** + * as described in the Raghava-Barton paper + *
    + *
  • ignores gap-gap
  • + *
  • ignores gap-residue
  • + *
  • matches on shorter of sequences only
  • + *
+ */ + public static final SimilarityParamsI PID3 = new SimilarityParams(false, + false, false, true); + + /** + * as described in the Raghava-Barton paper + *
    + *
  • ignores gap-gap
  • + *
  • does not score gap-residue
  • + *
  • includes gap-residue in lengths
  • + *
  • matches on shorter of sequences only
  • + *
+ */ + public static final SimilarityParamsI PID4 = new SimilarityParams(false, + false, true, true); + + private boolean includeGappedColumns; + + private boolean matchGaps; + + private boolean includeGaps; + + private boolean denominateByShortestLength; + + /** + * Constructor + * + * @param includeGapGap + * @param matchGapResidue + * @param includeGapResidue + * if true, gapped positions are counted for normalisation by length + * @param shortestLength + * if true, the denominator is the shorter sequence length (possibly + * including gaps) + */ + public SimilarityParams(boolean includeGapGap, boolean matchGapResidue, + boolean includeGapResidue, boolean shortestLength) + { + includeGappedColumns = includeGapGap; + matchGaps = matchGapResidue; + includeGaps = includeGapResidue; + denominateByShortestLength = shortestLength; + } + + @Override + public boolean includeGaps() + { + return includeGaps; + } + + @Override + public boolean denominateByShortestLength() + { + return denominateByShortestLength; + } + + @Override + public boolean includeGappedColumns() + { + return includeGappedColumns; + } + + @Override + public boolean matchGaps() + { + return matchGaps; + } +} diff --git a/src/jalview/analysis/scoremodels/SimilarityScoreModel.java b/src/jalview/analysis/scoremodels/SimilarityScoreModel.java new file mode 100644 index 0000000..dae1f62 --- /dev/null +++ b/src/jalview/analysis/scoremodels/SimilarityScoreModel.java @@ -0,0 +1,43 @@ +package jalview.analysis.scoremodels; + +import jalview.api.analysis.ScoreModelI; +import jalview.api.analysis.SimilarityParamsI; +import jalview.datamodel.AlignmentView; +import jalview.math.MatrixI; + +public abstract class SimilarityScoreModel implements ScoreModelI +{ + + /** + * Computed similarity scores are converted to distance scores by subtracting + * every value from the maximum value. That is, maximum similarity corresponds + * to zero distance, and smaller similarities to larger distances. 
+ */ + @Override + public MatrixI findDistances(AlignmentView seqData, + SimilarityParamsI options) + { + MatrixI similarities = findSimilarities(seqData, options); + + MatrixI distances = similarityToDistance(similarities); + + return distances; + } + + /** + * Converts a matrix of similarity scores to distance scores, by reversing the + * range of the scores, mapping the maximum to zero. The input matrix is not + * modified. + * + * @param similarities + */ + public static MatrixI similarityToDistance(MatrixI similarities) + { + MatrixI distances = similarities.copy(); + + distances.reverseRange(true); + + return distances; + } + +} diff --git a/src/jalview/analysis/scoremodels/SWScoreModel.java b/src/jalview/analysis/scoremodels/SmithWatermanModel.java similarity index 65% rename from src/jalview/analysis/scoremodels/SWScoreModel.java rename to src/jalview/analysis/scoremodels/SmithWatermanModel.java index d5d998e..2428b81 100644 --- a/src/jalview/analysis/scoremodels/SWScoreModel.java +++ b/src/jalview/analysis/scoremodels/SmithWatermanModel.java @@ -21,23 +21,40 @@ package jalview.analysis.scoremodels; import jalview.analysis.AlignSeq; -import jalview.api.analysis.ScoreModelI; +import jalview.api.analysis.SimilarityParamsI; import jalview.datamodel.AlignmentView; import jalview.datamodel.SequenceI; +import jalview.math.Matrix; +import jalview.math.MatrixI; import jalview.util.Comparison; -public class SWScoreModel implements ScoreModelI +/** + * A class that computes pairwise similarity scores using the Smith-Waterman + * alignment algorithm + */ +public class SmithWatermanModel extends SimilarityScoreModel { + private static final String NAME = "Smith Waterman Score"; + + private String description; + + /** + * Constructor + */ + public SmithWatermanModel() + { + } @Override - public float[][] findDistances(AlignmentView seqData) + public MatrixI findSimilarities(AlignmentView seqData, + SimilarityParamsI options) { SequenceI[] sequenceString = 
seqData.getVisibleAlignment( - Comparison.GapChars.charAt(0)).getSequencesArray(); + Comparison.GAP_SPACE).getSequencesArray(); int noseqs = sequenceString.length; - float[][] distance = new float[noseqs][noseqs]; + double[][] distances = new double[noseqs][noseqs]; - float max = -1; + double max = -1; for (int i = 0; i < (noseqs - 1); i++) { @@ -48,31 +65,22 @@ public class SWScoreModel implements ScoreModelI as.calcScoreMatrix(); as.traceAlignment(); as.printAlignment(System.out); - distance[i][j] = (float) as.maxscore; + distances[i][j] = as.maxscore; - if (max < distance[i][j]) + if (max < distances[i][j]) { - max = distance[i][j]; + max = distances[i][j]; } } } - for (int i = 0; i < (noseqs - 1); i++) - { - for (int j = i; j < noseqs; j++) - { - distance[i][j] = max - distance[i][j]; - distance[j][i] = distance[i][j]; - } - } - - return distance; + return new Matrix(distances); } @Override public String getName() { - return "Smith Waterman Score"; + return NAME; } @Override @@ -87,8 +95,9 @@ public class SWScoreModel implements ScoreModelI return true; } - public String toString() + @Override + public String getDescription() { - return "Score between two sequences aligned with Smith Waterman with default Peptide/Nucleotide matrix"; + return description; } } diff --git a/src/jalview/api/SiftsClientI.java b/src/jalview/api/SiftsClientI.java index c795f3f..367a0de 100644 --- a/src/jalview/api/SiftsClientI.java +++ b/src/jalview/api/SiftsClientI.java @@ -100,7 +100,7 @@ public interface SiftsClientI * @return Sequence<->Structure mapping as int[][] * @throws SiftsException */ - public StringBuffer getMappingOutput(MappingOutputPojo mop) + public StringBuilder getMappingOutput(MappingOutputPojo mop) throws SiftsException; /** diff --git a/src/jalview/api/analysis/PairwiseScoreModelI.java b/src/jalview/api/analysis/PairwiseScoreModelI.java new file mode 100644 index 0000000..ecada36 --- /dev/null +++ b/src/jalview/api/analysis/PairwiseScoreModelI.java @@ -0,0 +1,22 
@@ +package jalview.api.analysis; + +/** + * An interface that describes classes that can compute similarity (aka + * substitution) scores for pairs of residues + */ +public interface PairwiseScoreModelI +{ + /** + * Answers a similarity score between two sequence characters (for + * substitution of the first by the second). Typically the highest scores are + * for identity, and the lowest for substitution of a residue by one with very + * different properties. + * + * @param c + * @param d + * @return + */ + abstract public float getPairwiseScore(char c, char d); + // TODO make this static when Java 8 + +} diff --git a/src/jalview/api/analysis/ScoreModelI.java b/src/jalview/api/analysis/ScoreModelI.java index 31a1c32..6afd483 100644 --- a/src/jalview/api/analysis/ScoreModelI.java +++ b/src/jalview/api/analysis/ScoreModelI.java @@ -1,36 +1,68 @@ -/* - * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) - * Copyright (C) $$Year-Rel$$ The Jalview Authors - * - * This file is part of Jalview. - * - * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 - * of the License, or (at your option) any later version. - * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Jalview. If not, see . - * The Jalview Authors are detailed in the 'AUTHORS' file. - */ package jalview.api.analysis; import jalview.datamodel.AlignmentView; +import jalview.math.MatrixI; public interface ScoreModelI { - - float[][] findDistances(AlignmentView seqData); - + /** + * Answers a name for the score model, suitable for display in menus. 
Names + * should be unique across score models in use. + * + * @return + * @see jalview.analysis.scoremodels.ScoreModels#forName(String) + */ String getName(); + /** + * Answers an informative description of the model, suitable for use in + * tooltips. Descriptions may be internationalised, and need not be unique + * (but should be). + * + * @return + */ + String getDescription(); + + /** + * Answers true if this model is applicable for nucleotide data (so should be + * shown in menus in that context) + * + * @return + */ boolean isDNA(); + /** + * Answers true if this model is applicable for peptide data (so should be + * shown in menus in that context) + * + * @return + */ boolean isProtein(); + // TODO getName, isDNA, isProtein can be static methods in Java 8 + + /** + * Returns a distance score for the given sequence regions, that is, a matrix + * whose value [i][j] is the distance of sequence i from sequence j by some + * measure. The options parameter provides configuration choices for how the + * similarity score is calculated. + * + * @param seqData + * @param options + * @return + */ + + MatrixI findDistances(AlignmentView seqData, SimilarityParamsI options); + + /** + * Returns a similarity score for the given sequence regions, that is, a + * matrix whose value [i][j] is the similarity of sequence i to sequence j by + * some measure. The options parameter provides configuration choices for how + * the similarity score is calculated. 
+ * + * @param seqData + * @param options + * @return + */ + MatrixI findSimilarities(AlignmentView seqData, SimilarityParamsI options); } diff --git a/src/jalview/api/analysis/SimilarityParamsI.java b/src/jalview/api/analysis/SimilarityParamsI.java new file mode 100644 index 0000000..581449f --- /dev/null +++ b/src/jalview/api/analysis/SimilarityParamsI.java @@ -0,0 +1,43 @@ +package jalview.api.analysis; + +/** + * A description of options when computing percentage identity of two aligned + * sequences + */ +public interface SimilarityParamsI +{ + /** + * Answers true if gap-gap aligned positions should be included in the + * calculation + * + * @return + */ + boolean includeGappedColumns(); + + /** + * Answers true if gap-residue alignment is considered a match + * + * @return + */ + // TODO is this specific to a PID score only? + // score matrix will compute whatever is configured for gap-residue + boolean matchGaps(); + + /** + * Answers true if gaps are included in the calculation. This may affect the + * calculated score, the denominator (normalisation factor) of the score, or + * both. Gap-gap positions are included if this and includeGappedColumns both + * answer true. 
+ * + * @return + */ + boolean includeGaps(); + + /** + * Answers true if only the shortest sequence length is used to divide the + * total score, false if the longest sequence length + * + * @return + */ + boolean denominateByShortestLength(); +} diff --git a/src/jalview/appletgui/AlignFrame.java b/src/jalview/appletgui/AlignFrame.java index 2646ede..cd1e1a9 100644 --- a/src/jalview/appletgui/AlignFrame.java +++ b/src/jalview/appletgui/AlignFrame.java @@ -22,6 +22,9 @@ package jalview.appletgui; import jalview.analysis.AlignmentSorter; import jalview.analysis.AnnotationSorter.SequenceAnnotationOrder; +import jalview.analysis.TreeBuilder; +import jalview.analysis.scoremodels.PIDModel; +import jalview.analysis.scoremodels.ScoreModels; import jalview.api.AlignViewControllerGuiI; import jalview.api.AlignViewControllerI; import jalview.api.AlignViewportI; @@ -2730,7 +2733,7 @@ public class AlignFrame extends EmbmenuFrame implements ActionListener, { SequenceI[] oldOrder = viewport.getAlignment().getSequencesArray(); AlignmentSorter.sortByPID(viewport.getAlignment(), viewport - .getAlignment().getSequenceAt(0), null); + .getAlignment().getSequenceAt(0)); addHistoryItem(new OrderCommand("Pairwise Sort", oldOrder, viewport.getAlignment())); @@ -2822,25 +2825,31 @@ public class AlignFrame extends EmbmenuFrame implements ActionListener, public void averageDistanceTreeMenuItem_actionPerformed() { - NewTreePanel("AV", "PID", "Average distance tree using PID"); + newTreePanel(TreeBuilder.AVERAGE_DISTANCE, new PIDModel().getName(), + "Average distance tree using PID"); } public void neighbourTreeMenuItem_actionPerformed() { - NewTreePanel("NJ", "PID", "Neighbour joining tree using PID"); + newTreePanel(TreeBuilder.NEIGHBOUR_JOINING, new PIDModel().getName(), + "Neighbour joining tree using PID"); } protected void njTreeBlosumMenuItem_actionPerformed() { - NewTreePanel("NJ", "BL", "Neighbour joining tree using BLOSUM62"); + newTreePanel(TreeBuilder.NEIGHBOUR_JOINING, 
ScoreModels.getInstance() + .getBlosum62().getName(), + "Neighbour joining tree using BLOSUM62"); } protected void avTreeBlosumMenuItem_actionPerformed() { - NewTreePanel("AV", "BL", "Average distance tree using BLOSUM62"); + newTreePanel(TreeBuilder.AVERAGE_DISTANCE, ScoreModels.getInstance() + .getBlosum62().getName(), + "Average distance tree using BLOSUM62"); } - void NewTreePanel(String type, String pwType, String title) + void newTreePanel(String type, String pwType, String title) { // are the sequences aligned? if (!viewport.getAlignment().isAligned(false)) diff --git a/src/jalview/appletgui/AlignViewport.java b/src/jalview/appletgui/AlignViewport.java index e50cc09..afe57e0 100644 --- a/src/jalview/appletgui/AlignViewport.java +++ b/src/jalview/appletgui/AlignViewport.java @@ -20,7 +20,7 @@ */ package jalview.appletgui; -import jalview.analysis.NJTree; +import jalview.analysis.TreeModel; import jalview.api.AlignViewportI; import jalview.api.FeatureSettingsModelI; import jalview.bin.JalviewLite; @@ -52,7 +52,7 @@ public class AlignViewport extends AlignmentViewport implements boolean validCharWidth = true; - NJTree currentTree = null; + TreeModel currentTree = null; public jalview.bin.JalviewLite applet; @@ -303,12 +303,12 @@ public class AlignViewport extends AlignmentViewport implements ranges.setEndSeq(height / getCharHeight()); } - public void setCurrentTree(NJTree tree) + public void setCurrentTree(TreeModel tree) { currentTree = tree; } - public NJTree getCurrentTree() + public TreeModel getCurrentTree() { return currentTree; } diff --git a/src/jalview/appletgui/PCAPanel.java b/src/jalview/appletgui/PCAPanel.java index c5ec0c1..2d77c59 100644 --- a/src/jalview/appletgui/PCAPanel.java +++ b/src/jalview/appletgui/PCAPanel.java @@ -20,6 +20,9 @@ */ package jalview.appletgui; +import jalview.analysis.scoremodels.ScoreModels; +import jalview.analysis.scoremodels.SimilarityParams; +import jalview.api.analysis.ScoreModelI; import jalview.datamodel.Alignment; 
import jalview.datamodel.AlignmentView; import jalview.datamodel.ColumnSelection; @@ -56,7 +59,7 @@ public class PCAPanel extends EmbmenuFrame implements Runnable, int top = 0; - public PCAPanel(AlignViewport av) + public PCAPanel(AlignViewport viewport) { try { @@ -73,19 +76,19 @@ public class PCAPanel extends EmbmenuFrame implements Runnable, zCombobox.addItem("dim " + i); } - this.av = av; - boolean selected = av.getSelectionGroup() != null - && av.getSelectionGroup().getSize() > 0; - AlignmentView seqstrings = av.getAlignmentView(selected); - boolean nucleotide = av.getAlignment().isNucleotide(); + this.av = viewport; + boolean selected = viewport.getSelectionGroup() != null + && viewport.getSelectionGroup().getSize() > 0; + AlignmentView seqstrings = viewport.getAlignmentView(selected); + boolean nucleotide = viewport.getAlignment().isNucleotide(); SequenceI[] seqs; if (!selected) { - seqs = av.getAlignment().getSequencesArray(); + seqs = viewport.getAlignment().getSequencesArray(); } else { - seqs = av.getSelectionGroup().getSequencesInOrder(av.getAlignment()); + seqs = viewport.getSelectionGroup().getSequencesInOrder(viewport.getAlignment()); } SeqCigar sq[] = seqstrings.getSequences(); int length = sq[0].getWidth(); @@ -99,9 +102,13 @@ public class PCAPanel extends EmbmenuFrame implements Runnable, return; } } - pcaModel = new PCAModel(seqstrings, seqs, nucleotide); - rc = new RotatableCanvas(av); + ScoreModelI scoreModel = ScoreModels.getInstance().getDefaultModel( + !nucleotide); + pcaModel = new PCAModel(seqstrings, seqs, nucleotide, scoreModel, + SimilarityParams.SeqSpace); + + rc = new RotatableCanvas(viewport); embedMenuIfNeeded(rc); add(rc, BorderLayout.CENTER); @@ -116,6 +123,7 @@ public class PCAPanel extends EmbmenuFrame implements Runnable, /** * DOCUMENT ME! 
*/ + @Override public void run() { // TODO progress indicator @@ -164,6 +172,7 @@ public class PCAPanel extends EmbmenuFrame implements Runnable, rc.paint(rc.getGraphics()); } + @Override public void actionPerformed(ActionEvent evt) { if (evt.getSource() == inputData) @@ -183,6 +192,7 @@ public class PCAPanel extends EmbmenuFrame implements Runnable, } } + @Override public void itemStateChanged(ItemEvent evt) { if (evt.getSource() == xCombobox) @@ -206,6 +216,9 @@ public class PCAPanel extends EmbmenuFrame implements Runnable, if (!pcaModel.isNucleotide()) { pcaModel.setNucleotide(true); + ScoreModelI scoreModel = ScoreModels.getInstance().getDefaultModel( + false); + pcaModel.setScoreModel(scoreModel); new Thread(this).start(); } } @@ -214,6 +227,9 @@ public class PCAPanel extends EmbmenuFrame implements Runnable, if (pcaModel.isNucleotide()) { pcaModel.setNucleotide(false); + ScoreModelI scoreModel = ScoreModels.getInstance().getDefaultModel( + true); + pcaModel.setScoreModel(scoreModel); new Thread(this).start(); } } diff --git a/src/jalview/appletgui/TreeCanvas.java b/src/jalview/appletgui/TreeCanvas.java index e30879c..48e9d64 100755 --- a/src/jalview/appletgui/TreeCanvas.java +++ b/src/jalview/appletgui/TreeCanvas.java @@ -21,7 +21,7 @@ package jalview.appletgui; import jalview.analysis.Conservation; -import jalview.analysis.NJTree; +import jalview.analysis.TreeModel; import jalview.api.AlignViewportI; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceGroup; @@ -48,12 +48,13 @@ import java.awt.event.MouseListener; import java.awt.event.MouseMotionListener; import java.util.Enumeration; import java.util.Hashtable; +import java.util.List; import java.util.Vector; public class TreeCanvas extends Panel implements MouseListener, MouseMotionListener { - NJTree tree; + TreeModel tree; ScrollPane scrollPane; @@ -115,13 +116,13 @@ public class TreeCanvas extends Panel implements MouseListener, selected.addOrRemove(sequence, true); } - public void 
setTree(NJTree tree) + public void setTree(TreeModel tree2) { - this.tree = tree; - tree.findHeight(tree.getTopNode()); + this.tree = tree2; + tree2.findHeight(tree2.getTopNode()); // Now have to calculate longest name based on the leaves - Vector leaves = tree.findLeaves(tree.getTopNode()); + Vector leaves = tree2.findLeaves(tree2.getTopNode()); boolean has_placeholders = false; longestName = ""; @@ -146,7 +147,7 @@ public class TreeCanvas extends Panel implements MouseListener, } public void drawNode(Graphics g, SequenceNode node, float chunk, - float scale, int width, int offx, int offy) + double scale, int width, int offx, int offy) { if (node == null) { @@ -157,8 +158,8 @@ public class TreeCanvas extends Panel implements MouseListener, { // Drawing leaf node - float height = node.height; - float dist = node.dist; + double height = node.height; + double dist = node.dist; int xstart = (int) ((height - dist) * scale) + offx; int xend = (int) (height * scale) + offx; @@ -240,8 +241,8 @@ public class TreeCanvas extends Panel implements MouseListener, drawNode(g, (SequenceNode) node.right(), chunk, scale, width, offx, offy); - float height = node.height; - float dist = node.dist; + double height = node.height; + double dist = node.dist; int xstart = (int) ((height - dist) * scale) + offx; int xend = (int) (height * scale) + offx; @@ -338,7 +339,7 @@ public class TreeCanvas extends Panel implements MouseListener, SequenceNode top = tree.getTopNode(); - float wscale = (float) (width * .8 - offx * 2) / tree.getMaxHeight(); + double wscale = (float) (width * .8 - offx * 2) / tree.getMaxHeight(); if (top.count == 0) { top.count = ((SequenceNode) top.left()).count @@ -350,7 +351,7 @@ public class TreeCanvas extends Panel implements MouseListener, } public void pickNode(Rectangle pickBox, SequenceNode node, float chunk, - float scale, int width, int offx, int offy) + double scale, int width, int offx, int offy) { if (node == null) { @@ -359,7 +360,7 @@ public class 
TreeCanvas extends Panel implements MouseListener, if (node.left() == null && node.right() == null) { - float height = node.height; + double height = node.height; // float dist = node.dist; // int xstart = (int) ( (height - dist) * scale) + offx; @@ -465,7 +466,7 @@ public class TreeCanvas extends Panel implements MouseListener, // for // scrollbar - float wscale = (width - labelLength - offx * 2) / tree.getMaxHeight(); + double wscale = (width - labelLength - offx * 2) / tree.getMaxHeight(); SequenceNode top = tree.getTopNode(); @@ -593,8 +594,7 @@ public class TreeCanvas extends Panel implements MouseListener, threshold = (float) (x - offx) / (float) (getSize().width - labelLength - 2 * offx); - tree.getGroups().removeAllElements(); - tree.groupNodes(tree.getTopNode(), threshold); + List groups = tree.groupNodes(threshold); setColor(tree.getTopNode(), Color.black); av.setSelectionGroup(null); @@ -608,7 +608,7 @@ public class TreeCanvas extends Panel implements MouseListener, codingComplement.clearSequenceColours(); } - colourGroups(); + colourGroups(groups); } } @@ -618,17 +618,16 @@ public class TreeCanvas extends Panel implements MouseListener, } - void colourGroups() + void colourGroups(List groups) { - for (int i = 0; i < tree.getGroups().size(); i++) + for (int i = 0; i < groups.size(); i++) { Color col = new Color((int) (Math.random() * 255), (int) (Math.random() * 255), (int) (Math.random() * 255)); - setColor(tree.getGroups().elementAt(i), col.brighter()); + setColor(groups.get(i), col.brighter()); - Vector l = tree.findLeaves(tree.getGroups() - .elementAt(i)); + Vector l = tree.findLeaves(groups.get(i)); Vector sequences = new Vector(); for (int j = 0; j < l.size(); j++) diff --git a/src/jalview/appletgui/TreePanel.java b/src/jalview/appletgui/TreePanel.java index b4b8ec2..8b1f79c 100644 --- a/src/jalview/appletgui/TreePanel.java +++ b/src/jalview/appletgui/TreePanel.java @@ -20,15 +20,17 @@ */ package jalview.appletgui; +import 
jalview.analysis.AverageDistanceTree; import jalview.analysis.NJTree; +import jalview.analysis.TreeBuilder; +import jalview.analysis.TreeModel; +import jalview.analysis.scoremodels.ScoreModels; +import jalview.analysis.scoremodels.SimilarityParams; import jalview.api.analysis.ScoreModelI; -import jalview.api.analysis.ViewBasedAnalysisI; import jalview.datamodel.Alignment; -import jalview.datamodel.AlignmentView; import jalview.datamodel.ColumnSelection; import jalview.datamodel.SequenceI; import jalview.io.NewickFile; -import jalview.schemes.ResidueProperties; import jalview.util.MessageManager; import java.awt.BorderLayout; @@ -58,17 +60,18 @@ public class TreePanel extends EmbmenuFrame implements ActionListener, TreeCanvas treeCanvas; - NJTree tree; + TreeModel tree; AlignmentPanel ap; AlignViewport av; - public NJTree getTree() + public TreeModel getTree() { return tree; } + @Override public void finalize() throws Throwable { ap = null; @@ -78,21 +81,8 @@ public class TreePanel extends EmbmenuFrame implements ActionListener, /** * Creates a new TreePanel object. - * - * @param av - * DOCUMENT ME! - * @param seqVector - * DOCUMENT ME! - * @param type - * DOCUMENT ME! - * @param pwtype - * DOCUMENT ME! - * @param s - * DOCUMENT ME! - * @param e - * DOCUMENT ME! */ - public TreePanel(AlignmentPanel ap, String type, String pwtype) + public TreePanel(AlignmentPanel alignPanel, String type, String pwtype) { try { @@ -103,22 +93,12 @@ public class TreePanel extends EmbmenuFrame implements ActionListener, ex.printStackTrace(); } - initTreePanel(ap, type, pwtype, null); + initTreePanel(alignPanel, type, pwtype, null); } /** * Creates a new TreePanel object. * - * @param av - * DOCUMENT ME! - * @param seqVector - * DOCUMENT ME! - * @param newtree - * DOCUMENT ME! - * @param type - * DOCUMENT ME! - * @param pwtype - * DOCUMENT ME! 
*/ public TreePanel(AlignmentPanel ap, String type, String pwtype, NewickFile newtree) @@ -159,7 +139,7 @@ public class TreePanel extends EmbmenuFrame implements ActionListener, // yields unaligned seqs) // or create a selection box around columns in alignment view // test Alignment(SeqCigar[]) - if (tree.seqData != null) + if (tree.getOriginalData() != null) { char gc = '-'; try @@ -170,8 +150,8 @@ public class TreePanel extends EmbmenuFrame implements ActionListener, } catch (Exception ex) { } - ; - Object[] alAndColsel = tree.seqData + + Object[] alAndColsel = tree.getOriginalData() .getAlignmentAndColumnSelection(gc); if (alAndColsel != null && alAndColsel[0] != null) @@ -200,62 +180,23 @@ public class TreePanel extends EmbmenuFrame implements ActionListener, this.newtree = newtree; } + @Override public void run() { if (newtree != null) { - if (odata == null) - { - tree = new NJTree(av.getAlignment().getSequencesArray(), newtree); - } - else - { - tree = new NJTree(av.getAlignment().getSequencesArray(), odata, - newtree); - } - + tree = new TreeModel(av.getAlignment().getSequencesArray(), odata, + newtree); } else { - int start, end; - SequenceI[] seqs; - boolean selview = av.getSelectionGroup() != null - && av.getSelectionGroup().getSize() > 1; - AlignmentView seqStrings = av.getAlignmentView(selview); - if (!selview) - { - start = 0; - end = av.getAlignment().getWidth(); - seqs = av.getAlignment().getSequencesArray(); - } - else - { - start = av.getSelectionGroup().getStartRes(); - end = av.getSelectionGroup().getEndRes() + 1; - seqs = av.getSelectionGroup().getSequencesInOrder( - av.getAlignment()); - } - ScoreModelI sm = ResidueProperties.getScoreModel(pwtype); - if (sm instanceof ViewBasedAnalysisI) - { - try - { - sm = sm.getClass().newInstance(); - ((ViewBasedAnalysisI) sm) - .configureFromAlignmentView(treeCanvas.ap); - } catch (Exception q) - { - System.err.println("Couldn't create a scoremodel instance for " - + sm.getName()); - q.printStackTrace(); - 
} - tree = new NJTree(seqs, seqStrings, type, pwtype, sm, start, end); - } - else - { - tree = new NJTree(seqs, seqStrings, type, pwtype, null, start, - end); - } + ScoreModelI sm1 = ScoreModels.getInstance().getScoreModel(pwtype, + treeCanvas.ap); + ScoreModelI sm = sm1; + TreeBuilder njtree = type.equals(TreeBuilder.NEIGHBOUR_JOINING) ? new NJTree( + av, sm, SimilarityParams.Jalview) + : new AverageDistanceTree(av, sm, SimilarityParams.Jalview); + tree = new TreeModel(njtree); } tree.reCount(tree.getTopNode()); @@ -286,6 +227,7 @@ public class TreePanel extends EmbmenuFrame implements ActionListener, } } + @Override public void actionPerformed(ActionEvent evt) { if (evt.getSource() == newickOutput) @@ -302,6 +244,7 @@ public class TreePanel extends EmbmenuFrame implements ActionListener, } } + @Override public void itemStateChanged(ItemEvent evt) { if (evt.getSource() == fitToWindow) diff --git a/src/jalview/bin/Jalview.java b/src/jalview/bin/Jalview.java index 966e952..954bb34 100755 --- a/src/jalview/bin/Jalview.java +++ b/src/jalview/bin/Jalview.java @@ -578,18 +578,14 @@ public class Jalview data = aparser.getValue("tree", true); if (data != null) { - jalview.io.NewickFile fin = null; try { System.out.println("CMD [-tree " + data + "] executed successfully!"); - fin = new NewickFile(data, + NewickFile nf = new NewickFile(data, AppletFormatAdapter.checkProtocol(data)); - if (fin != null) - { - af.getViewport().setCurrentTree( - af.ShowNewickTree(fin, data).getTree()); - } + af.getViewport().setCurrentTree( + af.showNewickTree(nf, data).getTree()); } catch (IOException ex) { System.err.println("Couldn't add tree " + data); diff --git a/src/jalview/datamodel/BinarySequence.java b/src/jalview/datamodel/BinarySequence.java index 62ee974..b7e15a6 100755 --- a/src/jalview/datamodel/BinarySequence.java +++ b/src/jalview/datamodel/BinarySequence.java @@ -20,8 +20,8 @@ */ package jalview.datamodel; +import jalview.analysis.scoremodels.ScoreMatrix; import 
jalview.schemes.ResidueProperties; -import jalview.schemes.ScoreMatrix; /** * Encode a sequence as a numeric vector using either classic residue binary @@ -112,23 +112,23 @@ public class BinarySequence extends Sequence /** * ancode using substitution matrix given in matrix * - * @param matrix + * @param smtrx */ - public void matrixEncode(final ScoreMatrix matrix) + public void matrixEncode(final ScoreMatrix smtrx) throws InvalidSequenceTypeException { - if (isNa != matrix.isDNA()) + if (isNa != smtrx.isDNA()) { throw new InvalidSequenceTypeException("matrix " - + matrix.getClass().getCanonicalName() + + smtrx.getClass().getCanonicalName() + " is not a valid matrix for " + (isNa ? "nucleotide" : "protein") + "sequences"); } - matrixEncode(matrix.isDNA() ? ResidueProperties.nucleotideIndex - : ResidueProperties.aaIndex, matrix.getMatrix()); + matrixEncode(smtrx.isDNA() ? ResidueProperties.nucleotideIndex + : ResidueProperties.aaIndex, smtrx.getMatrix()); } - private void matrixEncode(final int[] aaIndex, final int[][] matrix) + private void matrixEncode(final int[] aaIndex, final float[][] matrix) { int nores = initMatrixGetNoRes(); diff --git a/src/jalview/datamodel/SequenceNode.java b/src/jalview/datamodel/SequenceNode.java index b2f054c..fa12419 100755 --- a/src/jalview/datamodel/SequenceNode.java +++ b/src/jalview/datamodel/SequenceNode.java @@ -31,13 +31,13 @@ import java.awt.Color; public class SequenceNode extends BinaryNode { /** DOCUMENT ME!! */ - public float dist; + public double dist; /** DOCUMENT ME!! */ public int count; /** DOCUMENT ME!! */ - public float height; + public double height; /** DOCUMENT ME!! 
*/ public float ycount; @@ -178,7 +178,9 @@ public class SequenceNode extends BinaryNode { char q = name.charAt(c); if ('0' <= q && q <= '9') + { continue; + } return true; } } diff --git a/src/jalview/gui/AlignFrame.java b/src/jalview/gui/AlignFrame.java index d4c87d8..8eb09ab 100644 --- a/src/jalview/gui/AlignFrame.java +++ b/src/jalview/gui/AlignFrame.java @@ -34,7 +34,7 @@ import jalview.api.AlignmentViewPanel; import jalview.api.FeatureSettingsControllerI; import jalview.api.SplitContainerI; import jalview.api.ViewStyleI; -import jalview.api.analysis.ScoreModelI; +import jalview.api.analysis.SimilarityParamsI; import jalview.bin.Cache; import jalview.bin.Jalview; import jalview.commands.CommandI; @@ -69,6 +69,7 @@ import jalview.io.FileFormat; import jalview.io.FileFormatI; import jalview.io.FileFormats; import jalview.io.FileLoader; +import jalview.io.FileParse; import jalview.io.FormatAdapter; import jalview.io.HtmlSvgOutput; import jalview.io.IdentifyFile; @@ -77,12 +78,12 @@ import jalview.io.JalviewFileChooser; import jalview.io.JalviewFileView; import jalview.io.JnetAnnotationMaker; import jalview.io.NewickFile; +import jalview.io.ScoreMatrixFile; import jalview.io.TCoffeeScoreFile; import jalview.jbgui.GAlignFrame; import jalview.schemes.ColourSchemeI; import jalview.schemes.ColourSchemes; import jalview.schemes.ResidueColourScheme; -import jalview.schemes.ResidueProperties; import jalview.schemes.TCoffeeColourScheme; import jalview.util.MessageManager; import jalview.viewmodel.AlignmentViewport; @@ -364,7 +365,15 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, setMenusFromViewport(viewport); buildSortByAnnotationScoresMenu(); - buildTreeMenu(); + calculateTree.addActionListener(new ActionListener() + { + + @Override + public void actionPerformed(ActionEvent e) + { + openTreePcaDialog(); + } + }); buildColourMenu(); if (Desktop.desktop != null) @@ -3442,7 +3451,7 @@ public class AlignFrame extends GAlignFrame implements 
DropTargetListener, { SequenceI[] oldOrder = viewport.getAlignment().getSequencesArray(); AlignmentSorter.sortByPID(viewport.getAlignment(), viewport - .getAlignment().getSequenceAt(0), null); + .getAlignment().getSequenceAt(0)); addHistoryItem(new OrderCommand("Pairwise Sort", oldOrder, viewport.getAlignment())); alignPanel.paintAlignment(true); @@ -3536,35 +3545,6 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, } } - /** - * DOCUMENT ME! - * - * @param e - * DOCUMENT ME! - */ - @Override - public void PCAMenuItem_actionPerformed(ActionEvent e) - { - if (((viewport.getSelectionGroup() != null) - && (viewport.getSelectionGroup().getSize() < 4) && (viewport - .getSelectionGroup().getSize() > 0)) - || (viewport.getAlignment().getHeight() < 4)) - { - JvOptionPane - .showInternalMessageDialog( - this, - MessageManager - .getString("label.principal_component_analysis_must_take_least_four_input_sequences"), - MessageManager - .getString("label.sequence_selection_insufficient"), - JvOptionPane.WARNING_MESSAGE); - - return; - } - - new PCAPanel(alignPanel); - } - @Override public void autoCalculate_actionPerformed(ActionEvent e) { @@ -3589,67 +3569,21 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, } /** - * DOCUMENT ME! - * - * @param e - * DOCUMENT ME! - */ - @Override - public void averageDistanceTreeMenuItem_actionPerformed(ActionEvent e) - { - newTreePanel("AV", "PID", "Average distance tree using PID"); - } - - /** - * DOCUMENT ME! - * - * @param e - * DOCUMENT ME! - */ - @Override - public void neighbourTreeMenuItem_actionPerformed(ActionEvent e) - { - newTreePanel("NJ", "PID", "Neighbour joining tree using PID"); - } - - /** - * DOCUMENT ME! - * - * @param e - * DOCUMENT ME! - */ - @Override - protected void njTreeBlosumMenuItem_actionPerformed(ActionEvent e) - { - newTreePanel("NJ", "BL", "Neighbour joining tree using BLOSUM62"); - } - - /** - * DOCUMENT ME! - * - * @param e - * DOCUMENT ME! 
- */ - @Override - protected void avTreeBlosumMenuItem_actionPerformed(ActionEvent e) - { - newTreePanel("AV", "BL", "Average distance tree using BLOSUM62"); - } - - /** - * DOCUMENT ME! + * Constructs a tree panel and adds it to the desktop * * @param type - * DOCUMENT ME! - * @param pwType - * DOCUMENT ME! - * @param title - * DOCUMENT ME! + * tree type (NJ or AV) + * @param modelName + * name of score model used to compute the tree + * @param options + * parameters for the distance or similarity calculation */ - void newTreePanel(String type, String pwType, String title) + void newTreePanel(String type, String modelName, SimilarityParamsI options) { + String frameTitle = ""; TreePanel tp; + boolean onSelection = false; if (viewport.getSelectionGroup() != null && viewport.getSelectionGroup().getSize() > 0) { @@ -3685,45 +3619,29 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, return; } } - - title = title + " on region"; - tp = new TreePanel(alignPanel, type, pwType); + onSelection = true; } else { - // are the visible sequences aligned? - if (!viewport.getAlignment().isAligned(false)) - { - JvOptionPane - .showMessageDialog( - Desktop.desktop, - MessageManager - .getString("label.sequences_must_be_aligned_before_creating_tree"), - MessageManager - .getString("label.sequences_not_aligned"), - JvOptionPane.WARNING_MESSAGE); - - return; - } - if (viewport.getAlignment().getHeight() < 2) { return; } - - tp = new TreePanel(alignPanel, type, pwType); } - title += " from "; + tp = new TreePanel(alignPanel, type, modelName, options); + frameTitle = tp.getPanelTitle() + (onSelection ? 
" on region" : ""); + + frameTitle += " from "; if (viewport.viewName != null) { - title += viewport.viewName + " of "; + frameTitle += viewport.viewName + " of "; } - title += this.title; + frameTitle += this.title; - Desktop.addInternalFrame(tp, title, 600, 500); + Desktop.addInternalFrame(tp, frameTitle, 600, 500); } /** @@ -3845,48 +3763,10 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, * call. Listeners are added to remove the menu item when the treePanel is * closed, and adjust the tree leaf to sequence mapping when the alignment is * modified. - * - * @param treePanel - * Displayed tree window. - * @param title - * SortBy menu item title. */ @Override - public void buildTreeMenu() + public void buildTreeSortMenu() { - calculateTree.removeAll(); - // build the calculate menu - - for (final String type : new String[] { "NJ", "AV" }) - { - String treecalcnm = MessageManager.getString("label.tree_calc_" - + type.toLowerCase()); - for (final String pwtype : ResidueProperties.scoreMatrices.keySet()) - { - JMenuItem tm = new JMenuItem(); - ScoreModelI sm = ResidueProperties.scoreMatrices.get(pwtype); - if (sm.isDNA() == viewport.getAlignment().isNucleotide() - || sm.isProtein() == !viewport.getAlignment() - .isNucleotide()) - { - String smn = MessageManager.getStringOrReturn( - "label.score_model_", sm.getName()); - final String title = MessageManager.formatMessage( - "label.treecalc_title", treecalcnm, smn); - tm.setText(title);// - tm.addActionListener(new java.awt.event.ActionListener() - { - @Override - public void actionPerformed(ActionEvent e) - { - newTreePanel(type, pwtype, title); - } - }); - calculateTree.add(tm); - } - - } - } sortByTreeMenu.removeAll(); List comps = PaintRefresher.components.get(viewport @@ -4037,13 +3917,13 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, if (value == JalviewFileChooser.APPROVE_OPTION) { - String choice = chooser.getSelectedFile().getPath(); - 
jalview.bin.Cache.setProperty("LAST_DIRECTORY", choice); - jalview.io.NewickFile fin = null; + String filePath = chooser.getSelectedFile().getPath(); + Cache.setProperty("LAST_DIRECTORY", filePath); + NewickFile fin = null; try { - fin = new NewickFile(choice, DataSourceType.FILE); - viewport.setCurrentTree(ShowNewickTree(fin, choice).getTree()); + fin = new NewickFile(filePath, DataSourceType.FILE); + viewport.setCurrentTree(showNewickTree(fin, filePath).getTree()); } catch (Exception ex) { JvOptionPane @@ -4065,25 +3945,19 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, } } - public TreePanel ShowNewickTree(NewickFile nf, String title) - { - return ShowNewickTree(nf, title, 600, 500, 4, 5); - } - - public TreePanel ShowNewickTree(NewickFile nf, String title, - AlignmentView input) + public TreePanel showNewickTree(NewickFile nf, String treeTitle) { - return ShowNewickTree(nf, title, input, 600, 500, 4, 5); + return showNewickTree(nf, treeTitle, 600, 500, 4, 5); } - public TreePanel ShowNewickTree(NewickFile nf, String title, int w, + public TreePanel showNewickTree(NewickFile nf, String treeTitle, int w, int h, int x, int y) { - return ShowNewickTree(nf, title, null, w, h, x, y); + return showNewickTree(nf, treeTitle, null, w, h, x, y); } /** - * Add a treeviewer for the tree extracted from a newick file object to the + * Add a treeviewer for the tree extracted from a Newick file object to the * current alignment view * * @param nf @@ -4102,7 +3976,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, * position * @return TreePanel handle */ - public TreePanel ShowNewickTree(NewickFile nf, String title, + public TreePanel showNewickTree(NewickFile nf, String treeTitle, AlignmentView input, int w, int h, int x, int y) { TreePanel tp = null; @@ -4113,7 +3987,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, if (nf.getTree() != null) { - tp = new TreePanel(alignPanel, "FromFile", 
title, nf, input); + tp = new TreePanel(alignPanel, nf, treeTitle, input); tp.setSize(w, h); @@ -4122,7 +3996,7 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, tp.setLocation(x, y); } - Desktop.addInternalFrame(tp, title, w, h); + Desktop.addInternalFrame(tp, treeTitle, w, h); } } catch (Exception ex) { @@ -4695,10 +4569,13 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, } /** - * Attempt to load a "dropped" file or URL string: First by testing whether - * it's an Annotation file, then a JNet file, and finally a features file. If - * all are false then the user may have dropped an alignment file onto this - * AlignFrame. + * Attempt to load a "dropped" file or URL string, by testing in turn for + *
<ul>
+ * <li>an Annotation file</li>
+ * <li>a JNet file</li>
+ * <li>a features file</li>
+ * <li>else try to interpret as an alignment file</li>
+ * </ul>
* * @param file * either a filename or a URL string. @@ -4772,7 +4649,18 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, { format = new IdentifyFile().identify(file, sourceType); } - if (FileFormat.Jnet.equals(format)) + if (FileFormat.ScoreMatrix == format) + { + ScoreMatrixFile sm = new ScoreMatrixFile(new FileParse(file, + sourceType)); + sm.parse(); + // todo: i18n this message + statusBar + .setText(MessageManager.formatMessage( + "label.successfully_loaded_matrix", + sm.getMatrixName())); + } + else if (FileFormat.Jnet.equals(format)) { JPredFile predictions = new JPredFile(file, sourceType); new JnetAnnotationMaker(); @@ -5707,6 +5595,18 @@ public class AlignFrame extends GAlignFrame implements DropTargetListener, ColourSchemeI colourScheme = viewport.getGlobalColourScheme(); ColourMenuHelper.setColourSelected(colourMenu, colourScheme); } + + /** + * Open a dialog (if not already open) that allows the user to select and + * calculate PCA or Tree analysis + */ + protected void openTreePcaDialog() + { + if (alignPanel.getCalculationDialog() == null) + { + new CalculationChooser(AlignFrame.this); + } + } } class PrintThread extends Thread diff --git a/src/jalview/gui/AlignViewport.java b/src/jalview/gui/AlignViewport.java index 26a7a3a..f22a911 100644 --- a/src/jalview/gui/AlignViewport.java +++ b/src/jalview/gui/AlignViewport.java @@ -22,7 +22,7 @@ package jalview.gui; import jalview.analysis.AlignmentUtils; import jalview.analysis.AnnotationSorter.SequenceAnnotationOrder; -import jalview.analysis.NJTree; +import jalview.analysis.TreeModel; import jalview.api.AlignViewportI; import jalview.api.AlignmentViewPanel; import jalview.api.FeatureColourI; @@ -76,7 +76,7 @@ public class AlignViewport extends AlignmentViewport implements { Font font; - NJTree currentTree = null; + TreeModel currentTree = null; boolean cursorMode = false; @@ -493,7 +493,7 @@ public class AlignViewport extends AlignmentViewport implements * @param tree * 
DOCUMENT ME! */ - public void setCurrentTree(NJTree tree) + public void setCurrentTree(TreeModel tree) { currentTree = tree; } @@ -503,7 +503,7 @@ public class AlignViewport extends AlignmentViewport implements * * @return DOCUMENT ME! */ - public NJTree getCurrentTree() + public TreeModel getCurrentTree() { return currentTree; } diff --git a/src/jalview/gui/AlignmentPanel.java b/src/jalview/gui/AlignmentPanel.java index 8ade5d6..04de50b 100644 --- a/src/jalview/gui/AlignmentPanel.java +++ b/src/jalview/gui/AlignmentPanel.java @@ -106,6 +106,8 @@ public class AlignmentPanel extends GAlignmentPanel implements private PropertyChangeListener propertyChangeListener; + private CalculationChooser calculationDialog; + /** * Creates a new AlignmentPanel object. * @@ -169,6 +171,11 @@ public class AlignmentPanel extends GAlignmentPanel implements { av.alignmentChanged(this); + if (getCalculationDialog() != null) + { + getCalculationDialog().validateCalcTypes(); + } + alignFrame.updateEditMenuBar(); paintAlignment(true); @@ -1647,6 +1654,8 @@ public class AlignmentPanel extends GAlignmentPanel implements PaintRefresher.RemoveComponent(getIdPanel().getIdCanvas()); PaintRefresher.RemoveComponent(this); + closeChildFrames(); + /* * try to ensure references are nulled */ @@ -1678,6 +1687,17 @@ public class AlignmentPanel extends GAlignmentPanel implements } /** + * Close any open dialogs that would be orphaned when this one is closed + */ + protected void closeChildFrames() + { + if (calculationDialog != null) + { + calculationDialog.closeFrame(); + } + } + + /** * hides or shows dynamic annotation rows based on groups and av state flags */ public void updateAnnotation() @@ -1906,4 +1926,24 @@ public class AlignmentPanel extends GAlignmentPanel implements repaint(); } } + + /** + * Set the reference to the PCA/Tree chooser dialog for this panel. This + * reference should be nulled when the dialog is closed. 
+ * + * @param calculationChooser + */ + public void setCalculationDialog(CalculationChooser calculationChooser) + { + calculationDialog = calculationChooser; + } + + /** + * Returns the reference to the PCA/Tree chooser dialog for this panel (null + * if none is open) + */ + public CalculationChooser getCalculationDialog() + { + return calculationDialog; + } } diff --git a/src/jalview/gui/CalculationChooser.java b/src/jalview/gui/CalculationChooser.java new file mode 100644 index 0000000..05f1fba --- /dev/null +++ b/src/jalview/gui/CalculationChooser.java @@ -0,0 +1,558 @@ +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. 
+ */ +package jalview.gui; + +import jalview.analysis.TreeBuilder; +import jalview.analysis.scoremodels.ScoreModels; +import jalview.analysis.scoremodels.SimilarityParams; +import jalview.api.analysis.ScoreModelI; +import jalview.api.analysis.SimilarityParamsI; +import jalview.util.MessageManager; + +import java.awt.BorderLayout; +import java.awt.Color; +import java.awt.Component; +import java.awt.Dimension; +import java.awt.FlowLayout; +import java.awt.Font; +import java.awt.GridLayout; +import java.awt.Insets; +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; +import java.awt.event.FocusEvent; +import java.awt.event.FocusListener; +import java.awt.event.MouseAdapter; +import java.awt.event.MouseEvent; +import java.beans.PropertyVetoException; +import java.util.ArrayList; +import java.util.List; + +import javax.swing.BorderFactory; +import javax.swing.ButtonGroup; +import javax.swing.DefaultComboBoxModel; +import javax.swing.JButton; +import javax.swing.JCheckBox; +import javax.swing.JComboBox; +import javax.swing.JInternalFrame; +import javax.swing.JLabel; +import javax.swing.JLayeredPane; +import javax.swing.JPanel; +import javax.swing.JRadioButton; +import javax.swing.event.InternalFrameAdapter; +import javax.swing.event.InternalFrameEvent; + +/** + * A dialog where a user can choose and action Tree or PCA calculation options + */ +public class CalculationChooser extends JPanel +{ + /* + * flag for whether gap matches residue in the PID calculation for a Tree + * - true gives Jalview 2.10.1 behaviour + * - set to false (using Groovy) for a more correct tree + * (JAL-374) + */ + private static boolean treeMatchGaps = true; + + private static final Font VERDANA_11PT = new Font("Verdana", 0, 11); + + AlignFrame af; + + JRadioButton pca; + + JRadioButton neighbourJoining; + + JRadioButton averageDistance; + + JComboBox modelNames; + + JButton ok; + + private JInternalFrame frame; + + private JCheckBox includeGaps; + + private JCheckBox 
matchGaps; + + private JCheckBox includeGappedColumns; + + private JCheckBox shorterSequence; + + /** + * Constructor + * + * @param af + */ + public CalculationChooser(AlignFrame alignFrame) + { + this.af = alignFrame; + init(); + af.alignPanel.setCalculationDialog(this); + } + + /** + * Lays out the panel and adds it to the desktop + */ + void init() + { + setLayout(new BorderLayout()); + frame = new JInternalFrame(); + frame.setContentPane(this); + this.setBackground(Color.white); + frame.addFocusListener(new FocusListener() + { + + @Override + public void focusLost(FocusEvent e) + { + } + + @Override + public void focusGained(FocusEvent e) + { + validateCalcTypes(); + } + }); + /* + * Layout consists of 3 or 4 panels: + * - first with choice of PCA or tree method NJ or AV + * - second with choice of score model + * - third with score model parameter options [suppressed] + * - fourth with OK and Cancel + */ + pca = new JRadioButton( + MessageManager.getString("label.principal_component_analysis")); + pca.setOpaque(false); + neighbourJoining = new JRadioButton( + MessageManager.getString("label.tree_calc_nj")); + averageDistance = new JRadioButton( + MessageManager.getString("label.tree_calc_av")); + neighbourJoining.setOpaque(false); + + JPanel calcChoicePanel = new JPanel(new FlowLayout(FlowLayout.LEFT)); + calcChoicePanel.setOpaque(false); + + // first create the Tree calculation's border panel + JPanel treePanel = new JPanel(new FlowLayout(FlowLayout.LEFT)); + treePanel.setOpaque(false); + + treePanel.setBorder(BorderFactory.createTitledBorder(MessageManager + .getString("label.tree"))); + + // then copy the inset dimensions for the border-less PCA panel + JPanel pcaBorderless = new JPanel(new FlowLayout(FlowLayout.LEFT)); + Insets b = treePanel.getBorder().getBorderInsets(treePanel); + pcaBorderless.setBorder(BorderFactory.createEmptyBorder(2, b.left, 2, + b.right)); + pcaBorderless.setOpaque(false); + + pcaBorderless.add(pca, FlowLayout.LEFT); + 
calcChoicePanel.add(pcaBorderless, FlowLayout.LEFT); + + + treePanel.add(neighbourJoining); + treePanel.add(averageDistance); + + calcChoicePanel.add(treePanel); + + ButtonGroup calcTypes = new ButtonGroup(); + calcTypes.add(pca); + calcTypes.add(neighbourJoining); + calcTypes.add(averageDistance); + + ActionListener calcChanged = new ActionListener() + { + @Override + public void actionPerformed(ActionEvent e) + { + validateCalcTypes(); + } + }; + pca.addActionListener(calcChanged); + neighbourJoining.addActionListener(calcChanged); + averageDistance.addActionListener(calcChanged); + /* + * score models drop-down - with added tooltips! + */ + modelNames = buildModelOptionsList(); + + JPanel scoreModelPanel = new JPanel(new FlowLayout(FlowLayout.CENTER)); + scoreModelPanel.setOpaque(false); + scoreModelPanel.add(modelNames); + + /* + * score model parameters + */ + JPanel paramsPanel = new JPanel(new GridLayout(5, 1)); + paramsPanel.setOpaque(false); + includeGaps = new JCheckBox("Include gaps"); + matchGaps = new JCheckBox("Match gaps"); + includeGappedColumns = new JCheckBox("Include gapped columns"); + shorterSequence = new JCheckBox("Match on shorter sequence"); + paramsPanel.add(new JLabel("Pairwise sequence scoring options")); + paramsPanel.add(includeGaps); + paramsPanel.add(matchGaps); + paramsPanel.add(includeGappedColumns); + paramsPanel.add(shorterSequence); + + /* + * OK / Cancel buttons + */ + ok = new JButton(MessageManager.getString("action.calculate")); + ok.setFont(VERDANA_11PT); + ok.addActionListener(new java.awt.event.ActionListener() + { + @Override + public void actionPerformed(ActionEvent e) + { + ok_actionPerformed(); + } + }); + JButton cancel = new JButton(MessageManager.getString("action.close")); + cancel.setFont(VERDANA_11PT); + cancel.addActionListener(new java.awt.event.ActionListener() + { + @Override + public void actionPerformed(ActionEvent e) + { + cancel_actionPerformed(e); + } + }); + JPanel actionPanel = new JPanel(); + 
actionPanel.setOpaque(false); + actionPanel.add(ok); + actionPanel.add(cancel); + + boolean includeParams = false; + this.add(calcChoicePanel, BorderLayout.CENTER); + calcChoicePanel.add(scoreModelPanel); + if (includeParams) + { + scoreModelPanel.add(paramsPanel); + } + this.add(actionPanel, BorderLayout.SOUTH); + + int width = 350; + int height = includeParams ? 420 : 240; + + setMinimumSize(new Dimension(325, height - 10)); + String title = MessageManager.getString("label.choose_calculation"); + if (af.getViewport().viewName != null) + { + title = title + " (" + af.getViewport().viewName + ")"; + } + + Desktop.addInternalFrame(frame, + title, width, + height, false); + calcChoicePanel.doLayout(); + revalidate(); + /* + * null the AlignmentPanel's reference to the dialog when it is closed + */ + frame.addInternalFrameListener(new InternalFrameAdapter() + { + @Override + public void internalFrameClosed(InternalFrameEvent evt) + { + af.alignPanel.setCalculationDialog(null); + }; + }); + + frame.setLayer(JLayeredPane.PALETTE_LAYER); + } + + /** + * enable calculations applicable for the current alignment or selection. + */ + protected void validateCalcTypes() + { + int size = af.getViewport().getAlignment().getHeight(); + if (af.getViewport().getSelectionGroup() != null) + { + size = af.getViewport().getSelectionGroup().getSize(); + } + if (!(checkEnabled(pca, size, 4) + | checkEnabled(neighbourJoining, size, 3) | checkEnabled( + averageDistance, size, 3))) + { + ok.setToolTipText(null); + ok.setEnabled(true); + } + else + { + ok.setEnabled(false); + } + updateScoreModels(comboBox, tips); + } + + /** + * Check the input and disable a calculation's radio button if necessary. A + * tooltip is shown for disabled calculations. 
+ * + * @param calc + * - radio button for the calculation being validated + * @param size + * - size of input to calculation + * @param minsize + * - minimum size for calculation + * @return true if size < minsize *and* calc.isSelected + */ + private boolean checkEnabled(JRadioButton calc, int size, int minsize) + { + String ttip = MessageManager.formatMessage( + "label.you_need_more_than_n_sequences", minsize); + + calc.setEnabled(size >= minsize); + if (!calc.isEnabled()) + { + calc.setToolTipText(ttip); + } + else + { + calc.setToolTipText(null); + } + if (calc.isSelected()) + { + modelNames.setEnabled(calc.isEnabled()); + if (!calc.isEnabled()) + { + ok.setEnabled(false); + ok.setToolTipText(ttip); + return true; + } + } + return false; + } + + final JComboBox comboBox = new JComboBox(); + + final ComboBoxTooltipRenderer renderer = new ComboBoxTooltipRenderer(); + + List tips = new ArrayList(); + + /** + * A rather elaborate helper method (blame Swing, not me) that builds a + * drop-down list of score models (by name) with descriptions as tooltips. + * There is also a tooltip shown for the currently selected item when hovering + * over it (without opening the list). 
+ */ + protected JComboBox buildModelOptionsList() + { + comboBox.setRenderer(renderer); + + /* + * show tooltip on mouse over the combobox + * note the listener has to be on the components that make up + * the combobox, doesn't work if just on the combobox + */ + final MouseAdapter mouseListener = new MouseAdapter() + { + @Override + public void mouseEntered(MouseEvent e) + { + comboBox.setToolTipText(tips.get(comboBox.getSelectedIndex())); + } + + @Override + public void mouseExited(MouseEvent e) + { + comboBox.setToolTipText(null); + } + }; + for (Component c : comboBox.getComponents()) + { + c.addMouseListener(mouseListener); + } + + updateScoreModels(comboBox, tips); + + /* + * set the list of tooltips on the combobox's renderer + */ + renderer.setTooltips(tips); + + return comboBox; + } + + private void updateScoreModels(JComboBox comboBox, List tips) + { + Object curSel = comboBox.getSelectedItem(); + tips.clear(); + DefaultComboBoxModel model = new DefaultComboBoxModel(); + + /* + * now we can actually add entries to the combobox, + * remembering their descriptions for tooltips + */ + ScoreModels scoreModels = ScoreModels.getInstance(); + boolean selectedIsPresent = false; + for (ScoreModelI sm : scoreModels.getModels()) + { + boolean nucleotide = af.getViewport().getAlignment().isNucleotide(); + if (sm.isDNA() && nucleotide || sm.isProtein() && !nucleotide) + { + if (curSel != null && sm.getName().equals(curSel)) + { + selectedIsPresent = true; + curSel = sm.getName(); + } + model.addElement(sm.getName()); + + /* + * tooltip is description if provided, else text lookup with + * fallback on the model name + */ + String tooltip = sm.getDescription(); + if (tooltip == null) + { + tooltip = MessageManager.getStringOrReturn("label.score_model_", + sm.getName()); + } + tips.add(tooltip); + } + } + if (selectedIsPresent) + { + model.setSelectedItem(curSel); + } + // finally, update the model + comboBox.setModel(model); + } + + /** + * Open and calculate the 
selected tree or PCA on 'OK' + */ + protected void ok_actionPerformed() + { + boolean doPCA = pca.isSelected(); + String modelName = modelNames.getSelectedItem().toString(); + SimilarityParamsI params = getSimilarityParameters(doPCA); + + if (doPCA) + { + openPcaPanel(modelName, params); + } + else + { + openTreePanel(modelName, params); + } + + // closeFrame(); + } + + /** + * Open a new Tree panel on the desktop + * + * @param modelName + * @param params + */ + protected void openTreePanel(String modelName, SimilarityParamsI params) + { + String treeType = neighbourJoining.isSelected() ? TreeBuilder.NEIGHBOUR_JOINING + : TreeBuilder.AVERAGE_DISTANCE; + af.newTreePanel(treeType, modelName, params); + } + + /** + * Open a new PCA panel on the desktop + * + * @param modelName + * @param params + */ + protected void openPcaPanel(String modelName, SimilarityParamsI params) + { + AlignViewport viewport = af.getViewport(); + if (((viewport.getSelectionGroup() != null) + && (viewport.getSelectionGroup().getSize() < 4) && (viewport + .getSelectionGroup().getSize() > 0)) + || (viewport.getAlignment().getHeight() < 4)) + { + JvOptionPane + .showInternalMessageDialog( + this, + MessageManager + .getString("label.principal_component_analysis_must_take_least_four_input_sequences"), + MessageManager + .getString("label.sequence_selection_insufficient"), + JvOptionPane.WARNING_MESSAGE); + return; + } + new PCAPanel(af.alignPanel, modelName, params); + } + + /** + * + */ + protected void closeFrame() + { + try + { + frame.setClosed(true); + } catch (PropertyVetoException ex) + { + } + } + + /** + * Returns a data bean holding parameters for similarity (or distance) model + * calculation + * + * @param doPCA + * @return + */ + protected SimilarityParamsI getSimilarityParameters(boolean doPCA) + { + // commented out: parameter choices read from gui widgets + // SimilarityParamsI params = new SimilarityParams( + // includeGappedColumns.isSelected(), matchGaps.isSelected(), + // 
includeGaps.isSelected(), shorterSequence.isSelected()); + + boolean includeGapGap = true; + boolean includeGapResidue = true; + boolean matchOnShortestLength = false; + + /* + * 'matchGaps' flag is only used in the PID calculation + * - set to false for PCA so that PCA using PID reproduces SeqSpace PCA + * - set to true for Tree to reproduce Jalview 2.10.1 calculation + * - set to false for Tree for a more correct calculation (JAL-374) + */ + boolean matchGap = doPCA ? false : treeMatchGaps; + + return new SimilarityParams(includeGapGap, matchGap, includeGapResidue, matchOnShortestLength); + } + + /** + * Closes dialog on cancel + * + * @param e + */ + protected void cancel_actionPerformed(ActionEvent e) + { + try + { + frame.setClosed(true); + } catch (Exception ex) + { + } + } +} diff --git a/src/jalview/gui/ComboBoxTooltipRenderer.java b/src/jalview/gui/ComboBoxTooltipRenderer.java new file mode 100644 index 0000000..b776757 --- /dev/null +++ b/src/jalview/gui/ComboBoxTooltipRenderer.java @@ -0,0 +1,42 @@ +package jalview.gui; + +import java.awt.Component; +import java.util.List; + +import javax.swing.DefaultListCellRenderer; +import javax.swing.JComponent; +import javax.swing.JList; + +/** + * A helper class to render a combobox with tooltips + * + * @see http + * ://stackoverflow.com/questions/480261/java-swing-mouseover-text-on-jcombobox + * -items + */ +public class ComboBoxTooltipRenderer extends DefaultListCellRenderer +{ + private static final long serialVersionUID = 1L; + + List tooltips; + + @Override + public Component getListCellRendererComponent(JList list, Object value, + int index, boolean isSelected, boolean cellHasFocus) + { + + JComponent comp = (JComponent) super.getListCellRendererComponent(list, + value, index, isSelected, cellHasFocus); + + if (-1 < index && null != value && null != tooltips) + { + list.setToolTipText(tooltips.get(index)); + } + return comp; + } + + public void setTooltips(List tips) + { + this.tooltips = tips; + } +} diff 
--git a/src/jalview/gui/Jalview2XML.java b/src/jalview/gui/Jalview2XML.java index c19f005..02136f9 100644 --- a/src/jalview/gui/Jalview2XML.java +++ b/src/jalview/gui/Jalview2XML.java @@ -1107,7 +1107,7 @@ public class Jalview2XML Tree tree = new Tree(); tree.setTitle(tp.getTitle()); tree.setCurrentTree((av.currentTree == tp.getTree())); - tree.setNewick(tp.getTree().toString()); + tree.setNewick(tp.getTree().print()); tree.setThreshold(tp.treeCanvas.threshold); tree.setFitToWindow(tp.fitToWindow.getState()); @@ -3680,7 +3680,7 @@ public class Jalview2XML TreePanel tp = (TreePanel) retrieveExistingObj(tree.getId()); if (tp == null) { - tp = af.ShowNewickTree( + tp = af.showNewickTree( new jalview.io.NewickFile(tree.getNewick()), tree.getTitle(), tree.getWidth(), tree.getHeight(), tree.getXpos(), tree.getYpos()); diff --git a/src/jalview/gui/Jalview2XML_V1.java b/src/jalview/gui/Jalview2XML_V1.java index 6235cbe..cc2f636 100755 --- a/src/jalview/gui/Jalview2XML_V1.java +++ b/src/jalview/gui/Jalview2XML_V1.java @@ -464,7 +464,7 @@ public class Jalview2XML_V1 Tree tree = jms.getTree(t); - TreePanel tp = af.ShowNewickTree( + TreePanel tp = af.showNewickTree( new jalview.io.NewickFile(tree.getNewick()), tree.getTitle(), tree.getWidth(), tree.getHeight(), tree.getXpos(), tree.getYpos()); diff --git a/src/jalview/gui/PCAPanel.java b/src/jalview/gui/PCAPanel.java index 5b1aa37..e9ba1e7 100644 --- a/src/jalview/gui/PCAPanel.java +++ b/src/jalview/gui/PCAPanel.java @@ -20,14 +20,16 @@ */ package jalview.gui; +import jalview.analysis.scoremodels.ScoreModels; +import jalview.analysis.scoremodels.SimilarityParams; +import jalview.api.analysis.ScoreModelI; +import jalview.api.analysis.SimilarityParamsI; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; import jalview.datamodel.AlignmentView; import jalview.datamodel.ColumnSelection; -import jalview.datamodel.SeqCigar; import jalview.datamodel.SequenceI; import jalview.jbgui.GPCAPanel; -import 
jalview.schemes.ResidueProperties; import jalview.util.MessageManager; import jalview.viewmodel.AlignmentViewport; import jalview.viewmodel.PCAModel; @@ -78,26 +80,47 @@ public class PCAPanel extends GPCAPanel implements Runnable, int top = 0; /** - * Creates a new PCAPanel object. + * Creates a new PCAPanel object using default score model and parameters * - * @param av - * DOCUMENT ME! - * @param s - * DOCUMENT ME! + * @param alignPanel */ - public PCAPanel(AlignmentPanel ap) + public PCAPanel(AlignmentPanel alignPanel) + { + this(alignPanel, ScoreModels.getInstance() + .getDefaultModel(!alignPanel.av.getAlignment().isNucleotide()) + .getName(), SimilarityParams.SeqSpace); + } + + /** + * Constructor given sequence data, a similarity (or distance) score model + * name, and score calculation parameters + * + * @param alignPanel + * @param modelName + * @param params + */ + public PCAPanel(AlignmentPanel alignPanel, String modelName, + SimilarityParamsI params) { super(); - this.av = ap.av; - this.ap = ap; + this.av = alignPanel.av; + this.ap = alignPanel; + boolean nucleotide = av.getAlignment().isNucleotide(); progressBar = new ProgressBar(statusPanel, statusBar); - boolean sameLength = true; + addInternalFrameListener(new InternalFrameAdapter() + { + @Override + public void internalFrameClosed(InternalFrameEvent e) + { + close_actionPerformed(); + } + }); + boolean selected = av.getSelectionGroup() != null && av.getSelectionGroup().getSize() > 0; AlignmentView seqstrings = av.getAlignmentView(selected); - boolean nucleotide = av.getAlignment().isNucleotide(); SequenceI[] seqs; if (!selected) { @@ -107,41 +130,14 @@ public class PCAPanel extends GPCAPanel implements Runnable, { seqs = av.getSelectionGroup().getSequencesInOrder(av.getAlignment()); } - SeqCigar sq[] = seqstrings.getSequences(); - int length = sq[0].getWidth(); - - for (int i = 0; i < seqs.length; i++) - { - if (sq[i].getWidth() != length) - { - sameLength = false; - break; - } - } - - if 
(!sameLength) - { - JvOptionPane.showMessageDialog(Desktop.desktop, - MessageManager.getString("label.pca_sequences_not_aligned"), - MessageManager.getString("label.sequences_not_aligned"), - JvOptionPane.WARNING_MESSAGE); - - return; - } - - addInternalFrameListener(new InternalFrameAdapter() - { - @Override - public void internalFrameClosed(InternalFrameEvent e) - { - close_actionPerformed(); - } - }); - pcaModel = new PCAModel(seqstrings, seqs, nucleotide); + ScoreModelI scoreModel = ScoreModels.getInstance().getScoreModel( + modelName, ap); + pcaModel = new PCAModel(seqstrings, seqs, nucleotide, scoreModel, + params); PaintRefresher.Register(this, av.getSequenceSetId()); - rc = new RotatableCanvas(ap); + rc = new RotatableCanvas(alignPanel); this.getContentPane().add(rc, BorderLayout.CENTER); Thread worker = new Thread(this); worker.start(); @@ -156,40 +152,52 @@ public class PCAPanel extends GPCAPanel implements Runnable, pcaModel = null; } + /** + * Repopulate the options and actions under the score model menu when it is + * selected. Options will depend on whether 'nucleotide' or 'peptide' + * modelling is selected (and also possibly on whether any additional score + * models have been added). 
+ */ @Override - protected void scoreMatrix_menuSelected() + protected void scoreModel_menuSelected() { - scoreMatrixMenu.removeAll(); - for (final String sm : ResidueProperties.scoreMatrices.keySet()) - { - if (ResidueProperties.getScoreMatrix(sm) != null) + scoreModelMenu.removeAll(); + for (final ScoreModelI sm : ScoreModels.getInstance().getModels()) + { + final String name = sm.getName(); + JCheckBoxMenuItem jm = new JCheckBoxMenuItem(name); + + /* + * if the score model doesn't provide a description, try to look one + * up in the text bundle, falling back on its name + */ + String tooltip = sm.getDescription(); + if (tooltip == null) { - // create an entry for this score matrix for use in PCA - JCheckBoxMenuItem jm = new JCheckBoxMenuItem(); - jm.setText(MessageManager.getStringOrReturn("label.score_model_", - sm)); - jm.setSelected(pcaModel.getScore_matrix().equals(sm)); - if ((ResidueProperties.scoreMatrices.get(sm).isDNA() && ResidueProperties.scoreMatrices - .get(sm).isProtein()) - || pcaModel.isNucleotide() == ResidueProperties.scoreMatrices - .get(sm).isDNA()) + tooltip = MessageManager.getStringOrReturn("label.score_model_", + name); + } + jm.setToolTipText(tooltip); + jm.setSelected(pcaModel.getScoreModelName().equals(name)); + if ((pcaModel.isNucleotide() && sm.isDNA()) + || (!pcaModel.isNucleotide() && sm.isProtein())) + { + jm.addActionListener(new ActionListener() { - final PCAPanel us = this; - jm.addActionListener(new ActionListener() + @Override + public void actionPerformed(ActionEvent e) { - @Override - public void actionPerformed(ActionEvent e) + if (!pcaModel.getScoreModelName().equals(name)) { - if (!pcaModel.getScore_matrix().equals(sm)) - { - pcaModel.setScore_matrix(sm); - Thread worker = new Thread(us); - worker.start(); - } + ScoreModelI sm2 = ScoreModels.getInstance().getScoreModel( + name, ap); + pcaModel.setScoreModel(sm2); + Thread worker = new Thread(PCAPanel.this); + worker.start(); } - }); - scoreMatrixMenu.add(jm); - } + } + 
}); + scoreModelMenu.add(jm); } } } @@ -236,7 +244,6 @@ public class PCAPanel extends GPCAPanel implements Runnable, // rc.invalidate(); nuclSetting.setSelected(pcaModel.isNucleotide()); protSetting.setSelected(!pcaModel.isNucleotide()); - jvVersionSetting.setSelected(pcaModel.isJvCalcMode()); top = pcaModel.getTop(); } catch (OutOfMemoryError er) @@ -264,7 +271,8 @@ public class PCAPanel extends GPCAPanel implements Runnable, if (!pcaModel.isNucleotide()) { pcaModel.setNucleotide(true); - pcaModel.setScore_matrix("DNA"); + pcaModel.setScoreModel(ScoreModels.getInstance().getDefaultModel( + false)); Thread worker = new Thread(this); worker.start(); } @@ -278,20 +286,13 @@ public class PCAPanel extends GPCAPanel implements Runnable, if (pcaModel.isNucleotide()) { pcaModel.setNucleotide(false); - pcaModel.setScore_matrix("BLOSUM62"); + pcaModel.setScoreModel(ScoreModels.getInstance() + .getDefaultModel(true)); Thread worker = new Thread(this); worker.start(); } } - @Override - protected void jvVersionSetting_actionPerfomed(ActionEvent arg0) - { - pcaModel.setJvCalcMode(jvVersionSetting.isSelected()); - Thread worker = new Thread(this); - worker.start(); - } - /** * DOCUMENT ME! */ diff --git a/src/jalview/gui/SeqPanel.java b/src/jalview/gui/SeqPanel.java index db7aa36..ab53c58 100644 --- a/src/jalview/gui/SeqPanel.java +++ b/src/jalview/gui/SeqPanel.java @@ -1670,7 +1670,7 @@ public class SeqPanel extends JPanel implements MouseListener, * * @param evt * @param res - * @param sequence + * @param sequences */ void showPopupMenu(MouseEvent evt) { @@ -1956,9 +1956,20 @@ public class SeqPanel extends JPanel implements MouseListener, // shared between viewports. 
boolean iSentTheSelection = (av == source || (source instanceof AlignViewport && ((AlignmentViewport) source) .getSequenceSetId().equals(av.getSequenceSetId()))); - if (iSentTheSelection || !av.followSelection) + + if (iSentTheSelection) { - return; + // respond to our own event by updating dependent dialogs + if (ap.getCalculationDialog() != null) + { + ap.getCalculationDialog().validateCalcTypes(); + } + + // process further ? + if (!av.followSelection) + { + return; + } } /* @@ -2056,6 +2067,13 @@ public class SeqPanel extends JPanel implements MouseListener, PaintRefresher.Refresh(this, av.getSequenceSetId()); // ap.paintAlignment(false); } + + // lastly, update dependent dialogs + if (ap.getCalculationDialog() != null) + { + ap.getCalculationDialog().validateCalcTypes(); + } + } /** @@ -2095,6 +2113,12 @@ public class SeqPanel extends JPanel implements MouseListener, av); av.setColumnSelection(cs); + // lastly, update any dependent dialogs + if (ap.getCalculationDialog() != null) + { + ap.getCalculationDialog().validateCalcTypes(); + } + PaintRefresher.Refresh(this, av.getSequenceSetId()); return true; diff --git a/src/jalview/gui/TreeCanvas.java b/src/jalview/gui/TreeCanvas.java index 9a38d4c..3494fb8 100755 --- a/src/jalview/gui/TreeCanvas.java +++ b/src/jalview/gui/TreeCanvas.java @@ -21,7 +21,7 @@ package jalview.gui; import jalview.analysis.Conservation; -import jalview.analysis.NJTree; +import jalview.analysis.TreeModel; import jalview.api.AlignViewportI; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceGroup; @@ -53,6 +53,7 @@ import java.awt.print.PrinterException; import java.awt.print.PrinterJob; import java.util.Enumeration; import java.util.Hashtable; +import java.util.List; import java.util.Vector; import javax.swing.JColorChooser; @@ -73,7 +74,7 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, /** DOCUMENT ME!! 
*/ public static final String PLACEHOLDER = " * "; - NJTree tree; + TreeModel tree; JScrollPane scrollPane; @@ -168,7 +169,7 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, * @param tree * DOCUMENT ME! */ - public void setTree(NJTree tree) + public void setTree(TreeModel tree) { this.tree = tree; tree.findHeight(tree.getTopNode()); @@ -207,7 +208,7 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, * DOCUMENT ME! * @param chunk * DOCUMENT ME! - * @param scale + * @param wscale * DOCUMENT ME! * @param width * DOCUMENT ME! @@ -217,7 +218,7 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, * DOCUMENT ME! */ public void drawNode(Graphics g, SequenceNode node, float chunk, - float scale, int width, int offx, int offy) + double wscale, int width, int offx, int offy) { if (node == null) { @@ -227,11 +228,11 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, if ((node.left() == null) && (node.right() == null)) { // Drawing leaf node - float height = node.height; - float dist = node.dist; + double height = node.height; + double dist = node.dist; - int xstart = (int) ((height - dist) * scale) + offx; - int xend = (int) (height * scale) + offx; + int xstart = (int) ((height - dist) * wscale) + offx; + int xend = (int) (height * wscale) + offx; int ypos = (int) (node.ycount * chunk) + offy; @@ -306,16 +307,16 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, } else { - drawNode(g, (SequenceNode) node.left(), chunk, scale, width, offx, + drawNode(g, (SequenceNode) node.left(), chunk, wscale, width, offx, offy); - drawNode(g, (SequenceNode) node.right(), chunk, scale, width, offx, + drawNode(g, (SequenceNode) node.right(), chunk, wscale, width, offx, offy); - float height = node.height; - float dist = node.dist; + double height = node.height; + double dist = node.dist; - int xstart = (int) ((height - dist) * scale) + offx; - int xend = (int) 
(height * scale) + offx; + int xstart = (int) ((height - dist) * wscale) + offx; + int xend = (int) (height * wscale) + offx; int ypos = (int) (node.ycount * chunk) + offy; g.setColor(node.color.darker()); @@ -339,8 +340,8 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, Rectangle pos = new Rectangle(xend - 2, ypos - 2, 5, 5); nodeHash.put(node, pos); - g.drawLine((int) (height * scale) + offx, ystart, - (int) (height * scale) + offx, yend); + g.drawLine((int) (height * wscale) + offx, ystart, + (int) (height * wscale) + offx, yend); String nodeLabel = ""; @@ -422,7 +423,7 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, SequenceNode top = tree.getTopNode(); - float wscale = (float) ((width * .8) - (offx * 2)) + double wscale = ((width * .8) - (offx * 2)) / tree.getMaxHeight(); if (top.count == 0) @@ -445,7 +446,7 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, * DOCUMENT ME! * @param chunk * DOCUMENT ME! - * @param scale + * @param wscale * DOCUMENT ME! * @param width * DOCUMENT ME! @@ -455,7 +456,7 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, * DOCUMENT ME! 
*/ public void pickNode(Rectangle pickBox, SequenceNode node, float chunk, - float scale, int width, int offx, int offy) + double wscale, int width, int offx, int offy) { if (node == null) { @@ -464,11 +465,11 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, if ((node.left() == null) && (node.right() == null)) { - float height = node.height; - float dist = node.dist; + double height = node.height; + double dist = node.dist; - int xstart = (int) ((height - dist) * scale) + offx; - int xend = (int) (height * scale) + offx; + int xstart = (int) ((height - dist) * wscale) + offx; + int xend = (int) (height * wscale) + offx; int ypos = (int) (node.ycount * chunk) + offy; @@ -488,9 +489,9 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, } else { - pickNode(pickBox, (SequenceNode) node.left(), chunk, scale, width, + pickNode(pickBox, (SequenceNode) node.left(), chunk, wscale, width, offx, offy); - pickNode(pickBox, (SequenceNode) node.right(), chunk, scale, width, + pickNode(pickBox, (SequenceNode) node.right(), chunk, wscale, width, offx, offy); } } @@ -724,7 +725,8 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, labelLength = fm.stringWidth(longestName) + 20; // 20 allows for scrollbar - float wscale = (width - labelLength - (offx * 2)) / tree.getMaxHeight(); + double wscale = (width - labelLength - (offx * 2)) + / tree.getMaxHeight(); SequenceNode top = tree.getTopNode(); @@ -937,8 +939,7 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, threshold = (float) (x - offx) / (float) (getWidth() - labelLength - (2 * offx)); - tree.getGroups().removeAllElements(); - tree.groupNodes(tree.getTopNode(), threshold); + List groups = tree.groupNodes(threshold); setColor(tree.getTopNode(), Color.black); AlignmentPanel[] aps = getAssociatedPanels(); @@ -958,7 +959,7 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, 
aps[a].av.getCodingComplement().clearSequenceColours(); } } - colourGroups(); + colourGroups(groups); } PaintRefresher.Refresh(tp, ap.av.getSequenceSetId()); @@ -967,17 +968,16 @@ public class TreeCanvas extends JPanel implements MouseListener, Runnable, } - void colourGroups() + void colourGroups(List groups) { AlignmentPanel[] aps = getAssociatedPanels(); - for (int i = 0; i < tree.getGroups().size(); i++) + for (int i = 0; i < groups.size(); i++) { Color col = new Color((int) (Math.random() * 255), (int) (Math.random() * 255), (int) (Math.random() * 255)); - setColor(tree.getGroups().elementAt(i), col.brighter()); + setColor(groups.get(i), col.brighter()); - Vector l = tree.findLeaves(tree.getGroups() - .elementAt(i)); + Vector l = tree.findLeaves(groups.get(i)); Vector sequences = new Vector(); diff --git a/src/jalview/gui/TreePanel.java b/src/jalview/gui/TreePanel.java index 25f4c1b..9dd805e 100755 --- a/src/jalview/gui/TreePanel.java +++ b/src/jalview/gui/TreePanel.java @@ -21,9 +21,13 @@ package jalview.gui; import jalview.analysis.AlignmentSorter; +import jalview.analysis.AverageDistanceTree; import jalview.analysis.NJTree; +import jalview.analysis.TreeBuilder; +import jalview.analysis.TreeModel; +import jalview.analysis.scoremodels.ScoreModels; import jalview.api.analysis.ScoreModelI; -import jalview.api.analysis.ViewBasedAnalysisI; +import jalview.api.analysis.SimilarityParamsI; import jalview.bin.Cache; import jalview.commands.CommandI; import jalview.commands.OrderCommand; @@ -41,7 +45,6 @@ import jalview.io.JalviewFileChooser; import jalview.io.JalviewFileView; import jalview.io.NewickFile; import jalview.jbgui.GTreePanel; -import jalview.schemes.ResidueProperties; import jalview.util.ImageMaker; import jalview.util.MessageManager; import jalview.viewmodel.AlignmentViewport; @@ -71,68 +74,46 @@ import org.jibble.epsgraphics.EpsGraphics2D; */ public class TreePanel extends GTreePanel { - String type; + String treeType; - String pwtype; + String 
scoreModelName; // if tree computed + + String treeTitle; // if tree loaded + + SimilarityParamsI similarityParams; TreeCanvas treeCanvas; - NJTree tree; + TreeModel tree; AlignViewport av; /** * Creates a new TreePanel object. * - * @param av - * DOCUMENT ME! - * @param seqVector - * DOCUMENT ME! + * @param ap * @param type - * DOCUMENT ME! - * @param pwtype - * DOCUMENT ME! - * @param s - * DOCUMENT ME! - * @param e - * DOCUMENT ME! + * @param modelName + * @param options */ - public TreePanel(AlignmentPanel ap, String type, String pwtype) + public TreePanel(AlignmentPanel ap, String type, String modelName, + SimilarityParamsI options) { super(); - initTreePanel(ap, type, pwtype, null, null); + this.similarityParams = options; + initTreePanel(ap, type, modelName, null, null); // We know this tree has distances. JBPNote TODO: prolly should add this as // a userdefined default // showDistances(true); } - /** - * Creates a new TreePanel object. - * - * @param av - * DOCUMENT ME! - * @param seqVector - * DOCUMENT ME! - * @param newtree - * DOCUMENT ME! - * @param type - * DOCUMENT ME! - * @param pwtype - * DOCUMENT ME! 
- */ - public TreePanel(AlignmentPanel ap, String type, String pwtype, - NewickFile newtree) - { - super(); - initTreePanel(ap, type, pwtype, newtree, null); - } - - public TreePanel(AlignmentPanel av, String type, String pwtype, - NewickFile newtree, AlignmentView inputData) + public TreePanel(AlignmentPanel alignPanel, NewickFile newtree, + String theTitle, AlignmentView inputData) { super(); - initTreePanel(av, type, pwtype, newtree, inputData); + this.treeTitle = theTitle; + initTreePanel(alignPanel, null, null, newtree, inputData); } public AlignmentI getAlignment() @@ -145,13 +126,13 @@ public class TreePanel extends GTreePanel return treeCanvas.av; } - void initTreePanel(AlignmentPanel ap, String type, String pwtype, + void initTreePanel(AlignmentPanel ap, String type, String modelName, NewickFile newTree, AlignmentView inputData) { av = ap.av; - this.type = type; - this.pwtype = pwtype; + this.treeType = type; + this.scoreModelName = modelName; treeCanvas = new TreeCanvas(this, ap, scrollPane); scrollPane.setViewportView(treeCanvas); @@ -181,7 +162,7 @@ public class TreePanel extends GTreePanel .println("new alignment sequences vector value is null"); } - tree.UpdatePlaceHolders((List) evt.getNewValue()); + tree.updatePlaceHolders((List) evt.getNewValue()); treeCanvas.nameHash.clear(); // reset the mapping between canvas // rectangles and leafnodes repaint(); @@ -189,11 +170,7 @@ public class TreePanel extends GTreePanel } }); - TreeLoader tl = new TreeLoader(newTree); - if (inputData != null) - { - tl.odata = inputData; - } + TreeLoader tl = new TreeLoader(newTree, inputData); tl.start(); } @@ -265,19 +242,21 @@ public class TreePanel extends GTreePanel class TreeLoader extends Thread { - NewickFile newtree; + private NewickFile newtree; - jalview.datamodel.AlignmentView odata = null; + private AlignmentView odata = null; - public TreeLoader(NewickFile newtree) + public TreeLoader(NewickFile newickFile, AlignmentView inputData) { - this.newtree = newtree; 
- if (newtree != null) + this.newtree = newickFile; + this.odata = inputData; + + if (newickFile != null) { // Must be outside run(), as Jalview2XML tries to // update distance/bootstrap visibility at the same time - showBootstrap(newtree.HasBootstrap()); - showDistances(newtree.HasDistances()); + showBootstrap(newickFile.HasBootstrap()); + showDistances(newickFile.HasDistances()); } } @@ -287,60 +266,21 @@ public class TreePanel extends GTreePanel if (newtree != null) { - if (odata == null) - { - tree = new NJTree(av.getAlignment().getSequencesArray(), newtree); - } - else + tree = new TreeModel(av.getAlignment().getSequencesArray(), odata, + newtree); + if (tree.getOriginalData() == null) { - tree = new NJTree(av.getAlignment().getSequencesArray(), odata, - newtree); - } - if (!tree.hasOriginalSequenceData()) - { - allowOriginalSeqData(false); + originalSeqData.setVisible(false); } } else { - int start, end; - SequenceI[] seqs; - boolean selview = av.getSelectionGroup() != null - && av.getSelectionGroup().getSize() > 1; - AlignmentView seqStrings = av.getAlignmentView(selview); - if (!selview) - { - start = 0; - end = av.getAlignment().getWidth(); - seqs = av.getAlignment().getSequencesArray(); - } - else - { - start = av.getSelectionGroup().getStartRes(); - end = av.getSelectionGroup().getEndRes() + 1; - seqs = av.getSelectionGroup().getSequencesInOrder( - av.getAlignment()); - } - ScoreModelI sm = ResidueProperties.getScoreModel(pwtype); - if (sm instanceof ViewBasedAnalysisI) - { - try - { - sm = sm.getClass().newInstance(); - ((ViewBasedAnalysisI) sm) - .configureFromAlignmentView(treeCanvas.ap); - } catch (Exception q) - { - Cache.log.error("Couldn't create a scoremodel instance for " - + sm.getName()); - } - tree = new NJTree(seqs, seqStrings, type, pwtype, sm, start, end); - } - else - { - tree = new NJTree(seqs, seqStrings, type, pwtype, null, start, - end); - } + ScoreModelI sm = ScoreModels.getInstance().getScoreModel( + scoreModelName, treeCanvas.ap); 
+ TreeBuilder njtree = treeType.equals(TreeBuilder.NEIGHBOUR_JOINING) ? new NJTree( + av, sm, similarityParams) : new AverageDistanceTree(av, sm, + similarityParams); + tree = new TreeModel(njtree); showDistances(true); } @@ -374,17 +314,12 @@ public class TreePanel extends GTreePanel treeCanvas.setMarkPlaceholders(b); } - private void allowOriginalSeqData(boolean b) - { - originalSeqData.setVisible(b); - } - /** * DOCUMENT ME! * * @return DOCUMENT ME! */ - public NJTree getTree() + public TreeModel getTree() { return tree; } @@ -400,33 +335,14 @@ public class TreePanel extends GTreePanel { CutAndPasteTransfer cap = new CutAndPasteTransfer(); - StringBuffer buffer = new StringBuffer(); + String newTitle = getPanelTitle(); - if (type.equals("AV")) - { - buffer.append("Average distance tree using "); - } - else - { - buffer.append("Neighbour joining tree using "); - } - - if (pwtype.equals("BL")) - { - buffer.append("BLOSUM62"); - } - else - { - buffer.append("PID"); - } - - jalview.io.NewickFile fout = new jalview.io.NewickFile( - tree.getTopNode()); + NewickFile fout = new NewickFile(tree.getTopNode()); try { - cap.setText(fout.print(tree.isHasBootstrap(), tree.isHasDistances(), - tree.isHasRootDistance())); - Desktop.addInternalFrame(cap, buffer.toString(), 500, 100); + cap.setText(fout.print(tree.hasBootstrap(), tree.hasDistances(), + tree.hasRootDistance())); + Desktop.addInternalFrame(cap, newTitle, 500, 100); } catch (OutOfMemoryError oom) { new OOMWarning("generating newick tree file", oom); @@ -463,8 +379,8 @@ public class TreePanel extends GTreePanel { jalview.io.NewickFile fout = new jalview.io.NewickFile( tree.getTopNode()); - String output = fout.print(tree.isHasBootstrap(), - tree.isHasDistances(), tree.isHasRootDistance()); + String output = fout.print(tree.hasBootstrap(), + tree.hasDistances(), tree.hasRootDistance()); java.io.PrintWriter out = new java.io.PrintWriter( new java.io.FileWriter(choice)); out.println(output); @@ -492,7 +408,8 @@ public 
class TreePanel extends GTreePanel @Override public void originalSeqData_actionPerformed(ActionEvent e) { - if (!tree.hasOriginalSequenceData()) + AlignmentView originalData = tree.getOriginalData(); + if (originalData == null) { jalview.bin.Cache.log .info("Unexpected call to originalSeqData_actionPerformed - should have hidden this menu action."); @@ -514,8 +431,9 @@ public class TreePanel extends GTreePanel } catch (Exception ex) { } - ; - Object[] alAndColsel = tree.seqData.getAlignmentAndColumnSelection(gc); + + Object[] alAndColsel = originalData + .getAlignmentAndColumnSelection(gc); if (alAndColsel != null && alAndColsel[0] != null) { @@ -632,11 +550,11 @@ public class TreePanel extends GTreePanel public CommandI sortAlignmentIn(AlignmentPanel ap) { - AlignmentViewport av = ap.av; - SequenceI[] oldOrder = av.getAlignment().getSequencesArray(); - AlignmentSorter.sortByTree(av.getAlignment(), tree); + AlignmentViewport viewport = ap.av; + SequenceI[] oldOrder = viewport.getAlignment().getSequencesArray(); + AlignmentSorter.sortByTree(viewport.getAlignment(), tree); CommandI undo; - undo = new OrderCommand("Tree Sort", oldOrder, av.getAlignment()); + undo = new OrderCommand("Tree Sort", oldOrder, viewport.getAlignment()); ap.paintAlignment(true); return undo; @@ -664,11 +582,11 @@ public class TreePanel extends GTreePanel return treeCanvas.font; } - public void setTreeFont(Font font) + public void setTreeFont(Font f) { if (treeCanvas != null) { - treeCanvas.setFont(font); + treeCanvas.setFont(f); } } @@ -891,12 +809,46 @@ public class TreePanel extends GTreePanel } if (newname != null) { - String oldname = ((SequenceNode) node).getName(); - // TODO : save in the undo object for this modification. + // String oldname = ((SequenceNode) node).getName(); + // TODO : save oldname in the undo object for this modification. ((SequenceNode) node).setName(newname); } } } }); } + + /** + * Formats a localised title for the tree panel, like + *

+ * Neighbour Joining Using BLOSUM62 + *

+ * For a tree loaded from file, just uses the file name + * @return + */ + public String getPanelTitle() + { + if (treeTitle != null) + { + return treeTitle; + } + + /* + * i18n description of Neighbour Joining or Average Distance method + */ + String treecalcnm = MessageManager.getString("label.tree_calc_" + + treeType.toLowerCase()); + + /* + * short score model name (long description can be too long) + */ + String smn = scoreModelName; + + /* + * put them together as Using + */ + final String ttl = MessageManager.formatMessage("label.treecalc_title", + treecalcnm, smn); + return ttl; + } } diff --git a/src/jalview/io/FileFormat.java b/src/jalview/io/FileFormat.java index a11147c..3354b88 100644 --- a/src/jalview/io/FileFormat.java +++ b/src/jalview/io/FileFormat.java @@ -256,6 +256,21 @@ public enum FileFormat implements FileFormatI return new FeaturesFile(); } }, + ScoreMatrix("Substitution matrix", "", false, false) + { + @Override + public AlignmentFileReaderI getReader(FileParse source) + throws IOException + { + return new ScoreMatrixFile(source); + } + + @Override + public AlignmentFileWriterI getWriter(AlignmentI al) + { + return null; + } + }, PDB("PDB", "pdb,ent", true, false) { @Override diff --git a/src/jalview/io/IdentifyFile.java b/src/jalview/io/IdentifyFile.java index 0556e76..035c1fa 100755 --- a/src/jalview/io/IdentifyFile.java +++ b/src/jalview/io/IdentifyFile.java @@ -98,12 +98,15 @@ public class IdentifyFile boolean lineswereskipped = false; boolean isBinary = false; // true if length is non-zero and non-printable // characters are encountered + try { if (!closeSource) { source.mark(); } + boolean aaIndexHeaderRead = false; + while ((data = source.nextLine()) != null) { bytesRead += data.length(); @@ -141,6 +144,20 @@ public class IdentifyFile } data = data.toUpperCase(); + if (data.startsWith(ScoreMatrixFile.SCOREMATRIX)) + { + reply = FileFormat.ScoreMatrix; + break; + } + if (data.startsWith("H ") && !aaIndexHeaderRead) + { + 
aaIndexHeaderRead = true; + } + if (data.startsWith("D ") && aaIndexHeaderRead) + { + reply = FileFormat.ScoreMatrix; + break; + } if (data.startsWith("##GFF-VERSION")) { // GFF - possibly embedded in a Jalview features file! diff --git a/src/jalview/io/ScoreMatrixFile.java b/src/jalview/io/ScoreMatrixFile.java new file mode 100644 index 0000000..6b2f891 --- /dev/null +++ b/src/jalview/io/ScoreMatrixFile.java @@ -0,0 +1,433 @@ +package jalview.io; + +import jalview.analysis.scoremodels.ScoreMatrix; +import jalview.analysis.scoremodels.ScoreModels; +import jalview.datamodel.SequenceI; + +import java.io.IOException; +import java.util.StringTokenizer; + +/** + * A class that can parse a file containing a substitution matrix and register + * it for use in Jalview + *

+ * Accepts 'NCBI' format (e.g. + * https://www.ncbi.nlm.nih.gov/Class/FieldGuide/BLOSUM62.txt), with the + * addition of a header line to provide a matrix name, e.g. + * + *

+ * ScoreMatrix BLOSUM62
+ * 
+ * + * Also accepts 'AAindex' format (as described at + * http://www.genome.jp/aaindex/aaindex_help.html) with the minimum data + * required being + * + *
+ * H accession number (used as score matrix identifier in Jalview)
+ * D description (used for tooltip in Jalview)
+ * M rows = symbolList
+ * and the substitution scores
+ * 
+ */ +public class ScoreMatrixFile extends AlignFile implements + AlignmentFileReaderI +{ + // first non-comment line identifier - also checked in IdentifyFile + public static final String SCOREMATRIX = "SCOREMATRIX"; + + private static final String DELIMITERS = " ,\t"; + + private static final String COMMENT_CHAR = "#"; + + private String matrixName; + + /* + * aaindex format has scores for diagonal and below only + */ + boolean isLowerDiagonalOnly; + + /* + * ncbi format has symbols as first column on score rows + */ + boolean hasGuideColumn; + + /** + * Constructor + * + * @param source + * @throws IOException + */ + public ScoreMatrixFile(FileParse source) throws IOException + { + super(false, source); + } + + @Override + public String print(SequenceI[] sqs, boolean jvsuffix) + { + return null; + } + + /** + * Parses the score matrix file, and if successful registers the matrix so it + * will be shown in Jalview menus. This method is not thread-safe (a separate + * instance of this class should be used by each thread). + */ + @Override + public void parse() throws IOException + { + ScoreMatrix sm = parseMatrix(); + + ScoreModels.getInstance().registerScoreModel(sm); + } + + /** + * Parses the score matrix file and constructs a ScoreMatrix object. If an + * error is found in parsing, it is thrown as FileFormatException. Any + * warnings are written to syserr. 
+ * + * @return + * @throws IOException + */ + public ScoreMatrix parseMatrix() throws IOException + { + ScoreMatrix sm = null; + int lineNo = 0; + String name = null; + char[] alphabet = null; + float[][] scores = null; + int size = 0; + int row = 0; + String err = null; + String data; + isLowerDiagonalOnly = false; + + while ((data = nextLine()) != null) + { + lineNo++; + data = data.trim(); + if (data.startsWith(COMMENT_CHAR) || data.length() == 0) + { + continue; + } + if (data.toUpperCase().startsWith(SCOREMATRIX)) + { + /* + * Parse name from ScoreMatrix + * we allow any delimiter after ScoreMatrix then take the rest of the line + */ + if (name != null) + { + throw new FileFormatException( + "Error: 'ScoreMatrix' repeated in file at line " + + lineNo); + } + StringTokenizer nameLine = new StringTokenizer(data, DELIMITERS); + if (nameLine.countTokens() < 2) + { + err = "Format error: expected 'ScoreMatrix ', found '" + + data + "' at line " + lineNo; + throw new FileFormatException(err); + } + nameLine.nextToken(); // 'ScoreMatrix' + name = nameLine.nextToken(); // next field + name = data.substring(1).substring(data.substring(1).indexOf(name)); + continue; + } + else if (data.startsWith("H ") && name == null) + { + /* + * AAindex identifier + */ + return parseAAIndexFormat(lineNo, data); + } + else if (name == null) + { + err = "Format error: 'ScoreMatrix ' should be the first non-comment line"; + throw new FileFormatException(err); + } + + /* + * next non-comment line after ScoreMatrix should be the + * column header line with the alphabet of scored symbols + */ + if (alphabet == null) + { + StringTokenizer columnHeadings = new StringTokenizer(data, + DELIMITERS); + size = columnHeadings.countTokens(); + alphabet = new char[size]; + int col = 0; + while (columnHeadings.hasMoreTokens()) + { + alphabet[col++] = columnHeadings.nextToken().charAt(0); + } + scores = new float[size][]; + continue; + } + + /* + * too much information + */ + if (row >= size) + { + 
err = "Unexpected extra input line in score model file: '" + data + + "'"; + throw new FileFormatException(err); + } + + parseValues(data, lineNo, scores, row, alphabet); + row++; + } + + /* + * out of data - check we found enough + */ + if (row < size) + { + err = String + .format("Expected %d rows of score data in score matrix but only found %d", + size, row); + throw new FileFormatException(err); + } + + /* + * If we get here, then name, alphabet and scores have been parsed successfully + */ + sm = new ScoreMatrix(name, alphabet, scores); + matrixName = name; + + return sm; + } + + /** + * Parse input as AAIndex format, starting from the header line with the + * accession id + * + * @param lineNo + * @param data + * @return + * @throws IOException + */ + protected ScoreMatrix parseAAIndexFormat(int lineNo, String data) + throws IOException + { + String name = data.substring(2).trim(); + String description = null; + + float[][] scores = null; + char[] alphabet = null; + int row = 0; + int size = 0; + + while ((data = nextLine()) != null) + { + lineNo++; + data = data.trim(); + if (skipAAindexLine(data)) + { + continue; + } + if (data.startsWith("D ")) + { + description = data.substring(2).trim(); + } + else if (data.startsWith("M ")) + { + alphabet = parseAAindexRowsColumns(lineNo, data); + size = alphabet.length; + scores = new float[size][size]; + } + else if (scores == null) + { + throw new FileFormatException( + "No alphabet specified in matrix file"); + } + else if (row >= size) + { + throw new FileFormatException("Too many data rows in matrix file"); + } + else + { + parseValues(data, lineNo, scores, row, alphabet); + row++; + } + } + + ScoreMatrix sm = new ScoreMatrix(name, description, alphabet, scores); + matrixName = name; + + return sm; + } + + /** + * Parse one row of score values, delimited by whitespace or commas. The line + * may optionally include the symbol from which the scores are defined. 
Values + * may be present for all columns, or only up to the diagonal (in which case + * upper diagonal values are set symmetrically). + * + * @param data + * the line to be parsed + * @param lineNo + * @param scores + * the score matrix to add data to + * @param row + * the row number / alphabet index position + * @param alphabet + * @return + * @throws exception + * if invalid, or too few, or too many values + */ + protected void parseValues(String data, int lineNo, float[][] scores, + int row, char[] alphabet) throws FileFormatException + { + String err; + int size = alphabet.length; + StringTokenizer scoreLine = new StringTokenizer(data, DELIMITERS); + + int tokenCount = scoreLine.countTokens(); + + /* + * inspect first row to see if it includes the symbol in the first column, + * and to see if it is lower diagonal values only (i.e. just one score) + */ + if (row == 0) + { + if (data.startsWith(String.valueOf(alphabet[0]))) + { + hasGuideColumn = true; + } + if (tokenCount == (hasGuideColumn ? 
2 : 1)) + { + isLowerDiagonalOnly = true; + } + } + + if (hasGuideColumn) + { + /* + * check 'guide' symbol is the row'th letter of the alphabet + */ + String symbol = scoreLine.nextToken(); + if (symbol.length() > 1 || symbol.charAt(0) != alphabet[row]) + { + err = String + .format("Error parsing score matrix at line %d, expected '%s' but found '%s'", + lineNo, alphabet[row], symbol); + throw new FileFormatException(err); + } + tokenCount = scoreLine.countTokens(); // excluding guide symbol + } + + /* + * check the right number of values (lower diagonal or full format) + */ + if (isLowerDiagonalOnly && tokenCount != row + 1) + { + err = String.format( + "Expected %d scores at line %d: '%s' but found %d", row + 1, + lineNo, data, tokenCount); + throw new FileFormatException(err); + } + + if (!isLowerDiagonalOnly && tokenCount != size) + { + err = String.format( + "Expected %d scores at line %d: '%s' but found %d", size, + lineNo, data, scoreLine.countTokens()); + throw new FileFormatException(err); + } + + /* + * parse and set the values, setting the symmetrical value + * as well if lower diagonal format data + */ + scores[row] = new float[size]; + int col = 0; + String value = null; + while (scoreLine.hasMoreTokens()) + { + try + { + value = scoreLine.nextToken(); + scores[row][col] = Float.valueOf(value); + if (isLowerDiagonalOnly) + { + scores[col][row] = scores[row][col]; + } + col++; + } catch (NumberFormatException e) + { + err = String.format( + "Invalid score value '%s' at line %d column %d", value, + lineNo, col); + throw new FileFormatException(err); + } + } + } + + /** + * Parse the line in an aaindex file that looks like + * + *
+   * M rows = ARNDCQEGHILKMFPSTWYV, cols = ARNDCQEGHILKMFPSTWYV
+   * 
+ * + * rejecting it if rows and cols do not match. Returns the string of + * characters in the row/cols alphabet. + * + * @param lineNo + * @param data + * @return + * @throws FileFormatException + */ + protected char[] parseAAindexRowsColumns(int lineNo, String data) + throws FileFormatException + { + String err = "Unexpected aaIndex score matrix data at line " + lineNo + + ": " + data; + + try + { + String[] toks = data.split(","); + String rowsAlphabet = toks[0].split("=")[1].trim(); + String colsAlphabet = toks[1].split("=")[1].trim(); + if (!rowsAlphabet.equals(colsAlphabet)) + { + throw new FileFormatException("rows != cols"); + } + return rowsAlphabet.toCharArray(); + } catch (Throwable t) + { + throw new FileFormatException(err + " " + t.getMessage()); + } + } + + /** + * Answers true if line is one we are not interested in from AAindex format + * file + * + * @param data + * @return + */ + protected boolean skipAAindexLine(String data) + { + if (data.startsWith(COMMENT_CHAR) || data.length() == 0) + { + return true; + } + if (data.startsWith("*") || data.startsWith("R ") + || data.startsWith("A ") || data.startsWith("T ") + || data.startsWith("J ") || data.startsWith("//")) + { + return true; + } + return false; + } + + public String getMatrixName() + { + return matrixName; + } +} diff --git a/src/jalview/io/VamsasAppDatastore.java b/src/jalview/io/VamsasAppDatastore.java index 2c35547..1cab8ca 100644 --- a/src/jalview/io/VamsasAppDatastore.java +++ b/src/jalview/io/VamsasAppDatastore.java @@ -1942,7 +1942,7 @@ public class VamsasAppDatastore TreePanel tp = null; if (vstree.isValidTree()) { - tp = alignFrame.ShowNewickTree(vstree.getNewickTree(), + tp = alignFrame.showNewickTree(vstree.getNewickTree(), vstree.getTitle(), vstree.getInputData(), 600, 500, t * 20 + 50, t * 20 + 50); diff --git a/src/jalview/io/packed/JalviewDataset.java b/src/jalview/io/packed/JalviewDataset.java index c1ca1b7..9f84c16 100644 --- a/src/jalview/io/packed/JalviewDataset.java 
+++ b/src/jalview/io/packed/JalviewDataset.java @@ -20,6 +20,7 @@ */ package jalview.io.packed; +import jalview.analysis.TreeModel; import jalview.api.FeatureColourI; import jalview.datamodel.AlignmentI; import jalview.datamodel.SequenceI; @@ -151,8 +152,7 @@ public class JalviewDataset { // the following works because all trees are already had node/SequenceI // associations created. - jalview.analysis.NJTree njt = new jalview.analysis.NJTree( - al.getSequencesArray(), nf); + TreeModel njt = new TreeModel(al.getSequencesArray(), null, nf); // this just updates the displayed leaf name on the tree according to // the SequenceIs. njt.renameAssociatedNodes(); diff --git a/src/jalview/io/vamsas/Tree.java b/src/jalview/io/vamsas/Tree.java index a3781a7..d800d20 100644 --- a/src/jalview/io/vamsas/Tree.java +++ b/src/jalview/io/vamsas/Tree.java @@ -20,7 +20,8 @@ */ package jalview.io.vamsas; -import jalview.analysis.NJTree; +import jalview.analysis.TreeBuilder; +import jalview.analysis.TreeModel; import jalview.bin.Cache; import jalview.datamodel.AlignmentI; import jalview.datamodel.AlignmentView; @@ -219,15 +220,17 @@ public class Tree extends DatastoreItem prov.getEntry(0).setUser(provEntry.getUser()); prov.getEntry(0).setApp(provEntry.getApp()); prov.getEntry(0).setDate(provEntry.getDate()); - if (tp.getTree().hasOriginalSequenceData()) + + AlignmentView originalData = tp.getTree().getOriginalData(); + if (originalData != null) { Input vInput = new Input(); // LATER: check to see if tree input data is contained in this alignment - // or just correctly resolve the tree's seqData to the correct alignment // in // the document. 
- Vector alsqrefs = getjv2vObjs(findAlignmentSequences(jal, - tp.getTree().seqData.getSequences())); + Vector alsqrefs = getjv2vObjs(findAlignmentSequences(jal, tp + .getTree().getOriginalData().getSequences())); Object[] alsqs = new Object[alsqrefs.size()]; alsqrefs.copyInto(alsqs); vInput.setObjRef(alsqs); @@ -239,12 +242,13 @@ public class Tree extends DatastoreItem prov.getEntry(0).addParam(new Param()); prov.getEntry(0).getParam(0).setName("treeType"); prov.getEntry(0).getParam(0).setType("utf8"); - prov.getEntry(0).getParam(0).setContent("NJ"); // TODO: type of tree is a - // general parameter - int ranges[] = tp.getTree().seqData.getVisibleContigs(); + prov.getEntry(0).getParam(0) + .setContent(TreeBuilder.NEIGHBOUR_JOINING); + // TODO: type of tree is a general parameter + int ranges[] = originalData.getVisibleContigs(); // VisibleContigs are with respect to alignment coordinates. Still need // offsets - int start = tp.getTree().seqData.getAlignmentOrigin(); + int start = tp.getTree().getOriginalData().getAlignmentOrigin(); for (int r = 0; r < ranges.length; r += 2) { Seg visSeg = new Seg(); @@ -370,13 +374,14 @@ public class Tree extends DatastoreItem /** * construct treenode mappings for mapped sequences * - * @param ntree + * @param treeModel * @param newick * @return */ - public Treenode[] makeTreeNodes(NJTree ntree, Newick newick) + public Treenode[] makeTreeNodes(TreeModel treeModel, Newick newick) { - Vector leaves = ntree.findLeaves(ntree.getTopNode()); + Vector leaves = treeModel.findLeaves(treeModel + .getTopNode()); Vector tnv = new Vector(); Enumeration l = leaves.elements(); Hashtable nodespecs = new Hashtable(); @@ -496,7 +501,7 @@ public class Tree extends DatastoreItem bindjvvobj(tp, tree); tree.setTitle(tp.getTitle()); Newick newick = new Newick(); - newick.setContent(tp.getTree().toString()); + newick.setContent(tp.getTree().print()); newick.setTitle(tp.getTitle()); tree.addNewick(newick); tree.setProvenance(makeTreeProvenance(jal, tp)); 
diff --git a/src/jalview/jbgui/GAlignFrame.java b/src/jalview/jbgui/GAlignFrame.java index b759d64..58034d9 100755 --- a/src/jalview/jbgui/GAlignFrame.java +++ b/src/jalview/jbgui/GAlignFrame.java @@ -129,7 +129,7 @@ public class GAlignFrame extends JInternalFrame protected JMenu sort = new JMenu(); - protected JMenu calculateTree = new JMenu(); + protected JMenuItem calculateTree = new JMenuItem(); protected JCheckBoxMenuItem padGapsMenuitem = new JCheckBoxMenuItem(); @@ -523,36 +523,6 @@ public class GAlignFrame extends JInternalFrame pairwiseAlignmentMenuItem_actionPerformed(e); } }); - JMenuItem PCAMenuItem = new JMenuItem( - MessageManager.getString("label.principal_component_analysis")); - PCAMenuItem.addActionListener(new ActionListener() - { - @Override - public void actionPerformed(ActionEvent e) - { - PCAMenuItem_actionPerformed(e); - } - }); - JMenuItem averageDistanceTreeMenuItem = new JMenuItem( - MessageManager.getString("label.average_distance_identity")); - averageDistanceTreeMenuItem.addActionListener(new ActionListener() - { - @Override - public void actionPerformed(ActionEvent e) - { - averageDistanceTreeMenuItem_actionPerformed(e); - } - }); - JMenuItem neighbourTreeMenuItem = new JMenuItem( - MessageManager.getString("label.neighbour_joining_identity")); - neighbourTreeMenuItem.addActionListener(new ActionListener() - { - @Override - public void actionPerformed(ActionEvent e) - { - neighbourTreeMenuItem_actionPerformed(e); - } - }); this.getContentPane().setLayout(new BorderLayout()); alignFrameMenuBar.setFont(new java.awt.Font("Verdana", 0, 11)); @@ -563,27 +533,6 @@ public class GAlignFrame extends JInternalFrame outputTextboxMenu.setText(MessageManager .getString("label.out_to_textbox")); - - JMenuItem avDistanceTreeBlosumMenuItem = new JMenuItem( - MessageManager.getString("label.average_distance_blosum62")); - avDistanceTreeBlosumMenuItem.addActionListener(new ActionListener() - { - @Override - public void actionPerformed(ActionEvent e) - 
{ - avTreeBlosumMenuItem_actionPerformed(e); - } - }); - JMenuItem njTreeBlosumMenuItem = new JMenuItem( - MessageManager.getString("label.neighbour_blosum62")); - njTreeBlosumMenuItem.addActionListener(new ActionListener() - { - @Override - public void actionPerformed(ActionEvent e) - { - njTreeBlosumMenuItem_actionPerformed(e); - } - }); annotationPanelMenuItem.setActionCommand(""); annotationPanelMenuItem.setText(MessageManager .getString("label.show_annotations")); @@ -1203,7 +1152,7 @@ public class GAlignFrame extends JInternalFrame @Override public void menuSelected(MenuEvent e) { - buildTreeMenu(); + buildTreeSortMenu(); } @Override @@ -1240,8 +1189,8 @@ public class GAlignFrame extends JInternalFrame }); sortByAnnotScore.setVisible(false); - calculateTree - .setText(MessageManager.getString("action.calculate_tree")); + calculateTree.setText(MessageManager + .getString("action.calculate_tree_pca")); padGapsMenuitem.setText(MessageManager.getString("label.pad_gaps")); padGapsMenuitem.setState(jalview.bin.Cache @@ -1843,7 +1792,6 @@ public class GAlignFrame extends JInternalFrame calculateMenu.add(calculateTree); calculateMenu.addSeparator(); calculateMenu.add(pairwiseAlignmentMenuItem); - calculateMenu.add(PCAMenuItem); calculateMenu.addSeparator(); calculateMenu.add(showTranslation); calculateMenu.add(showReverse); @@ -2330,26 +2278,10 @@ public class GAlignFrame extends JInternalFrame { } - protected void PCAMenuItem_actionPerformed(ActionEvent e) - { - } - - protected void averageDistanceTreeMenuItem_actionPerformed(ActionEvent e) - { - } - protected void neighbourTreeMenuItem_actionPerformed(ActionEvent e) { } - protected void njTreeBlosumMenuItem_actionPerformed(ActionEvent e) - { - } - - protected void avTreeBlosumMenuItem_actionPerformed(ActionEvent e) - { - } - protected void conservationMenuItem_actionPerformed(boolean selected) { } @@ -2634,7 +2566,7 @@ public class GAlignFrame extends JInternalFrame } - public void buildTreeMenu() + public void 
buildTreeSortMenu() { } diff --git a/src/jalview/jbgui/GPCAPanel.java b/src/jalview/jbgui/GPCAPanel.java index 0bc6cac..3715acc 100755 --- a/src/jalview/jbgui/GPCAPanel.java +++ b/src/jalview/jbgui/GPCAPanel.java @@ -25,6 +25,7 @@ import jalview.util.MessageManager; import java.awt.BorderLayout; import java.awt.Color; import java.awt.FlowLayout; +import java.awt.Font; import java.awt.GridLayout; import java.awt.event.ActionEvent; import java.awt.event.ActionListener; @@ -43,54 +44,20 @@ import javax.swing.event.MenuListener; public class GPCAPanel extends JInternalFrame { - JPanel jPanel2 = new JPanel(); + private static final Font VERDANA_12 = new Font("Verdana", 0, 12); - JLabel jLabel1 = new JLabel(); + protected JComboBox xCombobox = new JComboBox(); - JLabel jLabel2 = new JLabel(); + protected JComboBox yCombobox = new JComboBox(); - JLabel jLabel3 = new JLabel(); + protected JComboBox zCombobox = new JComboBox(); - protected JComboBox xCombobox = new JComboBox(); - - protected JComboBox yCombobox = new JComboBox(); - - protected JComboBox zCombobox = new JComboBox(); - - protected JButton resetButton = new JButton(); - - FlowLayout flowLayout1 = new FlowLayout(); - - BorderLayout borderLayout1 = new BorderLayout(); - - JMenuBar jMenuBar1 = new JMenuBar(); - - JMenu fileMenu = new JMenu(); - - JMenu saveMenu = new JMenu(); - - protected JMenu scoreMatrixMenu = new JMenu(); - - JMenuItem eps = new JMenuItem(); - - JMenuItem png = new JMenuItem(); - - JMenuItem print = new JMenuItem(); - - JMenuItem outputValues = new JMenuItem(); - - JMenuItem outputPoints = new JMenuItem(); - - JMenuItem outputProjPoints = new JMenuItem(); + protected JMenu scoreModelMenu = new JMenu(); protected JMenu viewMenu = new JMenu(); protected JCheckBoxMenuItem showLabels = new JCheckBoxMenuItem(); - JMenuItem bgcolour = new JMenuItem(); - - JMenuItem originalSeqData = new JMenuItem(); - protected JMenu associateViewsMenu = new JMenu(); protected JMenu calcSettings = new JMenu(); @@ 
-99,12 +66,8 @@ public class GPCAPanel extends JInternalFrame protected JCheckBoxMenuItem protSetting = new JCheckBoxMenuItem(); - protected JCheckBoxMenuItem jvVersionSetting = new JCheckBoxMenuItem(); - protected JLabel statusBar = new JLabel(); - protected GridLayout statusPanelLayout = new GridLayout(); - protected JPanel statusPanel = new JPanel(); public GPCAPanel() @@ -123,49 +86,55 @@ public class GPCAPanel extends JInternalFrame yCombobox.addItem("dim " + i); zCombobox.addItem("dim " + i); } - - setJMenuBar(jMenuBar1); } private void jbInit() throws Exception { - this.getContentPane().setLayout(borderLayout1); - jPanel2.setLayout(flowLayout1); - jLabel1.setFont(new java.awt.Font("Verdana", 0, 12)); + this.getContentPane().setLayout(new BorderLayout()); + JPanel jPanel2 = new JPanel(); + jPanel2.setLayout(new FlowLayout()); + JLabel jLabel1 = new JLabel(); + jLabel1.setFont(VERDANA_12); jLabel1.setText("x="); - jLabel2.setFont(new java.awt.Font("Verdana", 0, 12)); + JLabel jLabel2 = new JLabel(); + jLabel2.setFont(VERDANA_12); jLabel2.setText("y="); - jLabel3.setFont(new java.awt.Font("Verdana", 0, 12)); + JLabel jLabel3 = new JLabel(); + jLabel3.setFont(VERDANA_12); jLabel3.setText("z="); jPanel2.setBackground(Color.white); jPanel2.setBorder(null); - zCombobox.setFont(new java.awt.Font("Verdana", 0, 12)); - zCombobox.addActionListener(new java.awt.event.ActionListener() + zCombobox.setFont(VERDANA_12); + zCombobox.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { zCombobox_actionPerformed(e); } }); - yCombobox.setFont(new java.awt.Font("Verdana", 0, 12)); - yCombobox.addActionListener(new java.awt.event.ActionListener() + yCombobox.setFont(VERDANA_12); + yCombobox.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { yCombobox_actionPerformed(e); } }); - xCombobox.setFont(new java.awt.Font("Verdana", 0, 12)); - xCombobox.addActionListener(new 
java.awt.event.ActionListener() + xCombobox.setFont(VERDANA_12); + xCombobox.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { xCombobox_actionPerformed(e); } }); - resetButton.setFont(new java.awt.Font("Verdana", 0, 12)); + JButton resetButton = new JButton(); + resetButton.setFont(VERDANA_12); resetButton.setText(MessageManager.getString("action.reset")); - resetButton.addActionListener(new java.awt.event.ActionListener() + resetButton.addActionListener(new ActionListener() { @Override public void actionPerformed(ActionEvent e) @@ -173,51 +142,64 @@ public class GPCAPanel extends JInternalFrame resetButton_actionPerformed(e); } }); + JMenu fileMenu = new JMenu(); fileMenu.setText(MessageManager.getString("action.file")); + JMenu saveMenu = new JMenu(); saveMenu.setText(MessageManager.getString("action.save_as")); - eps.setText("EPS"); + JMenuItem eps = new JMenuItem("EPS"); eps.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { eps_actionPerformed(e); } }); - png.setText("PNG"); + JMenuItem png = new JMenuItem("PNG"); png.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { png_actionPerformed(e); } }); + JMenuItem outputValues = new JMenuItem(); outputValues.setText(MessageManager.getString("label.output_values")); outputValues.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { outputValues_actionPerformed(e); } }); + JMenuItem outputPoints = new JMenuItem(); outputPoints.setText(MessageManager.getString("label.output_points")); outputPoints.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { outputPoints_actionPerformed(e); } }); + JMenuItem outputProjPoints = new JMenuItem(); outputProjPoints.setText(MessageManager .getString("label.output_transformed_points")); outputProjPoints.addActionListener(new ActionListener() { + @Override 
public void actionPerformed(ActionEvent e) { outputProjPoints_actionPerformed(e); } }); + JMenuItem print = new JMenuItem(); + print.setText(MessageManager.getString("action.print")); print.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { print_actionPerformed(e); @@ -226,32 +208,38 @@ public class GPCAPanel extends JInternalFrame viewMenu.setText(MessageManager.getString("action.view")); viewMenu.addMenuListener(new MenuListener() { + @Override public void menuSelected(MenuEvent e) { viewMenu_menuSelected(); } + @Override public void menuDeselected(MenuEvent e) { } + @Override public void menuCanceled(MenuEvent e) { } }); - scoreMatrixMenu.setText(MessageManager + scoreModelMenu.setText(MessageManager .getString("label.select_score_model")); - scoreMatrixMenu.addMenuListener(new MenuListener() + scoreModelMenu.addMenuListener(new MenuListener() { + @Override public void menuSelected(MenuEvent e) { - scoreMatrix_menuSelected(); + scoreModel_menuSelected(); } + @Override public void menuDeselected(MenuEvent e) { } + @Override public void menuCanceled(MenuEvent e) { } @@ -259,23 +247,27 @@ public class GPCAPanel extends JInternalFrame showLabels.setText(MessageManager.getString("label.show_labels")); showLabels.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { showLabels_actionPerformed(e); } }); - print.setText(MessageManager.getString("action.print")); + JMenuItem bgcolour = new JMenuItem(); bgcolour.setText(MessageManager.getString("action.background_colour")); bgcolour.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { bgcolour_actionPerformed(e); } }); + JMenuItem originalSeqData = new JMenuItem(); originalSeqData.setText(MessageManager.getString("label.input_data")); originalSeqData.addActionListener(new ActionListener() { + @Override public void actionPerformed(ActionEvent e) { originalSeqData_actionPerformed(e); @@ 
-305,22 +297,12 @@ public class GPCAPanel extends JInternalFrame protSetting_actionPerfomed(arg0); } }); - jvVersionSetting.setText(MessageManager - .getString("label.jalview_pca_calculation")); - jvVersionSetting.addActionListener(new ActionListener() - { - @Override - public void actionPerformed(ActionEvent arg0) - { - jvVersionSetting_actionPerfomed(arg0); - } - }); - calcSettings.add(jvVersionSetting); + calcSettings.add(nuclSetting); calcSettings.add(protSetting); - calcSettings.add(scoreMatrixMenu); - statusPanel.setLayout(statusPanelLayout); - statusBar.setFont(new java.awt.Font("Verdana", 0, 12)); + calcSettings.add(scoreModelMenu); + statusPanel.setLayout(new GridLayout()); + statusBar.setFont(VERDANA_12); // statusPanel.setBackground(Color.lightGray); // statusBar.setBackground(Color.lightGray); // statusPanel.add(statusBar, null); @@ -335,9 +317,12 @@ public class GPCAPanel extends JInternalFrame jPanel2.add(jLabel3, null); jPanel2.add(zCombobox, null); jPanel2.add(resetButton, null); + + JMenuBar jMenuBar1 = new JMenuBar(); jMenuBar1.add(fileMenu); jMenuBar1.add(viewMenu); jMenuBar1.add(calcSettings); + setJMenuBar(jMenuBar1); fileMenu.add(saveMenu); fileMenu.add(outputValues); fileMenu.add(print); @@ -351,7 +336,7 @@ public class GPCAPanel extends JInternalFrame viewMenu.add(associateViewsMenu); } - protected void scoreMatrix_menuSelected() + protected void scoreModel_menuSelected() { // TODO Auto-generated method stub @@ -438,10 +423,4 @@ public class GPCAPanel extends JInternalFrame { } - - protected void jvVersionSetting_actionPerfomed(ActionEvent arg0) - { - // TODO Auto-generated method stub - - } } diff --git a/src/jalview/math/Matrix.java b/src/jalview/math/Matrix.java index de0bf77..b39d3c9 100755 --- a/src/jalview/math/Matrix.java +++ b/src/jalview/math/Matrix.java @@ -65,7 +65,8 @@ public class Matrix implements MatrixI } /** - * Creates a new Matrix object. 
For example + * Creates a new Matrix object containing a copy of the supplied array values. + * For example * *
    *   new Matrix(new double[][] {{2, 3, 4}, {5, 6, 7})
@@ -85,13 +86,27 @@ public class Matrix implements MatrixI
   {
     this.rows = values.length;
     this.cols = this.rows == 0 ? 0 : values[0].length;
-    this.value = values;
+
+    /*
+     * make a defensive copy, so the caller's array is not shared with this matrix
+     */
+    this.value = new double[rows][];
+    int i = 0;
+    for (double[] row : values)
+    {
+      if (row != null)
+      {
+        value[i] = new double[row.length];
+        System.arraycopy(row, 0, value[i], 0, row.length);
+      }
+      i++;
+    }
   }
 
   /**
    * Returns a new matrix which is the transpose of this one
    * 
-   * @return DOCUMENT ME!
+   * @return
    */
   @Override
   public MatrixI transpose()
@@ -532,6 +547,7 @@ public class Matrix implements MatrixI
     return value[i][j];
   }
 
+  @Override
   public void setValue(int i, int j, double val)
   {
     value[i][j] = val;
@@ -889,4 +905,91 @@ public class Matrix implements MatrixI
     System.arraycopy(value[i], 0, row, 0, cols);
     return row;
   }
+
+  /**
+   * Returns a length 2 array of {minValue, maxValue} of all values in the
+   * matrix. Returns null if the matrix is null or empty.
+   * 
+   * @return
+   */
+  double[] findMinMax()
+  {
+    if (value == null)
+    {
+      return null;
+    }
+    double min = Double.MAX_VALUE;
+    double max = -Double.MAX_VALUE;
+    boolean empty = true;
+    for (double[] row : value)
+    {
+      if (row != null)
+      {
+        for (double x : row)
+        {
+          empty = false;
+          if (x > max)
+          {
+            max = x;
+          }
+          if (x < min)
+          {
+            min = x;
+          }
+        }
+      }
+    }
+    return empty ? null : new double[] { min, max };
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public void reverseRange(boolean maxToZero)
+  {
+    if (value == null)
+    {
+      return;
+    }
+    double[] minMax = findMinMax();
+    if (minMax == null)
+    {
+      return; // empty matrix
+    }
+    double subtractFrom = maxToZero ? minMax[1] : minMax[0] + minMax[1];
+
+    for (double[] row : value)
+    {
+      if (row != null)
+      {
+        int j = 0;
+        for (double x : row)
+        {
+          row[j] = subtractFrom - x;
+          j++;
+        }
+      }
+    }
+  }
+
+  /**
+   * Multiplies every entry in the matrix by the given value.
+   * 
+   * @param by the factor by which every entry is multiplied
+   */
+  @Override
+  public void multiply(double by)
+  {
+    for (double[] row : value)
+    {
+      if (row != null)
+      {
+        for (int i = 0; i < row.length; i++)
+        {
+          row[i] *= by;
+        }
+      }
+    }
+  }
 }
diff --git a/src/jalview/math/MatrixI.java b/src/jalview/math/MatrixI.java
index d74a98b..94b9333 100644
--- a/src/jalview/math/MatrixI.java
+++ b/src/jalview/math/MatrixI.java
@@ -28,6 +28,15 @@ public interface MatrixI
   double getValue(int i, int j);
 
   /**
+   * Sets the value at row i, column j
+   * 
+   * @param i
+   * @param j
+   * @param d
+   */
+  void setValue(int i, int j, double d);
+
+  /**
    * Answers a copy of the values in the i'th row
    * 
    * @return
@@ -56,4 +65,33 @@ public interface MatrixI
 
   void tred();
 
+  /**
+   * Reverses the range of the matrix values, so that the smallest values become
+   * the largest, and the largest become the smallest. This operation supports
+   * using a distance measure as a similarity measure, or vice versa.
+   * 

+   * If parameter maxToZero is true, then the maximum value becomes
+   * zero, i.e. all values are subtracted from the maximum. This is consistent
+   * with converting an identity similarity score to a distance score - the most
+   * similar (identity) corresponds to zero distance. However note that the
+   * operation is not reversible (unless the original minimum value is zero).
+   * For example a range of 10-40 would become 30-0, which would reverse a
+   * second time to 0-30. Also note that a general similarity measure (such as
+   * BLOSUM) may give different 'identity' scores for different sequences, so
+   * they cannot all convert to zero distance.
+   *

+ * If parameter maxToZero is false, then the values are reflected + * about the average of {min, max} (effectively swapping min and max). This + * operation is reversible. + * + * @param maxToZero + */ + void reverseRange(boolean maxToZero); + + /** + * Multiply all entries by the given value + * + * @param d + */ + void multiply(double d); } diff --git a/src/jalview/schemes/Blosum62ColourScheme.java b/src/jalview/schemes/Blosum62ColourScheme.java index f35b886..70f4910 100755 --- a/src/jalview/schemes/Blosum62ColourScheme.java +++ b/src/jalview/schemes/Blosum62ColourScheme.java @@ -20,6 +20,8 @@ */ package jalview.schemes; +import jalview.analysis.scoremodels.ScoreModels; +import jalview.api.analysis.PairwiseScoreModelI; import jalview.datamodel.AnnotatedCollectionI; import jalview.datamodel.SequenceCollectionI; import jalview.datamodel.SequenceI; @@ -53,6 +55,8 @@ public class Blosum62ColourScheme extends ResidueColourScheme public Color findColour(char res, int j, SequenceI seq, String consensusResidue, float pid) { + PairwiseScoreModelI sm = ScoreModels.getInstance().getBlosum62(); + /* * compare as upper case; note consensusResidue is * always computed as uppercase @@ -75,14 +79,14 @@ public class Blosum62ColourScheme extends ResidueColourScheme } else { - int c = 0; + float score = 0; for (char consensus : consensusResidue.toCharArray()) { - c += ResidueProperties.getBLOSUM62(consensus, res); + score += sm.getPairwiseScore(consensus, res); } - if (c > 0) + if (score > 0) { colour = LIGHT_BLUE; } diff --git a/src/jalview/schemes/ResidueProperties.java b/src/jalview/schemes/ResidueProperties.java index 1e6142d..751175d 100755 --- a/src/jalview/schemes/ResidueProperties.java +++ b/src/jalview/schemes/ResidueProperties.java @@ -20,10 +20,6 @@ */ package jalview.schemes; -import jalview.analysis.scoremodels.FeatureScoreModel; -import jalview.analysis.scoremodels.PIDScoreModel; -import jalview.api.analysis.ScoreModelI; - import java.awt.Color; import 
java.util.ArrayList; import java.util.Arrays; @@ -36,8 +32,6 @@ import java.util.Vector; public class ResidueProperties { - public static Hashtable scoreMatrices = new Hashtable(); - // Stores residue codes/names and colours and other things public static final int[] aaIndex; // aaHash version 2.1.1 and below @@ -477,105 +471,6 @@ public class ResidueProperties // public static final double hydmax = 1.38; // public static final double hydmin = -2.53; - private static final int[][] BLOSUM62 = { - { 4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -3, - -2, 0, -2, -1, 0, -4 }, - { -1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, -1, -3, -2, -1, -1, -3, - -2, -3, -1, 0, -1, -4 }, - { -2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0, -2, -3, -2, 1, 0, -4, -2, - -3, 3, 0, -1, -4 }, - { -2, -2, 1, 6, -3, 0, 2, -1, -1, -3, -4, -1, -3, -3, -1, 0, -1, -4, - -3, -3, 4, 1, -1, -4 }, - { 0, -3, -3, -3, 9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1, - -2, -2, -1, -3, -3, -2, -4 }, - { -1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, 1, 0, -3, -1, 0, -1, -2, -1, - -2, 0, 3, -1, -4 }, - { -1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, 1, -2, -3, -1, 0, -1, -3, -2, - -2, 1, 4, -1, -4 }, - { 0, -2, 0, -1, -3, -2, -2, 6, -2, -4, -4, -2, -3, -3, -2, 0, -2, -2, - -3, -3, -1, -2, -1, -4 }, - { -2, 0, 1, -1, -3, 0, 0, -2, 8, -3, -3, -1, -2, -1, -2, -1, -2, -2, - 2, -3, 0, 0, -1, -4 }, - { -1, -3, -3, -3, -1, -3, -3, -4, -3, 4, 2, -3, 1, 0, -3, -2, -1, -3, - -1, 3, -3, -3, -1, -4 }, - { -1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, -2, 2, 0, -3, -2, -1, -2, - -1, 1, -4, -3, -1, -4 }, - { -1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5, -1, -3, -1, 0, -1, -3, - -2, -2, 0, 1, -1, -4 }, - { -1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1, 5, 0, -2, -1, -1, -1, - -1, 1, -3, -1, -1, -4 }, - { -2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3, 0, 6, -4, -2, -2, 1, - 3, -1, -3, -3, -1, -4 }, - { -1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4, 7, -1, -1, - -4, -3, -2, -2, -1, -2, -4 }, - { 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, 
-2, 0, -1, -2, -1, 4, 1, -3, -2, - -2, 0, 0, 0, -4 }, - { 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 1, 5, -2, - -2, 0, -1, -1, 0, -4 }, - { -3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1, 1, -4, -3, -2, - 11, 2, -3, -4, -3, -2, -4 }, - { -2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2, -1, 3, -3, -2, -2, - 2, 7, -1, -3, -2, -1, -4 }, - { 0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2, 1, -1, -2, -2, 0, -3, - -1, 4, -3, -2, -1, -4 }, - { -2, -1, 3, 4, -3, 0, 1, -1, 0, -3, -4, 0, -3, -3, -2, 0, -1, -4, - -3, -3, 4, 1, -1, -4 }, - { -1, 0, 0, 1, -3, 3, 4, -2, 0, -3, -3, 1, -1, -3, -1, 0, -1, -3, -2, - -2, 1, 4, -1, -4 }, - { 0, -1, -1, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, 0, 0, - -2, -1, -1, -1, -1, -1, -4 }, - { -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, - -4, -4, -4, -4, -4, -4, 1 }, }; - - static final int[][] PAM250 = { - { 2, -2, 0, 0, -2, 0, 0, 1, -1, -1, -2, -1, -1, -3, 1, 1, 1, -6, -3, - 0, 0, 0, 0, -8 }, - { -2, 6, 0, -1, -4, 1, -1, -3, 2, -2, -3, 3, 0, -4, 0, 0, -1, 2, -4, - -2, -1, 0, -1, -8 }, - { 0, 0, 2, 2, -4, 1, 1, 0, 2, -2, -3, 1, -2, -3, 0, 1, 0, -4, -2, -2, - 2, 1, 0, -8 }, - { 0, -1, 2, 4, -5, 2, 3, 1, 1, -2, -4, 0, -3, -6, -1, 0, 0, -7, -4, - -2, 3, 3, -1, -8 }, - { -2, -4, -4, -5, 12, -5, -5, -3, -3, -2, -6, -5, -5, -4, -3, 0, -2, - -8, 0, -2, -4, -5, -3, -8 }, - { 0, 1, 1, 2, -5, 4, 2, -1, 3, -2, -2, 1, -1, -5, 0, -1, -1, -5, -4, - -2, 1, 3, -1, -8 }, - { 0, -1, 1, 3, -5, 2, 4, 0, 1, -2, -3, 0, -2, -5, -1, 0, 0, -7, -4, - -2, 3, 3, -1, -8 }, - { 1, -3, 0, 1, -3, -1, 0, 5, -2, -3, -4, -2, -3, -5, 0, 1, 0, -7, -5, - -1, 0, 0, -1, -8 }, - { -1, 2, 2, 1, -3, 3, 1, -2, 6, -2, -2, 0, -2, -2, 0, -1, -1, -3, 0, - -2, 1, 2, -1, -8 }, - { -1, -2, -2, -2, -2, -2, -2, -3, -2, 5, 2, -2, 2, 1, -2, -1, 0, -5, - -1, 4, -2, -2, -1, -8 }, - { -2, -3, -3, -4, -6, -2, -3, -4, -2, 2, 6, -3, 4, 2, -3, -3, -2, -2, - -1, 2, -3, -3, -1, -8 }, - { -1, 3, 1, 0, -5, 1, 0, -2, 0, -2, -3, 5, 0, -5, -1, 0, 0, -3, -4, - -2, 1, 
0, -1, -8 }, - { -1, 0, -2, -3, -5, -1, -2, -3, -2, 2, 4, 0, 6, 0, -2, -2, -1, -4, - -2, 2, -2, -2, -1, -8 }, - { -3, -4, -3, -6, -4, -5, -5, -5, -2, 1, 2, -5, 0, 9, -5, -3, -3, 0, - 7, -1, -4, -5, -2, -8 }, - { 1, 0, 0, -1, -3, 0, -1, 0, 0, -2, -3, -1, -2, -5, 6, 1, 0, -6, -5, - -1, -1, 0, -1, -8 }, - { 1, 0, 1, 0, 0, -1, 0, 1, -1, -1, -3, 0, -2, -3, 1, 2, 1, -2, -3, - -1, 0, 0, 0, -8 }, - { 1, -1, 0, 0, -2, -1, 0, 0, -1, 0, -2, 0, -1, -3, 0, 1, 3, -5, -3, - 0, 0, -1, 0, -8 }, - { -6, 2, -4, -7, -8, -5, -7, -7, -3, -5, -2, -3, -4, 0, -6, -2, -5, - 17, 0, -6, -5, -6, -4, -8 }, - { -3, -4, -2, -4, 0, -4, -4, -5, 0, -1, -1, -4, -2, 7, -5, -3, -3, 0, - 10, -2, -3, -4, -2, -8 }, - { 0, -2, -2, -2, -2, -2, -2, -1, -2, 4, 2, -2, 2, -1, -1, -1, 0, -6, - -2, 4, -2, -2, -1, -8 }, - { 0, -1, 2, 3, -4, 1, 3, 0, 1, -2, -3, 1, -2, -4, -1, 0, 0, -5, -3, - -2, 3, 2, -1, -8 }, - { 0, 0, 1, 3, -5, 3, 3, 0, 2, -2, -3, 0, -2, -5, 0, 0, -1, -6, -4, - -2, 2, 3, -1, -8 }, - { 0, -1, 0, -1, -3, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, 0, 0, -4, - -2, -1, -1, -1, -1, -8 }, - { -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, - -8, -8, -8, -8, -8, -8, 1 }, }; // not currently used // public static final Map ssHash = new Hashtable STOP = Arrays.asList("TGA", "TAA", "TAG"); public static String START = "ATG"; @@ -1244,15 +1112,6 @@ public class ResidueProperties propMatrixPos[i][i] = maxP; propMatrixEpos[i][i] = maxEP; } - // JAL-1512 comment out physicochemical score matrices for 2.8.1 release - // scoreMatrices.put("Conservation Pos", new - // ScoreMatrix("Conservation Pos",propMatrixPos,0)); - // scoreMatrices.put("Conservation Both", new - // ScoreMatrix("Conservation Both",propMatrixF,0)); - // scoreMatrices.put("Conservation EnhPos", new - // ScoreMatrix("Conservation EnhPos",propMatrixEpos,0)); - scoreMatrices.put("PID", new PIDScoreModel()); - scoreMatrices.put("Displayed Features", new FeatureScoreModel()); } private ResidueProperties() @@ -1279,39 +1138,6 @@ 
public class ResidueProperties return aa3Hash; } - public static int[][] getDNA() - { - return ResidueProperties.DNA; - } - - public static int[][] getBLOSUM62() - { - return ResidueProperties.BLOSUM62; - } - - public static int getPAM250(String A1, String A2) - { - return getPAM250(A1.charAt(0), A2.charAt(0)); - } - - public static int getBLOSUM62(char c1, char c2) - { - int pog = 0; - - try - { - int a = aaIndex[c1]; - int b = aaIndex[c2]; - - pog = ResidueProperties.BLOSUM62[a][b]; - } catch (Exception e) - { - // System.out.println("Unknown residue in " + A1 + " " + A2); - } - - return pog; - } - public static String codonTranslate(String lccodon) { String cdn = codonHash2.get(lccodon.toUpperCase()); @@ -1322,53 +1148,6 @@ public class ResidueProperties return cdn; } - public static int[][] getDefaultPeptideMatrix() - { - return ResidueProperties.getBLOSUM62(); - } - - public static int[][] getDefaultDnaMatrix() - { - return ResidueProperties.getDNA(); - } - - /** - * get a ScoreMatrix based on its string name - * - * @param pwtype - * @return matrix in scoreMatrices with key pwtype or null - */ - public static ScoreMatrix getScoreMatrix(String pwtype) - { - Object val = scoreMatrices.get(pwtype); - if (val != null && val instanceof ScoreMatrix) - { - return (ScoreMatrix) val; - } - return null; - } - - /** - * get a ScoreModel based on its string name - * - * @param pwtype - * @return scoremodel of type pwtype or null - */ - public static ScoreModelI getScoreModel(String pwtype) - { - return scoreMatrices.get(pwtype); - } - - public static int getPAM250(char c, char d) - { - int a = aaIndex[c]; - int b = aaIndex[d]; - - int pog = ResidueProperties.PAM250[a][b]; - - return pog; - } - public static Hashtable toDssp3State; static { diff --git a/src/jalview/schemes/ScoreMatrix.java b/src/jalview/schemes/ScoreMatrix.java deleted file mode 100644 index d82f54c..0000000 --- a/src/jalview/schemes/ScoreMatrix.java +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Jalview - A 
Sequence Alignment Editor and Viewer ($$Version-Rel$$) - * Copyright (C) $$Year-Rel$$ The Jalview Authors - * - * This file is part of Jalview. - * - * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 - * of the License, or (at your option) any later version. - * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Jalview. If not, see . - * The Jalview Authors are detailed in the 'AUTHORS' file. - */ -package jalview.schemes; - -import jalview.analysis.scoremodels.PairwiseSeqScoreModel; -import jalview.math.Matrix; -import jalview.math.MatrixI; - -public class ScoreMatrix extends PairwiseSeqScoreModel -{ - String name; - - @Override - public String getName() - { - return name; - } - - /** - * reference to integer score matrix - */ - int[][] matrix; - - /** - * 0 for Protein Score matrix. 
1 for dna score matrix - */ - int type; - - /** - * - * @param name - * Unique, human readable name for the matrix - * @param matrix - * Pairwise scores indexed according to appropriate symbol alphabet - * @param type - * 0 for Protein, 1 for NA - */ - ScoreMatrix(String name, int[][] matrix, int type) - { - this.matrix = matrix; - this.type = type; - this.name = name; - } - - @Override - public boolean isDNA() - { - return type == 1; - } - - @Override - public boolean isProtein() - { - return type == 0; - } - - @Override - public int[][] getMatrix() - { - return matrix; - } - - /** - * Answers the score for substituting first char in A1 with first char in A2 - * - * @param A1 - * @param A2 - * @return - */ - public int getPairwiseScore(String A1, String A2) - { - return getPairwiseScore(A1.charAt(0), A2.charAt(0)); - } - - @Override - public int getPairwiseScore(char c, char d) - { - int score = 0; - - try - { - int a = (type == 0) ? ResidueProperties.aaIndex[c] - : ResidueProperties.nucleotideIndex[c]; - int b = (type == 0) ? ResidueProperties.aaIndex[d] - : ResidueProperties.nucleotideIndex[d]; - score = matrix[a][b]; - } catch (Exception e) - { - // System.out.println("Unknown residue in " + A1 + " " + A2); - } - - return score; - } - - /** - * pretty print the matrix - */ - @Override - public String toString() - { - return outputMatrix(false); - } - - public String outputMatrix(boolean html) - { - StringBuffer sb = new StringBuffer(); - int[] symbols = (type == 0) ? ResidueProperties.aaIndex - : ResidueProperties.nucleotideIndex; - int symMax = (type == 0) ? ResidueProperties.maxProteinIndex - : ResidueProperties.maxNucleotideIndex; - boolean header = true; - if (html) - { - sb.append(""); - } - for (char sym = 'A'; sym <= 'Z'; sym++) - { - if (symbols[sym] >= 0 && symbols[sym] < symMax) - { - if (header) - { - sb.append(html ? "" : ""); - for (char sym2 = 'A'; sym2 <= 'Z'; sym2++) - { - if (symbols[sym2] >= 0 && symbols[sym2] < symMax) - { - sb.append((html ? 
"" : "")); - } - } - header = false; - sb.append(html ? "\n" : "\n"); - } - if (html) - { - sb.append(""); - } - sb.append((html ? "" : "")); - for (char sym2 = 'A'; sym2 <= 'Z'; sym2++) - { - if (symbols[sym2] >= 0 && symbols[sym2] < symMax) - { - sb.append((html ? "" : "")); - } - } - sb.append(html ? "\n" : "\n"); - } - } - if (html) - { - sb.append("
 " : "\t") + sym2 - + (html ? " 
" : "") + sym + (html ? "" : "\t") - + matrix[symbols[sym]][symbols[sym2]] - + (html ? "
"); - } - return sb.toString(); - } - - /** - * Computes an NxN matrix where N is the number of sequences, and entry [i, j] - * is sequence[i] pairwise multiplied with sequence[j], as a sum of scores - * computed using the current score matrix. For example - *

    - *
  • Sequences:
  • - *
  • FKL
  • - *
  • R-D
  • - *
  • QIA
  • - *
  • GWC
  • - *
  • Score matrix is BLOSUM62
  • - *
  • Gaps treated same as X (unknown)
  • - *
  • product [0, 0] = F.F + K.K + L.L = 6 + 5 + 4 = 15
  • - *
  • product [1, 1] = R.R + -.- + D.D = 5 + -1 + 6 = 10
  • - *
  • product [2, 2] = Q.Q + I.I + A.A = 5 + 4 + 4 = 13
  • - *
  • product [3, 3] = G.G + W.W + C.C = 6 + 11 + 9 = 26
  • - *
  • product[0, 1] = F.R + K.- + L.D = -3 + -1 + -3 = -8 - *
  • and so on
  • - *
- */ - public MatrixI computePairwiseScores(String[] seqs) - { - double[][] values = new double[seqs.length][]; - for (int row = 0; row < seqs.length; row++) - { - values[row] = new double[seqs.length]; - for (int col = 0; col < seqs.length; col++) - { - int total = 0; - int width = Math.min(seqs[row].length(), seqs[col].length()); - for (int i = 0; i < width; i++) - { - char c1 = seqs[row].charAt(i); - char c2 = seqs[col].charAt(i); - int score = getPairwiseScore(c1, c2); - total += score; - } - values[row][col] = total; - } - } - return new Matrix(values); - } -} diff --git a/src/jalview/structure/StructureSelectionManager.java b/src/jalview/structure/StructureSelectionManager.java index 3ab642f..0990b56 100644 --- a/src/jalview/structure/StructureSelectionManager.java +++ b/src/jalview/structure/StructureSelectionManager.java @@ -454,7 +454,7 @@ public class StructureSelectionManager * Attempt pairwise alignment of the sequence with each chain in the PDB, * and remember the highest scoring chain */ - int max = -10; + float max = -10; AlignSeq maxAlignseq = null; String maxChainId = " "; PDBChain maxChain = null; diff --git a/src/jalview/util/Comparison.java b/src/jalview/util/Comparison.java index 1326647..22e1ab7 100644 --- a/src/jalview/util/Comparison.java +++ b/src/jalview/util/Comparison.java @@ -34,11 +34,11 @@ public class Comparison private static final int TO_UPPER_CASE = 'a' - 'A'; - private static final char GAP_SPACE = ' '; + public static final char GAP_SPACE = ' '; - private static final char GAP_DOT = '.'; + public static final char GAP_DOT = '.'; - private static final char GAP_DASH = '-'; + public static final char GAP_DASH = '-'; public static final String GapChars = new String(new char[] { GAP_SPACE, GAP_DOT, GAP_DASH }); @@ -135,7 +135,9 @@ public class Comparison * @param s2 * SequenceI * @return float + * @deprecated use PIDModel.computePID() */ + @Deprecated public final static float PID(String seq1, String seq2) { return PID(seq1, seq2, 
0, seq1.length()); @@ -144,6 +146,10 @@ public class Comparison static final int caseShift = 'a' - 'A'; // Another pid with region specification + /** + * @deprecated use PIDModel.computePID() + */ + @Deprecated public final static float PID(String seq1, String seq2, int start, int end) { return PID(seq1, seq2, start, end, true, false); @@ -165,7 +171,9 @@ public class Comparison * @param ungappedOnly * - if true - only count PID over ungapped columns * @return + * @deprecated use PIDModel.computePID() */ + @Deprecated public final static float PID(String seq1, String seq2, int start, int end, boolean wcGaps, boolean ungappedOnly) { diff --git a/src/jalview/util/SetUtils.java b/src/jalview/util/SetUtils.java new file mode 100644 index 0000000..381d9f6 --- /dev/null +++ b/src/jalview/util/SetUtils.java @@ -0,0 +1,43 @@ +package jalview.util; + +import java.util.Set; + +public class SetUtils +{ + /** + * Returns the count of things that are in one or other of two sets but not in + * both. The sets are not modified. + * + * @param set1 + * @param set2 + * @return + */ + public static int countDisjunction(Set set1, + Set set2) + { + if (set1 == null) + { + return set2 == null ? 0 : set2.size(); + } + if (set2 == null) + { + return set1.size(); + } + + int size1 = set1.size(); + int size2 = set2.size(); + Set smallerSet = size1 < size2 ? set1 : set2; + Set largerSet = (smallerSet == set1 ? 
set2 : set1); + int inCommon = 0; + for (Object k : smallerSet) + { + if (largerSet.contains(k)) + { + inCommon++; + } + } + + int notInCommon = (size1 - inCommon) + (size2 - inCommon); + return notInCommon; + } +} diff --git a/src/jalview/viewmodel/PCAModel.java b/src/jalview/viewmodel/PCAModel.java index 0623dab..928d35e 100644 --- a/src/jalview/viewmodel/PCAModel.java +++ b/src/jalview/viewmodel/PCAModel.java @@ -22,6 +22,8 @@ package jalview.viewmodel; import jalview.analysis.PCA; import jalview.api.RotatableCanvasI; +import jalview.api.analysis.ScoreModelI; +import jalview.api.analysis.SimilarityParamsI; import jalview.datamodel.AlignmentView; import jalview.datamodel.SequenceI; import jalview.datamodel.SequencePoint; @@ -30,23 +32,6 @@ import java.util.Vector; public class PCAModel { - /* - * Jalview 2.10.1 treated gaps as X (peptide) or N (nucleotide) - * for pairwise scoring; 2.10.2 uses gap score (last column) in - * score matrix (JAL-2397) - * Set this flag to true (via Groovy) for 2.10.1 behaviour - */ - private static boolean scoreGapAsAny = false; - - public PCAModel(AlignmentView seqstrings2, SequenceI[] seqs2, - boolean nucleotide2) - { - seqstrings = seqstrings2; - seqs = seqs2; - nucleotide = nucleotide2; - score_matrix = nucleotide2 ? "PID" : "BLOSUM62"; - } - private volatile PCA pca; int top; @@ -55,32 +40,41 @@ public class PCAModel SequenceI[] seqs; - /** - * Score matrix used to calculate PC + /* + * Name of score model used to calculate PCA */ - String score_matrix; + ScoreModelI scoreModel; - /** - * use the identity matrix for calculating similarity between sequences. - */ private boolean nucleotide = false; private Vector points; - private boolean jvCalcMode = true; + private SimilarityParamsI similarityParams; - public boolean isJvCalcMode() + /** + * Constructor given sequence data, score model and score calculation + * parameter options. 
+ * + * @param seqData + * @param sqs + * @param nuc + * @param modelName + * @param params + */ + public PCAModel(AlignmentView seqData, SequenceI[] sqs, boolean nuc, + ScoreModelI modelName, + SimilarityParamsI params) { - return jvCalcMode; + seqstrings = seqData; + seqs = sqs; + nucleotide = nuc; + scoreModel = modelName; + similarityParams = params; } public void run() { - char gapChar = scoreGapAsAny ? (nucleotide ? 'N' : 'X') : ' '; - String[] sequenceStrings = seqstrings.getSequenceStrings(gapChar); - pca = new PCA(sequenceStrings, nucleotide, - score_matrix); - pca.setJvCalcMode(jvCalcMode); + pca = new PCA(seqstrings, scoreModel, similarityParams); pca.run(); // Now find the component coordinates @@ -227,19 +221,14 @@ public class PCAModel return pts; } - public void setJvCalcMode(boolean state) - { - jvCalcMode = state; - } - - public String getScore_matrix() + public String getScoreModelName() { - return score_matrix; + return scoreModel == null ? "" : scoreModel.getName(); } - public void setScore_matrix(String score_matrix) + public void setScoreModel(ScoreModelI sm) { - this.score_matrix = score_matrix; + this.scoreModel = sm; } } diff --git a/src/jalview/ws/jws1/SeqSearchWSThread.java b/src/jalview/ws/jws1/SeqSearchWSThread.java index b14917e..edc9ae8 100644 --- a/src/jalview/ws/jws1/SeqSearchWSThread.java +++ b/src/jalview/ws/jws1/SeqSearchWSThread.java @@ -666,7 +666,7 @@ class SeqSearchWSThread extends JWS1Thread implements WSClientI AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT); if (nf != null) { - af.ShowNewickTree(nf, MessageManager.formatMessage( + af.showNewickTree(nf, MessageManager.formatMessage( "label.tree_from", new String[] { this.alTitle })); } // initialise with same renderer settings as in parent alignframe. 
diff --git a/src/jalview/ws/sifts/SiftsClient.java b/src/jalview/ws/sifts/SiftsClient.java index fe3a25b..2ff4a8b 100644 --- a/src/jalview/ws/sifts/SiftsClient.java +++ b/src/jalview/ws/sifts/SiftsClient.java @@ -21,6 +21,8 @@ package jalview.ws.sifts; import jalview.analysis.AlignSeq; +import jalview.analysis.scoremodels.ScoreMatrix; +import jalview.analysis.scoremodels.ScoreModels; import jalview.api.DBRefEntryI; import jalview.api.SiftsClientI; import jalview.datamodel.DBRefEntry; @@ -963,7 +965,7 @@ public class SiftsClient implements SiftsClientI } @Override - public StringBuffer getMappingOutput(MappingOutputPojo mp) + public StringBuilder getMappingOutput(MappingOutputPojo mp) throws SiftsException { String seqRes = mp.getSeqResidue(); @@ -985,7 +987,7 @@ public class SiftsClient implements SiftsClientI int nochunks = ((seqRes.length()) / len) + ((seqRes.length()) % len > 0 ? 1 : 0); // output mappings - StringBuffer output = new StringBuffer(); + StringBuilder output = new StringBuilder(512); output.append(NEWLINE); output.append("Sequence \u27f7 Structure mapping details").append( NEWLINE); @@ -1006,6 +1008,7 @@ public class SiftsClient implements SiftsClientI output.append(String.valueOf(pdbEnd)); output.append(NEWLINE).append(NEWLINE); + ScoreMatrix pam250 = ScoreModels.getInstance().getPam250(); int matchedSeqCount = 0; for (int j = 0; j < nochunks; j++) { @@ -1024,27 +1027,29 @@ public class SiftsClient implements SiftsClientI output.append(NEWLINE); output.append(new Format("%" + (maxid) + "s").form(" ")).append(" "); - // Print out the matching chars + /* + * Print out the match symbols: + * | for exact match (ignoring case) + * . 
if PAM250 score is positive + * else a space + */ for (int i = 0; i < len; i++) { try { if ((i + (j * len)) < seqRes.length()) { - boolean sameChar = Comparison.isSameResidue( - seqRes.charAt(i + (j * len)), - strRes.charAt(i + (j * len)), false); - if (sameChar - && !jalview.util.Comparison.isGap(seqRes.charAt(i - + (j * len)))) + char c1 = seqRes.charAt(i + (j * len)); + char c2 = strRes.charAt(i + (j * len)); + boolean sameChar = Comparison.isSameResidue(c1, c2, false); + if (sameChar && !Comparison.isGap(c1)) { matchedSeqCount++; output.append("|"); } else if (type.equals("pep")) { - if (ResidueProperties.getPAM250(seqRes.charAt(i + (j * len)), - strRes.charAt(i + (j * len))) > 0) + if (pam250.getPairwiseScore(c1, c2) > 0) { output.append("."); } diff --git a/test/jalview/analysis/AlignSeqTest.java b/test/jalview/analysis/AlignSeqTest.java index 4cb5329..837e970 100644 --- a/test/jalview/analysis/AlignSeqTest.java +++ b/test/jalview/analysis/AlignSeqTest.java @@ -20,9 +20,11 @@ */ package jalview.analysis; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNull; +import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals; +import jalview.datamodel.Sequence; import jalview.gui.JvOptionPane; import org.testng.annotations.BeforeClass; @@ -45,10 +47,32 @@ public class AlignSeqTest assertNull(AlignSeq.extractGaps(null, "ACG")); assertNull(AlignSeq.extractGaps("-. 
", null)); - assertEquals(" AC-G.T", AlignSeq.extractGaps("", " AC-G.T")); - assertEquals("AC-G.T", AlignSeq.extractGaps(" ", " AC-G.T")); - assertEquals("ACG.T", AlignSeq.extractGaps(" -", " AC-G.T")); - assertEquals("ACGT", AlignSeq.extractGaps(" -.", " AC-G.T .")); - assertEquals(" ACG.T", AlignSeq.extractGaps("-", " AC-G.T")); + assertEquals(AlignSeq.extractGaps("", " AC-G.T"), " AC-G.T"); + assertEquals(AlignSeq.extractGaps(" ", " AC-G.T"), "AC-G.T"); + assertEquals(AlignSeq.extractGaps(" -", " AC-G.T"), "ACG.T"); + assertEquals(AlignSeq.extractGaps(" -.", " AC-G.T ."), "ACGT"); + assertEquals(AlignSeq.extractGaps("-", " AC-G.T"), " ACG.T"); + } + + @Test(groups = { "Functional" }) + public void testIndexEncode_nucleotide() + { + AlignSeq as = new AlignSeq(new Sequence("s1", "TTAG"), new Sequence( + "s2", "ACGT"), AlignSeq.DNA); + int[] expected = new int[] { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, + 7, 7, 8, 8, 9, 9, -1, -1, 10, -1 }; + String s = "aAcCgGtTuUiIxXrRyYnN .-?"; + assertArrayEquals(expected, as.indexEncode(s)); + } + + @Test(groups = { "Functional" }) + public void testIndexEncode_peptide() + { + AlignSeq as = new AlignSeq(new Sequence("s1", "PFY"), new Sequence( + "s2", "RQW"), AlignSeq.PEP); + int[] expected = new int[] { 0, 0, 1, 1, 2, 2, 21, 21, 22, 22, -1, 23, + -1, -1, -1 }; + String s = "aArRnNzZxX *.-?"; + assertArrayEquals(expected, as.indexEncode(s)); } } diff --git a/test/jalview/analysis/TestAlignSeq.java b/test/jalview/analysis/TestAlignSeq.java index 9fc88ea..70e59c5 100644 --- a/test/jalview/analysis/TestAlignSeq.java +++ b/test/jalview/analysis/TestAlignSeq.java @@ -125,7 +125,7 @@ public class TestAlignSeq }; as.printAlignment(ps); - String expected = "Score = 320\nLength of alignment = 10\nSequence Seq1 : 3 - 18 (Sequence length = 14)\nSequence Seq1 : 1 - 10 (Sequence length = 10)\n\n" + String expected = "Score = 320.0\nLength of alignment = 10\nSequence Seq1 : 3 - 18 (Sequence length = 14)\nSequence Seq1 : 1 - 10 (Sequence 
length = 10)\n\n" + "Seq1 SDFAQQQRRR\n" + " ||||||| \n" + "Seq1 SDFAQQQSSS\n\n" + "Percentage ID = 70.00\n"; diff --git a/test/jalview/analysis/scoremodels/FeatureScoreModelTest.java b/test/jalview/analysis/scoremodels/FeatureDistanceModelTest.java similarity index 50% rename from test/jalview/analysis/scoremodels/FeatureScoreModelTest.java rename to test/jalview/analysis/scoremodels/FeatureDistanceModelTest.java index 66eb5a5..0577fae 100644 --- a/test/jalview/analysis/scoremodels/FeatureScoreModelTest.java +++ b/test/jalview/analysis/scoremodels/FeatureDistanceModelTest.java @@ -20,13 +20,22 @@ */ package jalview.analysis.scoremodels; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import jalview.api.analysis.SimilarityParamsI; +import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; +import jalview.datamodel.AlignmentView; +import jalview.datamodel.Sequence; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.gui.AlignFrame; +import jalview.gui.AlignViewport; import jalview.gui.JvOptionPane; import jalview.io.DataSourceType; import jalview.io.FileLoader; +import jalview.math.MatrixI; import java.util.Arrays; @@ -34,7 +43,7 @@ import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; -public class FeatureScoreModelTest +public class FeatureDistanceModelTest { @BeforeClass(alwaysRun = true) @@ -52,6 +61,17 @@ public class FeatureScoreModelTest int[] sf3 = new int[] { -1, -1, -1, -1, -1, -1, 76, 77 }; + /** + *
+   * <pre>
+   * Load test alignment and add features to sequences: 
+   *      FER1_MESCR FER1_SPIOL FER3_RAPSA FER1_MAIZE 
+   *  sf1     X          X          X  
+   *  sf2                X                     X 
+   *  sf3                                      X
+   * </pre>
+ * + * @return + */ public AlignFrame getTestAlignmentFrame() { AlignFrame alf = new FileLoader(false).LoadFileWaitTillLoaded( @@ -85,7 +105,7 @@ public class FeatureScoreModelTest alf.getFeatureRenderer().findAllFeatures(true); Assert.assertEquals(alf.getFeatureRenderer().getDisplayedFeatureTypes() .size(), 3, "Number of feature types"); - Assert.assertTrue(alf.getCurrentView().areFeaturesDisplayed()); + assertTrue(alf.getCurrentView().areFeaturesDisplayed()); return alf; } @@ -93,15 +113,17 @@ public class FeatureScoreModelTest public void testFeatureScoreModel() throws Exception { AlignFrame alf = getTestAlignmentFrame(); - FeatureScoreModel fsm = new FeatureScoreModel(); - Assert.assertTrue(fsm.configureFromAlignmentView(alf.getCurrentView() + FeatureDistanceModel fsm = new FeatureDistanceModel(); + assertTrue(fsm.configureFromAlignmentView(alf.getCurrentView() .getAlignPanel())); alf.selectAllSequenceMenuItem_actionPerformed(null); - float[][] dm = fsm.findDistances(alf.getViewport().getAlignmentView( - true)); - Assert.assertTrue(dm[0][2] == 0f, + + MatrixI dm = fsm.findDistances( + alf.getViewport().getAlignmentView(true), + SimilarityParams.Jalview); + assertEquals(dm.getValue(0, 2), 0d, "FER1_MESCR (0) should be identical with RAPSA (2)"); - Assert.assertTrue(dm[0][1] > dm[0][2], + assertTrue(dm.getValue(0, 1) > dm.getValue(0, 2), "FER1_MESCR (0) should be further from SPIOL (1) than it is from RAPSA (2)"); } @@ -111,15 +133,16 @@ public class FeatureScoreModelTest AlignFrame alf = getTestAlignmentFrame(); // hiding first two columns shouldn't affect the tree alf.getViewport().hideColumns(0, 1); - FeatureScoreModel fsm = new FeatureScoreModel(); - Assert.assertTrue(fsm.configureFromAlignmentView(alf.getCurrentView() + FeatureDistanceModel fsm = new FeatureDistanceModel(); + assertTrue(fsm.configureFromAlignmentView(alf.getCurrentView() .getAlignPanel())); alf.selectAllSequenceMenuItem_actionPerformed(null); - float[][] dm = 
fsm.findDistances(alf.getViewport().getAlignmentView( - true)); - Assert.assertTrue(dm[0][2] == 0f, + MatrixI dm = fsm.findDistances( + alf.getViewport().getAlignmentView(true), + SimilarityParams.Jalview); + assertEquals(dm.getValue(0, 2), 0d, "FER1_MESCR (0) should be identical with RAPSA (2)"); - Assert.assertTrue(dm[0][1] > dm[0][2], + assertTrue(dm.getValue(0, 1) > dm.getValue(0, 2), "FER1_MESCR (0) should be further from SPIOL (1) than it is from RAPSA (2)"); } @@ -130,21 +153,24 @@ public class FeatureScoreModelTest // hide columns and check tree changes alf.getViewport().hideColumns(3, 4); alf.getViewport().hideColumns(0, 1); - FeatureScoreModel fsm = new FeatureScoreModel(); - Assert.assertTrue(fsm.configureFromAlignmentView(alf.getCurrentView() + FeatureDistanceModel fsm = new FeatureDistanceModel(); + assertTrue(fsm.configureFromAlignmentView(alf.getCurrentView() .getAlignPanel())); alf.selectAllSequenceMenuItem_actionPerformed(null); - float[][] dm = fsm.findDistances(alf.getViewport().getAlignmentView( - true)); - Assert.assertTrue( - dm[0][2] == 0f, + MatrixI dm = fsm.findDistances( + alf.getViewport().getAlignmentView(true), + SimilarityParams.Jalview); + assertEquals( + dm.getValue(0, 2), + 0d, "After hiding last two columns FER1_MESCR (0) should still be identical with RAPSA (2)"); - Assert.assertTrue( - dm[0][1] == 0f, + assertEquals( + dm.getValue(0, 1), + 0d, "After hiding last two columns FER1_MESCR (0) should now also be identical with SPIOL (1)"); for (int s = 0; s < 3; s++) { - Assert.assertTrue(dm[s][3] > 0f, "After hiding last two columns " + assertTrue(dm.getValue(s, 3) > 0d, "After hiding last two columns " + alf.getViewport().getAlignment().getSequenceAt(s).getName() + "(" + s + ") should still be distinct from FER1_MAIZE (3)"); } @@ -165,7 +191,7 @@ public class FeatureScoreModelTest SequenceFeature sf = null; sf = new SequenceFeature("disulphide bond", "", 2, 5, Float.NaN, ""); aseq.addSequenceFeature(sf); - 
Assert.assertTrue(sf.isContactFeature()); + assertTrue(sf.isContactFeature()); af.refreshFeatureUI(true); af.getFeatureRenderer().setAllVisible(Arrays.asList("disulphide bond")); Assert.assertEquals(af.getFeatureRenderer().getDisplayedFeatureTypes() @@ -190,4 +216,135 @@ public class FeatureScoreModelTest .size(), 0); } + @Test(groups = { "Functional" }) + public void testFindDistances() throws Exception + { + String seqs = ">s1\nABCDE\n>seq2\nABCDE\n"; + AlignFrame alf = new FileLoader().LoadFileWaitTillLoaded(seqs, + DataSourceType.PASTE); + SequenceI s1 = alf.getViewport().getAlignment().getSequenceAt(0); + SequenceI s2 = alf.getViewport().getAlignment().getSequenceAt(1); + + /* + * set domain and variant features thus: + * ----5 + * s1 ddd.. + * s1 .vvv. + * s1 ..vvv + * s2 .ddd. + * s2 vv..v + * The number of unshared feature types per column is + * 20120 (two features of the same type doesn't affect score) + * giving an average (pairwise distance) of 5/5 or 1.0 + */ + s1.addSequenceFeature(new SequenceFeature("domain", null, 1, 3, 0f, + null)); + s1.addSequenceFeature(new SequenceFeature("variant", null, 2, 4, 0f, + null)); + s1.addSequenceFeature(new SequenceFeature("variant", null, 3, 5, 0f, + null)); + s2.addSequenceFeature(new SequenceFeature("domain", null, 2, 4, 0f, + null)); + s2.addSequenceFeature(new SequenceFeature("variant", null, 1, 2, 0f, + null)); + s2.addSequenceFeature(new SequenceFeature("variant", null, 5, 5, 0f, + null)); + alf.setShowSeqFeatures(true); + alf.getFeatureRenderer().findAllFeatures(true); + + FeatureDistanceModel fsm = new FeatureDistanceModel(); + assertTrue(fsm.configureFromAlignmentView(alf.getCurrentView() + .getAlignPanel())); + alf.selectAllSequenceMenuItem_actionPerformed(null); + + MatrixI distances = fsm.findDistances(alf.getViewport() + .getAlignmentView(true), SimilarityParams.Jalview); + assertEquals(distances.width(), 2); + assertEquals(distances.height(), 2); + assertEquals(distances.getValue(0, 0), 0d); + 
assertEquals(distances.getValue(1, 1), 0d); + + assertEquals(distances.getValue(0, 1), 1d, + "expected identical pairs. (check normalisation for similarity score)"); + assertEquals(distances.getValue(1, 0), 1d); + } + + /** + * Verify computed distances with varying parameter options + */ + @Test(groups = "Functional") + public void testFindDistances_withParams() + { + AlignFrame af = setupAlignmentView(); + AlignViewport viewport = af.getViewport(); + AlignmentView view = viewport.getAlignmentView(false); + + FeatureDistanceModel sm = new FeatureDistanceModel(); + sm.configureFromAlignmentView(af.alignPanel); + + /* + * feature distance model always normalises by region width + * gap-gap is always included (but scores zero) + * the only variable parameter is 'includeGaps' + */ + + /* + * include gaps + * score = 3 + 3 + 0 + 2 + 3 + 2 = 13/6 + */ + SimilarityParamsI params = new SimilarityParams(true, true, true, true); + MatrixI distances = sm.findDistances(view, params); + assertEquals(distances.getValue(0, 0), 0d); + assertEquals(distances.getValue(1, 1), 0d); + assertEquals(distances.getValue(0, 1), 13d / 6); // should be 13d/6 + assertEquals(distances.getValue(1, 0), 13d / 6); + + /* + * exclude gaps + * score = 3 + 3 + 0 + 0 + 0 + 0 = 6/6 + */ + params = new SimilarityParams(true, true, false, true); + distances = sm.findDistances(view, params); + assertEquals(distances.getValue(0, 1), 6d / 6);// should be 6d/6 + } + + /** + *
+   * <pre>
+   * Set up
+   *   column      1 2 3 4 5 6
+   *        seq s1 F R - K - S
+   *        seq s2 F S - - L
+   *   s1 chain    c c   c   c
+   *   s1 domain   d d   d   d
+   *   s2 chain    c c     c
+   *   s2 metal    m m     m
+   *   s2 Pfam     P P     P
+   *      scores:  3 3 0 2 3 2
+   * </pre>
+ * + * @return + */ + protected AlignFrame setupAlignmentView() + { + /* + * for now, using space for gap to match callers of + * AlignmentView.getSequenceStrings() + * may change this to '-' (with corresponding change to matrices) + */ + SequenceI s1 = new Sequence("s1", "FR K S"); + SequenceI s2 = new Sequence("s2", "FS  L"); + + s1.addSequenceFeature(new SequenceFeature("chain", null, 1, 4, 0f, null)); + s1.addSequenceFeature(new SequenceFeature("domain", null, 1, 4, 0f, + null)); + s2.addSequenceFeature(new SequenceFeature("chain", null, 1, 3, 0f, null)); + s2.addSequenceFeature(new SequenceFeature("metal", null, 1, 3, 0f, null)); + s2.addSequenceFeature(new SequenceFeature("Pfam", null, 1, 3, 0f, null)); + AlignmentI al = new Alignment(new SequenceI[] { s1, s2 }); + AlignFrame af = new AlignFrame(al, 300, 300); + af.setShowSeqFeatures(true); + af.getFeatureRenderer().findAllFeatures(true); + return af; + } + } diff --git a/test/jalview/analysis/scoremodels/PIDModelTest.java b/test/jalview/analysis/scoremodels/PIDModelTest.java new file mode 100644 index 0000000..212f825 --- /dev/null +++ b/test/jalview/analysis/scoremodels/PIDModelTest.java @@ -0,0 +1,176 @@ +package jalview.analysis.scoremodels; + +import static org.testng.Assert.assertEquals; + +import jalview.api.analysis.SimilarityParamsI; +import jalview.util.Comparison; + +import org.testng.annotations.Test; + +public class PIDModelTest +{ + private static final double DELTA = 0.00001D; + + @Test(groups = "Functional") + public void testGetPairwiseScore() + { + PIDModel sm = new PIDModel(); + assertEquals(sm.getPairwiseScore('A', 'A'), 1f); + assertEquals(sm.getPairwiseScore('A', 'a'), 1f); + assertEquals(sm.getPairwiseScore('a', 'A'), 1f); + assertEquals(sm.getPairwiseScore('A', 'B'), 0f); + assertEquals(sm.getPairwiseScore('A', ' '), 0f); + assertEquals(sm.getPairwiseScore(' ', ' '), 0f); + assertEquals(sm.getPairwiseScore('.', '.'), 0f); + assertEquals(sm.getPairwiseScore('-', '-'), 0f); + } + + /** + 
* Regression test to verify that a (suitably configured) PIDModel computes + * the same percentage identities as the Comparison.PID method + */ + @Test(groups = "Functional") + public void testComputePID_matchesComparisonPID() + { + SimilarityParamsI params = new SimilarityParams(true, true, true, true); + + /* + * same length, no gaps + */ + String s1 = "ARFNQDWSGI"; + String s2 = "ARKNQDQSGI"; + + new PIDModel(); + double newScore = PIDModel.computePID(s1, s2, params); + double oldScore = Comparison.PID(s1, s2); + assertEquals(newScore, oldScore, DELTA); + + /* + * same length, with gaps + */ + s1 = "-RFNQDWSGI"; + s2 = "ARKNQ-QSGI"; + new PIDModel(); + newScore = PIDModel.computePID(s1, s2, params); + oldScore = Comparison.PID(s1, s2); + assertEquals(newScore, oldScore, DELTA); + + /* + * s2 longer than s1, with gaps + */ + s1 = "ARK-"; + s2 = "-RFNQ"; + new PIDModel(); + newScore = PIDModel.computePID(s1, s2, params); + oldScore = Comparison.PID(s1, s2); + assertEquals(newScore, oldScore, DELTA); + + /* + * s1 longer than s2, with gaps + */ + s1 = "-RFNQ"; + s2 = "ARK-"; + new PIDModel(); + newScore = PIDModel.computePID(s1, s2, params); + oldScore = Comparison.PID(s1, s2); + assertEquals(newScore, oldScore, DELTA); + + /* + * same but now also with gapped columns + */ + s1 = "-R-F-NQ"; + s2 = "AR-K--"; + new PIDModel(); + newScore = PIDModel.computePID(s1, s2, params); + oldScore = Comparison.PID(s1, s2); + assertEquals(newScore, oldScore, DELTA); + } + + /** + * Tests for percentage identity variants where only the shorter length of two + * sequences is used + */ + @Test(groups = "Functional") + public void testComputePID_matchShortestSequence() + { + String s1 = "FR-K-S"; + String s2 = "FS--L"; + + /* + * match gap-gap and gap-char + * PID = 4/5 = 80% + */ + SimilarityParamsI params = new SimilarityParams(true, true, true, true); + assertEquals(PIDModel.computePID(s1, s2, params), 80d); + + /* + * match gap-char but not gap-gap + * PID = 3/4 = 75% + */ + 
params = new SimilarityParams(false, true, true, true); + assertEquals(PIDModel.computePID(s1, s2, params), 75d); + + /* + * include gaps but don't match them + * include gap-gap, counted as identity + * PID = 2/5 = 40% + */ + params = new SimilarityParams(true, false, true, true); + assertEquals(PIDModel.computePID(s1, s2, params), 40d); + + /* + * include gaps but don't match them + * exclude gap-gap + * PID = 1/4 = 25% + */ + params = new SimilarityParams(false, false, true, true); + assertEquals(PIDModel.computePID(s1, s2, params), 25d); + } + + /** + * Tests for percentage identity variants where the longer length of two + * sequences is used + */ + @Test(groups = "Functional") + public void testComputePID_matchLongestSequence() + { + String s1 = "FR-K-S"; + String s2 = "FS--L"; + + /* + * match gap-gap and gap-char + * shorter sequence treated as if with trailing gaps + * PID = 5/6 = 83.333...% + */ + SimilarityParamsI params = new SimilarityParams(true, true, true, false); + assertEquals(PIDModel.computePID(s1, s2, params), 500d / 6); + + /* + * match gap-char but not gap-gap + * PID = 4/5 = 80% + */ + params = new SimilarityParams(false, true, true, false); + assertEquals(PIDModel.computePID(s1, s2, params), 80d); + + /* + * include gaps but don't match them + * include gap-gap, counted as identity + * PID = 2/6 = 33.333...% + */ + params = new SimilarityParams(true, false, true, false); + assertEquals(PIDModel.computePID(s1, s2, params), 100d / 3); + + /* + * include gaps but don't match them + * exclude gap-gap + * PID = 1/5 = 20% + */ + params = new SimilarityParams(false, false, true, false); + assertEquals(PIDModel.computePID(s1, s2, params), 20d); + + /* + * no tests for matchGaps=true, includeGaps=false + * as it doesn't make sense + */ + } +} diff --git a/test/jalview/analysis/scoremodels/ScoreMatrixTest.java b/test/jalview/analysis/scoremodels/ScoreMatrixTest.java new file mode 100644 index 0000000..1a5d43c --- /dev/null +++ 
b/test/jalview/analysis/scoremodels/ScoreMatrixTest.java @@ -0,0 +1,588 @@ +package jalview.analysis.scoremodels; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertNotSame; +import static org.testng.Assert.assertNull; +import static org.testng.Assert.assertTrue; +import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals; + +import jalview.api.analysis.SimilarityParamsI; +import jalview.io.DataSourceType; +import jalview.io.FileParse; +import jalview.io.ScoreMatrixFile; +import jalview.math.MatrixI; +import jalview.schemes.ResidueProperties; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.util.Arrays; + +import org.testng.annotations.Test; + +public class ScoreMatrixTest +{ + @Test(groups = "Functional") + public void testConstructor() + { + // note score matrix does not have to be symmetric (though it should be!) 
+ float[][] scores = new float[3][]; + scores[0] = new float[] { 1f, 2f, 3f }; + scores[1] = new float[] { -4f, 5f, 6f }; + scores[2] = new float[] { 7f, 8f, 9f }; + ScoreMatrix sm = new ScoreMatrix("Test", "ABC".toCharArray(), scores); + assertEquals(sm.getSize(), 3); + assertArrayEquals(scores, sm.getMatrix()); + assertEquals(sm.getPairwiseScore('A', 'a'), 1f); + assertEquals(sm.getPairwiseScore('b', 'c'), 6f); + assertEquals(sm.getPairwiseScore('c', 'b'), 8f); + assertEquals(sm.getMatrixIndex('c'), 2); + assertEquals(sm.getMatrixIndex(' '), -1); + + // substitution to or from unknown symbol gets minimum score + assertEquals(sm.getPairwiseScore('A', 'D'), -4f); + assertEquals(sm.getPairwiseScore('D', 'A'), -4f); + // unknown-to-self gets a score of 1 + assertEquals(sm.getPairwiseScore('D', 'D'), 1f); + } + + @Test( + groups = "Functional", + expectedExceptions = { IllegalArgumentException.class }) + public void testConstructor_matrixTooSmall() + { + float[][] scores = new float[2][]; + scores[0] = new float[] { 1f, 2f }; + scores[1] = new float[] { 3f, 4f }; + new ScoreMatrix("Test", "ABC".toCharArray(), scores); + } + + @Test( + groups = "Functional", + expectedExceptions = { IllegalArgumentException.class }) + public void testConstructor_matrixTooBig() + { + float[][] scores = new float[2][]; + scores[0] = new float[] { 1f, 2f }; + scores[1] = new float[] { 3f, 4f }; + new ScoreMatrix("Test", "A".toCharArray(), scores); + } + + @Test( + groups = "Functional", + expectedExceptions = { IllegalArgumentException.class }) + public void testConstructor_matrixNotSquare() + { + float[][] scores = new float[2][]; + scores[0] = new float[] { 1f, 2f }; + scores[1] = new float[] { 3f }; + new ScoreMatrix("Test", "AB".toCharArray(), scores); + } + + @Test(groups = "Functional") + public void testBuildSymbolIndex() + { + float[][] scores = new float[2][]; + scores[0] = new float[] { 1f, 2f }; + scores[1] = new float[] { 3f, 4f }; + ScoreMatrix sm = new ScoreMatrix("Test", 
new char[] { 'A', '.' }, + scores); + short[] index = sm.buildSymbolIndex("AX-yxYp".toCharArray()); + + assertEquals(index.length, 128); // ASCII character set size + + assertEquals(index['A'], 0); + assertEquals(index['a'], 0); // lower-case mapping added + assertEquals(index['X'], 1); + assertEquals(index['-'], 2); + assertEquals(index['y'], 3); // lower-case override + assertEquals(index['x'], 4); // lower-case override + assertEquals(index['Y'], 5); + assertEquals(index['p'], 6); + assertEquals(index['P'], -1); // lower-case doesn't map upper-case + + /* + * check all unmapped symbols have index for unmapped + */ + for (int c = 0; c < index.length; c++) + { + if (!"AaXx-. Yyp".contains(String.valueOf((char) c))) + { + assertEquals(index[c], -1); + } + } + } + + /** + * check that characters not in the basic ASCII set are simply ignored + */ + @Test(groups = "Functional") + public void testBuildSymbolIndex_nonAscii() + { + float[][] scores = new float[2][]; + scores[0] = new float[] { 1f, 2f }; + scores[1] = new float[] { 3f, 4f }; + ScoreMatrix sm = new ScoreMatrix("Test", new char[] { 'A', '.' 
}, + scores); + char[] weird = new char[] { 128, 245, 'P' }; + short[] index = sm.buildSymbolIndex(weird); + assertEquals(index.length, 128); + assertEquals(index['P'], 2); + assertEquals(index['p'], 2); + for (int c = 0; c < index.length; c++) + { + if (c != 'P' && c != 'p') + { + assertEquals(index[c], -1); + } + } + } + + @Test(groups = "Functional") + public void testGetMatrix() + { + ScoreMatrix sm = ScoreModels.getInstance().getBlosum62(); + float[][] m = sm.getMatrix(); + assertEquals(m.length, sm.getSize()); + assertEquals(m[2][4], -3f); + // verify a defensive copy is returned + float[][] m2 = sm.getMatrix(); + assertNotSame(m, m2); + assertTrue(Arrays.deepEquals(m, m2)); + } + + @Test(groups = "Functional") + public void testGetMatrixIndex() + { + ScoreMatrix sm = ScoreModels.getInstance().getBlosum62(); + assertEquals(sm.getMatrixIndex('A'), 0); + assertEquals(sm.getMatrixIndex('R'), 1); + assertEquals(sm.getMatrixIndex('r'), 1); + assertEquals(sm.getMatrixIndex('N'), 2); + assertEquals(sm.getMatrixIndex('D'), 3); + assertEquals(sm.getMatrixIndex('X'), 22); + assertEquals(sm.getMatrixIndex('x'), 22); + assertEquals(sm.getMatrixIndex('-'), -1); + assertEquals(sm.getMatrixIndex('*'), 23); + assertEquals(sm.getMatrixIndex('.'), -1); + assertEquals(sm.getMatrixIndex(' '), -1); + assertEquals(sm.getMatrixIndex('?'), -1); + assertEquals(sm.getMatrixIndex((char) 128), -1); + } + + @Test(groups = "Functional") + public void testGetSize() + { + ScoreMatrix sm = ScoreModels.getInstance().getBlosum62(); + assertEquals(sm.getMatrix().length, sm.getSize()); + } + + @Test(groups = "Functional") + public void testComputePairwiseScores() + { + /* + * NB score matrix expects '-' for gap + */ + String[] seqs = new String[] { "FKL", "R-D", "QIA", "GWC" }; + ScoreMatrix sm = ScoreModels.getInstance().getBlosum62(); + + MatrixI pairwise = sm.findSimilarities(seqs, SimilarityParams.Jalview); + + /* + * should be NxN where N = number of sequences + */ + 
assertEquals(pairwise.height(), 4); + assertEquals(pairwise.width(), 4); + + /* + * should be symmetrical (because BLOSUM62 is) + */ + for (int i = 0; i < pairwise.height(); i++) + { + for (int j = i + 1; j < pairwise.width(); j++) + { + assertEquals(pairwise.getValue(i, j), pairwise.getValue(j, i), + String.format("Not symmetric at [%d, %d]", i, j)); + } + } + /* + * verify expected BLOSUM dot product scores + */ + // F.F + K.K + L.L = 6 + 5 + 4 = 15 + assertEquals(pairwise.getValue(0, 0), 15d); + // R.R + -.- + D.D = 5 + 1 + 6 = 12 + assertEquals(pairwise.getValue(1, 1), 12d); + // Q.Q + I.I + A.A = 5 + 4 + 4 = 13 + assertEquals(pairwise.getValue(2, 2), 13d); + // G.G + W.W + C.C = 6 + 11 + 9 = 26 + assertEquals(pairwise.getValue(3, 3), 26d); + // F.R + K.- + L.D = -3 + -4 + -4 = -11 + assertEquals(pairwise.getValue(0, 1), -11d); + // F.Q + K.I + L.A = -3 + -3 + -1 = -7 + assertEquals(pairwise.getValue(0, 2), -7d); + // F.G + K.W + L.C = -3 + -3 + -1 = -7 + assertEquals(pairwise.getValue(0, 3), -7d); + // R.Q + -.I + D.A = 1 + -4 + -2 = -5 + assertEquals(pairwise.getValue(1, 2), -5d); + // R.G + -.W + D.C = -2 + -4 + -3 = -9 + assertEquals(pairwise.getValue(1, 3), -9d); + // Q.G + I.W + A.C = -2 + -3 + 0 = -5 + assertEquals(pairwise.getValue(2, 3), -5d); + } + + /** + * Test that the result of outputMatrix can be reparsed to give an identical + * ScoreMatrix + * + * @throws IOException + * @throws MalformedURLException + */ + @Test(groups = "Functional") + public void testOutputMatrix_roundTrip() throws MalformedURLException, + IOException + { + ScoreMatrix sm = ScoreModels.getInstance().getBlosum62(); + String output = sm.outputMatrix(false); + FileParse fp = new FileParse(output, DataSourceType.PASTE); + ScoreMatrixFile parser = new ScoreMatrixFile(fp); + ScoreMatrix sm2 = parser.parseMatrix(); + assertNotNull(sm2); + assertTrue(sm2.equals(sm)); + } + + @Test(groups = "Functional") + public void testEqualsAndHashCode() + { + ScoreMatrix sm = 
ScoreModels.getInstance().getBlosum62(); + ScoreMatrix sm2 = new ScoreMatrix(sm.getName(), sm.getSymbols() + .toCharArray(), sm.getMatrix()); + assertTrue(sm.equals(sm2)); + assertEquals(sm.hashCode(), sm2.hashCode()); + + sm2 = ScoreModels.getInstance().getPam250(); + assertFalse(sm.equals(sm2)); + assertNotEquals(sm.hashCode(), sm2.hashCode()); + + assertFalse(sm.equals("hello")); + } + + /** + * Tests for scoring options where the longer length of two sequences is used + */ + @Test(groups = "Functional") + public void testcomputeSimilarity_matchLongestSequence() + { + /* + * ScoreMatrix expects '-' for gaps + */ + String s1 = "FR-K-S"; + String s2 = "FS--L"; + ScoreMatrix blosum = ScoreModels.getInstance().getBlosum62(); + + /* + * score gap-gap and gap-char + * shorter sequence treated as if with trailing gaps + * score = F^F + R^S + -^- + K^- + -^L + S^- + * = 6 + -1 + 1 + -4 + -4 + -4 = -6 + */ + SimilarityParamsI params = new SimilarityParams(true, true, true, false); + assertEquals(blosum.computeSimilarity(s1, s2, params), -6d); + // matchGap (arg2) is ignored: + params = new SimilarityParams(true, false, true, false); + assertEquals(blosum.computeSimilarity(s1, s2, params), -6d); + + /* + * score gap-char but not gap-gap + * score = F^F + R^S + 0 + K^- + -^L + S^- + * = 6 + -1 + 0 + -4 + -4 + -4 = -7 + */ + params = new SimilarityParams(false, true, true, false); + assertEquals(blosum.computeSimilarity(s1, s2, params), -7d); + // matchGap (arg2) is ignored: + params = new SimilarityParams(false, false, true, false); + assertEquals(blosum.computeSimilarity(s1, s2, params), -7d); + + /* + * score gap-gap but not gap-char + * score = F^F + R^S + -^- + 0 + 0 + 0 + * = 6 + -1 + 1 = 6 + */ + params = new SimilarityParams(true, false, false, false); + assertEquals(blosum.computeSimilarity(s1, s2, params), 6d); + // matchGap (arg2) is ignored: + params = new SimilarityParams(true, true, false, false); + assertEquals(blosum.computeSimilarity(s1, s2, params), 6d); + 
+ /* + * score neither gap-gap nor gap-char + * score = F^F + R^S + 0 + 0 + 0 + 0 + * = 6 + -1 = 5 + */ + params = new SimilarityParams(false, false, false, false); + assertEquals(blosum.computeSimilarity(s1, s2, params), 5d); + // matchGap (arg2) is ignored: + params = new SimilarityParams(false, true, false, false); + assertEquals(blosum.computeSimilarity(s1, s2, params), 5d); + } + + /** + * Tests for scoring options where only the shorter length of two sequences is + * used + */ + @Test(groups = "Functional") + public void testcomputeSimilarity_matchShortestSequence() + { + /* + * ScoreMatrix expects '-' for gaps + */ + String s1 = "FR-K-S"; + String s2 = "FS--L"; + ScoreMatrix blosum = ScoreModels.getInstance().getBlosum62(); + + /* + * score gap-gap and gap-char + * match shorter sequence only + * score = F^F + R^S + -^- + K^- + -^L + * = 6 + -1 + 1 + -4 + -4 = -2 + */ + SimilarityParamsI params = new SimilarityParams(true, true, true, true); + assertEquals(blosum.computeSimilarity(s1, s2, params), -2d); + // matchGap (arg2) is ignored: + params = new SimilarityParams(true, false, true, true); + assertEquals(blosum.computeSimilarity(s1, s2, params), -2d); + + /* + * score gap-char but not gap-gap + * score = F^F + R^S + 0 + K^- + -^L + * = 6 + -1 + 0 + -4 + -4 = -3 + */ + params = new SimilarityParams(false, true, true, true); + assertEquals(blosum.computeSimilarity(s1, s2, params), -3d); + // matchGap (arg2) is ignored: + params = new SimilarityParams(false, false, true, true); + assertEquals(blosum.computeSimilarity(s1, s2, params), -3d); + + /* + * score gap-gap but not gap-char + * score = F^F + R^S + -^- + 0 + 0 + * = 6 + -1 + 1 = 6 + */ + params = new SimilarityParams(true, false, false, true); + assertEquals(blosum.computeSimilarity(s1, s2, params), 6d); + // matchGap (arg2) is ignored: + params = new SimilarityParams(true, true, false, true); + assertEquals(blosum.computeSimilarity(s1, s2, params), 6d); + + /* + * score neither gap-gap nor gap-char + 
* score = F^F + R^S + 0 + 0 + 0 + * = 6 + -1 = 5 + */ + params = new SimilarityParams(false, false, false, true); + assertEquals(blosum.computeSimilarity(s1, s2, params), 5d); + // matchGap (arg2) is ignored: + params = new SimilarityParams(false, true, false, true); + assertEquals(blosum.computeSimilarity(s1, s2, params), 5d); + } + + @Test(groups = "Functional") + public void testSymmetric() + { + verifySymmetric(ScoreModels.getInstance().getBlosum62()); + verifySymmetric(ScoreModels.getInstance().getPam250()); + verifySymmetric(ScoreModels.getInstance().getDefaultModel(false)); // dna + } + + private void verifySymmetric(ScoreMatrix sm) + { + float[][] m = sm.getMatrix(); + int rows = m.length; + for (int row = 0; row < rows; row++) + { + assertEquals(m[row].length, rows); + for (int col = 0; col < rows; col++) + { + assertEquals(m[row][col], m[col][row], String.format("%s [%s, %s]", + sm.getName(), ResidueProperties.aa[row], + ResidueProperties.aa[col])); + } + } + } + + /** + * A test that just asserts the expected values in the Blosum62 score matrix + */ + @Test(groups = "Functional") + public void testBlosum62_values() + { + ScoreMatrix sm = ScoreModels.getInstance().getBlosum62(); + + assertTrue(sm.isProtein()); + assertFalse(sm.isDNA()); + assertNull(sm.getDescription()); + + /* + * verify expected scores against ARNDCQEGHILKMFPSTWYVBZX + * scraped from https://www.ncbi.nlm.nih.gov/Class/FieldGuide/BLOSUM62.txt + */ + verifyValues(sm, 'A', new float[] { 4, -1, -2, -2, 0, -1, -1, 0, -2, + -1, + -1, -1, -1, -2, -1, 1, 0, -3, -2, 0, -2, -1, 0 }); + verifyValues(sm, 'R', new float[] { -1, 5, 0, -2, -3, 1, 0, -2, 0, -3, + -2, 2, -1, -3, -2, -1, -1, -3, -2, -3, -1, 0, -1 }); + verifyValues(sm, 'N', new float[] { -2, 0, 6, 1, -3, 0, 0, 0, 1, -3, + -3, + 0, -2, -3, -2, 1, 0, -4, -2, -3, 3, 0, -1 }); + verifyValues(sm, 'D', new float[] { -2, -2, 1, 6, -3, 0, 2, -1, -1, -3, + -4, -1, -3, -3, -1, 0, -1, -4, -3, -3, 4, 1, -1 }); + verifyValues(sm, 'C', new float[] { 
0, -3, -3, -3, 9, -3, -4, -3, -3, + -1, + -1, -3, -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2 }); + verifyValues(sm, 'Q', new float[] { -1, 1, 0, 0, -3, 5, 2, -2, 0, -3, + -2, + 1, 0, -3, -1, 0, -1, -2, -1, -2, 0, 3, -1 }); + verifyValues(sm, 'E', new float[] { -1, 0, 0, 2, -4, 2, 5, -2, 0, -3, + -3, + 1, -2, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1 }); + verifyValues(sm, 'G', new float[] { 0, -2, 0, -1, -3, -2, -2, 6, -2, + -4, + -4, -2, -3, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1 }); + verifyValues(sm, 'H', new float[] { -2, 0, 1, -1, -3, 0, 0, -2, 8, -3, + -3, -1, -2, -1, -2, -1, -2, -2, 2, -3, 0, 0, -1 }); + verifyValues(sm, 'I', new float[] { -1, -3, -3, -3, -1, -3, -3, -4, -3, + 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3, -3, -3, -1 }); + verifyValues(sm, 'L', new float[] { -1, -2, -3, -4, -1, -2, -3, -4, -3, + 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1, -4, -3, -1 }); + verifyValues(sm, 'K', new float[] { -1, 2, 0, -1, -3, 1, 1, -2, -1, -3, + -2, 5, -1, -3, -1, 0, -1, -3, -2, -2, 0, 1, -1 }); + verifyValues(sm, 'M', new float[] { -1, -1, -2, -3, -1, 0, -2, -3, -2, + 1, + 2, -1, 5, 0, -2, -1, -1, -1, -1, 1, -3, -1, -1 }); + verifyValues(sm, 'F', new float[] { -2, -3, -3, -3, -2, -3, -3, -3, -1, + 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1, -3, -3, -1 }); + verifyValues(sm, 'P', new float[] { -1, -2, -2, -1, -3, -1, -1, -2, -2, + -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2, -2, -1, -2 }); + verifyValues(sm, 'S', new float[] { 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, + -2, + 0, -1, -2, -1, 4, 1, -3, -2, -2, 0, 0, 0 }); + verifyValues(sm, 'T', new float[] { 0, -1, 0, -1, -1, -1, -1, -2, -2, + -1, + -1, -1, -1, -2, -1, 1, 5, -2, -2, 0, -1, -1, 0 }); + verifyValues(sm, 'W', new float[] { -3, -3, -4, -4, -2, -2, -3, -2, -2, + -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3, -4, -3, -2 }); + verifyValues(sm, 'Y', new float[] { -2, -2, -2, -3, -2, -1, -2, -3, 2, + -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1, -3, -2, -1 }); + verifyValues(sm, 'V', new float[] { 0, -3, -3, -3, -1, -2, -2, -3, -3, + 3, + 1, -2, 
1, -1, -2, -2, 0, -3, -1, 4, -3, -2, -1 }); + verifyValues(sm, 'B', new float[] { -2, -1, 3, 4, -3, 0, 1, -1, 0, -3, + -4, 0, -3, -3, -2, 0, -1, -4, -3, -3, 4, 1, -1 }); + verifyValues(sm, 'Z', new float[] { -1, 0, 0, 1, -3, 3, 4, -2, 0, -3, + -3, + 1, -1, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1 }); + verifyValues(sm, 'X', new float[] { 0, -1, -1, -1, -2, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -2, 0, 0, -2, -1, -1, -1, -1, -1 }); + } + + /** + * Helper method to check pairwise scores for one residue + * + * @param sm + * @param res + * @param expected + * score values against 'res', in ResidueProperties.aaIndex order + */ + private void verifyValues(ScoreMatrix sm, char res, float[] expected) + { + for (int j = 0; j < expected.length; j++) + { + char c2 = ResidueProperties.aa[j].charAt(0); + assertEquals(sm.getPairwiseScore(res, c2), expected[j], + String.format("%s->%s", res, c2)); + } + } + + @Test(groups = "Functional") + public void testConstructor_gapDash() + { + float[][] scores = new float[2][]; + scores[0] = new float[] { 1f, 2f }; + scores[1] = new float[] { 4f, 5f }; + ScoreMatrix sm = new ScoreMatrix("Test", new char[] { 'A', '-' }, + scores); + assertEquals(sm.getSize(), 2); + assertArrayEquals(scores, sm.getMatrix()); + assertEquals(sm.getPairwiseScore('A', 'a'), 1f); + assertEquals(sm.getPairwiseScore('A', 'A'), 1f); + assertEquals(sm.getPairwiseScore('a', '-'), 2f); + assertEquals(sm.getPairwiseScore('-', 'A'), 4f); + assertEquals(sm.getMatrixIndex('a'), 0); + assertEquals(sm.getMatrixIndex('A'), 0); + assertEquals(sm.getMatrixIndex('-'), 1); + assertEquals(sm.getMatrixIndex(' '), -1); + assertEquals(sm.getMatrixIndex('.'), -1); + } + + @Test(groups = "Functional") + public void testGetPairwiseScore() + { + float[][] scores = new float[2][]; + scores[0] = new float[] { 1f, 2f }; + scores[1] = new float[] { -4f, 5f }; + ScoreMatrix sm = new ScoreMatrix("Test", new char[] { 'A', 'B' }, + scores); + assertEquals(sm.getPairwiseScore('A', 'A'), 1f); + 
assertEquals(sm.getPairwiseScore('A', 'a'), 1f); + assertEquals(sm.getPairwiseScore('A', 'B'), 2f); + assertEquals(sm.getPairwiseScore('b', 'a'), -4f); + assertEquals(sm.getPairwiseScore('B', 'b'), 5f); + + /* + * unknown symbols currently score minimum score + * or 1 for identity with self + */ + assertEquals(sm.getPairwiseScore('A', '-'), -4f); + assertEquals(sm.getPairwiseScore('-', 'A'), -4f); + assertEquals(sm.getPairwiseScore('-', '-'), 1f); + assertEquals(sm.getPairwiseScore('Q', 'W'), -4f); + assertEquals(sm.getPairwiseScore('Q', 'Q'), 1f); + + /* + * symbols not in basic ASCII set score zero + */ + char c = (char) 200; + assertEquals(sm.getPairwiseScore('Q', c), 0f); + assertEquals(sm.getPairwiseScore(c, 'Q'), 0f); + } + + @Test(groups = "Functional") + public void testGetMinimumScore() + { + ScoreMatrix sm = ScoreModels.getInstance().getBlosum62(); + assertEquals(sm.getMinimumScore(), -4f); + } + + @Test(groups = "Functional") + public void testGetMaximumScore() + { + ScoreMatrix sm = ScoreModels.getInstance().getBlosum62(); + assertEquals(sm.getMaximumScore(), 11f); + } + + @Test(groups = "Functional") + public void testOutputMatrix_html() + { + float[][] scores = new float[2][]; + scores[0] = new float[] { 1f, 2f }; + scores[1] = new float[] { 4f, -5.3E-10f }; + ScoreMatrix sm = new ScoreMatrix("Test", "AB".toCharArray(), scores); + String html = sm.outputMatrix(true); + String expected = "\n" + + "\n" + + "\n" + + "
 A  B 
A1.02.0
B4.0-5.3E-10
"; + assertEquals(html, expected); + } +} diff --git a/test/jalview/analysis/scoremodels/ScoreModelsTest.java b/test/jalview/analysis/scoremodels/ScoreModelsTest.java new file mode 100644 index 0000000..ffcd1a8 --- /dev/null +++ b/test/jalview/analysis/scoremodels/ScoreModelsTest.java @@ -0,0 +1,105 @@ +package jalview.analysis.scoremodels; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +import jalview.api.analysis.PairwiseScoreModelI; +import jalview.api.analysis.ScoreModelI; + +import java.util.Iterator; + +import org.testng.annotations.Test; + +public class ScoreModelsTest +{ + /** + * Verify that the singleton constructor successfully loads Jalview's built-in + * score models + */ + @Test(groups = "Functional") + public void testConstructor() + { + Iterator models = ScoreModels.getInstance().getModels() + .iterator(); + assertTrue(models.hasNext()); + + /* + * models are served in order of addition + */ + ScoreModelI sm = models.next(); + assertTrue(sm instanceof SimilarityScoreModel); + assertTrue(sm instanceof PairwiseScoreModelI); + assertFalse(sm instanceof DistanceScoreModel); + assertEquals(sm.getName(), "BLOSUM62"); + assertEquals(((PairwiseScoreModelI) sm).getPairwiseScore('I', 'R'), -3f); + + sm = models.next(); + assertTrue(sm instanceof SimilarityScoreModel); + assertTrue(sm instanceof PairwiseScoreModelI); + assertFalse(sm instanceof DistanceScoreModel); + assertEquals(sm.getName(), "PAM250"); + assertEquals(((PairwiseScoreModelI) sm).getPairwiseScore('R', 'C'), -4f); + + sm = models.next(); + assertTrue(sm instanceof SimilarityScoreModel); + assertTrue(sm instanceof PairwiseScoreModelI); + assertFalse(sm instanceof DistanceScoreModel); + assertEquals(sm.getName(), "PID"); + assertEquals(((PairwiseScoreModelI) sm).getPairwiseScore('R', 'C'), 0f); + assertEquals(((PairwiseScoreModelI) sm).getPairwiseScore('R', 'r'), 1f); + + sm = models.next(); + 
assertTrue(sm instanceof SimilarityScoreModel); + assertTrue(sm instanceof PairwiseScoreModelI); + assertFalse(sm instanceof DistanceScoreModel); + assertEquals(sm.getName(), "DNA"); + assertEquals(((PairwiseScoreModelI) sm).getPairwiseScore('c', 'x'), 1f); + + sm = models.next(); + assertFalse(sm instanceof SimilarityScoreModel); + assertFalse(sm instanceof PairwiseScoreModelI); + assertTrue(sm instanceof DistanceScoreModel); + assertEquals(sm.getName(), "Sequence Feature Similarity"); + } + + /** + * 'Test' that prints out score matrices in tab-delimited format. This test is + * intentionally not assigned to any group so would not be run as part of a + * suite. It makes no assertions and is just provided as a utility method for + * printing out matrices. Relocated here from ScoreMatrixPrinter. + */ + @Test(groups = "none") + public void printAllMatrices_tabDelimited() + { + printAllMatrices(false); + } + + /** + * 'Test' that prints out score matrices in html format. This test is + * intentionally not assigned to any group so would not be run as part of a + * suite. It makes no assertions and is just provided as a utility method for + * printing out matrices. Relocated here from ScoreMatrixPrinter. 
+ */ + @Test(groups = "none") + public void printAllMatrices_asHtml() + { + printAllMatrices(true); + } + + /** + * Print all registered ScoreMatrix as plain or html tables + * + * @param asHtml + */ + protected void printAllMatrices(boolean asHtml) + { + for (ScoreModelI sm : ScoreModels.getInstance().getModels()) + { + if (sm instanceof ScoreMatrix) + { + System.out.println(((ScoreMatrix) sm).outputMatrix(asHtml)); + } + } + } +} diff --git a/test/jalview/io/IdentifyFileTest.java b/test/jalview/io/IdentifyFileTest.java index 3d800d8..dd4f6ba 100644 --- a/test/jalview/io/IdentifyFileTest.java +++ b/test/jalview/io/IdentifyFileTest.java @@ -110,7 +110,8 @@ public class IdentifyFileTest { "examples/testdata/cullpdb_pc25_res3.0_R0.3_d150729_chains9361.fasta.15316", FileFormat.Fasta }, - + { "resources/scoreModel/pam250.scm", FileFormat.ScoreMatrix }, + { "resources/scoreModel/blosum80.scm", FileFormat.ScoreMatrix } // { "examples/testdata/test.amsa", "AMSA" }, // { "examples/test.jnet", "JnetFile" }, }; diff --git a/test/jalview/io/NewickFileTests.java b/test/jalview/io/NewickFileTests.java index d198f0f..a92f5fb 100644 --- a/test/jalview/io/NewickFileTests.java +++ b/test/jalview/io/NewickFileTests.java @@ -22,8 +22,8 @@ package jalview.io; import static org.testng.ConversionUtils.wrapDataProvider; -import jalview.analysis.NJTree; import jalview.analysis.SequenceIdMatcher; +import jalview.analysis.TreeModel; import jalview.datamodel.SequenceI; import jalview.datamodel.SequenceNode; import jalview.gui.JvOptionPane; @@ -125,7 +125,8 @@ public class NewickFileTests stage = "Compare original and generated tree" + treename; Vector oseqs, nseqs; - oseqs = new NJTree(new SequenceI[0], nf).findLeaves(nf.getTree()); + oseqs = new TreeModel(new SequenceI[0], null, nf).findLeaves(nf + .getTree()); AssertJUnit.assertTrue(stage + "No nodes in original tree.", oseqs.size() > 0); SequenceI[] olsqs = new SequenceI[oseqs.size()]; @@ -133,7 +134,8 @@ public class NewickFileTests { 
olsqs[i] = (SequenceI) oseqs.get(i).element(); } - nseqs = new NJTree(new SequenceI[0], nf_regen).findLeaves(nf_regen + nseqs = new TreeModel(new SequenceI[0], null, nf_regen) + .findLeaves(nf_regen .getTree()); AssertJUnit.assertTrue(stage + "No nodes in regerated tree.", nseqs.size() > 0); diff --git a/test/jalview/io/ScoreMatrixFileTest.java b/test/jalview/io/ScoreMatrixFileTest.java new file mode 100644 index 0000000..97349b5 --- /dev/null +++ b/test/jalview/io/ScoreMatrixFileTest.java @@ -0,0 +1,506 @@ +package jalview.io; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertNull; +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + +import jalview.analysis.scoremodels.ScoreMatrix; +import jalview.analysis.scoremodels.ScoreModels; + +import java.io.IOException; +import java.net.MalformedURLException; + +import org.testng.annotations.Test; + +public class ScoreMatrixFileTest +{ + + /** + * Test a successful parse of a (small) score matrix file + * + * @throws IOException + * @throws MalformedURLException + */ + @Test(groups = "Functional") + public void testParseMatrix_ncbiMixedDelimiters() + throws MalformedURLException, + IOException + { + /* + * some messy but valid input data, with comma, space + * or tab (or combinations) as score value delimiters + * this example includes 'guide' symbols on score rows + */ + String data = "ScoreMatrix MyTest (example)\n" + "A\tT\tU\tt\tx\t-\n" + + "A,1.1,1.2,1.3,1.4, 1.5, 1.6\n" + + "T,2.1 2.2 2.3 2.4 2.5 2.6\n" + + "U\t3.1\t3.2\t3.3\t3.4\t3.5\t3.6\t\n" + + "t, 5.1,5.3,5.3,5.4,5.5, 5.6\n" + + "x\t6.1, 6.2 6.3 6.4 6.5 6.6\n" + + "-, \t7.1\t7.2 7.3, 7.4, 7.5\t,7.6\n"; + FileParse fp = new FileParse(data, DataSourceType.PASTE); + ScoreMatrixFile parser = new ScoreMatrixFile(fp); + ScoreMatrix sm = parser.parseMatrix(); + + assertNotNull(sm); + 
assertEquals(sm.getName(), "MyTest (example)"); + assertEquals(sm.getSize(), 6); + assertNull(sm.getDescription()); + assertTrue(sm.isDNA()); + assertFalse(sm.isProtein()); + assertEquals(sm.getMinimumScore(), 1.1f); + assertEquals(sm.getPairwiseScore('A', 'A'), 1.1f); + assertEquals(sm.getPairwiseScore('A', 'T'), 1.2f); + assertEquals(sm.getPairwiseScore('a', 'T'), 1.2f); // A/a equivalent + assertEquals(sm.getPairwiseScore('A', 't'), 1.4f); // T/t not equivalent + assertEquals(sm.getPairwiseScore('a', 't'), 1.4f); + assertEquals(sm.getPairwiseScore('U', 'x'), 3.5f); + assertEquals(sm.getPairwiseScore('u', 'x'), 3.5f); + // X (upper) and '.' unmapped - get minimum score + assertEquals(sm.getPairwiseScore('U', 'X'), 1.1f); + assertEquals(sm.getPairwiseScore('A', '.'), 1.1f); + assertEquals(sm.getPairwiseScore('-', '-'), 7.6f); + assertEquals(sm.getPairwiseScore('A', (char) 128), 0f); // out of range + } + + @Test(groups = "Functional") + public void testParseMatrix_headerMissing() + { + String data; + + data = "X Y\n1 2\n3 4\n"; + try + { + new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + } catch (IOException e) + { + assertEquals(e.getMessage(), + "Format error: 'ScoreMatrix ' should be the first non-comment line"); + } + } + + @Test(groups = "Functional") + public void testParseMatrix_ncbiNotEnoughRows() + { + String data = "ScoreMatrix MyTest\nX Y Z\n1 2 3\n4 5 6\n"; + try + { + new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + } catch (IOException e) + { + assertEquals(e.getMessage(), + "Expected 3 rows of score data in score matrix but only found 2"); + } + } + + @Test(groups = "Functional") + public void testParseMatrix_ncbiNotEnoughColumns() + { + String data = "ScoreMatrix MyTest\nX Y Z\n1 2 3\n4 5\n7 8 9\n"; + try + { + new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + 
} catch (IOException e) + { + assertEquals(e.getMessage(), + "Expected 3 scores at line 4: '4 5' but found 2"); + } + } + + @Test(groups = "Functional") + public void testParseMatrix_ncbiTooManyColumns() + { + /* + * with two too many columns: + */ + String data = "ScoreMatrix MyTest\nX\tY\tZ\n1 2 3\n4 5 6 7\n8 9 10\n"; + try + { + new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + } catch (IOException e) + { + assertEquals(e.getMessage(), + "Expected 3 scores at line 4: '4 5 6 7' but found 4"); + } + + /* + * with guide character and one too many columns: + */ + data = "ScoreMatrix MyTest\nX Y\nX 1 2\nY 3 4 5\n"; + try + { + new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + } catch (IOException e) + { + assertEquals(e.getMessage(), + "Expected 2 scores at line 4: 'Y 3 4 5' but found 3"); + } + + /* + * with no guide character and one too many columns + */ + data = "ScoreMatrix MyTest\nX Y\n1 2\n3 4 5\n"; + try + { + new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + } catch (IOException e) + { + assertEquals(e.getMessage(), + "Expected 2 scores at line 4: '3 4 5' but found 3"); + } + } + + @Test(groups = "Functional") + public void testParseMatrix_ncbiTooManyRows() + { + String data = "ScoreMatrix MyTest\n\tX\tY\tZ\n1 2 3\n4 5 6\n7 8 9\n10 11 12\n"; + try + { + new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + } catch (IOException e) + { + assertEquals(e.getMessage(), + "Unexpected extra input line in score model file: '10 11 12'"); + } + } + + @Test(groups = "Functional") + public void testParseMatrix_ncbiBadDelimiter() + { + String data = "ScoreMatrix MyTest\n X Y Z\n1|2|3\n4|5|6\n"; + try + { + new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + } catch 
(IOException e) + { + assertEquals(e.getMessage(), + "Invalid score value '1|2|3' at line 3 column 0"); + } + } + + @Test(groups = "Functional") + public void testParseMatrix_ncbiBadFloat() + { + String data = "ScoreMatrix MyTest\n\tX\tY\tZ\n1 2 3\n4 five 6\n7 8 9\n"; + try + { + new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + } catch (IOException e) + { + assertEquals(e.getMessage(), + "Invalid score value 'five' at line 4 column 1"); + } + } + + @Test(groups = "Functional") + public void testParseMatrix_ncbiBadGuideCharacter() + { + String data = "ScoreMatrix MyTest\n\tX Y\nX 1 2\ny 3 4\n"; + try + { + new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + } catch (IOException e) + { + assertEquals(e.getMessage(), + "Error parsing score matrix at line 4, expected 'Y' but found 'y'"); + } + + data = "ScoreMatrix MyTest\n\tX Y\nXX 1 2\nY 3 4\n"; + try + { + new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + } catch (IOException e) + { + assertEquals(e.getMessage(), + "Error parsing score matrix at line 3, expected 'X' but found 'XX'"); + } + } + + @Test(groups = "Functional") + public void testParseMatrix_ncbiNameMissing() + { + /* + * Name missing on ScoreMatrix header line + */ + String data = "ScoreMatrix\nX Y\n1 2\n3 4\n"; + try + { + new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + } catch (IOException e) + { + assertEquals( + e.getMessage(), + "Format error: expected 'ScoreMatrix ', found 'ScoreMatrix' at line 1"); + } + } + + /** + * Test a successful parse of a (small) score matrix file + * + * @throws IOException + * @throws MalformedURLException + */ + @Test(groups = "Functional") + public void testParseMatrix_ncbiFormat() throws MalformedURLException, + IOException + { + // input including comment and blank 
lines + String data = "ScoreMatrix MyTest\n#comment\n\n" + "\tA\tB\tC\n" + + "A\t1.0\t2.0\t3.0\n" + "B\t4.0\t5.0\t6.0\n" + + "C\t7.0\t8.0\t9.0\n"; + FileParse fp = new FileParse(data, DataSourceType.PASTE); + ScoreMatrixFile parser = new ScoreMatrixFile(fp); + ScoreMatrix sm = parser.parseMatrix(); + + assertNotNull(sm); + assertEquals(sm.getName(), "MyTest"); + assertEquals(parser.getMatrixName(), "MyTest"); + assertEquals(sm.getPairwiseScore('A', 'A'), 1.0f); + assertEquals(sm.getPairwiseScore('B', 'c'), 6.0f); + assertEquals(sm.getSize(), 3); + } + + /** + * Test a successful parse of a (small) score matrix file + * + * @throws IOException + * @throws MalformedURLException + */ + @Test(groups = "Functional") + public void testParseMatrix_aaIndexBlosum80() + throws MalformedURLException, + IOException + { + FileParse fp = new FileParse("resources/scoreModel/blosum80.scm", + DataSourceType.FILE); + ScoreMatrixFile parser = new ScoreMatrixFile(fp); + ScoreMatrix sm = parser.parseMatrix(); + + assertNotNull(sm); + assertEquals(sm.getName(), "HENS920103"); + assertEquals(sm.getDescription(), + "BLOSUM80 substitution matrix (Henikoff-Henikoff, 1992)"); + assertFalse(sm.isDNA()); + assertTrue(sm.isProtein()); + assertEquals(20, sm.getSize()); + + assertEquals(sm.getPairwiseScore('A', 'A'), 7f); + assertEquals(sm.getPairwiseScore('A', 'R'), -3f); + assertEquals(sm.getPairwiseScore('r', 'a'), -3f); // A/a equivalent + } + + /** + * Test a successful parse of a (small) score matrix file + * + * @throws IOException + * @throws MalformedURLException + */ + @Test(groups = "Functional") + public void testParseMatrix_aaindexFormat() throws MalformedURLException, + IOException + { + /* + * aaindex format has scores for diagonal and below only + */ + String data = "H MyTest\n" + "D My description\n" + "R PMID:1438297\n" + + "A Authors, names\n" + "T Journal title\n" + + "J Journal reference\n" + "* matrix in 1/3 Bit Units\n" + + "M rows = ABC, cols = ABC\n" + "A\t1.0\n" + + 
"B\t4.0\t5.0\n" + + "C\t7.0\t8.0\t9.0\n"; + FileParse fp = new FileParse(data, DataSourceType.PASTE); + ScoreMatrixFile parser = new ScoreMatrixFile(fp); + ScoreMatrix sm = parser.parseMatrix(); + + assertNotNull(sm); + assertEquals(sm.getSize(), 3); + assertEquals(sm.getName(), "MyTest"); + assertEquals(sm.getDescription(), "My description"); + assertEquals(sm.getPairwiseScore('A', 'A'), 1.0f); + assertEquals(sm.getPairwiseScore('A', 'B'), 4.0f); + assertEquals(sm.getPairwiseScore('A', 'C'), 7.0f); + assertEquals(sm.getPairwiseScore('B', 'A'), 4.0f); + assertEquals(sm.getPairwiseScore('B', 'B'), 5.0f); + assertEquals(sm.getPairwiseScore('B', 'C'), 8.0f); + assertEquals(sm.getPairwiseScore('C', 'C'), 9.0f); + assertEquals(sm.getPairwiseScore('C', 'B'), 8.0f); + assertEquals(sm.getPairwiseScore('C', 'A'), 7.0f); + } + + @Test(groups = "Functional") + public void testParseMatrix_aaindex_mMissing() + throws MalformedURLException, + IOException + { + /* + * aaindex format but M cols=, rows= is missing + */ + String data = "H MyTest\n" + "A\t1.0\n" + + "B\t4.0\t5.0\n" + + "C\t7.0\t8.0\t9.0\n"; + FileParse fp = new FileParse(data, DataSourceType.PASTE); + ScoreMatrixFile parser = new ScoreMatrixFile(fp); + try + { + parser.parseMatrix(); + fail("Expected exception"); + } catch (FileFormatException e) + { + assertEquals(e.getMessage(), "No alphabet specified in matrix file"); + } + } + + @Test(groups = "Functional") + public void testParseMatrix_aaindex_rowColMismatch() + throws MalformedURLException, + IOException + { + String data = "H MyTest\n" + "M rows=ABC, cols=ABD\n" + "A\t1.0\n" + + "B\t4.0\t5.0\n" + + "C\t7.0\t8.0\t9.0\n"; + FileParse fp = new FileParse(data, DataSourceType.PASTE); + ScoreMatrixFile parser = new ScoreMatrixFile(fp); + try + { + parser.parseMatrix(); + fail("Expected exception"); + } catch (FileFormatException e) + { + assertEquals( + e.getMessage(), + "Unexpected aaIndex score matrix data at line 2: M rows=ABC, cols=ABD rows != cols"); + } + } + 
+ @Test(groups = "Functional") + public void testParseMatrix_ncbiHeaderRepeated() + { + String data = "ScoreMatrix BLOSUM\nScoreMatrix PAM250\nX Y\n1 2\n3 4\n"; + try + { + new ScoreMatrixFile(new FileParse(data, DataSourceType.PASTE)) + .parseMatrix(); + fail("expected exception"); + } catch (IOException e) + { + assertEquals(e.getMessage(), + "Error: 'ScoreMatrix' repeated in file at line 2"); + } + } + + @Test(groups = "Functional") + public void testParseMatrix_aaindex_tooManyRows() + throws MalformedURLException, + IOException + { + String data = "H MyTest\n" + "M rows=ABC, cols=ABC\n" + "A\t1.0\n" + + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\t9.0\n" + "C\t7.0\t8.0\t9.0\n"; + FileParse fp = new FileParse(data, DataSourceType.PASTE); + ScoreMatrixFile parser = new ScoreMatrixFile(fp); + try + { + parser.parseMatrix(); + fail("Expected exception"); + } catch (FileFormatException e) + { + assertEquals(e.getMessage(), "Too many data rows in matrix file"); + } + } + + @Test(groups = "Functional") + public void testParseMatrix_aaindex_extraDataLines() + throws MalformedURLException, + IOException + { + String data = "H MyTest\n" + "M rows=ABC, cols=ABC\n" + "A\t1.0\n" + + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\t9.0\n" + "something extra\n"; + FileParse fp = new FileParse(data, DataSourceType.PASTE); + ScoreMatrixFile parser = new ScoreMatrixFile(fp); + try + { + parser.parseMatrix(); + fail("Expected exception"); + } catch (FileFormatException e) + { + assertEquals(e.getMessage(), "Too many data rows in matrix file"); + } + } + + @Test(groups = "Functional") + public void testParseMatrix_aaindex_tooFewColumns() + throws MalformedURLException, + IOException + { + String data = "H MyTest\n" + "M rows=ABC, cols=ABC\n" + "A\t1.0\n" + + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\n"; + FileParse fp = new FileParse(data, DataSourceType.PASTE); + ScoreMatrixFile parser = new ScoreMatrixFile(fp); + try + { + parser.parseMatrix(); + fail("Expected exception"); + } catch (FileFormatException e) + { + 
assertEquals( + e.getMessage(), + "Expected 3 scores at line 5: 'C\t7.0\t8.0' but found 2"); + } + } + + /** + * Test a successful parse and register of a score matrix file + * + * @throws IOException + * @throws MalformedURLException + */ + @Test(groups = "Functional") + public void testParse_ncbiFormat() throws MalformedURLException, + IOException + { + assertNull(ScoreModels.getInstance().getScoreModel("MyNewTest", null)); + + String data = "ScoreMatrix MyNewTest\n" + "\tA\tB\tC\n" + + "A\t1.0\t2.0\t3.0\n" + "B\t4.0\t5.0\t6.0\n" + + "C\t7.0\t8.0\t9.0\n"; + FileParse fp = new FileParse(data, DataSourceType.PASTE); + ScoreMatrixFile parser = new ScoreMatrixFile(fp); + + parser.parse(); + + ScoreMatrix sm = (ScoreMatrix) ScoreModels.getInstance().getScoreModel( + "MyNewTest", null); + assertNotNull(sm); + assertEquals(sm.getName(), "MyNewTest"); + assertEquals(parser.getMatrixName(), "MyNewTest"); + assertEquals(sm.getPairwiseScore('A', 'A'), 1.0f); + assertEquals(sm.getPairwiseScore('B', 'c'), 6.0f); + assertEquals(sm.getSize(), 3); + } +} diff --git a/test/jalview/math/MatrixTest.java b/test/jalview/math/MatrixTest.java index 961602d..97ded5a 100644 --- a/test/jalview/math/MatrixTest.java +++ b/test/jalview/math/MatrixTest.java @@ -1,6 +1,8 @@ package jalview.math; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotSame; +import static org.testng.Assert.assertNull; import static org.testng.Assert.assertTrue; import static org.testng.Assert.fail; @@ -12,13 +14,13 @@ import org.testng.internal.junit.ArrayAsserts; public class MatrixTest { - final static double DELTA = 0.0001d; + final static double DELTA = 0.000001d; @Test(groups = "Timing") public void testPreMultiply_timing() { - int rows = 500; - int cols = 1000; + int rows = 50; // increase to stress test timing + int cols = 100; double[][] d1 = new double[rows][cols]; double[][] d2 = new double[cols][rows]; Matrix m1 = new Matrix(d1); @@ -187,6 +189,7 @@ public class 
MatrixTest } Matrix m1 = new Matrix(in); Matrix m2 = (Matrix) m1.copy(); + assertNotSame(m1, m2); assertTrue(matrixEquals(m1, m2)); } @@ -378,4 +381,154 @@ public class MatrixTest ArrayAsserts.assertArrayEquals(m1.getD(), m2.getD(), 0.00001d); ArrayAsserts.assertArrayEquals(m1.getE(), m2.getE(), 0.00001d); } + + @Test(groups = "Functional") + public void testFindMinMax() + { + /* + * empty matrix case + */ + Matrix m = new Matrix(new double[][] { {} }); + assertNull(m.findMinMax()); + + /* + * normal case + */ + double[][] vals = new double[2][]; + vals[0] = new double[] {7d, 1d, -2.3d}; + vals[1] = new double[] {-12d, 94.3d, -102.34d}; + m = new Matrix(vals); + double[] minMax = m.findMinMax(); + assertEquals(minMax[0], -102.34d); + assertEquals(minMax[1], 94.3d); + } + + @Test(groups = { "Functional", "Timing" }) + public void testFindMinMax_timing() + { + Random r = new Random(); + int size = 1000; // increase to stress test timing + double[][] vals = new double[size][size]; + double max = -Double.MAX_VALUE; + double min = Double.MAX_VALUE; + for (int i = 0; i < size; i++) + { + vals[i] = new double[size]; + for (int j = 0; j < size; j++) + { + // use nextLong rather than nextDouble to include negative values + double d = r.nextLong(); + if (d > max) + { + max = d; + } + if (d < min) + { + min = d; + } + vals[i][j] = d; + } + } + Matrix m = new Matrix(vals); + long now = System.currentTimeMillis(); + double[] minMax = m.findMinMax(); + System.out.println(String.format("findMinMax for %d x %d took %dms", + size, size, (System.currentTimeMillis() - now))); + assertEquals(minMax[0], min); + assertEquals(minMax[1], max); + } + + /** + * Test range reversal with maximum value becoming zero + */ + @Test(groups = "Functional") + public void testReverseRange_maxToZero() + { + Matrix m1 = new Matrix( + new double[][] { { 2, 3.5, 4 }, { -3.4, 4, 15 } }); + + /* + * subtract all from max: range -3.4 to 15 becomes 18.4 to 0 + */ + m1.reverseRange(true); + 
assertEquals(m1.getValue(0, 0), 13d, DELTA); + assertEquals(m1.getValue(0, 1), 11.5d, DELTA); + assertEquals(m1.getValue(0, 2), 11d, DELTA); + assertEquals(m1.getValue(1, 0), 18.4d, DELTA); + assertEquals(m1.getValue(1, 1), 11d, DELTA); + assertEquals(m1.getValue(1, 2), 0d, DELTA); + + /* + * repeat operation - range is now 0 to 18.4 + */ + m1.reverseRange(true); + assertEquals(m1.getValue(0, 0), 5.4d, DELTA); + assertEquals(m1.getValue(0, 1), 6.9d, DELTA); + assertEquals(m1.getValue(0, 2), 7.4d, DELTA); + assertEquals(m1.getValue(1, 0), 0d, DELTA); + assertEquals(m1.getValue(1, 1), 7.4d, DELTA); + assertEquals(m1.getValue(1, 2), 18.4d, DELTA); + } + + /** + * Test range reversal with minimum and maximum values swapped + */ + @Test(groups = "Functional") + public void testReverseRange_swapMinMax() + { + Matrix m1 = new Matrix( + new double[][] { { 2, 3.5, 4 }, { -3.4, 4, 15 } }); + + /* + * swap all values in min-max range + * = subtract from (min + max = 11.6) + * range -3.4 to 15 becomes 18.4 to -3.4 + */ + m1.reverseRange(false); + assertEquals(m1.getValue(0, 0), 9.6d, DELTA); + assertEquals(m1.getValue(0, 1), 8.1d, DELTA); + assertEquals(m1.getValue(0, 2), 7.6d, DELTA); + assertEquals(m1.getValue(1, 0), 15d, DELTA); + assertEquals(m1.getValue(1, 1), 7.6d, DELTA); + assertEquals(m1.getValue(1, 2), -3.4d, DELTA); + + /* + * repeat operation - original values restored + */ + m1.reverseRange(false); + assertEquals(m1.getValue(0, 0), 2d, DELTA); + assertEquals(m1.getValue(0, 1), 3.5d, DELTA); + assertEquals(m1.getValue(0, 2), 4d, DELTA); + assertEquals(m1.getValue(1, 0), -3.4d, DELTA); + assertEquals(m1.getValue(1, 1), 4d, DELTA); + assertEquals(m1.getValue(1, 2), 15d, DELTA); + } + + @Test(groups = "Functional") + public void testMultiply() + { + Matrix m = new Matrix(new double[][] { { 2, 3.5, 4 }, { -3.4, 4, 15 } }); + m.multiply(2d); + assertEquals(m.getValue(0, 0), 4d, DELTA); + assertEquals(m.getValue(0, 1), 7d, DELTA); + assertEquals(m.getValue(0, 2), 8d, 
DELTA); + assertEquals(m.getValue(1, 0), -6.8d, DELTA); + assertEquals(m.getValue(1, 1), 8d, DELTA); + assertEquals(m.getValue(1, 2), 30d, DELTA); + } + + @Test(groups = "Functional") + public void testConstructor() + { + double[][] values = new double[][] { { 1, 2, 3 }, { 4, 5, 6 } }; + Matrix m = new Matrix(values); + assertEquals(m.getValue(0, 0), 1d, DELTA); + + /* + * verify the matrix has a copy of the original array + */ + assertNotSame(values[0], m.getRow(0)); + values[0][0] = -1d; + assertEquals(m.getValue(0, 0), 1d, DELTA); // unchanged + } } diff --git a/test/jalview/schemes/ScoreMatrixPrinter.java b/test/jalview/schemes/ScoreMatrixPrinter.java deleted file mode 100644 index 80241fb..0000000 --- a/test/jalview/schemes/ScoreMatrixPrinter.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) - * Copyright (C) $$Year-Rel$$ The Jalview Authors - * - * This file is part of Jalview. - * - * Jalview is free software: you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 - * of the License, or (at your option) any later version. - * - * Jalview is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Jalview. If not, see . - * The Jalview Authors are detailed in the 'AUTHORS' file. 
- */ -package jalview.schemes; - -import jalview.api.analysis.ScoreModelI; -import jalview.gui.JvOptionPane; - -import java.util.Map; - -import org.testng.annotations.BeforeClass; - -public class ScoreMatrixPrinter -{ - - @BeforeClass(alwaysRun = true) - public void setUpJvOptionPane() - { - JvOptionPane.setInteractiveMode(false); - JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION); - } - - public void printAllMatrices() - { - for (Map.Entry sm : ResidueProperties.scoreMatrices - .entrySet()) - { - System.out.println("Matrix " + sm.getKey()); - System.out.println(sm.getValue().toString()); - } - } - - public void printHTMLMatrices() - { - for (Map.Entry _sm : ResidueProperties.scoreMatrices - .entrySet()) - { - if (_sm.getValue() instanceof ScoreMatrix) - { - ScoreMatrix sm = (ScoreMatrix) _sm.getValue(); - System.out.println("Matrix " + _sm.getKey()); - System.out.println(sm.outputMatrix(true)); - } - } - } - -} diff --git a/test/jalview/schemes/ScoreMatrixTest.java b/test/jalview/schemes/ScoreMatrixTest.java deleted file mode 100644 index e15dd41..0000000 --- a/test/jalview/schemes/ScoreMatrixTest.java +++ /dev/null @@ -1,168 +0,0 @@ -package jalview.schemes; - -import static org.testng.Assert.assertEquals; - -import jalview.math.MatrixI; - -import org.testng.annotations.Test; - -public class ScoreMatrixTest -{ - @Test(groups = "Functional") - public void testSymmetric() - { - verifySymmetric(ResidueProperties.getScoreMatrix("BLOSUM62")); - verifySymmetric(ResidueProperties.getScoreMatrix("PAM250")); - verifySymmetric(ResidueProperties.getScoreMatrix("DNA")); - } - - private void verifySymmetric(ScoreMatrix sm) - { - int[][] m = sm.getMatrix(); - int rows = m.length; - for (int row = 0; row < rows; row++) - { - assertEquals(m[row].length, rows); - for (int col = 0; col < rows; col++) - { - assertEquals(m[row][col], m[col][row], String.format("%s [%s, %s]", - sm.getName(), ResidueProperties.aa[row], - ResidueProperties.aa[col])); - } - } - - /* - * also 
check the score matrix is sized for - * the number of symbols scored, plus gap - */ - assertEquals(rows, (sm.isDNA() ? ResidueProperties.maxNucleotideIndex - : ResidueProperties.maxProteinIndex) + 1); - } - - /** - * A test that just asserts the expected values in the Blosum62 score matrix - */ - @Test(groups = "Functional") - public void testBlosum62_values() - { - ScoreMatrix sm = ResidueProperties.getScoreMatrix("BLOSUM62"); - - /* - * verify expected scores against ARNDCQEGHILKMFPSTWYVBZX - * scraped from https://www.ncbi.nlm.nih.gov/Class/FieldGuide/BLOSUM62.txt - */ - verifyValues(sm, 'A', new int[] { 4, -1, -2, -2, 0, -1, -1, 0, -2, -1, - -1, -1, -1, -2, -1, 1, 0, -3, -2, 0, -2, -1, 0 }); - verifyValues(sm, 'R', new int[] { -1, 5, 0, -2, -3, 1, 0, -2, 0, -3, - -2, 2, -1, -3, -2, -1, -1, -3, -2, -3, -1, 0, -1 }); - verifyValues(sm, 'N', new int[] { -2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, - 0, -2, -3, -2, 1, 0, -4, -2, -3, 3, 0, -1 }); - verifyValues(sm, 'D', new int[] { -2, -2, 1, 6, -3, 0, 2, -1, -1, -3, - -4, -1, -3, -3, -1, 0, -1, -4, -3, -3, 4, 1, -1 }); - verifyValues(sm, 'C', new int[] { 0, -3, -3, -3, 9, -3, -4, -3, -3, -1, - -1, -3, -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2 }); - verifyValues(sm, 'Q', new int[] { -1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, - 1, 0, -3, -1, 0, -1, -2, -1, -2, 0, 3, -1 }); - verifyValues(sm, 'E', new int[] { -1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, - 1, -2, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1 }); - verifyValues(sm, 'G', new int[] { 0, -2, 0, -1, -3, -2, -2, 6, -2, -4, - -4, -2, -3, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1 }); - verifyValues(sm, 'H', new int[] { -2, 0, 1, -1, -3, 0, 0, -2, 8, -3, - -3, -1, -2, -1, -2, -1, -2, -2, 2, -3, 0, 0, -1 }); - verifyValues(sm, 'I', new int[] { -1, -3, -3, -3, -1, -3, -3, -4, -3, - 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3, -3, -3, -1 }); - verifyValues(sm, 'L', new int[] { -1, -2, -3, -4, -1, -2, -3, -4, -3, - 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1, -4, -3, -1 }); - verifyValues(sm, 'K', new int[] { 
-1, 2, 0, -1, -3, 1, 1, -2, -1, -3, - -2, 5, -1, -3, -1, 0, -1, -3, -2, -2, 0, 1, -1 }); - verifyValues(sm, 'M', new int[] { -1, -1, -2, -3, -1, 0, -2, -3, -2, 1, - 2, -1, 5, 0, -2, -1, -1, -1, -1, 1, -3, -1, -1 }); - verifyValues(sm, 'F', new int[] { -2, -3, -3, -3, -2, -3, -3, -3, -1, - 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1, -3, -3, -1 }); - verifyValues(sm, 'P', new int[] { -1, -2, -2, -1, -3, -1, -1, -2, -2, - -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2, -2, -1, -2 }); - verifyValues(sm, 'S', new int[] { 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, - 0, -1, -2, -1, 4, 1, -3, -2, -2, 0, 0, 0 }); - verifyValues(sm, 'T', new int[] { 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, - -1, -1, -1, -2, -1, 1, 5, -2, -2, 0, -1, -1, 0 }); - verifyValues(sm, 'W', new int[] { -3, -3, -4, -4, -2, -2, -3, -2, -2, - -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3, -4, -3, -2 }); - verifyValues(sm, 'Y', new int[] { -2, -2, -2, -3, -2, -1, -2, -3, 2, - -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1, -3, -2, -1 }); - verifyValues(sm, 'V', new int[] { 0, -3, -3, -3, -1, -2, -2, -3, -3, 3, - 1, -2, 1, -1, -2, -2, 0, -3, -1, 4, -3, -2, -1 }); - verifyValues(sm, 'B', new int[] { -2, -1, 3, 4, -3, 0, 1, -1, 0, -3, - -4, 0, -3, -3, -2, 0, -1, -4, -3, -3, 4, 1, -1 }); - verifyValues(sm, 'Z', new int[] { -1, 0, 0, 1, -3, 3, 4, -2, 0, -3, -3, - 1, -1, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1 }); - verifyValues(sm, 'X', new int[] { 0, -1, -1, -1, -2, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -2, 0, 0, -2, -1, -1, -1, -1, -1 }); - } - /** - * Helper method to check pairwise scores for one residue - * - * @param sm - * @param res - * @param expected - * score values against 'res', in ResidueProperties.aaIndex order - */ - private void verifyValues(ScoreMatrix sm, char res, int[] expected) - { - for (int j = 0; j < expected.length; j++) - { - char c2 = ResidueProperties.aa[j].charAt(0); - assertEquals(sm.getPairwiseScore(res, c2), expected[j], - String.format("%s->%s", res, c2)); - } - } - - @Test(groups = "Functional") - public void 
testComputePairwiseScores() - { - String[] seqs = new String[] { "FKL", "R-D", "QIA", "GWC" }; - ScoreMatrix sm = ResidueProperties.getScoreMatrix("BLOSUM62"); - - MatrixI pairwise = sm.computePairwiseScores(seqs); - - /* - * should be NxN where N = number of sequences - */ - assertEquals(pairwise.height(), 4); - assertEquals(pairwise.width(), 4); - - /* - * should be symmetrical (because BLOSUM62 is) - */ - for (int i = 0; i < pairwise.height(); i++) - { - for (int j = 0; j < pairwise.width(); j++) - { - assertEquals(pairwise.getValue(i, j), pairwise.getValue(j, i), - "Not symmetric"); - } - } - /* - * verify expected BLOSUM dot product scores - */ - // F.F + K.K + L.L = 6 + 5 + 4 = 15 - assertEquals(pairwise.getValue(0, 0), 15d); - // R.R + -.- + D.D = 5 + 1 + 6 = 12 - assertEquals(pairwise.getValue(1, 1), 12d); - // Q.Q + I.I + A.A = 5 + 4 + 4 = 13 - assertEquals(pairwise.getValue(2, 2), 13d); - // G.G + W.W + C.C = 6 + 11 + 9 = 26 - assertEquals(pairwise.getValue(3, 3), 26d); - // F.R + K.- + L.D = -3 + -4 + -4 = -11 - assertEquals(pairwise.getValue(0, 1), -11d); - // F.Q + K.I + L.A = -3 + -3 + -1 = -7 - assertEquals(pairwise.getValue(0, 2), -7d); - // F.G + K.W + L.C = -3 + -3 + -1 = -7 - assertEquals(pairwise.getValue(0, 3), -7d); - // R.Q + -.I + D.A = 1 + -4 + -2 = -5 - assertEquals(pairwise.getValue(1, 2), -5d); - // R.G + -.W + D.C = -2 + -4 + -3 = -9 - assertEquals(pairwise.getValue(1, 3), -9d); - // Q.G + I.W + A.C = -2 + -3 + 0 = -5 - assertEquals(pairwise.getValue(2, 3), -5d); - } -} diff --git a/test/jalview/util/ComparisonTest.java b/test/jalview/util/ComparisonTest.java index f955879..6f6841d 100644 --- a/test/jalview/util/ComparisonTest.java +++ b/test/jalview/util/ComparisonTest.java @@ -115,7 +115,7 @@ public class ComparisonTest @Test(groups = { "Functional" }) public void testPID_includingGaps() { - String seq1 = "ABCDEF"; + String seq1 = "ABCDEFG"; // extra length here is ignored String seq2 = "abcdef"; assertEquals("identical", 100f, 
Comparison.PID(seq1, seq2), 0.001f); @@ -129,12 +129,14 @@ public class ComparisonTest int length = seq1.length(); // match gap-residue, match gap-gap: 9/10 identical + // TODO should gap-gap be included in a PID score? JAL-791 assertEquals(90f, Comparison.PID(seq1, seq2, 0, length, true, false), 0.001f); // overloaded version of the method signature above: assertEquals(90f, Comparison.PID(seq1, seq2), 0.001f); // don't match gap-residue, match gap-gap: 7/10 identical + // TODO should gap-gap be included in a PID score? assertEquals(70f, Comparison.PID(seq1, seq2, 0, length, false, false), 0.001f); } @@ -163,7 +165,8 @@ public class ComparisonTest public void testPID_ungappedOnly() { // 5 identical, 2 gap-gap, 2 gap-residue, 1 mismatch - String seq1 = "a--b-cdefh"; + // the extra length of seq1 is ignored + String seq1 = "a--b-cdefhr"; String seq2 = "a---bcdefg"; int length = seq1.length(); diff --git a/test/jalview/util/SetUtilsTest.java b/test/jalview/util/SetUtilsTest.java new file mode 100644 index 0000000..ad17d4f --- /dev/null +++ b/test/jalview/util/SetUtilsTest.java @@ -0,0 +1,46 @@ +package jalview.util; + +import static org.testng.Assert.assertEquals; + +import java.awt.Color; +import java.util.HashSet; +import java.util.Set; + +import org.testng.annotations.Test; + +public class SetUtilsTest +{ + @Test(groups = "Functional") + public void testCountDisjunction() + { + Set s1 = new HashSet(); + assertEquals(SetUtils.countDisjunction(null, null), 0); + assertEquals(SetUtils.countDisjunction(s1, null), 0); + assertEquals(SetUtils.countDisjunction(null, s1), 0); + s1.add(Color.white); + assertEquals(SetUtils.countDisjunction(s1, null), 1); + assertEquals(SetUtils.countDisjunction(null, s1), 1); + assertEquals(SetUtils.countDisjunction(s1, null), 1); + assertEquals(SetUtils.countDisjunction(s1, s1), 0); + + Set s2 = new HashSet(); + assertEquals(SetUtils.countDisjunction(s2, s2), 0); + assertEquals(SetUtils.countDisjunction(s1, s2), 1); + 
assertEquals(SetUtils.countDisjunction(s2, s1), 1); + + s1.add(Color.yellow); + s1.add(Color.blue); + s2.add(new Color(Color.yellow.getRGB())); + + /* + * now s1 is {white, yellow, blue} + * s2 is {yellow'} + */ + assertEquals(SetUtils.countDisjunction(s1, s2), 2); + s2.add(Color.blue); + assertEquals(SetUtils.countDisjunction(s1, s2), 1); + s2.add(Color.pink); + assertEquals(SetUtils.countDisjunction(s1, s2), 2); + + } +}