action.scale_right = Scale Right
action.by_tree_order = By Tree Order
action.sort = Sort
-action.calculate_tree = Calculate Tree
+action.calculate_tree = Calculate Tree...
+action.calculate_tree_pca = Calculate Tree or PCA...
action.help = Help
action.by_annotation = By Annotation...
action.invert_sequence_selection = Invert Sequence Selection
label.principal_component_analysis = Principal Component Analysis
label.average_distance_identity = Average Distance Using % Identity
label.neighbour_joining_identity = Neighbour Joining Using % Identity
+label.choose_calculation = Choose Calculation
label.treecalc_title = {0} Using {1}
label.tree_calc_av = Average Distance
label.tree_calc_nj = Neighbour Joining
label.score_model_pid = % Identity
label.score_model_blosum62 = BLOSUM62
label.score_model_pam250 = PAM 250
+label.score_model_smithwatermanscore = Score between two sequences aligned with Smith-Waterman with default Peptide/Nucleotide matrix
+label.score_model_sequencefeaturesimilarity = Distance measure of average number of features not shared at sequence positions
label.score_model_conservation = Physicochemical property conservation
label.score_model_enhconservation = Physicochemical property conservation
label.status_bar = Status bar
label.set_this_label_text = set this label text
label.sequences_from = Sequences from {0}
label.successfully_loaded_file = Successfully loaded file {0}
+label.successfully_loaded_matrix = Successfully loaded score matrix {0}
label.successfully_saved_to_file_in_format = Successfully saved to file: {0} in {1} format.
label.copied_sequences_to_clipboard = Copied {0} sequences to clipboard.
label.check_file_matches_sequence_ids_alignment = Check that the file matches sequence IDs in the alignment.
label.not_enough_sequences = Not enough sequences
label.selected_region_to_tree_may_only_contain_residues_or_gaps = The selected region to create a tree may\nonly contain residues or gaps.\nTry using the Pad function in the edit menu,\nor one of the multiple sequence alignment web services.
label.sequences_selection_not_aligned = Sequences in selection are not aligned
-label.sequences_must_be_aligned_before_creating_tree = The sequences must be aligned before creating a tree.\nTry using the Pad function in the edit menu,\n or one of the multiple sequence alignment web services.
-label.sequences_not_aligned = Sequences not aligned
label.problem_reading_tree_file = Problem reading tree file
label.possible_problem_with_tree_file = Possible problem with tree file
label.select_at_least_three_bases_in_at_least_one_sequence_to_cDNA_translation = Please select at least three bases in at least one sequence in order to perform a cDNA translation.
label.show_labels = Show labels
action.background_colour = Background Colour...
label.associate_nodes_with = Associate Nodes With
-label.jalview_pca_calculation = Jalview PCA Calculation
label.link_name = Link Name
label.pdb_file = PDB file
label.colour_with_jmol = Colour with Jmol
label.error_whilst_saving_current_state_to = Error whilst saving current state to {0}
label.error_whilst_loading_project_from = Error whilst loading project from {0}
label.couldnt_load_project = Couldn't load project
-label.pca_sequences_not_aligned = The sequences must be aligned before calculating PCA.\nTry using the Pad function in the edit menu,\nor one of the multiple sequence alignment web services.
label.invalid_name_preset_exists = Invalid name - preset already exists.
label.invalid_name = Invalid name
label.set_proxy_settings = Please set up your proxy settings in the 'Connections' tab of the Preferences window
label.save_as_html = Save as HTML
label.recently_opened = Recently Opened
label.blasting_for_unidentified_sequence_jobs_running = BLASTing for unidentified sequences - {0} jobs running.
+label.tree = Tree
label.tree_from = Tree from {0}
label.webservice_job_title = {0} using {1}
label.select_visible_region_of = selected {0} region of {1}
action.scale_right = Escala derecha
action.by_tree_order = Por orden del árbol
action.sort = Ordenar
-action.calculate_tree = Calcular árbol
+action.calculate_tree = Calcular árbol...
+action.calculate_tree_pca = Calcular árbol o ACP...
action.help = Ayuda
action.by_annotation = Por anotación...
action.invert_sequence_selection = Invertir selección de secuencias
label.principal_component_analysis = Análisis del Componente Principal
label.average_distance_identity = Distancia Media Usando % de Identidad
label.neighbour_joining_identity = Unir vecinos utilizando % de Identidad
+label.choose_calculation = Elegir el cálculo
label.treecalc_title = {0} utilizando {1}
label.tree_calc_av = Distancia media
label.tree_calc_nj = Unir vecinos
label.score_model_pid = % Identidad
label.score_model_blosum62 = BLOSUM62
label.score_model_pam250 = PAM 250
+label.score_model_smithwatermanscore = Puntuación entre secuencias alineadas por Smith-Waterman con matriz por defecto proteica / nucleotídica
+label.score_model_sequencefeaturesimilarity = Medida de distancia por cuenta promedia de características no compartidas en las posiciones de secuencia
label.score_model_conservation = Conservación de las propiedades físico-químicas
label.score_model_enhconservation = Conservación de las propiedades físico-químicas
label.status_bar = Barra de estado
label.set_this_label_text = fijar como etiqueta
label.sequences_from = Secuencias de {0}
label.successfully_loaded_file = Fichero cargado exitosamente {0}
+label.successfully_loaded_matrix = Matriz cargada exitosamente {0}
label.successfully_saved_to_file_in_format = Guardado exitosamente en el fichero: {0} en formato {1}.
label.copied_sequences_to_clipboard = Copiadas {0} secuencias en el portapapeles.
label.check_file_matches_sequence_ids_alignment = Comprobar que el fichero coincide con el ID de la secuencia en el alineamiento.
label.not_enough_sequences = No suficientes secuencias
label.selected_region_to_tree_may_only_contain_residues_or_gaps = La regi\u00F3n seleccionada para construir un \u00E1rbol puede\ncontener s\u00F3lo residuos o espacios.\nPrueba usando la funci\u00F3n Pad en el men\u00FA de edici\u00F3n,\n o uno de los m\u00FAltiples servicios web de alineamiento de secuencias.
label.sequences_selection_not_aligned = Las secuencias seleccionadas no están alineadas
-label.sequences_must_be_aligned_before_creating_tree = Las secuencias deben estar alineadas antes de crear el \u00E1rbol.\nPrueba usando la funci\u00F3n Pad en el men\u00FA de editar,\n o uno de los m\u00FAltiples servicios web de alineamiento de secuencias.
-label.sequences_not_aligned = Secuencias no alineadas
label.problem_reading_tree_file = Problema al leer el fichero del árbol
label.possible_problem_with_tree_file = Posible problema con el fichero del árbol
label.select_at_least_three_bases_in_at_least_one_sequence_to_cDNA_translation = Por favor seleccionar al menos tres bases de al menos una secuencia para poder realizar la traducción de cDNA.
label.selection_output_command = Seleccionar salida - {0}
label.annotation_for_displayid = <p><h2>Anotación para {0} </h2></p><p>
label.pdb_sequence_mapping = PDB - Mapeado de secuencia
-label.pca_details = detalles de la PCA
+label.pca_details = detalles de la ACP
label.redundancy_threshold_selection = Selección del umbral de redundancia
label.user_defined_colours = Colores definidos del usuario
label.jalviewLite_release = JalviewLite - versión {0}
label.no_features_added_to_this_alignment = No hay funciones asociadas a este alineamiento!!
label.features_can_be_added_from_searches_1 = (Las funciones pueden ser añadidas de búsquedas o
label.features_can_be_added_from_searches_2 = de ficheros de funciones Jalview / GFF)
-label.calculating_pca= Calculando PCA
+label.calculating_pca= Calculando ACP
label.jalview_cannot_open_file = Jalview no puede abrir el fichero
label.jalview_applet = Aplicación Jalview
label.loading_data = Cargando datos
label.set_as_default = Establecer por defecto
label.show_labels = Mostrar etiquetas
label.associate_nodes_with = Asociar nodos con
-label.jalview_pca_calculation = Cálculo del PCA por Jalview
label.link_name = Nombre del enlace
label.pdb_file = Fichero PDB
label.colour_with_jmol = Colorear con Jmol
-label.align_structures = Alinear estructuras
label.jmol = Jmol
label.sort_alignment_by_tree = Ordenar alineamiento por árbol
label.mark_unlinked_leaves = Marcar las hojas como no enlazadas
label.error_whilst_saving_current_state_to = Error mientras se guardaba el estado a {0}
label.error_whilst_loading_project_from = Error cargando el proyecto desde {0}
label.couldnt_load_project = No es posible cargar el proyecto
-label.pca_sequences_not_aligned = Las secuencias deben estar alineadas antes de calcular el PCA.\nPruebe a utilizar la funci\u00F3n de rellenar huecos en el men\u00FA Editar,\no cualquiera de los servicios web de alineamiento m\u00FAltiple.
label.invalid_name_preset_exists = Nombre no válido - esta preconfiguración ya existe.
label.invalid_name = Nombre no válido
label.set_proxy_settings = Por favor, configure su proxy en la pestaña 'Conexiones' de la ventana de Preferencia
label.save_as_html = Guardar como HTML
label.recently_opened = Abiertos recientemente
label.blasting_for_unidentified_sequence_jobs_running = Ejecutando BLAST de las secuencias no identificadas - {0} trabajos en marcha.
+label.tree = Árbol
label.tree_from = Árbol de {0}
label.webservice_job_title = {0} usando {1}
label.select_visible_region_of = seleccionada {0} región de {1}
label.empty_alignment_job = Trabajo de alineamiento vacío
label.add_new_sbrs_service = Añadir un nuevo SBRS
label.edit_sbrs_entry = Editar entrada SBRS
-label.pca_recalculating = Recalculando PCA
-label.pca_calculating = Calculando PCA
+label.pca_recalculating = Recalculando ACP
+label.pca_calculating = Calculando ACP
label.select_foreground_colour = Escoger color del primer plano
label.select_colour_for_text = Seleccione el color del texto
label.adjunst_foreground_text_colour_threshold = Ajustar el umbral del color del texto en primer plano
label.alpha_helix=Hélice Alfa
label.chimera_help=Ayuda para Chimera
label.find_tip=Buscar alineamiento, selección o IDs de secuencia para una subsecuencia (sin huecos)
-label.structure_viewer=Visualizador de estructura for defecto
+label.structure_viewer=Visualizador de estructura por defecto
label.embbed_biojson=Incrustar BioJSON al exportar HTML
label.transparency_tip=Ajustar la transparencia a "ver a través" los colores de las características.
label.choose_annotations=Escoja anotaciones
--- /dev/null
+ScoreMatrix BLOSUM62
+#
+# The BLOSUM62 substitution matrix, as at https://www.ncbi.nlm.nih.gov/Class/FieldGuide/BLOSUM62.txt
+# The first line declares a ScoreMatrix with the name BLOSUM62 (shown in menus)
+#
+# Scores are not case sensitive, unless column(s) are provided for lower case characters
+# The 'guide symbol' at the start of each row of score values is optional
+# Values may be integer or floating point, delimited by tab, space, comma or combinations
+#
+ A R N D C Q E G H I L K M F P S T W Y V B Z X *
+A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4
+R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4
+N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4
+D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4
+C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4
+Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4
+E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
+G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4
+H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4
+I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4
+L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4
+K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4
+M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4
+F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4
+P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4
+S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4
+T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4
+W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4
+Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4
+V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4
+B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4
+Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
+X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4
+* -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1
--- /dev/null
+#
+# Source: http://www.genome.jp/dbget-bin/www_bget?aaindex:HENS920103
+#
+H HENS920103
+D BLOSUM80 substitution matrix (Henikoff-Henikoff, 1992)
+R PMID:1438297
+A Henikoff, S. and Henikoff, J.G.
+T Amino acid substitution matrices from protein blocks
+J Proc. Natl. Acad. Sci. USA 89, 10915-10919 (1992)
+* matrix in 1/3 Bit Units
+M rows = ARNDCQEGHILKMFPSTWYV, cols = ARNDCQEGHILKMFPSTWYV
+ 7.
+ -3. 9.
+ -3. -1. 9.
+ -3. -3. 2. 10.
+ -1. -6. -5. -7. 13.
+ -2. 1. 0. -1. -5. 9.
+ -2. -1. -1. 2. -7. 3. 8.
+ 0. -4. -1. -3. -6. -4. -4. 9.
+ -3. 0. 1. -2. -7. 1. 0. -4. 12.
+ -3. -5. -6. -7. -2. -5. -6. -7. -6. 7.
+ -3. -4. -6. -7. -3. -4. -6. -7. -5. 2. 6.
+ -1. 3. 0. -2. -6. 2. 1. -3. -1. -5. -4. 8.
+ -2. -3. -4. -6. -3. -1. -4. -5. -4. 2. 3. -3. 9.
+ -4. -5. -6. -6. -4. -5. -6. -6. -2. -1. 0. -5. 0. 10.
+ -1. -3. -4. -3. -6. -3. -2. -5. -4. -5. -5. -2. -4. -6. 12.
+ 2. -2. 1. -1. -2. -1. -1. -1. -2. -4. -4. -1. -3. -4. -2. 7.
+ 0. -2. 0. -2. -2. -1. -2. -3. -3. -2. -3. -1. -1. -4. -3. 2. 8.
+ -5. -5. -7. -8. -5. -4. -6. -6. -4. -5. -4. -6. -3. 0. -7. -6. -5. 16.
+ -4. -4. -4. -6. -5. -3. -5. -6. 3. -3. -2. -4. -3. 4. -6. -3. -3. 3. 11.
+ -1. -4. -5. -6. -2. -4. -4. -6. -5. 4. 1. -4. 1. -2. -4. -3. 0. -5. -3. 7.
+//
--- /dev/null
+ScoreMatrix DNA
+#
+# A DNA substitution matrix.
+# This is an ad-hoc matrix which, in addition to penalising mutations between the common
+# nucleotides (ACGT), includes T/U equivalence in order to allow both DNA and/or RNA.
+# In addition, it encodes weak equivalence between R and Y with AG and CTU, respectively,
+# and N is allowed to match any other base weakly.
+# This matrix also includes I (Inosine) and X (Xanthine), but encodes them to weakly match
+# any of (ACGTU), and unfavourably match each other.
+#
+# The first line declares a ScoreMatrix with the name DNA (shown in menus)
+# Scores are not case sensitive, unless column(s) are provided for lower case characters
+#
+# Values may be integer or floating point, delimited by tab, space, comma or combinations
+#
+ A C G T U I X R Y N -
+A 10 -8 -8 -8 -8 1 1 1 -8 1 1
+C -8 10 -8 -8 -8 1 1 -8 1 1 1
+G -8 -8 10 -8 -8 1 1 1 -8 1 1
+T -8 -8 -8 10 10 1 1 -8 1 1 1
+U -8 -8 -8 10 10 1 1 -8 1 1 1
+I 1 1 1 1 1 10 0 0 0 1 1
+X 1 1 1 1 1 0 10 0 0 1 1
+R 1 -8 1 -8 -8 0 0 10 -8 1 1
+Y -8 1 -8 1 1 0 0 -8 10 1 1
+N 1 1 1 1 1 1 1 1 1 10 1
+- 1 1 1 1 1 1 1 1 1 1 1
--- /dev/null
+ScoreMatrix PAM250
+#
+# The PAM250 substitution matrix
+# The first line declares a ScoreMatrix with the name PAM250 (shown in menus)
+# Scores are not case sensitive, unless column(s) are provided for lower case characters
+# Values may be integer or floating point, delimited by tab, space, comma or combinations
+#
+ A R N D C Q E G H I L K M F P S T W Y V B Z X *
+A 2 -2 0 0 -2 0 0 1 -1 -1 -2 -1 -1 -3 1 1 1 -6 -3 0 0 0 0 -8
+R -2 6 0 -1 -4 1 -1 -3 2 -2 -3 3 0 -4 0 0 -1 2 -4 -2 -1 0 -1 -8
+N 0 0 2 2 -4 1 1 0 2 -2 -3 1 -2 -3 0 1 0 -4 -2 -2 2 1 0 -8
+D 0 -1 2 4 -5 2 3 1 1 -2 -4 0 -3 -6 -1 0 0 -7 -4 -2 3 3 -1 -8
+C -2 -4 -4 -5 12 -5 -5 -3 -3 -2 -6 -5 -5 -4 -3 0 -2 -8 0 -2 -4 -5 -3 -8
+Q 0 1 1 2 -5 4 2 -1 3 -2 -2 1 -1 -5 0 -1 -1 -5 -4 -2 1 3 -1 -8
+E 0 -1 1 3 -5 2 4 0 1 -2 -3 0 -2 -5 -1 0 0 -7 -4 -2 3 3 -1 -8
+G 1 -3 0 1 -3 -1 0 5 -2 -3 -4 -2 -3 -5 0 1 0 -7 -5 -1 0 0 -1 -8
+H -1 2 2 1 -3 3 1 -2 6 -2 -2 0 -2 -2 0 -1 -1 -3 0 -2 1 2 -1 -8
+I -1 -2 -2 -2 -2 -2 -2 -3 -2 5 2 -2 2 1 -2 -1 0 -5 -1 4 -2 -2 -1 -8
+L -2 -3 -3 -4 -6 -2 -3 -4 -2 2 6 -3 4 2 -3 -3 -2 -2 -1 2 -3 -3 -1 -8
+K -1 3 1 0 -5 1 0 -2 0 -2 -3 5 0 -5 -1 0 0 -3 -4 -2 1 0 -1 -8
+M -1 0 -2 -3 -5 -1 -2 -3 -2 2 4 0 6 0 -2 -2 -1 -4 -2 2 -2 -2 -1 -8
+F -3 -4 -3 -6 -4 -5 -5 -5 -2 1 2 -5 0 9 -5 -3 -3 0 7 -1 -4 -5 -2 -8
+P 1 0 0 -1 -3 0 -1 0 0 -2 -3 -1 -2 -5 6 1 0 -6 -5 -1 -1 0 -1 -8
+S 1 0 1 0 0 -1 0 1 -1 -1 -3 0 -2 -3 1 2 1 -2 -3 -1 0 0 0 -8
+T 1 -1 0 0 -2 -1 0 0 -1 0 -2 0 -1 -3 0 1 3 -5 -3 0 0 -1 0 -8
+W -6 2 -4 -7 -8 -5 -7 -7 -3 -5 -2 -3 -4 0 -6 -2 -5 17 0 -6 -5 -6 -4 -8
+Y -3 -4 -2 -4 0 -4 -4 -5 0 -1 -1 -4 -2 7 -5 -3 -3 0 10 -2 -3 -4 -2 -8
+V 0 -2 -2 -2 -2 -2 -2 -1 -2 4 2 -2 2 -1 -1 -1 0 -6 -2 4 -2 -2 -1 -8
+B 0 -1 2 3 -4 1 3 0 1 -2 -3 1 -2 -4 -1 0 0 -5 -3 -2 3 2 -1 -8
+Z 0 0 1 3 -5 3 3 0 2 -2 -3 0 -2 -5 0 0 -1 -6 -4 -2 2 3 -1 -8
+X 0 -1 0 -1 -3 -1 -1 -1 -1 -1 -1 -1 -1 -2 -1 0 0 -4 -2 -1 -1 -1 -1 -8
+* -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 1
colourBySequence();
- int max = -10;
+ float max = -10;
int maxchain = -1;
int pdbstart = 0;
int pdbend = 0;
colourBySequence();
- int max = -10;
+ float max = -10;
int maxchain = -1;
int pdbstart = 0;
int pdbend = 0;
*/
package jalview.analysis;
+import jalview.analysis.scoremodels.PIDModel;
+import jalview.analysis.scoremodels.ScoreMatrix;
+import jalview.analysis.scoremodels.ScoreModels;
+import jalview.analysis.scoremodels.SimilarityParams;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.Mapping;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;
-import jalview.schemes.ResidueProperties;
-import jalview.schemes.ScoreMatrix;
import jalview.util.Comparison;
import jalview.util.Format;
import jalview.util.MapList;
private static final String NEWLINE = System.lineSeparator();
- static String[] dna = { "A", "C", "G", "T", "-" };
+ float[][] score;
- // "C", "T", "A", "G", "-"};
- static String[] pep = { "A", "R", "N", "D", "C", "Q", "E", "G", "H", "I",
- "L", "K", "M", "F", "P", "S", "T", "W", "Y", "V", "B", "Z", "X", "-" };
+ float[][] E;
- int[][] score;
-
- int[][] E;
-
- int[][] F;
+ float[][] F;
int[][] traceback;
int count;
/** DOCUMENT ME!! */
- public int maxscore;
+ public float maxscore;
float pid;
int gapExtend = 20;
- int[][] lookup = ResidueProperties.getBLOSUM62();
-
- String[] intToStr = pep;
-
- int defInt = 23;
-
StringBuffer output = new StringBuffer();
- String type;
+ String type; // AlignSeq.PEP or AlignSeq.DNA
+
+ private ScoreMatrix scoreMatrix;
- private int[] charToInt;
+ private static final int GAP_INDEX = -1;
/**
 * Creates a new AlignSeq object.
 * <p>
 * Delegates to seqInit, passing each sequence together with the string
 * returned by its getSequenceAsString() method.
 *
- * @param s1
- * DOCUMENT ME!
- * @param s2
- * DOCUMENT ME!
- * @param type
- * DOCUMENT ME!
+ * @param s1 first sequence for alignment
+ * @param s2 second sequence for alignment
+ * @param type molecule type, either AlignSeq.PEP or AlignSeq.DNA
 */
public AlignSeq(SequenceI s1, SequenceI s2, String type)
{
- SeqInit(s1, s1.getSequenceAsString(), s2, s2.getSequenceAsString(),
+ seqInit(s1, s1.getSequenceAsString(), s2, s2.getSequenceAsString(),
type);
}
/**
 * Creates a new AlignSeq object from two sequences and two explicitly
 * supplied strings. Both strings are converted to upper case before being
 * passed, with the sequences and type, to seqInit.
 *
 * @param s1
 *          first sequence
 * @param string1
 *          string supplied for s1 (upper-cased before use)
 * @param s2
 *          second sequence
 * @param string2
 *          string supplied for s2 (upper-cased before use)
 * @param type
 *          molecule type, passed unchanged to seqInit
 */
public AlignSeq(SequenceI s1, String string1, SequenceI s2,
String string2, String type)
{
- SeqInit(s1, string1.toUpperCase(), s2, string2.toUpperCase(), type);
+ seqInit(s1, string1.toUpperCase(), s2, string2.toUpperCase(), type);
}
/**
 * Returns the value of the {@code maxscore} field.
 *
 * @return the current value of {@code maxscore}
 */
- public int getMaxScore()
+ public float getMaxScore()
{
return maxscore;
}
}
/**
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- */
- public SequenceI getS1()
- {
- return s1;
- }
-
- /**
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- */
- public SequenceI getS2()
- {
- return s2;
- }
-
- /**
*
* @return aligned instance of Seq 1
*/
* @param type
* DNA or PEPTIDE
*/
- public void SeqInit(SequenceI s1, String string1, SequenceI s2,
+ public void seqInit(SequenceI s1, String string1, SequenceI s2,
String string2, String type)
{
this.s1 = s1;
this.s2 = s2;
setDefaultParams(type);
- SeqInit(string1, string2);
- }
-
- /**
- * Construct score matrix for sequences with custom substitution matrix
- *
- * @param s1
- * - sequence 1
- * @param string1
- * - string to use for s1
- * @param s2
- * - sequence 2
- * @param string2
- * - string to use for s2
- * @param scoreMatrix
- * - substitution matrix to use for alignment
- */
- public void SeqInit(SequenceI s1, String string1, SequenceI s2,
- String string2, ScoreMatrix scoreMatrix)
- {
- this.s1 = s1;
- this.s2 = s2;
- setType(scoreMatrix.isDNA() ? AlignSeq.DNA : AlignSeq.PEP);
- lookup = scoreMatrix.getMatrix();
+ seqInit(string1, string2);
}
/**
* @param string1
* @param string2
*/
- private void SeqInit(String string1, String string2)
+ private void seqInit(String string1, String string2)
{
s1str = extractGaps(jalview.util.Comparison.GapChars, string1);
s2str = extractGaps(jalview.util.Comparison.GapChars, string2);
return;
}
- // System.out.println("lookuip " + rt.freeMemory() + " "+ rt.totalMemory());
- seq1 = new int[s1str.length()];
-
- // System.out.println("seq1 " + rt.freeMemory() +" " + rt.totalMemory());
- seq2 = new int[s2str.length()];
-
- // System.out.println("seq2 " + rt.freeMemory() + " " + rt.totalMemory());
- score = new int[s1str.length()][s2str.length()];
+ score = new float[s1str.length()][s2str.length()];
- // System.out.println("score " + rt.freeMemory() + " " + rt.totalMemory());
- E = new int[s1str.length()][s2str.length()];
+ E = new float[s1str.length()][s2str.length()];
- // System.out.println("E " + rt.freeMemory() + " " + rt.totalMemory());
- F = new int[s1str.length()][s2str.length()];
+ F = new float[s1str.length()][s2str.length()];
traceback = new int[s1str.length()][s2str.length()];
- // System.out.println("F " + rt.freeMemory() + " " + rt.totalMemory());
- seq1 = stringToInt(s1str, type);
-
- // System.out.println("seq1 " + rt.freeMemory() + " " + rt.totalMemory());
- seq2 = stringToInt(s2str, type);
-
- // System.out.println("Seq2 " + rt.freeMemory() + " " + rt.totalMemory());
- // long tstart = System.currentTimeMillis();
- // calcScoreMatrix();
- // long tend = System.currentTimeMillis();
- // System.out.println("Time take to calculate score matrix = " +
- // (tend-tstart) + " ms");
- // printScoreMatrix(score);
- // System.out.println();
- // printScoreMatrix(traceback);
- // System.out.println();
- // printScoreMatrix(E);
- // System.out.println();
- // /printScoreMatrix(F);
- // System.out.println();
- // tstart = System.currentTimeMillis();
- // traceAlignment();
- // tend = System.currentTimeMillis();
- // System.out.println("Time take to traceback alignment = " + (tend-tstart)
- // + " ms");
- }
-
- private void setDefaultParams(String type)
- {
- setType(type);
+ seq1 = indexEncode(s1str);
- if (type.equals(AlignSeq.PEP))
- {
- lookup = ResidueProperties.getDefaultPeptideMatrix();
- }
- else if (type.equals(AlignSeq.DNA))
- {
- lookup = ResidueProperties.getDefaultDnaMatrix();
- }
+ seq2 = indexEncode(s2str);
}
/**
 * Validates the molecule type and selects the default score matrix for it.
 * <p>
 * If {@code moleculeType} equals neither {@code PEP} nor {@code DNA}, a
 * message is appended to {@code output} and an {@code Error} is thrown.
 * Otherwise {@code type} is set to the given value and {@code scoreMatrix}
 * is set to the model returned by
 * {@code ScoreModels.getInstance().getDefaultModel(boolean)}, called with
 * {@code true} when the type equals {@code PEP} and {@code false} otherwise.
 *
 * @param moleculeType
 *          expected to be AlignSeq.PEP or AlignSeq.DNA
 */
- private void setType(String type2)
+ private void setDefaultParams(String moleculeType)
{
- this.type = type2;
- if (type.equals(AlignSeq.PEP))
- {
- intToStr = pep;
- charToInt = ResidueProperties.aaIndex;
- defInt = ResidueProperties.maxProteinIndex;
- }
- else if (type.equals(AlignSeq.DNA))
- {
- intToStr = dna;
- charToInt = ResidueProperties.nucleotideIndex;
- defInt = ResidueProperties.maxNucleotideIndex;
- }
- else
+ if (!PEP.equals(moleculeType) && !DNA.equals(moleculeType))
{
output.append("Wrong type = dna or pep only");
throw new Error(MessageManager.formatMessage(
- "error.unknown_type_dna_or_pep", new String[] { type2 }));
+ "error.unknown_type_dna_or_pep",
+ new String[] { moleculeType }));
}
+
+ type = moleculeType;
+ scoreMatrix = ScoreModels.getInstance().getDefaultModel(
+ PEP.equals(type));
}
/**
public void traceAlignment()
{
// Find the maximum score along the rhs or bottom row
- int max = -9999;
+ float max = -Float.MAX_VALUE;
for (int i = 0; i < seq1.length; i++)
{
aseq1 = new int[seq1.length + seq2.length];
aseq2 = new int[seq1.length + seq2.length];
+ StringBuilder sb1 = new StringBuilder(aseq1.length);
+ StringBuilder sb2 = new StringBuilder(aseq2.length);
+
count = (seq1.length + seq2.length) - 1;
- while ((i > 0) && (j > 0))
+ while (i > 0 && j > 0)
{
- if ((aseq1[count] != defInt) && (i >= 0))
- {
- aseq1[count] = seq1[i];
- astr1 = s1str.charAt(i) + astr1;
- }
-
- if ((aseq2[count] != defInt) && (j > 0))
- {
- aseq2[count] = seq2[j];
- astr2 = s2str.charAt(j) + astr2;
- }
+ aseq1[count] = seq1[i];
+ sb1.append(s1str.charAt(i));
+ aseq2[count] = seq2[j];
+ sb2.append(s2str.charAt(j));
trace = findTrace(i, j);
else if (trace == 1)
{
j--;
- aseq1[count] = defInt;
- astr1 = "-" + astr1.substring(1);
+ aseq1[count] = GAP_INDEX;
+ sb1.replace(sb1.length() - 1, sb1.length(), "-");
}
else if (trace == -1)
{
i--;
- aseq2[count] = defInt;
- astr2 = "-" + astr2.substring(1);
+ aseq2[count] = GAP_INDEX;
+ sb2.replace(sb2.length() - 1, sb2.length(), "-");
}
count--;
seq1start = i + 1;
seq2start = j + 1;
- if (aseq1[count] != defInt)
+ if (aseq1[count] != GAP_INDEX)
{
aseq1[count] = seq1[i];
- astr1 = s1str.charAt(i) + astr1;
+ sb1.append(s1str.charAt(i));
}
- if (aseq2[count] != defInt)
+ if (aseq2[count] != GAP_INDEX)
{
aseq2[count] = seq2[j];
- astr2 = s2str.charAt(j) + astr2;
+ sb2.append(s2str.charAt(j));
}
+
+ /*
+ * we built the character strings backwards, so now
+ * reverse them to convert to sequence strings
+ */
+ astr1 = sb1.reverse().toString();
+ astr2 = sb2.reverse().toString();
}
/**
.append(String.valueOf(s2str.length())).append(")")
.append(NEWLINE).append(NEWLINE);
+ ScoreMatrix pam250 = ScoreModels.getInstance().getPam250();
+
for (int j = 0; j < nochunks; j++)
{
// Print the first aligned sequence
output.append(NEWLINE);
output.append(new Format("%" + (maxid) + "s").form(" ")).append(" ");
- // Print out the matching chars
+ /*
+ * Print out the match symbols:
+ * | for exact match (ignoring case)
+ * . if PAM250 score is positive
+ * else a space
+ */
for (int i = 0; i < len; i++)
{
if ((i + (j * len)) < astr1.length())
{
- boolean sameChar = Comparison.isSameResidue(
- astr1.charAt(i + (j * len)), astr2.charAt(i + (j * len)),
- false);
- if (sameChar
- && !jalview.util.Comparison.isGap(astr1.charAt(i
- + (j * len))))
+ char c1 = astr1.charAt(i + (j * len));
+ char c2 = astr2.charAt(i + (j * len));
+ boolean sameChar = Comparison.isSameResidue(c1, c2, false);
+ if (sameChar && !Comparison.isGap(c1))
{
pid++;
output.append("|");
}
else if (type.equals("pep"))
{
- if (ResidueProperties.getPAM250(astr1.charAt(i + (j * len)),
- astr2.charAt(i + (j * len))) > 0)
+ if (pam250.getPairwiseScore(c1, c2) > 0)
{
output.append(".");
}
/**
* DOCUMENT ME!
*
- * @param mat
- * DOCUMENT ME!
- */
- public void printScoreMatrix(int[][] mat)
- {
- int n = seq1.length;
- int m = seq2.length;
-
- for (int i = 0; i < n; i++)
- {
- // Print the top sequence
- if (i == 0)
- {
- Format.print(System.out, "%8s", s2str.substring(0, 1));
-
- for (int jj = 1; jj < m; jj++)
- {
- Format.print(System.out, "%5s", s2str.substring(jj, jj + 1));
- }
-
- System.out.println();
- }
-
- for (int j = 0; j < m; j++)
- {
- if (j == 0)
- {
- Format.print(System.out, "%3s", s1str.substring(i, i + 1));
- }
-
- Format.print(System.out, "%3d ", mat[i][j] / 10);
- }
-
- System.out.println();
- }
- }
-
- /**
- * DOCUMENT ME!
- *
* @param i
* DOCUMENT ME!
* @param j
public int findTrace(int i, int j)
{
int t = 0;
- int max = score[i - 1][j - 1] + (lookup[seq1[i]][seq2[j]] * 10);
+ // float pairwiseScore = lookup[seq1[i]][seq2[j]];
+ float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(i),
+ s2str.charAt(j));
+ float max = score[i - 1][j - 1] + (pairwiseScore * 10);
if (F[i][j] > max)
{
int m = seq2.length;
// top left hand element
- score[0][0] = lookup[seq1[0]][seq2[0]] * 10;
+ score[0][0] = scoreMatrix.getPairwiseScore(s1str.charAt(0),
+ s2str.charAt(0)) * 10;
E[0][0] = -gapExtend;
F[0][0] = 0;
E[0][j] = max(score[0][j - 1] - gapOpen, E[0][j - 1] - gapExtend);
F[0][j] = -gapExtend;
- score[0][j] = max(lookup[seq1[0]][seq2[j]] * 10, -gapOpen, -gapExtend);
+ float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(0),
+ s2str.charAt(j));
+ score[0][j] = max(pairwiseScore * 10, -gapOpen, -gapExtend);
traceback[0][j] = 1;
}
E[i][0] = -gapOpen;
F[i][0] = max(score[i - 1][0] - gapOpen, F[i - 1][0] - gapExtend);
- score[i][0] = max(lookup[seq1[i]][seq2[0]] * 10, E[i][0], F[i][0]);
+ float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(i),
+ s2str.charAt(0));
+ score[i][0] = max(pairwiseScore * 10, E[i][0], F[i][0]);
traceback[i][0] = -1;
}
E[i][j] = max(score[i][j - 1] - gapOpen, E[i][j - 1] - gapExtend);
F[i][j] = max(score[i - 1][j] - gapOpen, F[i - 1][j] - gapExtend);
+ float pairwiseScore = scoreMatrix.getPairwiseScore(s1str.charAt(i),
+ s2str.charAt(j));
score[i][j] = max(score[i - 1][j - 1]
- + (lookup[seq1[i]][seq2[j]] * 10), E[i][j], F[i][j]);
+ + (pairwiseScore * 10), E[i][j], F[i][j]);
traceback[i][j] = findTrace(i, j);
}
}
/**
* DOCUMENT ME!
*
- * @param i1
+ * @param f1
* DOCUMENT ME!
- * @param i2
+ * @param f2
* DOCUMENT ME!
- * @param i3
+ * @param f3
* DOCUMENT ME!
*
* @return DOCUMENT ME!
*/
- public int max(int i1, int i2, int i3)
+ private static float max(float f1, float f2, float f3)
{
- int max = i1;
+ float max = f1;
- if (i2 > i1)
+ if (f2 > f1)
{
- max = i2;
+ max = f2;
}
- if (i3 > max)
+ if (f3 > max)
{
- max = i3;
+ max = f3;
}
return max;
/**
 * Returns the larger of two values.
 *
- * @param i1
+ * @param f1
 *          first value
- * @param i2
+ * @param f2
 *          second value
 *
 * @return the second value if it is strictly greater than the first,
 *         otherwise the first value
 */
- public int max(int i1, int i2)
+ private static float max(float f1, float f2)
{
- int max = i1;
+ float max = f1;
- if (i2 > i1)
+ if (f2 > f1)
{
- max = i2;
+ max = f2;
}
return max;
}
/**
- * DOCUMENT ME!
+ * Converts the character string to an array of integers which are the
+ * corresponding indices to the characters in the score matrix
 *
 * @param s
 *          the string whose characters are to be encoded
- * DOCUMENT ME!
- * @param type
- * DOCUMENT ME!
 *
- * @return DOCUMENT ME!
+ * @return an array of the same length as s, holding for each character the value returned by scoreMatrix.getMatrixIndex
 */
- public int[] stringToInt(String s, String type)
+ int[] indexEncode(String s)
{
- int[] seq1 = new int[s.length()];
+ int[] encoded = new int[s.length()];
for (int i = 0; i < s.length(); i++)
{
- // String ss = s.substring(i, i + 1).toUpperCase();
char c = s.charAt(i);
- if ('a' <= c && c <= 'z')
- {
- // TO UPPERCASE !!!
- c -= ('a' - 'A');
- }
-
- try
- {
- seq1[i] = charToInt[c]; // set accordingly from setType
- if (seq1[i] < 0 || seq1[i] > defInt) // set from setType: 23 for
- // peptides, or 4 for NA.
- {
- seq1[i] = defInt;
- }
-
- } catch (Exception e)
- {
- seq1[i] = defInt;
- }
+ encoded[i] = scoreMatrix.getMatrixIndex(c);
}
- return seq1;
+ return encoded;
}
/**
public static void displayMatrix(Graphics g, int[][] mat, int n, int m,
int psize)
{
- // TODO method dosen't seem to be referenced anywhere delete??
+ // TODO method doesn't seem to be referenced anywhere delete??
int max = -1000;
int min = 1000;
{
SequenceI bestm = null;
AlignSeq bestaseq = null;
- int bestscore = 0;
+ float bestscore = 0;
for (SequenceI msq : al.getSequences())
{
AlignSeq aseq = doGlobalNWAlignment(msq, sq, dnaOrProtein);
bestm = msq;
}
}
- System.out.println("Best Score for " + (matches.size() + 1) + " :"
- + bestscore);
+ // System.out.println("Best Score for " + (matches.size() + 1) + " :"
+ // + bestscore);
matches.add(bestm);
aligns.add(bestaseq);
al.deleteSequence(bestm);
// long start = System.currentTimeMillis();
+ SimilarityParams pidParams = new SimilarityParams(true, true, true,
+ true);
float pid;
String seqi, seqj;
for (int i = 0; i < height; i++)
seqj = ug;
}
}
- pid = Comparison.PID(seqi, seqj);
+ pid = (float) PIDModel.computePID(seqi, seqj, pidParams);
// use real sequence length rather than string length
if (lngth[j] < lngth[i])
*/
package jalview.analysis;
+import jalview.analysis.scoremodels.PIDModel;
+import jalview.analysis.scoremodels.SimilarityParams;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.AlignmentOrder;
import jalview.datamodel.SequenceGroup;
import jalview.datamodel.SequenceI;
import jalview.datamodel.SequenceNode;
-import jalview.util.Comparison;
import jalview.util.MessageManager;
import jalview.util.QuickSort;
static boolean sortOrderAscending = true;
- static NJTree lastTree = null;
+ static TreeModel lastTree = null;
static boolean sortTreeAscending = true;
private static boolean sortLengthAscending;
/**
- * Sort by Percentage Identity w.r.t. s
+ * Sorts sequences in the alignment by Percentage Identity with the given
+ * reference sequence, sorting the highest identity to the top
*
* @param align
* AlignmentI
* @param s
* SequenceI
- * @param tosort
- * sequences from align that are to be sorted.
- */
- public static void sortByPID(AlignmentI align, SequenceI s,
- SequenceI[] tosort)
- {
- sortByPID(align, s, tosort, 0, -1);
- }
-
- /**
- * Sort by Percentage Identity w.r.t. s
- *
- * @param align
- * AlignmentI
- * @param s
- * SequenceI
- * @param tosort
- * sequences from align that are to be sorted.
- * @param start
- * start column (0 for beginning
* @param end
*/
- public static void sortByPID(AlignmentI align, SequenceI s,
- SequenceI[] tosort, int start, int end)
+ public static void sortByPID(AlignmentI align, SequenceI s)
{
int nSeq = align.getHeight();
float[] scores = new float[nSeq];
SequenceI[] seqs = new SequenceI[nSeq];
+ String refSeq = s.getSequenceAsString();
+ SimilarityParams pidParams = new SimilarityParams(true, true, true,
+ true);
for (int i = 0; i < nSeq; i++)
{
- scores[i] = Comparison.PID(align.getSequenceAt(i)
- .getSequenceAsString(), s.getSequenceAsString());
+ scores[i] = (float) PIDModel.computePID(align.getSequenceAt(i)
+ .getSequenceAsString(), refSeq, pidParams);
seqs[i] = align.getSequenceAt(i);
}
* @return DOCUMENT ME!
*/
private static List<SequenceI> getOrderByTree(AlignmentI align,
- NJTree tree)
+ TreeModel tree)
{
int nSeq = align.getHeight();
* @param tree
* tree which has
*/
- public static void sortByTree(AlignmentI align, NJTree tree)
+ public static void sortByTree(AlignmentI align, TreeModel tree)
{
List<SequenceI> tmp = getOrderByTree(align, tree);
--- /dev/null
+package jalview.analysis;
+
+import jalview.api.analysis.ScoreModelI;
+import jalview.api.analysis.SimilarityParamsI;
+import jalview.datamodel.SequenceNode;
+import jalview.viewmodel.AlignmentViewport;
+
+/**
+ * This class implements distance calculations used in constructing an Average
+ * Distance tree (also known as UPGMA). It supplies the three steps that differ
+ * between tree types - choosing the next pair of clusters to join, updating
+ * the distance matrix after a join, and assigning branch lengths to the joined
+ * nodes - while the rest of the tree construction is inherited from
+ * TreeBuilder.
+ */
+public class AverageDistanceTree extends TreeBuilder
+{
+ /**
+ * Constructor; all arguments are passed unchanged to the TreeBuilder
+ * constructor
+ *
+ * @param av
+ * the alignment viewport handed to the superclass
+ * @param sm
+ * the score model handed to the superclass
+ * @param scoreParameters
+ * the similarity parameters handed to the superclass
+ */
+ public AverageDistanceTree(AlignmentViewport av, ScoreModelI sm,
+ SimilarityParamsI scoreParameters)
+ {
+ super(av, sm, scoreParameters);
+ }
+
+ /**
+ * Calculates and saves the distance between the combination of cluster(i) and
+ * cluster(j) and all other clusters. An average of the distances from
+ * cluster(i) and cluster(j) is calculated, weighted by the sizes of each
+ * cluster.
+ * <p>
+ * The new values are first collected in a temporary array, so that every
+ * average is computed from the distances as they were before the join, and
+ * are then written to both row i and column i of the distance matrix. The
+ * entries for i and j themselves are set to zero. Row and column j are not
+ * modified by this method.
+ *
+ * @param i
+ * index of the first cluster being joined; its row and column
+ * receive the new distances
+ * @param j
+ * index of the second cluster being joined
+ */
+ @Override
+ protected void findClusterDistance(int i, int j)
+ {
+ int noi = clusters.elementAt(i).cardinality();
+ int noj = clusters.elementAt(j).cardinality();
+
+ // New distances from cluster i to others
+ double[] newdist = new double[noseqs];
+
+ for (int l = 0; l < noseqs; l++)
+ {
+ if ((l != i) && (l != j))
+ {
+ newdist[l] = ((distances.getValue(i, l) * noi) + (distances
+ .getValue(j, l) * noj)) / (noi + noj);
+ }
+ else
+ {
+ newdist[l] = 0;
+ }
+ }
+
+ for (int ii = 0; ii < noseqs; ii++)
+ {
+ distances.setValue(i, ii, newdist[ii]);
+ distances.setValue(ii, i, newdist[ii]);
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>
+ * This implementation scans every pair (i, j) with i less than j where
+ * neither index is flagged in 'done', and picks the pair with the smallest
+ * value in the distance matrix. The indices of that pair are stored in the
+ * fields mini and minj as a side effect. The comparison is a strict 'less
+ * than', so when several pairs share the smallest distance the first one
+ * encountered is kept.
+ *
+ * @return the smallest distance found, or Double.MAX_VALUE if no eligible
+ * pair exists (in which case mini and minj are left unchanged)
+ */
+ @Override
+ protected double findMinDistance()
+ {
+ double min = Double.MAX_VALUE;
+
+ for (int i = 0; i < (noseqs - 1); i++)
+ {
+ for (int j = i + 1; j < noseqs; j++)
+ {
+ if (!done.get(i) && !done.get(j))
+ {
+ if (distances.getValue(i, j) < min)
+ {
+ mini = i;
+ minj = j;
+
+ min = distances.getValue(i, j);
+ }
+ }
+ }
+ }
+ return min;
+ }
+
+ /**
+ * {@inheritDoc}
+ * <p>
+ * For each of the two nodes, the 'dist' values are summed along the chain of
+ * left children starting at (and including) the node itself. The node's own
+ * 'dist' is then set to half of the supplied distance minus that sum. No
+ * clamping is applied, so the result may be negative.
+ *
+ * @param nodei
+ * first node being joined; its dist field is overwritten
+ * @param nodej
+ * second node being joined; its dist field is overwritten
+ * @param dist
+ * the distance between the two clusters being joined
+ */
+ @Override
+ protected void findNewDistances(SequenceNode nodei, SequenceNode nodej,
+ double dist)
+ {
+ double ih = 0;
+ double jh = 0;
+
+ SequenceNode sni = nodei;
+ SequenceNode snj = nodej;
+
+ while (sni != null)
+ {
+ ih = ih + sni.dist;
+ sni = (SequenceNode) sni.left();
+ }
+
+ while (snj != null)
+ {
+ jh = jh + snj.dist;
+ snj = (SequenceNode) snj.left();
+ }
+
+ nodei.dist = ((dist / 2) - ih);
+ nodej.dist = ((dist / 2) - jh);
+ }
+
+}
*/
package jalview.analysis;
+import jalview.analysis.scoremodels.ScoreMatrix;
+import jalview.analysis.scoremodels.ScoreModels;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.Annotation;
import jalview.datamodel.ResidueCount;
private static final int TOUPPERCASE = 'a' - 'A';
+ private static final int GAP_INDEX = -1;
+
SequenceI[] sequences;
int start;
int end;
- Vector<int[]> seqNums; // vector of int vectors where first is sequence
- // checksum
+ /*
+ * a list whose i'th element is an array whose first entry is the checksum
+ * of the i'th sequence, followed by residues encoded to score matrix index
+ */
+ Vector<int[]> seqNums;
int maxLength = 0; // used by quality calcs
*/
Map<String, Integer>[] total;
- boolean canonicaliseAa = true; // if true then conservation calculation will
-
- // map all symbols to canonical aa numbering
- // rather than consider conservation of that
- // symbol
+ /*
+ * if true then conservation calculation will map all symbols to canonical aa
+ * numbering rather than consider conservation of that symbol
+ */
+ boolean canonicaliseAa = true;
- /** Stores calculated quality values */
private Vector<Double> quality;
- /** Stores maximum and minimum values of quality values */
- private double[] qualityRange = new double[2];
+ private double qualityMinimum;
+
+ private double qualityMaximum;
private Sequence consSequence;
private String name = "";
+ /*
+ * an array, for each column, of counts of symbols (by score matrix index)
+ */
private int[][] cons2;
+ /*
+ * gap counts for each column
+ */
+ private int[] cons2GapCounts;
+
private String[] consSymbs;
/**
}
/**
- * Translate sequence i into a numerical representation and store it in the
- * i'th position of the seqNums array.
+ * Translate sequence i into score matrix indices and store it in the i'th
+ * position of the seqNums array.
*
* @param i
+ * @param sm
*/
- private void calcSeqNum(int i)
+ private void calcSeqNum(int i, ScoreMatrix sm)
{
- String sq = null; // for dumb jbuilder not-inited exception warning
- int[] sqnum = null;
-
int sSize = sequences.length;
if ((i > -1) && (i < sSize))
{
- sq = sequences[i].getSequenceAsString();
+ String sq = sequences[i].getSequenceAsString();
if (seqNums.size() <= i)
{
seqNums.addElement(new int[sq.length() + 1]);
}
+ /*
+ * the first entry in the array is the sequence's hashcode,
+ * following entries are matrix indices of sequence characters
+ */
if (sq.hashCode() != seqNums.elementAt(i)[0])
{
int j;
maxLength = len;
}
- sqnum = new int[len + 1]; // better to always make a new array -
+ int[] sqnum = new int[len + 1]; // better to always make a new array -
// sequence can change its length
sqnum[0] = sq.hashCode();
for (j = 1; j <= len; j++)
{
- sqnum[j] = jalview.schemes.ResidueProperties.aaIndex[sq
- .charAt(j - 1)];
+ // sqnum[j] = ResidueProperties.aaIndex[sq.charAt(j - 1)];
+ char residue = sq.charAt(j - 1);
+ if (Comparison.isGap(residue))
+ {
+ sqnum[j] = GAP_INDEX;
+ }
+ else
+ {
+ sqnum[j] = sm.getMatrixIndex(residue);
+ if (sqnum[j] == -1)
+ {
+ sqnum[j] = GAP_INDEX;
+ }
+ }
}
seqNums.setElementAt(sqnum, i);
// From Alignment.java in jalview118
public void findQuality()
{
- findQuality(0, maxLength - 1);
+ findQuality(0, maxLength - 1, ScoreModels.getInstance().getBlosum62());
}
/**
* DOCUMENT ME!
+ *
+ * @param sm
*/
- private void percentIdentity2()
+ private void percentIdentity(ScoreMatrix sm)
{
seqNums = new Vector<int[]>();
- // calcSeqNum(s);
int i = 0, iSize = sequences.length;
// Do we need to calculate this again?
for (i = 0; i < iSize; i++)
{
- calcSeqNum(i);
+ calcSeqNum(i, sm);
}
if ((cons2 == null) || seqNumsChanged)
{
+ // FIXME remove magic number 24 without changing calc
+ // sm.getSize() returns 25 so doesn't quite do it...
cons2 = new int[maxLength][24];
+ cons2GapCounts = new int[maxLength];
- // Initialize the array
- for (int j = 0; j < 24; j++)
- {
- for (i = 0; i < maxLength; i++)
- {
- cons2[i][j] = 0;
- }
- }
-
- int[] sqnum;
int j = 0;
while (j < sequences.length)
{
- sqnum = seqNums.elementAt(j);
+ int[] sqnum = seqNums.elementAt(j);
for (i = 1; i < sqnum.length; i++)
{
- cons2[i - 1][sqnum[i]]++;
+ int index = sqnum[i];
+ if (index == GAP_INDEX)
+ {
+ cons2GapCounts[i - 1]++;
+ }
+ else
+ {
+ cons2[i - 1][index]++;
+ }
}
+ // TODO should this start from sqnum.length?
for (i = sqnum.length - 1; i < maxLength; i++)
{
- cons2[i][23]++; // gap count
+ cons2GapCounts[i]++;
}
-
j++;
}
-
- // unnecessary ?
-
- /*
- * for (int i=start; i <= end; i++) { int max = -1000; int maxi = -1; int
- * maxj = -1;
- *
- * for (int j=0;j<24;j++) { if (cons2[i][j] > max) { max = cons2[i][j];
- * maxi = i; maxj = j; } } }
- */
}
}
/**
- * Calculates the quality of the set of sequences
+ * Calculates the quality of the set of sequences over the given inclusive
+ * column range, using the specified substitution score matrix
*
- * @param startRes
- * Start residue
- * @param endRes
- * End residue
+ * @param startCol
+ * @param endCol
+ * @param scoreMatrix
*/
- public void findQuality(int startRes, int endRes)
+ protected void findQuality(int startCol, int endCol, ScoreMatrix scoreMatrix)
{
quality = new Vector<Double>();
- double max = -10000;
- int[][] BLOSUM62 = ResidueProperties.getBLOSUM62();
+ double max = -Double.MAX_VALUE;
+ float[][] scores = scoreMatrix.getMatrix();
- // Loop over columns // JBPNote Profiling info
- // long ts = System.currentTimeMillis();
- // long te = System.currentTimeMillis();
- percentIdentity2();
+ percentIdentity(scoreMatrix);
int size = seqNums.size();
int[] lengths = new int[size];
- double tot, bigtot, sr, tmp;
- double[] x, xx;
- int l, j, i, ii, i2, k, seqNum;
- for (l = 0; l < size; l++)
+ for (int l = 0; l < size; l++)
{
lengths[l] = seqNums.elementAt(l).length - 1;
}
- for (j = startRes; j <= endRes; j++)
+ final int symbolCount = scoreMatrix.getSize();
+
+ for (int j = startCol; j <= endCol; j++)
{
- bigtot = 0;
+ double bigtot = 0;
// First Xr = depends on column only
- x = new double[24];
+ double[] x = new double[symbolCount];
- for (ii = 0; ii < 24; ii++)
+ for (int ii = 0; ii < symbolCount; ii++)
{
x[ii] = 0;
- for (i2 = 0; i2 < 24; i2++)
+ /*
+ * todo JAL-728 currently assuming last symbol in matrix is * for gap
+ * (which we ignore as counted separately); true for BLOSUM62 but may
+ * not be once alternative matrices are supported
+ */
+ for (int i2 = 0; i2 < symbolCount - 1; i2++)
{
- x[ii] += (((double) cons2[j][i2] * BLOSUM62[ii][i2]) + 4);
+ x[ii] += (((double) cons2[j][i2] * scores[ii][i2]) + 4D);
}
+ x[ii] += 4D + cons2GapCounts[j] * scoreMatrix.getMinimumScore();
x[ii] /= size;
}
// Now calculate D for each position and sum
- for (k = 0; k < size; k++)
+ for (int k = 0; k < size; k++)
{
- tot = 0;
- xx = new double[24];
- seqNum = (j < lengths[k]) ? seqNums.elementAt(k)[j + 1] : 23; // Sequence,
- // or gap
- // at the
- // end
-
- // This is a loop over r
- for (i = 0; i < 23; i++)
- {
- sr = 0;
+ double tot = 0;
+ double[] xx = new double[symbolCount];
+ // sequence character index, or implied gap if sequence too short
+ int seqNum = (j < lengths[k]) ? seqNums.elementAt(k)[j + 1]
+ : GAP_INDEX;
- sr = (double) BLOSUM62[i][seqNum] + 4;
+ for (int i = 0; i < symbolCount - 1; i++)
+ {
+ double sr = 4D;
+ if (seqNum == GAP_INDEX)
+ {
+ sr += scoreMatrix.getMinimumScore();
+ }
+ else
+ {
+ sr += scores[i][seqNum];
+ }
- // Calculate X with another loop over residues
- // System.out.println("Xi " + i + " " + x[i] + " " + sr);
xx[i] = x[i] - sr;
tot += (xx[i] * xx[i]);
bigtot += Math.sqrt(tot);
}
- // This is the quality for one column
- if (max < bigtot)
- {
- max = bigtot;
- }
+ max = Math.max(max, bigtot);
- // bigtot = bigtot * (size-cons2[j][23])/size;
quality.addElement(new Double(bigtot));
-
- // Need to normalize by gaps
}
- double newmax = -10000;
+ double newmax = -Double.MAX_VALUE;
- for (j = startRes; j <= endRes; j++)
+ for (int j = startCol; j <= endCol; j++)
{
- tmp = quality.elementAt(j).doubleValue();
- tmp = ((max - tmp) * (size - cons2[j][23])) / size;
+ double tmp = quality.elementAt(j).doubleValue();
+ // tmp = ((max - tmp) * (size - cons2[j][23])) / size;
+ tmp = ((max - tmp) * (size - cons2GapCounts[j])) / size;
// System.out.println(tmp+ " " + j);
quality.setElementAt(new Double(tmp), j);
}
}
- // System.out.println("Quality " + s);
- qualityRange[0] = 0D;
- qualityRange[1] = newmax;
+ qualityMinimum = 0D;
+ qualityMaximum = newmax;
}
/**
if (quality2 != null)
{
- quality2.graphMax = (float) qualityRange[1];
+ quality2.graphMax = (float) qualityMaximum;
if (quality2.annotations != null
&& quality2.annotations.length < alWidth)
{
quality2.annotations = new Annotation[alWidth];
}
- qmin = (float) qualityRange[0];
- qmax = (float) qualityRange[1];
+ qmin = (float) qualityMinimum;
+ qmax = (float) qualityMaximum;
}
for (int i = istart; i < alWidth; i++)
package jalview.analysis;
import jalview.api.analysis.ScoreModelI;
-import jalview.datamodel.AlignmentView;
-import jalview.datamodel.BinaryNode;
-import jalview.datamodel.CigarArray;
-import jalview.datamodel.NodeTransformI;
-import jalview.datamodel.SeqCigar;
-import jalview.datamodel.Sequence;
-import jalview.datamodel.SequenceI;
+import jalview.api.analysis.SimilarityParamsI;
import jalview.datamodel.SequenceNode;
-import jalview.io.NewickFile;
-import jalview.schemes.ResidueProperties;
-
-import java.util.Enumeration;
-import java.util.List;
-import java.util.Vector;
+import jalview.viewmodel.AlignmentViewport;
/**
- * DOCUMENT ME!
- *
- * @author $author$
- * @version $Revision$
+ * This class implements distance calculations used in constructing a Neighbour
+ * Joining tree
*/
-public class NJTree
+public class NJTree extends TreeBuilder
{
- Vector<Cluster> cluster;
-
- SequenceI[] sequence;
-
- // SequenceData is a string representation of what the user
- // sees. The display may contain hidden columns.
- public AlignmentView seqData = null;
-
- int[] done;
-
- int noseqs;
-
- int noClus;
-
- float[][] distance;
-
- int mini;
-
- int minj;
-
- float ri;
-
- float rj;
-
- Vector<SequenceNode> groups = new Vector<SequenceNode>();
-
- SequenceNode maxdist;
-
- SequenceNode top;
-
- float maxDistValue;
-
- float maxheight;
-
- int ycount;
-
- Vector<SequenceNode> node;
-
- String type;
-
- String pwtype;
-
- Object found = null;
-
- boolean hasDistances = true; // normal case for jalview trees
-
- boolean hasBootstrap = false; // normal case for jalview trees
-
- private boolean hasRootDistance = true;
-
- /**
- * Create a new NJTree object with leaves associated with sequences in seqs,
- * and original alignment data represented by Cigar strings.
- *
- * @param seqs
- * SequenceI[]
- * @param odata
- * Cigar[]
- * @param treefile
- * NewickFile
- */
- public NJTree(SequenceI[] seqs, AlignmentView odata, NewickFile treefile)
- {
- this(seqs, treefile);
- if (odata != null)
- {
- seqData = odata;
- }
- /*
- * sequenceString = new String[odata.length]; char gapChar =
- * jalview.util.Comparison.GapChars.charAt(0); for (int i = 0; i <
- * odata.length; i++) { SequenceI oseq_aligned = odata[i].getSeq(gapChar);
- * sequenceString[i] = oseq_aligned.getSequence(); }
- */
- }
-
- /**
- * Creates a new NJTree object from a tree from an external source
- *
- * @param seqs
- * SequenceI which should be associated with leafs of treefile
- * @param treefile
- * A parsed tree
- */
- public NJTree(SequenceI[] seqs, NewickFile treefile)
- {
- this.sequence = seqs;
- top = treefile.getTree();
-
- /**
- * There is no dependent alignment to be recovered from an imported tree.
- *
- * if (sequenceString == null) { sequenceString = new String[seqs.length];
- * for (int i = 0; i < seqs.length; i++) { sequenceString[i] =
- * seqs[i].getSequence(); } }
- */
-
- hasDistances = treefile.HasDistances();
- hasBootstrap = treefile.HasBootstrap();
- hasRootDistance = treefile.HasRootDistance();
-
- maxheight = findHeight(top);
-
- SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs);
-
- Vector<SequenceNode> leaves = findLeaves(top);
-
- int i = 0;
- int namesleft = seqs.length;
-
- SequenceNode j;
- SequenceI nam;
- String realnam;
- Vector<SequenceI> one2many = new Vector<SequenceI>();
- int countOne2Many = 0;
- while (i < leaves.size())
- {
- j = leaves.elementAt(i++);
- realnam = j.getName();
- nam = null;
-
- if (namesleft > -1)
- {
- nam = algnIds.findIdMatch(realnam);
- }
-
- if (nam != null)
- {
- j.setElement(nam);
- if (one2many.contains(nam))
- {
- countOne2Many++;
- // if (jalview.bin.Cache.log.isDebugEnabled())
- // jalview.bin.Cache.log.debug("One 2 many relationship for
- // "+nam.getName());
- }
- else
- {
- one2many.addElement(nam);
- namesleft--;
- }
- }
- else
- {
- j.setElement(new Sequence(realnam, "THISISAPLACEHLDER"));
- j.setPlaceholder(true);
- }
- }
- // if (jalview.bin.Cache.log.isDebugEnabled() && countOne2Many>0) {
- // jalview.bin.Cache.log.debug("There were "+countOne2Many+" alignment
- // sequence ids (out of "+one2many.size()+" unique ids) linked to two or
- // more leaves.");
- // }
- // one2many.clear();
- }
-
/**
- * Creates a new NJTree object.
+ * Constructor given a viewport, score model and similarity parameters
*
- * @param sequence
- * DOCUMENT ME!
- * @param type
- * DOCUMENT ME!
- * @param pwtype
- * DOCUMENT ME!
- * @param start
- * DOCUMENT ME!
- * @param end
- * DOCUMENT ME!
+ * @param av
+ * the current alignment viewport
+ * @param sm
+ * a distance or similarity score model to use to compute the tree
+ * @param scoreParameters
*/
- public NJTree(SequenceI[] sequence, AlignmentView seqData, String type,
- String pwtype, ScoreModelI sm, int start, int end)
+ public NJTree(AlignmentViewport av, ScoreModelI sm,
+ SimilarityParamsI scoreParameters)
{
- this.sequence = sequence;
- this.node = new Vector<SequenceNode>();
- this.type = type;
- this.pwtype = pwtype;
- if (seqData != null)
- {
- this.seqData = seqData;
- }
- else
- {
- SeqCigar[] seqs = new SeqCigar[sequence.length];
- for (int i = 0; i < sequence.length; i++)
- {
- seqs[i] = new SeqCigar(sequence[i], start, end);
- }
- CigarArray sdata = new CigarArray(seqs);
- sdata.addOperation(CigarArray.M, end - start + 1);
- this.seqData = new AlignmentView(sdata, start);
- }
- // System.err.println("Made seqData");// dbg
- if (!(type.equals("NJ")))
- {
- type = "AV";
- }
-
- if (sm == null && !(pwtype.equals("PID")))
- {
- if (ResidueProperties.getScoreMatrix(pwtype) == null)
- {
- pwtype = "BLOSUM62";
- }
- }
-
- int i = 0;
-
- done = new int[sequence.length];
-
- while ((i < sequence.length) && (sequence[i] != null))
- {
- done[i] = 0;
- i++;
- }
-
- noseqs = i++;
-
- distance = findDistances(sm);
- // System.err.println("Made distances");// dbg
- makeLeaves();
- // System.err.println("Made leaves");// dbg
-
- noClus = cluster.size();
-
- cluster();
- // System.err.println("Made clusters");// dbg
-
+ super(av, sm, scoreParameters);
}
/**
- * Generate a string representation of the Tree
- *
- * @return Newick File with all tree data available
+ * {@inheritDoc}
*/
@Override
- public String toString()
- {
- jalview.io.NewickFile fout = new jalview.io.NewickFile(getTopNode());
-
- return fout.print(isHasBootstrap(), isHasDistances(),
- isHasRootDistance()); // output all data available for tree
- }
-
- /**
- *
- * used when the alignment associated to a tree has changed.
- *
- * @param list
- * Sequence set to be associated with tree nodes
- */
- public void UpdatePlaceHolders(List<SequenceI> list)
+ protected double findMinDistance()
{
- Vector<SequenceNode> leaves = findLeaves(top);
+ double min = Double.MAX_VALUE;
- int sz = leaves.size();
- SequenceIdMatcher seqmatcher = null;
- int i = 0;
-
- while (i < sz)
+ for (int i = 0; i < (noseqs - 1); i++)
{
- SequenceNode leaf = leaves.elementAt(i++);
-
- if (list.contains(leaf.element()))
- {
- leaf.setPlaceholder(false);
- }
- else
+ for (int j = i + 1; j < noseqs; j++)
{
- if (seqmatcher == null)
+ if (!done.get(i) && !done.get(j))
{
- // Only create this the first time we need it
- SequenceI[] seqs = new SequenceI[list.size()];
+ double tmp = distances.getValue(i, j)
+ - (findr(i, j) + findr(j, i));
- for (int j = 0; j < seqs.length; j++)
+ if (tmp < min)
{
- seqs[j] = list.get(j);
- }
-
- seqmatcher = new SequenceIdMatcher(seqs);
- }
-
- SequenceI nam = seqmatcher.findIdMatch(leaf.getName());
+ mini = i;
+ minj = j;
- if (nam != null)
- {
- if (!leaf.isPlaceholder())
- {
- // remapping the node to a new sequenceI - should remove any refs to
- // old one.
- // TODO - make many sequenceI to one leaf mappings possible!
- // (JBPNote)
- }
- leaf.setPlaceholder(false);
- leaf.setElement(nam);
- }
- else
- {
- if (!leaf.isPlaceholder())
- {
- // Construct a new placeholder sequence object for this leaf
- leaf.setElement(new Sequence(leaf.getName(),
- "THISISAPLACEHLDER"));
+ min = tmp;
}
- leaf.setPlaceholder(true);
-
- }
- }
- }
- }
-
- /**
- * rename any nodes according to their associated sequence. This will modify
- * the tree's metadata! (ie the original NewickFile or newly generated
- * BinaryTree's label data)
- */
- public void renameAssociatedNodes()
- {
- applyToNodes(new NodeTransformI()
- {
-
- @Override
- public void transform(BinaryNode nd)
- {
- Object el = nd.element();
- if (el != null && el instanceof SequenceI)
- {
- nd.setName(((SequenceI) el).getName());
}
}
- });
- }
-
- /**
- * DOCUMENT ME!
- */
- public void cluster()
- {
- while (noClus > 2)
- {
- if (type.equals("NJ"))
- {
- findMinNJDistance();
- }
- else
- {
- findMinDistance();
- }
-
- Cluster c = joinClusters(mini, minj);
-
- done[minj] = 1;
-
- cluster.setElementAt(null, minj);
- cluster.setElementAt(c, mini);
-
- noClus--;
- }
-
- boolean onefound = false;
-
- int one = -1;
- int two = -1;
-
- for (int i = 0; i < noseqs; i++)
- {
- if (done[i] != 1)
- {
- if (onefound == false)
- {
- two = i;
- onefound = true;
- }
- else
- {
- one = i;
- }
- }
- }
-
- joinClusters(one, two);
- top = (node.elementAt(one));
-
- reCount(top);
- findHeight(top);
- findMaxDist(top);
- }
-
- /**
- * DOCUMENT ME!
- *
- * @param i
- * DOCUMENT ME!
- * @param j
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- */
- public Cluster joinClusters(int i, int j)
- {
- float dist = distance[i][j];
-
- int noi = cluster.elementAt(i).value.length;
- int noj = cluster.elementAt(j).value.length;
-
- int[] value = new int[noi + noj];
-
- for (int ii = 0; ii < noi; ii++)
- {
- value[ii] = cluster.elementAt(i).value[ii];
- }
-
- for (int ii = noi; ii < (noi + noj); ii++)
- {
- value[ii] = cluster.elementAt(j).value[ii - noi];
- }
-
- Cluster c = new Cluster(value);
-
- ri = findr(i, j);
- rj = findr(j, i);
-
- if (type.equals("NJ"))
- {
- findClusterNJDistance(i, j);
- }
- else
- {
- findClusterDistance(i, j);
- }
-
- SequenceNode sn = new SequenceNode();
-
- sn.setLeft((node.elementAt(i)));
- sn.setRight((node.elementAt(j)));
-
- SequenceNode tmpi = (node.elementAt(i));
- SequenceNode tmpj = (node.elementAt(j));
-
- if (type.equals("NJ"))
- {
- findNewNJDistances(tmpi, tmpj, dist);
- }
- else
- {
- findNewDistances(tmpi, tmpj, dist);
- }
-
- tmpi.setParent(sn);
- tmpj.setParent(sn);
-
- node.setElementAt(sn, i);
-
- return c;
- }
-
- /**
- * DOCUMENT ME!
- *
- * @param tmpi
- * DOCUMENT ME!
- * @param tmpj
- * DOCUMENT ME!
- * @param dist
- * DOCUMENT ME!
- */
- public void findNewNJDistances(SequenceNode tmpi, SequenceNode tmpj,
- float dist)
- {
-
- tmpi.dist = ((dist + ri) - rj) / 2;
- tmpj.dist = (dist - tmpi.dist);
-
- if (tmpi.dist < 0)
- {
- tmpi.dist = 0;
- }
-
- if (tmpj.dist < 0)
- {
- tmpj.dist = 0;
- }
- }
-
- /**
- * DOCUMENT ME!
- *
- * @param tmpi
- * DOCUMENT ME!
- * @param tmpj
- * DOCUMENT ME!
- * @param dist
- * DOCUMENT ME!
- */
- public void findNewDistances(SequenceNode tmpi, SequenceNode tmpj,
- float dist)
- {
- float ih = 0;
- float jh = 0;
-
- SequenceNode sni = tmpi;
- SequenceNode snj = tmpj;
-
- while (sni != null)
- {
- ih = ih + sni.dist;
- sni = (SequenceNode) sni.left();
- }
-
- while (snj != null)
- {
- jh = jh + snj.dist;
- snj = (SequenceNode) snj.left();
}
- tmpi.dist = ((dist / 2) - ih);
- tmpj.dist = ((dist / 2) - jh);
+ return min;
}
/**
- * DOCUMENT ME!
- *
- * @param i
- * DOCUMENT ME!
- * @param j
- * DOCUMENT ME!
+ * {@inheritDoc}
*/
- public void findClusterDistance(int i, int j)
+ @Override
+ protected void findNewDistances(SequenceNode nodei, SequenceNode nodej,
+ double dist)
{
- int noi = cluster.elementAt(i).value.length;
- int noj = cluster.elementAt(j).value.length;
+ nodei.dist = ((dist + ri) - rj) / 2;
+ nodej.dist = (dist - nodei.dist);
- // New distances from cluster to others
- float[] newdist = new float[noseqs];
-
- for (int l = 0; l < noseqs; l++)
+ if (nodei.dist < 0)
{
- if ((l != i) && (l != j))
- {
- newdist[l] = ((distance[i][l] * noi) + (distance[j][l] * noj))
- / (noi + noj);
- }
- else
- {
- newdist[l] = 0;
- }
+ nodei.dist = 0;
}
- for (int ii = 0; ii < noseqs; ii++)
+ if (nodej.dist < 0)
{
- distance[i][ii] = newdist[ii];
- distance[ii][i] = newdist[ii];
+ nodej.dist = 0;
}
}
/**
- * DOCUMENT ME!
+ * Calculates and saves the distance between the combination of cluster(i) and
+ * cluster(j) and all other clusters. The new distance to cluster k is
+ * calculated as the average of the distances from i to k and from j to k,
+ * less half the distance from i to j.
*
* @param i
- * DOCUMENT ME!
* @param j
- * DOCUMENT ME!
*/
- public void findClusterNJDistance(int i, int j)
+ @Override
+ protected
+ void findClusterDistance(int i, int j)
{
-
- // New distances from cluster to others
- float[] newdist = new float[noseqs];
-
+ // New distances from cluster i to others
+ double[] newdist = new double[noseqs];
+
+ double ijDistance = distances.getValue(i, j);
for (int l = 0; l < noseqs; l++)
{
if ((l != i) && (l != j))
{
- newdist[l] = ((distance[i][l] + distance[j][l]) - distance[i][j]) / 2;
+ newdist[l] = (distances.getValue(i, l) + distances.getValue(j, l) - ijDistance) / 2;
}
else
{
newdist[l] = 0;
}
}
-
+
for (int ii = 0; ii < noseqs; ii++)
{
- distance[i][ii] = newdist[ii];
- distance[ii][i] = newdist[ii];
- }
- }
-
- /**
- * DOCUMENT ME!
- *
- * @param i
- * DOCUMENT ME!
- * @param j
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- */
- public float findr(int i, int j)
- {
- float tmp = 1;
-
- for (int k = 0; k < noseqs; k++)
- {
- if ((k != i) && (k != j) && (done[k] != 1))
- {
- tmp = tmp + distance[i][k];
- }
- }
-
- if (noClus > 2)
- {
- tmp = tmp / (noClus - 2);
+ distances.setValue(i, ii, newdist[ii]);
+ distances.setValue(ii, i, newdist[ii]);
}
-
- return tmp;
- }
-
- /**
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- */
- public float findMinNJDistance()
- {
- float min = 100000;
-
- for (int i = 0; i < (noseqs - 1); i++)
- {
- for (int j = i + 1; j < noseqs; j++)
- {
- if ((done[i] != 1) && (done[j] != 1))
- {
- float tmp = distance[i][j] - (findr(i, j) + findr(j, i));
-
- if (tmp < min)
- {
- mini = i;
- minj = j;
-
- min = tmp;
- }
- }
- }
- }
-
- return min;
- }
-
- /**
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- */
- public float findMinDistance()
- {
- float min = 100000;
-
- for (int i = 0; i < (noseqs - 1); i++)
- {
- for (int j = i + 1; j < noseqs; j++)
- {
- if ((done[i] != 1) && (done[j] != 1))
- {
- if (distance[i][j] < min)
- {
- mini = i;
- minj = j;
-
- min = distance[i][j];
- }
- }
- }
- }
-
- return min;
- }
-
- /**
- * Calculate a distance matrix given the sequence input data and score model
- *
- * @return similarity matrix used to compute tree
- */
- public float[][] findDistances(ScoreModelI _pwmatrix)
- {
-
- float[][] dist = new float[noseqs][noseqs];
- if (_pwmatrix == null)
- {
- // Resolve substitution model
- _pwmatrix = ResidueProperties.getScoreModel(pwtype);
- if (_pwmatrix == null)
- {
- _pwmatrix = ResidueProperties.getScoreMatrix("BLOSUM62");
- }
- }
- dist = _pwmatrix.findDistances(seqData);
- return dist;
-
- }
-
- /**
- * DOCUMENT ME!
- */
- public void makeLeaves()
- {
- cluster = new Vector<Cluster>();
-
- for (int i = 0; i < noseqs; i++)
- {
- SequenceNode sn = new SequenceNode();
-
- sn.setElement(sequence[i]);
- sn.setName(sequence[i].getName());
- node.addElement(sn);
-
- int[] value = new int[1];
- value[0] = i;
-
- Cluster c = new Cluster(value);
- cluster.addElement(c);
- }
- }
-
- /**
- * Search for leaf nodes below (or at) the given node
- *
- * @param nd
- * root node to search from
- *
- * @return
- */
- public Vector<SequenceNode> findLeaves(SequenceNode nd)
- {
- Vector<SequenceNode> leaves = new Vector<SequenceNode>();
- findLeaves(nd, leaves);
- return leaves;
- }
-
- /**
- * Search for leaf nodes.
- *
- * @param nd
- * root node to search from
- * @param leaves
- * Vector of leaves to add leaf node objects too.
- *
- * @return Vector of leaf nodes on binary tree
- */
- Vector<SequenceNode> findLeaves(SequenceNode nd,
- Vector<SequenceNode> leaves)
- {
- if (nd == null)
- {
- return leaves;
- }
-
- if ((nd.left() == null) && (nd.right() == null)) // Interior node
- // detection
- {
- leaves.addElement(nd);
-
- return leaves;
- }
- else
- {
- /*
- * TODO: Identify internal nodes... if (node.isSequenceLabel()) {
- * leaves.addElement(node); }
- */
- findLeaves((SequenceNode) nd.left(), leaves);
- findLeaves((SequenceNode) nd.right(), leaves);
- }
-
- return leaves;
- }
-
- /**
- * Find the leaf node with a particular ycount
- *
- * @param nd
- * initial point on tree to search from
- * @param count
- * value to search for
- *
- * @return null or the node with ycound=count
- */
- public Object findLeaf(SequenceNode nd, int count)
- {
- found = _findLeaf(nd, count);
-
- return found;
- }
-
- /*
- * #see findLeaf(SequenceNode node, count)
- */
- public Object _findLeaf(SequenceNode nd, int count)
- {
- if (nd == null)
- {
- return null;
- }
-
- if (nd.ycount == count)
- {
- found = nd.element();
-
- return found;
- }
- else
- {
- _findLeaf((SequenceNode) nd.left(), count);
- _findLeaf((SequenceNode) nd.right(), count);
- }
-
- return found;
- }
-
- /**
- * printNode is mainly for debugging purposes.
- *
- * @param nd
- * SequenceNode
- */
- public void printNode(SequenceNode nd)
- {
- if (nd == null)
- {
- return;
- }
-
- if ((nd.left() == null) && (nd.right() == null))
- {
- System.out.println("Leaf = " + ((SequenceI) nd.element()).getName());
- System.out.println("Dist " + nd.dist);
- System.out.println("Boot " + nd.getBootstrap());
- }
- else
- {
- System.out.println("Dist " + nd.dist);
- printNode((SequenceNode) nd.left());
- printNode((SequenceNode) nd.right());
- }
- }
-
- /**
- * DOCUMENT ME!
- *
- * @param nd
- * DOCUMENT ME!
- */
- public void findMaxDist(SequenceNode nd)
- {
- if (nd == null)
- {
- return;
- }
-
- if ((nd.left() == null) && (nd.right() == null))
- {
- float dist = nd.dist;
-
- if (dist > maxDistValue)
- {
- maxdist = nd;
- maxDistValue = dist;
- }
- }
- else
- {
- findMaxDist((SequenceNode) nd.left());
- findMaxDist((SequenceNode) nd.right());
- }
- }
-
- /**
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- */
- public Vector<SequenceNode> getGroups()
- {
- return groups;
- }
-
- /**
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- */
- public float getMaxHeight()
- {
- return maxheight;
- }
-
- /**
- * DOCUMENT ME!
- *
- * @param nd
- * DOCUMENT ME!
- * @param threshold
- * DOCUMENT ME!
- */
- public void groupNodes(SequenceNode nd, float threshold)
- {
- if (nd == null)
- {
- return;
- }
-
- if ((nd.height / maxheight) > threshold)
- {
- groups.addElement(nd);
- }
- else
- {
- groupNodes((SequenceNode) nd.left(), threshold);
- groupNodes((SequenceNode) nd.right(), threshold);
- }
- }
-
- /**
- * DOCUMENT ME!
- *
- * @param nd
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- */
- public float findHeight(SequenceNode nd)
- {
- if (nd == null)
- {
- return maxheight;
- }
-
- if ((nd.left() == null) && (nd.right() == null))
- {
- nd.height = ((SequenceNode) nd.parent()).height + nd.dist;
-
- if (nd.height > maxheight)
- {
- return nd.height;
- }
- else
- {
- return maxheight;
- }
- }
- else
- {
- if (nd.parent() != null)
- {
- nd.height = ((SequenceNode) nd.parent()).height + nd.dist;
- }
- else
- {
- maxheight = 0;
- nd.height = (float) 0.0;
- }
-
- maxheight = findHeight((SequenceNode) (nd.left()));
- maxheight = findHeight((SequenceNode) (nd.right()));
- }
-
- return maxheight;
- }
-
- /**
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- */
- public SequenceNode reRoot()
- {
- if (maxdist != null)
- {
- ycount = 0;
-
- float tmpdist = maxdist.dist;
-
- // New top
- SequenceNode sn = new SequenceNode();
- sn.setParent(null);
-
- // New right hand of top
- SequenceNode snr = (SequenceNode) maxdist.parent();
- changeDirection(snr, maxdist);
- System.out.println("Printing reversed tree");
- printN(snr);
- snr.dist = tmpdist / 2;
- maxdist.dist = tmpdist / 2;
-
- snr.setParent(sn);
- maxdist.setParent(sn);
-
- sn.setRight(snr);
- sn.setLeft(maxdist);
-
- top = sn;
-
- ycount = 0;
- reCount(top);
- findHeight(top);
- }
-
- return top;
- }
-
- /**
- *
- * @return true if original sequence data can be recovered
- */
- public boolean hasOriginalSequenceData()
- {
- return seqData != null;
- }
-
- /**
- * Returns original alignment data used for calculation - or null where not
- * available.
- *
- * @return null or cut'n'pasteable alignment
- */
- public String printOriginalSequenceData(char gapChar)
- {
- if (seqData == null)
- {
- return null;
- }
-
- StringBuffer sb = new StringBuffer();
- String[] seqdatas = seqData.getSequenceStrings(gapChar);
- for (int i = 0; i < seqdatas.length; i++)
- {
- sb.append(new jalview.util.Format("%-" + 15 + "s").form(sequence[i]
- .getName()));
- sb.append(" " + seqdatas[i] + "\n");
- }
- return sb.toString();
- }
-
- /**
- * DOCUMENT ME!
- *
- * @param nd
- * DOCUMENT ME!
- */
- public void printN(SequenceNode nd)
- {
- if (nd == null)
- {
- return;
- }
-
- if ((nd.left() != null) && (nd.right() != null))
- {
- printN((SequenceNode) nd.left());
- printN((SequenceNode) nd.right());
- }
- else
- {
- System.out.println(" name = " + ((SequenceI) nd.element()).getName());
- }
-
- System.out.println(" dist = " + nd.dist + " " + nd.count + " "
- + nd.height);
- }
-
- /**
- * DOCUMENT ME!
- *
- * @param nd
- * DOCUMENT ME!
- */
- public void reCount(SequenceNode nd)
- {
- ycount = 0;
- _lycount = 0;
- // _lylimit = this.node.size();
- _reCount(nd);
- }
-
- private long _lycount = 0, _lylimit = 0;
-
- /**
- * DOCUMENT ME!
- *
- * @param nd
- * DOCUMENT ME!
- */
- public void _reCount(SequenceNode nd)
- {
- // if (_lycount<_lylimit)
- // {
- // System.err.println("Warning: depth of _recount greater than number of nodes.");
- // }
- if (nd == null)
- {
- return;
- }
- _lycount++;
-
- if ((nd.left() != null) && (nd.right() != null))
- {
-
- _reCount((SequenceNode) nd.left());
- _reCount((SequenceNode) nd.right());
-
- SequenceNode l = (SequenceNode) nd.left();
- SequenceNode r = (SequenceNode) nd.right();
-
- nd.count = l.count + r.count;
- nd.ycount = (l.ycount + r.ycount) / 2;
- }
- else
- {
- nd.count = 1;
- nd.ycount = ycount++;
- }
- _lycount--;
- }
-
- /**
- * DOCUMENT ME!
- *
- * @param nd
- * DOCUMENT ME!
- */
- public void swapNodes(SequenceNode nd)
- {
- if (nd == null)
- {
- return;
- }
-
- SequenceNode tmp = (SequenceNode) nd.left();
-
- nd.setLeft(nd.right());
- nd.setRight(tmp);
- }
-
- /**
- * DOCUMENT ME!
- *
- * @param nd
- * DOCUMENT ME!
- * @param dir
- * DOCUMENT ME!
- */
- public void changeDirection(SequenceNode nd, SequenceNode dir)
- {
- if (nd == null)
- {
- return;
- }
-
- if (nd.parent() != top)
- {
- changeDirection((SequenceNode) nd.parent(), nd);
-
- SequenceNode tmp = (SequenceNode) nd.parent();
-
- if (dir == nd.left())
- {
- nd.setParent(dir);
- nd.setLeft(tmp);
- }
- else if (dir == nd.right())
- {
- nd.setParent(dir);
- nd.setRight(tmp);
- }
- }
- else
- {
- if (dir == nd.left())
- {
- nd.setParent(nd.left());
-
- if (top.left() == nd)
- {
- nd.setRight(top.right());
- }
- else
- {
- nd.setRight(top.left());
- }
- }
- else
- {
- nd.setParent(nd.right());
-
- if (top.left() == nd)
- {
- nd.setLeft(top.right());
- }
- else
- {
- nd.setLeft(top.left());
- }
- }
- }
- }
-
- /**
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- */
- public SequenceNode getMaxDist()
- {
- return maxdist;
- }
-
- /**
- * DOCUMENT ME!
- *
- * @return DOCUMENT ME!
- */
- public SequenceNode getTopNode()
- {
- return top;
- }
-
- /**
- *
- * @return true if tree has real distances
- */
- public boolean isHasDistances()
- {
- return hasDistances;
- }
-
- /**
- *
- * @return true if tree has real bootstrap values
- */
- public boolean isHasBootstrap()
- {
- return hasBootstrap;
- }
-
- public boolean isHasRootDistance()
- {
- return hasRootDistance;
- }
-
- /**
- * apply the given transform to all the nodes in the tree.
- *
- * @param nodeTransformI
- */
- public void applyToNodes(NodeTransformI nodeTransformI)
- {
- for (Enumeration<SequenceNode> nodes = node.elements(); nodes
- .hasMoreElements(); nodeTransformI.transform(nodes
- .nextElement()))
- {
- ;
- }
- }
-}
-
-/**
- * DOCUMENT ME!
- *
- * @author $author$
- * @version $Revision$
- */
-class Cluster
-{
- int[] value;
-
- /**
- * Creates a new Cluster object.
- *
- * @param value
- * DOCUMENT ME!
- */
- public Cluster(int[] value)
- {
- this.value = value;
}
}
*/
package jalview.analysis;
+import jalview.api.analysis.ScoreModelI;
+import jalview.api.analysis.SimilarityParamsI;
+import jalview.datamodel.AlignmentView;
import jalview.math.MatrixI;
-import jalview.schemes.ResidueProperties;
-import jalview.schemes.ScoreMatrix;
import java.io.PrintStream;
*/
public class PCA implements Runnable
{
- boolean jvCalcMode = true;
-
MatrixI symm;
double[] eigenvalue;
StringBuilder details = new StringBuilder(1024);
- private String[] seqs;
-
- private ScoreMatrix scoreMatrix;
+ final private AlignmentView seqs;
- /**
- * Creates a new PCA object. By default, uses blosum62 matrix to generate
- * sequence similarity matrices
- *
- * @param s
- * Set of amino acid sequences to perform PCA on
- */
- public PCA(String[] s)
- {
- this(s, false);
- }
-
- /**
- * Creates a new PCA object. By default, uses blosum62 matrix to generate
- * sequence similarity matrices
- *
- * @param s
- * Set of sequences to perform PCA on
- * @param nucleotides
- * if true, uses standard DNA/RNA matrix for sequence similarity
- * calculation.
- */
- public PCA(String[] s, boolean nucleotides)
- {
- this(s, nucleotides, null);
- }
+ private ScoreModelI scoreModel;
+
+ private SimilarityParamsI similarityParams;
- public PCA(String[] s, boolean nucleotides, String s_m)
+ public PCA(AlignmentView s, ScoreModelI sm, SimilarityParamsI options)
{
this.seqs = s;
-
- scoreMatrix = null;
- String sm = s_m;
- if (sm != null)
- {
- scoreMatrix = ResidueProperties.getScoreMatrix(sm);
- }
- if (scoreMatrix == null)
- {
- // either we were given a non-existent score matrix or a scoremodel that
- // isn't based on a pairwise symbol score matrix
- scoreMatrix = ResidueProperties
- .getScoreMatrix(sm = (nucleotides ? "DNA" : "BLOSUM62"));
- }
- details.append("PCA calculation using " + sm
+ this.similarityParams = options;
+ this.scoreModel = sm;
+
+ details.append("PCA calculation using " + sm.getName()
+ " sequence similarity matrix\n========\n\n");
}
// long now = System.currentTimeMillis();
try
{
- details.append("PCA Calculation Mode is "
- + (jvCalcMode ? "Jalview variant" : "Original SeqSpace")
- + "\n");
-
- eigenvector = scoreMatrix.computePairwiseScores(seqs);
+ eigenvector = scoreModel.findSimilarities(seqs, similarityParams);
details.append(" --- OrigT * Orig ---- \n");
eigenvector.print(ps, "%8.2f");
// + (System.currentTimeMillis() - now) + "ms"));
}
- public void setJvCalcMode(boolean calcMode)
- {
- this.jvCalcMode = calcMode;
- }
-
/**
* Answers the N dimensions of the NxN PCA matrix. This is the number of
* sequences involved in the pairwise score calculation.
public int getHeight()
{
// TODO can any of seqs[] be null?
- return seqs.length;
+ return seqs.getSequences().length;
}
}
--- /dev/null
+package jalview.analysis;
+
+import jalview.api.analysis.ScoreModelI;
+import jalview.api.analysis.SimilarityParamsI;
+import jalview.datamodel.AlignmentView;
+import jalview.datamodel.CigarArray;
+import jalview.datamodel.SeqCigar;
+import jalview.datamodel.SequenceI;
+import jalview.datamodel.SequenceNode;
+import jalview.math.MatrixI;
+import jalview.viewmodel.AlignmentViewport;
+
+import java.util.BitSet;
+import java.util.Vector;
+
+public abstract class TreeBuilder
+{
+ public static final String AVERAGE_DISTANCE = "AV";
+
+ public static final String NEIGHBOUR_JOINING = "NJ";
+
+ protected Vector<BitSet> clusters;
+
+ protected SequenceI[] sequences;
+
+ public AlignmentView seqData;
+
+ protected BitSet done;
+
+ protected int noseqs;
+
+ int noClus;
+
+ protected MatrixI distances;
+
+ protected int mini;
+
+ protected int minj;
+
+ protected double ri;
+
+ protected double rj;
+
+ SequenceNode maxdist;
+
+ SequenceNode top;
+
+ double maxDistValue;
+
+ double maxheight;
+
+ int ycount;
+
+ Vector<SequenceNode> node;
+
+ private AlignmentView seqStrings;
+
+ /**
+ * Constructor. Takes its input from the viewport's selection group when that
+ * group exists and holds more than one sequence, otherwise from the whole
+ * alignment; then calls init() and computeTree(), so the tree is already
+ * built when the constructor returns.
+ * <p>
+ * NOTE(review): init() and computeTree() are protected and non-final, and
+ * computeTree() reaches the abstract hooks via cluster(), so subclass code
+ * runs before subclass field initialisers have executed.
+ *
+ * @param av
+ * viewport supplying the alignment, selection group and alignment view
+ * @param sm
+ * score model passed on to computeTree()
+ * @param scoreParameters
+ * parameters passed on to computeTree()
+ */
+ public TreeBuilder(AlignmentViewport av, ScoreModelI sm,
+ SimilarityParamsI scoreParameters)
+ {
+ int start, end;
+ boolean selview = av.getSelectionGroup() != null
+ && av.getSelectionGroup().getSize() > 1;
+ seqStrings = av.getAlignmentView(selview);
+ if (!selview)
+ {
+ start = 0;
+ end = av.getAlignment().getWidth();
+ this.sequences = av.getAlignment().getSequencesArray();
+ }
+ else
+ {
+ start = av.getSelectionGroup().getStartRes();
+ end = av.getSelectionGroup().getEndRes() + 1;
+ this.sequences = av.getSelectionGroup().getSequencesInOrder(
+ av.getAlignment());
+ }
+
+ init(seqStrings, start, end);
+
+ computeTree(sm, scoreParameters);
+ }
+
+ /**
+ * Returns the sequences chosen by the constructor (those of the selection
+ * group, or of the whole alignment).
+ *
+ * @return the internal sequences array itself, not a copy
+ */
+ public SequenceI[] getSequences()
+ {
+ return sequences;
+ }
+
+ /**
+  * Sets the height of the given node and of every node below it, where a
+  * node's height is its parent's height plus its own dist, and returns the
+  * largest height found so far.
+  * <p>
+  * A node with no parent is treated as the root: its height is set to zero
+  * and the maxheight field is reset to zero before descending. This now also
+  * applies to a root that is itself a leaf (a single-node tree), which
+  * previously caused a NullPointerException.
+  *
+  * @param nd
+  *          root of the subtree to process; may be null
+  *
+  * @return the greater of the maxheight field and the heights computed in
+  *         this subtree; the maxheight field unchanged if nd is null
+  */
+ double findHeight(SequenceNode nd)
+ {
+   if (nd == null)
+   {
+     return maxheight;
+   }
+
+   SequenceNode parent = (SequenceNode) nd.parent();
+   if (parent != null)
+   {
+     nd.height = parent.height + nd.dist;
+   }
+   else
+   {
+     // root node (leaf or internal): restart the height calculation
+     maxheight = 0;
+     nd.height = 0;
+   }
+
+   if ((nd.left() == null) && (nd.right() == null))
+   {
+     // leaf: report the larger value, the caller stores it in maxheight
+     return (nd.height > maxheight) ? nd.height : maxheight;
+   }
+
+   maxheight = findHeight((SequenceNode) (nd.left()));
+   maxheight = findHeight((SequenceNode) (nd.right()));
+
+   return maxheight;
+ }
+
+ /**
+ * Resets the ycount field to zero and then recomputes the count and ycount
+ * values of every node at or below the given node (see _reCount).
+ *
+ * @param nd
+ * root of the subtree to recount; null is tolerated
+ */
+ void reCount(SequenceNode nd)
+ {
+ ycount = 0;
+ // _lycount = 0;
+ // _lylimit = this.node.size();
+ _reCount(nd);
+ }
+
+ /**
+  * Recursive worker for reCount. A node that lacks either child is treated
+  * as a leaf: its count becomes 1 and it takes the next value of the ycount
+  * field. Any other node gets the sum of its children's counts and the mean
+  * of its children's ycount values.
+  *
+  * @param nd
+  *          node to process; null is ignored
+  */
+ void _reCount(SequenceNode nd)
+ {
+   if (nd == null)
+   {
+     return;
+   }
+
+   boolean treatAsLeaf = (nd.left() == null) || (nd.right() == null);
+   if (treatAsLeaf)
+   {
+     nd.count = 1;
+     nd.ycount = ycount++;
+     return;
+   }
+
+   SequenceNode leftChild = (SequenceNode) nd.left();
+   SequenceNode rightChild = (SequenceNode) nd.right();
+
+   // children first, so that their count/ycount are up to date
+   _reCount(leftChild);
+   _reCount(rightChild);
+
+   nd.count = leftChild.count + rightChild.count;
+   nd.ycount = (leftChild.ycount + rightChild.ycount) / 2;
+ }
+
+ /**
+ * Returns the root node of the tree, as set by cluster().
+ *
+ * @return the top node; null if cluster() has not assigned it
+ */
+ public SequenceNode getTopNode()
+ {
+ return top;
+ }
+
+ /**
+ * Answers whether the tree has real distances. This implementation always
+ * answers true.
+ *
+ * @return true
+ */
+ public boolean hasDistances()
+ {
+ return true;
+ }
+
+ /**
+ * Answers whether the tree has real bootstrap values. This implementation
+ * always answers false.
+ *
+ * @return false
+ */
+ public boolean hasBootstrap()
+ {
+ return false;
+ }
+
+ /**
+ * Answers whether the tree has a root distance. This implementation always
+ * answers true.
+ *
+ * @return true
+ */
+ public boolean hasRootDistance()
+ {
+ return true;
+ }
+
+ /**
+ * Form clusters by grouping sub-clusters, starting from one sequence per
+ * cluster, and finishing when only two clusters remain
+ */
+ void cluster()
+ {
+ while (noClus > 2)
+ {
+ findMinDistance();
+
+ joinClusters(mini, minj);
+
+ noClus--;
+ }
+
+ int rightChild = done.nextClearBit(0);
+ int leftChild = done.nextClearBit(rightChild + 1);
+
+ joinClusters(leftChild, rightChild);
+ top = (node.elementAt(leftChild));
+
+ reCount(top);
+ findHeight(top);
+ findMaxDist(top);
+ }
+
+ /**
+ * Returns the minimum distance between two clusters, and also sets the
+ * indices of the clusters in fields mini and minj. cluster() ignores the
+ * returned value and relies only on mini and minj.
+ *
+ * @return the minimum distance found
+ */
+ protected abstract double findMinDistance();
+
+ /**
+ * Calculates the tree: obtains the pairwise distance matrix from the score
+ * model's findDistances(), creates one leaf node and one cluster per
+ * sequence, and then runs cluster().
+ * <p>
+ * NOTE(review): any conversion from similarity scores to distances is left
+ * to the score model's findDistances() implementation; nothing is converted
+ * here.
+ *
+ * @param sm
+ * score model used to compute the distance matrix
+ * @param scoreOptions
+ * parameters passed to the score model
+ */
+ protected void computeTree(ScoreModelI sm, SimilarityParamsI scoreOptions)
+ {
+ distances = sm.findDistances(seqData, scoreOptions);
+
+ makeLeaves();
+
+ noClus = clusters.size();
+
+ cluster();
+ }
+
+ /**
+  * Searches the leaves at or below the given node for one whose dist exceeds
+  * the current maxDistValue; each time one is found it is stored in maxdist
+  * and its dist in maxDistValue. Internal nodes are only traversed, never
+  * recorded. maxDistValue is not reset here.
+  *
+  * @param nd
+  *          root of the subtree to search; null is ignored
+  */
+ void findMaxDist(SequenceNode nd)
+ {
+   if (nd == null)
+   {
+     return;
+   }
+
+   boolean hasChild = (nd.left() != null) || (nd.right() != null);
+   if (hasChild)
+   {
+     findMaxDist((SequenceNode) nd.left());
+     findMaxDist((SequenceNode) nd.right());
+     return;
+   }
+
+   double leafDist = nd.dist;
+   if (leafDist > maxDistValue)
+   {
+     maxdist = nd;
+     maxDistValue = leafDist;
+   }
+ }
+
+ /**
+  * Calculates r for cluster i with respect to cluster j: starting from 1,
+  * adds the distance from i to every cluster that is neither i nor j and is
+  * not marked done, then divides the total by (noClus - 2) when more than
+  * two clusters remain.
+  *
+  * @param i
+  *          index of the cluster r is computed for
+  * @param j
+  *          index of the cluster excluded from the sum
+  *
+  * @return the (possibly averaged) total
+  */
+ protected double findr(int i, int j)
+ {
+   double total = 1;
+
+   for (int k = 0; k < noseqs; k++)
+   {
+     if ((k == i) || (k == j) || done.get(k))
+     {
+       continue;
+     }
+     total += distances.getValue(i, k);
+   }
+
+   return (noClus > 2) ? total / (noClus - 2) : total;
+ }
+
+ /**
+  * Prepares the builder's state: creates the empty node list, stores the
+  * sequence data (building an AlignmentView from the sequences field when
+  * none is supplied), creates the empty 'done' set and counts the non-null
+  * sequences into noseqs.
+  *
+  * @param seqView
+  *          alignment view to use as sequence data, or null to build one
+  * @param start
+  *          first column, used only when seqView is null
+  * @param end
+  *          end column, used only when seqView is null
+  */
+ protected void init(AlignmentView seqView, int start, int end)
+ {
+   this.node = new Vector<SequenceNode>();
+
+   if (seqView == null)
+   {
+     SeqCigar[] cigars = new SeqCigar[sequences.length];
+     for (int idx = 0; idx < cigars.length; idx++)
+     {
+       cigars[idx] = new SeqCigar(sequences[idx], start, end);
+     }
+     CigarArray cigarArray = new CigarArray(cigars);
+     cigarArray.addOperation(CigarArray.M, end - start + 1);
+     this.seqData = new AlignmentView(cigarArray, start);
+   }
+   else
+   {
+     this.seqData = seqView;
+   }
+
+   noseqs = 0;
+   done = new BitSet();
+
+   // count the non-null sequences
+   for (int idx = 0; idx < sequences.length; idx++)
+   {
+     if (sequences[idx] != null)
+     {
+       noseqs++;
+     }
+   }
+ }
+
+ /**
+ * Merges cluster(j) into cluster(i) and recalculates cluster and node
+ * distances. In order: reads the current distance between i and j, computes
+ * ri and rj, asks the subclass to update the cluster distances
+ * (findClusterDistance), creates a new parent node with node(i) as left and
+ * node(j) as right child, asks the subclass to set the two children's
+ * distances (findNewDistances), stores the new parent at index i of the node
+ * list, and finally moves the members of cluster(j) to cluster(i) and marks
+ * j as done.
+ * <p>
+ * The distance and the r values are read before findClusterDistance is
+ * called; keep that order when editing.
+ *
+ * @param i
+ * index of the cluster that receives the merged result
+ * @param j
+ * index of the cluster that is merged and retired
+ */
+ void joinClusters(final int i, final int j)
+ {
+ double dist = distances.getValue(i, j);
+
+ ri = findr(i, j);
+ rj = findr(j, i);
+
+ findClusterDistance(i, j);
+
+ SequenceNode sn = new SequenceNode();
+
+ sn.setLeft((node.elementAt(i)));
+ sn.setRight((node.elementAt(j)));
+
+ SequenceNode tmpi = (node.elementAt(i));
+ SequenceNode tmpj = (node.elementAt(j));
+
+ findNewDistances(tmpi, tmpj, dist);
+
+ tmpi.setParent(sn);
+ tmpj.setParent(sn);
+
+ node.setElementAt(sn, i);
+
+ /*
+ * move the members of cluster(j) to cluster(i)
+ * and mark cluster j as out of the game
+ */
+ clusters.get(i).or(clusters.get(j));
+ clusters.get(j).clear();
+ done.set(j);
+ }
+
+ /**
+ * Computes and stores new distances for nodei and nodej, given the previous
+ * distance between them. Called by joinClusters() after ri and rj have been
+ * computed and before the two nodes are given their new parent.
+ *
+ * @param nodei
+ * node being joined as the left child
+ * @param nodej
+ * node being joined as the right child
+ * @param previousDistance
+ * distance between the two clusters before they were joined
+ */
+ protected abstract void findNewDistances(SequenceNode nodei,
+ SequenceNode nodej, double previousDistance);
+
+ /**
+ * Calculates and saves the distance between the combination of cluster(i) and
+ * cluster(j) and all other clusters. The form of the calculation depends on
+ * the tree clustering method being used.
+ *
+ * @param i
+ * @param j
+ */
+ protected abstract void findClusterDistance(int i, int j);
+
+ /**
+  * Creates the starting state for clustering: for each of the first noseqs
+  * sequences, a leaf node carrying the sequence and its name is appended to
+  * the node list, and a single-member cluster holding the sequence's index
+  * is appended to the cluster list.
+  * <p>
+  * NOTE(review): noseqs counts only non-null sequences, while the array is
+  * indexed from zero; this presumes any nulls sit at the end - confirm.
+  */
+ void makeLeaves()
+ {
+   clusters = new Vector<BitSet>();
+
+   for (int i = 0; i < noseqs; i++)
+   {
+     SequenceI seq = sequences[i];
+
+     SequenceNode leaf = new SequenceNode();
+     leaf.setElement(seq);
+     leaf.setName(seq.getName());
+     node.addElement(leaf);
+
+     BitSet members = new BitSet();
+     members.set(i);
+     clusters.addElement(members);
+   }
+ }
+
+ /**
+ * Returns the alignment view obtained from the viewport by the constructor.
+ *
+ * @return the seqStrings field
+ */
+ public AlignmentView getOriginalData()
+ {
+ return seqStrings;
+ }
+
+}
--- /dev/null
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.analysis;
+
+import jalview.datamodel.AlignmentView;
+import jalview.datamodel.BinaryNode;
+import jalview.datamodel.NodeTransformI;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceI;
+import jalview.datamodel.SequenceNode;
+import jalview.io.NewickFile;
+
+import java.util.ArrayList;
+import java.util.Enumeration;
+import java.util.List;
+import java.util.Vector;
+
+/**
+ * A model of a tree, either computed by Jalview or loaded from a file or other
+ * resource or service
+ */
+public class TreeModel
+{
+
+ SequenceI[] sequences;
+
+ /*
+ * SequenceData is a string representation of what the user
+ * sees. The display may contain hidden columns.
+ */
+ private AlignmentView seqData;
+
+ int noseqs;
+
+ SequenceNode top;
+
+ double maxDistValue;
+
+ double maxheight;
+
+ int ycount;
+
+ Vector<SequenceNode> node;
+
+ boolean hasDistances = true; // normal case for jalview trees
+
+ boolean hasBootstrap = false; // normal case for jalview trees
+
+ private boolean hasRootDistance = true;
+
+ /**
+ * Create a new TreeModel object from a parsed Newick file, with leaves
+ * associated with sequences in seqs (by name, see
+ * associateLeavesToSequences), and (optionally) the original alignment data
+ *
+ * @param seqs
+ * sequences to associate with the tree's leaves
+ * @param odata
+ * original alignment data as an AlignmentView; stored as given
+ * @param treefile
+ * parsed Newick file supplying the root node and the
+ * distance/bootstrap/root-distance flags
+ */
+ public TreeModel(SequenceI[] seqs, AlignmentView odata, NewickFile treefile)
+ {
+ this(seqs, treefile.getTree(), treefile.HasDistances(), treefile
+ .HasBootstrap(), treefile.HasRootDistance());
+ seqData = odata;
+
+ associateLeavesToSequences(seqs);
+ }
+
+ /**
+ * Constructor given a calculated tree. Copies the builder's sequences, root
+ * node, property flags and original alignment data; leaves are not
+ * re-associated with sequences here.
+ *
+ * @param tree
+ * builder holding the computed tree
+ */
+ public TreeModel(TreeBuilder tree)
+ {
+ this(tree.getSequences(), tree.getTopNode(), tree.hasDistances(), tree
+ .hasBootstrap(), tree.hasRootDistance());
+ seqData = tree.getOriginalData();
+ }
+
+ /**
+ * Constructor given sequences, root node and tree property flags. Also
+ * computes node heights from the root and stores the maximum in maxheight.
+ * <p>
+ * NOTE(review): findHeight() is public and overridable, and is called here
+ * during construction.
+ *
+ * @param seqs
+ * sequences associated with the tree; stored as given
+ * @param root
+ * root node of the tree
+ * @param hasDist
+ * true if the tree has real distances
+ * @param hasBoot
+ * true if the tree has real bootstrap values
+ * @param hasRootDist
+ * true if the tree has a root distance
+ */
+ public TreeModel(SequenceI[] seqs, SequenceNode root, boolean hasDist,
+ boolean hasBoot, boolean hasRootDist)
+ {
+ this.sequences = seqs;
+ top = root;
+
+ hasDistances = hasDist;
+ hasBootstrap = hasBoot;
+ hasRootDistance = hasRootDist;
+
+ maxheight = findHeight(top);
+ }
+
+ /**
+ * @param seqs
+ */
+ public void associateLeavesToSequences(SequenceI[] seqs)
+ {
+ SequenceIdMatcher algnIds = new SequenceIdMatcher(seqs);
+
+ Vector<SequenceNode> leaves = findLeaves(top);
+
+ int i = 0;
+ int namesleft = seqs.length;
+
+ SequenceNode j;
+ SequenceI nam;
+ String realnam;
+ Vector<SequenceI> one2many = new Vector<SequenceI>();
+ // int countOne2Many = 0;
+ while (i < leaves.size())
+ {
+ j = leaves.elementAt(i++);
+ realnam = j.getName();
+ nam = null;
+
+ if (namesleft > -1)
+ {
+ nam = algnIds.findIdMatch(realnam);
+ }
+
+ if (nam != null)
+ {
+ j.setElement(nam);
+ if (one2many.contains(nam))
+ {
+ // countOne2Many++;
+ // if (jalview.bin.Cache.log.isDebugEnabled())
+ // jalview.bin.Cache.log.debug("One 2 many relationship for
+ // "+nam.getName());
+ }
+ else
+ {
+ one2many.addElement(nam);
+ namesleft--;
+ }
+ }
+ else
+ {
+ j.setElement(new Sequence(realnam, "THISISAPLACEHLDER"));
+ j.setPlaceholder(true);
+ }
+ }
+ // if (jalview.bin.Cache.log.isDebugEnabled() && countOne2Many>0) {
+ // jalview.bin.Cache.log.debug("There were "+countOne2Many+" alignment
+ // sequence ids (out of "+one2many.size()+" unique ids) linked to two or
+ // more leaves.");
+ // }
+ // one2many.clear();
+ }
+
+ /**
+ * Generate a string representation of the Tree
+ *
+ * @return Newick File with all tree data available
+ */
+ public String print()
+ {
+ NewickFile fout = new NewickFile(getTopNode());
+
+ return fout.print(hasBootstrap(), hasDistances(),
+ hasRootDistance()); // output all data available for tree
+ }
+
+ /**
+  * Re-links the tree's leaves after the associated alignment has changed. A
+  * leaf whose element is in the list just has its placeholder flag cleared.
+  * Otherwise the leaf is matched to a sequence in the list by name: on a
+  * match the leaf takes that sequence and is no longer a placeholder; with
+  * no match the leaf becomes a placeholder, receiving a new placeholder
+  * Sequence unless it already was one.
+  *
+  * @param list
+  *          Sequence set to be associated with tree nodes
+  */
+ public void updatePlaceHolders(List<SequenceI> list)
+ {
+   // built lazily, the first time a leaf needs matching by name
+   SequenceIdMatcher seqmatcher = null;
+
+   for (SequenceNode leaf : findLeaves(top))
+   {
+     if (list.contains(leaf.element()))
+     {
+       leaf.setPlaceholder(false);
+       continue;
+     }
+
+     if (seqmatcher == null)
+     {
+       seqmatcher = new SequenceIdMatcher(
+               list.toArray(new SequenceI[list.size()]));
+     }
+
+     SequenceI match = seqmatcher.findIdMatch(leaf.getName());
+     if (match != null)
+     {
+       // TODO - remapping a non-placeholder leaf to a new sequenceI should
+       // remove any refs to the old one; make many sequenceI to one leaf
+       // mappings possible! (JBPNote)
+       leaf.setPlaceholder(false);
+       leaf.setElement(match);
+     }
+     else
+     {
+       if (!leaf.isPlaceholder())
+       {
+         // Construct a new placeholder sequence object for this leaf
+         leaf.setElement(new Sequence(leaf.getName(), "THISISAPLACEHLDER"));
+       }
+       leaf.setPlaceholder(true);
+     }
+   }
+ }
+
+ /**
+  * Renames every node whose element is a SequenceI to that sequence's name,
+  * by way of applyToNodes. This will modify the tree's metadata! (ie the
+  * original NewickFile or newly generated BinaryTree's label data)
+  */
+ public void renameAssociatedNodes()
+ {
+   NodeTransformI renamer = new NodeTransformI()
+   {
+     @Override
+     public void transform(BinaryNode nd)
+     {
+       Object element = nd.element();
+       // instanceof is false for null, so no separate null check is needed
+       if (element instanceof SequenceI)
+       {
+         nd.setName(((SequenceI) element).getName());
+       }
+     }
+   };
+   applyToNodes(renamer);
+ }
+
+ /**
+ * Search for leaf nodes below (or at) the given node
+ *
+ * @param nd
+ * root node to search from; null gives an empty result
+ *
+ * @return a new Vector holding the leaf nodes found, left subtree before
+ * right
+ */
+ public Vector<SequenceNode> findLeaves(SequenceNode nd)
+ {
+ Vector<SequenceNode> leaves = new Vector<SequenceNode>();
+ findLeaves(nd, leaves);
+ return leaves;
+ }
+
+ /**
+  * Recursively collects leaf nodes (nodes with neither a left nor a right
+  * child), visiting the left subtree before the right.
+  *
+  * @param nd
+  *          root node to search from; null is ignored
+  * @param leaves
+  *          Vector to which leaf nodes are appended
+  *
+  * @return the same Vector that was passed in
+  */
+ Vector<SequenceNode> findLeaves(SequenceNode nd,
+         Vector<SequenceNode> leaves)
+ {
+   if (nd == null)
+   {
+     return leaves;
+   }
+
+   // leaf detection: no children on either side
+   boolean isLeaf = (nd.left() == null) && (nd.right() == null);
+   if (isLeaf)
+   {
+     leaves.addElement(nd);
+   }
+   else
+   {
+     /*
+      * TODO: Identify internal nodes... if (node.isSequenceLabel()) {
+      * leaves.addElement(node); }
+      */
+     findLeaves((SequenceNode) nd.left(), leaves);
+     findLeaves((SequenceNode) nd.right(), leaves);
+   }
+
+   return leaves;
+ }
+
+ /**
+  * Debugging aid: writes the subtree at the given node to standard output.
+  * A leaf prints its sequence name, distance and bootstrap value; any other
+  * node prints its distance and then its left and right subtrees.
+  *
+  * @param nd
+  *          root of the subtree to print; null is ignored
+  */
+ void printNode(SequenceNode nd)
+ {
+   if (nd == null)
+   {
+     return;
+   }
+
+   boolean isLeaf = (nd.left() == null) && (nd.right() == null);
+   if (isLeaf)
+   {
+     String leafName = ((SequenceI) nd.element()).getName();
+     System.out.println("Leaf = " + leafName);
+     System.out.println("Dist " + nd.dist);
+     System.out.println("Boot " + nd.getBootstrap());
+     return;
+   }
+
+   System.out.println("Dist " + nd.dist);
+   printNode((SequenceNode) nd.left());
+   printNode((SequenceNode) nd.right());
+ }
+
+ /**
+ * Returns the maximum node height, as last computed by findHeight (the
+ * constructor computes it from the root).
+ *
+ * @return the maxheight field
+ */
+ public double getMaxHeight()
+ {
+ return maxheight;
+ }
+
+ /**
+ * Makes a list of groups, where each group is represented by a node whose
+ * height (distance from the root node), as a fraction of the height of the
+ * whole tree, is greater than the given threshold. This corresponds to
+ * selecting the nodes immediately to the right of a vertical line
+ * partitioning the tree (if the tree is drawn with root to the left). Each
+ * such node represents a group that contains all of the sequences linked to
+ * the child leaf nodes.
+ *
+ * @param threshold
+ * fraction of the maximum tree height at which to partition
+ * @return a new list of the selected nodes, in left-to-right order
+ */
+ public List<SequenceNode> groupNodes(float threshold)
+ {
+ List<SequenceNode> groups = new ArrayList<SequenceNode>();
+ _groupNodes(groups, getTopNode(), threshold);
+ return groups;
+ }
+
+ /**
+  * Recursive worker for groupNodes. Adds the node to the list if its height
+  * divided by maxheight exceeds the threshold; otherwise descends into its
+  * left and then its right child.
+  *
+  * @param groups
+  *          list the selected nodes are appended to
+  * @param nd
+  *          node to test; null is ignored
+  * @param threshold
+  *          fraction of the maximum tree height
+  */
+ protected void _groupNodes(List<SequenceNode> groups, SequenceNode nd,
+         float threshold)
+ {
+   if (nd == null)
+   {
+     return;
+   }
+
+   boolean beyondThreshold = (nd.height / maxheight) > threshold;
+   if (!beyondThreshold)
+   {
+     _groupNodes(groups, (SequenceNode) nd.left(), threshold);
+     _groupNodes(groups, (SequenceNode) nd.right(), threshold);
+     return;
+   }
+
+   groups.add(nd);
+ }
+
+ /**
+  * Sets the height of the given node and of every node below it, where a
+  * node's height is its parent's height plus its own dist, and returns the
+  * largest height found so far.
+  * <p>
+  * A node with no parent is treated as the root: its height is set to zero
+  * and the maxheight field is reset to zero before descending. This now also
+  * applies to a root that is itself a leaf (a single-node tree), which
+  * previously caused a NullPointerException - including from the
+  * constructor, which calls this method on the root.
+  *
+  * @param nd
+  *          root of the subtree to process; may be null
+  *
+  * @return the greater of the maxheight field and the heights computed in
+  *         this subtree; the maxheight field unchanged if nd is null
+  */
+ public double findHeight(SequenceNode nd)
+ {
+   if (nd == null)
+   {
+     return maxheight;
+   }
+
+   SequenceNode parent = (SequenceNode) nd.parent();
+   if (parent != null)
+   {
+     nd.height = parent.height + nd.dist;
+   }
+   else
+   {
+     // root node (leaf or internal): restart the height calculation
+     maxheight = 0;
+     nd.height = 0;
+   }
+
+   if ((nd.left() == null) && (nd.right() == null))
+   {
+     // leaf: report the larger value, the caller stores it in maxheight
+     return (nd.height > maxheight) ? nd.height : maxheight;
+   }
+
+   maxheight = findHeight((SequenceNode) (nd.left()));
+   maxheight = findHeight((SequenceNode) (nd.right()));
+
+   return maxheight;
+ }
+
+ /**
+  * Debugging aid: writes the subtree at the given node to standard output,
+  * children before parent. A node with both children prints its subtrees
+  * first; any other node prints the name of its sequence. Every node then
+  * prints its dist, count and height.
+  *
+  * @param nd
+  *          root of the subtree to print; null is ignored
+  */
+ void printN(SequenceNode nd)
+ {
+   if (nd == null)
+   {
+     return;
+   }
+
+   boolean hasBothChildren = (nd.left() != null) && (nd.right() != null);
+   if (hasBothChildren)
+   {
+     printN((SequenceNode) nd.left());
+     printN((SequenceNode) nd.right());
+   }
+   else
+   {
+     String seqName = ((SequenceI) nd.element()).getName();
+     System.out.println(" name = " + seqName);
+   }
+
+   System.out.println(" dist = " + nd.dist + " " + nd.count + " "
+           + nd.height);
+ }
+
+ /**
+ * Resets the ycount field to zero and then recomputes the count and ycount
+ * values of every node at or below the given node (see _reCount).
+ *
+ * @param nd
+ * root of the subtree to recount; null is tolerated
+ */
+ public void reCount(SequenceNode nd)
+ {
+ ycount = 0;
+ // _lycount = 0;
+ // _lylimit = this.node.size();
+ _reCount(nd);
+ }
+
+ // private long _lycount = 0, _lylimit = 0;
+
+ /**
+  * Recursive worker for reCount. A node that lacks either child is treated
+  * as a leaf: its count becomes 1 and it takes the next value of the ycount
+  * field. Any other node gets the sum of its children's counts and the mean
+  * of its children's ycount values.
+  *
+  * @param nd
+  *          node to process; null is ignored
+  */
+ void _reCount(SequenceNode nd)
+ {
+   if (nd == null)
+   {
+     return;
+   }
+
+   boolean treatAsLeaf = (nd.left() == null) || (nd.right() == null);
+   if (treatAsLeaf)
+   {
+     nd.count = 1;
+     nd.ycount = ycount++;
+     return;
+   }
+
+   SequenceNode leftChild = (SequenceNode) nd.left();
+   SequenceNode rightChild = (SequenceNode) nd.right();
+
+   // children first, so that their count/ycount are up to date
+   _reCount(leftChild);
+   _reCount(rightChild);
+
+   nd.count = leftChild.count + rightChild.count;
+   nd.ycount = (leftChild.ycount + rightChild.ycount) / 2;
+ }
+
+ /**
+ * Exchanges the left and right children of the given node. Counts, ycounts
+ * and heights are not recomputed here.
+ *
+ * @param nd
+ * node whose children are swapped; null is ignored
+ */
+ public void swapNodes(SequenceNode nd)
+ {
+ if (nd == null)
+ {
+ return;
+ }
+
+ SequenceNode tmp = (SequenceNode) nd.left();
+
+ nd.setLeft(nd.right());
+ nd.setRight(tmp);
+ }
+
+ /**
+ * Reverses parent/child links on the path from the given node up to the
+ * child of the current top node, so that 'dir' becomes the parent of 'nd'.
+ * <p>
+ * When nd's parent is not the top node, the parent is processed first
+ * (recursively, with nd as its dir); then dir becomes nd's parent and the
+ * child slot dir occupied (left or right) is given nd's former parent. If
+ * dir is neither child of nd, nd is left unchanged.
+ * <p>
+ * When nd's parent is the top node, nd's left child (if dir is the left
+ * child) or else its right child becomes its parent, and nd's other child
+ * slot is set to top's other child.
+ * <p>
+ * NOTE(review): in the top-node case the slot that is overwritten is the one
+ * NOT occupied by dir, unlike the general case - confirm this is intended.
+ * No caller is visible in this class.
+ *
+ * @param nd
+ * node whose links are rewired; null is ignored
+ * @param dir
+ * child of nd that is to become its parent
+ */
+ void changeDirection(SequenceNode nd, SequenceNode dir)
+ {
+ if (nd == null)
+ {
+ return;
+ }
+
+ if (nd.parent() != top)
+ {
+ changeDirection((SequenceNode) nd.parent(), nd);
+
+ SequenceNode tmp = (SequenceNode) nd.parent();
+
+ if (dir == nd.left())
+ {
+ nd.setParent(dir);
+ nd.setLeft(tmp);
+ }
+ else if (dir == nd.right())
+ {
+ nd.setParent(dir);
+ nd.setRight(tmp);
+ }
+ }
+ else
+ {
+ if (dir == nd.left())
+ {
+ nd.setParent(nd.left());
+
+ if (top.left() == nd)
+ {
+ nd.setRight(top.right());
+ }
+ else
+ {
+ nd.setRight(top.left());
+ }
+ }
+ else
+ {
+ nd.setParent(nd.right());
+
+ if (top.left() == nd)
+ {
+ nd.setLeft(top.right());
+ }
+ else
+ {
+ nd.setLeft(top.left());
+ }
+ }
+ }
+ }
+
+ /**
+ * Returns the root node of the tree, as supplied to the constructor.
+ *
+ * @return the top node
+ */
+ public SequenceNode getTopNode()
+ {
+ return top;
+ }
+
+ /**
+ *
+ * @return true if tree has real distances
+ */
+ public boolean hasDistances()
+ {
+ return hasDistances;
+ }
+
+ /**
+ *
+ * @return true if tree has real bootstrap values
+ */
+ public boolean hasBootstrap()
+ {
+ return hasBootstrap;
+ }
+
+ /**
+ * Answers the root-distance flag supplied to the constructor.
+ *
+ * @return true if the tree has a root distance
+ */
+ public boolean hasRootDistance()
+ {
+ return hasRootDistance;
+ }
+
+ /**
+  * Applies the given transform to all the nodes in the tree.
+  * <p>
+  * If the node list has been populated, the transform is applied to each of
+  * its elements in order. No constructor of this class populates that list,
+  * so when it is null the tree is walked from the top node instead (parent
+  * first, then left and right subtrees) rather than failing with a
+  * NullPointerException.
+  *
+  * @param nodeTransformI
+  *          transform to apply to each node
+  */
+ public void applyToNodes(NodeTransformI nodeTransformI)
+ {
+   if (node == null)
+   {
+     applyToSubtree(top, nodeTransformI);
+     return;
+   }
+
+   Enumeration<SequenceNode> nodes = node.elements();
+   while (nodes.hasMoreElements())
+   {
+     nodeTransformI.transform(nodes.nextElement());
+   }
+ }
+
+ /**
+  * Applies the transform to the given node and then, recursively, to its
+  * left and right subtrees.
+  *
+  * @param nd
+  *          root of the subtree to transform; null is ignored
+  * @param nodeTransformI
+  *          transform to apply to each node
+  */
+ private void applyToSubtree(SequenceNode nd, NodeTransformI nodeTransformI)
+ {
+   if (nd == null)
+   {
+     return;
+   }
+
+   nodeTransformI.transform(nd);
+   applyToSubtree((SequenceNode) nd.left(), nodeTransformI);
+   applyToSubtree((SequenceNode) nd.right(), nodeTransformI);
+ }
+
+ /**
+ * Returns the original alignment data this tree was built or loaded with.
+ *
+ * @return the seqData field as set by the constructor; may be null if none
+ * was supplied
+ */
+ public AlignmentView getOriginalData()
+ {
+ return seqData;
+ }
+}
--- /dev/null
+package jalview.analysis.scoremodels;
+
+import jalview.api.analysis.ScoreModelI;
+import jalview.api.analysis.SimilarityParamsI;
+import jalview.datamodel.AlignmentView;
+import jalview.math.MatrixI;
+
+public abstract class DistanceScoreModel implements ScoreModelI
+{
+  /**
+   * Computes similarity scores by first computing distance scores with
+   * findDistances, and then converting them with distanceToSimilarity.
+   *
+   * @param seqData
+   *          sequence data passed to findDistances
+   * @param options
+   *          parameters passed to findDistances
+   * @return a matrix of similarity scores
+   */
+  @Override
+  public MatrixI findSimilarities(AlignmentView seqData,
+          SimilarityParamsI options)
+  {
+    return distanceToSimilarity(findDistances(seqData, options));
+  }
+
+  /**
+   * Converts distance scores to similarity scores, by reversing the range of
+   * score values so that max becomes min and vice versa. The conversion is
+   * applied to a copy, so the input matrix is not modified.
+   *
+   * @param distances
+   *          matrix of distance scores
+   * @return a new matrix holding the range-reversed scores
+   */
+  public static MatrixI distanceToSimilarity(MatrixI distances)
+  {
+    MatrixI reversed = distances.copy();
+    reversed.reverseRange(false);
+    return reversed;
+  }
+}
--- /dev/null
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.analysis.scoremodels;
+
+import jalview.api.AlignmentViewPanel;
+import jalview.api.FeatureRenderer;
+import jalview.api.analysis.SimilarityParamsI;
+import jalview.api.analysis.ViewBasedAnalysisI;
+import jalview.datamodel.AlignmentView;
+import jalview.datamodel.SeqCigar;
+import jalview.datamodel.SequenceFeature;
+import jalview.math.Matrix;
+import jalview.math.MatrixI;
+import jalview.util.SetUtils;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+public class FeatureDistanceModel extends DistanceScoreModel implements
+        ViewBasedAnalysisI
+{
+  private static final String NAME = "Sequence Feature Similarity";
+
+  /*
+   * not assigned anywhere in this class, so getDescription() answers null
+   */
+  private String description;
+
+  /*
+   * feature renderer cloned from the view by configureFromAlignmentView;
+   * null until that method has been called
+   */
+  FeatureRenderer fr;
+
+  /**
+   * Constructor
+   */
+  public FeatureDistanceModel()
+  {
+  }
+
+  /**
+   * Stores a clone of the view's feature renderer, which is later used to
+   * find the displayed feature types and the features at each residue.
+   *
+   * @param view
+   *          the alignment panel to take feature display settings from
+   * @return always true
+   */
+  @Override
+  public boolean configureFromAlignmentView(AlignmentViewPanel view)
+  {
+    fr = view.cloneFeatureRenderer();
+    return true;
+  }
+
+  /**
+   * Calculates a distance measure [i][j] between each pair of sequences as the
+   * average number of features they have but do not share. That is, find the
+   * features each sequence pair has at each column, ignore feature types they
+   * have in common, and count the rest. The totals are normalised by the number
+   * of columns processed.
+   * <p>
+   * The parameters argument provides the setting for treatment of gap-residue
+   * aligned positions.
+   * <p>
+   * A matrix of zeroes is returned if no feature renderer has been configured,
+   * if no feature types are displayed, or if there are no visible columns to
+   * score (the last case avoids a 0/0 = NaN normalisation).
+   *
+   * @param seqData
+   *          the sequences and visible column ranges to score
+   * @param params
+   *          only includeGaps() is consulted here
+   * @return a symmetric matrix of distances with a zero diagonal
+   */
+  @Override
+  public MatrixI findDistances(AlignmentView seqData,
+          SimilarityParamsI params)
+  {
+    SeqCigar[] seqs = seqData.getSequences();
+    int noseqs = seqs.length;
+    double[][] distances = new double[noseqs][noseqs];
+
+    List<String> dft = fr == null ? null : fr.getDisplayedFeatureTypes();
+    if (dft == null || dft.isEmpty())
+    {
+      return new Matrix(distances);
+    }
+
+    // need to get real position for view position
+    int[] viscont = seqData.getVisibleContigs();
+
+    /*
+     * scan each column, compute and add to each distance[i, j]
+     * the number of feature types that seqi and seqj do not share
+     */
+    int cpwidth = 0;
+    for (int vc = 0; vc < viscont.length; vc += 2)
+    {
+      for (int cpos = viscont[vc]; cpos <= viscont[vc + 1]; cpos++)
+      {
+        cpwidth++;
+
+        /*
+         * first record feature types in this column for each sequence
+         * (sequences gapped at this column have no map entry)
+         */
+        Map<SeqCigar, Set<String>> sfap = findFeatureTypesAtColumn(seqs,
+                cpos);
+
+        /*
+         * count feature types on either i'th or j'th sequence but not both
+         * and add this 'distance' measure to the total for [i, j] for j > i
+         */
+        for (int i = 0; i < (noseqs - 1); i++)
+        {
+          Set<String> set1 = sfap.get(seqs[i]);
+          for (int j = i + 1; j < noseqs; j++)
+          {
+            Set<String> set2 = sfap.get(seqs[j]);
+            boolean bothResidues = set1 != null && set2 != null;
+
+            /*
+             * residue-residue is always scored
+             * gap-residue and gap-gap are passed to countDisjunction (with
+             * null for a gapped sequence) only if params say to include gaps;
+             * NOTE(review): gap-gap is expected to score zero - this relies on
+             * SetUtils.countDisjunction treating null as an empty set
+             */
+            if (bothResidues || params.includeGaps())
+            {
+              distances[i][j] += SetUtils.countDisjunction(set1, set2);
+            }
+          }
+        }
+      }
+    }
+
+    if (cpwidth == 0)
+    {
+      /*
+       * no visible columns were scanned, so all totals are still zero;
+       * return them as they are rather than dividing 0 by 0 (NaN)
+       */
+      return new Matrix(distances);
+    }
+
+    /*
+     * normalise the distance scores (summed over columns) by the
+     * number of visible columns used in the calculation
+     * and fill in the bottom half of the matrix
+     */
+    // TODO JAL-2424 cpwidth may be out by 1 - affects scores but not tree shape
+    for (int i = 0; i < noseqs; i++)
+    {
+      for (int j = i + 1; j < noseqs; j++)
+      {
+        distances[i][j] /= cpwidth;
+        distances[j][i] = distances[i][j];
+      }
+    }
+    return new Matrix(distances);
+  }
+
+  /**
+   * Builds and returns a map containing a (possibly empty) set (one per
+   * SeqCigar) of the types of the features the renderer finds at the given
+   * column position. The map has no entry for sequences for which
+   * findPosition answers -1 (gapped at the column position).
+   * <p>
+   * Requires the feature renderer to have been configured.
+   *
+   * @param seqs
+   *          the sequences to inspect
+   * @param columnPosition
+   *          the alignment column to look up in each sequence
+   * @return map from each non-gapped sequence to its feature types
+   */
+  protected Map<SeqCigar, Set<String>> findFeatureTypesAtColumn(
+          SeqCigar[] seqs, int columnPosition)
+  {
+    Map<SeqCigar, Set<String>> sfap = new HashMap<SeqCigar, Set<String>>();
+    for (SeqCigar seq : seqs)
+    {
+      int spos = seq.findPosition(columnPosition);
+      if (spos == -1)
+      {
+        continue;
+      }
+      Set<String> types = new HashSet<String>();
+      List<SequenceFeature> sfs = fr.findFeaturesAtRes(seq.getRefSeq(),
+              spos);
+      for (SequenceFeature sf : sfs)
+      {
+        types.add(sf.getType());
+      }
+      sfap.put(seq, types);
+    }
+    return sfap;
+  }
+
+  /**
+   * Answers the fixed name of this score model
+   */
+  @Override
+  public String getName()
+  {
+    return NAME;
+  }
+
+  /**
+   * Answers the description field, which this class never sets (so null)
+   */
+  @Override
+  public String getDescription()
+  {
+    return description;
+  }
+
+  /**
+   * Answers true: this model reports itself as applicable to nucleotide
+   */
+  @Override
+  public boolean isDNA()
+  {
+    return true;
+  }
+
+  /**
+   * Answers true: this model reports itself as applicable to protein
+   */
+  @Override
+  public boolean isProtein()
+  {
+    return true;
+  }
+
+  /**
+   * Answers a fixed sentence describing the scoring method
+   */
+  @Override
+  public String toString()
+  {
+    return "Score between sequences based on hamming distance between binary vectors marking features displayed at each column";
+  }
+}
+++ /dev/null
-/*
- * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
- * Copyright (C) $$Year-Rel$$ The Jalview Authors
- *
- * This file is part of Jalview.
- *
- * Jalview is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, either version 3
- * of the License, or (at your option) any later version.
- *
- * Jalview is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
- * The Jalview Authors are detailed in the 'AUTHORS' file.
- */
-package jalview.analysis.scoremodels;
-
-import jalview.api.analysis.ScoreModelI;
-import jalview.api.analysis.ViewBasedAnalysisI;
-import jalview.datamodel.AlignmentView;
-import jalview.datamodel.SeqCigar;
-import jalview.datamodel.SequenceFeature;
-
-import java.util.ArrayList;
-import java.util.Hashtable;
-import java.util.List;
-
-public class FeatureScoreModel implements ScoreModelI, ViewBasedAnalysisI
-{
- jalview.api.FeatureRenderer fr;
-
- @Override
- public boolean configureFromAlignmentView(
- jalview.api.AlignmentViewPanel view)
- {
- fr = view.cloneFeatureRenderer();
- return true;
- }
-
- @Override
- public float[][] findDistances(AlignmentView seqData)
- {
- int nofeats = 0;
- List<String> dft = fr.getDisplayedFeatureTypes();
- nofeats = dft.size();
- SeqCigar[] seqs = seqData.getSequences();
- int noseqs = seqs.length;
- int cpwidth = 0;// = seqData.getWidth();
- float[][] distance = new float[noseqs][noseqs];
- if (nofeats == 0)
- {
- for (float[] d : distance)
- {
- for (int i = 0; i < d.length; d[i++] = 0f)
- {
- ;
- }
- }
- return distance;
- }
- // need to get real position for view position
- int[] viscont = seqData.getVisibleContigs();
- for (int vc = 0; vc < viscont.length; vc += 2)
- {
-
- for (int cpos = viscont[vc]; cpos <= viscont[vc + 1]; cpos++)
- {
- cpwidth++;
- // get visible features at cpos under view's display settings and
- // compare them
- List<Hashtable<String, SequenceFeature>> sfap = new ArrayList<Hashtable<String, SequenceFeature>>();
- for (int i = 0; i < noseqs; i++)
- {
- Hashtable<String, SequenceFeature> types = new Hashtable<String, SequenceFeature>();
- int spos = seqs[i].findPosition(cpos);
- if (spos != -1)
- {
- List<SequenceFeature> sfs = fr.findFeaturesAtRes(
- seqs[i].getRefSeq(), spos);
- for (SequenceFeature sf : sfs)
- {
- types.put(sf.getType(), sf);
- }
- }
- sfap.add(types);
- }
- for (int i = 0; i < (noseqs - 1); i++)
- {
- if (cpos == 0)
- {
- distance[i][i] = 0f;
- }
- for (int j = i + 1; j < noseqs; j++)
- {
- int sfcommon = 0;
- // compare the two lists of features...
- Hashtable<String, SequenceFeature> fi = sfap.get(i), fk, fj = sfap
- .get(j);
- if (fi.size() > fj.size())
- {
- fk = fj;
- }
- else
- {
- fk = fi;
- fi = fj;
- }
- for (String k : fi.keySet())
- {
- SequenceFeature sfj = fk.get(k);
- if (sfj != null)
- {
- sfcommon++;
- }
- }
- distance[i][j] += (fi.size() + fk.size() - 2f * sfcommon);
- distance[j][i] += distance[i][j];
- }
- }
- }
- }
- for (int i = 0; i < noseqs; i++)
- {
- for (int j = i + 1; j < noseqs; j++)
- {
- distance[i][j] /= cpwidth;
- distance[j][i] = distance[i][j];
- }
- }
- return distance;
- }
-
- @Override
- public String getName()
- {
- return "Sequence Feature Similarity";
- }
-
- @Override
- public boolean isDNA()
- {
- return true;
- }
-
- @Override
- public boolean isProtein()
- {
- return true;
- }
-
- @Override
- public String toString()
- {
- return "Score between sequences based on hamming distance between binary vectors marking features displayed at each column";
- }
-}
--- /dev/null
+package jalview.analysis.scoremodels;
+
+import jalview.api.analysis.PairwiseScoreModelI;
+import jalview.api.analysis.SimilarityParamsI;
+import jalview.datamodel.AlignmentView;
+import jalview.math.Matrix;
+import jalview.math.MatrixI;
+import jalview.util.Comparison;
+
+/**
+ * A class to provide sequence pairwise similarity based on residue identity
+ */
+public class PIDModel extends SimilarityScoreModel implements
+        PairwiseScoreModelI
+{
+  private static final String NAME = "PID";
+
+  /*
+   * not assigned anywhere in this class, so getDescription() answers null
+   */
+  private String description;
+
+  /**
+   * Constructor
+   */
+  public PIDModel()
+  {
+  }
+
+  /**
+   * Answers the fixed name of this score model
+   */
+  @Override
+  public String getName()
+  {
+    return NAME;
+  }
+
+  /**
+   * Answers the description field, which this class never sets (so null)
+   */
+  @Override
+  public String getDescription()
+  {
+    return description;
+  }
+
+  /**
+   * Answers true: this model reports itself as applicable to nucleotide
+   */
+  @Override
+  public boolean isDNA()
+  {
+    return true;
+  }
+
+  /**
+   * Answers true: this model reports itself as applicable to protein
+   */
+  @Override
+  public boolean isProtein()
+  {
+    return true;
+  }
+
+  /**
+   * Answers 1 if c and d are the same residue (ignoring case of a-z), and not
+   * gap characters. Answers 0 for non-matching or gap characters.
+   */
+  @Override
+  public float getPairwiseScore(char c, char d)
+  {
+    c = toUpper(c);
+    d = toUpper(d);
+    if (c == d && !Comparison.isGap(c))
+    {
+      return 1f;
+    }
+    return 0f;
+  }
+
+  /**
+   * Answers the upper case equivalent of an ASCII lower case letter (a-z);
+   * any other character is returned unchanged.
+   *
+   * @param c
+   *          the character to convert
+   * @return the converted (or original) character
+   */
+  protected static char toUpper(char c)
+  {
+    if ('a' <= c && c <= 'z')
+    {
+      c += 'A' - 'a';
+    }
+    return c;
+  }
+
+  /**
+   * Computes similarity scores based on pairwise percentage identity of
+   * sequences. For consistency with Jalview 2.10.1's SeqSpace mode PCA
+   * calculation, the percentage scores are rescaled to the width of the
+   * sequences (as if counts of identical residues).
+   *
+   * @param seqData
+   *          supplies the sequence strings (gaps as dash) and the width
+   * @param options
+   *          gap treatment and normalisation choices for the PID calculation
+   * @return percentage identities multiplied by (width / 100)
+   */
+  @Override
+  public MatrixI findSimilarities(AlignmentView seqData,
+          SimilarityParamsI options)
+  {
+    String[] seqs = seqData.getSequenceStrings(Comparison.GAP_DASH);
+
+    MatrixI result = findSimilarities(seqs, options);
+
+    result.multiply(seqData.getWidth() / 100d);
+
+    return result;
+  }
+
+  /**
+   * A distance score is computed by the superclass, and then rescaled by
+   * (100 / width), reversing the rescaling to count values done in
+   * findSimilarities. No rescaling is done if the width is zero.
+   *
+   * @param seqData
+   *          the alignment data to score
+   * @param options
+   *          passed through to the superclass calculation
+   * @return the rescaled distance scores
+   */
+  @Override
+  public MatrixI findDistances(AlignmentView seqData,
+          SimilarityParamsI options)
+  {
+    MatrixI result = super.findDistances(seqData, options);
+
+    if (seqData.getWidth() != 0)
+    {
+      result.multiply(100d / seqData.getWidth());
+    }
+
+    return result;
+  }
+
+  /**
+   * Compute percentage identity scores for every pair of sequences, using the
+   * gap treatment and normalisation specified by the options parameter.
+   * computePID gives the same answer whichever way round its two sequences
+   * are supplied, so each pair is computed once and the value written to both
+   * [row][col] and [col][row].
+   *
+   * @param seqs
+   *          the sequences as strings
+   * @param options
+   *          passed to computePID for each pair
+   * @return a square, symmetric matrix of percentage identities
+   */
+  protected MatrixI findSimilarities(String[] seqs,
+          SimilarityParamsI options)
+  {
+    // TODO reuse code in ScoreMatrix instead somehow
+    int count = seqs.length;
+    double[][] values = new double[count][count];
+    for (int row = 0; row < count; row++)
+    {
+      for (int col = row; col < count; col++)
+      {
+        double pid = computePID(seqs[row], seqs[col], options);
+        values[row][col] = pid;
+        values[col][row] = pid;
+      }
+    }
+    return new Matrix(values);
+  }
+
+  /**
+   * Computes a percentage identity for two sequences, using the algorithm
+   * choices specified by the options parameter. The result is 100 * (count of
+   * positions scored as matching) / (count of positions included), or zero if
+   * no positions were included.
+   * <p>
+   * NOTE(review): if matchGaps() is true while includeGaps() is false, a
+   * gap-residue position adds to the match count but not to the divisor, so
+   * the result could exceed 100 - confirm whether that combination is used.
+   *
+   * @param seq1
+   * @param seq2
+   * @param options
+   *          supplies denominateByShortestLength, includeGaps, matchGaps and
+   *          includeGappedColumns
+   * @return the percentage identity, or 0 if nothing was compared
+   */
+  public static double computePID(String seq1, String seq2,
+          SimilarityParamsI options)
+  {
+    int len1 = seq1.length();
+    int len2 = seq2.length();
+    int width = Math.max(len1, len2);
+    int total = 0;
+    int divideBy = 0;
+
+    for (int i = 0; i < width; i++)
+    {
+      if (i >= len1 || i >= len2)
+      {
+        /*
+         * off the end of one sequence; stop if we are only matching
+         * on the shorter sequence length, else treat as trailing gap
+         */
+        if (options.denominateByShortestLength())
+        {
+          break;
+        }
+        if (options.includeGaps())
+        {
+          divideBy++;
+        }
+        if (options.matchGaps())
+        {
+          total++;
+        }
+        continue;
+      }
+      char c1 = seq1.charAt(i);
+      char c2 = seq2.charAt(i);
+      boolean gap1 = Comparison.isGap(c1);
+      boolean gap2 = Comparison.isGap(c2);
+
+      if (gap1 && gap2)
+      {
+        /*
+         * gap-gap: include if options say so, if so
+         * have to score as identity; else ignore
+         */
+        if (options.includeGappedColumns())
+        {
+          divideBy++;
+          total++;
+        }
+        continue;
+      }
+
+      if (gap1 || gap2)
+      {
+        /*
+         * gap-residue: include if options say so,
+         * count as match if options say so
+         */
+        if (options.includeGaps())
+        {
+          divideBy++;
+        }
+        if (options.matchGaps())
+        {
+          total++;
+        }
+        continue;
+      }
+
+      /*
+       * remaining case is residue-residue: always included,
+       * counted as a match if the same letter (ignoring case)
+       */
+      if (toUpper(c1) == toUpper(c2))
+      {
+        total++;
+      }
+      divideBy++;
+    }
+
+    return divideBy == 0 ? 0D : 100D * total / divideBy;
+  }
+}
+++ /dev/null
-/*
- * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
- * Copyright (C) $$Year-Rel$$ The Jalview Authors
- *
- * This file is part of Jalview.
- *
- * Jalview is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, either version 3
- * of the License, or (at your option) any later version.
- *
- * Jalview is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
- * The Jalview Authors are detailed in the 'AUTHORS' file.
- */
-package jalview.analysis.scoremodels;
-
-import jalview.api.analysis.ScoreModelI;
-import jalview.datamodel.AlignmentView;
-import jalview.util.Comparison;
-
-public class PIDScoreModel implements ScoreModelI
-{
-
- @Override
- public float[][] findDistances(AlignmentView seqData)
- {
- String[] sequenceString = seqData
- .getSequenceStrings(Comparison.GapChars.charAt(0));
- int noseqs = sequenceString.length;
- float[][] distance = new float[noseqs][noseqs];
- for (int i = 0; i < (noseqs - 1); i++)
- {
- for (int j = i; j < noseqs; j++)
- {
- if (j == i)
- {
- distance[i][i] = 0;
- }
- else
- {
- distance[i][j] = 100 - Comparison.PID(sequenceString[i],
- sequenceString[j]);
-
- distance[j][i] = distance[i][j];
- }
- }
- }
- return distance;
- }
-
- @Override
- public String getName()
- {
- return "PID";
- }
-
- @Override
- public boolean isDNA()
- {
- return true;
- }
-
- @Override
- public boolean isProtein()
- {
- return true;
- }
-
-}
+++ /dev/null
-/*
- * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
- * Copyright (C) $$Year-Rel$$ The Jalview Authors
- *
- * This file is part of Jalview.
- *
- * Jalview is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, either version 3
- * of the License, or (at your option) any later version.
- *
- * Jalview is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
- * The Jalview Authors are detailed in the 'AUTHORS' file.
- */
-package jalview.analysis.scoremodels;
-
-import jalview.api.analysis.ScoreModelI;
-import jalview.datamodel.AlignmentView;
-import jalview.util.Comparison;
-
-public abstract class PairwiseSeqScoreModel implements ScoreModelI
-{
- abstract public int getPairwiseScore(char c, char d);
-
- public float[][] findDistances(AlignmentView seqData)
- {
- String[] sequenceString = seqData
- .getSequenceStrings(Comparison.GapChars.charAt(0));
- int noseqs = sequenceString.length;
- float[][] distance = new float[noseqs][noseqs];
-
- int maxscore = 0;
- int end = sequenceString[0].length();
- for (int i = 0; i < (noseqs - 1); i++)
- {
- for (int j = i; j < noseqs; j++)
- {
- int score = 0;
-
- for (int k = 0; k < end; k++)
- {
- try
- {
- score += getPairwiseScore(sequenceString[i].charAt(k),
- sequenceString[j].charAt(k));
- } catch (Exception ex)
- {
- System.err.println("err creating " + getName() + " tree");
- ex.printStackTrace();
- }
- }
-
- distance[i][j] = (float) score;
-
- if (score > maxscore)
- {
- maxscore = score;
- }
- }
- }
-
- for (int i = 0; i < (noseqs - 1); i++)
- {
- for (int j = i; j < noseqs; j++)
- {
- distance[i][j] = (float) maxscore - distance[i][j];
- distance[j][i] = distance[i][j];
- }
- }
- return distance;
- }
-
- abstract public int[][] getMatrix();
-}
--- /dev/null
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.analysis.scoremodels;
+
+import jalview.api.analysis.PairwiseScoreModelI;
+import jalview.api.analysis.SimilarityParamsI;
+import jalview.datamodel.AlignmentView;
+import jalview.math.Matrix;
+import jalview.math.MatrixI;
+import jalview.util.Comparison;
+
+import java.util.Arrays;
+
+/**
+ * A class that models a substitution score matrix for any given alphabet of
+ * symbols
+ */
+public class ScoreMatrix extends SimilarityScoreModel implements
+        PairwiseScoreModelI
+{
+  private static final char GAP_CHARACTER = Comparison.GAP_DASH;
+
+  /*
+   * an arbitrary score to assign for identity of an unknown symbol
+   * (this is the value on the diagonal in the * column of the NCBI matrix)
+   * (though a case could be made for using the minimum diagonal value)
+   */
+  private static final int UNKNOWN_IDENTITY_SCORE = 1;
+
+  /*
+   * Jalview 2.10.1 treated gaps as X (peptide) or N (nucleotide)
+   * for pairwise scoring; 2.10.2 uses gap score (last column) in
+   * score matrix (JAL-2397)
+   * Set this flag to true (via Groovy) for 2.10.1 behaviour
+   * (deliberately not final, as it is changed from outside this class)
+   */
+  private static boolean scoreGapAsAny = false;
+
+  public static final short UNMAPPED = (short) -1;
+
+  private static final String BAD_ASCII_ERROR = "Unexpected character %s in getPairwiseScore";
+
+  private static final int MAX_ASCII = 127;
+
+  /*
+   * the name of the model as shown in menus
+   * each score model in use should have a unique name
+   */
+  private String name;
+
+  /*
+   * a description for the model as shown in tooltips
+   */
+  private String description;
+
+  /*
+   * the characters that the model provides scores for
+   */
+  private char[] symbols;
+
+  /*
+   * the score matrix; both dimensions must equal the number of symbols
+   * matrix[i][j] is the substitution score for replacing symbols[i] with symbols[j]
+   */
+  private float[][] matrix;
+
+  /*
+   * quick lookup to convert from an ascii character value to the index
+   * of the corresponding symbol in the score matrix
+   */
+  private short[] symbolIndex;
+
+  /*
+   * true for Protein Score matrix, false for dna score matrix
+   */
+  private boolean peptide;
+
+  private float minValue;
+
+  private float maxValue;
+
+  /**
+   * Constructor given a name, symbol alphabet, and matrix of scores for pairs
+   * of symbols. The matrix should be square and of the same size as the
+   * alphabet, for example 20x20 for a 20 symbol alphabet.
+   * <p>
+   * The alphabet and values arrays are stored as supplied (not copied). The
+   * matrix is classed as peptide if the alphabet has 20 or more symbols.
+   *
+   * @param theName
+   *          Unique, human readable name for the matrix
+   * @param alphabet
+   *          the symbols to which scores apply
+   * @param values
+   *          Pairwise scores indexed according to the symbol alphabet
+   * @throws IllegalArgumentException
+   *           if values is not square with the same size as the alphabet
+   */
+  public ScoreMatrix(String theName, char[] alphabet, float[][] values)
+  {
+    if (alphabet.length != values.length)
+    {
+      throw new IllegalArgumentException(
+              "score matrix size must match alphabet size");
+    }
+    for (float[] row : values)
+    {
+      if (row.length != alphabet.length)
+      {
+        throw new IllegalArgumentException(
+                "score matrix size must be square");
+      }
+    }
+
+    this.matrix = values;
+    this.name = theName;
+    this.symbols = alphabet;
+
+    symbolIndex = buildSymbolIndex(alphabet);
+
+    findMinMax();
+
+    /*
+     * crude heuristic for now...
+     */
+    peptide = alphabet.length >= 20;
+  }
+
+  /**
+   * Record the minimum and maximum score values found in the matrix. If the
+   * matrix has no values, the minimum is left as Float.MAX_VALUE and the
+   * maximum as -Float.MAX_VALUE.
+   */
+  protected void findMinMax()
+  {
+    float min = Float.MAX_VALUE;
+    float max = -Float.MAX_VALUE;
+    if (matrix != null)
+    {
+      for (float[] row : matrix)
+      {
+        if (row != null)
+        {
+          for (float f : row)
+          {
+            min = Math.min(min, f);
+            max = Math.max(max, f);
+          }
+        }
+      }
+    }
+    minValue = min;
+    maxValue = max;
+  }
+
+  /**
+   * Returns an array A where A[i] is the position in the alphabet array of the
+   * character whose value is i. For example if the alphabet is { 'A', 'D', 'X'
+   * } then A['D'] = A[68] = 1.
+   * <p>
+   * Unmapped characters (not in the alphabet) get an index of -1. Alphabet
+   * symbols with a character value above 127 are not mapped.
+   * <p>
+   * Mappings are added automatically for lower case symbols (for non case
+   * sensitive scoring), unless a mapping for the lower case symbol has already
+   * been made when its upper case symbol is processed.
+   *
+   * @param alphabet
+   *          the symbols, in score matrix order
+   * @return a lookup array of length 128
+   */
+  short[] buildSymbolIndex(char[] alphabet)
+  {
+    short[] index = new short[MAX_ASCII + 1];
+    Arrays.fill(index, UNMAPPED);
+    short pos = 0;
+    for (char c : alphabet)
+    {
+      if (c <= MAX_ASCII)
+      {
+        index[c] = pos;
+      }
+
+      /*
+       * also map lower-case character (unless separately mapped)
+       */
+      if (c >= 'A' && c <= 'Z')
+      {
+        short lowerCase = (short) (c + ('a' - 'A'));
+        if (index[lowerCase] == UNMAPPED)
+        {
+          index[lowerCase] = pos;
+        }
+      }
+      pos++;
+    }
+    return index;
+  }
+
+  /**
+   * Answers the name supplied to the constructor
+   */
+  @Override
+  public String getName()
+  {
+    return name;
+  }
+
+  /**
+   * Answers the description, or null if setDescription has not been called
+   */
+  @Override
+  public String getDescription()
+  {
+    return description;
+  }
+
+  /**
+   * Answers true if this matrix was not classed as peptide on construction
+   */
+  @Override
+  public boolean isDNA()
+  {
+    return !peptide;
+  }
+
+  /**
+   * Answers true if this matrix was classed as peptide on construction
+   */
+  @Override
+  public boolean isProtein()
+  {
+    return peptide;
+  }
+
+  /**
+   * Returns a copy of the score matrix as used in getPairwiseScore. If using
+   * this matrix directly, callers <em>must</em> also call
+   * <code>getMatrixIndex</code> in order to get the matrix index for each
+   * character (symbol).
+   *
+   * @return a new array whose rows are copies of the score matrix rows
+   * @see #getMatrixIndex(char)
+   */
+  public float[][] getMatrix()
+  {
+    // rows are assigned below, so only the outer array is allocated here
+    float[][] v = new float[matrix.length][];
+    for (int i = 0; i < matrix.length; i++)
+    {
+      v[i] = Arrays.copyOf(matrix[i], matrix[i].length);
+    }
+    return v;
+  }
+
+  /**
+   * Answers the matrix index for a given character, or -1 if unmapped in the
+   * matrix. Use this method only if using <code>getMatrix</code> in order to
+   * compute scores directly (without symbol lookup) for efficiency.
+   *
+   * @param c
+   *          the character to look up
+   * @return the row/column index for c, or UNMAPPED (-1)
+   * @see #getMatrix()
+   */
+  public int getMatrixIndex(char c)
+  {
+    if (c < symbolIndex.length)
+    {
+      return symbolIndex[c];
+    }
+    else
+    {
+      return UNMAPPED;
+    }
+  }
+
+  /**
+   * Returns the pairwise score for substituting c with d.
+   * <ul>
+   * <li>if either character is outside the range of the lookup table (value
+   * above 127), an error is written to stderr and 0 is returned</li>
+   * <li>if both characters are mapped, the matrix score is returned</li>
+   * <li>otherwise (one or both unmapped), returns 1 for identity (c == d),
+   * else the minimum score value in the matrix</li>
+   * </ul>
+   */
+  @Override
+  public float getPairwiseScore(char c, char d)
+  {
+    if (c >= symbolIndex.length)
+    {
+      System.err.println(String.format(BAD_ASCII_ERROR, c));
+      return 0;
+    }
+    if (d >= symbolIndex.length)
+    {
+      System.err.println(String.format(BAD_ASCII_ERROR, d));
+      return 0;
+    }
+
+    int cIndex = symbolIndex[c];
+    int dIndex = symbolIndex[d];
+    if (cIndex != UNMAPPED && dIndex != UNMAPPED)
+    {
+      return matrix[cIndex][dIndex];
+    }
+
+    /*
+     * one or both symbols not found in the matrix
+     * currently scoring as 1 (for identity) or the minimum
+     * matrix score value (otherwise)
+     * (a case could be made for using minimum row/column value instead)
+     */
+    return c == d ? UNKNOWN_IDENTITY_SCORE : getMinimumScore();
+  }
+
+  /**
+   * pretty print the matrix (in the non-html format of outputMatrix)
+   */
+  @Override
+  public String toString()
+  {
+    return outputMatrix(false);
+  }
+
+  /**
+   * Print the score matrix, optionally formatted as html, with the alphabet
+   * symbols as column headings and at the start of each row.
+   * <p>
+   * The non-html format should give an output which can be parsed as a score
+   * matrix file
+   * <p>
+   * Scores are read from the matrix by row and column position (matching the
+   * position of each symbol in the alphabet), so that symbols which have no
+   * entry in the ascii lookup table can still be output.
+   *
+   * @param html
+   *          true for an html table, false for tab-delimited text
+   * @return the formatted matrix
+   */
+  public String outputMatrix(boolean html)
+  {
+    StringBuilder sb = new StringBuilder(512);
+
+    /*
+     * heading row with alphabet
+     */
+    if (html)
+    {
+      sb.append("<table border=\"1\">");
+      sb.append("<tr><th></th>");
+    }
+    else
+    {
+      sb.append("ScoreMatrix ").append(getName()).append("\n");
+    }
+    for (char sym : symbols)
+    {
+      if (html)
+      {
+        sb.append("<th> ").append(sym).append(" </th>");
+      }
+      else
+      {
+        sb.append("\t").append(sym);
+      }
+    }
+    sb.append(html ? "</tr>\n" : "\n");
+
+    /*
+     * table of scores
+     */
+    for (int row = 0; row < symbols.length; row++)
+    {
+      if (html)
+      {
+        sb.append("<tr><td>");
+      }
+      sb.append(symbols[row]).append(html ? "</td>" : "");
+      for (int col = 0; col < symbols.length; col++)
+      {
+        sb.append(html ? "<td>" : "\t").append(matrix[row][col])
+                .append(html ? "</td>" : "");
+      }
+      sb.append(html ? "</tr>\n" : "\n");
+    }
+    if (html)
+    {
+      sb.append("</table>");
+    }
+    return sb.toString();
+  }
+
+  /**
+   * Answers the number of symbols coded for (also equal to the number of rows
+   * and columns of the score matrix)
+   *
+   * @return the alphabet length
+   */
+  public int getSize()
+  {
+    return symbols.length;
+  }
+
+  /**
+   * Computes an NxN matrix where N is the number of sequences, and entry [i, j]
+   * is sequence[i] pairwise multiplied with sequence[j], as a sum of scores
+   * computed using the current score matrix. For example
+   * <ul>
+   * <li>Sequences:</li>
+   * <li>FKL</li>
+   * <li>R-D</li>
+   * <li>QIA</li>
+   * <li>GWC</li>
+   * <li>Score matrix is BLOSUM62</li>
+   * <li>Gaps treated same as X (unknown)</li>
+   * <li>product [0, 0] = F.F + K.K + L.L = 6 + 5 + 4 = 15</li>
+   * <li>product [1, 1] = R.R + -.- + D.D = 5 + -1 + 6 = 10</li>
+   * <li>product [2, 2] = Q.Q + I.I + A.A = 5 + 4 + 4 = 13</li>
+   * <li>product [3, 3] = G.G + W.W + C.C = 6 + 11 + 9 = 26</li>
+   * <li>product[0, 1] = F.R + K.- + L.D = -3 + -1 + -3 = -8</li>
+   * <li>and so on</li>
+   * </ul>
+   * Gaps in the sequence strings are represented by the dash character,
+   * unless the scoreGapAsAny flag is set, in which case by N (nucleotide) or X
+   * (peptide).
+   */
+  @Override
+  public MatrixI findSimilarities(AlignmentView seqstrings,
+          SimilarityParamsI options)
+  {
+    char gapChar = scoreGapAsAny ? (seqstrings.isNa() ? 'N' : 'X')
+            : GAP_CHARACTER;
+    String[] seqs = seqstrings.getSequenceStrings(gapChar);
+    return findSimilarities(seqs, options);
+  }
+
+  /**
+   * Computes pairwise similarities of a set of sequences using the given
+   * parameters. Every [row][col] entry is computed separately, since the
+   * score matrix is not required to be symmetric.
+   *
+   * @param seqs
+   *          the sequences as strings
+   * @param params
+   *          passed to computeSimilarity for each pair
+   * @return a square matrix of similarity scores
+   */
+  protected MatrixI findSimilarities(String[] seqs, SimilarityParamsI params)
+  {
+    double[][] values = new double[seqs.length][];
+    for (int row = 0; row < seqs.length; row++)
+    {
+      values[row] = new double[seqs.length];
+      for (int col = 0; col < seqs.length; col++)
+      {
+        double total = computeSimilarity(seqs[row], seqs[col], params);
+        values[row][col] = total;
+      }
+    }
+    return new Matrix(values);
+  }
+
+  /**
+   * Calculates the pairwise similarity of two strings using the given
+   * calculation parameters, as the sum of pairwise scores over the positions
+   * that the parameters say to include.
+   *
+   * @param seq1
+   * @param seq2
+   * @param params
+   *          supplies denominateByShortestLength, includeGappedColumns and
+   *          includeGaps
+   * @return the summed score
+   */
+  protected double computeSimilarity(String seq1, String seq2,
+          SimilarityParamsI params)
+  {
+    int len1 = seq1.length();
+    int len2 = seq2.length();
+    double total = 0;
+
+    int width = Math.max(len1, len2);
+    for (int i = 0; i < width; i++)
+    {
+      if (i >= len1 || i >= len2)
+      {
+        /*
+         * off the end of one sequence; stop if we are only matching
+         * on the shorter sequence length, else treat as trailing gap
+         */
+        if (params.denominateByShortestLength())
+        {
+          break;
+        }
+      }
+
+      char c1 = i >= len1 ? GAP_CHARACTER : seq1.charAt(i);
+      char c2 = i >= len2 ? GAP_CHARACTER : seq2.charAt(i);
+      boolean gap1 = Comparison.isGap(c1);
+      boolean gap2 = Comparison.isGap(c2);
+
+      if (gap1 && gap2)
+      {
+        /*
+         * gap-gap: include if options say so, else ignore
+         */
+        if (!params.includeGappedColumns())
+        {
+          continue;
+        }
+      }
+      else if (gap1 || gap2)
+      {
+        /*
+         * gap-residue: score if options say so
+         */
+        if (!params.includeGaps())
+        {
+          continue;
+        }
+      }
+      float score = getPairwiseScore(c1, c2);
+      total += score;
+    }
+    return total;
+  }
+
+  /**
+   * Answers a hashcode computed from the symbol alphabet and the matrix score
+   * values
+   */
+  @Override
+  public int hashCode()
+  {
+    int hs = Arrays.hashCode(symbols);
+    for (float[] row : matrix)
+    {
+      hs = hs * 31 + Arrays.hashCode(row);
+    }
+    return hs;
+  }
+
+  /**
+   * Answers true if the argument is a ScoreMatrix with the same symbol alphabet
+   * and score values, else false
+   */
+  @Override
+  public boolean equals(Object obj)
+  {
+    if (!(obj instanceof ScoreMatrix))
+    {
+      return false;
+    }
+    ScoreMatrix sm = (ScoreMatrix) obj;
+    return Arrays.equals(symbols, sm.symbols)
+            && Arrays.deepEquals(matrix, sm.matrix);
+  }
+
+  /**
+   * Returns the alphabet the matrix scores for, as a string of characters
+   *
+   * @return the symbols in matrix order
+   */
+  String getSymbols()
+  {
+    return new String(symbols);
+  }
+
+  /**
+   * Sets the description answered by getDescription
+   *
+   * @param desc
+   */
+  public void setDescription(String desc)
+  {
+    description = desc;
+  }
+
+  /**
+   * Answers the lowest score value, as recorded by findMinMax
+   */
+  public float getMinimumScore()
+  {
+    return minValue;
+  }
+
+  /**
+   * Answers the highest score value, as recorded by findMinMax
+   */
+  public float getMaximumScore()
+  {
+    return maxValue;
+  }
+}
--- /dev/null
+package jalview.analysis.scoremodels;
+
+import jalview.api.analysis.ScoreModelI;
+import jalview.io.DataSourceType;
+import jalview.io.FileParse;
+import jalview.io.ScoreMatrixFile;
+
+import java.io.IOException;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+/**
+ * A class that can register and serve instances of ScoreModelI
+ */
+public class ScoreModels
+{
+  /*
+   * the built-in score matrices; any of these is null if its resource
+   * file could not be read (see loadScoreMatrix)
+   */
+  private final ScoreMatrix BLOSUM62;
+
+  private final ScoreMatrix PAM250;
+
+  private final ScoreMatrix DNA;
+
+  /*
+   * the singleton, created when this class is initialised
+   */
+  private static final ScoreModels instance = new ScoreModels();
+
+  /*
+   * registered models keyed by name, in order of registration
+   */
+  private Map<String, ScoreModelI> models;
+
+  /**
+   * Answers the singleton instance of this class
+   *
+   * @return
+   */
+  public static ScoreModels getInstance()
+  {
+    return instance;
+  }
+
+  /**
+   * Private constructor to enforce use of singleton. Registers Jalview's
+   * "built-in" score models, in this order:
+   * <ul>
+   * <li>BLOSUM62</li>
+   * <li>PAM250</li>
+   * <li>PID</li>
+   * <li>DNA</li>
+   * <li>Sequence Feature Similarity</li>
+   * </ul>
+   */
+  private ScoreModels()
+  {
+    /*
+     * using LinkedHashMap keeps models ordered as added
+     */
+    models = new LinkedHashMap<String, ScoreModelI>();
+    BLOSUM62 = loadScoreMatrix("scoreModel/blosum62.scm");
+    PAM250 = loadScoreMatrix("scoreModel/pam250.scm");
+    registerScoreModel(new PIDModel());
+    DNA = loadScoreMatrix("scoreModel/dna.scm");
+    registerScoreModel(new FeatureDistanceModel());
+  }
+
+  /**
+   * Tries to load a score matrix from the given resource file, and if
+   * successful, registers it. If an IOException occurs, a message is written
+   * to stderr and nothing is registered.
+   *
+   * @param resourcePath
+   *          path of the score matrix file, resolved by the class loader
+   * @return the parsed score matrix, or null if it could not be read
+   */
+  ScoreMatrix loadScoreMatrix(String resourcePath)
+  {
+    try
+    {
+      /*
+       * delegate parsing to ScoreMatrixFile
+       */
+      FileParse fp = new FileParse(resourcePath, DataSourceType.CLASSLOADER);
+      ScoreMatrix sm = new ScoreMatrixFile(fp).parseMatrix();
+      registerScoreModel(sm);
+      return sm;
+    } catch (IOException e)
+    {
+      System.err.println("Error reading " + resourcePath + ": "
+              + e.getMessage());
+    }
+    return null;
+  }
+
+  /**
+   * Answers an iterable set of the registered score models. Currently these are
+   * returned in the order in which they were registered. The result is a
+   * read-only view, so that the registry can only be changed through
+   * registerScoreModel.
+   *
+   * @return
+   */
+  public Iterable<ScoreModelI> getModels()
+  {
+    return java.util.Collections.unmodifiableCollection(models.values());
+  }
+
+  /**
+   * Answers the score model registered under the given name, or null if there
+   * is none
+   *
+   * @param s
+   *          the model name, as answered by its getName()
+   * @return
+   */
+  public ScoreModelI forName(String s)
+  {
+    return models.get(s);
+  }
+
+  /**
+   * Registers the model under its name. If a model is already registered with
+   * that name it is replaced, and a warning is written to stderr.
+   *
+   * @param sm
+   *          the model to register
+   */
+  public void registerScoreModel(ScoreModelI sm)
+  {
+    ScoreModelI previous = models.put(sm.getName(), sm);
+    if (previous != null)
+    {
+      System.err.println("Warning: replacing score model "
+              + previous.getName());
+    }
+  }
+
+  /**
+   * Returns the default peptide or nucleotide score model, currently BLOSUM62
+   * or DNA
+   *
+   * @param forPeptide
+   *          true for the peptide default, false for the nucleotide default
+   * @return the matrix, or null if it failed to load
+   */
+  public ScoreMatrix getDefaultModel(boolean forPeptide)
+  {
+    return forPeptide ? BLOSUM62 : DNA;
+  }
+
+  /**
+   * Answers the BLOSUM62 matrix, or null if it failed to load
+   */
+  public ScoreMatrix getBlosum62()
+  {
+    return BLOSUM62;
+  }
+
+  /**
+   * Answers the PAM250 matrix, or null if it failed to load
+   */
+  public ScoreMatrix getPam250()
+  {
+    return PAM250;
+  }
+}
--- /dev/null
+package jalview.analysis.scoremodels;
+
+import jalview.api.analysis.SimilarityParamsI;
+
+/**
+ * A class to hold parameters that configure the pairwise similarity
+ * calculation. Based on the paper
+ *
+ * <pre>
+ * Quantification of the variation in percentage identity for protein sequence alignments
+ * Raghava, GP and Barton, GJ
+ * BMC Bioinformatics. 2006 Sep 19;7:415
+ * </pre>
+ *
+ * @see <a href="https://www.ncbi.nlm.nih.gov/pubmed/16984632">PubMed
+ *      16984632</a>
+ */
+public class SimilarityParams implements SimilarityParamsI
+{
+ /**
+ * Based on Jalview's Comparison.PID method, which includes gaps and counts
+ * them as matching; it counts over the length of the shorter sequence
+ */
+ public static final SimilarityParamsI Jalview = new SimilarityParams(
+ true, true, true, true);
+
+ /**
+ * 'SeqSpace' mode PCA calculation includes gaps but does not count them as
+ * matching; it uses the longest sequence length
+ * <p>
+ * NOTE(review): the fourth constructor argument (shortestLength) is true
+ * here, the same value as the presets documented as using the shorter
+ * sequence length - confirm whether this text or the argument is intended
+ */
+ public static final SimilarityParamsI SeqSpace = new SimilarityParams(
+ true, false, true, true);
+
+ /**
+ * as described in the Raghava-Barton paper
+ * <ul>
+ * <li>ignores gap-gap</li>
+ * <li>does not score gap-residue</li>
+ * <li>includes gap-residue in lengths</li>
+ * <li>matches on longer of two sequences</li>
+ * </ul>
+ */
+ public static final SimilarityParamsI PID1 = new SimilarityParams(false,
+ false, true, false);
+
+ /**
+ * as described in the Raghava-Barton paper
+ * <ul>
+ * <li>ignores gap-gap</li>
+ * <li>ignores gap-residue</li>
+ * <li>matches on longer of two sequences</li>
+ * </ul>
+ */
+ public static final SimilarityParamsI PID2 = new SimilarityParams(false,
+ false, false, false);
+
+ /**
+ * as described in the Raghava-Barton paper
+ * <ul>
+ * <li>ignores gap-gap</li>
+ * <li>ignores gap-residue</li>
+ * <li>matches on shorter of sequences only</li>
+ * </ul>
+ */
+ public static final SimilarityParamsI PID3 = new SimilarityParams(false,
+ false, false, true);
+
+ /**
+ * as described in the Raghava-Barton paper
+ * <ul>
+ * <li>ignores gap-gap</li>
+ * <li>does not score gap-residue</li>
+ * <li>includes gap-residue in lengths</li>
+ * <li>matches on shorter of sequences only</li>
+ * </ul>
+ */
+ public static final SimilarityParamsI PID4 = new SimilarityParams(false,
+ false, true, true);
+
+ private boolean includeGappedColumns;
+
+ private boolean matchGaps;
+
+ private boolean includeGaps;
+
+ private boolean denominateByShortestLength;
+
+ /**
+ * Constructor. Each argument is stored unchanged and answered by the
+ * accessor named below.
+ *
+ * @param includeGapGap
+ *          if true, gap-gap aligned positions are included (answered by
+ *          includeGappedColumns())
+ * @param matchGapResidue
+ *          if true, a gap aligned with a residue counts as a match
+ *          (answered by matchGaps())
+ * @param includeGapResidue
+ *          if true, gapped positions are counted for normalisation by length
+ *          (answered by includeGaps())
+ * @param shortestLength
+ *          if true, the denominator is the shorter sequence length (possibly
+ *          including gaps) (answered by denominateByShortestLength())
+ */
+ public SimilarityParams(boolean includeGapGap, boolean matchGapResidue,
+ boolean includeGapResidue, boolean shortestLength)
+ {
+ includeGappedColumns = includeGapGap;
+ matchGaps = matchGapResidue;
+ includeGaps = includeGapResidue;
+ denominateByShortestLength = shortestLength;
+ }
+
+ @Override
+ public boolean includeGaps()
+ {
+ return includeGaps;
+ }
+
+ @Override
+ public boolean denominateByShortestLength()
+ {
+ return denominateByShortestLength;
+ }
+
+ @Override
+ public boolean includeGappedColumns()
+ {
+ return includeGappedColumns;
+ }
+
+ @Override
+ public boolean matchGaps()
+ {
+ return matchGaps;
+ }
+}
--- /dev/null
+package jalview.analysis.scoremodels;
+
+import jalview.api.analysis.ScoreModelI;
+import jalview.api.analysis.SimilarityParamsI;
+import jalview.datamodel.AlignmentView;
+import jalview.math.MatrixI;
+
+/**
+ * Base class for score models that compute similarity scores. It implements
+ * findDistances by converting the result of findSimilarities (left for the
+ * subclass to supply) into distance scores.
+ */
+public abstract class SimilarityScoreModel implements ScoreModelI
+{
+
+ /**
+ * Computed similarity scores are converted to distance scores by subtracting
+ * every value from the maximum value. That is, maximum similarity corresponds
+ * to zero distance, and smaller similarities to larger distances.
+ *
+ * @param seqData
+ *          the sequence data, passed on to findSimilarities
+ * @param options
+ *          the calculation parameters, passed on to findSimilarities
+ * @return a new matrix of distance scores
+ */
+ @Override
+ public MatrixI findDistances(AlignmentView seqData,
+ SimilarityParamsI options)
+ {
+ MatrixI similarities = findSimilarities(seqData, options);
+
+ MatrixI distances = similarityToDistance(similarities);
+
+ return distances;
+ }
+
+ /**
+ * Converts a matrix of similarity scores to distance scores, by reversing the
+ * range of the scores, mapping the maximum to zero. The input matrix is not
+ * modified: the conversion is applied to a copy of it, using
+ * reverseRange(true) (presumably true selects 'subtract from the maximum' -
+ * confirm against MatrixI).
+ *
+ * @param similarities
+ *          the similarity scores to convert
+ * @return a copy of the input matrix holding the converted scores
+ */
+ public static MatrixI similarityToDistance(MatrixI similarities)
+ {
+ MatrixI distances = similarities.copy();
+
+ distances.reverseRange(true);
+
+ return distances;
+ }
+
+}
package jalview.analysis.scoremodels;
import jalview.analysis.AlignSeq;
-import jalview.api.analysis.ScoreModelI;
+import jalview.api.analysis.SimilarityParamsI;
import jalview.datamodel.AlignmentView;
import jalview.datamodel.SequenceI;
+import jalview.math.Matrix;
+import jalview.math.MatrixI;
import jalview.util.Comparison;
-public class SWScoreModel implements ScoreModelI
+/**
+ * A class that computes pairwise similarity scores using the Smith-Waterman
+ * alignment algorithm
+ */
+public class SmithWatermanModel extends SimilarityScoreModel
{
+ private static final String NAME = "Smith Waterman Score";
+
+ private String description;
+
+ /**
+ * Constructor
+ * <p>
+ * NOTE(review): nothing is assigned to the description field here, so
+ * getDescription() answers null unless the field is set elsewhere - confirm
+ * whether a description should be initialised
+ */
+ public SmithWatermanModel()
+ {
+ }
@Override
- public float[][] findDistances(AlignmentView seqData)
+ public MatrixI findSimilarities(AlignmentView seqData,
+ SimilarityParamsI options)
{
SequenceI[] sequenceString = seqData.getVisibleAlignment(
- Comparison.GapChars.charAt(0)).getSequencesArray();
+ Comparison.GAP_SPACE).getSequencesArray();
int noseqs = sequenceString.length;
- float[][] distance = new float[noseqs][noseqs];
+ double[][] distances = new double[noseqs][noseqs];
- float max = -1;
+ double max = -1;
for (int i = 0; i < (noseqs - 1); i++)
{
as.calcScoreMatrix();
as.traceAlignment();
as.printAlignment(System.out);
- distance[i][j] = (float) as.maxscore;
+ distances[i][j] = as.maxscore;
- if (max < distance[i][j])
+ if (max < distances[i][j])
{
- max = distance[i][j];
+ max = distances[i][j];
}
}
}
- for (int i = 0; i < (noseqs - 1); i++)
- {
- for (int j = i; j < noseqs; j++)
- {
- distance[i][j] = max - distance[i][j];
- distance[j][i] = distance[i][j];
- }
- }
-
- return distance;
+ return new Matrix(distances);
}
@Override
public String getName()
{
- return "Smith Waterman Score";
+ return NAME;
}
@Override
return true;
}
- public String toString()
+ @Override
+ public String getDescription()
{
- return "Score between two sequences aligned with Smith Waterman with default Peptide/Nucleotide matrix";
+ return description;
}
}
* @return Sequence<->Structure mapping as int[][]
* @throws SiftsException
*/
- public StringBuffer getMappingOutput(MappingOutputPojo mop)
+ public StringBuilder getMappingOutput(MappingOutputPojo mop)
throws SiftsException;
/**
--- /dev/null
+package jalview.api.analysis;
+
+/**
+ * An interface that describes classes that can compute similarity (aka
+ * substitution) scores for pairs of residues
+ */
+public interface PairwiseScoreModelI
+{
+ /**
+ * Answers a similarity score between two sequence characters (for
+ * substitution of the first by the second). Typically the highest scores are
+ * for identity, and the lowest for substitution of a residue by one with very
+ * different properties.
+ *
+ * @param c
+ *          the first character (the one being substituted)
+ * @param d
+ *          the second character (the substitute)
+ * @return the similarity score for the pair
+ */
+ abstract public float getPairwiseScore(char c, char d);
+ // TODO make this static when Java 8
+
+}
-/*
- * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
- * Copyright (C) $$Year-Rel$$ The Jalview Authors
- *
- * This file is part of Jalview.
- *
- * Jalview is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, either version 3
- * of the License, or (at your option) any later version.
- *
- * Jalview is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
- * The Jalview Authors are detailed in the 'AUTHORS' file.
- */
package jalview.api.analysis;
import jalview.datamodel.AlignmentView;
+import jalview.math.MatrixI;
public interface ScoreModelI
{
-
- float[][] findDistances(AlignmentView seqData);
-
+ /**
+ * Answers a name for the score model, suitable for display in menus. Names
+ * should be unique across score models in use.
+ *
+ * @return the model name
+ * @see jalview.analysis.scoremodels.ScoreModels#forName(String)
+ */
String getName();
+ /**
+ * Answers an informative description of the model, suitable for use in
+ * tooltips. Descriptions may be internationalised, and need not be unique
+ * (but should be).
+ *
+ * @return the model description
+ */
+ String getDescription();
+
+ /**
+ * Answers true if this model is applicable for nucleotide data (so should be
+ * shown in menus in that context)
+ *
+ * @return true if the model can be used with nucleotide data
+ */
boolean isDNA();
+ /**
+ * Answers true if this model is applicable for peptide data (so should be
+ * shown in menus in that context)
+ *
+ * @return true if the model can be used with peptide data
+ */
boolean isProtein();
+ // TODO getName, isDNA, isProtein can be static methods in Java 8
+
+ /**
+ * Returns a distance score for the given sequence regions, that is, a matrix
+ * whose value [i][j] is the distance of sequence i from sequence j by some
+ * measure. The options parameter provides configuration choices for how the
+ * similarity score is calculated.
+ *
+ * @param seqData
+ *          the sequence regions to compare
+ * @param options
+ *          configuration choices for the calculation
+ * @return the matrix of pairwise distance scores
+ */
+
+ MatrixI findDistances(AlignmentView seqData, SimilarityParamsI options);
+
+ /**
+ * Returns a similarity score for the given sequence regions, that is, a
+ * matrix whose value [i][j] is the similarity of sequence i to sequence j by
+ * some measure. The options parameter provides configuration choices for how
+ * the similarity score is calculated.
+ *
+ * @param seqData
+ *          the sequence regions to compare
+ * @param options
+ *          configuration choices for the calculation
+ * @return the matrix of pairwise similarity scores
+ */
+ MatrixI findSimilarities(AlignmentView seqData, SimilarityParamsI options);
}
--- /dev/null
+package jalview.api.analysis;
+
+/**
+ * A description of options when computing percentage identity of two aligned
+ * sequences
+ */
+public interface SimilarityParamsI
+{
+ /**
+ * Answers true if gap-gap aligned positions should be included in the
+ * calculation
+ *
+ * @return true to include gap-gap positions, false to ignore them
+ */
+ boolean includeGappedColumns();
+
+ /**
+ * Answers true if gap-residue alignment is considered a match
+ *
+ * @return true if a gap aligned with a residue counts as a match
+ */
+ // TODO is this specific to a PID score only?
+ // score matrix will compute whatever is configured for gap-residue
+ boolean matchGaps();
+
+ /**
+ * Answers true if gaps are included in the calculation. This may affect the
+ * calculated score, the denominator (normalisation factor) of the score, or
+ * both. Gap-gap positions are included if this and includeGappedColumns both
+ * answer true.
+ *
+ * @return true if gapped positions are included in the calculation
+ */
+ boolean includeGaps();
+
+ /**
+ * Answers true if only the shortest sequence length is used to divide the
+ * total score, false if the longest sequence length
+ *
+ * @return true to normalise by the shorter sequence length, false to
+ *         normalise by the longer
+ */
+ boolean denominateByShortestLength();
+}
import jalview.analysis.AlignmentSorter;
import jalview.analysis.AnnotationSorter.SequenceAnnotationOrder;
+import jalview.analysis.TreeBuilder;
+import jalview.analysis.scoremodels.PIDModel;
+import jalview.analysis.scoremodels.ScoreModels;
import jalview.api.AlignViewControllerGuiI;
import jalview.api.AlignViewControllerI;
import jalview.api.AlignViewportI;
{
SequenceI[] oldOrder = viewport.getAlignment().getSequencesArray();
AlignmentSorter.sortByPID(viewport.getAlignment(), viewport
- .getAlignment().getSequenceAt(0), null);
+ .getAlignment().getSequenceAt(0));
addHistoryItem(new OrderCommand("Pairwise Sort", oldOrder,
viewport.getAlignment()));
public void averageDistanceTreeMenuItem_actionPerformed()
{
- NewTreePanel("AV", "PID", "Average distance tree using PID");
+ newTreePanel(TreeBuilder.AVERAGE_DISTANCE, new PIDModel().getName(),
+ "Average distance tree using PID");
}
public void neighbourTreeMenuItem_actionPerformed()
{
- NewTreePanel("NJ", "PID", "Neighbour joining tree using PID");
+ newTreePanel(TreeBuilder.NEIGHBOUR_JOINING, new PIDModel().getName(),
+ "Neighbour joining tree using PID");
}
protected void njTreeBlosumMenuItem_actionPerformed()
{
- NewTreePanel("NJ", "BL", "Neighbour joining tree using BLOSUM62");
+ newTreePanel(TreeBuilder.NEIGHBOUR_JOINING, ScoreModels.getInstance()
+ .getBlosum62().getName(),
+ "Neighbour joining tree using BLOSUM62");
}
protected void avTreeBlosumMenuItem_actionPerformed()
{
- NewTreePanel("AV", "BL", "Average distance tree using BLOSUM62");
+ newTreePanel(TreeBuilder.AVERAGE_DISTANCE, ScoreModels.getInstance()
+ .getBlosum62().getName(),
+ "Average distance tree using BLOSUM62");
}
- void NewTreePanel(String type, String pwType, String title)
+ void newTreePanel(String type, String pwType, String title)
{
// are the sequences aligned?
if (!viewport.getAlignment().isAligned(false))
*/
package jalview.appletgui;
-import jalview.analysis.NJTree;
+import jalview.analysis.TreeModel;
import jalview.api.AlignViewportI;
import jalview.api.FeatureSettingsModelI;
import jalview.bin.JalviewLite;
boolean validCharWidth = true;
- NJTree currentTree = null;
+ TreeModel currentTree = null;
public jalview.bin.JalviewLite applet;
ranges.setEndSeq(height / getCharHeight());
}
- public void setCurrentTree(NJTree tree)
+ public void setCurrentTree(TreeModel tree)
{
currentTree = tree;
}
- public NJTree getCurrentTree()
+ public TreeModel getCurrentTree()
{
return currentTree;
}
*/
package jalview.appletgui;
+import jalview.analysis.scoremodels.ScoreModels;
+import jalview.analysis.scoremodels.SimilarityParams;
+import jalview.api.analysis.ScoreModelI;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentView;
import jalview.datamodel.ColumnSelection;
int top = 0;
- public PCAPanel(AlignViewport av)
+ public PCAPanel(AlignViewport viewport)
{
try
{
zCombobox.addItem("dim " + i);
}
- this.av = av;
- boolean selected = av.getSelectionGroup() != null
- && av.getSelectionGroup().getSize() > 0;
- AlignmentView seqstrings = av.getAlignmentView(selected);
- boolean nucleotide = av.getAlignment().isNucleotide();
+ this.av = viewport;
+ boolean selected = viewport.getSelectionGroup() != null
+ && viewport.getSelectionGroup().getSize() > 0;
+ AlignmentView seqstrings = viewport.getAlignmentView(selected);
+ boolean nucleotide = viewport.getAlignment().isNucleotide();
SequenceI[] seqs;
if (!selected)
{
- seqs = av.getAlignment().getSequencesArray();
+ seqs = viewport.getAlignment().getSequencesArray();
}
else
{
- seqs = av.getSelectionGroup().getSequencesInOrder(av.getAlignment());
+ seqs = viewport.getSelectionGroup().getSequencesInOrder(viewport.getAlignment());
}
SeqCigar sq[] = seqstrings.getSequences();
int length = sq[0].getWidth();
return;
}
}
- pcaModel = new PCAModel(seqstrings, seqs, nucleotide);
- rc = new RotatableCanvas(av);
+ ScoreModelI scoreModel = ScoreModels.getInstance().getDefaultModel(
+ !nucleotide);
+ pcaModel = new PCAModel(seqstrings, seqs, nucleotide, scoreModel,
+ SimilarityParams.SeqSpace);
+
+ rc = new RotatableCanvas(viewport);
embedMenuIfNeeded(rc);
add(rc, BorderLayout.CENTER);
/**
* DOCUMENT ME!
*/
+ @Override
public void run()
{
// TODO progress indicator
rc.paint(rc.getGraphics());
}
+ @Override
public void actionPerformed(ActionEvent evt)
{
if (evt.getSource() == inputData)
}
}
+ @Override
public void itemStateChanged(ItemEvent evt)
{
if (evt.getSource() == xCombobox)
if (!pcaModel.isNucleotide())
{
pcaModel.setNucleotide(true);
+ ScoreModelI scoreModel = ScoreModels.getInstance().getDefaultModel(
+ false);
+ pcaModel.setScoreModel(scoreModel);
new Thread(this).start();
}
}
if (pcaModel.isNucleotide())
{
pcaModel.setNucleotide(false);
+ ScoreModelI scoreModel = ScoreModels.getInstance().getDefaultModel(
+ true);
+ pcaModel.setScoreModel(scoreModel);
new Thread(this).start();
}
}
package jalview.appletgui;
import jalview.analysis.Conservation;
-import jalview.analysis.NJTree;
+import jalview.analysis.TreeModel;
import jalview.api.AlignViewportI;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceGroup;
import java.awt.event.MouseMotionListener;
import java.util.Enumeration;
import java.util.Hashtable;
+import java.util.List;
import java.util.Vector;
public class TreeCanvas extends Panel implements MouseListener,
MouseMotionListener
{
- NJTree tree;
+ TreeModel tree;
ScrollPane scrollPane;
selected.addOrRemove(sequence, true);
}
- public void setTree(NJTree tree)
+ public void setTree(TreeModel tree2)
{
- this.tree = tree;
- tree.findHeight(tree.getTopNode());
+ this.tree = tree2;
+ tree2.findHeight(tree2.getTopNode());
// Now have to calculate longest name based on the leaves
- Vector<SequenceNode> leaves = tree.findLeaves(tree.getTopNode());
+ Vector<SequenceNode> leaves = tree2.findLeaves(tree2.getTopNode());
boolean has_placeholders = false;
longestName = "";
}
public void drawNode(Graphics g, SequenceNode node, float chunk,
- float scale, int width, int offx, int offy)
+ double scale, int width, int offx, int offy)
{
if (node == null)
{
{
// Drawing leaf node
- float height = node.height;
- float dist = node.dist;
+ double height = node.height;
+ double dist = node.dist;
int xstart = (int) ((height - dist) * scale) + offx;
int xend = (int) (height * scale) + offx;
drawNode(g, (SequenceNode) node.right(), chunk, scale, width, offx,
offy);
- float height = node.height;
- float dist = node.dist;
+ double height = node.height;
+ double dist = node.dist;
int xstart = (int) ((height - dist) * scale) + offx;
int xend = (int) (height * scale) + offx;
SequenceNode top = tree.getTopNode();
- float wscale = (float) (width * .8 - offx * 2) / tree.getMaxHeight();
+ double wscale = (float) (width * .8 - offx * 2) / tree.getMaxHeight();
if (top.count == 0)
{
top.count = ((SequenceNode) top.left()).count
}
public void pickNode(Rectangle pickBox, SequenceNode node, float chunk,
- float scale, int width, int offx, int offy)
+ double scale, int width, int offx, int offy)
{
if (node == null)
{
if (node.left() == null && node.right() == null)
{
- float height = node.height;
+ double height = node.height;
// float dist = node.dist;
// int xstart = (int) ( (height - dist) * scale) + offx;
// for
// scrollbar
- float wscale = (width - labelLength - offx * 2) / tree.getMaxHeight();
+ double wscale = (width - labelLength - offx * 2) / tree.getMaxHeight();
SequenceNode top = tree.getTopNode();
threshold = (float) (x - offx)
/ (float) (getSize().width - labelLength - 2 * offx);
- tree.getGroups().removeAllElements();
- tree.groupNodes(tree.getTopNode(), threshold);
+ List<SequenceNode> groups = tree.groupNodes(threshold);
setColor(tree.getTopNode(), Color.black);
av.setSelectionGroup(null);
codingComplement.clearSequenceColours();
}
- colourGroups();
+ colourGroups(groups);
}
}
}
- void colourGroups()
+ void colourGroups(List<SequenceNode> groups)
{
- for (int i = 0; i < tree.getGroups().size(); i++)
+ for (int i = 0; i < groups.size(); i++)
{
Color col = new Color((int) (Math.random() * 255),
(int) (Math.random() * 255), (int) (Math.random() * 255));
- setColor(tree.getGroups().elementAt(i), col.brighter());
+ setColor(groups.get(i), col.brighter());
- Vector<SequenceNode> l = tree.findLeaves(tree.getGroups()
- .elementAt(i));
+ Vector<SequenceNode> l = tree.findLeaves(groups.get(i));
Vector<SequenceI> sequences = new Vector<SequenceI>();
for (int j = 0; j < l.size(); j++)
*/
package jalview.appletgui;
+import jalview.analysis.AverageDistanceTree;
import jalview.analysis.NJTree;
+import jalview.analysis.TreeBuilder;
+import jalview.analysis.TreeModel;
+import jalview.analysis.scoremodels.ScoreModels;
+import jalview.analysis.scoremodels.SimilarityParams;
import jalview.api.analysis.ScoreModelI;
import jalview.api.analysis.ViewBasedAnalysisI;
+import jalview.bin.Cache;
import jalview.datamodel.Alignment;
-import jalview.datamodel.AlignmentView;
import jalview.datamodel.ColumnSelection;
import jalview.datamodel.SequenceI;
import jalview.io.NewickFile;
-import jalview.schemes.ResidueProperties;
import jalview.util.MessageManager;
import java.awt.BorderLayout;
TreeCanvas treeCanvas;
- NJTree tree;
+ TreeModel tree;
AlignmentPanel ap;
AlignViewport av;
- public NJTree getTree()
+ public TreeModel getTree()
{
return tree;
}
+ @Override
public void finalize() throws Throwable
{
ap = null;
// yields unaligned seqs)
// or create a selection box around columns in alignment view
// test Alignment(SeqCigar[])
- if (tree.seqData != null)
+ if (tree.getOriginalData() != null)
{
char gc = '-';
try
} catch (Exception ex)
{
}
- ;
- Object[] alAndColsel = tree.seqData
+
+ Object[] alAndColsel = tree.getOriginalData()
.getAlignmentAndColumnSelection(gc);
if (alAndColsel != null && alAndColsel[0] != null)
this.newtree = newtree;
}
+ @Override
public void run()
{
if (newtree != null)
{
- if (odata == null)
- {
- tree = new NJTree(av.getAlignment().getSequencesArray(), newtree);
- }
- else
- {
- tree = new NJTree(av.getAlignment().getSequencesArray(), odata,
- newtree);
- }
-
+ tree = new TreeModel(av.getAlignment().getSequencesArray(), odata,
+ newtree);
}
else
{
- int start, end;
- SequenceI[] seqs;
- boolean selview = av.getSelectionGroup() != null
- && av.getSelectionGroup().getSize() > 1;
- AlignmentView seqStrings = av.getAlignmentView(selview);
- if (!selview)
- {
- start = 0;
- end = av.getAlignment().getWidth();
- seqs = av.getAlignment().getSequencesArray();
- }
- else
- {
- start = av.getSelectionGroup().getStartRes();
- end = av.getSelectionGroup().getEndRes() + 1;
- seqs = av.getSelectionGroup().getSequencesInOrder(
- av.getAlignment());
- }
- ScoreModelI sm = ResidueProperties.getScoreModel(pwtype);
- if (sm instanceof ViewBasedAnalysisI)
- {
- try
- {
- sm = sm.getClass().newInstance();
- ((ViewBasedAnalysisI) sm)
- .configureFromAlignmentView(treeCanvas.ap);
- } catch (Exception q)
- {
- System.err.println("Couldn't create a scoremodel instance for "
- + sm.getName());
- q.printStackTrace();
- }
- tree = new NJTree(seqs, seqStrings, type, pwtype, sm, start, end);
- }
- else
- {
- tree = new NJTree(seqs, seqStrings, type, pwtype, null, start,
- end);
- }
+ ScoreModelI sm = configureScoreModel(pwtype);
+ TreeBuilder njtree = type.equals(TreeBuilder.NEIGHBOUR_JOINING) ? new NJTree(
+ av, sm, SimilarityParams.Jalview)
+ : new AverageDistanceTree(av, sm, SimilarityParams.Jalview);
+ tree = new TreeModel(njtree);
}
tree.reCount(tree.getTopNode());
}
}
+ @Override
public void actionPerformed(ActionEvent evt)
{
if (evt.getSource() == newickOutput)
}
}
+ @Override
public void itemStateChanged(ItemEvent evt)
{
if (evt.getSource() == fitToWindow)
inputData.addActionListener(this);
}
+ /**
+ * Gets the score model registered under the given name. If the score model
+ * is one that requires to get state data from the current view (it is a
+ * ViewBasedAnalysisI), a new instance of its class is created and allowed to
+ * configure itself from this panel's alignment panel (treeCanvas.ap), so
+ * that the registered instance is not the one configured.
+ * <p>
+ * If creating or configuring the new instance throws an exception, an error
+ * is logged and whichever model is then held is returned: the registered
+ * instance if instantiation failed, or the new instance if its configuration
+ * failed.
+ * <p>
+ * NOTE(review): the caught exception is not passed to the logger, so its
+ * cause and stack trace are not recorded - consider logging it as well
+ *
+ * @param modelName
+ *          the name the score model is registered under
+ * @return the score model to use, or null if no model has that name
+ */
+ protected ScoreModelI configureScoreModel(String modelName)
+ {
+ ScoreModelI sm = ScoreModels.getInstance().forName(modelName);
+ if (sm instanceof ViewBasedAnalysisI)
+ {
+ try
+ {
+ sm = sm.getClass().newInstance();
+ ((ViewBasedAnalysisI) sm).configureFromAlignmentView(treeCanvas.ap);
+ } catch (Exception q)
+ {
+ Cache.log.error("Couldn't create a scoremodel instance for "
+ + sm.getName());
+ }
+ }
+ return sm;
+ }
+
}
data = aparser.getValue("tree", true);
if (data != null)
{
- jalview.io.NewickFile fin = null;
try
{
System.out.println("CMD [-tree " + data
+ "] executed successfully!");
- fin = new NewickFile(data,
+ NewickFile nf = new NewickFile(data,
AppletFormatAdapter.checkProtocol(data));
- if (fin != null)
- {
- af.getViewport().setCurrentTree(
- af.ShowNewickTree(fin, data).getTree());
- }
+ af.getViewport().setCurrentTree(
+ af.showNewickTree(nf, data).getTree());
} catch (IOException ex)
{
System.err.println("Couldn't add tree " + data);
*/
package jalview.datamodel;
+import jalview.analysis.scoremodels.ScoreMatrix;
import jalview.schemes.ResidueProperties;
-import jalview.schemes.ScoreMatrix;
/**
* Encode a sequence as a numeric vector using either classic residue binary
/**
* ancode using substitution matrix given in matrix
*
- * @param matrix
+ * @param smtrx
*/
- public void matrixEncode(final ScoreMatrix matrix)
+ public void matrixEncode(final ScoreMatrix smtrx)
throws InvalidSequenceTypeException
{
- if (isNa != matrix.isDNA())
+ if (isNa != smtrx.isDNA())
{
throw new InvalidSequenceTypeException("matrix "
- + matrix.getClass().getCanonicalName()
+ + smtrx.getClass().getCanonicalName()
+ " is not a valid matrix for "
+ (isNa ? "nucleotide" : "protein") + "sequences");
}
- matrixEncode(matrix.isDNA() ? ResidueProperties.nucleotideIndex
- : ResidueProperties.aaIndex, matrix.getMatrix());
+ matrixEncode(smtrx.isDNA() ? ResidueProperties.nucleotideIndex
+ : ResidueProperties.aaIndex, smtrx.getMatrix());
}
- private void matrixEncode(final int[] aaIndex, final int[][] matrix)
+ private void matrixEncode(final int[] aaIndex, final float[][] matrix)
{
int nores = initMatrixGetNoRes();
public class SequenceNode extends BinaryNode
{
/** DOCUMENT ME!! */
- public float dist;
+ public double dist;
/** DOCUMENT ME!! */
public int count;
/** DOCUMENT ME!! */
- public float height;
+ public double height;
/** DOCUMENT ME!! */
public float ycount;
{
char q = name.charAt(c);
if ('0' <= q && q <= '9')
+ {
continue;
+ }
return true;
}
}
import jalview.api.SplitContainerI;
import jalview.api.ViewStyleI;
import jalview.api.analysis.ScoreModelI;
+import jalview.api.analysis.SimilarityParamsI;
import jalview.bin.Cache;
import jalview.bin.Jalview;
import jalview.commands.CommandI;
import jalview.io.FileFormatI;
import jalview.io.FileFormats;
import jalview.io.FileLoader;
+import jalview.io.FileParse;
import jalview.io.FormatAdapter;
import jalview.io.HtmlSvgOutput;
import jalview.io.IdentifyFile;
import jalview.io.JalviewFileView;
import jalview.io.JnetAnnotationMaker;
import jalview.io.NewickFile;
+import jalview.io.ScoreMatrixFile;
import jalview.io.TCoffeeScoreFile;
import jalview.jbgui.GAlignFrame;
import jalview.schemes.ColourSchemeI;
import jalview.schemes.ColourSchemes;
import jalview.schemes.ResidueColourScheme;
-import jalview.schemes.ResidueProperties;
import jalview.schemes.TCoffeeColourScheme;
import jalview.util.MessageManager;
import jalview.viewmodel.AlignmentViewport;
setMenusFromViewport(viewport);
buildSortByAnnotationScoresMenu();
- buildTreeMenu();
+ calculateTree.addActionListener(new ActionListener()
+ {
+
+ @Override
+ public void actionPerformed(ActionEvent e)
+ {
+ openTreePcaDialog();
+ }
+ });
buildColourMenu();
if (Desktop.desktop != null)
{
SequenceI[] oldOrder = viewport.getAlignment().getSequencesArray();
AlignmentSorter.sortByPID(viewport.getAlignment(), viewport
- .getAlignment().getSequenceAt(0), null);
+ .getAlignment().getSequenceAt(0));
addHistoryItem(new OrderCommand("Pairwise Sort", oldOrder,
viewport.getAlignment()));
alignPanel.paintAlignment(true);
}
/**
- * DOCUMENT ME!
- *
- * @param e
- * DOCUMENT ME!
- */
- @Override
- public void averageDistanceTreeMenuItem_actionPerformed(ActionEvent e)
- {
- newTreePanel("AV", "PID", "Average distance tree using PID");
- }
-
- /**
- * DOCUMENT ME!
- *
- * @param e
- * DOCUMENT ME!
- */
- @Override
- public void neighbourTreeMenuItem_actionPerformed(ActionEvent e)
- {
- newTreePanel("NJ", "PID", "Neighbour joining tree using PID");
- }
-
- /**
- * DOCUMENT ME!
- *
- * @param e
- * DOCUMENT ME!
- */
- @Override
- protected void njTreeBlosumMenuItem_actionPerformed(ActionEvent e)
- {
- newTreePanel("NJ", "BL", "Neighbour joining tree using BLOSUM62");
- }
-
- /**
- * DOCUMENT ME!
- *
- * @param e
- * DOCUMENT ME!
- */
- @Override
- protected void avTreeBlosumMenuItem_actionPerformed(ActionEvent e)
- {
- newTreePanel("AV", "BL", "Average distance tree using BLOSUM62");
- }
-
- /**
- * DOCUMENT ME!
+ * Constructs a tree panel and adds it to the desktop
*
* @param type
- * DOCUMENT ME!
- * @param pwType
- * DOCUMENT ME!
- * @param title
- * DOCUMENT ME!
+ * tree type (NJ or AV)
+ * @param sm
+ * distance or similarity score model used to compute the tree
+ * @param options
+ * parameters for the distance or similarity calculation
*/
- void newTreePanel(String type, String pwType, String title)
+ void newTreePanel(String type, ScoreModelI sm, SimilarityParamsI options)
{
+ String frameTitle = "";
TreePanel tp;
if (viewport.getSelectionGroup() != null
}
}
- title = title + " on region";
- tp = new TreePanel(alignPanel, type, pwType);
+ tp = new TreePanel(alignPanel, type, sm, options);
+ frameTitle = tp.getPanelTitle() + " on region";
}
else
{
- // are the visible sequences aligned?
- if (!viewport.getAlignment().isAligned(false))
- {
- JvOptionPane
- .showMessageDialog(
- Desktop.desktop,
- MessageManager
- .getString("label.sequences_must_be_aligned_before_creating_tree"),
- MessageManager
- .getString("label.sequences_not_aligned"),
- JvOptionPane.WARNING_MESSAGE);
-
- return;
- }
-
if (viewport.getAlignment().getHeight() < 2)
{
return;
}
- tp = new TreePanel(alignPanel, type, pwType);
+ tp = new TreePanel(alignPanel, type, sm, options);
+ frameTitle = tp.getPanelTitle();
}
- title += " from ";
+ frameTitle += " from ";
if (viewport.viewName != null)
{
- title += viewport.viewName + " of ";
+ frameTitle += viewport.viewName + " of ";
}
- title += this.title;
+ frameTitle += this.title;
- Desktop.addInternalFrame(tp, title, 600, 500);
+ Desktop.addInternalFrame(tp, frameTitle, 600, 500);
}
/**
* call. Listeners are added to remove the menu item when the treePanel is
* closed, and adjust the tree leaf to sequence mapping when the alignment is
* modified.
- *
- * @param treePanel
- * Displayed tree window.
- * @param title
- * SortBy menu item title.
*/
@Override
- public void buildTreeMenu()
+ public void buildTreeSortMenu()
{
- calculateTree.removeAll();
- // build the calculate menu
-
- for (final String type : new String[] { "NJ", "AV" })
- {
- String treecalcnm = MessageManager.getString("label.tree_calc_"
- + type.toLowerCase());
- for (final String pwtype : ResidueProperties.scoreMatrices.keySet())
- {
- JMenuItem tm = new JMenuItem();
- ScoreModelI sm = ResidueProperties.scoreMatrices.get(pwtype);
- if (sm.isDNA() == viewport.getAlignment().isNucleotide()
- || sm.isProtein() == !viewport.getAlignment()
- .isNucleotide())
- {
- String smn = MessageManager.getStringOrReturn(
- "label.score_model_", sm.getName());
- final String title = MessageManager.formatMessage(
- "label.treecalc_title", treecalcnm, smn);
- tm.setText(title);//
- tm.addActionListener(new java.awt.event.ActionListener()
- {
- @Override
- public void actionPerformed(ActionEvent e)
- {
- newTreePanel(type, pwtype, title);
- }
- });
- calculateTree.add(tm);
- }
-
- }
- }
sortByTreeMenu.removeAll();
List<Component> comps = PaintRefresher.components.get(viewport
if (value == JalviewFileChooser.APPROVE_OPTION)
{
- String choice = chooser.getSelectedFile().getPath();
- jalview.bin.Cache.setProperty("LAST_DIRECTORY", choice);
- jalview.io.NewickFile fin = null;
+ String filePath = chooser.getSelectedFile().getPath();
+ Cache.setProperty("LAST_DIRECTORY", filePath);
+ NewickFile fin = null;
try
{
- fin = new NewickFile(choice, DataSourceType.FILE);
- viewport.setCurrentTree(ShowNewickTree(fin, choice).getTree());
+ fin = new NewickFile(filePath, DataSourceType.FILE);
+ viewport.setCurrentTree(showNewickTree(fin, filePath).getTree());
} catch (Exception ex)
{
JvOptionPane
}
}
- public TreePanel ShowNewickTree(NewickFile nf, String title)
+ public TreePanel showNewickTree(NewickFile nf, String treeTitle)
{
- return ShowNewickTree(nf, title, 600, 500, 4, 5);
+ return showNewickTree(nf, treeTitle, 600, 500, 4, 5);
}
- public TreePanel ShowNewickTree(NewickFile nf, String title,
- AlignmentView input)
- {
- return ShowNewickTree(nf, title, input, 600, 500, 4, 5);
- }
-
- public TreePanel ShowNewickTree(NewickFile nf, String title, int w,
+ public TreePanel showNewickTree(NewickFile nf, String treeTitle, int w,
int h, int x, int y)
{
- return ShowNewickTree(nf, title, null, w, h, x, y);
+ return showNewickTree(nf, treeTitle, null, w, h, x, y);
}
/**
- * Add a treeviewer for the tree extracted from a newick file object to the
+ * Add a treeviewer for the tree extracted from a Newick file object to the
* current alignment view
*
* @param nf
* position
* @return TreePanel handle
*/
- public TreePanel ShowNewickTree(NewickFile nf, String title,
+ public TreePanel showNewickTree(NewickFile nf, String treeTitle,
AlignmentView input, int w, int h, int x, int y)
{
TreePanel tp = null;
if (nf.getTree() != null)
{
- tp = new TreePanel(alignPanel, "FromFile", title, nf, input);
+ tp = new TreePanel(alignPanel, nf, treeTitle, input);
tp.setSize(w, h);
tp.setLocation(x, y);
}
- Desktop.addInternalFrame(tp, title, w, h);
+ Desktop.addInternalFrame(tp, treeTitle, w, h);
}
} catch (Exception ex)
{
}
/**
- * Attempt to load a "dropped" file or URL string: First by testing whether
- * it's an Annotation file, then a JNet file, and finally a features file. If
- * all are false then the user may have dropped an alignment file onto this
- * AlignFrame.
+ * Attempt to load a "dropped" file or URL string, by testing in turn for
+ * <ul>
+ * <li>an Annotation file</li>
+ * <li>a JNet file</li>
+ * <li>a features file</li>
+ * <li>else try to interpret as an alignment file</li>
+ * </ul>
*
* @param file
* either a filename or a URL string.
{
format = new IdentifyFile().identify(file, sourceType);
}
- if (FileFormat.Jnet.equals(format))
+ if (FileFormat.ScoreMatrix == format)
+ {
+ ScoreMatrixFile sm = new ScoreMatrixFile(new FileParse(file,
+ sourceType));
+ sm.parse();
+ // todo: i18n this message
+ statusBar
+ .setText(MessageManager.formatMessage(
+ "label.successfully_loaded_matrix",
+ sm.getMatrixName()));
+ }
+ else if (FileFormat.Jnet.equals(format))
{
JPredFile predictions = new JPredFile(file, sourceType);
new JnetAnnotationMaker();
ColourSchemeI colourScheme = viewport.getGlobalColourScheme();
ColourMenuHelper.setColourSelected(colourMenu, colourScheme);
}
+
+  /**
+   * Shows the Tree / PCA calculation chooser for this alignment frame. Nothing
+   * happens if the alignment panel already holds a reference to a chooser
+   * dialog.
+   */
+  protected void openTreePcaDialog()
+  {
+    boolean alreadyOpen = alignPanel.getCalculationDialog() != null;
+    if (alreadyOpen)
+    {
+      return;
+    }
+    new CalculationChooser(AlignFrame.this);
+  }
}
class PrintThread extends Thread
import jalview.analysis.AlignmentUtils;
import jalview.analysis.AnnotationSorter.SequenceAnnotationOrder;
-import jalview.analysis.NJTree;
+import jalview.analysis.TreeModel;
import jalview.api.AlignViewportI;
import jalview.api.AlignmentViewPanel;
import jalview.api.FeatureColourI;
{
Font font;
- NJTree currentTree = null;
+ TreeModel currentTree = null;
boolean cursorMode = false;
* @param tree
* DOCUMENT ME!
*/
- public void setCurrentTree(NJTree tree)
+ public void setCurrentTree(TreeModel tree)
{
currentTree = tree;
}
*
* @return DOCUMENT ME!
*/
- public NJTree getCurrentTree()
+ public TreeModel getCurrentTree()
{
return currentTree;
}
private PropertyChangeListener propertyChangeListener;
+ private CalculationChooser calculationDialog;
+
/**
* Creates a new AlignmentPanel object.
*
PaintRefresher.RemoveComponent(getIdPanel().getIdCanvas());
PaintRefresher.RemoveComponent(this);
+ closeChildFrames();
+
/*
* try to ensure references are nulled
*/
}
/**
+ * Close any open dialogs that would be orphaned when this one is closed
+ */
+  protected void closeChildFrames()
+  {
+    if (calculationDialog == null)
+    {
+      // no PCA/Tree chooser is registered for this panel
+      return;
+    }
+    calculationDialog.closeFrame();
+  }
+
+ /**
* hides or shows dynamic annotation rows based on groups and av state flags
*/
public void updateAnnotation()
repaint();
}
}
+
+  /**
+   * Records which PCA/Tree chooser dialog (if any) is open for this panel.
+   * Callers should pass null once that dialog has been closed.
+   *
+   * @param calculationChooser
+   *          the open chooser dialog, or null to clear the reference
+   */
+  public void setCalculationDialog(CalculationChooser calculationChooser)
+  {
+    this.calculationDialog = calculationChooser;
+  }
+
+  /**
+   * Answers the PCA/Tree chooser dialog currently registered for this panel
+   *
+   * @return the registered chooser dialog, or null if none is registered
+   */
+  public CalculationChooser getCalculationDialog()
+  {
+    return this.calculationDialog;
+  }
}
--- /dev/null
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.gui;
+
+import jalview.analysis.TreeBuilder;
+import jalview.analysis.scoremodels.ScoreModels;
+import jalview.analysis.scoremodels.SimilarityParams;
+import jalview.api.analysis.ScoreModelI;
+import jalview.api.analysis.SimilarityParamsI;
+import jalview.util.MessageManager;
+
+import java.awt.Color;
+import java.awt.Component;
+import java.awt.FlowLayout;
+import java.awt.Font;
+import java.awt.GridLayout;
+import java.awt.event.ActionEvent;
+import java.awt.event.ItemEvent;
+import java.awt.event.ItemListener;
+import java.awt.event.MouseAdapter;
+import java.awt.event.MouseEvent;
+import java.beans.PropertyVetoException;
+import java.util.ArrayList;
+import java.util.List;
+
+import javax.swing.ButtonGroup;
+import javax.swing.JButton;
+import javax.swing.JCheckBox;
+import javax.swing.JComboBox;
+import javax.swing.JInternalFrame;
+import javax.swing.JLabel;
+import javax.swing.JLayeredPane;
+import javax.swing.JPanel;
+import javax.swing.JRadioButton;
+import javax.swing.event.InternalFrameAdapter;
+import javax.swing.event.InternalFrameEvent;
+
+/**
+ * A dialog where a user can choose and action Tree or PCA calculation options
+ */
+public class CalculationChooser extends JPanel
+{
+ /*
+ * flag for whether gap matches residue in the PID calculation for a Tree
+ * - true gives Jalview 2.10.1 behaviour
+ * - set to false (using Groovy) for a more correct tree
+ * (JAL-374)
+ */
+ private static boolean treeMatchGaps = true;
+
+ private static final Font VERDANA_11PT = new Font("Verdana", 0, 11);
+
+ AlignFrame af;
+
+ JRadioButton pca;
+
+ JRadioButton tree;
+
+ JRadioButton neighbourJoining;
+
+ JRadioButton averageDistance;
+
+ JComboBox<String> modelNames;
+
+ private JInternalFrame frame;
+
+ private ButtonGroup treeTypes;
+
+ private JCheckBox includeGaps;
+
+ private JCheckBox matchGaps;
+
+ private JCheckBox includeGappedColumns;
+
+ private JCheckBox shorterSequence;
+
+  /**
+   * Creates the chooser for the given alignment frame, lays it out and adds it
+   * to the desktop, then registers it with the frame's alignment panel
+   *
+   * @param alignFrame
+   *          the alignment window this chooser acts on
+   */
+  public CalculationChooser(AlignFrame alignFrame)
+  {
+    this.af = alignFrame;
+    init();
+    alignFrame.alignPanel.setCalculationDialog(this);
+  }
+
+  /**
+   * Lays out the panel inside a new internal frame and adds that frame to the
+   * desktop. Also installs a listener that clears the alignment panel's
+   * reference to this dialog when the frame is closed.
+   */
+  void init()
+  {
+    frame = new JInternalFrame();
+    frame.setContentPane(this);
+    this.setBackground(Color.white);
+
+    /*
+     * Layout consists of 4 or 5 panels:
+     * - first with choice of Tree or PCA
+     * - second with choice of tree method NJ or AV
+     * - third with choice of score model
+     * - fourth with score model parameter options [suppressed]
+     * - fifth with OK and Cancel
+     */
+    tree = new JRadioButton(MessageManager.getString("label.tree"));
+    tree.setOpaque(false);
+    pca = new JRadioButton(
+            MessageManager.getString("label.principal_component_analysis"));
+    pca.setOpaque(false);
+    neighbourJoining = new JRadioButton(
+            MessageManager.getString("label.tree_calc_nj"));
+    averageDistance = new JRadioButton(
+            MessageManager.getString("label.tree_calc_av"));
+
+    /*
+     * the tree method choice only applies when 'Tree' is selected
+     */
+    ItemListener listener = new ItemListener()
+    {
+      @Override
+      public void itemStateChanged(ItemEvent e)
+      {
+        neighbourJoining.setEnabled(tree.isSelected());
+        averageDistance.setEnabled(tree.isSelected());
+      }
+    };
+    pca.addItemListener(listener);
+    tree.addItemListener(listener);
+    ButtonGroup calcTypes = new ButtonGroup();
+    calcTypes.add(pca);
+    calcTypes.add(tree);
+    JPanel calcChoicePanel = new JPanel();
+    calcChoicePanel.setOpaque(false);
+    tree.setSelected(true);
+    calcChoicePanel.add(tree);
+    calcChoicePanel.add(pca);
+
+    /*
+     * both tree method buttons are made non-opaque, like the Tree / PCA
+     * buttons, so the white dialog background shows through all four
+     */
+    neighbourJoining.setOpaque(false);
+    averageDistance.setOpaque(false);
+    treeTypes = new ButtonGroup();
+    treeTypes.add(neighbourJoining);
+    treeTypes.add(averageDistance);
+    neighbourJoining.setSelected(true);
+    JPanel treeChoicePanel = new JPanel();
+    treeChoicePanel.setOpaque(false);
+    treeChoicePanel.add(neighbourJoining);
+    treeChoicePanel.add(averageDistance);
+
+    /*
+     * score models drop-down - with added tooltips!
+     */
+    modelNames = buildModelOptionsList();
+
+    JPanel scoreModelPanel = new JPanel(new FlowLayout(FlowLayout.LEFT));
+    scoreModelPanel.setOpaque(false);
+    // plain add: the second argument of add(Component, int) is a child index,
+    // not an alignment, so FlowLayout.LEFT does not belong there
+    scoreModelPanel.add(modelNames);
+
+    /*
+     * score model parameters (labels are not yet internationalised; this
+     * panel is only added when includeParams is true)
+     */
+    JPanel paramsPanel = new JPanel(new GridLayout(5, 1));
+    paramsPanel.setOpaque(false);
+    includeGaps = new JCheckBox("Include gaps");
+    matchGaps = new JCheckBox("Match gaps");
+    includeGappedColumns = new JCheckBox("Include gapped columns");
+    shorterSequence = new JCheckBox("Match on shorter sequence");
+    paramsPanel.add(new JLabel("Pairwise sequence scoring options"));
+    paramsPanel.add(includeGaps);
+    paramsPanel.add(matchGaps);
+    paramsPanel.add(includeGappedColumns);
+    paramsPanel.add(shorterSequence);
+
+    /*
+     * OK / Cancel buttons
+     */
+    JButton ok = new JButton(MessageManager.getString("action.ok"));
+    ok.setFont(VERDANA_11PT);
+    ok.addActionListener(new java.awt.event.ActionListener()
+    {
+      @Override
+      public void actionPerformed(ActionEvent e)
+      {
+        ok_actionPerformed();
+      }
+    });
+    JButton cancel = new JButton(MessageManager.getString("action.cancel"));
+    cancel.setFont(VERDANA_11PT);
+    cancel.addActionListener(new java.awt.event.ActionListener()
+    {
+      @Override
+      public void actionPerformed(ActionEvent e)
+      {
+        cancel_actionPerformed(e);
+      }
+    });
+    JPanel actionPanel = new JPanel();
+    actionPanel.setOpaque(false);
+    actionPanel.add(ok);
+    actionPanel.add(cancel);
+
+    boolean includeParams = false;
+    this.add(calcChoicePanel);
+    this.add(treeChoicePanel);
+    this.add(scoreModelPanel);
+    if (includeParams)
+    {
+      this.add(paramsPanel);
+    }
+    this.add(actionPanel);
+
+    int width = 350;
+    int height = includeParams ? 400 : 220;
+    String title = MessageManager.getString("label.choose_calculation");
+    if (af.getViewport().viewName != null)
+    {
+      title = title + " (" + af.getViewport().viewName + ")";
+    }
+
+    Desktop.addInternalFrame(frame, title, width, height, false);
+
+    /*
+     * null the AlignmentPanel's reference to the dialog when it is closed
+     */
+    frame.addInternalFrameListener(new InternalFrameAdapter()
+    {
+      @Override
+      public void internalFrameClosed(InternalFrameEvent evt)
+      {
+        af.alignPanel.setCalculationDialog(null);
+      }
+    });
+
+    frame.setLayer(JLayeredPane.PALETTE_LAYER);
+  }
+
+  /**
+   * Builds a drop-down list of the names of those score models that apply to
+   * the alignment's sequence type (nucleotide or protein), with each model's
+   * description as the tooltip for its list entry. A tooltip for the currently
+   * selected item is also shown when hovering over the closed drop-down.
+   *
+   * @return the populated combo box
+   */
+  protected JComboBox<String> buildModelOptionsList()
+  {
+    final JComboBox<String> comboBox = new JComboBox<String>();
+    ComboBoxTooltipRenderer renderer = new ComboBoxTooltipRenderer();
+    comboBox.setRenderer(renderer);
+    final List<String> tips = new ArrayList<String>();
+
+    /*
+     * show tooltip on mouse over the combobox
+     * note the listener has to be on the components that make up
+     * the combobox, doesn't work if just on the combobox
+     */
+    MouseAdapter mouseListener = new MouseAdapter()
+    {
+      @Override
+      public void mouseEntered(MouseEvent e)
+      {
+        /*
+         * getSelectedIndex() is -1 when nothing is selected (for example
+         * if no score model was added), so guard the lookup
+         */
+        int selected = comboBox.getSelectedIndex();
+        boolean hasTip = selected >= 0 && selected < tips.size();
+        comboBox.setToolTipText(hasTip ? tips.get(selected) : null);
+      }
+
+      @Override
+      public void mouseExited(MouseEvent e)
+      {
+        comboBox.setToolTipText(null);
+      }
+    };
+    for (Component c : comboBox.getComponents())
+    {
+      c.addMouseListener(mouseListener);
+    }
+
+    /*
+     * now we can actually add entries to the combobox,
+     * remembering their descriptions for tooltips
+     */
+    boolean nucleotide = af.getViewport().getAlignment().isNucleotide();
+    for (ScoreModelI sm : ScoreModels.getInstance().getModels())
+    {
+      if (sm.isDNA() && nucleotide || sm.isProtein() && !nucleotide)
+      {
+        comboBox.addItem(sm.getName());
+
+        /*
+         * tooltip is description if provided, else text lookup with
+         * fallback on the model name
+         */
+        String tooltip = sm.getDescription();
+        if (tooltip == null)
+        {
+          tooltip = MessageManager.getStringOrReturn("label.score_model_",
+                  sm.getName());
+        }
+        tips.add(tooltip);
+      }
+    }
+
+    /*
+     * set the (now complete) list of tooltips on the combobox's renderer
+     */
+    renderer.setTooltips(tips);
+
+    return comboBox;
+  }
+
+  /**
+   * Opens a PCA panel or a Tree panel, according to the selected calculation
+   * type, using the score model chosen in the drop-down list. Does nothing if
+   * no score model is selected. The dialog is not closed by this action.
+   */
+  protected void ok_actionPerformed()
+  {
+    Object selectedModel = modelNames.getSelectedItem();
+    if (selectedModel == null)
+    {
+      // nothing selected (e.g. the list is empty): nothing to calculate
+      return;
+    }
+
+    boolean doPCA = pca.isSelected();
+    ScoreModelI sm = ScoreModels.getInstance().forName(
+            selectedModel.toString());
+    SimilarityParamsI params = getSimilarityParameters(doPCA);
+
+    if (doPCA)
+    {
+      openPcaPanel(sm, params);
+    }
+    else
+    {
+      openTreePanel(sm, params);
+    }
+  }
+
+  /**
+   * Asks the alignment frame for a new Tree panel, built with the tree method
+   * picked in this dialog (neighbour joining if selected, otherwise average
+   * distance)
+   *
+   * @param sm
+   *          the score model to compute the tree with
+   * @param params
+   *          parameters for the score calculation
+   */
+  protected void openTreePanel(ScoreModelI sm, SimilarityParamsI params)
+  {
+    String treeType;
+    if (neighbourJoining.isSelected())
+    {
+      treeType = TreeBuilder.NEIGHBOUR_JOINING;
+    }
+    else
+    {
+      treeType = TreeBuilder.AVERAGE_DISTANCE;
+    }
+    af.newTreePanel(treeType, sm, params);
+  }
+
+  /**
+   * Creates a new PCA panel for the alignment frame's panel. A warning dialog
+   * is shown instead, and no panel is created, if there is a non-empty
+   * selection of fewer than 4 sequences or the alignment has fewer than 4
+   * sequences.
+   *
+   * @param sm
+   *          the score model to use for the analysis
+   * @param params
+   *          parameters for the score calculation
+   */
+  protected void openPcaPanel(ScoreModelI sm, SimilarityParamsI params)
+  {
+    AlignViewport viewport = af.getViewport();
+
+    boolean tooFewSelected = viewport.getSelectionGroup() != null
+            && viewport.getSelectionGroup().getSize() < 4
+            && viewport.getSelectionGroup().getSize() > 0;
+
+    if (tooFewSelected || viewport.getAlignment().getHeight() < 4)
+    {
+      String message = MessageManager
+              .getString("label.principal_component_analysis_must_take_least_four_input_sequences");
+      String dialogTitle = MessageManager
+              .getString("label.sequence_selection_insufficient");
+      JvOptionPane.showInternalMessageDialog(this, message, dialogTitle,
+              JvOptionPane.WARNING_MESSAGE);
+      return;
+    }
+
+    new PCAPanel(af.alignPanel, sm, params);
+  }
+
+  /**
+   * Requests that the internal frame holding this dialog be closed. A veto of
+   * the close request is silently ignored.
+   */
+  protected void closeFrame()
+  {
+    try
+    {
+      frame.setClosed(true);
+    } catch (PropertyVetoException ignored)
+    {
+      // the close was vetoed; leave the frame as it is
+    }
+  }
+
+  /**
+   * Builds the parameter bean for the similarity (or distance) calculation.
+   * The values are currently fixed in code; the option checkboxes of this
+   * dialog are not consulted.
+   *
+   * @param doPCA
+   *          true if the parameters are for a PCA, false if for a tree
+   * @return the parameters to use
+   */
+  protected SimilarityParamsI getSimilarityParameters(boolean doPCA)
+  {
+    final boolean includeGapGap = true;
+    final boolean includeGapResidue = true;
+    final boolean matchOnShortestLength = false;
+
+    /*
+     * 'matchGaps' flag is only used in the PID calculation
+     * - always false for PCA, so that PCA using PID reproduces SeqSpace PCA
+     * - for Tree it follows the static treeMatchGaps flag:
+     *   true reproduces the Jalview 2.10.1 calculation,
+     *   false gives a more correct calculation (JAL-374)
+     */
+    boolean matchGap = !doPCA && treeMatchGaps;
+
+    SimilarityParamsI params = new SimilarityParams(includeGapGap, matchGap,
+            includeGapResidue, matchOnShortestLength);
+    return params;
+  }
+
+  /**
+   * Closes the dialog on cancel, by the same route as any other close request
+   *
+   * @param e
+   *          the triggering event (not used)
+   */
+  protected void cancel_actionPerformed(ActionEvent e)
+  {
+    // delegate rather than repeat the close logic with a broader catch
+    closeFrame();
+  }
+}
--- /dev/null
+package jalview.gui;
+
+import java.awt.Component;
+import java.util.List;
+
+import javax.swing.DefaultListCellRenderer;
+import javax.swing.JComponent;
+import javax.swing.JList;
+
+/**
+ * A helper class to render a combobox with tooltips: when a list entry is
+ * rendered, the tooltip held for that entry's index (if any) is set on the
+ * list.
+ *
+ * @see <a href=
+ *      "http://stackoverflow.com/questions/480261/java-swing-mouseover-text-on-jcombobox-items"
+ *      >Java Swing: mouseover text on JComboBox items</a>
+ */
+public class ComboBoxTooltipRenderer extends DefaultListCellRenderer
+{
+  private static final long serialVersionUID = 1L;
+
+  /*
+   * tooltip texts by list index; may be null, or shorter than the list
+   */
+  List<String> tooltips;
+
+  /**
+   * Returns the default renderer component, after setting the list's tooltip
+   * to the text held for the given index. The list's tooltip is left unchanged
+   * if the index is negative, the value is null, or there is no tooltip entry
+   * for the index.
+   */
+  @Override
+  public Component getListCellRendererComponent(JList<?> list, Object value,
+          int index, boolean isSelected, boolean cellHasFocus)
+  {
+    JComponent comp = (JComponent) super.getListCellRendererComponent(list,
+            value, index, isSelected, cellHasFocus);
+
+    // the upper bound check avoids an IndexOutOfBoundsException if fewer
+    // tooltips than list entries have been supplied
+    if (-1 < index && null != value && null != tooltips
+            && index < tooltips.size())
+    {
+      list.setToolTipText(tooltips.get(index));
+    }
+    return comp;
+  }
+
+  /**
+   * Sets the tooltip texts, in the same order as the list entries. The list is
+   * held by reference, not copied.
+   *
+   * @param tips
+   *          tooltip text for each list index, or null for none
+   */
+  public void setTooltips(List<String> tips)
+  {
+    this.tooltips = tips;
+  }
+}
Tree tree = new Tree();
tree.setTitle(tp.getTitle());
tree.setCurrentTree((av.currentTree == tp.getTree()));
- tree.setNewick(tp.getTree().toString());
+ tree.setNewick(tp.getTree().print());
tree.setThreshold(tp.treeCanvas.threshold);
tree.setFitToWindow(tp.fitToWindow.getState());
TreePanel tp = (TreePanel) retrieveExistingObj(tree.getId());
if (tp == null)
{
- tp = af.ShowNewickTree(
+ tp = af.showNewickTree(
new jalview.io.NewickFile(tree.getNewick()),
tree.getTitle(), tree.getWidth(), tree.getHeight(),
tree.getXpos(), tree.getYpos());
Tree tree = jms.getTree(t);
- TreePanel tp = af.ShowNewickTree(
+ TreePanel tp = af.showNewickTree(
new jalview.io.NewickFile(tree.getNewick()),
tree.getTitle(), tree.getWidth(), tree.getHeight(),
tree.getXpos(), tree.getYpos());
*/
package jalview.gui;
+import jalview.analysis.scoremodels.ScoreModels;
+import jalview.analysis.scoremodels.SimilarityParams;
+import jalview.api.analysis.ScoreModelI;
+import jalview.api.analysis.SimilarityParamsI;
+import jalview.api.analysis.ViewBasedAnalysisI;
+import jalview.bin.Cache;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.AlignmentView;
import jalview.datamodel.ColumnSelection;
-import jalview.datamodel.SeqCigar;
import jalview.datamodel.SequenceI;
import jalview.jbgui.GPCAPanel;
-import jalview.schemes.ResidueProperties;
import jalview.util.MessageManager;
import jalview.viewmodel.AlignmentViewport;
import jalview.viewmodel.PCAModel;
int top = 0;
/**
- * Creates a new PCAPanel object.
+ * Creates a new PCAPanel object using default score model and parameters
*
- * @param av
- * DOCUMENT ME!
- * @param s
- * DOCUMENT ME!
+ * @param alignPanel
+ */
+ public PCAPanel(AlignmentPanel alignPanel)
+ {
+ // delegates to the three-argument constructor with the default score model
+ // and the SimilarityParams.SeqSpace parameter set
+ // NOTE(review): the flag passed to getDefaultModel is true when the
+ // alignment is NOT nucleotide - presumably it means "for peptide"; confirm
+ // against ScoreModels.getDefaultModel
+ this(alignPanel, ScoreModels.getInstance().getDefaultModel(
+ !alignPanel.av.getAlignment().isNucleotide()),
+ SimilarityParams.SeqSpace);
+ }
+
+ /**
+ * Constructor given sequence data, a similarity (or distance) score model,
+ * and score calculation parameters
+ *
+ * @param alignPanel
+ * @param scoreModel
+ * @param params
*/
- public PCAPanel(AlignmentPanel ap)
+ public PCAPanel(AlignmentPanel alignPanel, ScoreModelI scoreModel,
+ SimilarityParamsI params)
{
super();
- this.av = ap.av;
- this.ap = ap;
+ this.av = alignPanel.av;
+ this.ap = alignPanel;
+ boolean nucleotide = av.getAlignment().isNucleotide();
progressBar = new ProgressBar(statusPanel, statusBar);
- boolean sameLength = true;
+ addInternalFrameListener(new InternalFrameAdapter()
+ {
+ @Override
+ public void internalFrameClosed(InternalFrameEvent e)
+ {
+ close_actionPerformed();
+ }
+ });
+
boolean selected = av.getSelectionGroup() != null
&& av.getSelectionGroup().getSize() > 0;
AlignmentView seqstrings = av.getAlignmentView(selected);
- boolean nucleotide = av.getAlignment().isNucleotide();
SequenceI[] seqs;
if (!selected)
{
{
seqs = av.getSelectionGroup().getSequencesInOrder(av.getAlignment());
}
- SeqCigar sq[] = seqstrings.getSequences();
- int length = sq[0].getWidth();
-
- for (int i = 0; i < seqs.length; i++)
- {
- if (sq[i].getWidth() != length)
- {
- sameLength = false;
- break;
- }
- }
- if (!sameLength)
- {
- JvOptionPane.showMessageDialog(Desktop.desktop,
- MessageManager.getString("label.pca_sequences_not_aligned"),
- MessageManager.getString("label.sequences_not_aligned"),
- JvOptionPane.WARNING_MESSAGE);
-
- return;
- }
-
- addInternalFrameListener(new InternalFrameAdapter()
- {
- @Override
- public void internalFrameClosed(InternalFrameEvent e)
- {
- close_actionPerformed();
- }
- });
-
- pcaModel = new PCAModel(seqstrings, seqs, nucleotide);
+ pcaModel = new PCAModel(seqstrings, seqs, nucleotide, scoreModel,
+ params);
PaintRefresher.Register(this, av.getSequenceSetId());
- rc = new RotatableCanvas(ap);
+ rc = new RotatableCanvas(alignPanel);
this.getContentPane().add(rc, BorderLayout.CENTER);
Thread worker = new Thread(this);
worker.start();
pcaModel = null;
}
+ /**
+ * Repopulate the options and actions under the score model menu when it is
+ * selected. Options will depend on whether 'nucleotide' or 'peptide'
+ * modelling is selected (and also possibly on whether any additional score
+ * models have been added).
+ */
@Override
- protected void scoreMatrix_menuSelected()
+ protected void scoreModel_menuSelected()
{
- scoreMatrixMenu.removeAll();
- for (final String sm : ResidueProperties.scoreMatrices.keySet())
- {
- if (ResidueProperties.getScoreMatrix(sm) != null)
+ scoreModelMenu.removeAll();
+ for (final ScoreModelI sm : ScoreModels.getInstance().getModels())
+ {
+ final String name = sm.getName();
+ JCheckBoxMenuItem jm = new JCheckBoxMenuItem(name);
+
+ /*
+ * if the score model doesn't provide a description, try to look one
+ * up in the text bundle, falling back on its name
+ */
+ String tooltip = sm.getDescription();
+ if (tooltip == null)
+ {
+ tooltip = MessageManager.getStringOrReturn("label.score_model_",
+ name);
+ }
+ jm.setToolTipText(tooltip);
+ jm.setSelected(pcaModel.getScoreModelName().equals(name));
+ if ((pcaModel.isNucleotide() && sm.isDNA())
+ || (!pcaModel.isNucleotide() && sm.isProtein()))
{
- // create an entry for this score matrix for use in PCA
- JCheckBoxMenuItem jm = new JCheckBoxMenuItem();
- jm.setText(MessageManager.getStringOrReturn("label.score_model_",
- sm));
- jm.setSelected(pcaModel.getScore_matrix().equals(sm));
- if ((ResidueProperties.scoreMatrices.get(sm).isDNA() && ResidueProperties.scoreMatrices
- .get(sm).isProtein())
- || pcaModel.isNucleotide() == ResidueProperties.scoreMatrices
- .get(sm).isDNA())
+ jm.addActionListener(new ActionListener()
{
- final PCAPanel us = this;
- jm.addActionListener(new ActionListener()
+ @Override
+ public void actionPerformed(ActionEvent e)
{
- @Override
- public void actionPerformed(ActionEvent e)
+ if (!pcaModel.getScoreModelName().equals(name))
{
- if (!pcaModel.getScore_matrix().equals(sm))
- {
- pcaModel.setScore_matrix(sm);
- Thread worker = new Thread(us);
- worker.start();
- }
+ ScoreModelI sm2 = configureScoreModel(sm);
+ pcaModel.setScoreModel(sm2);
+ Thread worker = new Thread(PCAPanel.this);
+ worker.start();
}
- });
- scoreMatrixMenu.add(jm);
- }
+ }
+ });
+ scoreModelMenu.add(jm);
}
}
}
// rc.invalidate();
nuclSetting.setSelected(pcaModel.isNucleotide());
protSetting.setSelected(!pcaModel.isNucleotide());
- jvVersionSetting.setSelected(pcaModel.isJvCalcMode());
top = pcaModel.getTop();
} catch (OutOfMemoryError er)
if (!pcaModel.isNucleotide())
{
pcaModel.setNucleotide(true);
- pcaModel.setScore_matrix("DNA");
+ pcaModel.setScoreModel(ScoreModels.getInstance().getDefaultModel(
+ false));
Thread worker = new Thread(this);
worker.start();
}
if (pcaModel.isNucleotide())
{
pcaModel.setNucleotide(false);
- pcaModel.setScore_matrix("BLOSUM62");
+ pcaModel.setScoreModel(ScoreModels.getInstance()
+ .getDefaultModel(true));
Thread worker = new Thread(this);
worker.start();
}
}
- @Override
- protected void jvVersionSetting_actionPerfomed(ActionEvent arg0)
- {
- pcaModel.setJvCalcMode(jvVersionSetting.isSelected());
- Thread worker = new Thread(this);
- worker.start();
- }
-
/**
* DOCUMENT ME!
*/
top = t;
zCombobox.setSelectedIndex(2);
}
+
+  /**
+   * If the score model is one that takes state data from the current view
+   * (implements ViewBasedAnalysisI), creates a new instance of its class and
+   * lets that instance configure itself from this panel's alignment panel.
+   * Other score models are returned unchanged. If creating or configuring the
+   * new instance fails, the error is logged and the method returns the new
+   * instance if it was created, otherwise the model passed in.
+   *
+   * @param sm
+   *          the score model selected
+   * @return the score model to use for the calculation
+   */
+  protected ScoreModelI configureScoreModel(ScoreModelI sm)
+  {
+    ScoreModelI model = sm;
+    if (model instanceof ViewBasedAnalysisI)
+    {
+      try
+      {
+        // Constructor.newInstance replaces the deprecated Class.newInstance
+        model = model.getClass().getDeclaredConstructor().newInstance();
+        ((ViewBasedAnalysisI) model).configureFromAlignmentView(ap);
+      } catch (Exception q)
+      {
+        // pass the exception to the logger so the cause is not lost
+        Cache.log.error("Couldn't create a scoremodel instance for "
+                + model.getName(), q);
+      }
+    }
+    return model;
+  }
}
*
* @param evt
* @param res
- * @param sequence
+ * @param sequences
*/
void showPopupMenu(MouseEvent evt)
{
package jalview.gui;
import jalview.analysis.Conservation;
-import jalview.analysis.NJTree;
+import jalview.analysis.TreeModel;
import jalview.api.AlignViewportI;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceGroup;
import java.awt.print.PrinterJob;
import java.util.Enumeration;
import java.util.Hashtable;
+import java.util.List;
import java.util.Vector;
import javax.swing.JColorChooser;
/** DOCUMENT ME!! */
public static final String PLACEHOLDER = " * ";
- NJTree tree;
+ TreeModel tree;
JScrollPane scrollPane;
* @param tree
* DOCUMENT ME!
*/
- public void setTree(NJTree tree)
+ public void setTree(TreeModel tree)
{
this.tree = tree;
tree.findHeight(tree.getTopNode());
* DOCUMENT ME!
* @param chunk
* DOCUMENT ME!
- * @param scale
+ * @param wscale
* DOCUMENT ME!
* @param width
* DOCUMENT ME!
* DOCUMENT ME!
*/
public void drawNode(Graphics g, SequenceNode node, float chunk,
- float scale, int width, int offx, int offy)
+ double wscale, int width, int offx, int offy)
{
if (node == null)
{
if ((node.left() == null) && (node.right() == null))
{
// Drawing leaf node
- float height = node.height;
- float dist = node.dist;
+ double height = node.height;
+ double dist = node.dist;
- int xstart = (int) ((height - dist) * scale) + offx;
- int xend = (int) (height * scale) + offx;
+ int xstart = (int) ((height - dist) * wscale) + offx;
+ int xend = (int) (height * wscale) + offx;
int ypos = (int) (node.ycount * chunk) + offy;
}
else
{
- drawNode(g, (SequenceNode) node.left(), chunk, scale, width, offx,
+ drawNode(g, (SequenceNode) node.left(), chunk, wscale, width, offx,
offy);
- drawNode(g, (SequenceNode) node.right(), chunk, scale, width, offx,
+ drawNode(g, (SequenceNode) node.right(), chunk, wscale, width, offx,
offy);
- float height = node.height;
- float dist = node.dist;
+ double height = node.height;
+ double dist = node.dist;
- int xstart = (int) ((height - dist) * scale) + offx;
- int xend = (int) (height * scale) + offx;
+ int xstart = (int) ((height - dist) * wscale) + offx;
+ int xend = (int) (height * wscale) + offx;
int ypos = (int) (node.ycount * chunk) + offy;
g.setColor(node.color.darker());
Rectangle pos = new Rectangle(xend - 2, ypos - 2, 5, 5);
nodeHash.put(node, pos);
- g.drawLine((int) (height * scale) + offx, ystart,
- (int) (height * scale) + offx, yend);
+ g.drawLine((int) (height * wscale) + offx, ystart,
+ (int) (height * wscale) + offx, yend);
String nodeLabel = "";
SequenceNode top = tree.getTopNode();
- float wscale = (float) ((width * .8) - (offx * 2))
+ double wscale = ((width * .8) - (offx * 2))
/ tree.getMaxHeight();
if (top.count == 0)
* DOCUMENT ME!
* @param chunk
* DOCUMENT ME!
- * @param scale
+ * @param wscale
* DOCUMENT ME!
* @param width
* DOCUMENT ME!
* DOCUMENT ME!
*/
public void pickNode(Rectangle pickBox, SequenceNode node, float chunk,
- float scale, int width, int offx, int offy)
+ double wscale, int width, int offx, int offy)
{
if (node == null)
{
if ((node.left() == null) && (node.right() == null))
{
- float height = node.height;
- float dist = node.dist;
+ double height = node.height;
+ double dist = node.dist;
- int xstart = (int) ((height - dist) * scale) + offx;
- int xend = (int) (height * scale) + offx;
+ int xstart = (int) ((height - dist) * wscale) + offx;
+ int xend = (int) (height * wscale) + offx;
int ypos = (int) (node.ycount * chunk) + offy;
}
else
{
- pickNode(pickBox, (SequenceNode) node.left(), chunk, scale, width,
+ pickNode(pickBox, (SequenceNode) node.left(), chunk, wscale, width,
offx, offy);
- pickNode(pickBox, (SequenceNode) node.right(), chunk, scale, width,
+ pickNode(pickBox, (SequenceNode) node.right(), chunk, wscale, width,
offx, offy);
}
}
labelLength = fm.stringWidth(longestName) + 20; // 20 allows for scrollbar
- float wscale = (width - labelLength - (offx * 2)) / tree.getMaxHeight();
+ double wscale = (width - labelLength - (offx * 2))
+ / tree.getMaxHeight();
SequenceNode top = tree.getTopNode();
threshold = (float) (x - offx)
/ (float) (getWidth() - labelLength - (2 * offx));
- tree.getGroups().removeAllElements();
- tree.groupNodes(tree.getTopNode(), threshold);
+ List<SequenceNode> groups = tree.groupNodes(threshold);
setColor(tree.getTopNode(), Color.black);
AlignmentPanel[] aps = getAssociatedPanels();
aps[a].av.getCodingComplement().clearSequenceColours();
}
}
- colourGroups();
+ colourGroups(groups);
}
PaintRefresher.Refresh(tp, ap.av.getSequenceSetId());
}
- void colourGroups()
+ void colourGroups(List<SequenceNode> groups)
{
AlignmentPanel[] aps = getAssociatedPanels();
- for (int i = 0; i < tree.getGroups().size(); i++)
+ for (int i = 0; i < groups.size(); i++)
{
Color col = new Color((int) (Math.random() * 255),
(int) (Math.random() * 255), (int) (Math.random() * 255));
- setColor(tree.getGroups().elementAt(i), col.brighter());
+ setColor(groups.get(i), col.brighter());
- Vector<SequenceNode> l = tree.findLeaves(tree.getGroups()
- .elementAt(i));
+ Vector<SequenceNode> l = tree.findLeaves(groups.get(i));
Vector<SequenceI> sequences = new Vector<SequenceI>();
package jalview.gui;
import jalview.analysis.AlignmentSorter;
+import jalview.analysis.AverageDistanceTree;
import jalview.analysis.NJTree;
+import jalview.analysis.TreeBuilder;
+import jalview.analysis.TreeModel;
import jalview.api.analysis.ScoreModelI;
+import jalview.api.analysis.SimilarityParamsI;
import jalview.api.analysis.ViewBasedAnalysisI;
import jalview.bin.Cache;
import jalview.commands.CommandI;
import jalview.io.JalviewFileView;
import jalview.io.NewickFile;
import jalview.jbgui.GTreePanel;
-import jalview.schemes.ResidueProperties;
import jalview.util.ImageMaker;
import jalview.util.MessageManager;
import jalview.viewmodel.AlignmentViewport;
*/
public class TreePanel extends GTreePanel
{
- String type;
+ String treeType;
- String pwtype;
+ ScoreModelI scoreModel; // if tree computed
+
+ String treeTitle; // if tree loaded
+
+ SimilarityParamsI similarityParams;
TreeCanvas treeCanvas;
- NJTree tree;
+ TreeModel tree;
AlignViewport av;
/**
* Creates a new TreePanel object.
*
- * @param av
- * DOCUMENT ME!
- * @param seqVector
- * DOCUMENT ME!
+ * @param ap
* @param type
- * DOCUMENT ME!
- * @param pwtype
- * DOCUMENT ME!
- * @param s
- * DOCUMENT ME!
- * @param e
- * DOCUMENT ME!
+ * @param sm
+ * @param options
*/
- public TreePanel(AlignmentPanel ap, String type, String pwtype)
+ public TreePanel(AlignmentPanel ap, String type, ScoreModelI sm,
+ SimilarityParamsI options)
{
super();
- initTreePanel(ap, type, pwtype, null, null);
+ this.similarityParams = options;
+ initTreePanel(ap, type, sm, null, null);
// We know this tree has distances. JBPNote TODO: prolly should add this as
// a userdefined default
// showDistances(true);
}
- /**
- * Creates a new TreePanel object.
- *
- * @param av
- * DOCUMENT ME!
- * @param seqVector
- * DOCUMENT ME!
- * @param newtree
- * DOCUMENT ME!
- * @param type
- * DOCUMENT ME!
- * @param pwtype
- * DOCUMENT ME!
- */
- public TreePanel(AlignmentPanel ap, String type, String pwtype,
- NewickFile newtree)
+ public TreePanel(AlignmentPanel alignPanel, NewickFile newtree,
+ String theTitle, AlignmentView inputData)
{
super();
- initTreePanel(ap, type, pwtype, newtree, null);
- }
-
- public TreePanel(AlignmentPanel av, String type, String pwtype,
- NewickFile newtree, AlignmentView inputData)
- {
- super();
- initTreePanel(av, type, pwtype, newtree, inputData);
+ this.treeTitle = theTitle;
+ initTreePanel(alignPanel, null, null, newtree, inputData);
}
public AlignmentI getAlignment()
return treeCanvas.av;
}
- void initTreePanel(AlignmentPanel ap, String type, String pwtype,
+ void initTreePanel(AlignmentPanel ap, String type, ScoreModelI sm,
NewickFile newTree, AlignmentView inputData)
{
av = ap.av;
- this.type = type;
- this.pwtype = pwtype;
+ this.treeType = type;
+ this.scoreModel = sm;
treeCanvas = new TreeCanvas(this, ap, scrollPane);
scrollPane.setViewportView(treeCanvas);
.println("new alignment sequences vector value is null");
}
- tree.UpdatePlaceHolders((List<SequenceI>) evt.getNewValue());
+ tree.updatePlaceHolders((List<SequenceI>) evt.getNewValue());
treeCanvas.nameHash.clear(); // reset the mapping between canvas
// rectangles and leafnodes
repaint();
}
});
- TreeLoader tl = new TreeLoader(newTree);
- if (inputData != null)
- {
- tl.odata = inputData;
- }
+ TreeLoader tl = new TreeLoader(newTree, inputData);
tl.start();
}
class TreeLoader extends Thread
{
- NewickFile newtree;
+ private NewickFile newtree;
- jalview.datamodel.AlignmentView odata = null;
+ private AlignmentView odata = null;
- public TreeLoader(NewickFile newtree)
+ public TreeLoader(NewickFile newickFile, AlignmentView inputData)
{
- this.newtree = newtree;
- if (newtree != null)
+ this.newtree = newickFile;
+ this.odata = inputData;
+
+ if (newickFile != null)
{
// Must be outside run(), as Jalview2XML tries to
// update distance/bootstrap visibility at the same time
- showBootstrap(newtree.HasBootstrap());
- showDistances(newtree.HasDistances());
+ showBootstrap(newickFile.HasBootstrap());
+ showDistances(newickFile.HasDistances());
}
}
if (newtree != null)
{
- if (odata == null)
+ tree = new TreeModel(av.getAlignment().getSequencesArray(), odata,
+ newtree);
+ if (tree.getOriginalData() == null)
{
- tree = new NJTree(av.getAlignment().getSequencesArray(), newtree);
- }
- else
- {
- tree = new NJTree(av.getAlignment().getSequencesArray(), odata,
- newtree);
- }
- if (!tree.hasOriginalSequenceData())
- {
- allowOriginalSeqData(false);
+ originalSeqData.setVisible(false);
}
}
else
{
- int start, end;
- SequenceI[] seqs;
- boolean selview = av.getSelectionGroup() != null
- && av.getSelectionGroup().getSize() > 1;
- AlignmentView seqStrings = av.getAlignmentView(selview);
- if (!selview)
- {
- start = 0;
- end = av.getAlignment().getWidth();
- seqs = av.getAlignment().getSequencesArray();
- }
- else
- {
- start = av.getSelectionGroup().getStartRes();
- end = av.getSelectionGroup().getEndRes() + 1;
- seqs = av.getSelectionGroup().getSequencesInOrder(
- av.getAlignment());
- }
- ScoreModelI sm = ResidueProperties.getScoreModel(pwtype);
- if (sm instanceof ViewBasedAnalysisI)
- {
- try
- {
- sm = sm.getClass().newInstance();
- ((ViewBasedAnalysisI) sm)
- .configureFromAlignmentView(treeCanvas.ap);
- } catch (Exception q)
- {
- Cache.log.error("Couldn't create a scoremodel instance for "
- + sm.getName());
- }
- tree = new NJTree(seqs, seqStrings, type, pwtype, sm, start, end);
- }
- else
- {
- tree = new NJTree(seqs, seqStrings, type, pwtype, null, start,
- end);
- }
+ ScoreModelI sm = configureScoreModel();
+ TreeBuilder njtree = treeType.equals(TreeBuilder.NEIGHBOUR_JOINING) ? new NJTree(
+ av, sm, similarityParams) : new AverageDistanceTree(av, sm,
+ similarityParams);
+ tree = new TreeModel(njtree);
showDistances(true);
}
treeCanvas.setMarkPlaceholders(b);
}
- private void allowOriginalSeqData(boolean b)
- {
- originalSeqData.setVisible(b);
- }
-
/**
* DOCUMENT ME!
*
* @return DOCUMENT ME!
*/
- public NJTree getTree()
+ public TreeModel getTree()
{
return tree;
}
{
CutAndPasteTransfer cap = new CutAndPasteTransfer();
- StringBuffer buffer = new StringBuffer();
-
- if (type.equals("AV"))
- {
- buffer.append("Average distance tree using ");
- }
- else
- {
- buffer.append("Neighbour joining tree using ");
- }
-
- if (pwtype.equals("BL"))
- {
- buffer.append("BLOSUM62");
- }
- else
- {
- buffer.append("PID");
- }
+ String newTitle = getPanelTitle();
- jalview.io.NewickFile fout = new jalview.io.NewickFile(
- tree.getTopNode());
+ NewickFile fout = new NewickFile(tree.getTopNode());
try
{
- cap.setText(fout.print(tree.isHasBootstrap(), tree.isHasDistances(),
- tree.isHasRootDistance()));
- Desktop.addInternalFrame(cap, buffer.toString(), 500, 100);
+ cap.setText(fout.print(tree.hasBootstrap(), tree.hasDistances(),
+ tree.hasRootDistance()));
+ Desktop.addInternalFrame(cap, newTitle, 500, 100);
} catch (OutOfMemoryError oom)
{
new OOMWarning("generating newick tree file", oom);
{
jalview.io.NewickFile fout = new jalview.io.NewickFile(
tree.getTopNode());
- String output = fout.print(tree.isHasBootstrap(),
- tree.isHasDistances(), tree.isHasRootDistance());
+ String output = fout.print(tree.hasBootstrap(),
+ tree.hasDistances(), tree.hasRootDistance());
java.io.PrintWriter out = new java.io.PrintWriter(
new java.io.FileWriter(choice));
out.println(output);
@Override
public void originalSeqData_actionPerformed(ActionEvent e)
{
- if (!tree.hasOriginalSequenceData())
+ AlignmentView originalData = tree.getOriginalData();
+ if (originalData == null)
{
jalview.bin.Cache.log
.info("Unexpected call to originalSeqData_actionPerformed - should have hidden this menu action.");
} catch (Exception ex)
{
}
- ;
- Object[] alAndColsel = tree.seqData.getAlignmentAndColumnSelection(gc);
+
+ Object[] alAndColsel = originalData
+ .getAlignmentAndColumnSelection(gc);
if (alAndColsel != null && alAndColsel[0] != null)
{
public CommandI sortAlignmentIn(AlignmentPanel ap)
{
- AlignmentViewport av = ap.av;
- SequenceI[] oldOrder = av.getAlignment().getSequencesArray();
- AlignmentSorter.sortByTree(av.getAlignment(), tree);
+ AlignmentViewport viewport = ap.av;
+ SequenceI[] oldOrder = viewport.getAlignment().getSequencesArray();
+ AlignmentSorter.sortByTree(viewport.getAlignment(), tree);
CommandI undo;
- undo = new OrderCommand("Tree Sort", oldOrder, av.getAlignment());
+ undo = new OrderCommand("Tree Sort", oldOrder, viewport.getAlignment());
ap.paintAlignment(true);
return undo;
return treeCanvas.font;
}
- public void setTreeFont(Font font)
+ public void setTreeFont(Font f)
{
if (treeCanvas != null)
{
- treeCanvas.setFont(font);
+ treeCanvas.setFont(f);
}
}
}
if (newname != null)
{
- String oldname = ((SequenceNode) node).getName();
- // TODO : save in the undo object for this modification.
+ // String oldname = ((SequenceNode) node).getName();
+ // TODO : save oldname in the undo object for this modification.
((SequenceNode) node).setName(newname);
}
}
}
});
}
+
+  /**
+   * Answers the title to show for this tree panel.
+   * <p>
+   * If a title was supplied when the panel was constructed (the case for a
+   * tree loaded from file), that title is returned unchanged. Otherwise a
+   * localised title is built from the tree calculation method and the short
+   * name of the score model, for example
+   * <p>
+   * Neighbour Joining Using BLOSUM62
+   * 
+   * @return the supplied title, or the formatted 'method Using model' title
+   */
+  public String getPanelTitle()
+  {
+    if (treeTitle == null)
+    {
+      /*
+       * message key for the i18n description of the tree method, e.g.
+       * label.tree_calc_nj or label.tree_calc_av
+       */
+      String methodKey = "label.tree_calc_" + treeType.toLowerCase();
+
+      /*
+       * <method> Using <model>, with the short score model name
+       * (the long description can be too long for a title)
+       */
+      return MessageManager.formatMessage("label.treecalc_title",
+              MessageManager.getString(methodKey), scoreModel.getName());
+    }
+    return treeTitle;
+  }
+
+  /**
+   * Answers the score model to use for the tree calculation.
+   * <p>
+   * A model that implements ViewBasedAnalysisI needs state from the current
+   * view, so it is replaced (in the scoreModel field) by a new instance of the
+   * same class, which is then configured from the tree canvas's alignment
+   * panel. Any other model is returned as it is. If creating or configuring
+   * the new instance fails, the failure is logged and whatever the scoreModel
+   * field holds at that point is returned.
+   * 
+   * @return the score model held in the scoreModel field after any
+   *         reconfiguration
+   */
+  protected ScoreModelI configureScoreModel()
+  {
+    if (!(scoreModel instanceof ViewBasedAnalysisI))
+    {
+      return scoreModel;
+    }
+
+    try
+    {
+      scoreModel = scoreModel.getClass().newInstance();
+      ViewBasedAnalysisI viewBased = (ViewBasedAnalysisI) scoreModel;
+      viewBased.configureFromAlignmentView(treeCanvas.ap);
+    } catch (Exception q)
+    {
+      Cache.log.error("Couldn't create a scoremodel instance for "
+              + scoreModel.getName());
+    }
+    return scoreModel;
+  }
}
return new FeaturesFile();
}
},
+ /*
+ * File format entry for substitution (score) matrix files. Reading is
+ * delegated to ScoreMatrixFile; no writer is provided.
+ * NOTE(review): the empty second argument sits where the PDB entry passes
+ * "pdb,ent", so presumably no file extensions are registered for this
+ * format - confirm against the enum constructor.
+ */
+ ScoreMatrix("Substitution matrix", "", false, false)
+ {
+ @Override
+ public AlignmentFileReaderI getReader(FileParse source)
+ throws IOException
+ {
+ return new ScoreMatrixFile(source);
+ }
+
+ @Override
+ public AlignmentFileWriterI getWriter(AlignmentI al)
+ {
+ // no writer exists for this format, so callers receive null
+ return null;
+ }
+ },
PDB("PDB", "pdb,ent", true, false)
{
@Override
boolean lineswereskipped = false;
boolean isBinary = false; // true if length is non-zero and non-printable
// characters are encountered
+
try
{
if (!closeSource)
{
source.mark();
}
+ boolean aaIndexHeaderRead = false;
+
while ((data = source.nextLine()) != null)
{
bytesRead += data.length();
}
data = data.toUpperCase();
+ if (data.startsWith(ScoreMatrixFile.SCOREMATRIX))
+ {
+ reply = FileFormat.ScoreMatrix;
+ break;
+ }
+ if (data.startsWith("H ") && !aaIndexHeaderRead)
+ {
+ aaIndexHeaderRead = true;
+ }
+ if (data.startsWith("D ") && aaIndexHeaderRead)
+ {
+ reply = FileFormat.ScoreMatrix;
+ break;
+ }
if (data.startsWith("##GFF-VERSION"))
{
// GFF - possibly embedded in a Jalview features file!
--- /dev/null
+package jalview.io;
+
+import jalview.analysis.scoremodels.ScoreMatrix;
+import jalview.analysis.scoremodels.ScoreModels;
+import jalview.datamodel.SequenceI;
+
+import java.io.IOException;
+import java.util.StringTokenizer;
+
+/**
+ * A class that can parse a file containing a substitution matrix and register
+ * it for use in Jalview
+ * <p>
+ * Accepts 'NCBI' format (e.g.
+ * https://www.ncbi.nlm.nih.gov/Class/FieldGuide/BLOSUM62.txt), with the
+ * addition of a header line to provide a matrix name, e.g.
+ * 
+ * <pre>
+ * ScoreMatrix BLOSUM62
+ * </pre>
+ * 
+ * Also accepts 'AAindex' format (as described at
+ * http://www.genome.jp/aaindex/aaindex_help.html) with the minimum data
+ * required being
+ * 
+ * <pre>
+ * H accession number (used as score matrix identifier in Jalview)
+ * D description (used for tooltip in Jalview)
+ * M rows = symbolList
+ * and the substitution scores
+ * </pre>
+ */
+public class ScoreMatrixFile extends AlignFile implements
+        AlignmentFileReaderI
+{
+  // first non-comment line identifier - also checked in IdentifyFile
+  public static final String SCOREMATRIX = "SCOREMATRIX";
+
+  /*
+   * characters that separate fields on a line: space, comma or tab
+   */
+  private static final String DELIMITERS = " ,\t";
+
+  private static final String COMMENT_CHAR = "#";
+
+  /*
+   * name of the matrix most recently parsed successfully (else null)
+   */
+  private String matrixName;
+
+  /*
+   * aaindex format has scores for diagonal and below only
+   */
+  boolean isLowerDiagonalOnly;
+
+  /*
+   * ncbi format has symbols as first column on score rows
+   */
+  boolean hasGuideColumn;
+
+  /**
+   * Constructor
+   * 
+   * @param source
+   *          the data source to read the matrix from
+   * @throws IOException
+   */
+  public ScoreMatrixFile(FileParse source) throws IOException
+  {
+    // NOTE(review): 'false' is presumably AlignFile's 'parse immediately'
+    // flag, leaving parse() to be called explicitly - confirm
+    super(false, source);
+  }
+
+  /**
+   * Output of score matrices is not supported
+   * 
+   * @return null always
+   */
+  @Override
+  public String print(SequenceI[] sqs, boolean jvsuffix)
+  {
+    return null;
+  }
+
+  /**
+   * Parses the score matrix file, and if successful registers the matrix so it
+   * will be shown in Jalview menus. This method is not thread-safe (a separate
+   * instance of this class should be used by each thread).
+   */
+  @Override
+  public void parse() throws IOException
+  {
+    ScoreMatrix sm = parseMatrix();
+
+    ScoreModels.getInstance().registerScoreModel(sm);
+  }
+
+  /**
+   * Parses the score matrix file and constructs a ScoreMatrix object. If an
+   * error is found in parsing, it is thrown as FileFormatException. This
+   * includes input that has no 'ScoreMatrix' (or AAindex 'H') header line, no
+   * alphabet line, or fewer score rows than symbols in the alphabet.
+   * 
+   * @return the parsed matrix
+   * @throws IOException
+   */
+  public ScoreMatrix parseMatrix() throws IOException
+  {
+    int lineNo = 0;
+    String name = null;
+    char[] alphabet = null;
+    float[][] scores = null;
+    int size = 0;
+    int row = 0;
+    String err = null;
+    String data;
+
+    /*
+     * reset the format flags that parseValues infers from the first score row
+     */
+    isLowerDiagonalOnly = false;
+    hasGuideColumn = false;
+
+    while ((data = nextLine()) != null)
+    {
+      lineNo++;
+      data = data.trim();
+      if (data.startsWith(COMMENT_CHAR) || data.length() == 0)
+      {
+        continue;
+      }
+
+      /*
+       * case-insensitive match that does not depend on the default locale
+       * (toUpperCase() would turn 'i' into a dotted capital in Turkish)
+       */
+      if (data.regionMatches(true, 0, SCOREMATRIX, 0, SCOREMATRIX.length()))
+      {
+        /*
+         * Parse name from ScoreMatrix <name>
+         * we allow any delimiter after ScoreMatrix then take the rest of the line
+         */
+        if (name != null)
+        {
+          throw new FileFormatException(
+                  "Error: 'ScoreMatrix' repeated in file at line "
+                          + lineNo);
+        }
+        StringTokenizer nameLine = new StringTokenizer(data, DELIMITERS);
+        if (nameLine.countTokens() < 2)
+        {
+          err = "Format error: expected 'ScoreMatrix <name>', found '"
+                  + data + "' at line " + lineNo;
+          throw new FileFormatException(err);
+        }
+        String keyword = nameLine.nextToken(); // 'ScoreMatrix'
+        String nameStart = nameLine.nextToken(); // next field
+
+        /*
+         * the line is trimmed, so the keyword token starts at index 0;
+         * searching from the end of it finds the name field itself and not
+         * an accidental match inside the keyword (e.g. a name of "M")
+         */
+        name = data.substring(data.indexOf(nameStart, keyword.length()));
+        continue;
+      }
+      else if (data.startsWith("H ") && name == null)
+      {
+        /*
+         * AAindex identifier
+         */
+        return parseAAIndexFormat(lineNo, data);
+      }
+      else if (name == null)
+      {
+        err = "Format error: 'ScoreMatrix <name>' should be the first non-comment line";
+        throw new FileFormatException(err);
+      }
+
+      /*
+       * next non-comment line after ScoreMatrix should be the 
+       * column header line with the alphabet of scored symbols
+       */
+      if (alphabet == null)
+      {
+        StringTokenizer columnHeadings = new StringTokenizer(data,
+                DELIMITERS);
+        size = columnHeadings.countTokens();
+        alphabet = new char[size];
+        int col = 0;
+        while (columnHeadings.hasMoreTokens())
+        {
+          alphabet[col++] = columnHeadings.nextToken().charAt(0);
+        }
+        scores = new float[size][];
+        continue;
+      }
+
+      /*
+       * too much information
+       */
+      if (row >= size)
+      {
+        err = "Unexpected extra input line in score model file: '" + data
+                + "'";
+        throw new FileFormatException(err);
+      }
+
+      parseValues(data, lineNo, scores, row, alphabet);
+      row++;
+    }
+
+    /*
+     * out of data - check we found a header, an alphabet and enough rows
+     */
+    if (name == null)
+    {
+      err = "Format error: 'ScoreMatrix <name>' should be the first non-comment line";
+      throw new FileFormatException(err);
+    }
+    if (alphabet == null)
+    {
+      err = "Format error: no column header line found after 'ScoreMatrix "
+              + name + "'";
+      throw new FileFormatException(err);
+    }
+    if (row < size)
+    {
+      err = String
+              .format("Expected %d rows of score data in score matrix but only found %d",
+                      size, row);
+      throw new FileFormatException(err);
+    }
+
+    /*
+     * If we get here, then name, alphabet and scores have been parsed successfully
+     */
+    ScoreMatrix sm = new ScoreMatrix(name, alphabet, scores);
+    matrixName = name;
+
+    return sm;
+  }
+
+  /**
+   * Parse input as AAIndex format, starting from the header line with the
+   * accession id. Lines read after the header are: 'D' (description), 'M'
+   * (alphabet) and score rows; other AAindex line types are skipped.
+   * 
+   * @param lineNo
+   *          the line number of the header line
+   * @param data
+   *          the header line (starting "H ")
+   * @return the parsed matrix, with its description set from the 'D' line (or
+   *         null if there was none)
+   * @throws IOException
+   *           (as FileFormatException) if there is no 'M' line before the
+   *           scores, or too many or too few score rows, or a row is invalid
+   */
+  protected ScoreMatrix parseAAIndexFormat(int lineNo, String data)
+          throws IOException
+  {
+    String name = data.substring(2).trim();
+    String description = null;
+
+    float[][] scores = null;
+    char[] alphabet = null;
+    int row = 0;
+    int size = 0;
+
+    while ((data = nextLine()) != null)
+    {
+      lineNo++;
+      data = data.trim();
+      if (skipAAindexLine(data))
+      {
+        continue;
+      }
+      if (data.startsWith("D "))
+      {
+        description = data.substring(2).trim();
+      }
+      else if (data.startsWith("M "))
+      {
+        alphabet = parseAAindexRowsColumns(lineNo, data);
+        size = alphabet.length;
+        scores = new float[size][size];
+      }
+      else if (scores == null)
+      {
+        throw new FileFormatException(
+                "No alphabet specified in matrix file");
+      }
+      else if (row >= size)
+      {
+        throw new FileFormatException("Too many data rows in matrix file");
+      }
+      else
+      {
+        parseValues(data, lineNo, scores, row, alphabet);
+        row++;
+      }
+    }
+
+    /*
+     * out of data - check we found an alphabet and enough score rows
+     */
+    if (alphabet == null)
+    {
+      throw new FileFormatException(
+              "No alphabet specified in matrix file");
+    }
+    if (row < size)
+    {
+      throw new FileFormatException(
+              String.format(
+                      "Expected %d rows of score data in score matrix but only found %d",
+                      size, row));
+    }
+
+    ScoreMatrix sm = new ScoreMatrix(name, alphabet, scores);
+    sm.setDescription(description);
+    matrixName = name;
+
+    return sm;
+  }
+
+  /**
+   * Parse one row of score values, delimited by whitespace or commas. The line
+   * may optionally include the symbol from which the scores are defined. Values
+   * may be present for all columns, or only up to the diagonal (in which case
+   * upper diagonal values are set symmetrically).
+   * <p>
+   * Whether rows have a leading 'guide' symbol, and whether they hold lower
+   * diagonal values only, is decided from the first row (row 0) and recorded in
+   * the hasGuideColumn and isLowerDiagonalOnly fields for subsequent rows.
+   * 
+   * @param data
+   *          the line to be parsed
+   * @param lineNo
+   *          the line number, used in error messages
+   * @param scores
+   *          the score matrix to add data to
+   * @param row
+   *          the row number / alphabet index position
+   * @param alphabet
+   *          the symbols scored by the matrix, in row / column order
+   * @throws FileFormatException
+   *           if invalid, or too few, or too many values
+   */
+  protected void parseValues(String data, int lineNo, float[][] scores,
+          int row, char[] alphabet) throws FileFormatException
+  {
+    String err;
+    int size = alphabet.length;
+    StringTokenizer scoreLine = new StringTokenizer(data, DELIMITERS);
+
+    int tokenCount = scoreLine.countTokens();
+
+    /*
+     * inspect first row to see if it includes the symbol in the first column,
+     * and to see if it is lower diagonal values only (i.e. just one score)
+     */
+    if (row == 0)
+    {
+      if (data.startsWith(String.valueOf(alphabet[0])))
+      {
+        hasGuideColumn = true;
+      }
+      if (tokenCount == (hasGuideColumn ? 2 : 1))
+      {
+        isLowerDiagonalOnly = true;
+      }
+    }
+
+    if (hasGuideColumn)
+    {
+      /*
+       * check 'guide' symbol is the row'th letter of the alphabet
+       */
+      String symbol = scoreLine.nextToken();
+      if (symbol.length() > 1 || symbol.charAt(0) != alphabet[row])
+      {
+        err = String
+                .format("Error parsing score matrix at line %d, expected '%s' but found '%s'",
+                        lineNo, alphabet[row], symbol);
+        throw new FileFormatException(err);
+      }
+      tokenCount = scoreLine.countTokens(); // excluding guide symbol
+    }
+
+    /*
+     * check the right number of values (lower diagonal or full format)
+     */
+    if (isLowerDiagonalOnly && tokenCount != row + 1)
+    {
+      err = String.format(
+              "Expected %d scores at line %d: '%s' but found %d", row + 1,
+              lineNo, data, tokenCount);
+      throw new FileFormatException(err);
+    }
+
+    if (!isLowerDiagonalOnly && tokenCount != size)
+    {
+      err = String.format(
+              "Expected %d scores at line %d: '%s' but found %d", size,
+              lineNo, data, tokenCount);
+      throw new FileFormatException(err);
+    }
+
+    /*
+     * parse and set the values, setting the symmetrical value
+     * as well if lower diagonal format data
+     */
+    scores[row] = new float[size];
+    int col = 0;
+    String value = null;
+    while (scoreLine.hasMoreTokens())
+    {
+      try
+      {
+        value = scoreLine.nextToken();
+        scores[row][col] = Float.valueOf(value);
+        if (isLowerDiagonalOnly)
+        {
+          /*
+           * col <= row here, so scores[col] was allocated by an earlier row
+           */
+          scores[col][row] = scores[row][col];
+        }
+        col++;
+      } catch (NumberFormatException e)
+      {
+        err = String.format(
+                "Invalid score value '%s' at line %d column %d", value,
+                lineNo, col);
+        throw new FileFormatException(err);
+      }
+    }
+  }
+
+  /**
+   * Parse the line in an aaindex file that looks like
+   * 
+   * <pre>
+   * M rows = ARNDCQEGHILKMFPSTWYV, cols = ARNDCQEGHILKMFPSTWYV
+   * </pre>
+   * 
+   * rejecting it if rows and cols do not match. Returns the string of
+   * characters in the row/cols alphabet.
+   * 
+   * @param lineNo
+   *          the line number, used in the error message
+   * @param data
+   *          the line to be parsed
+   * @return the symbols of the rows (and columns) alphabet
+   * @throws FileFormatException
+   *           if the line does not have the expected layout, or the rows and
+   *           cols alphabets differ
+   */
+  protected char[] parseAAindexRowsColumns(int lineNo, String data)
+          throws FileFormatException
+  {
+    String err = "Unexpected aaIndex score matrix data at line " + lineNo
+            + ": " + data;
+
+    try
+    {
+      String[] toks = data.split(",");
+      String rowsAlphabet = toks[0].split("=")[1].trim();
+      String colsAlphabet = toks[1].split("=")[1].trim();
+      if (!rowsAlphabet.equals(colsAlphabet))
+      {
+        throw new FileFormatException("rows != cols");
+      }
+      return rowsAlphabet.toCharArray();
+    } catch (Exception e)
+    {
+      /*
+       * covers a malformed line (array index failures) as well as the
+       * mismatch thrown above; Errors are deliberately not caught
+       */
+      throw new FileFormatException(err + " " + e.getMessage());
+    }
+  }
+
+  /**
+   * Answers true if line is one we are not interested in from AAindex format
+   * file: an empty or comment line, or one starting with "*", "R ", "A ",
+   * "T ", "J " or "//"
+   * 
+   * @param data
+   *          the (trimmed) line to check
+   * @return true if the line should be ignored
+   */
+  protected boolean skipAAindexLine(String data)
+  {
+    if (data.startsWith(COMMENT_CHAR) || data.length() == 0)
+    {
+      return true;
+    }
+    if (data.startsWith("*") || data.startsWith("R ")
+            || data.startsWith("A ") || data.startsWith("T ")
+            || data.startsWith("J ") || data.startsWith("//"))
+    {
+      return true;
+    }
+    return false;
+  }
+
+  /**
+   * Answers the name of the matrix that was last parsed successfully, or null
+   * if none has been
+   * 
+   * @return the matrix name, or null
+   */
+  public String getMatrixName()
+  {
+    return matrixName;
+  }
+}
TreePanel tp = null;
if (vstree.isValidTree())
{
- tp = alignFrame.ShowNewickTree(vstree.getNewickTree(),
+ tp = alignFrame.showNewickTree(vstree.getNewickTree(),
vstree.getTitle(), vstree.getInputData(), 600,
500, t * 20 + 50, t * 20 + 50);
*/
package jalview.io.packed;
+import jalview.analysis.TreeModel;
import jalview.api.FeatureColourI;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.SequenceI;
{
// the following works because all trees are already had node/SequenceI
// associations created.
- jalview.analysis.NJTree njt = new jalview.analysis.NJTree(
- al.getSequencesArray(), nf);
+ TreeModel njt = new TreeModel(al.getSequencesArray(), null, nf);
// this just updates the displayed leaf name on the tree according to
// the SequenceIs.
njt.renameAssociatedNodes();
*/
package jalview.io.vamsas;
-import jalview.analysis.NJTree;
+import jalview.analysis.TreeBuilder;
+import jalview.analysis.TreeModel;
import jalview.bin.Cache;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.AlignmentView;
prov.getEntry(0).setUser(provEntry.getUser());
prov.getEntry(0).setApp(provEntry.getApp());
prov.getEntry(0).setDate(provEntry.getDate());
- if (tp.getTree().hasOriginalSequenceData())
+
+ AlignmentView originalData = tp.getTree().getOriginalData();
+ if (originalData != null)
{
Input vInput = new Input();
// LATER: check to see if tree input data is contained in this alignment -
// or just correctly resolve the tree's seqData to the correct alignment
// in
// the document.
- Vector alsqrefs = getjv2vObjs(findAlignmentSequences(jal,
- tp.getTree().seqData.getSequences()));
+ Vector alsqrefs = getjv2vObjs(findAlignmentSequences(jal, tp
+ .getTree().getOriginalData().getSequences()));
Object[] alsqs = new Object[alsqrefs.size()];
alsqrefs.copyInto(alsqs);
vInput.setObjRef(alsqs);
prov.getEntry(0).addParam(new Param());
prov.getEntry(0).getParam(0).setName("treeType");
prov.getEntry(0).getParam(0).setType("utf8");
- prov.getEntry(0).getParam(0).setContent("NJ"); // TODO: type of tree is a
- // general parameter
- int ranges[] = tp.getTree().seqData.getVisibleContigs();
+ prov.getEntry(0).getParam(0)
+ .setContent(TreeBuilder.NEIGHBOUR_JOINING);
+ // TODO: type of tree is a general parameter
+ int ranges[] = originalData.getVisibleContigs();
// VisibleContigs are with respect to alignment coordinates. Still need
// offsets
- int start = tp.getTree().seqData.getAlignmentOrigin();
+ int start = tp.getTree().getOriginalData().getAlignmentOrigin();
for (int r = 0; r < ranges.length; r += 2)
{
Seg visSeg = new Seg();
/**
* construct treenode mappings for mapped sequences
*
- * @param ntree
+ * @param treeModel
* @param newick
* @return
*/
- public Treenode[] makeTreeNodes(NJTree ntree, Newick newick)
+ public Treenode[] makeTreeNodes(TreeModel treeModel, Newick newick)
{
- Vector<SequenceNode> leaves = ntree.findLeaves(ntree.getTopNode());
+ Vector<SequenceNode> leaves = treeModel.findLeaves(treeModel
+ .getTopNode());
Vector tnv = new Vector();
Enumeration l = leaves.elements();
Hashtable nodespecs = new Hashtable();
bindjvvobj(tp, tree);
tree.setTitle(tp.getTitle());
Newick newick = new Newick();
- newick.setContent(tp.getTree().toString());
+ newick.setContent(tp.getTree().print());
newick.setTitle(tp.getTitle());
tree.addNewick(newick);
tree.setProvenance(makeTreeProvenance(jal, tp));
protected JMenu sort = new JMenu();
- protected JMenu calculateTree = new JMenu();
+ protected JMenuItem calculateTree = new JMenuItem();
protected JCheckBoxMenuItem padGapsMenuitem = new JCheckBoxMenuItem();
PCAMenuItem_actionPerformed(e);
}
});
- JMenuItem averageDistanceTreeMenuItem = new JMenuItem(
- MessageManager.getString("label.average_distance_identity"));
- averageDistanceTreeMenuItem.addActionListener(new ActionListener()
- {
- @Override
- public void actionPerformed(ActionEvent e)
- {
- averageDistanceTreeMenuItem_actionPerformed(e);
- }
- });
- JMenuItem neighbourTreeMenuItem = new JMenuItem(
- MessageManager.getString("label.neighbour_joining_identity"));
- neighbourTreeMenuItem.addActionListener(new ActionListener()
- {
- @Override
- public void actionPerformed(ActionEvent e)
- {
- neighbourTreeMenuItem_actionPerformed(e);
- }
- });
this.getContentPane().setLayout(new BorderLayout());
alignFrameMenuBar.setFont(new java.awt.Font("Verdana", 0, 11));
outputTextboxMenu.setText(MessageManager
.getString("label.out_to_textbox"));
-
- JMenuItem avDistanceTreeBlosumMenuItem = new JMenuItem(
- MessageManager.getString("label.average_distance_blosum62"));
- avDistanceTreeBlosumMenuItem.addActionListener(new ActionListener()
- {
- @Override
- public void actionPerformed(ActionEvent e)
- {
- avTreeBlosumMenuItem_actionPerformed(e);
- }
- });
- JMenuItem njTreeBlosumMenuItem = new JMenuItem(
- MessageManager.getString("label.neighbour_blosum62"));
- njTreeBlosumMenuItem.addActionListener(new ActionListener()
- {
- @Override
- public void actionPerformed(ActionEvent e)
- {
- njTreeBlosumMenuItem_actionPerformed(e);
- }
- });
annotationPanelMenuItem.setActionCommand("");
annotationPanelMenuItem.setText(MessageManager
.getString("label.show_annotations"));
@Override
public void menuSelected(MenuEvent e)
{
- buildTreeMenu();
+ buildTreeSortMenu();
}
@Override
});
sortByAnnotScore.setVisible(false);
- calculateTree
- .setText(MessageManager.getString("action.calculate_tree"));
+ calculateTree.setText(MessageManager
+ .getString("action.calculate_tree_pca"));
padGapsMenuitem.setText(MessageManager.getString("label.pad_gaps"));
padGapsMenuitem.setState(jalview.bin.Cache
calculateMenu.add(calculateTree);
calculateMenu.addSeparator();
calculateMenu.add(pairwiseAlignmentMenuItem);
- calculateMenu.add(PCAMenuItem);
+ // calculateMenu.add(PCAMenuItem);
calculateMenu.addSeparator();
calculateMenu.add(showTranslation);
calculateMenu.add(showReverse);
{
}
- protected void averageDistanceTreeMenuItem_actionPerformed(ActionEvent e)
- {
- }
-
protected void neighbourTreeMenuItem_actionPerformed(ActionEvent e)
{
}
- protected void njTreeBlosumMenuItem_actionPerformed(ActionEvent e)
- {
- }
-
- protected void avTreeBlosumMenuItem_actionPerformed(ActionEvent e)
- {
- }
-
protected void conservationMenuItem_actionPerformed(boolean selected)
{
}
}
- public void buildTreeMenu()
+ public void buildTreeSortMenu()
{
}
import java.awt.BorderLayout;
import java.awt.Color;
import java.awt.FlowLayout;
+import java.awt.Font;
import java.awt.GridLayout;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
public class GPCAPanel extends JInternalFrame
{
- JPanel jPanel2 = new JPanel();
+ private static final Font VERDANA_12 = new Font("Verdana", 0, 12);
- JLabel jLabel1 = new JLabel();
+ protected JComboBox<String> xCombobox = new JComboBox<String>();
- JLabel jLabel2 = new JLabel();
+ protected JComboBox<String> yCombobox = new JComboBox<String>();
- JLabel jLabel3 = new JLabel();
+ protected JComboBox<String> zCombobox = new JComboBox<String>();
- protected JComboBox xCombobox = new JComboBox();
-
- protected JComboBox yCombobox = new JComboBox();
-
- protected JComboBox zCombobox = new JComboBox();
-
- protected JButton resetButton = new JButton();
-
- FlowLayout flowLayout1 = new FlowLayout();
-
- BorderLayout borderLayout1 = new BorderLayout();
-
- JMenuBar jMenuBar1 = new JMenuBar();
-
- JMenu fileMenu = new JMenu();
-
- JMenu saveMenu = new JMenu();
-
- protected JMenu scoreMatrixMenu = new JMenu();
-
- JMenuItem eps = new JMenuItem();
-
- JMenuItem png = new JMenuItem();
-
- JMenuItem print = new JMenuItem();
-
- JMenuItem outputValues = new JMenuItem();
-
- JMenuItem outputPoints = new JMenuItem();
-
- JMenuItem outputProjPoints = new JMenuItem();
+ protected JMenu scoreModelMenu = new JMenu();
protected JMenu viewMenu = new JMenu();
protected JCheckBoxMenuItem showLabels = new JCheckBoxMenuItem();
- JMenuItem bgcolour = new JMenuItem();
-
- JMenuItem originalSeqData = new JMenuItem();
-
protected JMenu associateViewsMenu = new JMenu();
protected JMenu calcSettings = new JMenu();
protected JCheckBoxMenuItem protSetting = new JCheckBoxMenuItem();
- protected JCheckBoxMenuItem jvVersionSetting = new JCheckBoxMenuItem();
-
protected JLabel statusBar = new JLabel();
- protected GridLayout statusPanelLayout = new GridLayout();
-
protected JPanel statusPanel = new JPanel();
public GPCAPanel()
yCombobox.addItem("dim " + i);
zCombobox.addItem("dim " + i);
}
-
- setJMenuBar(jMenuBar1);
}
private void jbInit() throws Exception
{
- this.getContentPane().setLayout(borderLayout1);
- jPanel2.setLayout(flowLayout1);
- jLabel1.setFont(new java.awt.Font("Verdana", 0, 12));
+ this.getContentPane().setLayout(new BorderLayout());
+ JPanel jPanel2 = new JPanel();
+ jPanel2.setLayout(new FlowLayout());
+ JLabel jLabel1 = new JLabel();
+ jLabel1.setFont(VERDANA_12);
jLabel1.setText("x=");
- jLabel2.setFont(new java.awt.Font("Verdana", 0, 12));
+ JLabel jLabel2 = new JLabel();
+ jLabel2.setFont(VERDANA_12);
jLabel2.setText("y=");
- jLabel3.setFont(new java.awt.Font("Verdana", 0, 12));
+ JLabel jLabel3 = new JLabel();
+ jLabel3.setFont(VERDANA_12);
jLabel3.setText("z=");
jPanel2.setBackground(Color.white);
jPanel2.setBorder(null);
- zCombobox.setFont(new java.awt.Font("Verdana", 0, 12));
- zCombobox.addActionListener(new java.awt.event.ActionListener()
+ zCombobox.setFont(VERDANA_12);
+ zCombobox.addActionListener(new ActionListener()
{
+ @Override
public void actionPerformed(ActionEvent e)
{
zCombobox_actionPerformed(e);
}
});
- yCombobox.setFont(new java.awt.Font("Verdana", 0, 12));
- yCombobox.addActionListener(new java.awt.event.ActionListener()
+ yCombobox.setFont(VERDANA_12);
+ yCombobox.addActionListener(new ActionListener()
{
+ @Override
public void actionPerformed(ActionEvent e)
{
yCombobox_actionPerformed(e);
}
});
- xCombobox.setFont(new java.awt.Font("Verdana", 0, 12));
- xCombobox.addActionListener(new java.awt.event.ActionListener()
+ xCombobox.setFont(VERDANA_12);
+ xCombobox.addActionListener(new ActionListener()
{
+ @Override
public void actionPerformed(ActionEvent e)
{
xCombobox_actionPerformed(e);
}
});
- resetButton.setFont(new java.awt.Font("Verdana", 0, 12));
+ JButton resetButton = new JButton();
+ resetButton.setFont(VERDANA_12);
resetButton.setText(MessageManager.getString("action.reset"));
- resetButton.addActionListener(new java.awt.event.ActionListener()
+ resetButton.addActionListener(new ActionListener()
{
@Override
public void actionPerformed(ActionEvent e)
resetButton_actionPerformed(e);
}
});
+ JMenu fileMenu = new JMenu();
fileMenu.setText(MessageManager.getString("action.file"));
+ JMenu saveMenu = new JMenu();
saveMenu.setText(MessageManager.getString("action.save_as"));
- eps.setText("EPS");
+ JMenuItem eps = new JMenuItem("EPS");
eps.addActionListener(new ActionListener()
{
+ @Override
public void actionPerformed(ActionEvent e)
{
eps_actionPerformed(e);
}
});
- png.setText("PNG");
+ JMenuItem png = new JMenuItem("PNG");
png.addActionListener(new ActionListener()
{
+ @Override
public void actionPerformed(ActionEvent e)
{
png_actionPerformed(e);
}
});
+ JMenuItem outputValues = new JMenuItem();
outputValues.setText(MessageManager.getString("label.output_values"));
outputValues.addActionListener(new ActionListener()
{
+ @Override
public void actionPerformed(ActionEvent e)
{
outputValues_actionPerformed(e);
}
});
+ JMenuItem outputPoints = new JMenuItem();
outputPoints.setText(MessageManager.getString("label.output_points"));
outputPoints.addActionListener(new ActionListener()
{
+ @Override
public void actionPerformed(ActionEvent e)
{
outputPoints_actionPerformed(e);
}
});
+ JMenuItem outputProjPoints = new JMenuItem();
outputProjPoints.setText(MessageManager
.getString("label.output_transformed_points"));
outputProjPoints.addActionListener(new ActionListener()
{
+ @Override
public void actionPerformed(ActionEvent e)
{
outputProjPoints_actionPerformed(e);
}
});
+ JMenuItem print = new JMenuItem();
+ print.setText(MessageManager.getString("action.print"));
print.addActionListener(new ActionListener()
{
+ @Override
public void actionPerformed(ActionEvent e)
{
print_actionPerformed(e);
viewMenu.setText(MessageManager.getString("action.view"));
viewMenu.addMenuListener(new MenuListener()
{
+ @Override
public void menuSelected(MenuEvent e)
{
viewMenu_menuSelected();
}
+ @Override
public void menuDeselected(MenuEvent e)
{
}
+ @Override
public void menuCanceled(MenuEvent e)
{
}
});
- scoreMatrixMenu.setText(MessageManager
+ scoreModelMenu.setText(MessageManager
.getString("label.select_score_model"));
- scoreMatrixMenu.addMenuListener(new MenuListener()
+ scoreModelMenu.addMenuListener(new MenuListener()
{
+ @Override
public void menuSelected(MenuEvent e)
{
- scoreMatrix_menuSelected();
+ scoreModel_menuSelected();
}
+ @Override
public void menuDeselected(MenuEvent e)
{
}
+ @Override
public void menuCanceled(MenuEvent e)
{
}
showLabels.setText(MessageManager.getString("label.show_labels"));
showLabels.addActionListener(new ActionListener()
{
+ @Override
public void actionPerformed(ActionEvent e)
{
showLabels_actionPerformed(e);
}
});
- print.setText(MessageManager.getString("action.print"));
+ JMenuItem bgcolour = new JMenuItem();
bgcolour.setText(MessageManager.getString("action.background_colour"));
bgcolour.addActionListener(new ActionListener()
{
+ @Override
public void actionPerformed(ActionEvent e)
{
bgcolour_actionPerformed(e);
}
});
+ JMenuItem originalSeqData = new JMenuItem();
originalSeqData.setText(MessageManager.getString("label.input_data"));
originalSeqData.addActionListener(new ActionListener()
{
+ @Override
public void actionPerformed(ActionEvent e)
{
originalSeqData_actionPerformed(e);
protSetting_actionPerfomed(arg0);
}
});
- jvVersionSetting.setText(MessageManager
- .getString("label.jalview_pca_calculation"));
- jvVersionSetting.addActionListener(new ActionListener()
- {
- @Override
- public void actionPerformed(ActionEvent arg0)
- {
- jvVersionSetting_actionPerfomed(arg0);
- }
- });
- calcSettings.add(jvVersionSetting);
+
calcSettings.add(nuclSetting);
calcSettings.add(protSetting);
- calcSettings.add(scoreMatrixMenu);
- statusPanel.setLayout(statusPanelLayout);
- statusBar.setFont(new java.awt.Font("Verdana", 0, 12));
+ calcSettings.add(scoreModelMenu);
+ statusPanel.setLayout(new GridLayout());
+ statusBar.setFont(VERDANA_12);
// statusPanel.setBackground(Color.lightGray);
// statusBar.setBackground(Color.lightGray);
// statusPanel.add(statusBar, null);
jPanel2.add(jLabel3, null);
jPanel2.add(zCombobox, null);
jPanel2.add(resetButton, null);
+
+ JMenuBar jMenuBar1 = new JMenuBar();
jMenuBar1.add(fileMenu);
jMenuBar1.add(viewMenu);
jMenuBar1.add(calcSettings);
+ setJMenuBar(jMenuBar1);
fileMenu.add(saveMenu);
fileMenu.add(outputValues);
fileMenu.add(print);
viewMenu.add(associateViewsMenu);
}
- protected void scoreMatrix_menuSelected()
+ protected void scoreModel_menuSelected()
{
// TODO Auto-generated method stub
{
}
-
- protected void jvVersionSetting_actionPerfomed(ActionEvent arg0)
- {
- // TODO Auto-generated method stub
-
- }
}
}
/**
- * Creates a new Matrix object. For example
+ * Creates a new Matrix object containing a copy of the supplied array values.
+ * For example
*
* <pre>
* new Matrix(new double[][] {{2, 3, 4}, {5, 6, 7})
{
this.rows = values.length;
this.cols = this.rows == 0 ? 0 : values[0].length;
- this.value = values;
+
+ /*
+ * make a defensive copy of the values array, so that later changes to
+ * the caller's array do not alter this matrix
+ */
+ this.value = new double[rows][];
+ int i = 0;
+ for (double[] row : values)
+ {
+ if (row != null)
+ {
+ value[i] = new double[row.length];
+ System.arraycopy(row, 0, value[i], 0, row.length);
+ }
+ i++;
+ }
}
/**
* Returns a new matrix which is the transpose of this one
*
- * @return DOCUMENT ME!
+ * @return a new matrix which is the transpose of this one
*/
@Override
public MatrixI transpose()
return value[i][j];
}
+ @Override
public void setValue(int i, int j, double val)
{
value[i][j] = val;
System.arraycopy(value[i], 0, row, 0, cols);
return row;
}
+
+ /**
+  * Finds the smallest and largest values held in the matrix. Null rows are
+  * skipped. NaN entries never satisfy either comparison, so they do not
+  * change the reported minimum or maximum.
+  *
+  * @return a length 2 array of {minValue, maxValue}, or null if the matrix has
+  *         no value array or holds no entries
+  */
+ double[] findMinMax()
+ {
+   if (value == null)
+   {
+     return null;
+   }
+
+   /*
+    * seed with the infinities rather than +/-Double.MAX_VALUE, so that a
+    * matrix holding only infinite values still reports them correctly
+    */
+   double min = Double.POSITIVE_INFINITY;
+   double max = Double.NEGATIVE_INFINITY;
+   boolean empty = true;
+   for (double[] row : value)
+   {
+     if (row == null)
+     {
+       continue;
+     }
+     for (double x : row)
+     {
+       empty = false;
+       if (x > max)
+       {
+         max = x;
+       }
+       if (x < min)
+       {
+         min = x;
+       }
+     }
+   }
+   return empty ? null : new double[] { min, max };
+ }
+
+ /**
+  * Reverses the range of the values in this matrix, in place, by replacing
+  * each value x with (pivot - x). The pivot is the maximum value when
+  * <code>maxToZero</code> is true, otherwise (minimum + maximum). Nothing is
+  * changed if the matrix has no values.
+  *
+  * @param maxToZero
+  *          if true, every value is subtracted from the maximum; if false,
+  *          every value is subtracted from (minimum + maximum)
+  */
+ @Override
+ public void reverseRange(boolean maxToZero)
+ {
+   double[] range = (value == null) ? null : findMinMax();
+   if (range == null)
+   {
+     return; // no value array, or an empty matrix
+   }
+
+   final double pivot;
+   if (maxToZero)
+   {
+     pivot = range[1];
+   }
+   else
+   {
+     pivot = range[0] + range[1];
+   }
+
+   for (double[] row : value)
+   {
+     if (row == null)
+     {
+       continue;
+     }
+     for (int col = 0; col < row.length; col++)
+     {
+       row[col] = pivot - row[col];
+     }
+   }
+ }
+
+ /**
+  * Multiplies every entry in the matrix, in place, by the given value. Null
+  * rows are skipped, and nothing is done if the matrix has no value array.
+  *
+  * @param by
+  *          the factor each entry is multiplied by
+  */
+ @Override
+ public void multiply(double by)
+ {
+   /*
+    * same guard as findMinMax and reverseRange, so that a matrix without
+    * values is left alone rather than throwing a NullPointerException
+    */
+   if (value == null)
+   {
+     return;
+   }
+   for (double[] row : value)
+   {
+     if (row != null)
+     {
+       for (int i = 0; i < row.length; i++)
+       {
+         row[i] *= by;
+       }
+     }
+   }
+ }
}
double getValue(int i, int j);
/**
+ * Sets the value at row i, column j
+ *
+ * @param i
+ * @param j
+ * @param d
+ */
+ void setValue(int i, int j, double d);
+
+ /**
* Answers a copy of the values in the i'th row
*
* @return
void tred();
+ /**
+ * Reverses the range of the matrix values, so that the smallest values become
+ * the largest, and the largest become the smallest. This operation supports
+ * using a distance measure as a similarity measure, or vice versa.
+ * <p>
+ * If parameter <code>maxToZero</code> is true, then the maximum value becomes
+ * zero, i.e. all values are subtracted from the maximum. This is consistent
+ * with converting an identity similarity score to a distance score - the most
+ * similar (identity) corresponds to zero distance. However note that the
+ * operation is not reversible (unless the original minimum value is zero).
+ * For example a range of 10-40 would become 30-0, which would reverse a
+ * second time to 0-30. Also note that a general similarity measure (such as
+ * BLOSUM) may give different 'identity' scores for different sequences, so
+ * they cannot all convert to zero distance.
+ * <p>
+ * If parameter <code>maxToZero</code> is false, then the values are reflected
+ * about the average of {min, max} (effectively swapping min and max). This
+ * operation <em>is</em> reversible.
+ *
+ * @param maxToZero
+ *          if true, every value is subtracted from the maximum value; if
+ *          false, every value is subtracted from (minimum + maximum)
+ */
+ void reverseRange(boolean maxToZero);
+
+ /**
+ * Multiply all entries by the given value
+ *
+ * @param d
+ *          the factor by which each entry is multiplied
+ */
+ void multiply(double d);
}
*/
package jalview.schemes;
+import jalview.analysis.scoremodels.ScoreModels;
+import jalview.api.analysis.PairwiseScoreModelI;
import jalview.datamodel.AnnotatedCollectionI;
import jalview.datamodel.SequenceCollectionI;
import jalview.datamodel.SequenceI;
public Color findColour(char res, int j, SequenceI seq,
String consensusResidue, float pid)
{
+ PairwiseScoreModelI sm = ScoreModels.getInstance().getBlosum62();
+
/*
* compare as upper case; note consensusResidue is
* always computed as uppercase
}
else
{
- int c = 0;
+ float score = 0;
for (char consensus : consensusResidue.toCharArray())
{
- c += ResidueProperties.getBLOSUM62(consensus, res);
+ score += sm.getPairwiseScore(consensus, res);
}
- if (c > 0)
+ if (score > 0)
{
colour = LIGHT_BLUE;
}
*/
package jalview.schemes;
-import jalview.analysis.scoremodels.FeatureScoreModel;
-import jalview.analysis.scoremodels.PIDScoreModel;
-import jalview.api.analysis.ScoreModelI;
-
import java.awt.Color;
import java.util.ArrayList;
import java.util.Arrays;
public class ResidueProperties
{
- public static Hashtable<String, ScoreModelI> scoreMatrices = new Hashtable<String, ScoreModelI>();
-
// Stores residue codes/names and colours and other things
public static final int[] aaIndex; // aaHash version 2.1.1 and below
// public static final double hydmax = 1.38;
// public static final double hydmin = -2.53;
- private static final int[][] BLOSUM62 = {
- { 4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -3,
- -2, 0, -2, -1, 0, -4 },
- { -1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, -1, -3, -2, -1, -1, -3,
- -2, -3, -1, 0, -1, -4 },
- { -2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0, -2, -3, -2, 1, 0, -4, -2,
- -3, 3, 0, -1, -4 },
- { -2, -2, 1, 6, -3, 0, 2, -1, -1, -3, -4, -1, -3, -3, -1, 0, -1, -4,
- -3, -3, 4, 1, -1, -4 },
- { 0, -3, -3, -3, 9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1,
- -2, -2, -1, -3, -3, -2, -4 },
- { -1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, 1, 0, -3, -1, 0, -1, -2, -1,
- -2, 0, 3, -1, -4 },
- { -1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, 1, -2, -3, -1, 0, -1, -3, -2,
- -2, 1, 4, -1, -4 },
- { 0, -2, 0, -1, -3, -2, -2, 6, -2, -4, -4, -2, -3, -3, -2, 0, -2, -2,
- -3, -3, -1, -2, -1, -4 },
- { -2, 0, 1, -1, -3, 0, 0, -2, 8, -3, -3, -1, -2, -1, -2, -1, -2, -2,
- 2, -3, 0, 0, -1, -4 },
- { -1, -3, -3, -3, -1, -3, -3, -4, -3, 4, 2, -3, 1, 0, -3, -2, -1, -3,
- -1, 3, -3, -3, -1, -4 },
- { -1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, -2, 2, 0, -3, -2, -1, -2,
- -1, 1, -4, -3, -1, -4 },
- { -1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5, -1, -3, -1, 0, -1, -3,
- -2, -2, 0, 1, -1, -4 },
- { -1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1, 5, 0, -2, -1, -1, -1,
- -1, 1, -3, -1, -1, -4 },
- { -2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3, 0, 6, -4, -2, -2, 1,
- 3, -1, -3, -3, -1, -4 },
- { -1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4, 7, -1, -1,
- -4, -3, -2, -2, -1, -2, -4 },
- { 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, 0, -1, -2, -1, 4, 1, -3, -2,
- -2, 0, 0, 0, -4 },
- { 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 1, 5, -2,
- -2, 0, -1, -1, 0, -4 },
- { -3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1, 1, -4, -3, -2,
- 11, 2, -3, -4, -3, -2, -4 },
- { -2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2, -1, 3, -3, -2, -2,
- 2, 7, -1, -3, -2, -1, -4 },
- { 0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2, 1, -1, -2, -2, 0, -3,
- -1, 4, -3, -2, -1, -4 },
- { -2, -1, 3, 4, -3, 0, 1, -1, 0, -3, -4, 0, -3, -3, -2, 0, -1, -4,
- -3, -3, 4, 1, -1, -4 },
- { -1, 0, 0, 1, -3, 3, 4, -2, 0, -3, -3, 1, -1, -3, -1, 0, -1, -3, -2,
- -2, 1, 4, -1, -4 },
- { 0, -1, -1, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, 0, 0,
- -2, -1, -1, -1, -1, -1, -4 },
- { -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4,
- -4, -4, -4, -4, -4, -4, 1 }, };
-
- static final int[][] PAM250 = {
- { 2, -2, 0, 0, -2, 0, 0, 1, -1, -1, -2, -1, -1, -3, 1, 1, 1, -6, -3,
- 0, 0, 0, 0, -8 },
- { -2, 6, 0, -1, -4, 1, -1, -3, 2, -2, -3, 3, 0, -4, 0, 0, -1, 2, -4,
- -2, -1, 0, -1, -8 },
- { 0, 0, 2, 2, -4, 1, 1, 0, 2, -2, -3, 1, -2, -3, 0, 1, 0, -4, -2, -2,
- 2, 1, 0, -8 },
- { 0, -1, 2, 4, -5, 2, 3, 1, 1, -2, -4, 0, -3, -6, -1, 0, 0, -7, -4,
- -2, 3, 3, -1, -8 },
- { -2, -4, -4, -5, 12, -5, -5, -3, -3, -2, -6, -5, -5, -4, -3, 0, -2,
- -8, 0, -2, -4, -5, -3, -8 },
- { 0, 1, 1, 2, -5, 4, 2, -1, 3, -2, -2, 1, -1, -5, 0, -1, -1, -5, -4,
- -2, 1, 3, -1, -8 },
- { 0, -1, 1, 3, -5, 2, 4, 0, 1, -2, -3, 0, -2, -5, -1, 0, 0, -7, -4,
- -2, 3, 3, -1, -8 },
- { 1, -3, 0, 1, -3, -1, 0, 5, -2, -3, -4, -2, -3, -5, 0, 1, 0, -7, -5,
- -1, 0, 0, -1, -8 },
- { -1, 2, 2, 1, -3, 3, 1, -2, 6, -2, -2, 0, -2, -2, 0, -1, -1, -3, 0,
- -2, 1, 2, -1, -8 },
- { -1, -2, -2, -2, -2, -2, -2, -3, -2, 5, 2, -2, 2, 1, -2, -1, 0, -5,
- -1, 4, -2, -2, -1, -8 },
- { -2, -3, -3, -4, -6, -2, -3, -4, -2, 2, 6, -3, 4, 2, -3, -3, -2, -2,
- -1, 2, -3, -3, -1, -8 },
- { -1, 3, 1, 0, -5, 1, 0, -2, 0, -2, -3, 5, 0, -5, -1, 0, 0, -3, -4,
- -2, 1, 0, -1, -8 },
- { -1, 0, -2, -3, -5, -1, -2, -3, -2, 2, 4, 0, 6, 0, -2, -2, -1, -4,
- -2, 2, -2, -2, -1, -8 },
- { -3, -4, -3, -6, -4, -5, -5, -5, -2, 1, 2, -5, 0, 9, -5, -3, -3, 0,
- 7, -1, -4, -5, -2, -8 },
- { 1, 0, 0, -1, -3, 0, -1, 0, 0, -2, -3, -1, -2, -5, 6, 1, 0, -6, -5,
- -1, -1, 0, -1, -8 },
- { 1, 0, 1, 0, 0, -1, 0, 1, -1, -1, -3, 0, -2, -3, 1, 2, 1, -2, -3,
- -1, 0, 0, 0, -8 },
- { 1, -1, 0, 0, -2, -1, 0, 0, -1, 0, -2, 0, -1, -3, 0, 1, 3, -5, -3,
- 0, 0, -1, 0, -8 },
- { -6, 2, -4, -7, -8, -5, -7, -7, -3, -5, -2, -3, -4, 0, -6, -2, -5,
- 17, 0, -6, -5, -6, -4, -8 },
- { -3, -4, -2, -4, 0, -4, -4, -5, 0, -1, -1, -4, -2, 7, -5, -3, -3, 0,
- 10, -2, -3, -4, -2, -8 },
- { 0, -2, -2, -2, -2, -2, -2, -1, -2, 4, 2, -2, 2, -1, -1, -1, 0, -6,
- -2, 4, -2, -2, -1, -8 },
- { 0, -1, 2, 3, -4, 1, 3, 0, 1, -2, -3, 1, -2, -4, -1, 0, 0, -5, -3,
- -2, 3, 2, -1, -8 },
- { 0, 0, 1, 3, -5, 3, 3, 0, 2, -2, -3, 0, -2, -5, 0, 0, -1, -6, -4,
- -2, 2, 3, -1, -8 },
- { 0, -1, 0, -1, -3, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, 0, 0, -4,
- -2, -1, -1, -1, -1, -8 },
- { -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8,
- -8, -8, -8, -8, -8, -8, 1 }, };
// not currently used
// public static final Map<String, Color> ssHash = new Hashtable<String,
* Color.white, // R Color.white, // Y Color.white, // N Color.white, // Gap
*/
- // JBPNote: patch matrix for T/U equivalence when working with DNA or RNA.
- // Will equate sequences if working with mixed nucleotide sets.
- // treats T and U identically. R and Y weak equivalence with AG and CTU.
- // N matches any other base weakly
- //
- static final int[][] DNA = { { 10, -8, -8, -8, -8, 1, 1, 1, -8, 1, 1 }, // A
- { -8, 10, -8, -8, -8, 1, 1, -8, 1, 1, 1 }, // C
- { -8, -8, 10, -8, -8, 1, 1, 1, -8, 1, 1 }, // G
- { -8, -8, -8, 10, 10, 1, 1, -8, 1, 1, 1 }, // T
- { -8, -8, -8, 10, 10, 1, 1, -8, 1, 1, 1 }, // U
- { 1, 1, 1, 1, 1, 10, 0, 0, 0, 1, 1 }, // I
- { 1, 1, 1, 1, 1, 0, 10, 0, 0, 1, 1 }, // X
- { 1, -8, 1, -8, -8, 0, 0, 10, -8, 1, 1 }, // R
- { -8, 1, -8, 1, 1, 0, 0, -8, 10, 1, 1 }, // Y
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 10, 1 }, // N
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // -
- };
- /**
- * register matrices in list
- */
- static
- {
- scoreMatrices.put("BLOSUM62", new ScoreMatrix("BLOSUM62", BLOSUM62, 0));
- scoreMatrices.put("PAM250", new ScoreMatrix("PAM250", PAM250, 0));
- scoreMatrices.put("DNA", new ScoreMatrix("DNA", DNA, 1));
- }
-
public static List<String> STOP = Arrays.asList("TGA", "TAA", "TAG");
public static String START = "ATG";
propMatrixPos[i][i] = maxP;
propMatrixEpos[i][i] = maxEP;
}
- // JAL-1512 comment out physicochemical score matrices for 2.8.1 release
- // scoreMatrices.put("Conservation Pos", new
- // ScoreMatrix("Conservation Pos",propMatrixPos,0));
- // scoreMatrices.put("Conservation Both", new
- // ScoreMatrix("Conservation Both",propMatrixF,0));
- // scoreMatrices.put("Conservation EnhPos", new
- // ScoreMatrix("Conservation EnhPos",propMatrixEpos,0));
- scoreMatrices.put("PID", new PIDScoreModel());
- scoreMatrices.put("Displayed Features", new FeatureScoreModel());
}
private ResidueProperties()
return aa3Hash;
}
- public static int[][] getDNA()
- {
- return ResidueProperties.DNA;
- }
-
- public static int[][] getBLOSUM62()
- {
- return ResidueProperties.BLOSUM62;
- }
-
- public static int getPAM250(String A1, String A2)
- {
- return getPAM250(A1.charAt(0), A2.charAt(0));
- }
-
- public static int getBLOSUM62(char c1, char c2)
- {
- int pog = 0;
-
- try
- {
- int a = aaIndex[c1];
- int b = aaIndex[c2];
-
- pog = ResidueProperties.BLOSUM62[a][b];
- } catch (Exception e)
- {
- // System.out.println("Unknown residue in " + A1 + " " + A2);
- }
-
- return pog;
- }
-
public static String codonTranslate(String lccodon)
{
String cdn = codonHash2.get(lccodon.toUpperCase());
return cdn;
}
- public static int[][] getDefaultPeptideMatrix()
- {
- return ResidueProperties.getBLOSUM62();
- }
-
- public static int[][] getDefaultDnaMatrix()
- {
- return ResidueProperties.getDNA();
- }
-
- /**
- * get a ScoreMatrix based on its string name
- *
- * @param pwtype
- * @return matrix in scoreMatrices with key pwtype or null
- */
- public static ScoreMatrix getScoreMatrix(String pwtype)
- {
- Object val = scoreMatrices.get(pwtype);
- if (val != null && val instanceof ScoreMatrix)
- {
- return (ScoreMatrix) val;
- }
- return null;
- }
-
- /**
- * get a ScoreModel based on its string name
- *
- * @param pwtype
- * @return scoremodel of type pwtype or null
- */
- public static ScoreModelI getScoreModel(String pwtype)
- {
- return scoreMatrices.get(pwtype);
- }
-
- public static int getPAM250(char c, char d)
- {
- int a = aaIndex[c];
- int b = aaIndex[d];
-
- int pog = ResidueProperties.PAM250[a][b];
-
- return pog;
- }
-
public static Hashtable<String, String> toDssp3State;
static
{
+++ /dev/null
-/*
- * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
- * Copyright (C) $$Year-Rel$$ The Jalview Authors
- *
- * This file is part of Jalview.
- *
- * Jalview is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, either version 3
- * of the License, or (at your option) any later version.
- *
- * Jalview is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
- * The Jalview Authors are detailed in the 'AUTHORS' file.
- */
-package jalview.schemes;
-
-import jalview.analysis.scoremodels.PairwiseSeqScoreModel;
-import jalview.math.Matrix;
-import jalview.math.MatrixI;
-
-public class ScoreMatrix extends PairwiseSeqScoreModel
-{
- String name;
-
- @Override
- public String getName()
- {
- return name;
- }
-
- /**
- * reference to integer score matrix
- */
- int[][] matrix;
-
- /**
- * 0 for Protein Score matrix. 1 for dna score matrix
- */
- int type;
-
- /**
- *
- * @param name
- * Unique, human readable name for the matrix
- * @param matrix
- * Pairwise scores indexed according to appropriate symbol alphabet
- * @param type
- * 0 for Protein, 1 for NA
- */
- ScoreMatrix(String name, int[][] matrix, int type)
- {
- this.matrix = matrix;
- this.type = type;
- this.name = name;
- }
-
- @Override
- public boolean isDNA()
- {
- return type == 1;
- }
-
- @Override
- public boolean isProtein()
- {
- return type == 0;
- }
-
- @Override
- public int[][] getMatrix()
- {
- return matrix;
- }
-
- /**
- * Answers the score for substituting first char in A1 with first char in A2
- *
- * @param A1
- * @param A2
- * @return
- */
- public int getPairwiseScore(String A1, String A2)
- {
- return getPairwiseScore(A1.charAt(0), A2.charAt(0));
- }
-
- @Override
- public int getPairwiseScore(char c, char d)
- {
- int score = 0;
-
- try
- {
- int a = (type == 0) ? ResidueProperties.aaIndex[c]
- : ResidueProperties.nucleotideIndex[c];
- int b = (type == 0) ? ResidueProperties.aaIndex[d]
- : ResidueProperties.nucleotideIndex[d];
- score = matrix[a][b];
- } catch (Exception e)
- {
- // System.out.println("Unknown residue in " + A1 + " " + A2);
- }
-
- return score;
- }
-
- /**
- * pretty print the matrix
- */
- @Override
- public String toString()
- {
- return outputMatrix(false);
- }
-
- public String outputMatrix(boolean html)
- {
- StringBuffer sb = new StringBuffer();
- int[] symbols = (type == 0) ? ResidueProperties.aaIndex
- : ResidueProperties.nucleotideIndex;
- int symMax = (type == 0) ? ResidueProperties.maxProteinIndex
- : ResidueProperties.maxNucleotideIndex;
- boolean header = true;
- if (html)
- {
- sb.append("<table border=\"1\">");
- }
- for (char sym = 'A'; sym <= 'Z'; sym++)
- {
- if (symbols[sym] >= 0 && symbols[sym] < symMax)
- {
- if (header)
- {
- sb.append(html ? "<tr><td></td>" : "");
- for (char sym2 = 'A'; sym2 <= 'Z'; sym2++)
- {
- if (symbols[sym2] >= 0 && symbols[sym2] < symMax)
- {
- sb.append((html ? "<td> " : "\t") + sym2
- + (html ? " </td>" : ""));
- }
- }
- header = false;
- sb.append(html ? "</tr>\n" : "\n");
- }
- if (html)
- {
- sb.append("<tr>");
- }
- sb.append((html ? "<td>" : "") + sym + (html ? "</td>" : ""));
- for (char sym2 = 'A'; sym2 <= 'Z'; sym2++)
- {
- if (symbols[sym2] >= 0 && symbols[sym2] < symMax)
- {
- sb.append((html ? "<td>" : "\t")
- + matrix[symbols[sym]][symbols[sym2]]
- + (html ? "</td>" : ""));
- }
- }
- sb.append(html ? "</tr>\n" : "\n");
- }
- }
- if (html)
- {
- sb.append("</table>");
- }
- return sb.toString();
- }
-
- /**
- * Computes an NxN matrix where N is the number of sequences, and entry [i, j]
- * is sequence[i] pairwise multiplied with sequence[j], as a sum of scores
- * computed using the current score matrix. For example
- * <ul>
- * <li>Sequences:</li>
- * <li>FKL</li>
- * <li>R-D</li>
- * <li>QIA</li>
- * <li>GWC</li>
- * <li>Score matrix is BLOSUM62</li>
- * <li>Gaps treated same as X (unknown)</li>
- * <li>product [0, 0] = F.F + K.K + L.L = 6 + 5 + 4 = 15</li>
- * <li>product [1, 1] = R.R + -.- + D.D = 5 + -1 + 6 = 10</li>
- * <li>product [2, 2] = Q.Q + I.I + A.A = 5 + 4 + 4 = 13</li>
- * <li>product [3, 3] = G.G + W.W + C.C = 6 + 11 + 9 = 26</li>
- * <li>product[0, 1] = F.R + K.- + L.D = -3 + -1 + -3 = -8
- * <li>and so on</li>
- * </ul>
- */
- public MatrixI computePairwiseScores(String[] seqs)
- {
- double[][] values = new double[seqs.length][];
- for (int row = 0; row < seqs.length; row++)
- {
- values[row] = new double[seqs.length];
- for (int col = 0; col < seqs.length; col++)
- {
- int total = 0;
- int width = Math.min(seqs[row].length(), seqs[col].length());
- for (int i = 0; i < width; i++)
- {
- char c1 = seqs[row].charAt(i);
- char c2 = seqs[col].charAt(i);
- int score = getPairwiseScore(c1, c2);
- total += score;
- }
- values[row][col] = total;
- }
- }
- return new Matrix(values);
- }
-}
* Attempt pairwise alignment of the sequence with each chain in the PDB,
* and remember the highest scoring chain
*/
- int max = -10;
+ float max = -10;
AlignSeq maxAlignseq = null;
String maxChainId = " ";
PDBChain maxChain = null;
private static final int TO_UPPER_CASE = 'a' - 'A';
- private static final char GAP_SPACE = ' ';
+ public static final char GAP_SPACE = ' ';
- private static final char GAP_DOT = '.';
+ public static final char GAP_DOT = '.';
- private static final char GAP_DASH = '-';
+ public static final char GAP_DASH = '-';
public static final String GapChars = new String(new char[] { GAP_SPACE,
GAP_DOT, GAP_DASH });
* @param s2
* SequenceI
* @return float
+ * @deprecated use PIDModel.computePID()
*/
+ @Deprecated
public final static float PID(String seq1, String seq2)
{
return PID(seq1, seq2, 0, seq1.length());
static final int caseShift = 'a' - 'A';
// Another pid with region specification
+ /**
+ * @deprecated use PIDModel.computePID()
+ */
+ @Deprecated
public final static float PID(String seq1, String seq2, int start, int end)
{
return PID(seq1, seq2, start, end, true, false);
* @param ungappedOnly
* - if true - only count PID over ungapped columns
* @return
+ * @deprecated use PIDModel.computePID()
*/
+ @Deprecated
public final static float PID(String seq1, String seq2, int start,
int end, boolean wcGaps, boolean ungappedOnly)
{
--- /dev/null
+package jalview.util;
+
+import java.util.Set;
+
+public class SetUtils
+{
+  /**
+   * Counts the elements that belong to exactly one of the two sets, i.e. the
+   * size of their symmetric difference. A null set is treated as if it were
+   * empty. Neither set is modified.
+   *
+   * @param set1
+   *          the first set (may be null)
+   * @param set2
+   *          the second set (may be null)
+   * @return the number of elements that are in one set but not the other
+   */
+  public static int countDisjunction(Set<? extends Object> set1,
+          Set<? extends Object> set2)
+  {
+    if (set1 == null || set2 == null)
+    {
+      /*
+       * at most one set is present, so everything in it is unshared
+       */
+      Set<? extends Object> present = (set1 == null) ? set2 : set1;
+      return present == null ? 0 : present.size();
+    }
+
+    final int size1 = set1.size();
+    final int size2 = set2.size();
+
+    /*
+     * walk the smaller set and look each member up in the other one
+     */
+    final boolean firstIsSmaller = size1 < size2;
+    final Set<? extends Object> toWalk = firstIsSmaller ? set1 : set2;
+    final Set<? extends Object> toProbe = firstIsSmaller ? set2 : set1;
+
+    int shared = 0;
+    for (Object member : toWalk)
+    {
+      if (toProbe.contains(member))
+      {
+        shared++;
+      }
+    }
+
+    return (size1 - shared) + (size2 - shared);
+  }
+}
import jalview.analysis.PCA;
import jalview.api.RotatableCanvasI;
+import jalview.api.analysis.ScoreModelI;
+import jalview.api.analysis.SimilarityParamsI;
import jalview.datamodel.AlignmentView;
import jalview.datamodel.SequenceI;
import jalview.datamodel.SequencePoint;
public class PCAModel
{
- /*
- * Jalview 2.10.1 treated gaps as X (peptide) or N (nucleotide)
- * for pairwise scoring; 2.10.2 uses gap score (last column) in
- * score matrix (JAL-2397)
- * Set this flag to true (via Groovy) for 2.10.1 behaviour
- */
- private static boolean scoreGapAsAny = false;
-
- public PCAModel(AlignmentView seqstrings2, SequenceI[] seqs2,
- boolean nucleotide2)
- {
- seqstrings = seqstrings2;
- seqs = seqs2;
- nucleotide = nucleotide2;
- score_matrix = nucleotide2 ? "PID" : "BLOSUM62";
- }
-
private volatile PCA pca;
int top;
SequenceI[] seqs;
- /**
- * Score matrix used to calculate PC
+ /*
+ * Score model used to calculate PCA
*/
- String score_matrix;
+ ScoreModelI scoreModel;
- /**
- * use the identity matrix for calculating similarity between sequences.
- */
private boolean nucleotide = false;
private Vector<SequencePoint> points;
private boolean jvCalcMode = true;
- public boolean isJvCalcMode()
+ private SimilarityParamsI similarityParams;
+
+ /**
+ * Constructor given sequence data, score model and score calculation
+ * parameter options.
+ *
+ * @param seqData
+ * @param sqs
+ * @param nuc
+ * @param sm
+ * @param params
+ */
+ public PCAModel(AlignmentView seqData, SequenceI[] sqs, boolean nuc, ScoreModelI sm,
+ SimilarityParamsI params)
{
- return jvCalcMode;
+ seqstrings = seqData;
+ seqs = sqs;
+ nucleotide = nuc;
+ scoreModel = sm;
+ similarityParams = params;
}
public void run()
{
- char gapChar = scoreGapAsAny ? (nucleotide ? 'N' : 'X') : ' ';
- String[] sequenceStrings = seqstrings.getSequenceStrings(gapChar);
- pca = new PCA(sequenceStrings, nucleotide,
- score_matrix);
- pca.setJvCalcMode(jvCalcMode);
+ pca = new PCA(seqstrings, scoreModel, similarityParams);
pca.run();
// Now find the component coordinates
jvCalcMode = state;
}
- public String getScore_matrix()
+ public String getScoreModelName()
{
- return score_matrix;
+ return scoreModel == null ? "" : scoreModel.getName();
}
- public void setScore_matrix(String score_matrix)
+ public void setScoreModel(ScoreModelI sm)
{
- this.score_matrix = score_matrix;
+ this.scoreModel = sm;
}
}
AlignFrame.DEFAULT_WIDTH, AlignFrame.DEFAULT_HEIGHT);
if (nf != null)
{
- af.ShowNewickTree(nf, MessageManager.formatMessage(
+ af.showNewickTree(nf, MessageManager.formatMessage(
"label.tree_from", new String[] { this.alTitle }));
}
// initialise with same renderer settings as in parent alignframe.
package jalview.ws.sifts;
import jalview.analysis.AlignSeq;
+import jalview.analysis.scoremodels.ScoreMatrix;
+import jalview.analysis.scoremodels.ScoreModels;
import jalview.api.DBRefEntryI;
import jalview.api.SiftsClientI;
import jalview.datamodel.DBRefEntry;
}
@Override
- public StringBuffer getMappingOutput(MappingOutputPojo mp)
+ public StringBuilder getMappingOutput(MappingOutputPojo mp)
throws SiftsException
{
String seqRes = mp.getSeqResidue();
int nochunks = ((seqRes.length()) / len)
+ ((seqRes.length()) % len > 0 ? 1 : 0);
// output mappings
- StringBuffer output = new StringBuffer();
+ StringBuilder output = new StringBuilder(512);
output.append(NEWLINE);
output.append("Sequence \u27f7 Structure mapping details").append(
NEWLINE);
output.append(String.valueOf(pdbEnd));
output.append(NEWLINE).append(NEWLINE);
+ ScoreMatrix pam250 = ScoreModels.getInstance().getPam250();
int matchedSeqCount = 0;
for (int j = 0; j < nochunks; j++)
{
output.append(NEWLINE);
output.append(new Format("%" + (maxid) + "s").form(" ")).append(" ");
- // Print out the matching chars
+ /*
+ * Print out the match symbols:
+ * | for exact match (ignoring case)
+ * . if PAM250 score is positive
+ * else a space
+ */
for (int i = 0; i < len; i++)
{
try
{
if ((i + (j * len)) < seqRes.length())
{
- boolean sameChar = Comparison.isSameResidue(
- seqRes.charAt(i + (j * len)),
- strRes.charAt(i + (j * len)), false);
- if (sameChar
- && !jalview.util.Comparison.isGap(seqRes.charAt(i
- + (j * len))))
+ char c1 = seqRes.charAt(i + (j * len));
+ char c2 = strRes.charAt(i + (j * len));
+ boolean sameChar = Comparison.isSameResidue(c1, c2, false);
+ if (sameChar && !Comparison.isGap(c1))
{
matchedSeqCount++;
output.append("|");
}
else if (type.equals("pep"))
{
- if (ResidueProperties.getPAM250(seqRes.charAt(i + (j * len)),
- strRes.charAt(i + (j * len))) > 0)
+ if (pam250.getPairwiseScore(c1, c2) > 0)
{
output.append(".");
}
*/
package jalview.analysis;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNull;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
+import jalview.datamodel.Sequence;
import jalview.gui.JvOptionPane;
import org.testng.annotations.BeforeClass;
assertNull(AlignSeq.extractGaps(null, "ACG"));
assertNull(AlignSeq.extractGaps("-. ", null));
- assertEquals(" AC-G.T", AlignSeq.extractGaps("", " AC-G.T"));
- assertEquals("AC-G.T", AlignSeq.extractGaps(" ", " AC-G.T"));
- assertEquals("ACG.T", AlignSeq.extractGaps(" -", " AC-G.T"));
- assertEquals("ACGT", AlignSeq.extractGaps(" -.", " AC-G.T ."));
- assertEquals(" ACG.T", AlignSeq.extractGaps("-", " AC-G.T"));
+ assertEquals(AlignSeq.extractGaps("", " AC-G.T"), " AC-G.T");
+ assertEquals(AlignSeq.extractGaps(" ", " AC-G.T"), "AC-G.T");
+ assertEquals(AlignSeq.extractGaps(" -", " AC-G.T"), "ACG.T");
+ assertEquals(AlignSeq.extractGaps(" -.", " AC-G.T ."), "ACGT");
+ assertEquals(AlignSeq.extractGaps("-", " AC-G.T"), " ACG.T");
+ }
+
+ @Test(groups = { "Functional" })
+ public void testIndexEncode_nucleotide()
+ {
+ AlignSeq as = new AlignSeq(new Sequence("s1", "TTAG"), new Sequence(
+ "s2", "ACGT"), AlignSeq.DNA);
+ int[] expected = new int[] { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
+ 7, 7, 8, 8, 9, 9, -1, -1, 10, -1 };
+ String s = "aAcCgGtTuUiIxXrRyYnN .-?";
+ assertArrayEquals(expected, as.indexEncode(s));
+ }
+
+ @Test(groups = { "Functional" })
+ public void testIndexEncode_peptide()
+ {
+ AlignSeq as = new AlignSeq(new Sequence("s1", "PFY"), new Sequence(
+ "s2", "RQW"), AlignSeq.PEP);
+ int[] expected = new int[] { 0, 0, 1, 1, 2, 2, 21, 21, 22, 22, -1, 23,
+ -1, -1, -1 };
+ String s = "aArRnNzZxX *.-?";
+ assertArrayEquals(expected, as.indexEncode(s));
}
}
};
as.printAlignment(ps);
- String expected = "Score = 320\nLength of alignment = 10\nSequence Seq1 : 3 - 18 (Sequence length = 14)\nSequence Seq1 : 1 - 10 (Sequence length = 10)\n\n"
+ String expected = "Score = 320.0\nLength of alignment = 10\nSequence Seq1 : 3 - 18 (Sequence length = 14)\nSequence Seq1 : 1 - 10 (Sequence length = 10)\n\n"
+ "Seq1 SDFAQQQRRR\n"
+ " ||||||| \n"
+ "Seq1 SDFAQQQSSS\n\n" + "Percentage ID = 70.00\n";
--- /dev/null
+/*
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
+ *
+ * This file is part of Jalview.
+ *
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
+ */
+package jalview.analysis.scoremodels;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+
+import jalview.api.analysis.SimilarityParamsI;
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.AlignmentView;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceFeature;
+import jalview.datamodel.SequenceI;
+import jalview.gui.AlignFrame;
+import jalview.gui.AlignViewport;
+import jalview.gui.JvOptionPane;
+import jalview.io.DataSourceType;
+import jalview.io.FileLoader;
+import jalview.math.MatrixI;
+
+import java.util.Arrays;
+
+import org.testng.Assert;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+public class FeatureDistanceModelTest
+{
+
+ @BeforeClass(alwaysRun = true)
+ public void setUpJvOptionPane()
+ {
+ JvOptionPane.setInteractiveMode(false);
+ JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
+ }
+
+ public static String alntestFile = "FER1_MESCR/72-76 DVYIL\nFER1_SPIOL/71-75 DVYIL\nFER3_RAPSA/21-25 DVYVL\nFER1_MAIZE/73-77 DVYIL\n";
+
+ int[] sf1 = new int[] { 74, 74, 73, 73, 23, 23, -1, -1 };
+
+ int[] sf2 = new int[] { -1, -1, 74, 75, -1, -1, 76, 77 };
+
+ int[] sf3 = new int[] { -1, -1, -1, -1, -1, -1, 76, 77 };
+
+ /**
+ * Load the test alignment (alntestFile) and add features to the dataset
+ * sequences as given by the sf1, sf2 and sf3 arrays, where each sequence has
+ * a pair of values per array and a value of -1 means 'no feature'.
+ * In the table below, X = feature added and . = feature not added:
+ *
+ * <pre>
+ * feature | FER1_MESCR | FER1_SPIOL | FER3_RAPSA | FER1_MAIZE
+ * sf1     |     X      |     X      |     X      |     .
+ * sf2     |     .      |     X      |     .      |     X
+ * sf3     |     .      |     .      |     .      |     X
+ * </pre>
+ *
+ * The three feature types are then made visible, and the method asserts that
+ * exactly three feature types are displayed.
+ *
+ * @return the alignment frame holding the loaded alignment, with sequence
+ *         features shown
+ */
+ public AlignFrame getTestAlignmentFrame()
+ {
+ AlignFrame alf = new FileLoader(false).LoadFileWaitTillLoaded(
+ alntestFile, DataSourceType.PASTE);
+ AlignmentI al = alf.getViewport().getAlignment();
+ Assert.assertEquals(al.getHeight(), 4);
+ Assert.assertEquals(al.getWidth(), 5);
+ for (int i = 0; i < 4; i++)
+ {
+ SequenceI ds = al.getSequenceAt(i).getDatasetSequence();
+ if (sf1[i * 2] > 0)
+ {
+ ds.addSequenceFeature(new SequenceFeature("sf1", "sf1", "sf1",
+ sf1[i * 2], sf1[i * 2 + 1], "sf1"));
+ }
+ if (sf2[i * 2] > 0)
+ {
+ ds.addSequenceFeature(new SequenceFeature("sf2", "sf2", "sf2",
+ sf2[i * 2], sf2[i * 2 + 1], "sf2"));
+ }
+ if (sf3[i * 2] > 0)
+ {
+ ds.addSequenceFeature(new SequenceFeature("sf3", "sf3", "sf3",
+ sf3[i * 2], sf3[i * 2 + 1], "sf3"));
+ }
+ }
+ alf.setShowSeqFeatures(true);
+ alf.getFeatureRenderer().setVisible("sf1");
+ alf.getFeatureRenderer().setVisible("sf2");
+ alf.getFeatureRenderer().setVisible("sf3");
+ alf.getFeatureRenderer().findAllFeatures(true);
+ Assert.assertEquals(alf.getFeatureRenderer().getDisplayedFeatureTypes()
+ .size(), 3, "Number of feature types");
+ assertTrue(alf.getCurrentView().areFeaturesDisplayed());
+ return alf;
+ }
+
+ @Test(groups = { "Functional" })
+ public void testFeatureScoreModel() throws Exception
+ {
+ AlignFrame alf = getTestAlignmentFrame();
+ FeatureDistanceModel fsm = new FeatureDistanceModel();
+ assertTrue(fsm.configureFromAlignmentView(alf.getCurrentView()
+ .getAlignPanel()));
+ alf.selectAllSequenceMenuItem_actionPerformed(null);
+
+ MatrixI dm = fsm.findDistances(
+ alf.getViewport().getAlignmentView(true),
+ SimilarityParams.Jalview);
+ assertEquals(dm.getValue(0, 2), 0d,
+ "FER1_MESCR (0) should be identical with RAPSA (2)");
+ assertTrue(dm.getValue(0, 1) > dm.getValue(0, 2),
+ "FER1_MESCR (0) should be further from SPIOL (1) than it is from RAPSA (2)");
+ }
+
+ @Test(groups = { "Functional" })
+ public void testFeatureScoreModel_hiddenFirstColumn() throws Exception
+ {
+ AlignFrame alf = getTestAlignmentFrame();
+ // hiding first two columns shouldn't affect the tree
+ alf.getViewport().hideColumns(0, 1);
+ FeatureDistanceModel fsm = new FeatureDistanceModel();
+ assertTrue(fsm.configureFromAlignmentView(alf.getCurrentView()
+ .getAlignPanel()));
+ alf.selectAllSequenceMenuItem_actionPerformed(null);
+ MatrixI dm = fsm.findDistances(
+ alf.getViewport().getAlignmentView(true),
+ SimilarityParams.Jalview);
+ assertEquals(dm.getValue(0, 2), 0d,
+ "FER1_MESCR (0) should be identical with RAPSA (2)");
+ assertTrue(dm.getValue(0, 1) > dm.getValue(0, 2),
+ "FER1_MESCR (0) should be further from SPIOL (1) than it is from RAPSA (2)");
+ }
+
+ @Test(groups = { "Functional" })
+ public void testFeatureScoreModel_HiddenColumns() throws Exception
+ {
+ AlignFrame alf = getTestAlignmentFrame();
+ // hide columns and check tree changes
+ alf.getViewport().hideColumns(3, 4);
+ alf.getViewport().hideColumns(0, 1);
+ FeatureDistanceModel fsm = new FeatureDistanceModel();
+ assertTrue(fsm.configureFromAlignmentView(alf.getCurrentView()
+ .getAlignPanel()));
+ alf.selectAllSequenceMenuItem_actionPerformed(null);
+ MatrixI dm = fsm.findDistances(
+ alf.getViewport().getAlignmentView(true),
+ SimilarityParams.Jalview);
+ assertEquals(
+ dm.getValue(0, 2),
+ 0d,
+ "After hiding last two columns FER1_MESCR (0) should still be identical with RAPSA (2)");
+ assertEquals(
+ dm.getValue(0, 1),
+ 0d,
+ "After hiding last two columns FER1_MESCR (0) should now also be identical with SPIOL (1)");
+ for (int s = 0; s < 3; s++)
+ {
+ assertTrue(dm.getValue(s, 3) > 0d, "After hiding last two columns "
+ + alf.getViewport().getAlignment().getSequenceAt(s).getName()
+ + "(" + s + ") should still be distinct from FER1_MAIZE (3)");
+ }
+ }
+
+ /**
+ * Check that findFeaturesAtRes doesn't return a contact feature except at
+ * its two contact points. A 'disulphide bond' feature is added with values
+ * 2 and 5, and is expected to be found at 2 and at 5 only.
+ * <p>
+ * TODO: move to the FeatureRendererModel test suite
+ */
+ @Test(groups = { "Functional" })
+ public void testFindFeatureAt_PointFeature() throws Exception
+ {
+ String alignment = "a CCCCCCGGGGGGCCCCCC\n" + "b CCCCCCGGGGGGCCCCCC\n"
+ + "c CCCCCCGGGGGGCCCCCC\n";
+ AlignFrame af = new jalview.io.FileLoader(false)
+ .LoadFileWaitTillLoaded(alignment, DataSourceType.PASTE);
+ SequenceI aseq = af.getViewport().getAlignment().getSequenceAt(0);
+ SequenceFeature sf = null;
+ sf = new SequenceFeature("disulphide bond", "", 2, 5, Float.NaN, "");
+ aseq.addSequenceFeature(sf);
+ assertTrue(sf.isContactFeature());
+ af.refreshFeatureUI(true);
+ af.getFeatureRenderer().setAllVisible(Arrays.asList("disulphide bond"));
+ Assert.assertEquals(af.getFeatureRenderer().getDisplayedFeatureTypes()
+ .size(), 1, "Should be just one feature type displayed");
+ // 1 is before the first contact point (2): no feature expected
+ Assert.assertEquals(af.getFeatureRenderer().findFeaturesAtRes(aseq, 1)
+ .size(), 0);
+ // 2 is the first contact point: the contact feature is expected
+ Assert.assertEquals(af.getFeatureRenderer().findFeaturesAtRes(aseq, 2)
+ .size(), 1);
+ // 3 lies between the contact points: no feature expected
+ Assert.assertEquals(af.getFeatureRenderer().findFeaturesAtRes(aseq, 3)
+ .size(), 0);
+ // 4 lies between the contact points: no feature expected
+ Assert.assertEquals(af.getFeatureRenderer().findFeaturesAtRes(aseq, 4)
+ .size(), 0);
+ // 5 is the second contact point: the contact feature is expected
+ Assert.assertEquals(af.getFeatureRenderer().findFeaturesAtRes(aseq, 5)
+ .size(), 1);
+ // 6 is after the second contact point (5): no feature expected
+ Assert.assertEquals(af.getFeatureRenderer().findFeaturesAtRes(aseq, 6)
+ .size(), 0);
+ }
+
+ @Test(groups = { "Functional" })
+ public void testFindDistances() throws Exception
+ {
+ String seqs = ">s1\nABCDE\n>seq2\nABCDE\n";
+ AlignFrame alf = new FileLoader().LoadFileWaitTillLoaded(seqs,
+ DataSourceType.PASTE);
+ SequenceI s1 = alf.getViewport().getAlignment().getSequenceAt(0);
+ SequenceI s2 = alf.getViewport().getAlignment().getSequenceAt(1);
+
+ /*
+ * set domain and variant features thus:
+ * ----5
+ * s1 ddd..
+ * s1 .vvv.
+ * s1 ..vvv
+ * s2 .ddd.
+ * s2 vv..v
+ * The number of unshared feature types per column is
+ * 20120 (two features of the same type don't affect the score)
+ * giving an average (pairwise distance) of 5/5 or 1.0
+ */
+ s1.addSequenceFeature(new SequenceFeature("domain", null, 1, 3, 0f,
+ null));
+ s1.addSequenceFeature(new SequenceFeature("variant", null, 2, 4, 0f,
+ null));
+ s1.addSequenceFeature(new SequenceFeature("variant", null, 3, 5, 0f,
+ null));
+ s2.addSequenceFeature(new SequenceFeature("domain", null, 2, 4, 0f,
+ null));
+ s2.addSequenceFeature(new SequenceFeature("variant", null, 1, 2, 0f,
+ null));
+ s2.addSequenceFeature(new SequenceFeature("variant", null, 5, 5, 0f,
+ null));
+ alf.setShowSeqFeatures(true);
+ alf.getFeatureRenderer().findAllFeatures(true);
+
+ FeatureDistanceModel fsm = new FeatureDistanceModel();
+ assertTrue(fsm.configureFromAlignmentView(alf.getCurrentView()
+ .getAlignPanel()));
+ alf.selectAllSequenceMenuItem_actionPerformed(null);
+
+ MatrixI distances = fsm.findDistances(alf.getViewport()
+ .getAlignmentView(true), SimilarityParams.Jalview);
+ assertEquals(distances.width(), 2);
+ assertEquals(distances.height(), 2);
+ assertEquals(distances.getValue(0, 0), 0d);
+ assertEquals(distances.getValue(1, 1), 0d);
+
+ // these left to fail pending resolution of
+ // JAL-2424 (computing score as 5/6, should be 5/5)
+ // see also PCATest.testComputeSimilarity_featureDistances()
+ assertEquals(distances.getValue(0, 1), 1f, "JAL-2424!");
+ assertEquals(distances.getValue(1, 0), 1f);
+ }
+
+ /**
+ * Verify computed distances with varying parameter options
+ */
+ @Test(groups = "Functional")
+ public void testFindDistances_withParams()
+ {
+ AlignFrame af = setupAlignmentView();
+ AlignViewport viewport = af.getViewport();
+ AlignmentView view = viewport.getAlignmentView(false);
+
+ FeatureDistanceModel sm = new FeatureDistanceModel();
+ sm.configureFromAlignmentView(af.alignPanel);
+
+ /*
+ * feature distance model always normalises by region width
+ * gap-gap is always included (but scores zero)
+ * the only variable parameter is 'includeGaps'
+ */
+
+ /*
+ * include gaps
+ * score = 3 + 3 + 0 + 2 + 3 + 2 = 13/6
+ // FIXME out by 1 error in cpwidth JAL-2424 - dividing by 7
+ */
+ SimilarityParamsI params = new SimilarityParams(true, true, true, true);
+ MatrixI distances = sm.findDistances(view, params);
+ assertEquals(distances.getValue(0, 0), 0d);
+ assertEquals(distances.getValue(1, 1), 0d);
+ assertEquals(distances.getValue(0, 1), 13d / 7); // should be 13d/6
+ assertEquals(distances.getValue(1, 0), 13d / 7);
+
+ /*
+ * exclude gaps
+ * score = 3 + 3 + 0 + 0 + 0 + 0 = 6/6
+ // FIXME out by 1 error in cpwidth JAL-2424 - dividing by 7
+ */
+ params = new SimilarityParams(true, true, false, true);
+ distances = sm.findDistances(view, params);
+ assertEquals(distances.getValue(0, 1), 6d / 7);// should be 6d/6
+ }
+
+ /**
+ * Builds an alignment frame on two sequences with sequence features, laid
+ * out as below. Gaps are shown here as '-' (they are spaces in the sequence
+ * strings), and '.' means no feature, or no residue, at that column. The
+ * 'scores' row is the number of feature types not shared by the two
+ * sequences at each column.
+ *
+ * <pre>
+ * column    | 1 2 3 4 5 6
+ * seq s1    | F R - K - S
+ * seq s2    | F S - - L .
+ * s1 chain  | c c . c . c
+ * s1 domain | d d . d . d
+ * s2 chain  | c c . . c .
+ * s2 metal  | m m . . m .
+ * s2 Pfam   | P P . . P .
+ * scores:   | 3 3 0 2 3 2
+ * </pre>
+ *
+ * @return an alignment frame on the two sequences, with sequence features
+ *         shown
+ */
+ protected AlignFrame setupAlignmentView()
+ {
+ /*
+ * for now, using space for gap to match callers of
+ * AlignmentView.getSequenceStrings()
+ * may change this to '-' (with corresponding change to matrices)
+ *
+ * NB s2 is gapped at both column 3 and column 4, so its string needs two
+ * consecutive spaces for 'L' to fall in column 5
+ */
+ SequenceI s1 = new Sequence("s1", "FR K S");
+ SequenceI s2 = new Sequence("s2", "FS  L");
+
+ s1.addSequenceFeature(new SequenceFeature("chain", null, 1, 4, 0f, null));
+ s1.addSequenceFeature(new SequenceFeature("domain", null, 1, 4, 0f,
+ null));
+ s2.addSequenceFeature(new SequenceFeature("chain", null, 1, 3, 0f, null));
+ s2.addSequenceFeature(new SequenceFeature("metal", null, 1, 3, 0f, null));
+ s2.addSequenceFeature(new SequenceFeature("Pfam", null, 1, 3, 0f, null));
+ AlignmentI al = new Alignment(new SequenceI[] { s1, s2 });
+ AlignFrame af = new AlignFrame(al, 300, 300);
+ af.setShowSeqFeatures(true);
+ af.getFeatureRenderer().findAllFeatures(true);
+ return af;
+ }
+
+}
+++ /dev/null
-/*
- * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
- * Copyright (C) $$Year-Rel$$ The Jalview Authors
- *
- * This file is part of Jalview.
- *
- * Jalview is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, either version 3
- * of the License, or (at your option) any later version.
- *
- * Jalview is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
- * The Jalview Authors are detailed in the 'AUTHORS' file.
- */
-package jalview.analysis.scoremodels;
-
-import jalview.datamodel.AlignmentI;
-import jalview.datamodel.SequenceFeature;
-import jalview.datamodel.SequenceI;
-import jalview.gui.AlignFrame;
-import jalview.gui.JvOptionPane;
-import jalview.io.DataSourceType;
-import jalview.io.FileLoader;
-
-import java.util.Arrays;
-
-import org.testng.Assert;
-import org.testng.annotations.BeforeClass;
-import org.testng.annotations.Test;
-
-public class FeatureScoreModelTest
-{
-
- @BeforeClass(alwaysRun = true)
- public void setUpJvOptionPane()
- {
- JvOptionPane.setInteractiveMode(false);
- JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
- }
-
- public static String alntestFile = "FER1_MESCR/72-76 DVYIL\nFER1_SPIOL/71-75 DVYIL\nFER3_RAPSA/21-25 DVYVL\nFER1_MAIZE/73-77 DVYIL\n";
-
- int[] sf1 = new int[] { 74, 74, 73, 73, 23, 23, -1, -1 };
-
- int[] sf2 = new int[] { -1, -1, 74, 75, -1, -1, 76, 77 };
-
- int[] sf3 = new int[] { -1, -1, -1, -1, -1, -1, 76, 77 };
-
- public AlignFrame getTestAlignmentFrame()
- {
- AlignFrame alf = new FileLoader(false).LoadFileWaitTillLoaded(
- alntestFile, DataSourceType.PASTE);
- AlignmentI al = alf.getViewport().getAlignment();
- Assert.assertEquals(al.getHeight(), 4);
- Assert.assertEquals(al.getWidth(), 5);
- for (int i = 0; i < 4; i++)
- {
- SequenceI ds = al.getSequenceAt(i).getDatasetSequence();
- if (sf1[i * 2] > 0)
- {
- ds.addSequenceFeature(new SequenceFeature("sf1", "sf1", "sf1",
- sf1[i * 2], sf1[i * 2 + 1], "sf1"));
- }
- if (sf2[i * 2] > 0)
- {
- ds.addSequenceFeature(new SequenceFeature("sf2", "sf2", "sf2",
- sf2[i * 2], sf2[i * 2 + 1], "sf2"));
- }
- if (sf3[i * 2] > 0)
- {
- ds.addSequenceFeature(new SequenceFeature("sf3", "sf3", "sf3",
- sf3[i * 2], sf3[i * 2 + 1], "sf3"));
- }
- }
- alf.setShowSeqFeatures(true);
- alf.getFeatureRenderer().setVisible("sf1");
- alf.getFeatureRenderer().setVisible("sf2");
- alf.getFeatureRenderer().setVisible("sf3");
- alf.getFeatureRenderer().findAllFeatures(true);
- Assert.assertEquals(alf.getFeatureRenderer().getDisplayedFeatureTypes()
- .size(), 3, "Number of feature types");
- Assert.assertTrue(alf.getCurrentView().areFeaturesDisplayed());
- return alf;
- }
-
- @Test(groups = { "Functional" })
- public void testFeatureScoreModel() throws Exception
- {
- AlignFrame alf = getTestAlignmentFrame();
- FeatureScoreModel fsm = new FeatureScoreModel();
- Assert.assertTrue(fsm.configureFromAlignmentView(alf.getCurrentView()
- .getAlignPanel()));
- alf.selectAllSequenceMenuItem_actionPerformed(null);
- float[][] dm = fsm.findDistances(alf.getViewport().getAlignmentView(
- true));
- Assert.assertTrue(dm[0][2] == 0f,
- "FER1_MESCR (0) should be identical with RAPSA (2)");
- Assert.assertTrue(dm[0][1] > dm[0][2],
- "FER1_MESCR (0) should be further from SPIOL (1) than it is from RAPSA (2)");
- }
-
- @Test(groups = { "Functional" })
- public void testFeatureScoreModel_hiddenFirstColumn() throws Exception
- {
- AlignFrame alf = getTestAlignmentFrame();
- // hiding first two columns shouldn't affect the tree
- alf.getViewport().hideColumns(0, 1);
- FeatureScoreModel fsm = new FeatureScoreModel();
- Assert.assertTrue(fsm.configureFromAlignmentView(alf.getCurrentView()
- .getAlignPanel()));
- alf.selectAllSequenceMenuItem_actionPerformed(null);
- float[][] dm = fsm.findDistances(alf.getViewport().getAlignmentView(
- true));
- Assert.assertTrue(dm[0][2] == 0f,
- "FER1_MESCR (0) should be identical with RAPSA (2)");
- Assert.assertTrue(dm[0][1] > dm[0][2],
- "FER1_MESCR (0) should be further from SPIOL (1) than it is from RAPSA (2)");
- }
-
- @Test(groups = { "Functional" })
- public void testFeatureScoreModel_HiddenColumns() throws Exception
- {
- AlignFrame alf = getTestAlignmentFrame();
- // hide columns and check tree changes
- alf.getViewport().hideColumns(3, 4);
- alf.getViewport().hideColumns(0, 1);
- FeatureScoreModel fsm = new FeatureScoreModel();
- Assert.assertTrue(fsm.configureFromAlignmentView(alf.getCurrentView()
- .getAlignPanel()));
- alf.selectAllSequenceMenuItem_actionPerformed(null);
- float[][] dm = fsm.findDistances(alf.getViewport().getAlignmentView(
- true));
- Assert.assertTrue(
- dm[0][2] == 0f,
- "After hiding last two columns FER1_MESCR (0) should still be identical with RAPSA (2)");
- Assert.assertTrue(
- dm[0][1] == 0f,
- "After hiding last two columns FER1_MESCR (0) should now also be identical with SPIOL (1)");
- for (int s = 0; s < 3; s++)
- {
- Assert.assertTrue(dm[s][3] > 0f, "After hiding last two columns "
- + alf.getViewport().getAlignment().getSequenceAt(s).getName()
- + "(" + s + ") should still be distinct from FER1_MAIZE (3)");
- }
- }
-
- /**
- * Check findFeatureAt doesn't return contact features except at contact
- * points TODO:move to under the FeatureRendererModel test suite
- */
- @Test(groups = { "Functional" })
- public void testFindFeatureAt_PointFeature() throws Exception
- {
- String alignment = "a CCCCCCGGGGGGCCCCCC\n" + "b CCCCCCGGGGGGCCCCCC\n"
- + "c CCCCCCGGGGGGCCCCCC\n";
- AlignFrame af = new jalview.io.FileLoader(false)
- .LoadFileWaitTillLoaded(alignment, DataSourceType.PASTE);
- SequenceI aseq = af.getViewport().getAlignment().getSequenceAt(0);
- SequenceFeature sf = null;
- sf = new SequenceFeature("disulphide bond", "", 2, 5, Float.NaN, "");
- aseq.addSequenceFeature(sf);
- Assert.assertTrue(sf.isContactFeature());
- af.refreshFeatureUI(true);
- af.getFeatureRenderer().setAllVisible(Arrays.asList("disulphide bond"));
- Assert.assertEquals(af.getFeatureRenderer().getDisplayedFeatureTypes()
- .size(), 1, "Should be just one feature type displayed");
- // step through and check for pointwise feature presence/absence
- Assert.assertEquals(af.getFeatureRenderer().findFeaturesAtRes(aseq, 1)
- .size(), 0);
- // step through and check for pointwise feature presence/absence
- Assert.assertEquals(af.getFeatureRenderer().findFeaturesAtRes(aseq, 2)
- .size(), 1);
- // step through and check for pointwise feature presence/absence
- Assert.assertEquals(af.getFeatureRenderer().findFeaturesAtRes(aseq, 3)
- .size(), 0);
- // step through and check for pointwise feature presence/absence
- Assert.assertEquals(af.getFeatureRenderer().findFeaturesAtRes(aseq, 4)
- .size(), 0);
- // step through and check for pointwise feature presence/absence
- Assert.assertEquals(af.getFeatureRenderer().findFeaturesAtRes(aseq, 5)
- .size(), 1);
- // step through and check for pointwise feature presence/absence
- Assert.assertEquals(af.getFeatureRenderer().findFeaturesAtRes(aseq, 6)
- .size(), 0);
- }
-
-}
--- /dev/null
+package jalview.analysis.scoremodels;
+
+import static org.testng.Assert.assertEquals;
+
+import jalview.api.analysis.SimilarityParamsI;
+import jalview.util.Comparison;
+
+import org.testng.annotations.Test;
+
+public class PIDModelTest
+{
+ private static final double DELTA = 0.00001D;
+
+ @Test(groups = "Functional")
+ public void testGetPairwiseScore()
+ {
+ PIDModel sm = new PIDModel();
+ assertEquals(sm.getPairwiseScore('A', 'A'), 1f);
+ assertEquals(sm.getPairwiseScore('A', 'a'), 1f);
+ assertEquals(sm.getPairwiseScore('a', 'A'), 1f);
+ assertEquals(sm.getPairwiseScore('A', 'B'), 0f);
+ assertEquals(sm.getPairwiseScore('A', ' '), 0f);
+ assertEquals(sm.getPairwiseScore(' ', ' '), 0f);
+ assertEquals(sm.getPairwiseScore('.', '.'), 0f);
+ assertEquals(sm.getPairwiseScore('-', '-'), 0f);
+ }
+
+ /**
+ * Regression test to verify that a (suitably configured) PIDModel computes
+ * the same percentage identities as the Comparison.PID method
+ */
+ @Test(groups = "Functional")
+ public void testComputePID_matchesComparisonPID()
+ {
+ SimilarityParamsI params = new SimilarityParams(true, true, true, true);
+
+ /*
+ * same length, no gaps
+ */
+ String s1 = "ARFNQDWSGI";
+ String s2 = "ARKNQDQSGI";
+
+ new PIDModel();
+ double newScore = PIDModel.computePID(s1, s2, params);
+ double oldScore = Comparison.PID(s1, s2);
+ assertEquals(newScore, oldScore, DELTA);
+
+ /*
+ * same length, with gaps
+ */
+ s1 = "-RFNQDWSGI";
+ s2 = "ARKNQ-QSGI";
+ new PIDModel();
+ newScore = PIDModel.computePID(s1, s2, params);
+ oldScore = Comparison.PID(s1, s2);
+ assertEquals(newScore, oldScore, DELTA);
+
+ /*
+ * s2 longer than s1, with gaps
+ */
+ s1 = "ARK-";
+ s2 = "-RFNQ";
+ new PIDModel();
+ newScore = PIDModel.computePID(s1, s2, params);
+ oldScore = Comparison.PID(s1, s2);
+ assertEquals(newScore, oldScore, DELTA);
+
+ /*
+ * s1 longer than s2, with gaps
+ */
+ s1 = "-RFNQ";
+ s2 = "ARK-";
+ new PIDModel();
+ newScore = PIDModel.computePID(s1, s2, params);
+ oldScore = Comparison.PID(s1, s2);
+ assertEquals(newScore, oldScore, DELTA);
+
+ /*
+ * same but now also with gapped columns
+ */
+ s1 = "-R-F-NQ";
+ s2 = "AR-K--";
+ new PIDModel();
+ newScore = PIDModel.computePID(s1, s2, params);
+ oldScore = Comparison.PID(s1, s2);
+ assertEquals(newScore, oldScore, DELTA);
+ }
+
+ /**
+ * Tests for percentage identity variants where only the shorter length of two
+ * sequences is used
+ */
+ @Test(groups = "Functional")
+ public void testComputePID_matchShortestSequence()
+ {
+ String s1 = "FR-K-S";
+ String s2 = "FS--L";
+
+ /*
+ * match gap-gap and gap-char
+ * PID = 4/5 = 80%
+ */
+ SimilarityParamsI params = new SimilarityParams(true, true, true, true);
+ assertEquals(PIDModel.computePID(s1, s2, params), 80d);
+
+ /*
+ * match gap-char but not gap-gap
+ * PID = 3/4 = 75%
+ */
+ params = new SimilarityParams(false, true, true, true);
+ assertEquals(PIDModel.computePID(s1, s2, params), 75d);
+
+ /*
+ * include gaps but don't match them
+ * include gap-gap, counted as identity
+ * PID = 2/5 = 40%
+ */
+ params = new SimilarityParams(true, false, true, true);
+ assertEquals(PIDModel.computePID(s1, s2, params), 40d);
+
+ /*
+ * include gaps but don't match them
+ * exclude gap-gap
+ * PID = 1/4 = 25%
+ */
+ params = new SimilarityParams(false, false, true, true);
+ assertEquals(PIDModel.computePID(s1, s2, params), 25d);
+ }
+
+ /**
+ * Tests for percentage identity variants where the longer length of two
+ * sequences is used
+ */
+ @Test(groups = "Functional")
+ public void testComputePID_matchLongestSequence()
+ {
+ String s1 = "FR-K-S";
+ String s2 = "FS--L";
+
+ /*
+ * match gap-gap and gap-char
+ * shorter sequence treated as if with trailing gaps
+ * PID = 5/6 = 83.333...%
+ */
+ SimilarityParamsI params = new SimilarityParams(true, true, true, false);
+ assertEquals(PIDModel.computePID(s1, s2, params), 500d / 6);
+
+ /*
+ * match gap-char but not gap-gap
+ * PID = 4/5 = 80%
+ */
+ params = new SimilarityParams(false, true, true, false);
+ assertEquals(PIDModel.computePID(s1, s2, params), 80d);
+
+ /*
+ * include gaps but don't match them
+ * include gap-gap, counted as identity
+ * PID = 2/6 = 33.333...%
+ */
+ params = new SimilarityParams(true, false, true, false);
+ assertEquals(PIDModel.computePID(s1, s2, params), 100d / 3);
+
+ /*
+ * include gaps but don't match them
+ * exclude gap-gap
+ * PID = 1/5 = 20%
+ */
+ params = new SimilarityParams(false, false, true, false);
+ assertEquals(PIDModel.computePID(s1, s2, params), 20d);
+
+ /*
+ * no tests for matchGaps=true, includeGaps=false
+ * as it doesn't make sense
+ */
+ }
+}
--- /dev/null
+package jalview.analysis.scoremodels;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertNotEquals;
+import static org.testng.Assert.assertNotNull;
+import static org.testng.Assert.assertNotSame;
+import static org.testng.Assert.assertNull;
+import static org.testng.Assert.assertTrue;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
+
+import jalview.api.analysis.SimilarityParamsI;
+import jalview.io.DataSourceType;
+import jalview.io.FileParse;
+import jalview.io.ScoreMatrixFile;
+import jalview.math.MatrixI;
+import jalview.schemes.ResidueProperties;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.util.Arrays;
+
+import org.testng.annotations.Test;
+
+public class ScoreMatrixTest
+{
+ @Test(groups = "Functional")
+ public void testConstructor()
+ {
+ // note score matrix does not have to be symmetric (though it should be!)
+ float[][] scores = new float[3][];
+ scores[0] = new float[] { 1f, 2f, 3f };
+ scores[1] = new float[] { -4f, 5f, 6f };
+ scores[2] = new float[] { 7f, 8f, 9f };
+ ScoreMatrix sm = new ScoreMatrix("Test", "ABC".toCharArray(), scores);
+ assertEquals(sm.getSize(), 3);
+ assertArrayEquals(scores, sm.getMatrix());
+ assertEquals(sm.getPairwiseScore('A', 'a'), 1f);
+ assertEquals(sm.getPairwiseScore('b', 'c'), 6f);
+ assertEquals(sm.getPairwiseScore('c', 'b'), 8f);
+ assertEquals(sm.getMatrixIndex('c'), 2);
+ assertEquals(sm.getMatrixIndex(' '), -1);
+
+ // substitution to or from unknown symbol gets minimum score
+ assertEquals(sm.getPairwiseScore('A', 'D'), -4f);
+ assertEquals(sm.getPairwiseScore('D', 'A'), -4f);
+ // unknown-to-self gets a score of 1
+ assertEquals(sm.getPairwiseScore('D', 'D'), 1f);
+ }
+
+ @Test(
+ groups = "Functional",
+ expectedExceptions = { IllegalArgumentException.class })
+ public void testConstructor_matrixTooSmall()
+ {
+ float[][] scores = new float[2][];
+ scores[0] = new float[] { 1f, 2f };
+ scores[1] = new float[] { 3f, 4f };
+ new ScoreMatrix("Test", "ABC".toCharArray(), scores);
+ }
+
+ @Test(
+ groups = "Functional",
+ expectedExceptions = { IllegalArgumentException.class })
+ public void testConstructor_matrixTooBig()
+ {
+ float[][] scores = new float[2][];
+ scores[0] = new float[] { 1f, 2f };
+ scores[1] = new float[] { 3f, 4f };
+ new ScoreMatrix("Test", "A".toCharArray(), scores);
+ }
+
+ @Test(
+ groups = "Functional",
+ expectedExceptions = { IllegalArgumentException.class })
+ public void testConstructor_matrixNotSquare()
+ {
+ float[][] scores = new float[2][];
+ scores[0] = new float[] { 1f, 2f };
+ scores[1] = new float[] { 3f };
+ new ScoreMatrix("Test", "AB".toCharArray(), scores);
+ }
+
+ @Test(groups = "Functional")
+ public void testBuildSymbolIndex()
+ {
+ float[][] scores = new float[2][];
+ scores[0] = new float[] { 1f, 2f };
+ scores[1] = new float[] { 3f, 4f };
+ ScoreMatrix sm = new ScoreMatrix("Test", new char[] { 'A', '.' },
+ scores);
+ short[] index = sm.buildSymbolIndex("AX-yxYp".toCharArray());
+
+ assertEquals(index.length, 128); // ASCII character set size
+
+ assertEquals(index['A'], 0);
+ assertEquals(index['a'], 0); // lower-case mapping added
+ assertEquals(index['X'], 1);
+ assertEquals(index['-'], 2);
+ assertEquals(index['y'], 3); // lower-case override
+ assertEquals(index['x'], 4); // lower-case override
+ assertEquals(index['Y'], 5);
+ assertEquals(index['p'], 6);
+ assertEquals(index['P'], -1); // lower-case doesn't map upper-case
+
+ /*
+ * check all unmapped symbols have index for unmapped
+ */
+ for (int c = 0; c < index.length; c++)
+ {
+ if (!"AaXx-. Yyp".contains(String.valueOf((char) c)))
+ {
+ assertEquals(index[c], -1);
+ }
+ }
+ }
+
+ /**
+ * check that characters not in the basic ASCII set are simply ignored
+ */
+ @Test(groups = "Functional")
+ public void testBuildSymbolIndex_nonAscii()
+ {
+ float[][] scores = new float[2][];
+ scores[0] = new float[] { 1f, 2f };
+ scores[1] = new float[] { 3f, 4f };
+ ScoreMatrix sm = new ScoreMatrix("Test", new char[] { 'A', '.' },
+ scores);
+ char[] weird = new char[] { 128, 245, 'P' };
+ short[] index = sm.buildSymbolIndex(weird);
+ assertEquals(index.length, 128);
+ assertEquals(index['P'], 2);
+ assertEquals(index['p'], 2);
+ for (int c = 0; c < index.length; c++)
+ {
+ if (c != 'P' && c != 'p')
+ {
+ assertEquals(index[c], -1);
+ }
+ }
+ }
+
+ @Test(groups = "Functional")
+ public void testGetMatrix()
+ {
+ ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
+ float[][] m = sm.getMatrix();
+ assertEquals(m.length, sm.getSize());
+ assertEquals(m[2][4], -3f);
+ // verify a defensive copy is returned
+ float[][] m2 = sm.getMatrix();
+ assertNotSame(m, m2);
+ assertTrue(Arrays.deepEquals(m, m2));
+ }
+
+ /**
+  * Verifies the matrix index reported by BLOSUM62 for a sample of symbols:
+  * residues are expected to map identically in upper and lower case, while gap
+  * characters, '?' and a non-ASCII character are expected to report -1
+  */
+ @Test(groups = "Functional")
+ public void testGetMatrixIndex()
+ {
+   ScoreMatrix blosum62 = ScoreModels.getInstance().getBlosum62();
+   assertEquals(blosum62.getMatrixIndex('A'), 0);
+   assertEquals(blosum62.getMatrixIndex('R'), 1);
+   assertEquals(blosum62.getMatrixIndex('r'), 1);
+   assertEquals(blosum62.getMatrixIndex('N'), 2);
+   assertEquals(blosum62.getMatrixIndex('D'), 3);
+   assertEquals(blosum62.getMatrixIndex('X'), 22);
+   assertEquals(blosum62.getMatrixIndex('x'), 22);
+   assertEquals(blosum62.getMatrixIndex('-'), -1);
+   assertEquals(blosum62.getMatrixIndex('*'), 23);
+   assertEquals(blosum62.getMatrixIndex('.'), -1);
+   assertEquals(blosum62.getMatrixIndex(' '), -1);
+   assertEquals(blosum62.getMatrixIndex('?'), -1);
+   assertEquals(blosum62.getMatrixIndex((char) 128), -1);
+ }
+
+ /**
+  * Verifies that getSize() agrees with the number of rows in the array
+  * returned by getMatrix()
+  */
+ @Test(groups = "Functional")
+ public void testGetSize()
+ {
+   ScoreMatrix blosum62 = ScoreModels.getInstance().getBlosum62();
+   int rowCount = blosum62.getMatrix().length;
+   assertEquals(rowCount, blosum62.getSize());
+ }
+
+ /**
+  * Verifies the similarity matrix computed with BLOSUM62 for four sequences of
+  * three symbols each: its dimensions, its symmetry and every expected score
+  */
+ @Test(groups = "Functional")
+ public void testComputePairwiseScores()
+ {
+   /*
+    * NB score matrix expects '-' for gap
+    */
+   String[] sequences = new String[] { "FKL", "R-D", "QIA", "GWC" };
+   ScoreMatrix blosum62 = ScoreModels.getInstance().getBlosum62();
+
+   MatrixI scores = blosum62.findSimilarities(sequences,
+           SimilarityParams.Jalview);
+
+   /*
+    * one row and one column per sequence
+    */
+   assertEquals(scores.height(), 4);
+   assertEquals(scores.width(), 4);
+
+   /*
+    * compare each cell above the diagonal with its mirror image;
+    * symmetry is expected because BLOSUM62 is symmetric
+    */
+   for (int row = 0; row < scores.height(); row++)
+   {
+     for (int col = row + 1; col < scores.width(); col++)
+     {
+       assertEquals(scores.getValue(row, col), scores.getValue(col, row),
+               String.format("Not symmetric at [%d, %d]", row, col));
+     }
+   }
+
+   /*
+    * expected scores: sum of BLOSUM scores over aligned positions
+    */
+   // F.F + K.K + L.L = 6 + 5 + 4 = 15
+   assertEquals(scores.getValue(0, 0), 15d);
+   // R.R + -.- + D.D = 5 + 1 + 6 = 12
+   assertEquals(scores.getValue(1, 1), 12d);
+   // Q.Q + I.I + A.A = 5 + 4 + 4 = 13
+   assertEquals(scores.getValue(2, 2), 13d);
+   // G.G + W.W + C.C = 6 + 11 + 9 = 26
+   assertEquals(scores.getValue(3, 3), 26d);
+   // F.R + K.- + L.D = -3 + -4 + -4 = -11
+   assertEquals(scores.getValue(0, 1), -11d);
+   // F.Q + K.I + L.A = -3 + -3 + -1 = -7
+   assertEquals(scores.getValue(0, 2), -7d);
+   // F.G + K.W + L.C = -3 + -3 + -1 = -7
+   assertEquals(scores.getValue(0, 3), -7d);
+   // R.Q + -.I + D.A = 1 + -4 + -2 = -5
+   assertEquals(scores.getValue(1, 2), -5d);
+   // R.G + -.W + D.C = -2 + -4 + -3 = -9
+   assertEquals(scores.getValue(1, 3), -9d);
+   // Q.G + I.W + A.C = -2 + -3 + 0 = -5
+   assertEquals(scores.getValue(2, 3), -5d);
+ }
+
+ /**
+  * Writes BLOSUM62 out with outputMatrix(false), parses that text back with
+  * ScoreMatrixFile, and verifies the parsed matrix equals the original
+  *
+  * @throws IOException
+  * @throws MalformedURLException
+  */
+ @Test(groups = "Functional")
+ public void testOutputMatrix_roundTrip() throws MalformedURLException,
+         IOException
+ {
+   ScoreMatrix original = ScoreModels.getInstance().getBlosum62();
+   String text = original.outputMatrix(false);
+   ScoreMatrixFile parser = new ScoreMatrixFile(new FileParse(text,
+           DataSourceType.PASTE));
+   ScoreMatrix reparsed = parser.parseMatrix();
+   assertNotNull(reparsed);
+   assertTrue(reparsed.equals(original));
+ }
+
+ /**
+  * Verifies equals and hashCode: a matrix rebuilt from BLOSUM62's name,
+  * symbols and scores is equal with the same hash code, PAM250 is not equal
+  * and has a different hash code, and a String is not equal
+  */
+ @Test(groups = "Functional")
+ public void testEqualsAndHashCode()
+ {
+   ScoreMatrix blosum62 = ScoreModels.getInstance().getBlosum62();
+
+   char[] alphabet = blosum62.getSymbols().toCharArray();
+   ScoreMatrix other = new ScoreMatrix(blosum62.getName(), alphabet,
+           blosum62.getMatrix());
+   assertTrue(blosum62.equals(other));
+   assertEquals(blosum62.hashCode(), other.hashCode());
+
+   other = ScoreModels.getInstance().getPam250();
+   assertFalse(blosum62.equals(other));
+   assertNotEquals(blosum62.hashCode(), other.hashCode());
+
+   assertFalse(blosum62.equals("hello"));
+ }
+
+ /**
+  * Verifies computeSimilarity for each combination of the first three
+  * SimilarityParams flags when the fourth is false, in which case the score is
+  * expected to span the longer of the two sequences
+  */
+ @Test(groups = "Functional")
+ public void testcomputeSimilarity_matchLongestSequence()
+ {
+   /*
+    * ScoreMatrix expects '-' for gaps
+    */
+   String seqA = "FR-K-S";
+   String seqB = "FS--L";
+   ScoreMatrix matrix = ScoreModels.getInstance().getBlosum62();
+   SimilarityParamsI options;
+
+   /*
+    * score gap-gap and gap-char
+    * shorter sequence treated as if with trailing gaps
+    * score = F^F + R^S + -^- + K^- + -^L + S^-
+    * = 6 + -1 + 1 + -4 + -4 + -4 = -6
+    */
+   options = new SimilarityParams(true, true, true, false);
+   assertEquals(matrix.computeSimilarity(seqA, seqB, options), -6d);
+   // the second argument (matchGap) should make no difference:
+   options = new SimilarityParams(true, false, true, false);
+   assertEquals(matrix.computeSimilarity(seqA, seqB, options), -6d);
+
+   /*
+    * score gap-char but not gap-gap
+    * score = F^F + R^S + 0 + K^- + -^L + S^-
+    * = 6 + -1 + 0 + -4 + -4 + -4 = -7
+    */
+   options = new SimilarityParams(false, true, true, false);
+   assertEquals(matrix.computeSimilarity(seqA, seqB, options), -7d);
+   // the second argument (matchGap) should make no difference:
+   options = new SimilarityParams(false, false, true, false);
+   assertEquals(matrix.computeSimilarity(seqA, seqB, options), -7d);
+
+   /*
+    * score gap-gap but not gap-char
+    * score = F^F + R^S + -^- + 0 + 0 + 0
+    * = 6 + -1 + 1 = 6
+    */
+   options = new SimilarityParams(true, false, false, false);
+   assertEquals(matrix.computeSimilarity(seqA, seqB, options), 6d);
+   // the second argument (matchGap) should make no difference:
+   options = new SimilarityParams(true, true, false, false);
+   assertEquals(matrix.computeSimilarity(seqA, seqB, options), 6d);
+
+   /*
+    * score neither gap-gap nor gap-char
+    * score = F^F + R^S + 0 + 0 + 0 + 0
+    * = 6 + -1 = 5
+    */
+   options = new SimilarityParams(false, false, false, false);
+   assertEquals(matrix.computeSimilarity(seqA, seqB, options), 5d);
+   // the second argument (matchGap) should make no difference:
+   options = new SimilarityParams(false, true, false, false);
+   assertEquals(matrix.computeSimilarity(seqA, seqB, options), 5d);
+ }
+
+ /**
+  * Verifies computeSimilarity for each combination of the first three
+  * SimilarityParams flags when the fourth is true, in which case the score is
+  * expected to span only the shorter of the two sequences
+  */
+ @Test(groups = "Functional")
+ public void testcomputeSimilarity_matchShortestSequence()
+ {
+   /*
+    * ScoreMatrix expects '-' for gaps
+    */
+   String seqA = "FR-K-S";
+   String seqB = "FS--L";
+   ScoreMatrix matrix = ScoreModels.getInstance().getBlosum62();
+   SimilarityParamsI options;
+
+   /*
+    * score gap-gap and gap-char
+    * match shorter sequence only
+    * score = F^F + R^S + -^- + K^- + -^L
+    * = 6 + -1 + 1 + -4 + -4 = -2
+    */
+   options = new SimilarityParams(true, true, true, true);
+   assertEquals(matrix.computeSimilarity(seqA, seqB, options), -2d);
+   // the second argument (matchGap) should make no difference:
+   options = new SimilarityParams(true, false, true, true);
+   assertEquals(matrix.computeSimilarity(seqA, seqB, options), -2d);
+
+   /*
+    * score gap-char but not gap-gap
+    * score = F^F + R^S + 0 + K^- + -^L
+    * = 6 + -1 + 0 + -4 + -4 = -3
+    */
+   options = new SimilarityParams(false, true, true, true);
+   assertEquals(matrix.computeSimilarity(seqA, seqB, options), -3d);
+   // the second argument (matchGap) should make no difference:
+   options = new SimilarityParams(false, false, true, true);
+   assertEquals(matrix.computeSimilarity(seqA, seqB, options), -3d);
+
+   /*
+    * score gap-gap but not gap-char
+    * score = F^F + R^S + -^- + 0 + 0
+    * = 6 + -1 + 1 = 6
+    */
+   options = new SimilarityParams(true, false, false, true);
+   assertEquals(matrix.computeSimilarity(seqA, seqB, options), 6d);
+   // the second argument (matchGap) should make no difference:
+   options = new SimilarityParams(true, true, false, true);
+   assertEquals(matrix.computeSimilarity(seqA, seqB, options), 6d);
+
+   /*
+    * score neither gap-gap nor gap-char
+    * score = F^F + R^S + 0 + 0 + 0
+    * = 6 + -1 = 5
+    */
+   options = new SimilarityParams(false, false, false, true);
+   assertEquals(matrix.computeSimilarity(seqA, seqB, options), 5d);
+   // the second argument (matchGap) should make no difference:
+   options = new SimilarityParams(false, true, false, true);
+   assertEquals(matrix.computeSimilarity(seqA, seqB, options), 5d);
+ }
+
+ /**
+  * Verifies that the BLOSUM62 and PAM250 matrices, and the model returned by
+  * getDefaultModel(false), are all symmetric
+  */
+ @Test(groups = "Functional")
+ public void testSymmetric()
+ {
+   ScoreMatrix blosum62 = ScoreModels.getInstance().getBlosum62();
+   verifySymmetric(blosum62);
+
+   ScoreMatrix pam250 = ScoreModels.getInstance().getPam250();
+   verifySymmetric(pam250);
+
+   // 'false' selects the nucleotide (dna) default
+   ScoreMatrix dna = ScoreModels.getInstance().getDefaultModel(false);
+   verifySymmetric(dna);
+ }
+
+ /**
+  * Helper method that asserts a score matrix is square and symmetric about
+  * its diagonal. Failure messages identify the offending cell by the matrix's
+  * own symbols, so they read correctly for any alphabet (protein or
+  * nucleotide)
+  *
+  * @param sm
+  *          the score matrix to check
+  */
+ private void verifySymmetric(ScoreMatrix sm)
+ {
+   float[][] m = sm.getMatrix();
+   // assumes getSymbols() lists one symbol per matrix row, in row order
+   // (testEqualsAndHashCode relies on the same pairing) - TODO confirm
+   String symbols = sm.getSymbols();
+   int rows = m.length;
+   for (int row = 0; row < rows; row++)
+   {
+     assertEquals(m[row].length, rows);
+     for (int col = 0; col < rows; col++)
+     {
+       assertEquals(m[row][col], m[col][row], String.format("%s [%s, %s]",
+               sm.getName(), symbols.charAt(row), symbols.charAt(col)));
+     }
+   }
+ }
+
+ /**
+ * A test that just asserts the expected values in the Blosum62 score matrix.
+ * It also checks the matrix type flags and that the description is initially
+ * null and can be set.
+ */
+ @Test(groups = "Functional")
+ public void testBlosum62_values()
+ {
+ ScoreMatrix sm = ScoreModels.getInstance().getBlosum62();
+
+ assertTrue(sm.isProtein());
+ assertFalse(sm.isDNA());
+ assertNull(sm.getDescription());
+ // NOTE(review): this sets the description on the instance obtained via
+ // ScoreModels.getInstance(); if that instance is shared, the assertNull
+ // above presumably fails when this test runs twice in one JVM - confirm
+ sm.setDescription("BLOSUM62");
+ assertEquals(sm.getDescription(), "BLOSUM62");
+
+ /*
+ * verify expected scores against ARNDCQEGHILKMFPSTWYVBZX
+ * scraped from https://www.ncbi.nlm.nih.gov/Class/FieldGuide/BLOSUM62.txt
+ * (each array holds 23 values, one per symbol listed above)
+ */
+ verifyValues(sm, 'A', new float[] { 4, -1, -2, -2, 0, -1, -1, 0, -2,
+ -1,
+ -1, -1, -1, -2, -1, 1, 0, -3, -2, 0, -2, -1, 0 });
+ verifyValues(sm, 'R', new float[] { -1, 5, 0, -2, -3, 1, 0, -2, 0, -3,
+ -2, 2, -1, -3, -2, -1, -1, -3, -2, -3, -1, 0, -1 });
+ verifyValues(sm, 'N', new float[] { -2, 0, 6, 1, -3, 0, 0, 0, 1, -3,
+ -3,
+ 0, -2, -3, -2, 1, 0, -4, -2, -3, 3, 0, -1 });
+ verifyValues(sm, 'D', new float[] { -2, -2, 1, 6, -3, 0, 2, -1, -1, -3,
+ -4, -1, -3, -3, -1, 0, -1, -4, -3, -3, 4, 1, -1 });
+ verifyValues(sm, 'C', new float[] { 0, -3, -3, -3, 9, -3, -4, -3, -3,
+ -1,
+ -1, -3, -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2 });
+ verifyValues(sm, 'Q', new float[] { -1, 1, 0, 0, -3, 5, 2, -2, 0, -3,
+ -2,
+ 1, 0, -3, -1, 0, -1, -2, -1, -2, 0, 3, -1 });
+ verifyValues(sm, 'E', new float[] { -1, 0, 0, 2, -4, 2, 5, -2, 0, -3,
+ -3,
+ 1, -2, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1 });
+ verifyValues(sm, 'G', new float[] { 0, -2, 0, -1, -3, -2, -2, 6, -2,
+ -4,
+ -4, -2, -3, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1 });
+ verifyValues(sm, 'H', new float[] { -2, 0, 1, -1, -3, 0, 0, -2, 8, -3,
+ -3, -1, -2, -1, -2, -1, -2, -2, 2, -3, 0, 0, -1 });
+ verifyValues(sm, 'I', new float[] { -1, -3, -3, -3, -1, -3, -3, -4, -3,
+ 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3, -3, -3, -1 });
+ verifyValues(sm, 'L', new float[] { -1, -2, -3, -4, -1, -2, -3, -4, -3,
+ 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1, -4, -3, -1 });
+ verifyValues(sm, 'K', new float[] { -1, 2, 0, -1, -3, 1, 1, -2, -1, -3,
+ -2, 5, -1, -3, -1, 0, -1, -3, -2, -2, 0, 1, -1 });
+ verifyValues(sm, 'M', new float[] { -1, -1, -2, -3, -1, 0, -2, -3, -2,
+ 1,
+ 2, -1, 5, 0, -2, -1, -1, -1, -1, 1, -3, -1, -1 });
+ verifyValues(sm, 'F', new float[] { -2, -3, -3, -3, -2, -3, -3, -3, -1,
+ 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1, -3, -3, -1 });
+ verifyValues(sm, 'P', new float[] { -1, -2, -2, -1, -3, -1, -1, -2, -2,
+ -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2, -2, -1, -2 });
+ verifyValues(sm, 'S', new float[] { 1, -1, 1, 0, -1, 0, 0, 0, -1, -2,
+ -2,
+ 0, -1, -2, -1, 4, 1, -3, -2, -2, 0, 0, 0 });
+ verifyValues(sm, 'T', new float[] { 0, -1, 0, -1, -1, -1, -1, -2, -2,
+ -1,
+ -1, -1, -1, -2, -1, 1, 5, -2, -2, 0, -1, -1, 0 });
+ verifyValues(sm, 'W', new float[] { -3, -3, -4, -4, -2, -2, -3, -2, -2,
+ -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3, -4, -3, -2 });
+ verifyValues(sm, 'Y', new float[] { -2, -2, -2, -3, -2, -1, -2, -3, 2,
+ -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1, -3, -2, -1 });
+ verifyValues(sm, 'V', new float[] { 0, -3, -3, -3, -1, -2, -2, -3, -3,
+ 3,
+ 1, -2, 1, -1, -2, -2, 0, -3, -1, 4, -3, -2, -1 });
+ verifyValues(sm, 'B', new float[] { -2, -1, 3, 4, -3, 0, 1, -1, 0, -3,
+ -4, 0, -3, -3, -2, 0, -1, -4, -3, -3, 4, 1, -1 });
+ verifyValues(sm, 'Z', new float[] { -1, 0, 0, 1, -3, 3, 4, -2, 0, -3,
+ -3,
+ 1, -1, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1 });
+ verifyValues(sm, 'X', new float[] { 0, -1, -1, -1, -2, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -2, 0, 0, -2, -1, -1, -1, -1, -1 });
+ }
+
+ /**
+  * Helper method that asserts the score of one residue against each of a
+  * series of other residues
+  *
+  * @param sm
+  *          the score matrix under test
+  * @param res
+  *          the residue whose pairwise scores are checked
+  * @param expected
+  *          expected scores against 'res'; entry j is compared with the score
+  *          for the first character of ResidueProperties.aa[j]
+  */
+ private void verifyValues(ScoreMatrix sm, char res, float[] expected)
+ {
+   int count = expected.length;
+   int pos = 0;
+   while (pos < count)
+   {
+     char other = ResidueProperties.aa[pos].charAt(0);
+     assertEquals(sm.getPairwiseScore(res, other), expected[pos],
+             String.format("%s->%s", res, other));
+     pos++;
+   }
+ }
+
+ /**
+  * Verifies a matrix built over the symbols 'A' and '-': its size, its
+  * scores, and that '-' gets a matrix index while space and '.' do not
+  */
+ @Test(groups = "Functional")
+ public void testConstructor_gapDash()
+ {
+   float[][] values = new float[][] { { 1f, 2f }, { 4f, 5f } };
+   char[] alphabet = new char[] { 'A', '-' };
+   ScoreMatrix matrix = new ScoreMatrix("Test", alphabet, values);
+
+   assertEquals(matrix.getSize(), 2);
+   assertArrayEquals(values, matrix.getMatrix());
+
+   // pairwise scores, including lower case lookups
+   assertEquals(matrix.getPairwiseScore('A', 'a'), 1f);
+   assertEquals(matrix.getPairwiseScore('A', 'A'), 1f);
+   assertEquals(matrix.getPairwiseScore('a', '-'), 2f);
+   assertEquals(matrix.getPairwiseScore('-', 'A'), 4f);
+
+   // matrix positions of the symbols
+   assertEquals(matrix.getMatrixIndex('a'), 0);
+   assertEquals(matrix.getMatrixIndex('A'), 0);
+   assertEquals(matrix.getMatrixIndex('-'), 1);
+   assertEquals(matrix.getMatrixIndex(' '), -1);
+   assertEquals(matrix.getMatrixIndex('.'), -1);
+ }
+
+ /**
+  * Verifies getPairwiseScore on a small two-symbol matrix, for symbols in the
+  * matrix (either case), for unknown ASCII symbols, and for a character
+  * outside the basic ASCII set
+  */
+ @Test(groups = "Functional")
+ public void testGetPairwiseScore()
+ {
+   float[][] values = new float[][] { { 1f, 2f }, { -4f, 5f } };
+   char[] alphabet = new char[] { 'A', 'B' };
+   ScoreMatrix matrix = new ScoreMatrix("Test", alphabet, values);
+
+   assertEquals(matrix.getPairwiseScore('A', 'A'), 1f);
+   assertEquals(matrix.getPairwiseScore('A', 'a'), 1f);
+   assertEquals(matrix.getPairwiseScore('A', 'B'), 2f);
+   assertEquals(matrix.getPairwiseScore('b', 'a'), -4f);
+   assertEquals(matrix.getPairwiseScore('B', 'b'), 5f);
+
+   /*
+    * symbols not in the matrix are currently expected to score the
+    * minimum score (-4 here), or 1 when compared with themselves
+    */
+   assertEquals(matrix.getPairwiseScore('A', '-'), -4f);
+   assertEquals(matrix.getPairwiseScore('-', 'A'), -4f);
+   assertEquals(matrix.getPairwiseScore('-', '-'), 1f);
+   assertEquals(matrix.getPairwiseScore('Q', 'W'), -4f);
+   assertEquals(matrix.getPairwiseScore('Q', 'Q'), 1f);
+
+   /*
+    * a character beyond the basic ASCII set is expected to score zero
+    */
+   char nonAscii = (char) 200;
+   assertEquals(matrix.getPairwiseScore('Q', nonAscii), 0f);
+   assertEquals(matrix.getPairwiseScore(nonAscii, 'Q'), 0f);
+ }
+
+ /**
+  * Verifies that the minimum score reported for BLOSUM62 is -4
+  */
+ @Test(groups = "Functional")
+ public void testGetMinimumScore()
+ {
+   ScoreMatrix blosum62 = ScoreModels.getInstance().getBlosum62();
+   assertEquals(blosum62.getMinimumScore(), -4f);
+ }
+
+ /**
+  * Verifies that the maximum score reported for BLOSUM62 is 11
+  */
+ @Test(groups = "Functional")
+ public void testGetMaximumScore()
+ {
+   ScoreMatrix blosum62 = ScoreModels.getInstance().getBlosum62();
+   assertEquals(blosum62.getMaximumScore(), 11f);
+ }
+
+ /**
+  * Verifies the exact html table produced by outputMatrix(true) for a small
+  * two-symbol matrix, including a score written in exponent notation
+  */
+ @Test(groups = "Functional")
+ public void testOutputMatrix_html()
+ {
+   float[][] values = new float[][] { { 1f, 2f }, { 4f, -5.3E-10f } };
+   ScoreMatrix matrix = new ScoreMatrix("Test", "AB".toCharArray(), values);
+
+   String actual = matrix.outputMatrix(true);
+
+   // header row, then one row per symbol
+   String wanted = "<table border=\"1\"><tr><th></th><th> A </th><th> B </th></tr>\n"
+           + "<tr><td>A</td><td>1.0</td><td>2.0</td></tr>\n"
+           + "<tr><td>B</td><td>4.0</td><td>-5.3E-10</td></tr>\n"
+           + "</table>";
+   assertEquals(actual, wanted);
+ }
+}
--- /dev/null
+package jalview.analysis.scoremodels;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
+
+import jalview.api.analysis.PairwiseScoreModelI;
+import jalview.api.analysis.ScoreModelI;
+
+import java.util.Iterator;
+
+import org.testng.annotations.Test;
+
+public class ScoreModelsTest
+{
+  /**
+   * Checks that the ScoreModels singleton serves Jalview's built-in score
+   * models, with the expected types and names and in the expected order
+   */
+  @Test(groups = "Functional")
+  public void testConstructor()
+  {
+    Iterator<ScoreModelI> builtIn = ScoreModels.getInstance().getModels()
+            .iterator();
+    assertTrue(builtIn.hasNext());
+
+    /*
+     * models are expected in the order they were added
+     */
+    PairwiseScoreModelI blosum62 = verifySimilarityModel(builtIn.next(),
+            "BLOSUM62");
+    assertEquals(blosum62.getPairwiseScore('I', 'R'), -3f);
+
+    PairwiseScoreModelI pam250 = verifySimilarityModel(builtIn.next(),
+            "PAM250");
+    assertEquals(pam250.getPairwiseScore('R', 'C'), -4f);
+
+    PairwiseScoreModelI pid = verifySimilarityModel(builtIn.next(), "PID");
+    assertEquals(pid.getPairwiseScore('R', 'C'), 0f);
+    assertEquals(pid.getPairwiseScore('R', 'r'), 1f);
+
+    PairwiseScoreModelI dna = verifySimilarityModel(builtIn.next(), "DNA");
+    assertEquals(dna.getPairwiseScore('c', 'x'), 1f);
+
+    /*
+     * the last model checked is a distance model, not a similarity model
+     */
+    ScoreModelI featureModel = builtIn.next();
+    assertFalse(featureModel instanceof SimilarityScoreModel);
+    assertFalse(featureModel instanceof PairwiseScoreModelI);
+    assertTrue(featureModel instanceof DistanceScoreModel);
+    assertEquals(featureModel.getName(), "Sequence Feature Similarity");
+  }
+
+  /**
+   * Asserts that a model is a pairwise similarity model (and not a distance
+   * model) with the expected name
+   *
+   * @param model
+   *          the model to check
+   * @param expectedName
+   *          the name the model should report
+   * @return the model, cast for pairwise score lookups
+   */
+  private PairwiseScoreModelI verifySimilarityModel(ScoreModelI model,
+          String expectedName)
+  {
+    assertTrue(model instanceof SimilarityScoreModel);
+    assertTrue(model instanceof PairwiseScoreModelI);
+    assertFalse(model instanceof DistanceScoreModel);
+    assertEquals(model.getName(), expectedName);
+    return (PairwiseScoreModelI) model;
+  }
+
+  /**
+   * Utility 'test' that prints the score matrices in tab-delimited format. It
+   * makes no assertions. It is placed in the group "none", presumably so that
+   * no test suite selects it (TODO confirm against the suite definitions).
+   * Relocated here from ScoreMatrixPrinter.
+   */
+  @Test(groups = "none")
+  public void printAllMatrices_tabDelimited()
+  {
+    printAllMatrices(false);
+  }
+
+  /**
+   * Utility 'test' that prints the score matrices in html format. It makes no
+   * assertions. It is placed in the group "none", presumably so that no test
+   * suite selects it (TODO confirm against the suite definitions). Relocated
+   * here from ScoreMatrixPrinter.
+   */
+  @Test(groups = "none")
+  public void printAllMatrices_asHtml()
+  {
+    printAllMatrices(true);
+  }
+
+  /**
+   * Writes every registered model that is a ScoreMatrix to standard output
+   *
+   * @param asHtml
+   *          passed on to outputMatrix to choose html rather than plain output
+   */
+  protected void printAllMatrices(boolean asHtml)
+  {
+    for (ScoreModelI model : ScoreModels.getInstance().getModels())
+    {
+      if (!(model instanceof ScoreMatrix))
+      {
+        continue;
+      }
+      ScoreMatrix matrix = (ScoreMatrix) model;
+      System.out.println(matrix.outputMatrix(asHtml));
+    }
+  }
+}
{
"examples/testdata/cullpdb_pc25_res3.0_R0.3_d150729_chains9361.fasta.15316",
FileFormat.Fasta },
-
+ { "resources/scoreModel/pam250.scm", FileFormat.ScoreMatrix },
+ { "resources/scoreModel/blosum80.scm", FileFormat.ScoreMatrix }
// { "examples/testdata/test.amsa", "AMSA" },
// { "examples/test.jnet", "JnetFile" },
};
import static org.testng.ConversionUtils.wrapDataProvider;
-import jalview.analysis.NJTree;
import jalview.analysis.SequenceIdMatcher;
+import jalview.analysis.TreeModel;
import jalview.datamodel.SequenceI;
import jalview.datamodel.SequenceNode;
import jalview.gui.JvOptionPane;
stage = "Compare original and generated tree" + treename;
Vector<SequenceNode> oseqs, nseqs;
- oseqs = new NJTree(new SequenceI[0], nf).findLeaves(nf.getTree());
+ oseqs = new TreeModel(new SequenceI[0], null, nf).findLeaves(nf
+ .getTree());
AssertJUnit.assertTrue(stage + "No nodes in original tree.",
oseqs.size() > 0);
SequenceI[] olsqs = new SequenceI[oseqs.size()];
{
olsqs[i] = (SequenceI) oseqs.get(i).element();
}
- nseqs = new NJTree(new SequenceI[0], nf_regen).findLeaves(nf_regen
+ nseqs = new TreeModel(new SequenceI[0], null, nf_regen)
+ .findLeaves(nf_regen
.getTree());
AssertJUnit.assertTrue(stage + "No nodes in regerated tree.",
nseqs.size() > 0);
--- /dev/null
+package jalview.io;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertNotNull;
+import static org.testng.Assert.assertNull;
+import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.fail;
+
+import jalview.analysis.scoremodels.ScoreMatrix;
+import jalview.analysis.scoremodels.ScoreModels;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+
+import org.testng.annotations.Test;
+
+public class ScoreMatrixFileTest
+{
+
+ /**
+  * Tests a successful parse of pasted matrix data whose score rows mix comma,
+  * space and tab delimiters and start with a 'guide' symbol
+  *
+  * @throws IOException
+  * @throws MalformedURLException
+  */
+ @Test(groups = "Functional")
+ public void testParseMatrix_ncbiMixedDelimiters()
+         throws MalformedURLException, IOException
+ {
+   /*
+    * messy but valid input: score values are separated by comma, space
+    * or tab (or combinations), and each score row starts with its symbol
+    */
+   String input = "ScoreMatrix MyTest (example)\n" + "A\tT\tU\tt\tx\t-\n"
+           + "A,1.1,1.2,1.3,1.4, 1.5, 1.6\n"
+           + "T,2.1 2.2 2.3 2.4 2.5 2.6\n"
+           + "U\t3.1\t3.2\t3.3\t3.4\t3.5\t3.6\t\n"
+           + "t, 5.1,5.3,5.3,5.4,5.5, 5.6\n"
+           + "x\t6.1, 6.2 6.3 6.4 6.5 6.6\n"
+           + "-, \t7.1\t7.2 7.3, 7.4, 7.5\t,7.6\n";
+   ScoreMatrixFile parser = new ScoreMatrixFile(new FileParse(input,
+           DataSourceType.PASTE));
+   ScoreMatrix matrix = parser.parseMatrix();
+
+   assertNotNull(matrix);
+   assertEquals(matrix.getName(), "MyTest (example)");
+   assertEquals(matrix.getSize(), 6);
+   assertNull(matrix.getDescription());
+   assertTrue(matrix.isDNA());
+   assertFalse(matrix.isProtein());
+   assertEquals(matrix.getMinimumScore(), 1.1f);
+   assertEquals(matrix.getPairwiseScore('A', 'A'), 1.1f);
+   assertEquals(matrix.getPairwiseScore('A', 'T'), 1.2f);
+   // 'a' is scored like 'A'
+   assertEquals(matrix.getPairwiseScore('a', 'T'), 1.2f);
+   // 't' has its own column, distinct from 'T'
+   assertEquals(matrix.getPairwiseScore('A', 't'), 1.4f);
+   assertEquals(matrix.getPairwiseScore('a', 't'), 1.4f);
+   assertEquals(matrix.getPairwiseScore('U', 'x'), 3.5f);
+   assertEquals(matrix.getPairwiseScore('u', 'x'), 3.5f);
+   // upper case 'X' and '.' are not in the matrix: minimum score expected
+   assertEquals(matrix.getPairwiseScore('U', 'X'), 1.1f);
+   assertEquals(matrix.getPairwiseScore('A', '.'), 1.1f);
+   assertEquals(matrix.getPairwiseScore('-', '-'), 7.6f);
+   // character outside the basic ASCII range
+   assertEquals(matrix.getPairwiseScore('A', (char) 128), 0f);
+ }
+
+ /**
+  * Verifies the error reported when the input has no 'ScoreMatrix' header
+  * line
+  */
+ @Test(groups = "Functional")
+ public void testParseMatrix_headerMissing()
+ {
+   String input = "X Y\n1 2\n3 4\n";
+   try
+   {
+     FileParse source = new FileParse(input, DataSourceType.PASTE);
+     ScoreMatrixFile parser = new ScoreMatrixFile(source);
+     parser.parseMatrix();
+     fail("expected exception");
+   } catch (IOException ex)
+   {
+     String message = ex.getMessage();
+     assertEquals(message,
+             "Format error: 'ScoreMatrix <name>' should be the first non-comment line");
+   }
+ }
+
+ /**
+  * Verifies the error reported when three symbols are declared but only two
+  * rows of scores follow
+  */
+ @Test(groups = "Functional")
+ public void testParseMatrix_ncbiNotEnoughRows()
+ {
+   String input = "ScoreMatrix MyTest\nX Y Z\n1 2 3\n4 5 6\n";
+   try
+   {
+     FileParse source = new FileParse(input, DataSourceType.PASTE);
+     ScoreMatrixFile parser = new ScoreMatrixFile(source);
+     parser.parseMatrix();
+     fail("expected exception");
+   } catch (IOException ex)
+   {
+     String message = ex.getMessage();
+     assertEquals(message,
+             "Expected 3 rows of score data in score matrix but only found 2");
+   }
+ }
+
+ /**
+  * Verifies the error reported when a score row has fewer values than there
+  * are symbols
+  */
+ @Test(groups = "Functional")
+ public void testParseMatrix_ncbiNotEnoughColumns()
+ {
+   String input = "ScoreMatrix MyTest\nX Y Z\n1 2 3\n4 5\n7 8 9\n";
+   try
+   {
+     FileParse source = new FileParse(input, DataSourceType.PASTE);
+     ScoreMatrixFile parser = new ScoreMatrixFile(source);
+     parser.parseMatrix();
+     fail("expected exception");
+   } catch (IOException ex)
+   {
+     String message = ex.getMessage();
+     assertEquals(message,
+             "Expected 3 scores at line 4: '4 5' but found 2");
+   }
+ }
+
+ /**
+  * Verifies the error reported when a score row has more values than there
+  * are symbols, with and without a leading guide character
+  */
+ @Test(groups = "Functional")
+ public void testParseMatrix_ncbiTooManyColumns()
+ {
+   /*
+    * three symbols, second score row has four values
+    */
+   String input = "ScoreMatrix MyTest\nX\tY\tZ\n1 2 3\n4 5 6 7\n8 9 10\n";
+   try
+   {
+     FileParse source = new FileParse(input, DataSourceType.PASTE);
+     ScoreMatrixFile parser = new ScoreMatrixFile(source);
+     parser.parseMatrix();
+     fail("expected exception");
+   } catch (IOException ex)
+   {
+     String message = ex.getMessage();
+     assertEquals(message,
+             "Expected 3 scores at line 4: '4 5 6 7' but found 4");
+   }
+
+   /*
+    * guide character present, and one value too many
+    */
+   input = "ScoreMatrix MyTest\nX Y\nX 1 2\nY 3 4 5\n";
+   try
+   {
+     FileParse source = new FileParse(input, DataSourceType.PASTE);
+     ScoreMatrixFile parser = new ScoreMatrixFile(source);
+     parser.parseMatrix();
+     fail("expected exception");
+   } catch (IOException ex)
+   {
+     String message = ex.getMessage();
+     assertEquals(message,
+             "Expected 2 scores at line 4: 'Y 3 4 5' but found 3");
+   }
+
+   /*
+    * no guide character, and one value too many
+    */
+   input = "ScoreMatrix MyTest\nX Y\n1 2\n3 4 5\n";
+   try
+   {
+     FileParse source = new FileParse(input, DataSourceType.PASTE);
+     ScoreMatrixFile parser = new ScoreMatrixFile(source);
+     parser.parseMatrix();
+     fail("expected exception");
+   } catch (IOException ex)
+   {
+     String message = ex.getMessage();
+     assertEquals(message,
+             "Expected 2 scores at line 4: '3 4 5' but found 3");
+   }
+ }
+
+ /**
+  * Verifies the error reported when a fourth score row follows the three
+  * rows expected for three symbols
+  */
+ @Test(groups = "Functional")
+ public void testParseMatrix_ncbiTooManyRows()
+ {
+   String input = "ScoreMatrix MyTest\n\tX\tY\tZ\n1 2 3\n4 5 6\n7 8 9\n10 11 12\n";
+   try
+   {
+     FileParse source = new FileParse(input, DataSourceType.PASTE);
+     ScoreMatrixFile parser = new ScoreMatrixFile(source);
+     parser.parseMatrix();
+     fail("expected exception");
+   } catch (IOException ex)
+   {
+     String message = ex.getMessage();
+     assertEquals(message,
+             "Unexpected extra input line in score model file: '10 11 12'");
+   }
+ }
+
+ /**
+  * Verifies the error reported when score values are separated by '|', which
+  * leaves the whole row to be read as a single invalid score value
+  */
+ @Test(groups = "Functional")
+ public void testParseMatrix_ncbiBadDelimiter()
+ {
+   String input = "ScoreMatrix MyTest\n X Y Z\n1|2|3\n4|5|6\n";
+   try
+   {
+     FileParse source = new FileParse(input, DataSourceType.PASTE);
+     ScoreMatrixFile parser = new ScoreMatrixFile(source);
+     parser.parseMatrix();
+     fail("expected exception");
+   } catch (IOException ex)
+   {
+     String message = ex.getMessage();
+     assertEquals(message,
+             "Invalid score value '1|2|3' at line 3 column 0");
+   }
+ }
+
+ /**
+  * Verifies the error reported when a score value is not a number
+  */
+ @Test(groups = "Functional")
+ public void testParseMatrix_ncbiBadFloat()
+ {
+   String input = "ScoreMatrix MyTest\n\tX\tY\tZ\n1 2 3\n4 five 6\n7 8 9\n";
+   try
+   {
+     FileParse source = new FileParse(input, DataSourceType.PASTE);
+     ScoreMatrixFile parser = new ScoreMatrixFile(source);
+     parser.parseMatrix();
+     fail("expected exception");
+   } catch (IOException ex)
+   {
+     String message = ex.getMessage();
+     assertEquals(message,
+             "Invalid score value 'five' at line 4 column 1");
+   }
+ }
+
+ /**
+  * Verifies the error reported when the guide symbol at the start of a score
+  * row does not match the expected symbol: wrong case in the first input, two
+  * characters in the second
+  */
+ @Test(groups = "Functional")
+ public void testParseMatrix_ncbiBadGuideCharacter()
+ {
+   /*
+    * second row is labelled 'y' where 'Y' is expected
+    */
+   String input = "ScoreMatrix MyTest\n\tX Y\nX 1 2\ny 3 4\n";
+   try
+   {
+     FileParse source = new FileParse(input, DataSourceType.PASTE);
+     ScoreMatrixFile parser = new ScoreMatrixFile(source);
+     parser.parseMatrix();
+     fail("expected exception");
+   } catch (IOException ex)
+   {
+     String message = ex.getMessage();
+     assertEquals(message,
+             "Error parsing score matrix at line 4, expected 'Y' but found 'y'");
+   }
+
+   /*
+    * first row is labelled 'XX' where 'X' is expected
+    */
+   input = "ScoreMatrix MyTest\n\tX Y\nXX 1 2\nY 3 4\n";
+   try
+   {
+     FileParse source = new FileParse(input, DataSourceType.PASTE);
+     ScoreMatrixFile parser = new ScoreMatrixFile(source);
+     parser.parseMatrix();
+     fail("expected exception");
+   } catch (IOException ex)
+   {
+     String message = ex.getMessage();
+     assertEquals(message,
+             "Error parsing score matrix at line 3, expected 'X' but found 'XX'");
+   }
+ }
+
+ /**
+  * Verifies the error reported when the 'ScoreMatrix' header line carries no
+  * matrix name
+  */
+ @Test(groups = "Functional")
+ public void testParseMatrix_ncbiNameMissing()
+ {
+   // header line consists of the keyword only
+   String input = "ScoreMatrix\nX Y\n1 2\n3 4\n";
+   try
+   {
+     FileParse source = new FileParse(input, DataSourceType.PASTE);
+     ScoreMatrixFile parser = new ScoreMatrixFile(source);
+     parser.parseMatrix();
+     fail("expected exception");
+   } catch (IOException ex)
+   {
+     String message = ex.getMessage();
+     assertEquals(message,
+             "Format error: expected 'ScoreMatrix <name>', found 'ScoreMatrix' at line 1");
+   }
+ }
+
+ /**
+  * Tests a successful parse of a small pasted matrix that includes a comment
+  * line and a blank line between the header and the symbols
+  *
+  * @throws IOException
+  * @throws MalformedURLException
+  */
+ @Test(groups = "Functional")
+ public void testParseMatrix_ncbiFormat() throws MalformedURLException,
+         IOException
+ {
+   // header, comment line, blank line, symbols, then three score rows
+   String input = "ScoreMatrix MyTest\n#comment\n\n" + "\tA\tB\tC\n"
+           + "A\t1.0\t2.0\t3.0\n" + "B\t4.0\t5.0\t6.0\n"
+           + "C\t7.0\t8.0\t9.0\n";
+   ScoreMatrixFile parser = new ScoreMatrixFile(new FileParse(input,
+           DataSourceType.PASTE));
+   ScoreMatrix matrix = parser.parseMatrix();
+
+   assertNotNull(matrix);
+   assertEquals(matrix.getName(), "MyTest");
+   assertEquals(parser.getMatrixName(), "MyTest");
+   assertEquals(matrix.getPairwiseScore('A', 'A'), 1.0f);
+   assertEquals(matrix.getPairwiseScore('B', 'c'), 6.0f);
+   assertEquals(matrix.getSize(), 3);
+ }
+
+ /**
+  * Tests a successful parse of the score matrix file
+  * resources/scoreModel/blosum80.scm, checking its name, description, type,
+  * size and a few scores
+  *
+  * @throws IOException
+  * @throws MalformedURLException
+  */
+ @Test(groups = "Functional")
+ public void testParseMatrix_aaIndexBlosum80()
+         throws MalformedURLException, IOException
+ {
+   FileParse fp = new FileParse("resources/scoreModel/blosum80.scm",
+           DataSourceType.FILE);
+   ScoreMatrixFile parser = new ScoreMatrixFile(fp);
+   ScoreMatrix sm = parser.parseMatrix();
+
+   assertNotNull(sm);
+   assertEquals(sm.getName(), "HENS920103");
+   assertEquals(sm.getDescription(),
+           "BLOSUM80 substitution matrix (Henikoff-Henikoff, 1992)");
+   assertFalse(sm.isDNA());
+   assertTrue(sm.isProtein());
+   // TestNG takes (actual, expected), so a failure reports the right way round
+   assertEquals(sm.getSize(), 20);
+
+   assertEquals(sm.getPairwiseScore('A', 'A'), 7f);
+   assertEquals(sm.getPairwiseScore('A', 'R'), -3f);
+   // lower case symbols are scored like upper case
+   assertEquals(sm.getPairwiseScore('r', 'a'), -3f);
+ }
+
+ /**
+  * Tests a successful parse of a small pasted matrix in aaindex format, where
+  * only the diagonal and the scores below it are supplied, and checks that
+  * scores above the diagonal mirror those below
+  *
+  * @throws IOException
+  * @throws MalformedURLException
+  */
+ @Test(groups = "Functional")
+ public void testParseMatrix_aaindexFormat() throws MalformedURLException,
+         IOException
+ {
+   /*
+    * header lines (H, D, R, A, T, J, *), the M line naming rows and
+    * columns, then one score row per symbol up to the diagonal
+    */
+   String input = "H MyTest\n" + "D My description\n" + "R PMID:1438297\n"
+           + "A Authors, names\n" + "T Journal title\n"
+           + "J Journal reference\n" + "* matrix in 1/3 Bit Units\n"
+           + "M rows = ABC, cols = ABC\n" + "A\t1.0\n"
+           + "B\t4.0\t5.0\n"
+           + "C\t7.0\t8.0\t9.0\n";
+   ScoreMatrixFile parser = new ScoreMatrixFile(new FileParse(input,
+           DataSourceType.PASTE));
+   ScoreMatrix matrix = parser.parseMatrix();
+
+   assertNotNull(matrix);
+   assertEquals(matrix.getSize(), 3);
+   assertEquals(matrix.getName(), "MyTest");
+   assertEquals(matrix.getDescription(), "My description");
+   assertEquals(matrix.getPairwiseScore('A', 'A'), 1.0f);
+   assertEquals(matrix.getPairwiseScore('A', 'B'), 4.0f);
+   assertEquals(matrix.getPairwiseScore('A', 'C'), 7.0f);
+   assertEquals(matrix.getPairwiseScore('B', 'A'), 4.0f);
+   assertEquals(matrix.getPairwiseScore('B', 'B'), 5.0f);
+   assertEquals(matrix.getPairwiseScore('B', 'C'), 8.0f);
+   assertEquals(matrix.getPairwiseScore('C', 'C'), 9.0f);
+   assertEquals(matrix.getPairwiseScore('C', 'B'), 8.0f);
+   assertEquals(matrix.getPairwiseScore('C', 'A'), 7.0f);
+ }
+
+ /**
+  * Verifies the error reported for aaindex format input that has no
+  * 'M rows=, cols=' line to declare the symbols
+  */
+ @Test(groups = "Functional")
+ public void testParseMatrix_aaindex_mMissing()
+         throws MalformedURLException, IOException
+ {
+   // score rows follow the H line directly
+   String input = "H MyTest\n" + "A\t1.0\n"
+           + "B\t4.0\t5.0\n"
+           + "C\t7.0\t8.0\t9.0\n";
+   ScoreMatrixFile parser = new ScoreMatrixFile(new FileParse(input,
+           DataSourceType.PASTE));
+   try
+   {
+     parser.parseMatrix();
+     fail("Expected exception");
+   } catch (FileFormatException ex)
+   {
+     String message = ex.getMessage();
+     assertEquals(message, "No alphabet specified in matrix file");
+   }
+ }
+
+ /**
+  * Verifies the error reported for aaindex format input whose M line names
+  * different symbols for rows (ABC) and columns (ABD)
+  */
+ @Test(groups = "Functional")
+ public void testParseMatrix_aaindex_rowColMismatch()
+         throws MalformedURLException, IOException
+ {
+   String input = "H MyTest\n" + "M rows=ABC, cols=ABD\n" + "A\t1.0\n"
+           + "B\t4.0\t5.0\n"
+           + "C\t7.0\t8.0\t9.0\n";
+   ScoreMatrixFile parser = new ScoreMatrixFile(new FileParse(input,
+           DataSourceType.PASTE));
+   try
+   {
+     parser.parseMatrix();
+     fail("Expected exception");
+   } catch (FileFormatException ex)
+   {
+     String message = ex.getMessage();
+     assertEquals(message,
+             "Unexpected aaIndex score matrix data at line 2: M rows=ABC, cols=ABD rows != cols");
+   }
+ }
+
+ /**
+  * Verifies the error reported when a second 'ScoreMatrix' header line
+  * appears in the input
+  */
+ @Test(groups = "Functional")
+ public void testParseMatrix_ncbiHeaderRepeated()
+ {
+   String input = "ScoreMatrix BLOSUM\nScoreMatrix PAM250\nX Y\n1 2\n3 4\n";
+   try
+   {
+     FileParse source = new FileParse(input, DataSourceType.PASTE);
+     ScoreMatrixFile parser = new ScoreMatrixFile(source);
+     parser.parseMatrix();
+     fail("expected exception");
+   } catch (IOException ex)
+   {
+     String message = ex.getMessage();
+     assertEquals(message,
+             "Error: 'ScoreMatrix' repeated in file at line 2");
+   }
+ }
+
+ /**
+  * Verifies the error reported for aaindex format input with a fourth score
+  * row where three symbols are declared
+  */
+ @Test(groups = "Functional")
+ public void testParseMatrix_aaindex_tooManyRows()
+         throws MalformedURLException, IOException
+ {
+   String input = "H MyTest\n" + "M rows=ABC, cols=ABC\n" + "A\t1.0\n"
+           + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\t9.0\n" + "C\t7.0\t8.0\t9.0\n";
+   ScoreMatrixFile parser = new ScoreMatrixFile(new FileParse(input,
+           DataSourceType.PASTE));
+   try
+   {
+     parser.parseMatrix();
+     fail("Expected exception");
+   } catch (FileFormatException ex)
+   {
+     String message = ex.getMessage();
+     assertEquals(message, "Too many data rows in matrix file");
+   }
+ }
+
+ /**
+  * Verifies the error reported for aaindex format input with an extra line
+  * of text after the last expected score row
+  */
+ @Test(groups = "Functional")
+ public void testParseMatrix_aaindex_extraDataLines()
+         throws MalformedURLException, IOException
+ {
+   String input = "H MyTest\n" + "M rows=ABC, cols=ABC\n" + "A\t1.0\n"
+           + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\t9.0\n" + "something extra\n";
+   ScoreMatrixFile parser = new ScoreMatrixFile(new FileParse(input,
+           DataSourceType.PASTE));
+   try
+   {
+     parser.parseMatrix();
+     fail("Expected exception");
+   } catch (FileFormatException ex)
+   {
+     String message = ex.getMessage();
+     assertEquals(message, "Too many data rows in matrix file");
+   }
+ }
+
+ /**
+  * Verifies the error reported for aaindex format input whose third score row
+  * has two values where three are expected
+  */
+ @Test(groups = "Functional")
+ public void testParseMatrix_aaindex_tooFewColumns()
+         throws MalformedURLException, IOException
+ {
+   String input = "H MyTest\n" + "M rows=ABC, cols=ABC\n" + "A\t1.0\n"
+           + "B\t4.0\t5.0\n" + "C\t7.0\t8.0\n";
+   ScoreMatrixFile parser = new ScoreMatrixFile(new FileParse(input,
+           DataSourceType.PASTE));
+   try
+   {
+     parser.parseMatrix();
+     fail("Expected exception");
+   } catch (FileFormatException ex)
+   {
+     String message = ex.getMessage();
+     assertEquals(message,
+             "Expected 3 scores at line 5: 'C\t7.0\t8.0' but found 2");
+   }
+ }
+
+ /**
+  * Tests that parse() on a valid pasted matrix makes it available from
+  * ScoreModels under its name, where no model of that name was found before
+  *
+  * @throws IOException
+  * @throws MalformedURLException
+  */
+ @Test(groups = "Functional")
+ public void testParse_ncbiFormat() throws MalformedURLException,
+         IOException
+ {
+   // precondition: the name is not yet known
+   assertNull(ScoreModels.getInstance().forName("MyNewTest"));
+
+   String input = "ScoreMatrix MyNewTest\n" + "\tA\tB\tC\n"
+           + "A\t1.0\t2.0\t3.0\n" + "B\t4.0\t5.0\t6.0\n"
+           + "C\t7.0\t8.0\t9.0\n";
+   ScoreMatrixFile parser = new ScoreMatrixFile(new FileParse(input,
+           DataSourceType.PASTE));
+
+   parser.parse();
+
+   ScoreMatrix registered = (ScoreMatrix) ScoreModels.getInstance()
+           .forName("MyNewTest");
+   assertNotNull(registered);
+   assertEquals(registered.getName(), "MyNewTest");
+   assertEquals(parser.getMatrixName(), "MyNewTest");
+   assertEquals(registered.getPairwiseScore('A', 'A'), 1.0f);
+   assertEquals(registered.getPairwiseScore('B', 'c'), 6.0f);
+   assertEquals(registered.getSize(), 3);
+ }
+}
package jalview.math;
import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNotSame;
+import static org.testng.Assert.assertNull;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;
public class MatrixTest
{
- final static double DELTA = 0.0001d;
+ final static double DELTA = 0.000001d;
@Test(groups = "Timing")
public void testPreMultiply_timing()
{
- int rows = 500;
- int cols = 1000;
+ int rows = 50; // increase to stress test timing
+ int cols = 100;
double[][] d1 = new double[rows][cols];
double[][] d2 = new double[cols][rows];
Matrix m1 = new Matrix(d1);
}
Matrix m1 = new Matrix(in);
Matrix m2 = (Matrix) m1.copy();
+ assertNotSame(m1, m2);
assertTrue(matrixEquals(m1, m2));
}
ArrayAsserts.assertArrayEquals(m1.getD(), m2.getD(), 0.00001d);
ArrayAsserts.assertArrayEquals(m1.getE(), m2.getE(), 0.00001d);
}
+
+ /**
+  * Checks findMinMax for a matrix with no values and for a small matrix
+  * of mixed positive and negative values
+  */
+ @Test(groups = "Functional")
+ public void testFindMinMax()
+ {
+   // a matrix whose single row has no columns gives a null result
+   Matrix empty = new Matrix(new double[][] { {} });
+   assertNull(empty.findMinMax());
+
+   // otherwise the result holds the smallest value, then the largest
+   double[][] values = { { 7d, 1d, -2.3d }, { -12d, 94.3d, -102.34d } };
+   double[] range = new Matrix(values).findMinMax();
+   assertEquals(range[0], -102.34d);
+   assertEquals(range[1], 94.3d);
+ }
+
+ /**
+  * Times findMinMax on a square matrix of pseudo-random values and checks
+  * the result against the extremes tracked while the matrix is filled. A
+  * fixed seed is used so that a failing run can be repeated with the same
+  * data.
+  */
+ @Test(groups = { "Functional", "Timing" })
+ public void testFindMinMax_timing()
+ {
+   Random r = new Random(12345L);
+   int size = 1000; // increase to stress test timing
+   // this allocates every row, so no per-row allocation is needed below
+   double[][] vals = new double[size][size];
+   double max = -Double.MAX_VALUE;
+   double min = Double.MAX_VALUE;
+   for (int i = 0; i < size; i++)
+   {
+     for (int j = 0; j < size; j++)
+     {
+       // use nextLong rather than nextDouble to include negative values
+       double d = r.nextLong();
+       max = Math.max(max, d);
+       min = Math.min(min, d);
+       vals[i][j] = d;
+     }
+   }
+   Matrix m = new Matrix(vals);
+
+   // nanoTime is meant for elapsed-time measurement and is not affected
+   // by adjustments to the wall clock
+   long start = System.nanoTime();
+   double[] minMax = m.findMinMax();
+   long elapsedMs = (System.nanoTime() - start) / 1000000L;
+   System.out.println(String.format("findMinMax for %d x %d took %dms",
+           size, size, elapsedMs));
+   assertEquals(minMax[0], min);
+   assertEquals(minMax[1], max);
+ }
+
+ /**
+ * Test range reversal with maximum value becoming zero
+ */
+ @Test(groups = "Functional")
+ public void testReverseRange_maxToZero()
+ {
+ Matrix m1 = new Matrix(
+ new double[][] { { 2, 3.5, 4 }, { -3.4, 4, 15 } });
+
+ /*
+ * subtract all from max: range -3.4 to 15 becomes 18.4 to 0
+ */
+ m1.reverseRange(true);
+ assertEquals(m1.getValue(0, 0), 13d, DELTA);
+ assertEquals(m1.getValue(0, 1), 11.5d, DELTA);
+ assertEquals(m1.getValue(0, 2), 11d, DELTA);
+ assertEquals(m1.getValue(1, 0), 18.4d, DELTA);
+ assertEquals(m1.getValue(1, 1), 11d, DELTA);
+ assertEquals(m1.getValue(1, 2), 0d, DELTA);
+
+ /*
+ * repeat operation - range is now 0 to 18.4
+ */
+ m1.reverseRange(true);
+ assertEquals(m1.getValue(0, 0), 5.4d, DELTA);
+ assertEquals(m1.getValue(0, 1), 6.9d, DELTA);
+ assertEquals(m1.getValue(0, 2), 7.4d, DELTA);
+ assertEquals(m1.getValue(1, 0), 0d, DELTA);
+ assertEquals(m1.getValue(1, 1), 7.4d, DELTA);
+ assertEquals(m1.getValue(1, 2), 18.4d, DELTA);
+ }
+
+ /**
+ * Test range reversal with minimum and maximum values swapped
+ */
+ @Test(groups = "Functional")
+ public void testReverseRange_swapMinMax()
+ {
+ Matrix m1 = new Matrix(
+ new double[][] { { 2, 3.5, 4 }, { -3.4, 4, 15 } });
+
+ /*
+ * swap all values in min-max range
+ * = subtract from (min + max = 11.6)
+ * range -3.4 to 15 becomes 18.4 to -3.4
+ */
+ m1.reverseRange(false);
+ assertEquals(m1.getValue(0, 0), 9.6d, DELTA);
+ assertEquals(m1.getValue(0, 1), 8.1d, DELTA);
+ assertEquals(m1.getValue(0, 2), 7.6d, DELTA);
+ assertEquals(m1.getValue(1, 0), 15d, DELTA);
+ assertEquals(m1.getValue(1, 1), 7.6d, DELTA);
+ assertEquals(m1.getValue(1, 2), -3.4d, DELTA);
+
+ /*
+ * repeat operation - original values restored
+ */
+ m1.reverseRange(false);
+ assertEquals(m1.getValue(0, 0), 2d, DELTA);
+ assertEquals(m1.getValue(0, 1), 3.5d, DELTA);
+ assertEquals(m1.getValue(0, 2), 4d, DELTA);
+ assertEquals(m1.getValue(1, 0), -3.4d, DELTA);
+ assertEquals(m1.getValue(1, 1), 4d, DELTA);
+ assertEquals(m1.getValue(1, 2), 15d, DELTA);
+ }
+
+ @Test(groups = "Functional")
+ public void testMultiply()
+ {
+ Matrix m = new Matrix(new double[][] { { 2, 3.5, 4 }, { -3.4, 4, 15 } });
+ m.multiply(2d);
+ assertEquals(m.getValue(0, 0), 4d, DELTA);
+ assertEquals(m.getValue(0, 1), 7d, DELTA);
+ assertEquals(m.getValue(0, 2), 8d, DELTA);
+ assertEquals(m.getValue(1, 0), -6.8d, DELTA);
+ assertEquals(m.getValue(1, 1), 8d, DELTA);
+ assertEquals(m.getValue(1, 2), 30d, DELTA);
+ }
+
+ /**
+  * Checks that a Matrix built from an array is not affected by later
+  * changes to that array
+  */
+ @Test(groups = "Functional")
+ public void testConstructor()
+ {
+   double[][] source = new double[][] { { 1, 2, 3 }, { 4, 5, 6 } };
+   Matrix matrix = new Matrix(source);
+   assertEquals(matrix.getValue(0, 0), 1d, DELTA);
+
+   // the row held by the matrix is a different object from the source row
+   assertNotSame(source[0], matrix.getRow(0));
+
+   // so overwriting the source array leaves the matrix value unchanged
+   source[0][0] = -1d;
+   assertEquals(matrix.getValue(0, 0), 1d, DELTA);
+ }
}
+++ /dev/null
-/*
- * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
- * Copyright (C) $$Year-Rel$$ The Jalview Authors
- *
- * This file is part of Jalview.
- *
- * Jalview is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation, either version 3
- * of the License, or (at your option) any later version.
- *
- * Jalview is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty
- * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
- * The Jalview Authors are detailed in the 'AUTHORS' file.
- */
-package jalview.schemes;
-
-import jalview.api.analysis.ScoreModelI;
-import jalview.gui.JvOptionPane;
-
-import java.util.Map;
-
-import org.testng.annotations.BeforeClass;
-
-public class ScoreMatrixPrinter
-{
-
- @BeforeClass(alwaysRun = true)
- public void setUpJvOptionPane()
- {
- JvOptionPane.setInteractiveMode(false);
- JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
- }
-
- public void printAllMatrices()
- {
- for (Map.Entry<String, ScoreModelI> sm : ResidueProperties.scoreMatrices
- .entrySet())
- {
- System.out.println("Matrix " + sm.getKey());
- System.out.println(sm.getValue().toString());
- }
- }
-
- public void printHTMLMatrices()
- {
- for (Map.Entry<String, ScoreModelI> _sm : ResidueProperties.scoreMatrices
- .entrySet())
- {
- if (_sm.getValue() instanceof ScoreMatrix)
- {
- ScoreMatrix sm = (ScoreMatrix) _sm.getValue();
- System.out.println("Matrix " + _sm.getKey());
- System.out.println(sm.outputMatrix(true));
- }
- }
- }
-
-}
+++ /dev/null
-package jalview.schemes;
-
-import static org.testng.Assert.assertEquals;
-
-import jalview.math.MatrixI;
-
-import org.testng.annotations.Test;
-
-public class ScoreMatrixTest
-{
- @Test(groups = "Functional")
- public void testSymmetric()
- {
- verifySymmetric(ResidueProperties.getScoreMatrix("BLOSUM62"));
- verifySymmetric(ResidueProperties.getScoreMatrix("PAM250"));
- verifySymmetric(ResidueProperties.getScoreMatrix("DNA"));
- }
-
- private void verifySymmetric(ScoreMatrix sm)
- {
- int[][] m = sm.getMatrix();
- int rows = m.length;
- for (int row = 0; row < rows; row++)
- {
- assertEquals(m[row].length, rows);
- for (int col = 0; col < rows; col++)
- {
- assertEquals(m[row][col], m[col][row], String.format("%s [%s, %s]",
- sm.getName(), ResidueProperties.aa[row],
- ResidueProperties.aa[col]));
- }
- }
-
- /*
- * also check the score matrix is sized for
- * the number of symbols scored, plus gap
- */
- assertEquals(rows, (sm.isDNA() ? ResidueProperties.maxNucleotideIndex
- : ResidueProperties.maxProteinIndex) + 1);
- }
-
- /**
- * A test that just asserts the expected values in the Blosum62 score matrix
- */
- @Test(groups = "Functional")
- public void testBlosum62_values()
- {
- ScoreMatrix sm = ResidueProperties.getScoreMatrix("BLOSUM62");
-
- /*
- * verify expected scores against ARNDCQEGHILKMFPSTWYVBZX
- * scraped from https://www.ncbi.nlm.nih.gov/Class/FieldGuide/BLOSUM62.txt
- */
- verifyValues(sm, 'A', new int[] { 4, -1, -2, -2, 0, -1, -1, 0, -2, -1,
- -1, -1, -1, -2, -1, 1, 0, -3, -2, 0, -2, -1, 0 });
- verifyValues(sm, 'R', new int[] { -1, 5, 0, -2, -3, 1, 0, -2, 0, -3,
- -2, 2, -1, -3, -2, -1, -1, -3, -2, -3, -1, 0, -1 });
- verifyValues(sm, 'N', new int[] { -2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3,
- 0, -2, -3, -2, 1, 0, -4, -2, -3, 3, 0, -1 });
- verifyValues(sm, 'D', new int[] { -2, -2, 1, 6, -3, 0, 2, -1, -1, -3,
- -4, -1, -3, -3, -1, 0, -1, -4, -3, -3, 4, 1, -1 });
- verifyValues(sm, 'C', new int[] { 0, -3, -3, -3, 9, -3, -4, -3, -3, -1,
- -1, -3, -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2 });
- verifyValues(sm, 'Q', new int[] { -1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2,
- 1, 0, -3, -1, 0, -1, -2, -1, -2, 0, 3, -1 });
- verifyValues(sm, 'E', new int[] { -1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3,
- 1, -2, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1 });
- verifyValues(sm, 'G', new int[] { 0, -2, 0, -1, -3, -2, -2, 6, -2, -4,
- -4, -2, -3, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1 });
- verifyValues(sm, 'H', new int[] { -2, 0, 1, -1, -3, 0, 0, -2, 8, -3,
- -3, -1, -2, -1, -2, -1, -2, -2, 2, -3, 0, 0, -1 });
- verifyValues(sm, 'I', new int[] { -1, -3, -3, -3, -1, -3, -3, -4, -3,
- 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3, -3, -3, -1 });
- verifyValues(sm, 'L', new int[] { -1, -2, -3, -4, -1, -2, -3, -4, -3,
- 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1, -4, -3, -1 });
- verifyValues(sm, 'K', new int[] { -1, 2, 0, -1, -3, 1, 1, -2, -1, -3,
- -2, 5, -1, -3, -1, 0, -1, -3, -2, -2, 0, 1, -1 });
- verifyValues(sm, 'M', new int[] { -1, -1, -2, -3, -1, 0, -2, -3, -2, 1,
- 2, -1, 5, 0, -2, -1, -1, -1, -1, 1, -3, -1, -1 });
- verifyValues(sm, 'F', new int[] { -2, -3, -3, -3, -2, -3, -3, -3, -1,
- 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1, -3, -3, -1 });
- verifyValues(sm, 'P', new int[] { -1, -2, -2, -1, -3, -1, -1, -2, -2,
- -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2, -2, -1, -2 });
- verifyValues(sm, 'S', new int[] { 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2,
- 0, -1, -2, -1, 4, 1, -3, -2, -2, 0, 0, 0 });
- verifyValues(sm, 'T', new int[] { 0, -1, 0, -1, -1, -1, -1, -2, -2, -1,
- -1, -1, -1, -2, -1, 1, 5, -2, -2, 0, -1, -1, 0 });
- verifyValues(sm, 'W', new int[] { -3, -3, -4, -4, -2, -2, -3, -2, -2,
- -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3, -4, -3, -2 });
- verifyValues(sm, 'Y', new int[] { -2, -2, -2, -3, -2, -1, -2, -3, 2,
- -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1, -3, -2, -1 });
- verifyValues(sm, 'V', new int[] { 0, -3, -3, -3, -1, -2, -2, -3, -3, 3,
- 1, -2, 1, -1, -2, -2, 0, -3, -1, 4, -3, -2, -1 });
- verifyValues(sm, 'B', new int[] { -2, -1, 3, 4, -3, 0, 1, -1, 0, -3,
- -4, 0, -3, -3, -2, 0, -1, -4, -3, -3, 4, 1, -1 });
- verifyValues(sm, 'Z', new int[] { -1, 0, 0, 1, -3, 3, 4, -2, 0, -3, -3,
- 1, -1, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1 });
- verifyValues(sm, 'X', new int[] { 0, -1, -1, -1, -2, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -2, 0, 0, -2, -1, -1, -1, -1, -1 });
- }
- /**
- * Helper method to check pairwise scores for one residue
- *
- * @param sm
- * @param res
- * @param expected
- * score values against 'res', in ResidueProperties.aaIndex order
- */
- private void verifyValues(ScoreMatrix sm, char res, int[] expected)
- {
- for (int j = 0; j < expected.length; j++)
- {
- char c2 = ResidueProperties.aa[j].charAt(0);
- assertEquals(sm.getPairwiseScore(res, c2), expected[j],
- String.format("%s->%s", res, c2));
- }
- }
-
- @Test(groups = "Functional")
- public void testComputePairwiseScores()
- {
- String[] seqs = new String[] { "FKL", "R-D", "QIA", "GWC" };
- ScoreMatrix sm = ResidueProperties.getScoreMatrix("BLOSUM62");
-
- MatrixI pairwise = sm.computePairwiseScores(seqs);
-
- /*
- * should be NxN where N = number of sequences
- */
- assertEquals(pairwise.height(), 4);
- assertEquals(pairwise.width(), 4);
-
- /*
- * should be symmetrical (because BLOSUM62 is)
- */
- for (int i = 0; i < pairwise.height(); i++)
- {
- for (int j = 0; j < pairwise.width(); j++)
- {
- assertEquals(pairwise.getValue(i, j), pairwise.getValue(j, i),
- "Not symmetric");
- }
- }
- /*
- * verify expected BLOSUM dot product scores
- */
- // F.F + K.K + L.L = 6 + 5 + 4 = 15
- assertEquals(pairwise.getValue(0, 0), 15d);
- // R.R + -.- + D.D = 5 + 1 + 6 = 12
- assertEquals(pairwise.getValue(1, 1), 12d);
- // Q.Q + I.I + A.A = 5 + 4 + 4 = 13
- assertEquals(pairwise.getValue(2, 2), 13d);
- // G.G + W.W + C.C = 6 + 11 + 9 = 26
- assertEquals(pairwise.getValue(3, 3), 26d);
- // F.R + K.- + L.D = -3 + -4 + -4 = -11
- assertEquals(pairwise.getValue(0, 1), -11d);
- // F.Q + K.I + L.A = -3 + -3 + -1 = -7
- assertEquals(pairwise.getValue(0, 2), -7d);
- // F.G + K.W + L.C = -3 + -3 + -1 = -7
- assertEquals(pairwise.getValue(0, 3), -7d);
- // R.Q + -.I + D.A = 1 + -4 + -2 = -5
- assertEquals(pairwise.getValue(1, 2), -5d);
- // R.G + -.W + D.C = -2 + -4 + -3 = -9
- assertEquals(pairwise.getValue(1, 3), -9d);
- // Q.G + I.W + A.C = -2 + -3 + 0 = -5
- assertEquals(pairwise.getValue(2, 3), -5d);
- }
-}
@Test(groups = { "Functional" })
public void testPID_includingGaps()
{
- String seq1 = "ABCDEF";
+ String seq1 = "ABCDEFG"; // extra length here is ignored
String seq2 = "abcdef";
assertEquals("identical", 100f, Comparison.PID(seq1, seq2), 0.001f);
int length = seq1.length();
// match gap-residue, match gap-gap: 9/10 identical
+ // TODO should gap-gap be included in a PID score? JAL-791
assertEquals(90f, Comparison.PID(seq1, seq2, 0, length, true, false),
0.001f);
// overloaded version of the method signature above:
assertEquals(90f, Comparison.PID(seq1, seq2), 0.001f);
// don't match gap-residue, match gap-gap: 7/10 identical
+ // TODO should gap-gap be included in a PID score?
assertEquals(70f, Comparison.PID(seq1, seq2, 0, length, false, false),
0.001f);
}
public void testPID_ungappedOnly()
{
// 5 identical, 2 gap-gap, 2 gap-residue, 1 mismatch
- String seq1 = "a--b-cdefh";
+ // the extra length of seq1 is ignored
+ String seq1 = "a--b-cdefhr";
String seq2 = "a---bcdefg";
int length = seq1.length();
--- /dev/null
+package jalview.util;
+
+import static org.testng.Assert.assertEquals;
+
+import java.awt.Color;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.testng.annotations.Test;
+
+public class SetUtilsTest
+{
+  /**
+   * Exercises countDisjunction with null, empty, identical and partly
+   * overlapping sets. The expected values correspond to the number of
+   * elements that are present in exactly one of the two sets.
+   */
+  @Test(groups = "Functional")
+  public void testCountDisjunction()
+  {
+    Set<Color> colours = new HashSet<Color>();
+
+    // null and empty arguments, in any combination, give zero
+    assertEquals(SetUtils.countDisjunction(null, null), 0);
+    assertEquals(SetUtils.countDisjunction(colours, null), 0);
+    assertEquals(SetUtils.countDisjunction(null, colours), 0);
+
+    // a single element set against null, and against itself
+    colours.add(Color.white);
+    assertEquals(SetUtils.countDisjunction(colours, null), 1);
+    assertEquals(SetUtils.countDisjunction(null, colours), 1);
+    assertEquals(SetUtils.countDisjunction(colours, null), 1);
+    assertEquals(SetUtils.countDisjunction(colours, colours), 0);
+
+    // an empty set of a different element type
+    Set<Object> objects = new HashSet<Object>();
+    assertEquals(SetUtils.countDisjunction(objects, objects), 0);
+    assertEquals(SetUtils.countDisjunction(colours, objects), 1);
+    assertEquals(SetUtils.countDisjunction(objects, colours), 1);
+
+    colours.add(Color.yellow);
+    colours.add(Color.blue);
+    // an equal, but not identical, yellow
+    objects.add(new Color(Color.yellow.getRGB()));
+
+    /*
+     * colours is now {white, yellow, blue}
+     * objects is {yellow'}
+     * unshared: white, blue
+     */
+    assertEquals(SetUtils.countDisjunction(colours, objects), 2);
+
+    // blue becomes shared, leaving only white unshared
+    objects.add(Color.blue);
+    assertEquals(SetUtils.countDisjunction(colours, objects), 1);
+
+    // pink is only in objects, so white and pink are unshared
+    objects.add(Color.pink);
+    assertEquals(SetUtils.countDisjunction(colours, objects), 2);
+
+  }
+}