X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=website%2Farchive%2Fbinaries%2Fmac%2Fsrc%2Fglobplot%2Fbiopython-1.50%2FBio%2FkNN.py;fp=website%2Farchive%2Fbinaries%2Fmac%2Fsrc%2Fglobplot%2Fbiopython-1.50%2FBio%2FkNN.py;h=0000000000000000000000000000000000000000;hb=fb21789ba11ae9b9f6abed1b01d4542e54abfe34;hp=4bd2d8d2ea286c297db36a417ce47ed142e589fb;hpb=7d68cf3489c816b2ed7afb1925a6de2168eccf4a;p=jabaws.git diff --git a/website/archive/binaries/mac/src/globplot/biopython-1.50/Bio/kNN.py b/website/archive/binaries/mac/src/globplot/biopython-1.50/Bio/kNN.py deleted file mode 100644 index 4bd2d8d..0000000 --- a/website/archive/binaries/mac/src/globplot/biopython-1.50/Bio/kNN.py +++ /dev/null @@ -1,132 +0,0 @@ -#!/usr/bin/env python - -""" -This module provides code for doing k-nearest-neighbors classification. - -k Nearest Neighbors is a supervised learning algorithm that classifies -a new observation based the classes in its surrounding neighborhood. - -Glossary: -distance The distance between two points in the feature space. -weight The importance given to each point for classification. - - -Classes: -kNN Holds information for a nearest neighbors classifier. - - -Functions: -train Train a new kNN classifier. -calculate Calculate the probabilities of each class, given an observation. -classify Classify an observation into a class. - - Weighting Functions: -equal_weight Every example is given a weight of 1. - -""" - -#TODO - Remove this work around once we drop python 2.3 support -try: - set = set -except NameError: - from sets import Set as set - -import numpy - -class kNN: - """Holds information necessary to do nearest neighbors classification. - - Members: - classes Set of the possible classes. - xs List of the neighbors. - ys List of the classes that the neighbors belong to. - k Number of neighbors to look at. - - """ - def __init__(self): - """kNN()""" - self.classes = set() - self.xs = [] - self.ys = [] - self.k = None - -def equal_weight(x, y): - """equal_weight(x, y) -> 1""" - # everything gets 1 vote - return 1 - -def train(xs, ys, k, typecode=None): - """train(xs, ys, k) -> kNN - - Train a k nearest neighbors classifier on a training set. xs is a - list of observations and ys is a list of the class assignments. - Thus, xs and ys should contain the same number of elements. k is - the number of neighbors that should be examined when doing the - classification. - - """ - knn = kNN() - knn.classes = set(ys) - knn.xs = numpy.asarray(xs, typecode) - knn.ys = ys - knn.k = k - return knn - -def calculate(knn, x, weight_fn=equal_weight, distance_fn=None): - """calculate(knn, x[, weight_fn][, distance_fn]) -> weight dict - - Calculate the probability for each class. knn is a kNN object. x - is the observed data. weight_fn is an optional function that - takes x and a training example, and returns a weight. distance_fn - is an optional function that takes two points and returns the - distance between them. If distance_fn is None (the default), the - Euclidean distance is used. Returns a dictionary of the class to - the weight given to the class. - - """ - x = numpy.asarray(x) - - order = [] # list of (distance, index) - if distance_fn: - for i in range(len(knn.xs)): - dist = distance_fn(x, knn.xs[i]) - order.append((dist, i)) - else: - # Default: Use a fast implementation of the Euclidean distance - temp = numpy.zeros(len(x)) - # Predefining temp allows reuse of this array, making this - # function about twice as fast. - for i in range(len(knn.xs)): - temp[:] = x - knn.xs[i] - dist = numpy.sqrt(numpy.dot(temp,temp)) - order.append((dist, i)) - order.sort() - - # first 'k' are the ones I want. - weights = {} # class -> number of votes - for k in knn.classes: - weights[k] = 0.0 - for dist, i in order[:knn.k]: - klass = knn.ys[i] - weights[klass] = weights[klass] + weight_fn(x, knn.xs[i]) - - return weights - -def classify(knn, x, weight_fn=equal_weight, distance_fn=None): - """classify(knn, x[, weight_fn][, distance_fn]) -> class - - Classify an observation into a class. If not specified, weight_fn will - give all neighbors equal weight. distance_fn is an optional function - that takes two points and returns the distance between them. If - distance_fn is None (the default), the Euclidean distance is used. - """ - weights = calculate( - knn, x, weight_fn=weight_fn, distance_fn=distance_fn) - - most_class = None - most_weight = None - for klass, weight in weights.items(): - if most_class is None or weight > most_weight: - most_class = klass - most_weight = weight - return most_class