-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathknearest.py
More file actions
67 lines (52 loc) · 1.98 KB
/
knearest.py
File metadata and controls
67 lines (52 loc) · 1.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
'''
K-nearest Neighbour classifier
Y = KNEAREST( k, x, data, datalabels )
Arguments:
'data' should be an N-row by M-column matrix of data, composed
of N training examples, each with M dimensions.
'datalabels' should be an Nx1 column vector of class labels.
'x' is the TEST data vector, size 1xM, where the kNN estimate is required.
'k' is the number of neighbours to take into account.
Note that even values of k may produce ties, which are broken randomly.
Returns:
'y' - a predicted class label for your data vector 'x'
'''
import numpy as np
import scipy
import random
from collections import Counter
def knearest(k, x, data, datalabels):
    """Classify test vector `x` with a k-nearest-neighbour vote.

    Arguments:
        k          -- number of neighbours to consider (1 <= k <= len(data))
        x          -- test vector, length M
        data       -- N x M array-like of training examples
        datalabels -- length-N sequence of class labels (hashable)

    Returns:
        The majority class label among the k nearest training examples
        (Euclidean distance). Ties in the vote are broken uniformly at
        random, as the module docstring promises.

    Raises:
        ValueError -- if x's dimension does not match the training data,
                      or if k exceeds the number of training examples.
    """
    data = np.asarray(data)
    x = np.asarray(x)
    numtrain = len(data)            # rows (number of training examples)
    numfeatures = len(data[0])      # columns (dimension of each example)

    # Raise instead of exit(): library code should not kill the interpreter.
    if len(x) != numfeatures:
        raise ValueError('Test data dimensions does not match train data dimensions.')
    if k > numtrain:
        raise ValueError('Not enough training samples to use k = ' +
                         str(k) + ' (you only supplied ' + str(numtrain) + ')')

    # Euclidean distance from the test example to every training example,
    # computed in one vectorized pass (sum of squares is never negative,
    # so no abs() is needed before the sqrt).
    distances = np.sqrt(np.sum(np.square(x - data), axis=1))

    # Indices of the k closest training examples. A stable sort keeps
    # equidistant neighbours in training order instead of comparing labels.
    order = np.argsort(distances, kind='stable')
    closest = [datalabels[i] for i in order[:k]]

    # Majority vote; break ties between equally-frequent labels randomly.
    counts = Counter(closest).most_common()
    best_count = counts[0][1]
    tied = [label for label, count in counts if count == best_count]
    if len(tied) > 1:
        return random.choice(tied)
    return tied[0]