简单实现KNN(处理连续型数据)
阅读原文时间:2023年07月08日阅读:2

import numpy as np
import matplotlib.pyplot as plt
import time
import math
import collections

# Training samples: ten 2-D points, one [feature_0, feature_1] pair per row.
raw_data_x = [
    [3.39, 2.33],
    [3.11, 1.78],
    [1.34, 3.36],
    [3.58, 4.67],
    [2.28, 2.86],
    [7.442, 4.69],
    [5.74, 3.53],
    [9.17, 2.51],
    [7.79, 3.42],
    [7.93, 0.79],
]
# Class label of each row above: the first five are class 0, the rest class 1.
raw_data_y = [0] * 5 + [1] * 5

# NumPy views of the training set, so boolean masks and broadcasting work.
x_train = np.array(raw_data_x)
y_train = np.array(raw_data_y)

# The single query point to classify.
x_test = np.array([8.0, 3.36])

# Plot the training samples, one colour per class (class 0 red, class 1 green).
for label, colour in ((0, 'r'), (1, 'g')):
    members = x_train[y_train == label]
    plt.scatter(members[:, 0], members[:, 1], color=colour)

# Plot the query point in blue on the same axes.
plt.scatter(x_test[0], x_test[1], color='b')

plt.show()

# Classify x_test with a k-nearest-neighbours majority vote.

# Euclidean distance from every training sample to the query point,
# computed in one vectorised step (one entry per row of x_train).
distance = np.sqrt(np.sum((x_train - x_test) ** 2, axis=1))

# Training-sample indices ordered from nearest to farthest.
index = np.argsort(distance)
k = 6  # number of neighbours that take part in the vote
# Labels of the k nearest samples, nearest first.
result = [y_train[i] for i in index[:k]]

# Majority vote. Counter.most_common orders equal counts by first
# occurrence, so with an even k a tie goes to the tied class whose
# member is nearest to the query point.
vote = collections.Counter(result)
print("the class is {}".format(vote.most_common(1)[0][0]))

手机扫一扫

移动阅读更方便

阿里云服务器
腾讯云服务器
七牛云服务器

你可能感兴趣的文章