# This data set consists of the petal and sepal lengths of 3 different types of
# irises (Setosa, Versicolour, and Virginica), stored in a 150x4 numpy.ndarray.
# The rows are the samples and the columns are: Sepal Length, Sepal Width,
# Petal Length and Petal Width.
from sklearn import datasets
from sklearn.model_selection import train_test_split

# Load the iris dataset: X holds the feature matrix, Y the class labels.
iris = datasets.load_iris()
X = iris.data
Y = iris.target

# Get the dataset size (the value is only displayed in an interactive session).
len(X)

# Randomly split the dataset into 70% training data and 30% test data.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)
for index in range(1, len(self.x_train)): dist = enc(element, self.x_train[index]) if dist < best_dist: best_dist = dist best_index = index
return self.y_train[best_index]
def vote(self, element, k):
    """Predict the label of ``element`` by majority vote of its k nearest neighbours.

    Every sample in ``self.x_train`` is ranked by Euclidean distance to
    ``element``; the labels (``self.y_train``) of the ``k`` closest samples
    are counted and the most frequent label is returned.

    Args:
        element: The query sample, comparable to one row of ``self.x_train``.
        k: Number of neighbours that take part in the vote. Values larger
            than the training set size simply use every training sample.

    Returns:
        The winning entry of ``self.y_train``. When several labels tie, the
        one belonging to the nearest neighbour among the tied labels wins.

    Raises:
        ValueError: If ``k`` is smaller than 1 or the training set is empty.

    Note:
        Labels are used as ``Counter`` keys, so they must be hashable.
    """
    if k < 1:
        raise ValueError("k must be at least 1")
    if len(self.x_train) == 0:
        raise ValueError("cannot vote with an empty training set")

    # Distance from the query to every training sample, in training order.
    distances = [distance.euclidean(element, sample) for sample in self.x_train]

    # Indices of the k closest samples, nearest first. The sort is stable,
    # so equally distant samples keep their training-set order.
    nearest = sorted(range(len(distances)), key=distances.__getitem__)[:k]

    # Vote on the neighbours' labels; voting on sample indices (which are
    # unique) would never produce a meaningful majority.
    votes = Counter(self.y_train[i] for i in nearest)
    return votes.most_common(1)[0][0]
# Choose the algorithms: KNeighborsClassifier, rocky's own KNN implementation,
# and DecisionTreeClassifier.
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier