import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
# Load the iris dataset bundled with scikit-learn.
iris = load_iris()
# Quick look at the dataset's contents. As bare expressions these only echo a
# value in an interactive session (REPL/notebook); in a plain script they
# produce no output.
iris.keys()
iris.target_names
iris.feature_names
iris.target

# Split the data into training and test sets.
from sklearn.model_selection import train_test_split

X = iris.data
y = iris.target
# test_size=0.3 holds out 30% of the samples for evaluation.
# The second result must be bound to X_test (it was misspelled X_tst), because
# the scoring code further down reads X_test and would raise NameError.
# NOTE(review): no random_state is passed, so the split -- and every accuracy
# computed from it -- changes from run to run; pass one if reproducibility
# matters.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
# Baseline KNN classifier.
from sklearn import metrics  # used by the K sweep below
from sklearn.neighbors import KNeighborsClassifier

# Build the classifier with 5 neighbors and train it in one expression;
# fit() returns the estimator itself, so knn is the fitted model.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# score() reports mean accuracy on the held-out test split.
print("Accuracy: ", knn.score(X_test, y_test))
# Find K: fit one classifier per candidate K and record its test accuracy,
# to see which K in the range gives the highest accuracy.
k_range = range(1, 50)  # candidate neighbor counts, 1 through 49
scores = {}  # maps k -> accuracy on the test split
scores_list = []  # same accuracies in k_range order (used for plotting)
for k in k_range:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    y_prediction = knn.predict(X_test)
    # Compute the accuracy once and store it in both containers.
    accuracy = metrics.accuracy_score(y_test, y_prediction)
    scores[k] = accuracy
    scores_list.append(accuracy)
# Plot test accuracy as a function of K on the current axes.
# NOTE(review): there is no plt.show() here; when run as a plain script
# (rather than in a notebook) the figure may never be displayed -- confirm
# the intended environment.
ax = plt.gca()
ax.plot(k_range, scores_list)
ax.set_xlabel('K값')
ax.set_ylabel('Accuracy')