KNN is one of the simplest algorithms in machine learning. Its underlying idea is not explained here; it is easy to look up online. This post focuses on using Python to implement the KNN algorithm in the style of sklearn.

Import the required libraries

import numpy as np
from math import sqrt
from collections import Counter

The core KNN code

def KNN_classify(k, X_train, y_train, x):
    """
    k: the value of k in kNN
    X_train: features of the training set
    y_train: labels of the training set
    x: the new sample to classify
    """
    assert 1 <= k <= X_train.shape[0], "k must be valid"
    assert X_train.shape[0] == y_train.shape[0], \
        "the size of X_train must equal to the size of y_train"
    assert X_train.shape[1] == x.shape[0], \
        "the feature number of x must be equal to X_train"

    # compute the distance between the new sample x and every sample in the training set
    distances = [sqrt(np.sum((x_train - x) ** 2)) for x_train in X_train]
    nearest = np.argsort(distances)  # sort the distances and return the corresponding indices
    topK_y = [y_train[i] for i in nearest[:k]]  # classes of the k nearest samples
    votes = Counter(topK_y)  # count how many of those samples belong to each class
    return votes.most_common(1)[0][0]  # return the class with the most votes among the k neighbors
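As a side note not in the snippet above, the loop that builds distances can also be written as a single vectorized NumPy expression, which is usually faster on larger training sets. A minimal sketch (the helper name vectorized_distances is purely illustrative):

import numpy as np

def vectorized_distances(X_train, x):
    # Euclidean distance from x to every row of X_train, without an explicit Python loop
    return np.sqrt(np.sum((X_train - x) ** 2, axis=1))

Broadcasting in X_train - x subtracts x from every row, so summing over axis=1 yields one squared distance per training sample.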

Validation with dummy data

x = [[0, 0], [1, 1], [2, 2], [10, 10], [11, 11], [12, 12]]
y = [0, 0, 0, 1, 1, 1]
X_train = np.array(x)
y_train = np.array(y)

import matplotlib.pyplot as plt
plt.scatter(X_train[:3, 0], X_train[:3, 1], color='red')
plt.scatter(X_train[3:, 0], X_train[3:, 1], color='blue')
plt.show()

(Figure: scatter plot of the training data; the red points are class 0 and the blue points are class 1)

x = np.array([13, 13])
KNN_classify(2, X_train, y_train, x)
# output: 1
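As an extra sanity check of our own (not part of the original run), a point close to the first cluster should be assigned class 0:

x2 = np.array([-1, -1])                 # hypothetical extra test point near the first cluster
KNN_classify(2, X_train, y_train, x2)   # expected output: 0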

KNN classification with sklearn

from sklearn.neighbors import KNeighborsClassifier

kNN_classifier = KNeighborsClassifier()
kNN_classifier.fit(X_train, y_train)
# output: KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
#                              metric_params=None, n_jobs=None, n_neighbors=5, p=2,
#                              weights='uniform')

X_predict = x.reshape(1, -1)
kNN_classifier.predict(X_predict)
# output: array([1])
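Note that KNeighborsClassifier defaults to n_neighbors=5, whereas our hand-written call used k=2. To make the two directly comparable, the parameter can be passed explicitly; a short sketch using the same X_train, y_train and x as above (the variable name kNN_classifier_k2 is ours):

from sklearn.neighbors import KNeighborsClassifier

kNN_classifier_k2 = KNeighborsClassifier(n_neighbors=2)  # match the k=2 used earlier
kNN_classifier_k2.fit(X_train, y_train)
kNN_classifier_k2.predict(x.reshape(1, -1))              # expected to give array([1]) as well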

Re-wrapping KNN in the style of sklearn

import numpy as np
from math import sqrt
from collections import Counter

class KNNClassifier:

    def __init__(self, k):
        """Initialize the kNN classifier"""
        assert k >= 1, "k must be valid"
        self.k = k            # the k in kNN
        self._X_train = None  # the training set is stored as private attributes so users cannot tamper with it
        self._y_train = None

    def fit(self, X_train, y_train):
        """Train the kNN classifier on the training set X_train and y_train"""
        assert X_train.shape[0] == y_train.shape[0], \
            "the size of X_train must equal to the size of y_train"
        assert self.k <= X_train.shape[0], \
            "the size of X_train must be at least k."
        self._X_train = X_train
        self._y_train = y_train
        return self  # like sklearn, fit returns the classifier itself

    def predict(self, X_predict):
        """Given the data set X_predict to be predicted (as in sklearn, an array of samples is expected),
        return the result vector for X_predict"""
        assert self._X_train is not None and self._y_train is not None, \
            "must fit before predict!"
        assert X_predict.shape[1] == self._X_train.shape[1], \
            "the feature number of X_predict must be equal to X_train"
        # predict the class of every row of X_predict
        y_predict = [self._predict(x) for x in X_predict]
        return np.array(y_predict)  # the return value also follows sklearn

    def _predict(self, x):
        """Given a single sample x to be predicted, return the predicted class of x"""
        # first make sure x is valid
        assert x.shape[0] == self._X_train.shape[1], \
            "the feature number of x must be equal to X_train"
        # compute the distance between the new sample and every training sample
        distances = [sqrt(np.sum((x_train - x) ** 2)) for x_train in self._X_train]
        nearest = np.argsort(distances)  # sort the distances and return the corresponding indices
        topK_y = [self._y_train[i] for i in nearest[:self.k]]  # classes of the k nearest samples
        votes = Counter(topK_y)
        return votes.most_common(1)[0][0]

    def __repr__(self):
        return "KNN(k=%d)" % self.k
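To push the sklearn imitation one step further, an accuracy score method could be added, similar in spirit to the score method that sklearn classifiers expose. The subclass below is only a sketch of ours, assuming the KNNClassifier defined above:

import numpy as np

class KNNClassifierWithScore(KNNClassifier):

    def score(self, X_test, y_test):
        """Fraction of samples in X_test whose predicted class matches y_test."""
        y_predict = self.predict(X_test)
        return np.sum(y_predict == y_test) / len(y_test)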

X_train = np.array([[0, 0], [1, 1], [2, 2], [10, 10], [11, 11], [12, 12]])
y_train = np.array([0, 0, 0, 1, 1, 1])

import matplotlib.pyplot as plt
plt.scatter(X_train[:3, 0], X_train[:3, 1], color='red')
plt.scatter(X_train[3:, 0], X_train[3:, 1], color='blue')
plt.show()

(Figure: the same scatter plot of the training data as above)

x = np.array([[13, 13], [-1, -1]])
knn_clf = KNNClassifier(k=2)
knn_clf.fit(X_train, y_train)
# output: KNN(k=2)

knn_clf.predict(x)
# output: array([1, 0])
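As a final check of our own, feeding the same two test points to sklearn with n_neighbors=2 should reproduce these predictions (sk_clf is just an illustrative name):

from sklearn.neighbors import KNeighborsClassifier

sk_clf = KNeighborsClassifier(n_neighbors=2)
sk_clf.fit(X_train, y_train)
sk_clf.predict(x)
# expected: array([1, 0]), matching our KNNClassifier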