Создание практического задания. Выполнение задач кластеризации разными способами

parents
id,number_of_rooms,s_squared,price
1,1,29,5.58
2,1,34,4.81
3,1,46,6.44
4,1,51,7.95
5,2,59,7.31
6,2,64,7.88
7,2,69,8.22
8,2,51,4.49
9,2,70,5.3
10,3,60,8.7
11,3,84,9.14
12,3,89,10.18
13,3,68,5.55
14,3,76,5.99
15,3,68,6.2
16,3,115,9.7
17,4,112,11.59
18,4,166,28
19,4,96,33
20,3,113,16.5
21,3,201,37.5
from pandas import read_csv
from sklearn.cluster import KMeans
# Fit K-Means with several cluster counts on the flats data and, for each
# model, print the centres, the training labels and the cluster assigned to
# one query point.
estimators = [
    ('k_means_iris_2', KMeans(n_clusters=2)),
    ('k_means_iris_3', KMeans(n_clusters=3)),
    ('k_means_iris_4', KMeans(n_clusters=4)),
]

# `id` becomes the index, so positional columns start at the first
# non-id column of the file.
data = read_csv('flats.csv', index_col='id', delimiter=',')

# Features are positional columns 1 and 2 of the first 20 rows
# (presumably s_squared and price, given a header of
# id,number_of_rooms,s_squared,price -- verify against flats.csv).
# NOTE(review): if flats.csv has 21 rows, this slice leaves the last row
# out of training -- confirm that is intentional.
# A plain array is used so that fit() and predict() receive the same kind
# of input; fitting on a named-column DataFrame and predicting on a bare
# list makes scikit-learn warn about missing feature names.
x = data.iloc[0:20, [1, 2]].to_numpy()

# Query point to classify, in the same column order as the features: 80, 2.
pr = [[80, 2]]

for name, est in estimators:
    print(name)
    est.fit(x)
    print(est)
    print("Centers")
    print(est.cluster_centers_)
    print("Dataset labels")
    print(est.labels_)
    # Check which cluster the query point falls into for this model.
    pred = est.predict(pr)
    print("Predicting")
    print(f"Point {pr[0]} in cluster: {pred[0]}")
import scipy
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plot
from sklearn.neighbors import NearestNeighbors
# Scatter a cloud of random integer points, then look up the two nearest
# neighbours of one more random point and draw that point on the same plot.
size = 100

# Each point is an [x, y] pair with both coordinates drawn from 0..100.
points = [
    [random.randint(0, 100), random.randint(0, 100)]
    for _ in range(size)
]

cloud: pd.DataFrame = pd.DataFrame(points)
plot.scatter(cloud.iloc[:, 0], cloud.iloc[:, 1])

# Index the cloud with a ball tree, asking for two neighbours per query.
samples = np.array(cloud)
nbs: NearestNeighbors = NearestNeighbors(
    n_neighbors=2, algorithm='ball_tree'
).fit(samples)

# Query the index with a single freshly drawn point.
new_point = [[random.randint(0, 100), random.randint(0, 100)]]
distances, indices = nbs.kneighbors(new_point)
print(f"Point={new_point}, Dist={distances} IND={indices}")

# Draw the query point with the edge colour [0, 0, 1].
query_x, query_y = new_point[0]
plot.scatter([query_x], [query_y], edgecolors=[0, 0, 1])
plot.show()
This diff is collapsed.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import neighbors
# Train a k-nearest-neighbours classifier on the passenger data and report
# its accuracy on a hold-out part of the labelled set.
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

# Column 0 of train.csv is used as the target; columns 1, 3, 4 are the
# features (PClass, Sex, Age according to the original author's note).
label = train_data.iloc[0:890, 0]
test_d = train_data.iloc[0:890, [1, 3, 4]]
testdat = test_data.iloc[0:267, [1, 3, 4]]

# Fill missing entries with 0 so the classifier receives no NaN values.
test_d = test_d.fillna(0)
testdat = testdat.fillna(0)

# Replace the textual sex value with a numeric identifier.
for frame in (test_d, testdat):
    frame['sex'] = frame['sex'].map({'female': 0, 'male': 1}).astype(int)

# Split the labelled data: 70% for fitting, 30% held out for evaluation.
x_train, x_holdout, train_labels, test_labels = train_test_split(
    test_d, label, random_state=7, train_size=0.7
)

knn = neighbors.KNeighborsClassifier(weights='distance', n_neighbors=5)

# Fit on the training part only; fitting on the full labelled set would
# leak the hold-out rows into the model and inflate the score.
knn.fit(x_train, train_labels)

# Score on the hold-out rows, whose true labels are `test_labels`.
# The previous version compared these labels with predictions made for the
# rows of test.csv, i.e. for unrelated samples.
holdout_predictions = knn.predict(x_holdout)
print(f"Точность={accuracy_score(test_labels, holdout_predictions)}")

# Predictions for the rows taken from test.csv, kept separate from the score.
# NOTE(review): if test.csv also carries the target in column 0, these
# predictions could be scored against it as well -- confirm the file schema.
predictions = knn.predict(testdat)
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment