Practice with sklearn multi-label classification algorithms
1. Example 1
import numpy as np
import pandas as pd
import scipy
from scipy.io import arff

# Dataset: the Yeast ARFF file shipped with MEKA
data, meta = scipy.io.arff.loadarff('D:/Programs/meka1.9.2/data/Yeast.arff')
df = pd.DataFrame(data)
columns1 = df.columns.tolist()
df.info()

# Convert the DataFrame to arrays: the first 14 columns are the labels,
# the remaining columns are the features
df_y = df[columns1[:14]].astype(int).values
df_x = df[columns1[14:]].values

# Method 1: Binary Relevance -- train one independent binary classifier per label
from skmultilearn.problem_transform import BinaryRelevance
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(df_x, df_y, test_size=0.33, random_state=42)

# initialize the binary relevance multi-label classifier
# with a Gaussian naive Bayes base classifier
classifier = BinaryRelevance(GaussianNB())
# train
classifier.fit(X_train, y_train)
# predict
predictions = classifier.predict(X_test)

from sklearn.metrics import accuracy_score
accuracy_score(y_test, predictions)

# Example 2: OneVsRest -- for each label, the class of interest is the positive
# class and all remaining classes form the negative class
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC

# one-vs-rest classifier with an SVM base learner
# clf1 = OneVsRestClassifier(SVC(kernel='linear'), n_jobs=-1)
clf1 = OneVsRestClassifier(SVC(kernel='poly'), n_jobs=-1)
# train
clf1.fit(X_train, y_train)
# output echoed by fit() in an interactive session:
# OneVsRestClassifier(estimator=SVC(C=1.0, cache_size=200, class_weight=None,
#     coef0=0.0, decision_function_shape=None, degree=3, gamma='auto',
#     kernel='linear', max_iter=-1, probability=False, random_state=None,
#     shrinking=True, tol=0.001, verbose=False), n_jobs=-1)

# predicted labels
predict_class = clf1.predict(X_test)
# accuracy: predicted results vs. actual results
clf1.score(X_test, y_test)

# Example 3: Label Powerset -- treat every distinct label combination
# (up to 2^L for L labels) as one class of a single multi-class problem
from skmultilearn.problem_transform import LabelPowerset
from sklearn.naive_bayes import GaussianNB

# initialize the Label Powerset multi-label classifier
# with a Gaussian naive Bayes base classifier
classifier = LabelPowerset(GaussianNB())
# train
classifier.fit(X_train, y_train)
# predict
predictions = classifier.predict(X_test)
accuracy_score(y_test, predictions)

# Example 4: Adapted algorithm -- the multi-label k-nearest-neighbours method MLkNN
from skmultilearn.adapt import MLkNN

classifier = MLkNN(k=20)
# train
classifier.fit(X_train, y_train)
# predict
predictions = classifier.predict(X_test)
accuracy_score(y_test, predictions)

# Example 5: Classifier Chains -- like Binary Relevance, but each classifier in
# the chain also receives the previous labels as extra input features
from skmultilearn.problem_transform import ClassifierChain
from sklearn.naive_bayes import GaussianNB

# initialize the classifier chains multi-label classifier
# with a Gaussian naive Bayes base classifier
classifier = ClassifierChain(GaussianNB())
# train
classifier.fit(X_train, y_train)
# predict
predictions = classifier.predict(X_test)
accuracy_score(y_test, predictions)
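The MLkNN example above hard-codes k=20. Because scikit-multilearn classifiers follow the scikit-learn estimator API, the neighbourhood size can instead be chosen by cross-validation. The following is a minimal sketch, assuming the same X_train/y_train split as above; the parameter grid, scoring metric, and fold count are illustrative assumptions, not taken from the original post.

# Hedged sketch: tune MLkNN's k (and smoothing parameter s) with GridSearchCV.
from sklearn.model_selection import GridSearchCV
from skmultilearn.adapt import MLkNN

param_grid = {'k': [10, 20, 30], 's': [0.5, 1.0]}   # assumed, illustrative grid
search = GridSearchCV(MLkNN(), param_grid, scoring='f1_macro', cv=3)
search.fit(X_train, y_train)
print(search.best_params_, search.best_score_)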
2. Example 2
from skmultilearn.dataset import load_dataset

# load the predefined train/test split of the emotions dataset
# that ships with scikit-multilearn
X_train, y_train, feature_names, label_names = load_dataset('emotions', 'train')
X_test, y_test, _, _ = load_dataset('emotions', 'test')
feature_names[:10]
label_names

from skmultilearn.problem_transform import BinaryRelevance
from sklearn.svm import SVC

# require_dense=[False, True]: pass the features to the base classifier as a
# sparse matrix, but convert the labels to a dense representation
clf = BinaryRelevance(
    classifier=SVC(),
    require_dense=[False, True]
)
clf.fit(X_train, y_train)
clf.classifier          # the base classifier template used for every label
prediction = clf.predict(X_test)

import sklearn.metrics as metrics
print("hamming loss =", metrics.hamming_loss(y_test, prediction))
print("subset accuracy =", metrics.accuracy_score(y_test, prediction))
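Both metrics above are computed directly on the sparse matrices returned by load_dataset and predict. A small sketch, reusing the y_test and prediction objects from above, of converting them to dense arrays to see what each score actually measures; the variable names below are only illustrative.

# Hedged sketch: prediction and y_test are scipy sparse matrices; .toarray()
# gives dense arrays for per-example inspection. hamming_loss is the average
# per-label error rate, while accuracy_score only counts rows matched exactly.
pred_dense = prediction.toarray()
true_dense = y_test.toarray()

per_example_label_errors = (pred_dense != true_dense).mean(axis=1)
exact_match = (pred_dense == true_dense).all(axis=1)

print("mean per-label error (= hamming loss):", per_example_label_errors.mean())
print("fraction of exact matches (= subset accuracy):", exact_match.mean())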