728x90
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import StratifiedKFold as SKF
from sklearn.metrics import roc_auc_score,f1_score,precision_score,accuracy_score
# Load the training features and labels (both files are read as CP949).
X_train = pd.read_csv('X_train.csv', encoding='cp949')
y_train = pd.read_csv('y_train.csv', encoding='cp949')

# Drop the cust_id column from the features and replace missing values with 0.
# `columns=` is used instead of the positional axis argument
# (`drop('cust_id', 1)`), which was deprecated in pandas 1.3 and is rejected
# by pandas 2.0+.
X_train = X_train.drop(columns='cust_id').fillna(0)

# Only the 'gender' column of the label file is used as the target.
y_train = y_train['gender']

# One-hot encode the two categorical columns and append the resulting dummy
# columns to the remaining features. `axis=1` must be passed by keyword:
# pandas 2.0+ no longer accepts it positionally in `pd.concat`.
categorical_cols = ['주구매상품', '주구매지점']
X_train = pd.concat(
    [
        X_train.drop(columns=categorical_cols),
        pd.get_dummies(X_train[categorical_cols]),
    ],
    axis=1,
)
# Estimator imports, gathered in one place. The aliases are kept exactly as
# before so that any later code referring to them still works.
# Note that `SVC` here is an alias for LinearSVC, not sklearn.svm.SVC.
import xgboost
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC as SVC
from sklearn.ensemble import GradientBoostingClassifier as GBC
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
from sklearn.tree import DecisionTreeClassifier as DT
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.linear_model import SGDClassifier as SGD

# Candidate classifiers to compare, each constructed with default
# hyper-parameters. The list order is the order in which they are evaluated.
models = [
    xgboost.XGBClassifier(),
    LogisticRegression(),
    RandomForestClassifier(),
    SVC(),
    GBC(),
    GaussianNB(),
    LDA(),
    QDA(),
    DT(),
    KNN(),
    SGD(),
]

# Expose every estimator under its individual module-level name as well.
(xgb_clf, log_clf, rf_clf, svc, gbc, nb,
 lda, qda, dt, knn, sgd) = models
# Stratified 10-fold splitter, shuffled with a fixed seed.
skf = SKF(n_splits=10, shuffle=True, random_state=5252)

# Materialise the (train_idx, valid_idx) pairs once so that every model is
# later fitted and scored on exactly the same folds.
folds = list(skf.split(X_train, y_train))
def _positive_class_scores(model, features, hard_preds):
    """Return continuous positive-class scores for ROC AUC.

    Uses ``predict_proba`` (second column) when the fitted model provides it,
    otherwise ``decision_function``. Falls back to ``hard_preds`` when the
    model exposes neither, or when the scores contain NaN/inf, which
    ``roc_auc_score`` would reject.
    """
    if hasattr(model, 'predict_proba'):
        scores = model.predict_proba(features)[:, 1]
    elif hasattr(model, 'decision_function'):
        scores = model.decision_function(features)
    else:
        return hard_preds
    return scores if np.all(np.isfinite(scores)) else hard_preds


# Cross-validate every model on the shared folds and collect, per model:
# [name, elapsed seconds, mean accuracy, mean ROC AUC, mean F1, mean precision].
n_folds = len(folds)  # averaging divisor; replaces the hard-coded 10
results = []
for model in models:
    model_name = type(model).__name__
    roc_auc = 0.0
    f1 = 0.0
    prec = 0.0
    acc = 0.0

    # perf_counter is a monotonic clock intended for measuring intervals.
    start = time.perf_counter()
    for train_idx, valid_idx in folds:
        train_x = X_train.iloc[train_idx].values
        valid_x = X_train.iloc[valid_idx].values
        train_y = y_train.iloc[train_idx].values
        valid_y = y_train.iloc[valid_idx].values

        model.fit(train_x, train_y)
        preds = model.predict(valid_x)

        # ROC AUC is a ranking metric: it needs continuous scores. Feeding it
        # thresholded labels collapses it to balanced accuracy.
        scores = _positive_class_scores(model, valid_x, preds)
        roc_auc += roc_auc_score(valid_y, scores) / n_folds
        f1 += f1_score(valid_y, preds) / n_folds
        prec += precision_score(valid_y, preds) / n_folds
        acc += accuracy_score(valid_y, preds) / n_folds

    # Keep the elapsed time numeric. The previous `str(e - s)[:5]` made the
    # sort below lexicographic ("10.23" < "2.345") and mangled values printed
    # in scientific notation.
    elapsed = round(time.perf_counter() - start, 3)
    results.append([model_name, elapsed, acc, roc_auc, f1, prec])

# Fastest model first.
results.sort(key=lambda row: row[1])
results_df = pd.DataFrame(
    results,
    columns=['model', 'time(sec)', 'acc', 'roc_auc', 'f1', 'prec'],
)
# Bare expression so the table is still displayed when run as a notebook cell.
results_df
728x90
'코딩코딩 > 머신러닝, 딥러닝' 카테고리의 다른 글
optimizer 비교 (0) | 2021.10.28 |
---|---|
[강화학습] MDP를 알 때의 플래닝 (0) | 2021.10.03 |
LinearRegression의 fit_intercept 파라미터 알아보기 (0) | 2021.05.31 |
ValueError: Setting a random_state has no effect since shuffle is False. You should leave random_state to its default (None), or set shuffle=True. (0) | 2021.04.28 |
Transposed Convolution layer (0) | 2021.02.23 |
댓글