Skip to main content
Version: 4.4

XGBoost Logging

Import necessary packages

from katonic.ml import MyClient
import os
import numpy as np
from xgboost import XGBClassifier
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.metrics import accuracy_score, roc_auc_score, log_loss, recall_score, f1_score, precision_score
from sklearn.model_selection import train_test_split
import pandas as pd

Create client object using MyClient

# Build the Katonic client wrapper and pull out the two handles it exposes:
# `mlflow` (used for experiment/run tracking calls in this example) and
# `client`.
myclient = MyClient()
mlflow = myclient.mlflow
client = myclient.client

# Creating an experiment whose name already exists raises an error, which
# would break a second run of this snippet. Only create it when the lookup
# reports that it does not exist yet.
# NOTE(review): assumes get_experiment_by_name returns None for an unknown
# name, as in upstream MLflow -- confirm for the Katonic wrapper.
if mlflow.get_experiment_by_name("test_exp") is None:
    mlflow.create_experiment("test_exp")

Define metric function

def metric(actual, pred, pred_proba=None):
    """Compute classification metrics for a set of predictions.

    All metrics are computed with scikit-learn's default settings.

    Args:
        actual: True class labels.
        pred: Predicted class labels. Used for accuracy, recall, precision
            and F1.
        pred_proba: Optional predicted scores/probabilities for the positive
            class. When provided, ROC AUC and log loss are computed from
            these values; when omitted, they are computed from ``pred``
            (the original behavior).

    Returns:
        A tuple ``(acc_score, auc_roc, log_los, recall, f1_scr,
        precision_scr)``.
    """
    # ROC AUC and log loss are score-based metrics: feeding them hard labels
    # collapses the ROC curve to a single operating point and makes log loss
    # treat every prediction as fully confident. Prefer probabilities when the
    # caller supplies them, but keep the label fallback for existing callers.
    scores = pred if pred_proba is None else pred_proba

    acc_score = accuracy_score(actual, pred)
    recall = recall_score(actual, pred)
    precision_scr = precision_score(actual, pred)
    f1_scr = f1_score(actual, pred)
    auc_roc = roc_auc_score(actual, scores)
    log_los = log_loss(actual, scores)

    return (
        acc_score,
        auc_roc,
        log_los,
        recall,
        f1_scr,
        precision_scr,
    )

Implement model training and logging steps

# Select the experiment that this run is recorded under and keep its details
# around for later inspection.
exp_name = "mlflow-test-XGB2"
mlflow.set_experiment(exp_name)
exp_details = mlflow.get_experiment_by_name(exp_name)

# Everything below runs inside a single tracked run so that the metrics and
# the model artifact are attached to the same run.
with mlflow.start_run(run_name=exp_name):
    # Load the diabetes dataset; the predicted column is "Outcome" and every
    # other column is used as a feature.
    df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/diabetes.csv')
    x = df.drop(columns=['Outcome'])
    y = df['Outcome']

    # Hold out 20% of the rows for evaluation; the fixed seed keeps the split
    # reproducible.
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=.20, random_state=98)

    model_clf = XGBClassifier(random_state=0)
    model_clf.fit(X_train, y_train)

    # Evaluate on the held-out split.
    y_pred = model_clf.predict(X_test)
    (acc_score, auc_roc, log_los, recall, f1_scr, precision_scr) = metric(y_test, y_pred)

    model_metrics = {
        "accuracy_score": acc_score,
        "roc_auc_score": auc_roc,
        "log_loss": log_los,
        "recall": recall,
        "f1_score": f1_scr,
        "precision_score": precision_scr,
    }

    # Record each metric on the active run.
    for metric_name, score in model_metrics.items():
        mlflow.log_metric(metric_name, score)

    # Store the fitted model as a run artifact under "model".
    mlflow.xgboost.log_model(xgb_model=model_clf, artifact_path="model")