Experiment Tracking Model Training
import practicuscore as prt
import os
import mlflow
import xgboost as xgb
import cloudpickle
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
region = prt.current_region()
# Defining parameters
# You need to configure using the service unique key and name
service_name = None
service_key = None
# Optionally, you can provide experiment name to create a new experiment while configuring
experiment_name = None
# If you don't know service key and name you can checkout down below
addon_list = region.addon_list
display(addon_list.to_pandas())
assert service_name, "Please select a service_name"
assert service_key, "Please select a service_key"
assert experiment_name, "Please select a experiment_name"
prt.experiments.configure(service_name=service_name, service_key=service_key, experiment_name=experiment_name)
data_set_conn = {
"connection_type": "WORKER_FILE",
"file_path": "/home/ubuntu/samples/ice_cream.csv"
}
import practicuscore as prt
region = prt.current_region()
worker = region.get_or_create_worker()
proc = worker.load(data_set_conn)
data = proc.get_df_copy()
data.head()
# Set experiment name, if you haven't already while configuring the service
mlflow.set_experiment("XGBoost Experiment")
# Loading the dataset
X = data.Temperature
y = data.Revenue
# Test and Train split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# XGBoost parameters
params = {
'max_depth': 3,
'eta': 0.1,
'objective': 'reg:squarederror',
}
# Creation of DMatrix
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)
# Training of model by using mlflow
with mlflow.start_run():
mlflow.log_params(params)
model = xgb.train(params, dtrain, num_boost_round=200)
# Prediction process
predictions = model.predict(dtest)
rmse = np.sqrt(mean_squared_error(y_test, predictions))
mlflow.log_metric("rmse", rmse)
# Saving the model in MLFlow
artifact_path = "model"
if not os.path.exists(artifact_path):
os.makedirs(artifact_path)
model_path = os.path.join(artifact_path, "xgboost_model.pkl")
with open(model_path, "wb") as f:
cloudpickle.dump(model, f)
# Saving the serialised model in MLflow
mlflow.log_artifacts(artifact_path)
mlflow.log_artifacts(artifact_path)
# Printing out the run id
print("Run ID:", mlflow.active_run().info.run_id)
Previous: Experiment Tracking Logging | Next: Model Drift > Model Drift