Tracking Examples with Popular ML Libraries

All these examples are also available on GitHub.


1. scikit-learn

This example trains a random forest regressor with scikit-learn and logs the run with TrackingClient.

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from oip_tracking_client.tracking import TrackingClient

# Set up TrackingClient
api_host = "<API_HOST>"
api_key = "<API_KEY>"
workspace_name = "WORKSPACE_NAME"
TrackingClient.connect(api_host, api_key, workspace_name)

# Set the experiment
experiment_name = "experiment_name"
TrackingClient.set_experiment(experiment_name)

# Load the dataset and split it
db = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(db.data, db.target)

# Enable system metrics logging and start run
TrackingClient.enable_system_metrics_logging()
with TrackingClient.start_run():
    TrackingClient.autolog()
    TrackingClient.set_run_name("YOUR_RUN_NAME")

    # Create and train RandomForestRegressor
    rf = RandomForestRegressor(n_estimators=10, max_depth=6, max_features=3)
    rf.fit(X_train, y_train)

    # Predictions
    y_pred = rf.predict(X_test)

    # Metrics
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    TrackingClient.log_metric("mse", mse)
    TrackingClient.log_metric("r2", r2)

    # Log model with signature
    signature = TrackingClient.infer_signature(X_train, y_train)
    TrackingClient.sklearn.log_model(rf, "model", signature=signature)

Generated MLmodel YAML (example):

artifact_path: model
flavors:
  python_function:
    env:
      conda: conda.yaml
      virtualenv: python_env.yaml
    loader_module: mlflow.sklearn
    model_path: model.pkl
    predict_fn: predict
    python_version: 3.9.7
  sklearn:
    code: null
    pickled_model: model.pkl
    serialization_format: cloudpickle
    sklearn_version: 1.0.2
mlflow_version: 2.4.0
model_uuid: e456b488a05942b58f173cfec10cdc42
run_id: 2fcfd26d59ad41f7aad7e6f487c56dd3
signature:
  inputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1, 10]}}]'
  outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1]}}]'
utc_time_created: '2023-11-24 09:52:24.567632'
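
The MLmodel file shows the model is stored in standard MLflow format, so it can be loaded back for inference with the MLflow API. A minimal sketch, assuming TrackingClient.connect has already configured the tracking connection and <RUN_ID> is the run that logged the model (the same pattern applies to the other flavors below):

import mlflow.pyfunc

# Load the model logged above under the artifact path "model"
model = mlflow.pyfunc.load_model("runs:/<RUN_ID>/model")
# Reuse X_test from the example above
predictions = model.predict(X_test)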

2. XGBoost

This example trains an XGBoost regressor and logs the run with TrackingClient.

import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from oip_tracking_client.tracking import TrackingClient

# Set up TrackingClient
api_host = "<API_HOST>"
api_key = "<API_KEY>"
workspace_name = "WORKSPACE_NAME"
TrackingClient.connect(api_host, api_key, workspace_name)

# Set the experiment
experiment_name = "experiment_name"
TrackingClient.set_experiment(experiment_name)

# Load the dataset and split it
db = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(db.data, db.target)

# Enable system metrics logging and start run
TrackingClient.enable_system_metrics_logging()
with TrackingClient.start_run():
    TrackingClient.autolog()
    TrackingClient.set_run_name("YOUR_RUN_NAME")

    # XGBoost parameters
    params = {
        "n_estimators": 100,
        "max_depth": 6,
        "learning_rate": 0.1,
        "objective": "reg:squarederror",
    }

    xgb_model = xgb.XGBRegressor(**params)
    xgb_model.fit(X_train, y_train)
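
    # Optionally evaluate on the held-out split and log the error explicitly,
    # mirroring the scikit-learn example above (a sketch; autolog already
    # captures the training parameters)
    from sklearn.metrics import mean_squared_error
    preds = xgb_model.predict(X_test)
    TrackingClient.log_metric("mse", mean_squared_error(y_test, preds))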

    signature = TrackingClient.infer_signature(X_train, y_train)
    TrackingClient.xgboost.log_model(xgb_model, "model", signature=signature)

Generated MLmodel YAML (example):

artifact_path: model
flavors:
  python_function:
    data: model.xgb
    env:
      conda: conda.yaml
      virtualenv: python_env.yaml
    loader_module: mlflow.xgboost
    python_version: 3.9.7
  xgboost:
    code: null
    data: model.xgb
    model_class: xgboost.sklearn.XGBRegressor
    model_format: xgb
    xgb_version: 1.6.2
mlflow_version: 2.4.0
model_uuid: 1b41fa0786e34aedb2db835a43a71a05
run_id: 56159271f40f4351976391ff0bdadd77
signature:
  inputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1, 10]}}]'
  outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1]}}]'
utc_time_created: '2023-11-24 10:24:57.517327'

3. statsmodels

This example fits an ordinary least squares (OLS) regression model with statsmodels and logs it with TrackingClient.

import numpy as np
import statsmodels.api as sm
from oip_tracking_client.tracking import TrackingClient

# Set up TrackingClient
api_host = "<API_HOST>"
api_key = "<API_KEY>"
workspace_name = "WORKSPACE_NAME"
TrackingClient.connect(api_host, api_key, workspace_name)

# Set the experiment
experiment_name = "experiment_name"
TrackingClient.set_experiment(experiment_name)

# Generate sample data
np.random.seed(42)
num_samples = 100
num_features = 10

X = np.random.rand(num_samples, num_features)
true_coefficients = np.random.rand(num_features)
noise = np.random.normal(loc=0, scale=0.1, size=num_samples)
y = np.dot(X, true_coefficients) + noise

# Add a constant term
X = sm.add_constant(X)

# Enable system metrics logging and start run
TrackingClient.enable_system_metrics_logging()
with TrackingClient.start_run():
    TrackingClient.autolog()
    TrackingClient.set_run_name("YOUR_RUN_NAME")

    # Create and fit model
    model = sm.OLS(y, X)
    model_fit = model.fit()
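
    # Optionally log a goodness-of-fit metric; rsquared is part of the
    # statsmodels OLS results API
    TrackingClient.log_metric("rsquared", model_fit.rsquared)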

    # Log the model
    signature = TrackingClient.infer_signature(X, y)
    TrackingClient.statsmodels.log_model(model_fit, "model", signature=signature)

Generated MLmodel YAML (example):

artifact_path: model
flavors:
  python_function:
    data: model.statsmodels
    env:
      conda: conda.yaml
      virtualenv: python_env.yaml
    loader_module: mlflow.statsmodels
    python_version: 3.9.7
  statsmodels:
    code: null
    data: model.statsmodels
    statsmodels_version: 0.13.2
mlflow_version: 2.4.0
model_uuid: 2e70fe54dc224704ac0870af09487bbc
run_id: 3d0b08ff27314c759400580083e98c4f
signature:
  inputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1, 11]}}]'
  outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1]}}]'
utc_time_created: '2023-11-24 10:22:09.899267'


4. Keras

Keras is a high-level API for building and training neural networks. This example trains a Reuters newswire topic classifier and logs the Keras model with TrackingClient.

import numpy as np
from tensorflow import keras
from tensorflow.keras.datasets import reuters
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.preprocessing.text import Tokenizer
from oip_tracking_client.tracking import TrackingClient

# Set up TrackingClient
api_host = "<API_HOST>"
api_key = "<API_KEY>"
workspace_name = "WORKSPACE_NAME"
TrackingClient.connect(api_host, api_key, workspace_name)

experiment_name = "expr001"
TrackingClient.set_experiment(experiment_name)

max_words = 50
batch_size = 32
epochs = 4

(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=max_words, test_split=0.2)
num_classes = np.max(y_train) + 1

# Vectorizing data
tokenizer = Tokenizer(num_words=max_words)
x_train = tokenizer.sequences_to_matrix(x_train, mode="binary")
x_test = tokenizer.sequences_to_matrix(x_test, mode="binary")

y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

TrackingClient.enable_system_metrics_logging()
with TrackingClient.start_run():
    TrackingClient.tensorflow.autolog()
    TrackingClient.set_run_name("YOUR_RUN_NAME")

    # Build model
    model = Sequential()
    model.add(Dense(512, input_shape=(max_words,)))
    model.add(Activation("relu"))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes))
    model.add(Activation("softmax"))

    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_split=0.1)
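
    # Optionally evaluate on the held-out test set and log the scores explicitly
    # (autolog already records the per-epoch training metrics)
    test_loss, test_acc = model.evaluate(x_test, y_test, batch_size=batch_size, verbose=0)
    TrackingClient.log_metric("test_loss", test_loss)
    TrackingClient.log_metric("test_accuracy", test_acc)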

    signature = TrackingClient.infer_signature(x_train, y_train)
    TrackingClient.keras.log_model(model, "model", signature=signature)

Generated MLmodel YAML (example):

artifact_path: model
flavors:
  python_function:
    data: data
    env:
      conda: conda.yaml
      virtualenv: python_env.yaml
    loader_module: mlflow.tensorflow
    python_version: 3.9.7
  tensorflow:
    code: null
    data: data
    keras_version: 2.8.0
    model_type: keras
    save_format: tf
mlflow_version: 2.4.0
model_uuid: 82aa11563c54406bb1bafc76feafac5a
run_id: 56ccd94b83fa4d83865f0b8cb3644131
signature:
  inputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1, 50]}}]'
  outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float32", "shape": [-1, 46]}}]'
utc_time_created: '2023-11-24 10:42:44.701726'

5. Transformers

This example runs sentiment analysis with a pre-trained BERT model from transformers and logs the pipeline with TrackingClient.

import torch
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
from oip_tracking_client.tracking import TrackingClient

# Set up TrackingClient
api_host = "<API_HOST>"
api_key = "<API_KEY>"
workspace_name = "WORKSPACE_NAME"
TrackingClient.connect(api_host, api_key, workspace_name)

experiment_name = "expr001"
TrackingClient.set_experiment(experiment_name)

model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)
sentiment_analysis = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

review = "I really enjoyed watching this movie."
inputs = tokenizer(review, return_tensors="pt", padding=True, truncation=True, max_length=128)
input_ids = inputs["input_ids"]
attention_mask = inputs["attention_mask"]

TrackingClient.enable_system_metrics_logging()
with TrackingClient.start_run():
    TrackingClient.set_run_name("YOUR_RUN_NAME")
    TrackingClient.log_param("model_name", model_name)

    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)

    predicted_label = torch.argmax(outputs.logits).item()
    sentiment = "positive" if predicted_label == 1 else "negative"
    TrackingClient.log_param("predicted_sentiment", sentiment)

    TrackingClient.transformers.log_model(sentiment_analysis, "model")

Generated MLmodel YAML (example):

artifact_path: model
flavors:
  python_function:
    env:
      conda: conda.yaml
      virtualenv: python_env.yaml
    loader_module: mlflow.transformers
    model_binary: model
    python_version: 3.9.7
  transformers:
    code: null
    components:
      - tokenizer
    framework: pt
    instance_type: TextClassificationPipeline
    model_binary: model
    pipeline_model_type: BertForSequenceClassification
    source_model_name: bert-base-uncased
    task: sentiment-analysis
    tokenizer_type: BertTokenizer
    transformers_version: 4.29.2
mlflow_version: 2.4.0
model_uuid: 07915c2dafdb4373bc76ce3a2425888e
run_id: 6914f9db67424052bee8e139acf74b70
signature:
  inputs: '[{"type": "string"}]'
  outputs: '[{"type": "string", "name": "label"}, {"type": "double", "name": "score"}]'
utc_time_created: '2023-11-24 09:13:36.883081'
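
To reuse the logged pipeline, it can be loaded back through the transformers flavor. A minimal sketch, assuming the tracking connection is already configured and <RUN_ID> is the run that logged the pipeline:

import mlflow.transformers

# Returns the TextClassificationPipeline saved above
loaded = mlflow.transformers.load_model("runs:/<RUN_ID>/model")
print(loaded("I really enjoyed watching this movie."))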

6. PyTorch

This example trains a CNN for image classification on FashionMNIST with PyTorch and PyTorch Ignite, logging the run and model with TrackingClient.

import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from ignite.engine import (
    Events, create_supervised_trainer, create_supervised_evaluator
)
from ignite.metrics import Accuracy, Loss, ConfusionMatrix, RunningAverage
from ignite.handlers import EarlyStopping
from ignite.contrib.handlers import ProgressBar
from oip_tracking_client.tracking import TrackingClient

# Set up TrackingClient
api_host = "<API_HOST>"
api_key = "<API_KEY>"
workspace_name = "WORKSPACE_NAME"
TrackingClient.connect(api_host, api_key, workspace_name)

experiment_name = "expr001"
TrackingClient.set_experiment(experiment_name)

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Dataset
trainset = datasets.FashionMNIST("./data", download=True, train=True, transform=transform)
train_loader = DataLoader(trainset, batch_size=64, shuffle=True)

validationset = datasets.FashionMNIST("./data", download=True, train=False, transform=transform)
val_loader = DataLoader(validationset, batch_size=64, shuffle=True)

class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.convlayer1 = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.convlayer2 = nn.Sequential(
            nn.Conv2d(32, 64, 3),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.fc1 = nn.Linear(64 * 6 * 6, 600)
        self.drop = nn.Dropout2d(0.25)
        self.fc2 = nn.Linear(600, 120)
        self.fc3 = nn.Linear(120, 10)

    def forward(self, x):
        x = self.convlayer1(x)
        x = self.convlayer2(x)
        x = x.view(-1, 64 * 6 * 6)
        x = self.fc1(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.NLLLoss()
epochs = 3

trainer = create_supervised_trainer(model, optimizer, criterion, device=device)
metrics = {
    "accuracy": Accuracy(),
    "nll": Loss(criterion),
    "cm": ConfusionMatrix(num_classes=10),
}
train_evaluator = create_supervised_evaluator(model, metrics=metrics, device=device)
val_evaluator = create_supervised_evaluator(model, metrics=metrics, device=device)

RunningAverage(output_transform=lambda x: x).attach(trainer, "loss")

def score_function(engine):
    return -engine.state.metrics["nll"]

handler = EarlyStopping(patience=10, score_function=score_function, trainer=trainer)
val_evaluator.add_event_handler(Events.COMPLETED, handler)
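
# Run the validation evaluator at the end of each epoch so the EarlyStopping
# handler above actually receives scores. A sketch: it assumes
# TrackingClient.log_metric mirrors mlflow.log_metric's step parameter.
@trainer.on(Events.EPOCH_COMPLETED)
def log_validation_results(engine):
    val_evaluator.run(val_loader)
    metrics = val_evaluator.state.metrics
    TrackingClient.log_metric("val_accuracy", metrics["accuracy"], step=engine.state.epoch)
    TrackingClient.log_metric("val_nll", metrics["nll"], step=engine.state.epoch)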

TrackingClient.enable_system_metrics_logging()
with TrackingClient.start_run():
    TrackingClient.set_run_name("YOUR_RUN_NAME")

    pbar = ProgressBar(persist=True, bar_format="")
    pbar.attach(trainer, ["loss"])

    trainer.run(train_loader, max_epochs=epochs)

    x_train_batch, y_train_batch = next(iter(train_loader))
    x_train_np = x_train_batch.numpy()
    y_train_np = y_train_batch.numpy()

    signature = TrackingClient.infer_signature(x_train_np, y_train_np)
    TrackingClient.pytorch.log_model(model, "model", signature=signature)

Generated MLmodel YAML (example):

artifact_path: model
flavors:
  python_function:
    data: data
    env:
      conda: conda.yaml
      virtualenv: python_env.yaml
    loader_module: mlflow.pytorch
    pickle_module_name: mlflow.pytorch.pickle_module
    python_version: 3.8.10
  pytorch:
    code: null
    model_data: data
    pytorch_version: 1.13.1+cu117
mlflow_version: 2.6.0
model_uuid: dd70b8ae29354d9091a1a94ffdb9b2a3
run_id: ee063c63cb5b49b89953cefff6a203b9
signature:
  inputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float32", "shape": [-1, 1, 28, 28]}}]'
  outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "int64", "shape": [-1]}}]'
utc_time_created: '2023-11-27 14:00:32.746631'

7. fastai

This example trains a CNN on the CIFAR dataset with fastai and logs it with TrackingClient.

from fastai.vision.all import (
    URLs, untar_data, ImageDataLoaders, cnn_learner, resnet18, accuracy
)
import numpy as np
from oip_tracking_client.tracking import TrackingClient

# Set up TrackingClient
api_host = "<API_HOST>"
api_key = "<API_KEY>"
workspace_name = "WORKSPACE_NAME"
TrackingClient.connect(api_host, api_key, workspace_name)

experiment_name = "expr001"
TrackingClient.set_experiment(experiment_name)

path = untar_data(URLs.CIFAR)
dls = ImageDataLoaders.from_folder(path, train="train", valid="test")

# Take one validation batch to infer the input half of the model signature
inputs, targets = dls.valid.one_batch()
x_valid = inputs.cpu().numpy()  # float32, shape (batch, 3, 32, 32)

learn = cnn_learner(dls, resnet18, metrics=accuracy)

TrackingClient.enable_system_metrics_logging()
with TrackingClient.start_run():
    TrackingClient.set_run_name("YOUR_RUN_NAME")
    TrackingClient.log_params({"epochs": 2, "lr": 1e-3})

    learn.fit_one_cycle(1, lr_max=1e-3)
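
    # Optionally log the final validation metrics; with fastai,
    # learn.validate() returns [valid_loss, accuracy] for the metrics above
    valid_loss, valid_acc = learn.validate()
    TrackingClient.log_metric("valid_loss", float(valid_loss))
    TrackingClient.log_metric("valid_accuracy", float(valid_acc))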

    y_valid = np.zeros((1, 10), dtype=np.float32)  # Probability vector of size 10
    signature = TrackingClient.infer_signature(x_valid, y_valid)
    TrackingClient.fastai.log_model(learn, "model", signature=signature)

Generated MLmodel YAML (example):

artifact_path: model
flavors:
  fastai:
    code: null
    data: model.fastai
    fastai_version: 2.7.12
  python_function:
    data: model.fastai
    env:
      conda: conda.yaml
      virtualenv: python_env.yaml
    loader_module: mlflow.fastai
    python_version: 3.8.10
mlflow_version: 2.6.0
model_uuid: 8b83ba5aebab4015a88abc0730103367
run_id: dbc2db9f3b85456e88dc9a58be5943bb
signature:
  inputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float32", "shape": [-1, 3,
    32, 32]}}]'
  outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "int64", "shape": [-1]}}]'
  params: null
utc_time_created: '2023-11-27 14:38:42.470057'

8. TensorFlow

This example trains a simple neural network on MNIST with TensorFlow and logs the run with TrackingClient.

import tensorflow as tf
from oip_tracking_client.tracking import TrackingClient

# Set up TrackingClient
api_host = "<API_HOST>"
api_key = "<API_KEY>"
workspace_name = "WORKSPACE_NAME"
TrackingClient.connect(api_host, api_key, workspace_name)

# Set the experiment
experiment_name = "expr001"
TrackingClient.set_experiment(experiment_name)

mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

model = tf.keras.models.Sequential(
    [
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128, activation="relu"),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10),
    ]
)

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

model.compile(optimizer="adam", loss=loss_fn, metrics=["accuracy"])

TrackingClient.enable_system_metrics_logging()
with TrackingClient.start_run():

    # Set the run name
    TrackingClient.set_run_name("YOUR_RUN_NAME")

    model.fit(x_train, y_train, epochs=1)

    signature = TrackingClient.infer_signature(x_train, y_train)
    TrackingClient.tensorflow.log_model(model, "model", signature=signature)

Generated MLmodel YAML (example):

artifact_path: model
flavors:
  python_function:
    data: data
    env:
      conda: conda.yaml
      virtualenv: python_env.yaml
    loader_module: mlflow.tensorflow
    python_version: 3.9.7
  tensorflow:
    code: null
    data: data
    keras_version: 2.8.0
    model_type: keras
    save_format: tf
mlflow_version: 2.4.0
model_uuid: 1c5b72f9ae684923a175771562531706
run_id: 80c2a7e836c645c6b9189658732beaed
signature:
  inputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1, 28,
    28]}}]'
  outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "uint8", "shape": [-1]}}]'
utc_time_created: '2023-11-27 15:14:32.143630'

9. Sentence Transformers

This example loads a pre-trained Sentence Transformers embedding model and logs it with TrackingClient.

from sentence_transformers import SentenceTransformer
from oip_tracking_client.tracking import TrackingClient

# Set up TrackingClient
api_host = "<API_HOST>"
api_key = "<API_KEY>"
workspace_name = "WORKSPACE_NAME"
TrackingClient.connect(api_host, api_key, workspace_name)

# Set the experiment
experiment_name = "expr001"
TrackingClient.set_experiment(experiment_name)

TrackingClient.enable_system_metrics_logging()
with TrackingClient.start_run():

    # Set the run name
    TrackingClient.set_run_name("YOUR_RUN_NAME")

    model = SentenceTransformer("all-MiniLM-L6-v2")
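
    # Optional sanity check: encode a sample sentence and log the embedding size
    sample_embedding = model.encode(["Sentence embeddings with MiniLM."])
    TrackingClient.log_param("embedding_dim", int(sample_embedding.shape[1]))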
    # The model signature is inferred and logged automatically for this flavor
    TrackingClient.sentence_transformers.log_model(model, "model")

Generated MLmodel YAML (example):

artifact_path: model
flavors:
  python_function:
    data: model.sentence_transformer
    env:
      conda: conda.yaml
      virtualenv: python_env.yaml
    loader_module: mlflow.sentence_transformers
    python_version: 3.9.7
  sentence_transformers:
    code: null
    sentence_transformers_version: 2.2.2
mlflow_version: 2.4.0
model_uuid: e0eb4cfef2fb496d8c1116a5ac97c067
run_id: 387d039bae5d4dacad46d3d3a6d5baae
signature:
  inputs: '[{"type": "string"}]'
  outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1]}}]'
utc_time_created: '2023-11-27 15:29:27.653734'

10. spaCy

This example trains a spaCy named-entity recognition (NER) model and logs it with TrackingClient.

import random
import spacy
from packaging.version import Version
from spacy.util import compounding, minibatch
from oip_tracking_client.tracking import TrackingClient

# Set up TrackingClient
api_host = "<API_HOST>"
api_key = "<API_KEY>"
workspace_name = "WORKSPACE_NAME"
TrackingClient.connect(api_host, api_key, workspace_name)

# Set the experiment
experiment_name = "expr001"
TrackingClient.set_experiment(experiment_name)

IS_SPACY_VERSION_NEWER_THAN_OR_EQUAL_TO_3_0_0 = Version(spacy.__version__) >= Version(
    "3.0.0"
)
if IS_SPACY_VERSION_NEWER_THAN_OR_EQUAL_TO_3_0_0:
    # Example is required by the spaCy 3.x update API used below
    from spacy.training import Example

# training data
TRAIN_DATA = [
    ("Who is Shaka Khan?", {"entities": [(7, 17, "PERSON")]}),
    ("I like London and Berlin.", {"entities": [(7, 13, "LOC"), (18, 24, "LOC")]}),
]


# create blank model and add ner to the pipeline
TrackingClient.enable_system_metrics_logging()
with TrackingClient.start_run():

    # Set the run name
    TrackingClient.set_run_name("YOUR_RUN_NAME")

    nlp = spacy.blank("en")
    if IS_SPACY_VERSION_NEWER_THAN_OR_EQUAL_TO_3_0_0:
        ner = nlp.add_pipe("ner", last=True)
    else:
        ner = nlp.create_pipe("ner")
        nlp.add_pipe(ner, last=True)
    # add labels
    for _, annotations in TRAIN_DATA:
        for ent in annotations.get("entities"):
            ner.add_label(ent[2])
    params = {"n_iter": 100, "drop": 0.5}
    TrackingClient.log_params(params)

    if IS_SPACY_VERSION_NEWER_THAN_OR_EQUAL_TO_3_0_0:
        nlp.initialize()
    else:
        nlp.begin_training()
    for itn in range(params["n_iter"]):
        random.shuffle(TRAIN_DATA)
        losses = {}
        # batch up the examples using spaCy's minibatch
        batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
        for batch in batches:
            texts, annotations = zip(*batch)
            if IS_SPACY_VERSION_NEWER_THAN_OR_EQUAL_TO_3_0_0:
                # spaCy 3.x expects Example objects rather than raw (text, annotation) pairs
                examples = [
                    Example.from_dict(nlp.make_doc(text), annotation)
                    for text, annotation in zip(texts, annotations)
                ]
                nlp.update(examples, drop=params["drop"], losses=losses)
            else:
                nlp.update(
                    texts,  # batch of texts
                    annotations,  # batch of annotations
                    drop=params["drop"],  # dropout - make it harder to memorise data
                    losses=losses,
                )
        print("Losses", losses)
        TrackingClient.log_metrics(losses)
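
    # Quick check of the trained model on one of the training sentences
    doc = nlp("Who is Shaka Khan?")
    print("Entities:", [(ent.text, ent.label_) for ent in doc.ents])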
    # Log the spaCy model with TrackingClient
    TrackingClient.spacy.log_model(spacy_model=nlp, artifact_path="model")

Generated MLmodel YAML (example):

artifact_path: model
flavors:
  spacy:
    code: null
    data: model.spacy
    spacy_version: 2.2.3
mlflow_version: 2.4.0
model_uuid: b5c9444b3722449e845d93d07289f3a0
run_id: 5271f94120094896b67a3877644d1f4a
utc_time_created: '2023-11-28 08:50:57.284239'

Next Steps

  • Tracking-API-Client – Learn how to set up and customize your tracking for each experiment.
  • Run UI – View the logged details, artifacts, and metrics within the OICM platform.
  • Experiment UI – Compare multiple runs and gather insights on overall experiment performance.