Skip to content

Tracking Examples with Popular ML Libraries

All these examples are also available on GitHub.


1. scikit-learn

This example trains a Random Forest Regressor using sklearn and TrackingClient.

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from oip_tracking_client.tracking import TrackingClient

# Connect the tracking client to the platform
api_host = "<API_HOST>"
api_key = "<API_KEY>"
workspace_name = "WORKSPACE_NAME"
TrackingClient.connect(api_host, api_key, workspace_name)

# Select the experiment that runs are logged under
experiment_name = "experiment_name"
TrackingClient.set_experiment(experiment_name)

# Prepare train/test splits of the diabetes dataset
dataset = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(dataset.data, dataset.target)

# Turn on system-metrics capture, then open a tracked run
TrackingClient.enable_system_metrics_logging()
with TrackingClient.start_run():
    TrackingClient.autolog()
    TrackingClient.set_run_name("YOUR_RUN_NAME")

    # Fit the random-forest regressor
    regressor = RandomForestRegressor(n_estimators=10, max_depth=6, max_features=3)
    regressor.fit(X_train, y_train)

    # Score the held-out split
    predictions = regressor.predict(X_test)

    # Record evaluation metrics on the run
    TrackingClient.log_metric("mse", mean_squared_error(y_test, predictions))
    TrackingClient.log_metric("r2", r2_score(y_test, predictions))

    # Persist the fitted model together with its inferred signature
    model_signature = TrackingClient.infer_signature(X_train, y_train)
    TrackingClient.sklearn.log_model(regressor, "model", signature=model_signature)

Generated MLmodel YAML (example):

artifact_path: model
flavors:
  python_function:
    env:
      conda: conda.yaml
      virtualenv: python_env.yaml
    loader_module: mlflow.sklearn
    model_path: model.pkl
    predict_fn: predict
    python_version: 3.9.7
  sklearn:
    code: null
    pickled_model: model.pkl
    serialization_format: cloudpickle
    sklearn_version: 1.0.2
mlflow_version: 2.4.0
model_uuid: e456b488a05942b58f173cfec10cdc42
run_id: 2fcfd26d59ad41f7aad7e6f487c56dd3
signature:
  inputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1, 10]}}]'
  outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1]}}]'
utc_time_created: '2023-11-24 09:52:24.567632'

2. xgboost

This example trains an XGBoost model using xgboost and TrackingClient.

import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from oip_tracking_client.tracking import TrackingClient

# Connect the tracking client to the platform
api_host = "<API_HOST>"
api_key = "<API_KEY>"
workspace_name = "WORKSPACE_NAME"
TrackingClient.connect(api_host, api_key, workspace_name)

# Select the experiment that runs are logged under
experiment_name = "experiment_name"
TrackingClient.set_experiment(experiment_name)

# Prepare train/test splits of the diabetes dataset
dataset = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(dataset.data, dataset.target)

# Turn on system-metrics capture, then open a tracked run
TrackingClient.enable_system_metrics_logging()
with TrackingClient.start_run():
    TrackingClient.autolog()
    TrackingClient.set_run_name("YOUR_RUN_NAME")

    # Hyper-parameters for the gradient-boosted regressor
    hyperparams = {
        "n_estimators": 100,
        "max_depth": 6,
        "learning_rate": 0.1,
        "objective": "reg:squarederror",
    }

    # Fit the model, then log it with its inferred signature
    booster = xgb.XGBRegressor(**hyperparams)
    booster.fit(X_train, y_train)

    model_signature = TrackingClient.infer_signature(X_train, y_train)
    TrackingClient.xgboost.log_model(booster, "model", signature=model_signature)

Generated MLmodel YAML (example):

artifact_path: model
flavors:
  python_function:
    data: model.xgb
    env:
      conda: conda.yaml
      virtualenv: python_env.yaml
    loader_module: mlflow.xgboost
    python_version: 3.9.7
  xgboost:
    code: null
    data: model.xgb
    model_class: xgboost.sklearn.XGBRegressor
    model_format: xgb
    xgb_version: 1.6.2
mlflow_version: 2.4.0
model_uuid: 1b41fa0786e34aedb2db835a43a71a05
run_id: 56159271f40f4351976391ff0bdadd77
signature:
  inputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1, 10]}}]'
  outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1]}}]'
utc_time_created: '2023-11-24 10:24:57.517327'

3. statsmodels

This example trains an ordinary least squares (OLS) regression model using statsmodels and TrackingClient.

import numpy as np
import statsmodels.api as sm
from oip_tracking_client.tracking import TrackingClient

# Connect the tracking client to the platform
api_host = "<API_HOST>"
api_key = "<API_KEY>"
workspace_name = "WORKSPACE_NAME"
TrackingClient.connect(api_host, api_key, workspace_name)

# Select the experiment that runs are logged under
experiment_name = "experiment_name"
TrackingClient.set_experiment(experiment_name)

# Build a synthetic linear-regression dataset with a fixed seed
np.random.seed(42)
num_samples = 100
num_features = 10

X = np.random.rand(num_samples, num_features)
true_coefficients = np.random.rand(num_features)
noise = np.random.normal(loc=0, scale=0.1, size=num_samples)
y = X @ true_coefficients + noise

# Prepend an intercept column
X = sm.add_constant(X)

# Turn on system-metrics capture, then open a tracked run
TrackingClient.enable_system_metrics_logging()
with TrackingClient.start_run():
    TrackingClient.autolog()
    TrackingClient.set_run_name("YOUR_RUN_NAME")

    # Fit ordinary least squares on the synthetic data
    ols = sm.OLS(y, X)
    results = ols.fit()

    # Log the fitted results object with its inferred signature
    model_signature = TrackingClient.infer_signature(X, y)
    TrackingClient.statsmodels.log_model(results, "model", signature=model_signature)

Generated MLmodel YAML (example):

artifact_path: model
flavors:
  python_function:
    data: model.statsmodels
    env:
      conda: conda.yaml
      virtualenv: python_env.yaml
    loader_module: mlflow.statsmodels
    python_version: 3.9.7
  statsmodels:
    code: null
    data: model.statsmodels
    statsmodels_version: 0.13.2
mlflow_version: 2.4.0
model_uuid: 2e70fe54dc224704ac0870af09487bbc
run_id: 3d0b08ff27314c759400580083e98c4f
signature:
  inputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1, 11]}}]'
  outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1]}}]'
utc_time_created: '2023-11-24 10:22:09.899267'


4. Keras

Keras is a high-level API for building and training neural networks. This example logs a Keras model with TrackingClient.

import numpy as np
from tensorflow import keras
from keras.datasets import reuters
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, TextVectorization
from oip_tracking_client.tracking import TrackingClient

# Connect the tracking client to the platform
api_host = "<API_HOST>"
api_key = "<API_KEY>"
workspace_name = "WORKSPACE_NAME"
TrackingClient.connect(api_host, api_key, workspace_name)

experiment_name = "Example 4 - Keras"
TrackingClient.set_experiment(experiment_name)

max_words = 50
batch_size = 32
epochs = 4

# Reuters newswire topics, restricted to the top `max_words` token ids
(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=max_words, test_split=0.2)
num_classes = int(np.max(y_train) + 1)

# Multi-hot vectorizer over the (stringified) token id sequences
vectorizer = TextVectorization(
    max_tokens=max_words,
    output_mode="multi_hot"
)

def int_seq_to_str(sequences):
    """Render each integer token sequence as one space-separated string."""
    return np.array([" ".join(str(token) for token in seq) for seq in sequences])

x_train_text = int_seq_to_str(x_train)
x_test_text = int_seq_to_str(x_test)

vectorizer.adapt(x_train_text)

x_train_vec = vectorizer(x_train_text).numpy()
x_test_vec = vectorizer(x_test_text).numpy()

# One-hot encode the class labels
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

TrackingClient.enable_system_metrics_logging()
with TrackingClient.start_run() as run:
    TrackingClient.tensorflow.autolog()
    TrackingClient.set_run_name("Example 4 - Keras - Run " + run.info.run_id)

    # Input width is whatever the vectorizer actually produced
    input_dim = x_train_vec.shape[1]

    # Small dense classifier over the multi-hot vectors
    model = Sequential([
        Dense(512, input_shape=(input_dim,)),
        Activation("relu"),
        Dropout(0.5),
        Dense(num_classes),
        Activation("softmax"),
    ])

    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

    model.fit(x_train_vec, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_split=0.1)

    # Log the trained model with its inferred signature
    model_signature = TrackingClient.infer_signature(x_train_vec, y_train)
    TrackingClient.keras.log_model(model, "model", signature=model_signature)

Generated MLmodel YAML (example):

artifact_path: model
flavors:
  python_function:
    data: data
    env:
      conda: conda.yaml
      virtualenv: python_env.yaml
    loader_module: mlflow.tensorflow
    python_version: 3.9.7
  tensorflow:
    code: null
    data: data
    keras_version: 2.8.0
    model_type: keras
    save_format: tf
mlflow_version: 2.4.0
model_uuid: 82aa11563c54406bb1bafc76feafac5a
run_id: 56ccd94b83fa4d83865f0b8cb3644131
signature:
  inputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1, 50]}}]'
  outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float32", "shape": [-1, 46]}}]'
utc_time_created: '2023-11-24 10:42:44.701726'

5. Transformers

Use transformers for sentiment analysis with a pre-trained BERT model. Log everything with TrackingClient.

import torch
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
from oip_tracking_client.tracking import TrackingClient

# Connect the tracking client to the platform
api_host = "<API_HOST>"
api_key = "<API_KEY>"
workspace_name = "WORKSPACE_NAME"
TrackingClient.connect(api_host, api_key, workspace_name)

experiment_name = "expr001"
TrackingClient.set_experiment(experiment_name)

# Pre-trained BERT checkpoint wrapped in a sentiment-analysis pipeline
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)
sentiment_analysis = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

# Tokenize a single sample review
review = "I really enjoyed watching this movie."
inputs = tokenizer(review, return_tensors="pt", padding=True, truncation=True, max_length=128)
input_ids = inputs["input_ids"]
attention_mask = inputs["attention_mask"]

TrackingClient.enable_system_metrics_logging()
with TrackingClient.start_run():
    TrackingClient.set_run_name("YOUR_RUN_NAME")
    TrackingClient.log_param("model_name", model_name)

    # One forward pass with autograd disabled
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)

    # Map the argmax logit to a label; class id 1 is treated as positive here
    predicted_label = torch.argmax(outputs.logits).item()
    if predicted_label == 1:
        sentiment = "positive"
    else:
        sentiment = "negative"

    # Log the whole pipeline as a transformers-flavor model
    TrackingClient.transformers.log_model(sentiment_analysis, "model")

Generated MLmodel YAML (example):

artifact_path: model
flavors:
  python_function:
    env:
      conda: conda.yaml
      virtualenv: python_env.yaml
    loader_module: mlflow.transformers
    model_binary: model
    python_version: 3.9.7
  transformers:
    code: null
    components:
      - tokenizer
    framework: pt
    instance_type: TextClassificationPipeline
    model_binary: model
    pipeline_model_type: BertForSequenceClassification
    source_model_name: bert-base-uncased
    task: sentiment-analysis
    tokenizer_type: BertTokenizer
    transformers_version: 4.29.2
mlflow_version: 2.4.0
model_uuid: 07915c2dafdb4373bc76ce3a2425888e
run_id: 6914f9db67424052bee8e139acf74b70
signature:
  inputs: '[{"type": "string"}]'
  outputs: '[{"type": "string", "name": "label"}, {"type": "double", "name": "score"}]'
utc_time_created: '2023-11-24 09:13:36.883081'

6. PyTorch

This example uses PyTorch to train a CNN for image classification on FashionMNIST. TrackingClient logs your runs and artifacts.

import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from ignite.engine import (
    Events, create_supervised_trainer, create_supervised_evaluator
)
from ignite.metrics import Accuracy, Loss, ConfusionMatrix, RunningAverage
from ignite.handlers import EarlyStopping
from ignite.contrib.handlers import ProgressBar
from oip_tracking_client.tracking import TrackingClient

# Set up TrackingClient
api_host = "<API_HOST>"
api_key = "<API_KEY>"
workspace_name = "WORKSPACE_NAME"
TrackingClient.connect(api_host, api_key, workspace_name)

experiment_name = "expr001"
TrackingClient.set_experiment(experiment_name)

# Normalize single-channel images to mean 0.5 / std 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Dataset
trainset = datasets.FashionMNIST("./data", download=True, train=True, transform=transform)
train_loader = DataLoader(trainset, batch_size=64, shuffle=True)

validationset = datasets.FashionMNIST("./data", download=True, train=False, transform=transform)
val_loader = DataLoader(validationset, batch_size=64, shuffle=True)

class CNN(nn.Module):
    """Two-block convolutional classifier for 28x28 FashionMNIST images."""

    def __init__(self):
        super(CNN, self).__init__()
        # 1 -> 32 channels; padding keeps 28x28, pooling halves it to 14x14
        self.convlayer1 = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # 32 -> 64 channels; no padding shrinks 14x14 to 12x12, pooling halves to 6x6
        self.convlayer2 = nn.Sequential(
            nn.Conv2d(32, 64, 3),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.fc1 = nn.Linear(64 * 6 * 6, 600)
        self.drop = nn.Dropout2d(0.25)
        self.fc2 = nn.Linear(600, 120)
        self.fc3 = nn.Linear(120, 10)

    def forward(self, x):
        # Returns log-probabilities, which pairs with nn.NLLLoss below
        x = self.convlayer1(x)
        x = self.convlayer2(x)
        x = x.view(-1, 64 * 6 * 6)
        x = self.fc1(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return F.log_softmax(x, dim=1)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.NLLLoss()
epochs = 3

trainer = create_supervised_trainer(model, optimizer, criterion, device=device)
metrics = {
    "accuracy": Accuracy(),
    "nll": Loss(criterion),
    "cm": ConfusionMatrix(num_classes=10),
}
train_evaluator = create_supervised_evaluator(model, metrics=metrics, device=device)
val_evaluator = create_supervised_evaluator(model, metrics=metrics, device=device)

# Expose the running training loss under the name "loss" for the progress bar
RunningAverage(output_transform=lambda x: x).attach(trainer, "loss")

def score_function(engine):
    # EarlyStopping maximizes its score, so negate the validation loss
    return -engine.state.metrics["nll"]

handler = EarlyStopping(patience=10, score_function=score_function, trainer=trainer)
val_evaluator.add_event_handler(Events.COMPLETED, handler)
# NOTE(review): nothing in this example ever calls train_evaluator.run(...) or
# val_evaluator.run(...), so the metrics dict and the EarlyStopping handler above
# never actually fire during trainer.run — confirm whether an epoch-completed
# hook that runs the evaluators is missing here.

TrackingClient.enable_system_metrics_logging()
with TrackingClient.start_run():
    TrackingClient.set_run_name("YOUR_RUN_NAME")

    pbar = ProgressBar(persist=True, bar_format="")
    pbar.attach(trainer, ["loss"])

    trainer.run(train_loader, max_epochs=epochs)

    # Use one training batch (as numpy) to infer the model signature for logging
    x_train_batch, y_train_batch = next(iter(train_loader))
    x_train_np = x_train_batch.numpy()
    y_train_np = y_train_batch.numpy()

    signature = TrackingClient.infer_signature(x_train_np, y_train_np)
    TrackingClient.pytorch.log_model(model, "model", signature=signature)

Generated MLmodel YAML (example):

artifact_path: model
flavors:
  python_function:
    data: data
    env:
      conda: conda.yaml
      virtualenv: python_env.yaml
    loader_module: mlflow.pytorch
    pickle_module_name: mlflow.pytorch.pickle_module
    python_version: 3.8.10
  pytorch:
    code: null
    model_data: data
    pytorch_version: 1.13.1+cu117
mlflow_version: 2.6.0
model_uuid: dd70b8ae29354d9091a1a94ffdb9b2a3
run_id: ee063c63cb5b49b89953cefff6a203b9
signature:
  inputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float32", "shape": [-1, 1, 28, 28]}}]'
  outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "int64", "shape": [-1]}}]'
utc_time_created: '2023-11-27 14:00:32.746631'

7. Fastai

This example shows CNN training with Fastai, integrated with TrackingClient.

from fastai.vision.all import (
    URLs, untar_data, ImageDataLoaders, cnn_learner, resnet18, accuracy
)
import numpy as np
from oip_tracking_client.tracking import TrackingClient

# Set up TrackingClient
api_host = "<API_HOST>"
api_key = "<API_KEY>"
workspace_name = "WORKSPACE_NAME"
TrackingClient.connect(api_host, api_key, workspace_name)

experiment_name = "expr001"
TrackingClient.set_experiment(experiment_name)

# Download CIFAR and build dataloaders from its train/test folders
path = untar_data(URLs.CIFAR)
dls = ImageDataLoaders.from_folder(path, train="train", valid="test")

# Grab one validation batch only to read off the image dimensions
inputs, targets = dls.valid.one_batch()
x_valid = inputs.numpy()
_, ch, w, h = x_valid.shape
# NOTE(review): the real batch is discarded and replaced by a zeros array whose
# axes are reordered to channels-last (1, w, h, ch) with dtype uint8, while the
# signature shown for this example is channels-first float32 — confirm this is
# the intended sample-input layout for signature inference.
x_valid = np.zeros((1, w, h, ch), dtype=np.uint8)

learn = cnn_learner(dls, resnet18, metrics=accuracy)

TrackingClient.enable_system_metrics_logging()
with TrackingClient.start_run():
    TrackingClient.set_run_name("YOUR_RUN_NAME")
    TrackingClient.log_params({"epochs": 2, "lr": 1e-3})

    # Train for one cycle
    learn.fit_one_cycle(1, lr_max=1e-3)

    y_valid = np.zeros((1, 10), dtype=np.float32)  # Probability vector of size 10
    signature = TrackingClient.infer_signature(x_valid, y_valid)
    TrackingClient.fastai.log_model(learn, "model", signature=signature)

Generated MLmodel YAML (example):

artifact_path: model
flavors:
  fastai:
    code: null
    data: model.fastai
    fastai_version: 2.7.12
  python_function:
    data: model.fastai
    env:
      conda: conda.yaml
      virtualenv: python_env.yaml
    loader_module: mlflow.fastai
    python_version: 3.8.10
mlflow_version: 2.6.0
model_uuid: 8b83ba5aebab4015a88abc0730103367
run_id: dbc2db9f3b85456e88dc9a58be5943bb
signature:
  inputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float32", "shape": [-1, 3,
    32, 32]}}]'
  outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "int64", "shape": [-1]}}]'
  params: null
utc_time_created: '2023-11-27 14:38:42.470057'

8. TensorFlow

This example illustrates training a simple neural network using TensorFlow and integrates it with TrackingClient for efficient experiment tracking.

import tensorflow as tf
from oip_tracking_client.tracking import TrackingClient

# Connect the tracking client to the platform
api_host = "<API_HOST>"
api_key = "<API_KEY>"
workspace_name = "WORKSPACE_NAME"
TrackingClient.connect(api_host, api_key, workspace_name)

# Select the experiment that runs are logged under
experiment_name = "expr001"
TrackingClient.set_experiment(experiment_name)

# MNIST digits, with pixel values scaled into [0, 1]
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

# Small dense classifier over flattened 28x28 images (logits output)
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10),
])

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

model.compile(optimizer="adam", loss=loss_fn, metrics=["accuracy"])

# Turn on system-metrics capture, then open a tracked run
TrackingClient.enable_system_metrics_logging()
with TrackingClient.start_run():
    # Name the run
    TrackingClient.set_run_name("YOUR_RUN_NAME")

    # One training epoch is enough for the example
    model.fit(x_train, y_train, epochs=1)

    # Log the trained model with its inferred signature
    model_signature = TrackingClient.infer_signature(x_train, y_train)
    TrackingClient.tensorflow.log_model(model, "model", signature=model_signature)

Generated MLmodel YAML (example):

artifact_path: model
flavors:
  python_function:
    data: data
    env:
      conda: conda.yaml
      virtualenv: python_env.yaml
    loader_module: mlflow.tensorflow
    python_version: 3.9.7
  tensorflow:
    code: null
    data: data
    keras_version: 2.8.0
    model_type: keras
    save_format: tf
mlflow_version: 2.4.0
model_uuid: 1c5b72f9ae684923a175771562531706
run_id: 80c2a7e836c645c6b9189658732beaed
signature:
  inputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1, 28,
    28]}}]'
  outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "uint8", "shape": [-1]}}]'
utc_time_created: '2023-11-27 15:14:32.143630'

9. Sentence Transformers

This example demonstrates how to use the Sentence Transformers library for sentence embeddings and integrates it with TrackingClient for experiment tracking.

from sentence_transformers import SentenceTransformer
from oip_tracking_client.tracking import TrackingClient

# Connect the tracking client to the platform
api_host = "<API_HOST>"
api_key = "<API_KEY>"
workspace_name = "WORKSPACE_NAME"
TrackingClient.connect(api_host, api_key, workspace_name)

# Select the experiment that runs are logged under
experiment_name = "expr001"
TrackingClient.set_experiment(experiment_name)

# Turn on system-metrics capture, then open a tracked run
TrackingClient.enable_system_metrics_logging()
with TrackingClient.start_run():
    # Name the run
    TrackingClient.set_run_name("YOUR_RUN_NAME")

    # Load a pre-trained embedding model; the signature is logged automatically
    model = SentenceTransformer("all-MiniLM-L6-v2")
    TrackingClient.sentence_transformers.log_model(model, "model")

Generated MLmodel YAML (example):

artifact_path: model
flavors:
  python_function:
    data: model.sentence_transformer
    env:
      conda: conda.yaml
      virtualenv: python_env.yaml
    loader_module: mlflow.sentence_transformers
    python_version: 3.9.7
  sentence_transformers:
    code: null
    sentence_transformers_version: 2.2.2
mlflow_version: 2.4.0
model_uuid: e0eb4cfef2fb496d8c1116a5ac97c067
run_id: 387d039bae5d4dacad46d3d3a6d5baae
signature:
  inputs: '[{"type": "string"}]'
  outputs: '[{"type": "tensor", "tensor-spec": {"dtype": "float64", "shape": [-1]}}]'
utc_time_created: '2023-11-27 15:29:27.653734'

10. spaCy

The provided example demonstrates how to train a spaCy named entity recognition (NER) model and log it with TrackingClient.

import random
import spacy
from packaging.version import Version
from spacy.util import compounding, minibatch
from oip_tracking_client.tracking import TrackingClient

# Set up TrackingClient
api_host = "<API_HOST>"
api_key = "<API_KEY>"
workspace_name = "WORKSPACE_NAME"
TrackingClient.connect(api_host, api_key, workspace_name)

# Set the experiment
experiment_name = "expr001"
TrackingClient.set_experiment(experiment_name)

# add_pipe's calling convention changed in spaCy 3.x, so branch on the version
IS_SPACY_VERSION_NEWER_THAN_OR_EQUAL_TO_3_0_0 = Version(spacy.__version__) >= Version(
    "3.0.0"
)

# Training data: (text, {"entities": [(start_char, end_char, label), ...]})
TRAIN_DATA = [
    ("Who is Shaka Khan?", {"entities": [(7, 17, "PERSON")]}),
    ("I like London and Berlin.", {"entities": [(7, 13, "LOC"), (18, 24, "LOC")]}),
]


# create blank model and add ner to the pipeline
TrackingClient.enable_system_metrics_logging()
with TrackingClient.start_run():

    # Set the run name
    TrackingClient.set_run_name("YOUR_RUN_NAME")

    nlp = spacy.blank("en")
    if IS_SPACY_VERSION_NEWER_THAN_OR_EQUAL_TO_3_0_0:
        ner = nlp.add_pipe("ner", last=True)
    else:
        ner = nlp.create_pipe("ner")
        nlp.add_pipe(ner, last=True)
    # Register every entity label that appears in the training data
    for _, annotations in TRAIN_DATA:
        for ent in annotations.get("entities"):
            ner.add_label(ent[2])
    params = {"n_iter": 100, "drop": 0.5}
    TrackingClient.log_params(params)

    nlp.begin_training()
    # NOTE(review): random.shuffle mutates the module-level TRAIN_DATA in place,
    # and the (texts, annotations) form of nlp.update below matches the spaCy 2.x
    # API (the example MLmodel shows spacy_version 2.2.3); spaCy 3.x expects
    # Example objects instead — confirm before running this against v3.
    for itn in range(params["n_iter"]):
        random.shuffle(TRAIN_DATA)
        losses = {}
        # batch up the examples using spaCy's minibatch
        batches = minibatch(TRAIN_DATA, size=compounding(4.0, 32.0, 1.001))
        for batch in batches:
            texts, annotations = zip(*batch)
            nlp.update(
                texts,  # batch of texts
                annotations,  # batch of annotations
                drop=params["drop"],  # dropout - make it harder to memorise data
                losses=losses,
            )
        print("Losses", losses)
        TrackingClient.log_metrics(losses)
    # Log the spaCy model using mlflow
    TrackingClient.spacy.log_model(spacy_model=nlp, artifact_path="model")

Generated MLmodel YAML (example):

artifact_path: model
flavors:
  spacy:
    code: null
    data: model.spacy
    spacy_version: 2.2.3
mlflow_version: 2.4.0
model_uuid: b5c9444b3722449e845d93d07289f3a0
run_id: 5271f94120094896b67a3877644d1f4a
utc_time_created: '2023-11-28 08:50:57.284239'

Next Steps

  • Tracking-API-Client – Learn how to set up and customize your tracking for each experiment.
  • Run UI – View the logged details, artifacts, and metrics within the OICM platform.
  • Experiment UI – Compare multiple runs and gather insights on overall experiment performance.