
MLOps and Model Deployment

Deploy and manage AI models in production.

Dr. Andrew Wilson
December 18, 2025

Take AI models from notebook to production.

What is MLOps?

MLOps applies DevOps practices (versioning, automated testing, CI/CD, monitoring) to machine learning systems.

Goal: Reliably deploy and maintain ML models!

Model Deployment Pipeline

  1. Train model
  2. Version control
  3. Test model
  4. Package model
  5. Deploy to production
  6. Monitor performance
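
A minimal sketch of step 3: a smoke test that loads the serialized model and checks basic behavior before deployment (the file name and feature count here are assumptions):

# test_model.py (run with: pytest test_model.py)
import joblib
import numpy as np

def test_model_predicts():
    # Hypothetical 4-feature model saved as model.pkl
    model = joblib.load('model.pkl')
    prediction = model.predict(np.zeros((1, 4)))
    assert prediction.shape == (1,)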

Save and Load Models

# Scikit-learn
import joblib

# Save
joblib.dump(model, 'model.pkl')

# Load
model = joblib.load('model.pkl')

# TensorFlow/Keras
import tensorflow as tf

model.save('model.h5')
loaded_model = tf.keras.models.load_model('model.h5')

# PyTorch (save only the weights; loading requires an instance of the same architecture)
import torch

torch.save(model.state_dict(), 'model.pth')
model.load_state_dict(torch.load('model.pth'))
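
Note: serialized models are tied to the library versions that created them; pin the same versions in production that you used for training.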

Create API with Flask

from flask import Flask, request, jsonify
import joblib
import numpy as np

app = Flask(__name__)

# Load model
model = joblib.load('model.pkl')

@app.route('/predict', methods=['POST'])
def predict():
    # Get data from request
    data = request.get_json()
    features = np.array(data['features']).reshape(1, -1)
    
    # Predict (assumes a binary classifier that supports predict_proba)
    prediction = model.predict(features)
    probability = model.predict_proba(features)
    
    # Return the prediction and the positive-class probability
    return jsonify({
        'prediction': int(prediction[0]),
        'probability': float(probability[0][1])
    })

@app.route('/health', methods=['GET'])
def health():
    return jsonify({'status': 'healthy'})

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)
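
Once the server is running, test it with a quick client script (the feature values are placeholders, assuming a 4-feature model):

import requests

response = requests.post(
    'http://localhost:5000/predict',
    json={'features': [5.1, 3.5, 1.4, 0.2]}
)
print(response.json())  # e.g. {'prediction': 0, 'probability': 0.12}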

FastAPI (Modern, High-Performance Alternative)

from fastapi import FastAPI
from pydantic import BaseModel
import joblib
import numpy as np

app = FastAPI()
model = joblib.load('model.pkl')

class PredictionRequest(BaseModel):
    features: list[float]

@app.post("/predict")
async def predict(request: PredictionRequest):
    features = np.array(request.features).reshape(1, -1)
    prediction = model.predict(features)
    return {"prediction": int(prediction[0])}

# Run with: uvicorn main:app --reload
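
FastAPI also validates the request body against the Pydantic model automatically and serves interactive API docs at /docs.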

Docker Containerization

Create Dockerfile:

FROM python:3.9-slim

WORKDIR /app

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

CMD ["python", "app.py"]
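
The Dockerfile expects a requirements.txt next to it; a minimal, unpinned example (in practice, pin the versions from your training environment):

flask
scikit-learn
joblib
numpy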

Build and run:

docker build -t ml-model .
docker run -p 5000:5000 ml-model

Model Versioning with MLflow

# Install
# pip install mlflow

import mlflow
import mlflow.sklearn

# Start experiment
mlflow.set_experiment("my-experiment")

with mlflow.start_run():
    # Train model
    model.fit(X_train, y_train)
    
    # Log parameters
    mlflow.log_param("max_depth", 5)
    mlflow.log_param("n_estimators", 100)
    
    # Log metrics
    accuracy = model.score(X_test, y_test)
    mlflow.log_metric("accuracy", accuracy)
    
    # Log model
    mlflow.sklearn.log_model(model, "model")

# View experiments: mlflow ui

Load Model from MLflow

# Load by run ID
run_id = "abc123"
model = mlflow.sklearn.load_model(f"runs:/{run_id}/model")

# Or use the model registry (stage name 'Production')
model = mlflow.sklearn.load_model("models:/my-model/Production")
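
To use the registry URI above, the model must first be registered; one way is to promote a logged run (the model name is an assumption):

# Register the model logged in a run
mlflow.register_model(f"runs:/{run_id}/model", "my-model")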

Monitoring in Production

import logging
from datetime import datetime

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

@app.route('/predict', methods=['POST'])
def predict():
    start_time = datetime.now()
    
    data = request.get_json()
    features = np.array(data['features']).reshape(1, -1)
    
    # Predict
    prediction = model.predict(features)
    
    # Calculate latency
    latency = (datetime.now() - start_time).total_seconds()
    
    # Log
    logger.info(f"Prediction: {prediction[0]}, Latency: {latency:.3f}s")
    
    return jsonify({'prediction': int(prediction[0])})
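
Monitoring also means counting errors. A sketch of basic input validation and error handling, reusing the same app, model, and logger:

@app.route('/predict', methods=['POST'])
def predict():
    data = request.get_json()
    
    # Reject malformed requests instead of letting the model raise
    if not data or 'features' not in data:
        logger.warning("Invalid request body")
        return jsonify({'error': 'missing features'}), 400
    
    try:
        features = np.array(data['features']).reshape(1, -1)
        prediction = model.predict(features)
    except Exception as e:
        logger.error(f"Prediction failed: {e}")
        return jsonify({'error': 'prediction failed'}), 500
    
    return jsonify({'prediction': int(prediction[0])})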

Data Drift Detection

# Install
# pip install evidently

# Note: this uses Evidently's legacy dashboard API; newer versions use a Report-based API
from evidently.dashboard import Dashboard
from evidently.tabs import DataDriftTab

# Compare production data against the training (reference) data
dashboard = Dashboard(tabs=[DataDriftTab()])
dashboard.calculate(reference_data, production_data)
dashboard.save("drift_report.html")

A/B Testing

import random

# Two models
model_a = joblib.load('model_a.pkl')
model_b = joblib.load('model_b.pkl')

@app.route('/predict', methods=['POST'])
def predict():
    data = request.get_json()
    features = np.array(data['features']).reshape(1, -1)
    
    # Randomly choose model (50/50 split)
    if random.random() < 0.5:
        prediction = model_a.predict(features)
        model_version = 'A'
    else:
        prediction = model_b.predict(features)
        model_version = 'B'
    
    # Log which model was used
    logger.info(f"Model {model_version} used")
    
    return jsonify({
        'prediction': int(prediction[0]),
        'model_version': model_version
    })
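
Random assignment gives every request a fresh coin flip. If the same user should always hit the same model, hash a stable identifier instead (user_id here is a hypothetical request field):

import hashlib

def assign_model(user_id: str) -> str:
    # The same user ID always lands in the same bucket
    bucket = int(hashlib.sha256(user_id.encode()).hexdigest(), 16) % 100
    return 'A' if bucket < 50 else 'B'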

Cloud Deployment

AWS SageMaker

import sagemaker
from sagemaker.sklearn import SKLearnModel

# Deploy to SageMaker
sklearn_model = SKLearnModel(
    model_data='s3://bucket/model.tar.gz',
    role=role,  # your SageMaker execution role ARN
    entry_point='inference.py',
    framework_version='0.23-1'
)

predictor = sklearn_model.deploy(
    initial_instance_count=1,
    instance_type='ml.m5.large'
)

# Predict
result = predictor.predict(data)
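
SageMaker endpoints bill for as long as they run; delete them when finished:

predictor.delete_endpoint()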

Best Practices

  1. Version everything: Code, data, models
  2. Automate testing: Unit tests, integration tests
  3. Monitor continuously: Accuracy, latency, errors
  4. Gradual rollout: Canary deployment, A/B testing
  5. Easy rollback: Keep old model versions
  6. Document: API docs, model cards

Remember

  • MLOps is essential for production AI
  • Use Docker for consistency
  • Monitor model performance over time
  • Plan for model retraining
  • Security: Validate inputs, use HTTPS
#AI #Advanced #MLOps