
Image Segmentation

Segment images pixel by pixel.

Dr. Rachel Green
December 18, 2025

Label every pixel in an image.

What is Image Segmentation?

Unlike object detection, which only draws bounding boxes, segmentation assigns a class label to every pixel in the image.

Types:

  • Semantic: every pixel gets a class label; objects of the same class are not separated
  • Instance: each individual object gets its own mask, even within the same class (see the toy example below)
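
A toy illustration of the difference, using made-up 4x4 masks (the arrays below are purely hypothetical): in a semantic mask both cats share one class label, while an instance mask gives each cat its own id.

import numpy as np

# Hypothetical 4x4 masks for an image containing two cats (0 = background)

# Semantic segmentation: every cat pixel gets the same class label (1),
# so the two cats cannot be told apart
semantic_mask = np.array([
    [1, 1, 0, 1],
    [1, 1, 0, 1],
    [0, 0, 0, 0],
    [0, 0, 0, 0],
])

# Instance segmentation: each cat gets its own instance id (1 and 2)
instance_mask = np.array([
    [1, 1, 0, 2],
    [1, 1, 0, 2],
    [0, 0, 0, 0],
    [0, 0, 0, 0],
])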

U-Net for Segmentation

U-Net is an encoder-decoder network: the encoder downsamples to capture context, the decoder upsamples back to full resolution, and skip connections carry encoder features into the decoder to preserve fine detail.

import tensorflow as tf

def unet_model(input_size=(256, 256, 3)):
    inputs = tf.keras.Input(input_size)
    
    # Encoder (downsampling)
    c1 = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same')(inputs)
    c1 = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same')(c1)
    p1 = tf.keras.layers.MaxPooling2D()(c1)
    
    c2 = tf.keras.layers.Conv2D(128, 3, activation='relu', padding='same')(p1)
    c2 = tf.keras.layers.Conv2D(128, 3, activation='relu', padding='same')(c2)
    p2 = tf.keras.layers.MaxPooling2D()(c2)
    
    # Bottleneck
    c3 = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='same')(p2)
    c3 = tf.keras.layers.Conv2D(256, 3, activation='relu', padding='same')(c3)
    
    # Decoder (upsampling) with skip connections to encoder features
    u1 = tf.keras.layers.Conv2DTranspose(128, 2, strides=2, padding='same')(c3)
    u1 = tf.keras.layers.concatenate([u1, c2])  # skip connection from c2
    c4 = tf.keras.layers.Conv2D(128, 3, activation='relu', padding='same')(u1)
    c4 = tf.keras.layers.Conv2D(128, 3, activation='relu', padding='same')(c4)
    
    u2 = tf.keras.layers.Conv2DTranspose(64, 2, strides=2, padding='same')(c4)
    u2 = tf.keras.layers.concatenate([u2, c1])  # skip connection from c1
    c5 = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same')(u2)
    c5 = tf.keras.layers.Conv2D(64, 3, activation='relu', padding='same')(c5)
    
    # Output: single channel with sigmoid, i.e. a per-pixel foreground probability
    outputs = tf.keras.layers.Conv2D(1, 1, activation='sigmoid')(c5)
    
    model = tf.keras.Model(inputs=[inputs], outputs=[outputs])
    return model

model = unet_model()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
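
As a quick sanity check (optional), the compiled model should map a 256x256 RGB input to a single-channel 256x256 probability map:

print(model.output_shape)  # (None, 256, 256, 1): one foreground probability per pixel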

Training the Segmentation Model

import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Load data (masks are assumed to share filenames with their images)
def load_images_and_masks(image_dir, mask_dir, img_size=(256, 256)):
    images = []
    masks = []
    
    for img_file in os.listdir(image_dir):
        # Load image
        img = load_img(f"{image_dir}/{img_file}", target_size=img_size)
        img = img_to_array(img) / 255.0
        images.append(img)
        
        # Load mask
        mask = load_img(f"{mask_dir}/{img_file}", target_size=img_size, color_mode='grayscale')
        mask = img_to_array(mask) / 255.0
        masks.append(mask)
    
    return np.array(images), np.array(masks)

X_train, y_train = load_images_and_masks('train/images', 'train/masks')

# Train
history = model.fit(
    X_train, y_train,
    batch_size=16,
    epochs=50,
    validation_split=0.2
)

Prediction and Visualization

import matplotlib.pyplot as plt

# Predict
test_img = load_img('test.jpg', target_size=(256, 256))
test_img_array = img_to_array(test_img) / 255.0
test_img_array = np.expand_dims(test_img_array, axis=0)

prediction = model.predict(test_img_array)[0]

# Visualize
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
axes[0].imshow(test_img)
axes[0].set_title('Original Image')
axes[1].imshow(prediction.squeeze(), cmap='gray')
axes[1].set_title('Predicted Mask')
axes[2].imshow(test_img)
axes[2].imshow(prediction.squeeze(), alpha=0.5, cmap='jet')
axes[2].set_title('Overlay')
plt.show()
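
The sigmoid output is a per-pixel probability, not a hard mask; to get a binary segmentation you typically threshold it (0.5 is a common default, though the best value is task-dependent):

# Threshold the probability map into a binary mask
binary_mask = (prediction.squeeze() > 0.5).astype(np.uint8)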

DeepLab v3+

DeepLab v3+ is another widely used architecture, combining atrous (dilated) convolutions with an encoder-decoder design. The segmentation_models_pytorch package provides a ready-made implementation:

# Install
# pip install segmentation-models-pytorch

import segmentation_models_pytorch as smp

# Build a DeepLabV3+ model with an ImageNet-pretrained ResNet-50 encoder
model = smp.DeepLabV3Plus(
    encoder_name='resnet50',
    encoder_weights='imagenet',
    classes=1,
    activation='sigmoid'
)

# Run inference with PyTorch
import torch

model.eval()
with torch.no_grad():
    image = torch.randn(1, 3, 256, 256)   # dummy input: (batch, channels, height, width)
    mask = model(image)

print(mask.shape)  # torch.Size([1, 1, 256, 256])
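
The returned model is a regular torch.nn.Module, so it can be fine-tuned with a standard PyTorch training step. A minimal sketch, continuing from the model above and using randomly generated tensors in place of a real data loader:

# Dummy batch standing in for real data: 4 RGB images with binary masks
images = torch.randn(4, 3, 256, 256)
targets = torch.randint(0, 2, (4, 1, 256, 256)).float()

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
loss_fn = torch.nn.BCELoss()  # matches the sigmoid activation above

model.train()
optimizer.zero_grad()
pred = model(images)           # (4, 1, 256, 256) per-pixel probabilities
loss = loss_fn(pred, targets)
loss.backward()
optimizer.step()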

Mask R-CNN (Instance Segmentation)

# Install (detectron2 is not on PyPI; install from the official repo)
# pip install 'git+https://github.com/facebookresearch/detectron2.git'

from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer

# Setup a COCO-pretrained Mask R-CNN (set cfg.MODEL.DEVICE = "cpu" to run without a GPU)
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")

predictor = DefaultPredictor(cfg)

# Predict
import cv2
image = cv2.imread('image.jpg')
outputs = predictor(image)

# Visualize (Visualizer expects RGB, so flip OpenCV's BGR channel order)
v = Visualizer(image[:, :, ::-1])
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
cv2.imshow('Segmentation', out.get_image()[:, :, ::-1])
cv2.waitKey(0)
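
Beyond the visualization, the raw predictions live on outputs["instances"] (detectron2's standard instance-segmentation fields): per-instance boolean masks, class ids, and confidence scores.

instances = outputs["instances"].to("cpu")
print(len(instances))              # number of detected objects
print(instances.pred_classes)      # COCO class id per instance
print(instances.scores)            # confidence score per instance
print(instances.pred_masks.shape)  # (num_instances, H, W) boolean masks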

Applications

  • Medical imaging (tumor detection)
  • Autonomous driving (road segmentation)
  • Satellite imagery
  • Photo editing
  • Agriculture (crop monitoring)

Remember

  • Semantic segmentation: Label categories
  • Instance segmentation: Separate objects
  • U-Net popular for medical images
  • Needs pixel-wise labeled data
#AI #Advanced #Computer Vision