AI · 7 min read
Image Segmentation
Segment images pixel by pixel.
Dr. Rachel Green
December 18, 2025
0.0k0
Label every pixel in an image.
What is Image Segmentation?
Classify every pixel individually, rather than only drawing bounding boxes around objects.
Types:
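- Semantic: Label every pixel with its category (all objects of one class share a label)
- Instance: Separate individual objects, even when they belong to the same category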
U-Net for Segmentation
import tensorflow as tf
def _double_conv(tensor, filters):
    """Apply two 3x3, same-padded, ReLU-activated convolutions in sequence."""
    for _ in range(2):
        tensor = tf.keras.layers.Conv2D(
            filters, 3, activation='relu', padding='same'
        )(tensor)
    return tensor


def unet_model(input_size=(256, 256, 3)):
    """Build a two-level U-Net with a single-channel sigmoid output.

    The encoder halves the spatial resolution twice with max pooling, the
    decoder doubles it twice with stride-2 transposed convolutions, and
    each decoder level is concatenated with the encoder feature map of the
    same level (skip connection).

    Args:
        input_size: Input shape passed to ``tf.keras.Input`` as
            ``(height, width, channels)``. Height and width must be
            multiples of 4 (or ``None`` for a variable-size input).

    Returns:
        An uncompiled ``tf.keras.Model`` mapping the input to a
        one-channel map produced by a 1x1 sigmoid convolution.

    Raises:
        ValueError: If a known height or width is not divisible by 4.
    """
    # Two 2x poolings followed by two 2x upsamplings only reproduce the
    # encoder's spatial sizes when both dimensions are multiples of 4;
    # otherwise the skip concatenations fail with an opaque shape mismatch.
    for axis_name, dim in zip(('height', 'width'), input_size):
        if dim is not None and dim % 4 != 0:
            raise ValueError(
                f"input_size {axis_name} must be divisible by 4, got {dim}"
            )

    inputs = tf.keras.Input(input_size)

    # Encoder (downsampling)
    c1 = _double_conv(inputs, 64)
    p1 = tf.keras.layers.MaxPooling2D()(c1)
    c2 = _double_conv(p1, 128)
    p2 = tf.keras.layers.MaxPooling2D()(c2)

    # Bottleneck
    c3 = _double_conv(p2, 256)

    # Decoder (upsampling), each step merged with its encoder counterpart
    u1 = tf.keras.layers.Conv2DTranspose(128, 2, strides=2, padding='same')(c3)
    u1 = tf.keras.layers.concatenate([u1, c2])
    c4 = _double_conv(u1, 128)
    u2 = tf.keras.layers.Conv2DTranspose(64, 2, strides=2, padding='same')(c4)
    u2 = tf.keras.layers.concatenate([u2, c1])
    c5 = _double_conv(u2, 64)

    # Output: 1x1 convolution producing one sigmoid value per pixel
    outputs = tf.keras.layers.Conv2D(1, 1, activation='sigmoid')(c5)
    model = tf.keras.Model(inputs=[inputs], outputs=[outputs])
    return model
# Instantiate the U-Net with its default input size.
model = unet_model()

# Binary cross-entropy pairs with the network's single sigmoid output channel.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
Training Segmentation Model
import os

import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
# Load data
def load_images_and_masks(image_dir, mask_dir, img_size=(256, 256)):
    """Load every image in ``image_dir`` together with its same-named mask.

    Each entry of ``image_dir`` is loaded and resized to ``img_size``; the
    mask is read from ``mask_dir`` under the identical file name, in
    grayscale mode. Both are converted to arrays and divided by 255.0.

    Args:
        image_dir: Directory containing the input images.
        mask_dir: Directory containing one mask per image, with the same
            file name as the image it belongs to.
        img_size: Target size passed to ``load_img`` for images and masks.

    Returns:
        A tuple ``(images, masks)`` of NumPy arrays stacked in sorted
        file-name order. Both are empty when ``image_dir`` has no entries.
    """
    images = []
    masks = []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any tail-based validation split) reproducible.
    for img_file in sorted(os.listdir(image_dir)):
        # Load image
        img = load_img(os.path.join(image_dir, img_file), target_size=img_size)
        images.append(img_to_array(img) / 255.0)

        # Load mask (looked up by the image's file name)
        mask = load_img(
            os.path.join(mask_dir, img_file),
            target_size=img_size,
            color_mode='grayscale',
        )
        masks.append(img_to_array(mask) / 255.0)
    return np.array(images), np.array(masks)
X_train, y_train = load_images_and_masks(
    image_dir='train/images',
    mask_dir='train/masks',
)

# Train, holding out 20% of the samples for validation.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=50,
    batch_size=16,
    validation_split=0.2,
)
Prediction and Visualization
import matplotlib.pyplot as plt
# Predict: load the test image, divide by 255 as the training loader does,
# and prepend a batch axis so the model receives a batch of one.
test_img = load_img('test.jpg', target_size=(256, 256))
test_img_array = (img_to_array(test_img) / 255.0)[np.newaxis, ...]
prediction = model.predict(test_img_array)[0]

# Visualize: original, predicted mask, and the mask blended over the original.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))
axes[0].imshow(test_img)
axes[1].imshow(prediction.squeeze(), cmap='gray')
# Overlay: draw the image first, then the half-transparent mask on top.
axes[2].imshow(test_img)
axes[2].imshow(prediction.squeeze(), cmap='jet', alpha=0.5)
for axis, title in zip(axes, ('Original Image', 'Predicted Mask', 'Overlay')):
    axis.set_title(title)
plt.show()
DeepLab v3+
State-of-the-art segmentation:
# Install
# pip install segmentation-models-pytorch
import segmentation_models_pytorch as smp
# Load pre-trained model: DeepLabV3+ on a ResNet-50 encoder initialised with
# ImageNet weights, producing one sigmoid-activated output channel.
model = smp.DeepLabV3Plus(
    encoder_name='resnet50',
    encoder_weights='imagenet',
    classes=1,
    activation='sigmoid'
)
# Use with PyTorch
import torch
model.eval()  # switch layers such as dropout/batch-norm to inference behaviour
image = torch.randn(1, 3, 256, 256)  # random stand-in for a real image batch
# Inference needs no gradients; no_grad avoids building the autograd graph.
with torch.no_grad():
    mask = model(image)
print(mask.shape) # torch.Size([1, 1, 256, 256])
Mask R-CNN (Instance Segmentation)
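# Install (detectron2 is installed from source; see its official installation guide)
# python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'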
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
# Setup
# A single model-zoo entry identifies both the config file and its checkpoint.
zoo_entry = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(zoo_entry))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_entry)
# Score threshold applied to detections at test time.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
predictor = DefaultPredictor(cfg)
# Predict
import cv2
image = cv2.imread('image.jpg')
# cv2.imread does not raise on a missing or unreadable file; it returns None,
# which would otherwise surface as an obscure error inside the predictor.
if image is None:
    raise FileNotFoundError("Could not read image file 'image.jpg'")
outputs = predictor(image)
# Visualize
# OpenCV arrays are BGR; reverse the channel axis to hand the Visualizer RGB,
# then reverse its rendered result again before showing it with OpenCV.
v = Visualizer(image[:, :, ::-1])
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
cv2.imshow('Segmentation', out.get_image()[:, :, ::-1])
cv2.waitKey(0)
# Close the preview window once a key has been pressed.
cv2.destroyAllWindows()
Applications
- Medical imaging (tumor detection)
- Autonomous driving (road segmentation)
- Satellite imagery
- Photo editing
- Agriculture (crop monitoring)
Remember
- Semantic segmentation: Label categories
- Instance segmentation: Separate objects
- U-Net is popular for medical images
- Needs pixel-wise labeled data
#AI #Advanced #Computer Vision