AI · 7 min read

Image Segmentation

Segment images pixel by pixel.

Dr. Rachel Green
December 18, 2025
0.0k0

Label every pixel in an image.

What is Image Segmentation?

Classify every pixel individually, rather than only drawing bounding boxes around objects as object detection does.

**Types**:

- Semantic: Label categories
- Instance: Separate individual objects

U-Net for Segmentation

```python
import tensorflow as tf

def _double_conv(x, filters):
    """Apply two 3x3, same-padded, ReLU-activated convolutions with `filters` channels."""
    x = tf.keras.layers.Conv2D(filters, 3, activation='relu', padding='same')(x)
    return tf.keras.layers.Conv2D(filters, 3, activation='relu', padding='same')(x)


def unet_model(input_size=(256, 256, 3)):
    """Build a small two-level U-Net for binary segmentation.

    The network has two encoder stages (64 and 128 filters), a 256-filter
    bottleneck, and two decoder stages that upsample with transposed
    convolutions and concatenate the matching encoder features (skip
    connections). The final 1x1 convolution emits one sigmoid channel per
    pixel.

    Args:
        input_size: Shape of one input sample, passed straight to
            `tf.keras.Input`. The encoder pools twice and the decoder
            upsamples twice with stride 2, so height and width should be
            divisible by 4 for the skip concatenations to line up.

    Returns:
        An uncompiled `tf.keras.Model` mapping the input to a
        single-channel sigmoid mask.
    """
    inputs = tf.keras.Input(input_size)

    # Encoder (downsampling)
    c1 = _double_conv(inputs, 64)
    p1 = tf.keras.layers.MaxPooling2D()(c1)

    c2 = _double_conv(p1, 128)
    p2 = tf.keras.layers.MaxPooling2D()(c2)

    # Bottleneck
    c3 = _double_conv(p2, 256)

    # Decoder (upsampling): upsample, then merge with the encoder features
    # of the same resolution before convolving again.
    u1 = tf.keras.layers.Conv2DTranspose(128, 2, strides=2, padding='same')(c3)
    u1 = tf.keras.layers.concatenate([u1, c2])
    c4 = _double_conv(u1, 128)

    u2 = tf.keras.layers.Conv2DTranspose(64, 2, strides=2, padding='same')(c4)
    u2 = tf.keras.layers.concatenate([u2, c1])
    c5 = _double_conv(u2, 64)

    # Output: one sigmoid channel per pixel
    outputs = tf.keras.layers.Conv2D(1, 1, activation='sigmoid')(c5)

    model = tf.keras.Model(inputs=[inputs], outputs=[outputs])
    return model

model = unet_model()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
```

Training Segmentation Model

```python
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Load data
def load_images_and_masks(image_dir, mask_dir, img_size=(256, 256)):
    """Load paired images and masks into two NumPy arrays.

    Every entry in `image_dir` is expected to have a mask with the same
    file name in `mask_dir`. Images are loaded in colour, masks as
    single-channel grayscale; both are resized to `img_size` and divided
    by 255.

    Args:
        image_dir: Directory containing the input images.
        mask_dir: Directory containing one mask per image, same file names.
        img_size: Target size passed to `load_img` for both images and masks.

    Returns:
        A tuple `(images, masks)` of NumPy arrays, in matching order.
    """
    # Imported locally because the snippet's import lines do not bring `os`
    # into scope.
    import os

    images = []
    masks = []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order reproducible between runs.
    for img_file in sorted(os.listdir(image_dir)):
        # Load image
        img = load_img(os.path.join(image_dir, img_file), target_size=img_size)
        img = img_to_array(img) / 255.0
        images.append(img)

        # Load mask (looked up by the same file name as the image)
        mask = load_img(
            os.path.join(mask_dir, img_file),
            target_size=img_size,
            color_mode='grayscale',
        )
        mask = img_to_array(mask) / 255.0
        masks.append(mask)

    return np.array(images), np.array(masks)


X_train, y_train = load_images_and_masks('train/images', 'train/masks')

# Train
history = model.fit(
    X_train, y_train,
    batch_size=16,
    epochs=50,
    validation_split=0.2
)
```

Prediction and Visualization

```python
import matplotlib.pyplot as plt

# Predict
# Load the test image at the same size and with the same /255 scaling that was
# used for the training images.
test_img = load_img('test.jpg', target_size=(256, 256))
test_img_array = img_to_array(test_img) / 255.0
# Add a leading batch axis so a single image can be passed to predict().
test_img_array = np.expand_dims(test_img_array, axis=0)

# Take the first (and only) prediction of the batch.
prediction = model.predict(test_img_array)[0]

# Visualize
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

axes[0].imshow(test_img)
axes[0].set_title('Original Image')

axes[1].imshow(prediction.squeeze(), cmap='gray')
axes[1].set_title('Predicted Mask')

axes[2].imshow(test_img)
axes[2].imshow(prediction.squeeze(), alpha=0.5, cmap='jet')
axes[2].set_title('Overlay')

plt.show()
```

DeepLab v3+

State-of-the-art segmentation:

```python
# Install
# pip install segmentation-models-pytorch

import torch
import segmentation_models_pytorch as smp

# Load pre-trained model: DeepLabV3+ on a ResNet-50 encoder initialised with
# ImageNet weights, producing one sigmoid-activated output channel.
model = smp.DeepLabV3Plus(
    encoder_name='resnet50',
    encoder_weights='imagenet',
    classes=1,
    activation='sigmoid'
)

# Use with PyTorch
# Switch to evaluation mode before running inference.
model.eval()

image = torch.randn(1, 3, 256, 256)
mask = model(image)
print(mask.shape)  # (1, 1, 256, 256)
```

Mask R-CNN (Instance Segmentation)

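```python
# Install
# pip install detectron2
# (detectron2 is normally installed from source; see its installation guide
#  if the command above does not work for your setup)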

import cv2

from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2.utils.visualizer import Visualizer

# Setup
# The same model-zoo entry supplies both the config file and the checkpoint.
config_name = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(config_name))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # score threshold applied at test time
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(config_name)

predictor = DefaultPredictor(cfg)

# Predict
image = cv2.imread('image.jpg')
# cv2.imread returns None instead of raising when the file cannot be read,
# so fail here with a clear message rather than inside the predictor.
if image is None:
    raise FileNotFoundError("Could not read 'image.jpg'")
outputs = predictor(image)

# Visualize
v = Visualizer(image[:, :, ::-1])
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
cv2.imshow('Segmentation', out.get_image()[:, :, ::-1])
cv2.waitKey(0)
```

Applications

- Medical imaging (tumor detection)
- Autonomous driving (road segmentation)
- Satellite imagery
- Photo editing
- Agriculture (crop monitoring)

Remember

- Semantic segmentation: Label categories
- Instance segmentation: Separate objects
- U-Net popular for medical images
- Needs pixel-wise labeled data

#AI #Advanced #Computer Vision