Image Segmentation
Segment images pixel by pixel.
Label every pixel in an image.
What is Image Segmentation?
Classify every pixel individually, rather than only drawing bounding boxes around objects.
**Types**:

- Semantic: label every pixel with a class category
- Instance: additionally separate individual objects of the same class
U-Net for Segmentation
```python
import tensorflow as tf
def unet_model(input_size=(256, 256, 3)):
    """Build a small U-Net with two pooling stages and a sigmoid mask output.

    The encoder applies two convolution stages, each followed by max pooling;
    the decoder upsamples with transposed convolutions and concatenates the
    matching encoder features before convolving again.

    Args:
        input_size: Shape handed to ``tf.keras.Input``.

    Returns:
        An uncompiled ``tf.keras.Model`` whose final layer is a 1x1
        convolution with one filter and a sigmoid activation.
    """
    layers = tf.keras.layers

    def double_conv(tensor, filters):
        # Two stacked 3x3 ReLU convolutions with 'same' padding.
        for _ in range(2):
            tensor = layers.Conv2D(
                filters, 3, activation='relu', padding='same'
            )(tensor)
        return tensor

    inputs = tf.keras.Input(input_size)

    # Encoder (downsampling)
    enc1 = double_conv(inputs, 64)
    pooled1 = layers.MaxPooling2D()(enc1)
    enc2 = double_conv(pooled1, 128)
    pooled2 = layers.MaxPooling2D()(enc2)

    # Bottleneck
    bottleneck = double_conv(pooled2, 256)

    # Decoder (upsampling); each step merges the encoder features of the
    # same resolution through concatenation.
    up1 = layers.Conv2DTranspose(128, 2, strides=2, padding='same')(bottleneck)
    dec1 = double_conv(layers.concatenate([up1, enc2]), 128)
    up2 = layers.Conv2DTranspose(64, 2, strides=2, padding='same')(dec1)
    dec2 = double_conv(layers.concatenate([up2, enc1]), 64)

    # Output
    outputs = layers.Conv2D(1, 1, activation='sigmoid')(dec2)

    return tf.keras.Model(inputs=[inputs], outputs=[outputs])
model = unet_model()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
```
Training Segmentation Model
```python
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array
# Load data
def load_images_and_masks(image_dir, mask_dir, img_size=(256, 256)):
    """Load every image in ``image_dir`` together with its same-named mask.

    Each file name found in ``image_dir`` is also looked up in ``mask_dir``.
    Images and masks are resized to ``img_size`` and divided by 255.0.

    Args:
        image_dir: Directory containing the input images.
        mask_dir: Directory containing one mask per image, with the same
            file name as the image.
        img_size: Target size passed to ``load_img``.

    Returns:
        A tuple ``(images, masks)`` of NumPy arrays, in matching order.
    """
    # Local import: the snippet uses os.listdir but never imports os.
    import os

    images = []
    masks = []
    # os.listdir returns entries in arbitrary order; sort so the sample order
    # (and therefore any later split of the arrays) is reproducible.
    for img_file in sorted(os.listdir(image_dir)):
        # Load image
        img = load_img(os.path.join(image_dir, img_file), target_size=img_size)
        images.append(img_to_array(img) / 255.0)

        # Load mask
        mask = load_img(
            os.path.join(mask_dir, img_file),
            target_size=img_size,
            color_mode='grayscale',
        )
        masks.append(img_to_array(mask) / 255.0)

    return np.array(images), np.array(masks)
X_train, y_train = load_images_and_masks('train/images', 'train/masks')

# Train
history = model.fit(
    X_train, y_train,
    batch_size=16,
    epochs=50,
    validation_split=0.2
)
```
Prediction and Visualization
```python
import matplotlib.pyplot as plt
# Predict
test_img = load_img('test.jpg', target_size=(256, 256))
test_img_array = img_to_array(test_img) / 255.0
# Insert a leading axis so the single image is passed as a batch of one.
test_img_array = np.expand_dims(test_img_array, axis=0)

# Take the first (and only) entry of the predicted batch.
prediction = model.predict(test_img_array)[0]
# Visualize
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
axes[0].imshow(test_img)
axes[0].set_title('Original Image')
axes[1].imshow(prediction.squeeze(), cmap='gray')
axes[1].set_title('Predicted Mask')
axes[2].imshow(test_img)
axes[2].imshow(prediction.squeeze(), alpha=0.5, cmap='jet')
axes[2].set_title('Overlay')
plt.show()
```
DeepLab v3+
State-of-the-art segmentation:
```python
# Install
# pip install segmentation-models-pytorch
import segmentation_models_pytorch as smp
import torch

# Load pre-trained model
model = smp.DeepLabV3Plus(
    encoder_name='resnet50',
    encoder_weights='imagenet',
    classes=1,
    activation='sigmoid'
)

# Use with PyTorch
model.eval()  # switch the module to evaluation mode before inference
image = torch.randn(1, 3, 256, 256)
mask = model(image)
print(mask.shape)  # (1, 1, 256, 256)
```
Mask R-CNN (Instance Segmentation)
```python
# Install
# pip install detectron2
import cv2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2.utils.visualizer import Visualizer

# Setup
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")

predictor = DefaultPredictor(cfg)

# Predict
image = cv2.imread('image.jpg')
# cv2.imread returns None instead of raising when the file cannot be read;
# fail here with a clear message rather than inside the predictor.
if image is None:
    raise FileNotFoundError("Could not read 'image.jpg'")
outputs = predictor(image)
# Visualize
v = Visualizer(image[:, :, ::-1])
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
cv2.imshow('Segmentation', out.get_image()[:, :, ::-1])
cv2.waitKey(0)
```
Applications
- Medical imaging (tumor detection)
- Autonomous driving (road segmentation)
- Satellite imagery
- Photo editing
- Agriculture (crop monitoring)
Remember
- Semantic segmentation: label every pixel with a category
- Instance segmentation: separate individual objects
- U-Net is popular for medical images
- Training needs pixel-wise labeled data