# UNet_UAE_for_Lane_Detection/predicdt.py

import itertools
import torch
import numpy as np
from torchvision import transforms
from PIL import Image, ImageOps
import cv2
from unet import Unet
from nets.U_ConvAutoencoder import U_ConvAutoencoder
from typing import Tuple, List


# Convolutional autoencoder used to post-process the U-Net output

class PreCA:
    """Class-level wrapper around ``U_ConvAutoencoder`` for mask refinement.

    All state (device, model, preprocessing transform) is stored on the class
    itself, so the wrapper behaves like a singleton. ``initialize_model`` must
    be called once before ``load_image`` or ``infer`` is used.
    """

    # Populated by initialize_model(); these stay None until it has run.
    device: torch.device = None
    model: U_ConvAutoencoder = None
    transform: transforms.Compose = None

    @classmethod
    def initialize_model(cls, u_ca_path: str, input_size: Tuple[int, int] = (1728, 3392)) -> None:
        """Build the autoencoder, load its weights and set up preprocessing.

        Args:
            u_ca_path: Path to the saved ``state_dict`` of the autoencoder.
            input_size: Size passed to ``transforms.Resize`` (torchvision
                interprets a pair as ``(height, width)``). Defaults to the
                previously hard-coded ``(1728, 3392)``.
        """
        # Use the GPU when one is available, otherwise fall back to the CPU.
        cls.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        cls.model = U_ConvAutoencoder().to(cls.device)
        # NOTE: torch.load unpickles the file; only load checkpoints that come
        # from a trusted source.
        cls.model.load_state_dict(torch.load(u_ca_path, map_location=cls.device))
        cls.model.eval()
        # Image preprocessing: resize to the network input size, then to tensor.
        cls.transform = transforms.Compose([
            transforms.Resize(input_size),
            transforms.ToTensor(),
        ])

    @classmethod
    def _require_initialized(cls) -> None:
        """Raise a clear error if ``initialize_model`` has not been called."""
        if cls.model is None or cls.transform is None:
            raise RuntimeError(
                "PreCA.initialize_model() must be called before running inference"
            )

    @classmethod
    def load_image(cls, image: Image.Image) -> torch.Tensor:
        """Convert a PIL image into a single-image batch tensor on ``cls.device``.

        The image is converted to 8-bit grayscale, passed through the
        preprocessing transform, and given a leading batch dimension.

        Raises:
            RuntimeError: If the model has not been initialised yet.
        """
        cls._require_initialized()
        gray = image.convert("L")
        batch = cls.transform(gray).unsqueeze(0)  # add the batch dimension
        return batch.to(cls.device)

    @staticmethod
    def ca_smooth(image: Image.Image) -> Image.Image:
        """Clean up an autoencoder output and binarise it.

        Applies a 3x3 morphological closing, a Gaussian blur and a fixed
        binary threshold (values above 126 become 255, the rest 0).
        """
        pixels = np.array(image)
        # Morphological closing with a 3x3 rectangular structuring element.
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
        closed = cv2.morphologyEx(pixels, cv2.MORPH_CLOSE, kernel)
        # A 1x1 Gaussian kernel is effectively a no-op; the call is kept so
        # the processing chain stays exactly as it was.
        blurred = cv2.GaussianBlur(closed, (1, 1), 0)
        binary = cv2.threshold(blurred, 126, 255, cv2.THRESH_BINARY)[1]
        return Image.fromarray(binary)

    @classmethod
    def infer(cls, image: Image.Image, output_size: Tuple[int, int] = (3384, 1710)) -> Image.Image:
        """Run the autoencoder on ``image`` and return the result as a PIL image.

        Args:
            image: Input image; it is preprocessed with ``load_image``.
            output_size: Size passed to ``Image.resize`` (Pillow interprets it
                as ``(width, height)``). Defaults to the previously
                hard-coded ``(3384, 1710)``.

        Returns:
            The model output resized to ``output_size`` with nearest-neighbour
            resampling.

        Raises:
            RuntimeError: If the model has not been initialised yet.
        """
        cls._require_initialized()
        batch = cls.load_image(image)
        with torch.no_grad():
            output = cls.model(batch)
        output = output.squeeze(0).cpu()  # drop the batch dimension, move to CPU
        output_image = transforms.ToPILImage()(output)
        return output_image.resize(output_size, Image.NEAREST)


class PreUnet:
    """Evaluation helpers and the batch driver for the U-Net + autoencoder pipeline."""

    @staticmethod
    def blend_images_with_colorize(image1: Image.Image, image2: Image.Image, alpha: float = 0.5) -> None:
        """Display ``image1`` tinted red blended with ``image2`` tinted green.

        Both images are converted to grayscale before being colourised.
        ``alpha`` is forwarded to ``Image.blend``. The result is shown with
        ``Image.show`` and nothing is returned.
        """
        red_image1 = ImageOps.colorize(image1.convert("L"), (0, 0, 0), (255, 0, 0))
        green_image2 = ImageOps.colorize(image2.convert("L"), (0, 0, 0), (0, 255, 0))
        blended_image = Image.blend(red_image1, green_image2, alpha)
        blended_image.show()

    @staticmethod
    def calculate_metrics(pred_image: Image.Image, true_image: Image.Image, threshold: int = 1) -> Tuple[int, int, int]:
        """Count true-positive, false-positive and false-negative pixels.

        Both images are converted to grayscale; a pixel is foreground when its
        value is at least ``threshold``.

        Returns:
            ``(TP, FP, FN)`` pixel counts (NumPy integer scalars).

        Raises:
            ValueError: If the two images have shapes NumPy cannot combine.
        """
        pred_fg = np.asarray(pred_image.convert('L')) >= threshold
        true_fg = np.asarray(true_image.convert('L')) >= threshold

        TP = np.sum(pred_fg & true_fg)
        FP = np.sum(pred_fg & ~true_fg)
        FN = np.sum(~pred_fg & true_fg)

        return TP, FP, FN

    @staticmethod
    def apply_mask(original_image: Image.Image, mask_imag: Image.Image) -> Image.Image:
        """Return an RGB copy of ``original_image`` with masked pixels painted green.

        A pixel is painted ``(0, 255, 0)`` wherever the RGB-converted mask is
        pure white ``(255, 255, 255)``. The inputs are not modified. The
        returned image is rebuilt from an array, so it does not carry over the
        input image's ``info`` metadata.

        Raises:
            IndexError: If the mask is smaller than the original image in
                either dimension.
        """
        # np.array makes a writable copy, so the caller's image is untouched.
        original_rgb = np.array(original_image.convert("RGB"))
        mask_rgb = np.asarray(mask_imag.convert("RGB"))

        height, width = original_rgb.shape[:2]
        if mask_rgb.shape[0] < height or mask_rgb.shape[1] < width:
            raise IndexError("mask image is smaller than the original image")

        # Vectorised replacement for the per-pixel loop; a larger mask is
        # cropped to the top-left region covering the original image.
        is_white = np.all(mask_rgb[:height, :width] == 255, axis=-1)
        original_rgb[is_white] = (0, 255, 0)

        return Image.fromarray(original_rgb)

    @classmethod
    def main(cls, ca_path: str) -> None:
        """Run the full pipeline over every image in the input directory.

        Each image goes through ``unet.detect_image``, ``PreCA.infer`` and
        ``PreCA.ca_smooth``. Depending on the flags, the result is saved
        and/or scored against its label with IoU.

        Reads the module-level names ``dir_origin_path``, ``unet``,
        ``is_save``, ``dir_save_path``, ``is_get_iou`` and ``dir_label_path``,
        which the script entry point defines.

        Args:
            ca_path: Path to the autoencoder weights, forwarded to
                ``PreCA.initialize_model``.
        """
        import os
        from tqdm import tqdm

        PreCA.initialize_model(ca_path)

        image_suffixes = ('.bmp', '.dib', '.png', '.jpg', '.jpeg', '.pbm', '.pgm', '.ppm', '.tif', '.tiff')
        if is_save:
            # Created once up front; exist_ok avoids a check-then-create race.
            os.makedirs(dir_save_path, exist_ok=True)

        ious: List[float] = []
        for img_name in tqdm(os.listdir(dir_origin_path)):
            if not img_name.lower().endswith(image_suffixes):
                continue

            # splitext strips only the final extension, so names that contain
            # extra dots keep their full stem.
            stem = os.path.splitext(img_name)[0]

            # Keep the file open until the autoencoder has produced a new
            # image, then release the handle.
            with Image.open(os.path.join(dir_origin_path, img_name)) as image:
                r_image = unet.detect_image(image)
                r_image = PreCA.infer(r_image)  # autoencoder refinement
            r_image = PreCA.ca_smooth(r_image)

            if is_save:
                r_image.save(os.path.join(dir_save_path, stem + '_bin.png'))

            if is_get_iou:
                label_path = os.path.join(dir_label_path, stem + '_bin.png')
                with Image.open(label_path) as label:
                    TP, FP, FN = cls.calculate_metrics(r_image, label)
                union = TP + FP + FN
                # Prediction and label both empty: treat as perfect agreement
                # instead of producing NaN from 0 / 0.
                iou = TP / union if union else 1.0
                ious.append(iou)
                print(f"当前iou{iou}")

                # cls.blend_images_with_colorize(label, r_image)

        if is_get_iou:
            if ious:
                print(f"平均iou{np.mean(ious)}")
            else:
                # np.mean of an empty list would be NaN with a warning.
                print("No images were evaluated; average IoU is undefined.")


if __name__ == "__main__":
    # ---- Dataset locations ----
    # Directory holding the input images.
    dir_origin_path: str = r"E:\git\unet_seg\unet\original_data\dataset_A\test\img"
    # Directory holding the labels; required when is_get_iou is True.
    dir_label_path: str = r"E:\git\unet_seg\unet\original_data\dataset_A\test\Label"
    # Directory for predicted images; required when is_save is True.
    dir_save_path: str = "img_out/"

    # ---- Switches ----
    # Compute IoU against the labels.
    is_get_iou: bool = True
    # Save the predicted images.
    is_save: bool = False

    # ---- Model configuration ----
    name_classes: List[str] = ["background", "lane"]
    # Weights of the multi-scale supervised autoencoder.
    u_ca_path: str = 'weights/best_conv_autoencoder1.pth'
    _defaults: dict = dict(
        model_path='model_data/best80.pth',  # U-Net weights
        num_classes=2,  # number of classes including background
        backbone="vgg",
        input_shape=[1696, 864],  # image size
        mix_type=1,
        cuda=True,  # enable CUDA acceleration
    )

    # Build the U-Net, then run the whole evaluation pipeline.
    unet: Unet = Unet(_defaults)
    PreUnet.main(u_ca_path)