import os
import cv2
import math
import torch
import numpy as np
import axengine as ort

from utils.general import img2tensor, imwrite, is_gray, bgr2gray, adain_npy
from utils.face_detector import YoloDetector


def get_largest_face(det_faces, h, w):
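    """Return the largest detected face (by box area clipped to h x w) and its index."""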

    def get_location(val, length):
        if val < 0:
            return 0
        elif val > length:
            return length
        else:
            return val

    face_areas = []
    for det_face in det_faces:
        left = get_location(det_face[0], w)
        right = get_location(det_face[2], w)
        top = get_location(det_face[1], h)
        bottom = get_location(det_face[3], h)
        face_area = (right - left) * (bottom - top)
        face_areas.append(face_area)
    largest_idx = face_areas.index(max(face_areas))
    return det_faces[largest_idx], largest_idx


def get_center_face(det_faces, h=0, w=0, center=None):
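    """Return the detected face closest to `center` (defaults to the image center) and its index."""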
    if center is not None:
        center = np.array(center)
    else:
        center = np.array([w / 2, h / 2])
    center_dist = []
    for det_face in det_faces:
        face_center = np.array([(det_face[0] + det_face[2]) / 2, (det_face[1] + det_face[3]) / 2])
        dist = np.linalg.norm(face_center - center)
        center_dist.append(dist)
    center_idx = center_dist.index(min(center_dist))
    return det_faces[center_idx], center_idx


class RestoreHelper(object):
    """Helper for the restoration pipeline (base class)."""

    def __init__(self,
                 upscale_factor,
                 face_size=512,
                 crop_ratio=(1, 1),
                 det_model='yolov5l-face.onnx',
                 res_model='codeformer.onnx',
                 bg_model='realesrgan_x2.onnx',
                 save_ext='png',
                 template_3points=False,
                 pad_blur=False,
                 use_parse=False):
        self.template_3points = template_3points
        self.upscale_factor = int(upscale_factor)

        self.crop_ratio = crop_ratio
        assert (self.crop_ratio[0] >= 1 and self.crop_ratio[1] >= 1), 'crop ratio only supports >=1'
        # face_size is (w, h) after applying the crop ratio
        self.face_size = (int(face_size * self.crop_ratio[1]), int(face_size * self.crop_ratio[0]))

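        # standard 5 landmarks (eyes, nose tip, mouth corners) for FFHQ faces at 512 x 512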
        self.face_template = np.array([[192.98138, 239.94708], [318.90277, 240.1936], [256.63416, 314.01935],
                                       [201.26117, 371.41043], [313.08905, 371.15118]])

        self.face_template = self.face_template * (face_size / 512.0)
        if self.crop_ratio[0] > 1:
            self.face_template[:, 1] += face_size * (self.crop_ratio[0] - 1) / 2
        if self.crop_ratio[1] > 1:
            self.face_template[:, 0] += face_size * (self.crop_ratio[1] - 1) / 2
        self.save_ext = save_ext
        self.pad_blur = pad_blur
        if self.pad_blur:
            self.template_3points = False

        # per-image state, reset via clean_all()
        self.all_landmarks_5 = []
        self.det_faces = []
        self.affine_matrices = []
        self.inverse_affine_matrices = []
        self.cropped_faces = []
        self.restored_faces = []
        self.pad_input_imgs = []

        # initialize the face detector
        self.face_detector = YoloDetector(model_path=det_model)

        self.use_parse = use_parse

        # initialize the face restoration model
        self.res_model = res_model
        self.rs_session, self.rs_input, self.rs_output = self.init_face_restoration()

        # initialize the background upsampling model and its tiling parameters
        self.tile = 108
        self.tile_pad = 10
        self.scale = 2
        self.bg_model = bg_model
        self.bg_session, self.bg_input, self.bg_output = self.init_background_upsampling()

    def set_upscale_factor(self, upscale_factor):
        self.upscale_factor = upscale_factor

    def read_image(self, img):
        """img can be an image path or a cv2-loaded (BGR) image."""
        if isinstance(img, str):
            img = cv2.imread(img)

        if np.max(img) > 256:  # 16-bit image
            img = img / 65535 * 255
        if len(img.shape) == 2:  # gray image
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        elif img.shape[2] == 4:  # BGRA image, drop the alpha channel
            img = img[:, :, 0:3]

        self.input_img = img
        self.is_gray = is_gray(img, threshold=10)
        if self.is_gray:
            print('Grayscale input: True')

        # enlarge small images so the shorter side is at least 512
        if min(self.input_img.shape[:2]) < 512:
            f = 512.0 / min(self.input_img.shape[:2])
            self.input_img = cv2.resize(self.input_img, (0, 0), fx=f, fy=f, interpolation=cv2.INTER_LINEAR)

    def get_face_landmarks_5(self,
                             only_keep_largest=False,
                             only_center_face=False,
                             resize=None,
                             blur_ratio=0.01,
                             eye_dist_threshold=None):
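        """Detect faces and collect their 5-point landmarks; returns the number of faces kept."""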
        if resize is None:
            scale = 1
            input_img = self.input_img
        else:
            h, w = self.input_img.shape[0:2]
            scale = resize / min(h, w)
            h, w = int(h * scale), int(w * scale)
            interp = cv2.INTER_AREA if scale < 1 else cv2.INTER_LINEAR
            input_img = cv2.resize(self.input_img, (w, h), interpolation=interp)

        bboxes = self.face_detector.detect_faces(input_img)

        if bboxes is None or bboxes.shape[0] == 0:
            return 0
        else:
            # map detections back to the original resolution
            bboxes = bboxes / scale

        for bbox in bboxes:
            # skip faces with a small inter-eye distance: side faces or tiny faces
            eye_dist = np.linalg.norm([bbox[6] - bbox[8], bbox[7] - bbox[9]])
            if eye_dist_threshold is not None and (eye_dist < eye_dist_threshold):
                continue

            if self.template_3points:
                landmark = np.array([[bbox[i], bbox[i + 1]] for i in range(5, 11, 2)])
            else:
                landmark = np.array([[bbox[i], bbox[i + 1]] for i in range(5, 15, 2)])
            self.all_landmarks_5.append(landmark)
            self.det_faces.append(bbox[0:5])

        if len(self.det_faces) == 0:
            return 0
        if only_keep_largest:
            h, w, _ = self.input_img.shape
            self.det_faces, largest_idx = get_largest_face(self.det_faces, h, w)
            self.all_landmarks_5 = [self.all_landmarks_5[largest_idx]]
        elif only_center_face:
            h, w, _ = self.input_img.shape
            self.det_faces, center_idx = get_center_face(self.det_faces, h, w)
            self.all_landmarks_5 = [self.all_landmarks_5[center_idx]]

        # pad blurred borders around each face (FFHQ-style alignment padding)
        if self.pad_blur:
            self.pad_input_imgs = []
            for landmarks in self.all_landmarks_5:
                # get landmarks
                eye_left = landmarks[0, :]
                eye_right = landmarks[1, :]
                eye_avg = (eye_left + eye_right) * 0.5
                mouth_avg = (landmarks[3, :] + landmarks[4, :]) * 0.5
                eye_to_eye = eye_right - eye_left
                eye_to_mouth = mouth_avg - eye_avg

                # x axis of the oriented crop rectangle
                x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1]
                # normalize to a unit direction
                x /= np.hypot(*x)
                rect_scale = 1.5
                x *= max(np.hypot(*eye_to_eye) * 2.0 * rect_scale, np.hypot(*eye_to_mouth) * 1.8 * rect_scale)
                # y axis: perpendicular to x
                y = np.flipud(x) * [-1, 1]

                # rectangle center, shifted slightly towards the mouth
                c = eye_avg + eye_to_mouth * 0.1
                # quad: (left_top, left_bottom, right_bottom, right_top)
                quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
                # qsize: side length of the square
                qsize = np.hypot(*x) * 2
                border = max(int(np.rint(qsize * 0.1)), 3)

                # quad bounds, then the padding needed on each side: (left, top, right, bottom)
                pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))),
                       int(np.ceil(max(quad[:, 1]))))
                pad = [
                    max(-pad[0] + border, 1),
                    max(-pad[1] + border, 1),
                    max(pad[2] - self.input_img.shape[0] + border, 1),
                    max(pad[3] - self.input_img.shape[1] + border, 1)
                ]

                if max(pad) > 1:
                    # reflect-pad the image
                    pad_img = np.pad(self.input_img, ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect')
                    # shift the landmarks into the padded image
                    landmarks[:, 0] += pad[0]
                    landmarks[:, 1] += pad[1]
                    # blur the padded region
                    h, w, _ = pad_img.shape
                    y, x, _ = np.ogrid[:h, :w, :1]
                    mask = np.maximum(1.0 - np.minimum(np.float32(x) / pad[0],
                                                       np.float32(w - 1 - x) / pad[2]),
                                      1.0 - np.minimum(np.float32(y) / pad[1],
                                                       np.float32(h - 1 - y) / pad[3]))
                    blur = int(qsize * blur_ratio)
                    if blur % 2 == 0:  # the kernel size must be odd
                        blur += 1
                    blur_img = cv2.boxFilter(pad_img, 0, ksize=(blur, blur))

                    pad_img = pad_img.astype('float32')
                    pad_img += (blur_img - pad_img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0)
                    pad_img += (np.median(pad_img, axis=(0, 1)) - pad_img) * np.clip(mask, 0.0, 1.0)
                    pad_img = np.clip(pad_img, 0, 255)
                    self.pad_input_imgs.append(pad_img)
                else:
                    self.pad_input_imgs.append(np.copy(self.input_img))

        return len(self.all_landmarks_5)

    def align_warp_face(self, save_cropped_path=None, border_mode='constant'):
        """Align and warp each detected face to the face template."""
        if self.pad_blur:
            assert len(self.pad_input_imgs) == len(
                self.all_landmarks_5), f'Mismatched samples: {len(self.pad_input_imgs)} and {len(self.all_landmarks_5)}'
        for idx, landmark in enumerate(self.all_landmarks_5):
            # estimate a similarity transform from the landmarks to the template
            affine_matrix = cv2.estimateAffinePartial2D(landmark, self.face_template, method=cv2.LMEDS)[0]
            self.affine_matrices.append(affine_matrix)

            # warp and crop the face
            if border_mode == 'constant':
                border_mode = cv2.BORDER_CONSTANT
            elif border_mode == 'reflect101':
                border_mode = cv2.BORDER_REFLECT101
            elif border_mode == 'reflect':
                border_mode = cv2.BORDER_REFLECT
            if self.pad_blur:
                input_img = self.pad_input_imgs[idx]
            else:
                input_img = self.input_img
            cropped_face = cv2.warpAffine(
                input_img, affine_matrix, self.face_size, borderMode=border_mode, borderValue=(135, 133, 132))
            self.cropped_faces.append(cropped_face)

            # save the cropped face
            if save_cropped_path is not None:
                path = os.path.splitext(save_cropped_path)[0]
                save_path = f'{path}_{idx:02d}.{self.save_ext}'
                imwrite(cropped_face, save_path)

    def get_inverse_affine(self, save_inverse_affine_path=None):
        """Get inverse affine matrices (scaled by the upscale factor)."""
        for idx, affine_matrix in enumerate(self.affine_matrices):
            inverse_affine = cv2.invertAffineTransform(affine_matrix)
            inverse_affine *= self.upscale_factor
            self.inverse_affine_matrices.append(inverse_affine)

            # save the inverse affine matrix
            if save_inverse_affine_path is not None:
                path, _ = os.path.splitext(save_inverse_affine_path)
                save_path = f'{path}_{idx:02d}.pth'
                torch.save(inverse_affine, save_path)

    def add_restored_face(self, restored_face, input_face=None):
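        """Store a restored face; grayscale inputs are converted and color-matched to the input."""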
        if self.is_gray:
            restored_face = bgr2gray(restored_face)  # convert the face to grayscale
            if input_face is not None:
                restored_face = adain_npy(restored_face, input_face)  # transfer the color statistics
        self.restored_faces.append(restored_face)

    def paste_faces_to_input_image(self, save_path=None, upsample_img=None, draw_box=False, face_upsampler=None):
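        """Paste the restored faces back onto the upscaled input image and return it."""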
        h, w, _ = self.input_img.shape
        h_up, w_up = int(h * self.upscale_factor), int(w * self.upscale_factor)

        if upsample_img is None:
            # simply resize the background
            upsample_img = cv2.resize(self.input_img, (w_up, h_up), interpolation=cv2.INTER_LINEAR)
        else:
            upsample_img = cv2.resize(upsample_img, (w_up, h_up), interpolation=cv2.INTER_LANCZOS4)

        assert len(self.restored_faces) == len(
            self.inverse_affine_matrices), 'length of restored_faces and affine_matrices are different.'

| |
|
| | inv_mask_borders = []
|
| | for restored_face, inverse_affine in zip(self.restored_faces, self.inverse_affine_matrices):
|
| | if face_upsampler is not None:
|
| | restored_face = face_upsampler.enhance(restored_face, outscale=self.upscale_factor)[0]
|
| | inverse_affine /= self.upscale_factor
|
| | inverse_affine[:, 2] *= self.upscale_factor
|
| | face_size = (self.face_size[0]*self.upscale_factor, self.face_size[1]*self.upscale_factor)
|
| | else:
|
| |
|
| | if self.upscale_factor > 1:
|
| | extra_offset = 0.5 * self.upscale_factor
|
| | else:
|
| | extra_offset = 0
|
| | inverse_affine[:, 2] += extra_offset
|
| | face_size = self.face_size
|
| | inv_restored = cv2.warpAffine(restored_face, inverse_affine, (w_up, h_up))
|
| |
|
| |
|
| | mask = np.ones(face_size, dtype=np.float32)
|
| | inv_mask = cv2.warpAffine(mask, inverse_affine, (w_up, h_up))
|
| |
|
| | inv_mask_erosion = cv2.erode(
|
| | inv_mask, np.ones((int(2 * self.upscale_factor), int(2 * self.upscale_factor)), np.uint8))
|
| | pasted_face = inv_mask_erosion[:, :, None] * inv_restored
|
| | total_face_area = np.sum(inv_mask_erosion)
|

            if draw_box:
                h, w = face_size
                mask_border = np.ones((h, w, 3), dtype=np.float32)
                border = int(1400 / np.sqrt(total_face_area))
                mask_border[border:h - border, border:w - border, :] = 0
                inv_mask_border = cv2.warpAffine(mask_border, inverse_affine, (w_up, h_up))
                inv_mask_borders.append(inv_mask_border)

            # compute the fusion edge, then erode and blur for a soft seam
            w_edge = int(total_face_area**0.5) // 20
            erosion_radius = w_edge * 2
            inv_mask_center = cv2.erode(inv_mask_erosion, np.ones((erosion_radius, erosion_radius), np.uint8))
            blur_size = w_edge * 2
            inv_soft_mask = cv2.GaussianBlur(inv_mask_center, (blur_size + 1, blur_size + 1), 0)
            if len(upsample_img.shape) == 2:  # upsample_img is a gray image
                upsample_img = upsample_img[:, :, None]
            inv_soft_mask = inv_soft_mask[:, :, None]

            if len(upsample_img.shape) == 3 and upsample_img.shape[2] == 4:  # image with alpha channel
                alpha = upsample_img[:, :, 3:]
                upsample_img = inv_soft_mask * pasted_face + (1 - inv_soft_mask) * upsample_img[:, :, 0:3]
                upsample_img = np.concatenate((upsample_img, alpha), axis=2)
            else:
                upsample_img = inv_soft_mask * pasted_face + (1 - inv_soft_mask) * upsample_img

        if np.max(upsample_img) > 256:  # 16-bit image
            upsample_img = upsample_img.astype(np.uint16)
        else:
            upsample_img = upsample_img.astype(np.uint8)

        # draw green bounding boxes around the pasted faces
        if draw_box:
            img_color = np.ones([*upsample_img.shape], dtype=np.float32)
            img_color[:, :, 0] = 0
            img_color[:, :, 1] = 255
            img_color[:, :, 2] = 0
            for inv_mask_border in inv_mask_borders:
                upsample_img = inv_mask_border * img_color + (1 - inv_mask_border) * upsample_img

        if save_path is not None:
            path = os.path.splitext(save_path)[0]
            save_path = f'{path}.{self.save_ext}'
            imwrite(upsample_img, save_path)
        return upsample_img

    def init_face_restoration(self):
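        """Load the face restoration model and return (session, input name, output names)."""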
        session = ort.InferenceSession(self.res_model)
        input_name = session.get_inputs()[0].name
        output_names = [x.name for x in session.get_outputs()]
        return session, input_name, output_names

    def pre_process(self, img):
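        """Pad `img` for tiling and convert it to a normalized NCHW RGB float32 tensor."""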
        # pad the image to a multiple of the tile size
        tile_pad_h, tile_pad_w = 0, 0
        h, w = img.shape[0:2]

        if h % self.tile != 0:
            tile_pad_h = self.tile - h % self.tile
        if w % self.tile != 0:
            tile_pad_w = self.tile - w % self.tile
        img = np.pad(img, ((0, tile_pad_h), (0, tile_pad_w), (0, 0)), 'constant')

        # add a tile_pad border around the whole image
        img = np.pad(img, ((self.tile_pad, self.tile_pad), (self.tile_pad, self.tile_pad), (0, 0)), 'constant')

        # BGR -> RGB, [0, 255] -> [0, 1], HWC -> NCHW
        img = (img[..., ::-1] / 255).astype(np.float32)
        img = np.expand_dims(np.transpose(img, (2, 0, 1)), axis=0)

        return img

    def init_background_upsampling(self):
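        """Load the background upsampling model and return (session, input name, output names)."""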
        session = ort.InferenceSession(self.bg_model)
        input_name = session.get_inputs()[0].name
        output_names = [x.name for x in session.get_outputs()]
        return session, input_name, output_names

    def tile_process(self, img, origin_shape):
        """Crop the input image into tiles and process each tile separately.

        Finally, all the processed tiles are merged into one image.
        """
        batch, channel, height, width = img.shape
        output_height = int(round(height * self.scale))
        output_width = int(round(width * self.scale))
        output_shape = (batch, channel, output_height, output_width)
        origin_h, origin_w = origin_shape[0:2]

        # start with a black image
        output = np.zeros(output_shape)
        tiles_x = math.floor(width / self.tile)
        tiles_y = math.floor(height / self.tile)

        # valid region inside each output tile (the scaled tile_pad border is discarded)
        start_tile = int(round(self.tile_pad * self.scale))
        end_tile = int(round(self.tile * self.scale)) + start_tile

        # loop over all tiles
        for y in range(tiles_y):
            for x in range(tiles_x):
                # input tile area on the padded image
                ofs_x = x * self.tile
                ofs_y = y * self.tile
                input_start_x = ofs_x
                input_end_x = min(ofs_x + self.tile, width)
                input_start_y = ofs_y
                input_end_y = min(ofs_y + self.tile, height)

                # input tile including the tile_pad border
                input_tile = img[:, :, input_start_y:(input_end_y + 2 * self.tile_pad),
                                 input_start_x:(input_end_x + 2 * self.tile_pad)]

                # upscale the tile
                try:
                    output_tile = self.bg_session.run(self.bg_output, {self.bg_input: input_tile})
                except RuntimeError as error:
                    print('Error', error)

                # output tile area on the total image
                output_start_x = int(round(input_start_x * self.scale))
                output_end_x = int(round(input_end_x * self.scale))
                output_start_y = int(round(input_start_y * self.scale))
                output_end_y = int(round(input_end_y * self.scale))

                # put the valid part of the tile into the output image
                output[:, :, output_start_y:output_end_y,
                       output_start_x:output_end_x] = output_tile[0][:, :, start_tile:end_tile, start_tile:end_tile]

        # crop the padding; CHW RGB [0, 1] -> HWC BGR [0, 255]
        output = output[:, :, :int(round(origin_h * self.scale)), :int(round(origin_w * self.scale))].squeeze(0)
        output = np.transpose(output[::-1, ...], (1, 2, 0)).astype(np.float32)
        output = np.clip(output * 255.0, 0, 255).astype(np.uint8)

        # resize back to the original input size
        output = cv2.resize(output, (origin_w, origin_h), interpolation=cv2.INTER_LINEAR)

        return output

    def background_upsampling(self, img):
        """Background upsampling with Real-ESRGAN."""
        img_input = self.pre_process(img)
        output = self.tile_process(img_input, img.shape)
        return output

    def clean_all(self):
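        """Reset all per-image state."""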
        self.all_landmarks_5 = []
        self.restored_faces = []
        self.affine_matrices = []
        self.cropped_faces = []
        self.inverse_affine_matrices = []
        self.det_faces = []
        self.pad_input_imgs = []
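

# Minimal usage sketch (an illustration, not part of the pipeline above): the
# restoration network itself is driven by the caller through `rs_session`, so this
# demo pastes the aligned crops back unchanged. 'input.jpg' and 'output.png' are
# placeholder paths, and the model files named in __init__ must exist locally.
if __name__ == '__main__':
    helper = RestoreHelper(upscale_factor=2)
    helper.read_image('input.jpg')
    num_faces = helper.get_face_landmarks_5(only_center_face=False, eye_dist_threshold=5)
    print(f'Detected {num_faces} face(s)')
    helper.align_warp_face()
    for cropped_face in helper.cropped_faces:
        # placeholder: run `helper.rs_session` on a preprocessed `cropped_face` here
        helper.add_restored_face(cropped_face.astype(np.uint8), cropped_face)
    helper.get_inverse_affine()
    bg = helper.background_upsampling(helper.input_img)
    result = helper.paste_faces_to_input_image(save_path='output.png', upsample_img=bg)
    helper.clean_all()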