vkit.pipeline.text_detection.page_cropping
# Copyright 2022 vkit-x Administrator. All Rights Reserved.
#
# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
#
# The commercial license gives you the full rights to create and distribute software
# on your own terms without any SSPL license obligations. For more information,
# please see the "LICENSE_COMMERCIAL.txt" file.
#
# This project is also available under Server Side Public License (SSPL).
# The SSPL licensing is ideal for use cases such as open source projects with
# SSPL distribution, student/academic purposes, hobby projects, internal research
# projects without external distribution, or other projects where all SSPL
# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
from typing import Sequence, List, Optional, Tuple

import attrs
from numpy.random import Generator as RandomGenerator
import numpy as np
import cv2 as cv

from vkit.element import Box, Mask, ScoreMap, Image
from vkit.mechanism.cropper import Cropper
from .page_resizing import PageResizingStepOutput
from ..interface import PipelineStep, PipelineStepFactory


@attrs.define
class PageCroppingStepConfig:
    """Settings that control how crops are sampled from a resized page."""
    # Forwarded to Cropper as core_size; core_size**2 is also the area unit used
    # when the number of samples is estimated.
    core_size: int
    # Forwarded to Cropper as pad_size.
    pad_size: int
    # Number of crops to collect per page. None means "estimate from the page".
    num_samples: Optional[int] = None
    # Upper bound applied to the number of crops. Any falsy value (None or 0)
    # leaves the count uncapped.
    num_samples_max: Optional[int] = None
    # Multiplier applied to (non-zero pixel count / core_size**2) when estimating.
    num_samples_estimation_factor: float = 1.5
    # Forwarded to Cropper as pad_value.
    pad_value: int = 0
    # Reject crops whose text ratio is below text_ratio_min.
    drop_cropped_page_with_small_text_ratio: bool = True
    # Minimum ratio of positive char-mask pixels to the target core box area.
    text_ratio_min: float = 0.025
    # Reject crops whose active-region ratio is below active_region_ratio_min.
    drop_cropped_page_with_small_active_region: bool = True
    # Minimum ratio of active-mask pixels to the cropped image area.
    active_region_ratio_min: float = 0.4
    # Attach a DownsampledLabel to every accepted crop.
    enable_downsample_labeling: bool = True
    # Integer divisor; crop, pad and core sizes are asserted to be multiples of it.
    downsample_labeling_factor: int = 2


@attrs.define
class PageCroppingStepInput:
    """Input of the page cropping step: the output of the page resizing step."""
    page_resizing_step_output: PageResizingStepOutput


@attrs.define
class DownsampledLabel:
    """Label maps of one crop, resized by 1 / downsample_labeling_factor."""
    # (crop_size // factor, crop_size // factor).
    shape: Tuple[int, int]
    # Core-only label maps resized to (core_size // factor) square.
    page_char_mask: Mask
    page_seal_impression_char_mask: Mask
    page_char_height_score_map: ScoreMap
    page_text_line_mask: Mask
    page_text_line_height_score_map: ScoreMap
    # Square box starting at pad_size // factor and spanning core_size // factor.
    target_core_box: Box


@attrs.define
class CroppedPage:
    """One accepted crop of a page together with its label maps."""
    # Image cropped with the cropper's default arguments.
    page_image: Image
    # Label maps cropped with core_only=True.
    page_char_mask: Mask
    page_seal_impression_char_mask: Mask
    page_char_height_score_map: ScoreMap
    page_text_line_mask: Mask
    page_text_line_height_score_map: ScoreMap
    # The cropper's target_core_box.
    target_core_box: Box
    # None when enable_downsample_labeling is off.
    downsampled_label: Optional[DownsampledLabel]


@attrs.define
class PageCroppingStepOutput:
    """Output of the page cropping step: the accepted crops, possibly empty."""
    cropped_pages: Sequence[CroppedPage]


class PageCroppingStep(
    PipelineStep[
        PageCroppingStepConfig,
        PageCroppingStepInput,
        PageCroppingStepOutput,
    ]
):  # yapf: disable
    """Pipeline step that samples filtered crops from a resized page."""

    def __init__(self, config: PageCroppingStepConfig):
        super().__init__(config)

    def sample_cropped_page(
        self,
        page_image: Image,
        page_active_mask: Mask,
        page_char_mask: Mask,
        page_seal_impression_char_mask: Mask,
        page_char_height_score_map: ScoreMap,
        page_text_line_mask: Mask,
        page_text_line_height_score_map: ScoreMap,
        rng: RandomGenerator,
        force_crop_center: bool = False,
    ) -> Optional[CroppedPage]:
        """Take one crop of the page and its label maps, or reject it.

        The crop window comes from a ``Cropper`` built with the configured
        ``core_size``, ``pad_size`` and ``pad_value``. The image and the active
        mask are cropped with the cropper's default arguments, while every
        label mask / score map is cropped with ``core_only=True``.

        Args:
            page_image: Page image to crop.
            page_active_mask: Mask used for the active-region filter.
            page_char_mask: Character mask; also drives the text-ratio filter.
            page_seal_impression_char_mask: Seal impression character mask.
            page_char_height_score_map: Character height score map.
            page_text_line_mask: Text line mask.
            page_text_line_height_score_map: Text line height score map.
            rng: Random generator, only used when ``force_crop_center`` is false.
            force_crop_center: Center the crop on the page instead of sampling
                a random proposal.

        Returns:
            The ``CroppedPage``, or ``None`` when an enabled filter rejects the
            crop (text ratio below ``text_ratio_min`` or active-region ratio
            below ``active_region_ratio_min``).

        Raises:
            AssertionError: Downsample labeling is enabled and the crop, pad or
                core size is not a multiple of ``downsample_labeling_factor``,
                or the cropper's target core box is not ``core_size`` square.
        """
        config = self.config

        if force_crop_center:
            cropper = Cropper.create_from_center_point(
                shape=page_image.shape,
                core_size=config.core_size,
                pad_size=config.pad_size,
                pad_value=config.pad_value,
                center_point=Box.from_shapable(page_image).get_center_point(),
            )
        else:
            cropper = Cropper.create_from_random_proposal(
                shape=page_image.shape,
                core_size=config.core_size,
                pad_size=config.pad_size,
                pad_value=config.pad_value,
                rng=rng,
            )

        # The text-ratio filter only needs the char mask, so crop it first and
        # bail out before paying for the remaining crops.
        page_char_mask = cropper.crop_mask(page_char_mask, core_only=True)
        if config.drop_cropped_page_with_small_text_ratio:
            num_text_pixels = int((page_char_mask.mat > 0).sum())
            text_ratio = num_text_pixels / cropper.target_core_box.area
            if text_ratio < config.text_ratio_min:
                return None

        # The active-region filter needs the cropped image and active mask.
        page_image = cropper.crop_image(page_image)
        page_active_mask = cropper.crop_mask(page_active_mask)
        if config.drop_cropped_page_with_small_active_region:
            num_active_pixels = int(page_active_mask.np_mask.sum())
            active_region_ratio = num_active_pixels / page_image.area
            if active_region_ratio < config.active_region_ratio_min:
                return None

        # The crop is accepted: produce the remaining core-only labels.
        page_seal_impression_char_mask = cropper.crop_mask(
            page_seal_impression_char_mask,
            core_only=True,
        )
        page_char_height_score_map = cropper.crop_score_map(
            page_char_height_score_map,
            core_only=True,
        )
        page_text_line_mask = cropper.crop_mask(
            page_text_line_mask,
            core_only=True,
        )
        page_text_line_height_score_map = cropper.crop_score_map(
            page_text_line_height_score_map,
            core_only=True,
        )

        downsampled_label: Optional[DownsampledLabel] = None
        if config.enable_downsample_labeling:
            factor = config.downsample_labeling_factor
            core_box = cropper.target_core_box

            # Every size must shrink to a whole number of pixels.
            assert cropper.crop_size % factor == 0
            assert config.pad_size % factor == 0
            assert config.core_size % factor == 0
            assert core_box.height == core_box.width == config.core_size

            downsampled_size = cropper.crop_size // factor
            downsampled_pad_size = config.pad_size // factor
            downsampled_core_size = config.core_size // factor

            # The far edge is begin + size - 1, i.e. both edges are inclusive.
            core_begin = downsampled_pad_size
            core_end = core_begin + downsampled_core_size - 1
            downsampled_target_core_box = Box(
                up=core_begin,
                down=core_end,
                left=core_begin,
                right=core_end,
            )

            def shrink_mask(mask: Mask):
                # Detach from the box, then resize to the downsampled core size.
                return mask.to_box_detached().to_resized_mask(
                    resized_height=downsampled_core_size,
                    resized_width=downsampled_core_size,
                    cv_resize_interpolation=cv.INTER_AREA,
                )

            def shrink_score_map(score_map: ScoreMap):
                # Same as shrink_mask, for score maps.
                return score_map.to_box_detached().to_resized_score_map(
                    resized_height=downsampled_core_size,
                    resized_width=downsampled_core_size,
                    cv_resize_interpolation=cv.INTER_AREA,
                )

            downsampled_label = DownsampledLabel(
                shape=(downsampled_size, downsampled_size),
                page_char_mask=shrink_mask(page_char_mask),
                page_seal_impression_char_mask=shrink_mask(page_seal_impression_char_mask),
                page_char_height_score_map=shrink_score_map(page_char_height_score_map),
                page_text_line_mask=shrink_mask(page_text_line_mask),
                page_text_line_height_score_map=shrink_score_map(page_text_line_height_score_map),
                target_core_box=downsampled_target_core_box,
            )

        return CroppedPage(
            page_image=page_image,
            page_char_mask=page_char_mask,
            page_seal_impression_char_mask=page_seal_impression_char_mask,
            page_char_height_score_map=page_char_height_score_map,
            page_text_line_mask=page_text_line_mask,
            page_text_line_height_score_map=page_text_line_height_score_map,
            target_core_box=cropper.target_core_box,
            downsampled_label=downsampled_label,
        )

    def run(self, input: PageCroppingStepInput, rng: RandomGenerator):
        """Sample crops from the resized page until enough are accepted.

        The first attempt is always the page-centered crop; later attempts are
        random proposals. Sampling stops once the target number of crops has
        been collected or after ``max(3, 2 * target)`` attempts, so the output
        may hold fewer crops than requested (possibly none).

        Args:
            input: Wraps the output of the page resizing step.
            rng: Random generator forwarded to ``sample_cropped_page``.

        Returns:
            A ``PageCroppingStepOutput`` holding the accepted crops.
        """
        config = self.config
        resized = input.page_resizing_step_output

        target_count = config.num_samples
        if target_count is None:
            # Estimate from the number of pixels that are non-zero in at least
            # one channel, measured in units of the core area.
            nonzero_pixel_count = int((np.amax(resized.page_image.mat, axis=2) > 0).sum())
            estimated_count = round(
                nonzero_pixel_count / config.core_size**2 * config.num_samples_estimation_factor
            )
            target_count = max(1, estimated_count)

        # Truthiness test on purpose: both None and 0 leave the count uncapped.
        if config.num_samples_max:
            target_count = min(target_count, config.num_samples_max)

        attempt_budget = max(3, 2 * target_count)
        cropped_pages: List[CroppedPage] = []

        for attempt in range(attempt_budget):
            if len(cropped_pages) >= target_count:
                break
            candidate = self.sample_cropped_page(
                page_image=resized.page_image,
                page_active_mask=resized.page_active_mask,
                page_char_mask=resized.page_char_mask,
                page_seal_impression_char_mask=resized.page_seal_impression_char_mask,
                page_char_height_score_map=resized.page_char_height_score_map,
                page_text_line_mask=resized.page_text_line_mask,
                page_text_line_height_score_map=resized.page_text_line_height_score_map,
                rng=rng,
                force_crop_center=(attempt == 0),
            )
            if candidate is not None:
                cropped_pages.append(candidate)

        return PageCroppingStepOutput(cropped_pages=cropped_pages)


page_cropping_step_factory = PipelineStepFactory(PageCroppingStep)
class PageCroppingStepConfig:
    """Settings that control how crops are sampled from a resized page."""
    # Forwarded to Cropper as core_size; core_size**2 is also the area unit used
    # when the number of samples is estimated.
    core_size: int
    # Forwarded to Cropper as pad_size.
    pad_size: int
    # Number of crops to collect per page. None means "estimate from the page".
    num_samples: Optional[int] = None
    # Upper bound applied to the number of crops. Any falsy value (None or 0)
    # leaves the count uncapped.
    num_samples_max: Optional[int] = None
    # Multiplier applied to (non-zero pixel count / core_size**2) when estimating.
    num_samples_estimation_factor: float = 1.5
    # Forwarded to Cropper as pad_value.
    pad_value: int = 0
    # Reject crops whose text ratio is below text_ratio_min.
    drop_cropped_page_with_small_text_ratio: bool = True
    # Minimum ratio of positive char-mask pixels to the target core box area.
    text_ratio_min: float = 0.025
    # Reject crops whose active-region ratio is below active_region_ratio_min.
    drop_cropped_page_with_small_active_region: bool = True
    # Minimum ratio of active-mask pixels to the cropped image area.
    active_region_ratio_min: float = 0.4
    # Attach a DownsampledLabel to every accepted crop.
    enable_downsample_labeling: bool = True
    # Integer divisor; crop, pad and core sizes are asserted to be multiples of it.
    downsample_labeling_factor: int = 2
def __init__(
    self,
    core_size,
    pad_size,
    num_samples=attr_dict['num_samples'].default,
    num_samples_max=attr_dict['num_samples_max'].default,
    num_samples_estimation_factor=attr_dict['num_samples_estimation_factor'].default,
    pad_value=attr_dict['pad_value'].default,
    drop_cropped_page_with_small_text_ratio=attr_dict['drop_cropped_page_with_small_text_ratio'].default,
    text_ratio_min=attr_dict['text_ratio_min'].default,
    drop_cropped_page_with_small_active_region=attr_dict['drop_cropped_page_with_small_active_region'].default,
    active_region_ratio_min=attr_dict['active_region_ratio_min'].default,
    enable_downsample_labeling=attr_dict['enable_downsample_labeling'].default,
    downsample_labeling_factor=attr_dict['downsample_labeling_factor'].default
):
    """Store every constructor argument on the instance, unchanged.

    ``core_size`` and ``pad_size`` are required; all other parameters take
    their default from ``attr_dict``.
    """
    # NOTE(review): `attr_dict` is not defined in this snippet; presumably it is
    # supplied by the attrs code generator when this source is compiled. Confirm
    # before reusing this text anywhere else.
    self.core_size = core_size
    self.pad_size = pad_size
    self.num_samples = num_samples
    self.num_samples_max = num_samples_max
    self.num_samples_estimation_factor = num_samples_estimation_factor
    self.pad_value = pad_value
    self.drop_cropped_page_with_small_text_ratio = drop_cropped_page_with_small_text_ratio
    self.text_ratio_min = text_ratio_min
    self.drop_cropped_page_with_small_active_region = drop_cropped_page_with_small_active_region
    self.active_region_ratio_min = active_region_ratio_min
    self.enable_downsample_labeling = enable_downsample_labeling
    self.downsample_labeling_factor = downsample_labeling_factor
Method generated by attrs for class PageCroppingStepConfig.
def __init__(self, page_resizing_step_output):
    """Store the page resizing step output on the instance, unchanged."""
    self.page_resizing_step_output = page_resizing_step_output
Method generated by attrs for class PageCroppingStepInput.
class DownsampledLabel:
    """Label maps of one crop, resized by 1 / downsample_labeling_factor."""
    # (crop_size // factor, crop_size // factor).
    shape: Tuple[int, int]
    # Core-only label maps resized to (core_size // factor) square.
    page_char_mask: Mask
    page_seal_impression_char_mask: Mask
    page_char_height_score_map: ScoreMap
    page_text_line_mask: Mask
    page_text_line_height_score_map: ScoreMap
    # Square box starting at pad_size // factor and spanning core_size // factor.
    target_core_box: Box
def __init__(
    self,
    shape,
    page_char_mask,
    page_seal_impression_char_mask,
    page_char_height_score_map,
    page_text_line_mask,
    page_text_line_height_score_map,
    target_core_box
):
    """Store every constructor argument on the instance, unchanged."""
    self.shape = shape
    self.page_char_mask = page_char_mask
    self.page_seal_impression_char_mask = page_seal_impression_char_mask
    self.page_char_height_score_map = page_char_height_score_map
    self.page_text_line_mask = page_text_line_mask
    self.page_text_line_height_score_map = page_text_line_height_score_map
    self.target_core_box = target_core_box
Method generated by attrs for class DownsampledLabel.
class CroppedPage:
    """One accepted crop of a page together with its label maps."""
    # Image cropped with the cropper's default arguments.
    page_image: Image
    # Label maps cropped with core_only=True.
    page_char_mask: Mask
    page_seal_impression_char_mask: Mask
    page_char_height_score_map: ScoreMap
    page_text_line_mask: Mask
    page_text_line_height_score_map: ScoreMap
    # The cropper's target_core_box.
    target_core_box: Box
    # None when enable_downsample_labeling is off.
    downsampled_label: Optional[DownsampledLabel]
def __init__(
    self,
    page_image,
    page_char_mask,
    page_seal_impression_char_mask,
    page_char_height_score_map,
    page_text_line_mask,
    page_text_line_height_score_map,
    target_core_box,
    downsampled_label
):
    """Store every constructor argument on the instance, unchanged."""
    self.page_image = page_image
    self.page_char_mask = page_char_mask
    self.page_seal_impression_char_mask = page_seal_impression_char_mask
    self.page_char_height_score_map = page_char_height_score_map
    self.page_text_line_mask = page_text_line_mask
    self.page_text_line_height_score_map = page_text_line_height_score_map
    self.target_core_box = target_core_box
    self.downsampled_label = downsampled_label
Method generated by attrs for class CroppedPage.
Method generated by attrs for class PageCroppingStepOutput.
class PageCroppingStep(
    PipelineStep[
        PageCroppingStepConfig,
        PageCroppingStepInput,
        PageCroppingStepOutput,
    ]
):  # yapf: disable
    """Pipeline step that samples filtered crops from a resized page."""

    def __init__(self, config: PageCroppingStepConfig):
        super().__init__(config)

    def sample_cropped_page(
        self,
        page_image: Image,
        page_active_mask: Mask,
        page_char_mask: Mask,
        page_seal_impression_char_mask: Mask,
        page_char_height_score_map: ScoreMap,
        page_text_line_mask: Mask,
        page_text_line_height_score_map: ScoreMap,
        rng: RandomGenerator,
        force_crop_center: bool = False,
    ) -> Optional[CroppedPage]:
        """Take one crop of the page and its label maps, or reject it.

        The crop window comes from a ``Cropper`` built with the configured
        ``core_size``, ``pad_size`` and ``pad_value``. The image and the active
        mask are cropped with the cropper's default arguments, while every
        label mask / score map is cropped with ``core_only=True``.

        Args:
            page_image: Page image to crop.
            page_active_mask: Mask used for the active-region filter.
            page_char_mask: Character mask; also drives the text-ratio filter.
            page_seal_impression_char_mask: Seal impression character mask.
            page_char_height_score_map: Character height score map.
            page_text_line_mask: Text line mask.
            page_text_line_height_score_map: Text line height score map.
            rng: Random generator, only used when ``force_crop_center`` is false.
            force_crop_center: Center the crop on the page instead of sampling
                a random proposal.

        Returns:
            The ``CroppedPage``, or ``None`` when an enabled filter rejects the
            crop (text ratio below ``text_ratio_min`` or active-region ratio
            below ``active_region_ratio_min``).

        Raises:
            AssertionError: Downsample labeling is enabled and the crop, pad or
                core size is not a multiple of ``downsample_labeling_factor``,
                or the cropper's target core box is not ``core_size`` square.
        """
        config = self.config

        if force_crop_center:
            cropper = Cropper.create_from_center_point(
                shape=page_image.shape,
                core_size=config.core_size,
                pad_size=config.pad_size,
                pad_value=config.pad_value,
                center_point=Box.from_shapable(page_image).get_center_point(),
            )
        else:
            cropper = Cropper.create_from_random_proposal(
                shape=page_image.shape,
                core_size=config.core_size,
                pad_size=config.pad_size,
                pad_value=config.pad_value,
                rng=rng,
            )

        # The text-ratio filter only needs the char mask, so crop it first and
        # bail out before paying for the remaining crops.
        page_char_mask = cropper.crop_mask(page_char_mask, core_only=True)
        if config.drop_cropped_page_with_small_text_ratio:
            num_text_pixels = int((page_char_mask.mat > 0).sum())
            text_ratio = num_text_pixels / cropper.target_core_box.area
            if text_ratio < config.text_ratio_min:
                return None

        # The active-region filter needs the cropped image and active mask.
        page_image = cropper.crop_image(page_image)
        page_active_mask = cropper.crop_mask(page_active_mask)
        if config.drop_cropped_page_with_small_active_region:
            num_active_pixels = int(page_active_mask.np_mask.sum())
            active_region_ratio = num_active_pixels / page_image.area
            if active_region_ratio < config.active_region_ratio_min:
                return None

        # The crop is accepted: produce the remaining core-only labels.
        page_seal_impression_char_mask = cropper.crop_mask(
            page_seal_impression_char_mask,
            core_only=True,
        )
        page_char_height_score_map = cropper.crop_score_map(
            page_char_height_score_map,
            core_only=True,
        )
        page_text_line_mask = cropper.crop_mask(
            page_text_line_mask,
            core_only=True,
        )
        page_text_line_height_score_map = cropper.crop_score_map(
            page_text_line_height_score_map,
            core_only=True,
        )

        downsampled_label: Optional[DownsampledLabel] = None
        if config.enable_downsample_labeling:
            factor = config.downsample_labeling_factor
            core_box = cropper.target_core_box

            # Every size must shrink to a whole number of pixels.
            assert cropper.crop_size % factor == 0
            assert config.pad_size % factor == 0
            assert config.core_size % factor == 0
            assert core_box.height == core_box.width == config.core_size

            downsampled_size = cropper.crop_size // factor
            downsampled_pad_size = config.pad_size // factor
            downsampled_core_size = config.core_size // factor

            # The far edge is begin + size - 1, i.e. both edges are inclusive.
            core_begin = downsampled_pad_size
            core_end = core_begin + downsampled_core_size - 1
            downsampled_target_core_box = Box(
                up=core_begin,
                down=core_end,
                left=core_begin,
                right=core_end,
            )

            def shrink_mask(mask: Mask):
                # Detach from the box, then resize to the downsampled core size.
                return mask.to_box_detached().to_resized_mask(
                    resized_height=downsampled_core_size,
                    resized_width=downsampled_core_size,
                    cv_resize_interpolation=cv.INTER_AREA,
                )

            def shrink_score_map(score_map: ScoreMap):
                # Same as shrink_mask, for score maps.
                return score_map.to_box_detached().to_resized_score_map(
                    resized_height=downsampled_core_size,
                    resized_width=downsampled_core_size,
                    cv_resize_interpolation=cv.INTER_AREA,
                )

            downsampled_label = DownsampledLabel(
                shape=(downsampled_size, downsampled_size),
                page_char_mask=shrink_mask(page_char_mask),
                page_seal_impression_char_mask=shrink_mask(page_seal_impression_char_mask),
                page_char_height_score_map=shrink_score_map(page_char_height_score_map),
                page_text_line_mask=shrink_mask(page_text_line_mask),
                page_text_line_height_score_map=shrink_score_map(page_text_line_height_score_map),
                target_core_box=downsampled_target_core_box,
            )

        return CroppedPage(
            page_image=page_image,
            page_char_mask=page_char_mask,
            page_seal_impression_char_mask=page_seal_impression_char_mask,
            page_char_height_score_map=page_char_height_score_map,
            page_text_line_mask=page_text_line_mask,
            page_text_line_height_score_map=page_text_line_height_score_map,
            target_core_box=cropper.target_core_box,
            downsampled_label=downsampled_label,
        )

    def run(self, input: PageCroppingStepInput, rng: RandomGenerator):
        """Sample crops from the resized page until enough are accepted.

        The first attempt is always the page-centered crop; later attempts are
        random proposals. Sampling stops once the target number of crops has
        been collected or after ``max(3, 2 * target)`` attempts, so the output
        may hold fewer crops than requested (possibly none).

        Args:
            input: Wraps the output of the page resizing step.
            rng: Random generator forwarded to ``sample_cropped_page``.

        Returns:
            A ``PageCroppingStepOutput`` holding the accepted crops.
        """
        config = self.config
        resized = input.page_resizing_step_output

        target_count = config.num_samples
        if target_count is None:
            # Estimate from the number of pixels that are non-zero in at least
            # one channel, measured in units of the core area.
            nonzero_pixel_count = int((np.amax(resized.page_image.mat, axis=2) > 0).sum())
            estimated_count = round(
                nonzero_pixel_count / config.core_size**2 * config.num_samples_estimation_factor
            )
            target_count = max(1, estimated_count)

        # Truthiness test on purpose: both None and 0 leave the count uncapped.
        if config.num_samples_max:
            target_count = min(target_count, config.num_samples_max)

        attempt_budget = max(3, 2 * target_count)
        cropped_pages: List[CroppedPage] = []

        for attempt in range(attempt_budget):
            if len(cropped_pages) >= target_count:
                break
            candidate = self.sample_cropped_page(
                page_image=resized.page_image,
                page_active_mask=resized.page_active_mask,
                page_char_mask=resized.page_char_mask,
                page_seal_impression_char_mask=resized.page_seal_impression_char_mask,
                page_char_height_score_map=resized.page_char_height_score_map,
                page_text_line_mask=resized.page_text_line_mask,
                page_text_line_height_score_map=resized.page_text_line_height_score_map,
                rng=rng,
                force_crop_center=(attempt == 0),
            )
            if candidate is not None:
                cropped_pages.append(candidate)

        return PageCroppingStepOutput(cropped_pages=cropped_pages)
Abstract base class for generic types.
A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::
    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
        # Etc.
This class can then be used as follows::
    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default
def sample_cropped_page(
    self,
    page_image: Image,
    page_active_mask: Mask,
    page_char_mask: Mask,
    page_seal_impression_char_mask: Mask,
    page_char_height_score_map: ScoreMap,
    page_text_line_mask: Mask,
    page_text_line_height_score_map: ScoreMap,
    rng: RandomGenerator,
    force_crop_center: bool = False,
) -> Optional[CroppedPage]:
    """Take one crop of the page and its label maps, or reject it.

    The crop window comes from a ``Cropper`` built with the configured
    ``core_size``, ``pad_size`` and ``pad_value``. The image and the active
    mask are cropped with the cropper's default arguments, while every label
    mask / score map is cropped with ``core_only=True``.

    Args:
        page_image: Page image to crop.
        page_active_mask: Mask used for the active-region filter.
        page_char_mask: Character mask; also drives the text-ratio filter.
        page_seal_impression_char_mask: Seal impression character mask.
        page_char_height_score_map: Character height score map.
        page_text_line_mask: Text line mask.
        page_text_line_height_score_map: Text line height score map.
        rng: Random generator, only used when ``force_crop_center`` is false.
        force_crop_center: Center the crop on the page instead of sampling a
            random proposal.

    Returns:
        The ``CroppedPage``, or ``None`` when an enabled filter rejects the
        crop (text ratio below ``text_ratio_min`` or active-region ratio below
        ``active_region_ratio_min``).

    Raises:
        AssertionError: Downsample labeling is enabled and the crop, pad or
            core size is not a multiple of ``downsample_labeling_factor``, or
            the cropper's target core box is not ``core_size`` square.
    """
    config = self.config

    if force_crop_center:
        cropper = Cropper.create_from_center_point(
            shape=page_image.shape,
            core_size=config.core_size,
            pad_size=config.pad_size,
            pad_value=config.pad_value,
            center_point=Box.from_shapable(page_image).get_center_point(),
        )
    else:
        cropper = Cropper.create_from_random_proposal(
            shape=page_image.shape,
            core_size=config.core_size,
            pad_size=config.pad_size,
            pad_value=config.pad_value,
            rng=rng,
        )

    # The text-ratio filter only needs the char mask, so crop it first and
    # bail out before paying for the remaining crops.
    page_char_mask = cropper.crop_mask(page_char_mask, core_only=True)
    if config.drop_cropped_page_with_small_text_ratio:
        num_text_pixels = int((page_char_mask.mat > 0).sum())
        text_ratio = num_text_pixels / cropper.target_core_box.area
        if text_ratio < config.text_ratio_min:
            return None

    # The active-region filter needs the cropped image and active mask.
    page_image = cropper.crop_image(page_image)
    page_active_mask = cropper.crop_mask(page_active_mask)
    if config.drop_cropped_page_with_small_active_region:
        num_active_pixels = int(page_active_mask.np_mask.sum())
        active_region_ratio = num_active_pixels / page_image.area
        if active_region_ratio < config.active_region_ratio_min:
            return None

    # The crop is accepted: produce the remaining core-only labels.
    page_seal_impression_char_mask = cropper.crop_mask(
        page_seal_impression_char_mask,
        core_only=True,
    )
    page_char_height_score_map = cropper.crop_score_map(
        page_char_height_score_map,
        core_only=True,
    )
    page_text_line_mask = cropper.crop_mask(
        page_text_line_mask,
        core_only=True,
    )
    page_text_line_height_score_map = cropper.crop_score_map(
        page_text_line_height_score_map,
        core_only=True,
    )

    downsampled_label: Optional[DownsampledLabel] = None
    if config.enable_downsample_labeling:
        factor = config.downsample_labeling_factor
        core_box = cropper.target_core_box

        # Every size must shrink to a whole number of pixels.
        assert cropper.crop_size % factor == 0
        assert config.pad_size % factor == 0
        assert config.core_size % factor == 0
        assert core_box.height == core_box.width == config.core_size

        downsampled_size = cropper.crop_size // factor
        downsampled_pad_size = config.pad_size // factor
        downsampled_core_size = config.core_size // factor

        # The far edge is begin + size - 1, i.e. both edges are inclusive.
        core_begin = downsampled_pad_size
        core_end = core_begin + downsampled_core_size - 1
        downsampled_target_core_box = Box(
            up=core_begin,
            down=core_end,
            left=core_begin,
            right=core_end,
        )

        def shrink_mask(mask: Mask):
            # Detach from the box, then resize to the downsampled core size.
            return mask.to_box_detached().to_resized_mask(
                resized_height=downsampled_core_size,
                resized_width=downsampled_core_size,
                cv_resize_interpolation=cv.INTER_AREA,
            )

        def shrink_score_map(score_map: ScoreMap):
            # Same as shrink_mask, for score maps.
            return score_map.to_box_detached().to_resized_score_map(
                resized_height=downsampled_core_size,
                resized_width=downsampled_core_size,
                cv_resize_interpolation=cv.INTER_AREA,
            )

        downsampled_label = DownsampledLabel(
            shape=(downsampled_size, downsampled_size),
            page_char_mask=shrink_mask(page_char_mask),
            page_seal_impression_char_mask=shrink_mask(page_seal_impression_char_mask),
            page_char_height_score_map=shrink_score_map(page_char_height_score_map),
            page_text_line_mask=shrink_mask(page_text_line_mask),
            page_text_line_height_score_map=shrink_score_map(page_text_line_height_score_map),
            target_core_box=downsampled_target_core_box,
        )

    return CroppedPage(
        page_image=page_image,
        page_char_mask=page_char_mask,
        page_seal_impression_char_mask=page_seal_impression_char_mask,
        page_char_height_score_map=page_char_height_score_map,
        page_text_line_mask=page_text_line_mask,
        page_text_line_height_score_map=page_text_line_height_score_map,
        target_core_box=cropper.target_core_box,
        downsampled_label=downsampled_label,
    )
def run(self, input: PageCroppingStepInput, rng: RandomGenerator):
    """Sample crops from the resized page until enough are accepted.

    The first attempt is always the page-centered crop; later attempts are
    random proposals. Sampling stops once the target number of crops has been
    collected or after ``max(3, 2 * target)`` attempts, so the output may hold
    fewer crops than requested (possibly none).

    Args:
        input: Wraps the output of the page resizing step.
        rng: Random generator forwarded to ``sample_cropped_page``.

    Returns:
        A ``PageCroppingStepOutput`` holding the accepted crops.
    """
    config = self.config
    resized = input.page_resizing_step_output

    target_count = config.num_samples
    if target_count is None:
        # Estimate from the number of pixels that are non-zero in at least one
        # channel, measured in units of the core area.
        nonzero_pixel_count = int((np.amax(resized.page_image.mat, axis=2) > 0).sum())
        estimated_count = round(
            nonzero_pixel_count / config.core_size**2 * config.num_samples_estimation_factor
        )
        target_count = max(1, estimated_count)

    # Truthiness test on purpose: both None and 0 leave the count uncapped.
    if config.num_samples_max:
        target_count = min(target_count, config.num_samples_max)

    attempt_budget = max(3, 2 * target_count)
    cropped_pages: List[CroppedPage] = []

    for attempt in range(attempt_budget):
        if len(cropped_pages) >= target_count:
            break
        candidate = self.sample_cropped_page(
            page_image=resized.page_image,
            page_active_mask=resized.page_active_mask,
            page_char_mask=resized.page_char_mask,
            page_seal_impression_char_mask=resized.page_seal_impression_char_mask,
            page_char_height_score_map=resized.page_char_height_score_map,
            page_text_line_mask=resized.page_text_line_mask,
            page_text_line_height_score_map=resized.page_text_line_height_score_map,
            rng=rng,
            force_crop_center=(attempt == 0),
        )
        if candidate is not None:
            cropped_pages.append(candidate)

    return PageCroppingStepOutput(cropped_pages=cropped_pages)