vkit.pipeline.text_detection.page_cropping
1# Copyright 2022 vkit-x Administrator. All Rights Reserved. 2# 3# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses. 4# 5# The commercial license gives you the full rights to create and distribute software 6# on your own terms without any SSPL license obligations. For more information, 7# please see the "LICENSE_COMMERCIAL.txt" file. 8# 9# This project is also available under Server Side Public License (SSPL). 10# The SSPL licensing is ideal for use cases such as open source projects with 11# SSPL distribution, student/academic purposes, hobby projects, internal research 12# projects without external distribution, or other projects where all SSPL 13# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file. 14from typing import Sequence, List, Optional, Tuple 15 16import attrs 17from numpy.random import Generator as RandomGenerator 18import numpy as np 19import cv2 as cv 20 21from vkit.element import Box, Mask, ScoreMap, Image 22from vkit.mechanism.cropper import Cropper 23from .page_resizing import PageResizingStepOutput 24from ..interface import PipelineStep, PipelineStepFactory 25 26 27@attrs.define 28class PageCroppingStepConfig: 29 core_size: int 30 pad_size: int 31 num_samples: Optional[int] = None 32 num_samples_max: Optional[int] = None 33 num_samples_estimation_factor: float = 1.5 34 pad_value: int = 0 35 drop_cropped_page_with_small_text_ratio: bool = True 36 text_ratio_min: float = 0.025 37 drop_cropped_page_with_small_active_region: bool = True 38 active_region_ratio_min: float = 0.4 39 enable_downsample_labeling: bool = True 40 downsample_labeling_factor: int = 2 41 42 43@attrs.define 44class PageCroppingStepInput: 45 page_resizing_step_output: PageResizingStepOutput 46 47 48@attrs.define 49class DownsampledLabel: 50 shape: Tuple[int, int] 51 page_char_mask: Mask 52 page_char_height_score_map: ScoreMap 53 page_text_line_mask: Mask 54 page_text_line_height_score_map: ScoreMap 55 core_box: Box 56 
@attrs.define
class CroppedPage:
    """One crop sampled from a page together with its labels."""

    # Result of cropper.crop_image on the page image.
    page_image: Image
    # The four label fields below are cropped with core_only=True.
    page_char_mask: Mask
    page_char_height_score_map: ScoreMap
    page_text_line_mask: Mask
    page_text_line_height_score_map: ScoreMap
    # cropper.core_box of the cropper that produced this crop.
    core_box: Box
    # Present only when enable_downsample_labeling is set in the config.
    downsampled_label: Optional[DownsampledLabel]


@attrs.define
class PageCroppingStepOutput:
    """Output of the page cropping step."""

    # Crops that passed the configured filters; may hold fewer than requested.
    cropped_pages: Sequence[CroppedPage]


class PageCroppingStep(
    PipelineStep[
        PageCroppingStepConfig,
        PageCroppingStepInput,
        PageCroppingStepOutput,
    ]
):  # yapf: disable
    """Pipeline step that samples crops (and their labels) from a resized page."""

    def __init__(self, config: PageCroppingStepConfig):
        super().__init__(config)

    def sample_cropped_page(
        self,
        page_image: Image,
        page_active_mask: Mask,
        page_char_mask: Mask,
        page_char_height_score_map: ScoreMap,
        page_text_line_mask: Mask,
        page_text_line_height_score_map: ScoreMap,
        rng: RandomGenerator,
        force_crop_center: bool = False,
    ):
        """Sample a single crop of the page and of its labels.

        Args:
            page_image: Page image to crop.
            page_active_mask: Mask cropped over the whole crop; used only for the
                active-region filter.
            page_char_mask: Char mask, cropped to the core region.
            page_char_height_score_map: Char height score map, cropped to the core.
            page_text_line_mask: Text line mask, cropped to the core region.
            page_text_line_height_score_map: Text line height score map, cropped
                to the core region.
            rng: Random generator handed to ``Cropper.create``.
            force_crop_center: When True, build the cropper from the center point
                of the page instead of using ``rng``.

        Returns:
            A ``CroppedPage``, or ``None`` when the crop is rejected by the text
            ratio filter or by the active-region filter.

        Raises:
            AssertionError: If downsample labeling is enabled and the crop size,
                pad size or core size is not divisible by the downsample factor,
                or the core box is not a ``core_size`` square.
        """
        config = self.config

        # Arguments shared by both Cropper constructors.
        cropper_kwargs = {
            'shape': page_image.shape,
            'core_size': config.core_size,
            'pad_size': config.pad_size,
            'pad_value': config.pad_value,
        }
        if force_crop_center:
            cropper = Cropper.create_from_center_point(
                center_point=Box.from_shapable(page_image).get_center_point(),
                **cropper_kwargs,
            )
        else:
            cropper = Cropper.create(rng=rng, **cropper_kwargs)

        cropped_image = cropper.crop_image(page_image)
        cropped_active_mask = cropper.crop_mask(page_active_mask)

        # Labels are only kept for the core region of the crop.
        core_char_mask = cropper.crop_mask(page_char_mask, core_only=True)
        core_char_height_score_map = cropper.crop_score_map(
            page_char_height_score_map,
            core_only=True,
        )
        core_text_line_mask = cropper.crop_mask(page_text_line_mask, core_only=True)
        core_text_line_height_score_map = cropper.crop_score_map(
            page_text_line_height_score_map,
            core_only=True,
        )

        if config.drop_cropped_page_with_small_text_ratio:
            text_ratio = (core_char_mask.mat > 0).sum() / cropper.core_box.area
            if text_ratio < config.text_ratio_min:
                return None

        if config.drop_cropped_page_with_small_active_region:
            # The denominator is the area of the cropped image, not of the core.
            active_region_ratio = int(cropped_active_mask.np_mask.sum()) / cropped_image.area
            if active_region_ratio < config.active_region_ratio_min:
                return None

        downsampled_label: Optional[DownsampledLabel] = None
        if config.enable_downsample_labeling:
            factor = config.downsample_labeling_factor

            # Every size must shrink without a remainder.
            assert cropper.crop_size % factor == 0
            assert config.pad_size % factor == 0
            assert config.core_size % factor == 0
            assert cropper.core_box.height == cropper.core_box.width == config.core_size

            small_crop_size = cropper.crop_size // factor
            small_core_size = config.core_size // factor

            # The downsampled core starts right after the downsampled padding;
            # the end index is inclusive.
            core_first = config.pad_size // factor
            core_last = core_first + small_core_size - 1
            small_core_box = Box(
                up=core_first,
                down=core_last,
                left=core_first,
                right=core_last,
            )

            # One set of resize arguments for all four labels.
            resize_kwargs = {
                'resized_height': small_core_size,
                'resized_width': small_core_size,
                'cv_resize_interpolation': cv.INTER_AREA,
            }
            small_char_mask = core_char_mask.to_box_detached().to_resized_mask(**resize_kwargs)
            small_char_height_score_map = \
                core_char_height_score_map.to_box_detached().to_resized_score_map(**resize_kwargs)
            small_text_line_mask = \
                core_text_line_mask.to_box_detached().to_resized_mask(**resize_kwargs)
            small_text_line_height_score_map = \
                core_text_line_height_score_map.to_box_detached().to_resized_score_map(
                    **resize_kwargs
                )

            downsampled_label = DownsampledLabel(
                shape=(small_crop_size, small_crop_size),
                page_char_mask=small_char_mask,
                page_char_height_score_map=small_char_height_score_map,
                page_text_line_mask=small_text_line_mask,
                page_text_line_height_score_map=small_text_line_height_score_map,
                core_box=small_core_box,
            )

        return CroppedPage(
            page_image=cropped_image,
            page_char_mask=core_char_mask,
            page_char_height_score_map=core_char_height_score_map,
            page_text_line_mask=core_text_line_mask,
            page_text_line_height_score_map=core_text_line_height_score_map,
            core_box=cropper.core_box,
            downsampled_label=downsampled_label,
        )

    def run(self, input: PageCroppingStepInput, rng: RandomGenerator):
        """Collect up to ``num_samples`` crops from the resized page.

        Args:
            input: Wraps the output of the page resizing step.
            rng: Random generator used to place every crop except the first.

        Returns:
            A ``PageCroppingStepOutput``. It may hold fewer crops than requested
            (even none) because rejected samples are not retried beyond
            ``max(3, 2 * num_samples)`` attempts.
        """
        config = self.config

        resized = input.page_resizing_step_output
        page_image = resized.page_image
        page_active_mask = resized.page_active_mask
        page_char_mask = resized.page_char_mask
        page_char_height_score_map = resized.page_char_height_score_map
        page_text_line_mask = resized.page_text_line_mask
        page_text_line_height_score_map = resized.page_text_line_height_score_map

        num_samples = config.num_samples
        if num_samples is None:
            # Estimate the sample count from the number of non-zero pixels.
            mat = page_image.mat
            if mat.ndim == 2:
                # A 2-D mat has no channel axis to reduce over.
                nonzero_pixels = mat > 0
            else:
                nonzero_pixels = np.amax(mat, axis=2) > 0
            page_image_area = int(nonzero_pixels.sum())
            core_area = config.core_size**2
            num_samples = max(
                1,
                round(page_image_area / core_area * config.num_samples_estimation_factor),
            )

        # Truthiness test kept on purpose: None and 0 both mean "no cap".
        if config.num_samples_max:
            num_samples = min(num_samples, config.num_samples_max)

        run_count_max = max(3, 2 * num_samples)
        cropped_pages: List[CroppedPage] = []

        for run_count in range(run_count_max):
            if len(cropped_pages) >= num_samples:
                break
            cropped_page = self.sample_cropped_page(
                page_image=page_image,
                page_active_mask=page_active_mask,
                page_char_mask=page_char_mask,
                page_char_height_score_map=page_char_height_score_map,
                page_text_line_mask=page_text_line_mask,
                page_text_line_height_score_map=page_text_line_height_score_map,
                rng=rng,
                # The first attempt always crops around the page center.
                force_crop_center=(run_count == 0),
            )
            if cropped_page is not None:
                cropped_pages.append(cropped_page)

        return PageCroppingStepOutput(cropped_pages=cropped_pages)


# Factory wrapper around PageCroppingStep.
page_cropping_step_factory = PipelineStepFactory(PageCroppingStep)
class PageCroppingStepConfig:
    """Settings that control how crops are sampled from a resized page."""

    # Size of the core region of each crop; forwarded to Cropper. The step asserts
    # that the core box is a square of this side when downsample labeling is on.
    core_size: int
    # Padding size forwarded to Cropper together with core_size.
    pad_size: int
    # Number of crops to collect per page. When None, the step estimates it from
    # the count of non-zero page pixels divided by core_size**2.
    num_samples: Optional[int] = None
    # Upper bound applied to num_samples. The step tests it by truthiness, so both
    # None and 0 leave num_samples uncapped.
    num_samples_max: Optional[int] = None
    # Multiplier applied to the estimate when num_samples is None.
    num_samples_estimation_factor: float = 1.5
    # Forwarded to Cropper as pad_value (presumably the fill for padded pixels --
    # confirm against Cropper).
    pad_value: int = 0
    # When True, reject a crop whose core holds too few positive char-mask pixels.
    drop_cropped_page_with_small_text_ratio: bool = True
    # Minimum (positive char-mask pixels / core box area) for a crop to be kept.
    text_ratio_min: float = 0.025
    # When True, reject a crop whose active mask covers too little of the crop.
    drop_cropped_page_with_small_active_region: bool = True
    # Minimum (active-mask pixel sum / cropped image area) for a crop to be kept.
    active_region_ratio_min: float = 0.4
    # When True, every kept crop also carries a DownsampledLabel.
    enable_downsample_labeling: bool = True
    # Integer divisor for the downsampled label; the crop size, pad_size and
    # core_size are asserted to be divisible by it.
    downsample_labeling_factor: int = 2
def __init__(
    self,
    core_size,
    pad_size,
    # Defaults are read from `attr_dict`, which is not defined in this listing
    # (presumably the attrs attribute table of the class -- confirm).
    num_samples=attr_dict['num_samples'].default,
    num_samples_max=attr_dict['num_samples_max'].default,
    num_samples_estimation_factor=attr_dict['num_samples_estimation_factor'].default,
    pad_value=attr_dict['pad_value'].default,
    drop_cropped_page_with_small_text_ratio=attr_dict['drop_cropped_page_with_small_text_ratio'].default,
    text_ratio_min=attr_dict['text_ratio_min'].default,
    drop_cropped_page_with_small_active_region=attr_dict['drop_cropped_page_with_small_active_region'].default,
    active_region_ratio_min=attr_dict['active_region_ratio_min'].default,
    enable_downsample_labeling=attr_dict['enable_downsample_labeling'].default,
    downsample_labeling_factor=attr_dict['downsample_labeling_factor'].default,
):
    """Store every argument on the instance attribute of the same name."""
    self.core_size = core_size
    self.pad_size = pad_size
    self.num_samples = num_samples
    self.num_samples_max = num_samples_max
    self.num_samples_estimation_factor = num_samples_estimation_factor
    self.pad_value = pad_value
    self.drop_cropped_page_with_small_text_ratio = drop_cropped_page_with_small_text_ratio
    self.text_ratio_min = text_ratio_min
    self.drop_cropped_page_with_small_active_region = drop_cropped_page_with_small_active_region
    self.active_region_ratio_min = active_region_ratio_min
    self.enable_downsample_labeling = enable_downsample_labeling
    self.downsample_labeling_factor = downsample_labeling_factor
Method generated by attrs for class PageCroppingStepConfig.
def __init__(self, page_resizing_step_output):
    """Store the page resizing step output on the instance."""
    self.page_resizing_step_output = page_resizing_step_output
Method generated by attrs for class PageCroppingStepInput.
class DownsampledLabel:
    """Core-region labels of one crop, resized by 1 / downsample_labeling_factor."""

    # (size, size) with size = cropper.crop_size // downsample_labeling_factor.
    shape: Tuple[int, int]
    # Core-only char mask resized to (core_size // factor) on each side.
    page_char_mask: Mask
    # Core-only char height score map resized the same way.
    page_char_height_score_map: ScoreMap
    # Core-only text line mask resized the same way.
    page_text_line_mask: Mask
    # Core-only text line height score map resized the same way.
    page_text_line_height_score_map: ScoreMap
    # Core region in downsampled coordinates: starts at pad_size // factor on both
    # axes and spans core_size // factor pixels (inclusive end).
    core_box: Box
def __init__(
    self,
    shape,
    page_char_mask,
    page_char_height_score_map,
    page_text_line_mask,
    page_text_line_height_score_map,
    core_box,
):
    """Store every argument on the instance attribute of the same name."""
    self.shape = shape
    self.page_char_mask = page_char_mask
    self.page_char_height_score_map = page_char_height_score_map
    self.page_text_line_mask = page_text_line_mask
    self.page_text_line_height_score_map = page_text_line_height_score_map
    self.core_box = core_box
Method generated by attrs for class DownsampledLabel.
class CroppedPage:
    """One crop sampled from a page together with its labels."""

    # Result of cropper.crop_image on the page image.
    page_image: Image
    # The four label fields below are cropped with core_only=True.
    page_char_mask: Mask
    page_char_height_score_map: ScoreMap
    page_text_line_mask: Mask
    page_text_line_height_score_map: ScoreMap
    # cropper.core_box of the cropper that produced this crop.
    core_box: Box
    # Present only when enable_downsample_labeling is set in the config.
    downsampled_label: Optional[DownsampledLabel]
def __init__(
    self,
    page_image,
    page_char_mask,
    page_char_height_score_map,
    page_text_line_mask,
    page_text_line_height_score_map,
    core_box,
    downsampled_label,
):
    """Store every argument on the instance attribute of the same name."""
    self.page_image = page_image
    self.page_char_mask = page_char_mask
    self.page_char_height_score_map = page_char_height_score_map
    self.page_text_line_mask = page_text_line_mask
    self.page_text_line_height_score_map = page_text_line_height_score_map
    self.core_box = core_box
    self.downsampled_label = downsampled_label
Method generated by attrs for class CroppedPage.
Method generated by attrs for class PageCroppingStepOutput.
class PageCroppingStep(
    PipelineStep[
        PageCroppingStepConfig,
        PageCroppingStepInput,
        PageCroppingStepOutput,
    ]
):  # yapf: disable
    """Pipeline step that samples crops (and their labels) from a resized page."""

    def __init__(self, config: PageCroppingStepConfig):
        super().__init__(config)

    def sample_cropped_page(
        self,
        page_image: Image,
        page_active_mask: Mask,
        page_char_mask: Mask,
        page_char_height_score_map: ScoreMap,
        page_text_line_mask: Mask,
        page_text_line_height_score_map: ScoreMap,
        rng: RandomGenerator,
        force_crop_center: bool = False,
    ):
        """Sample a single crop of the page and of its labels.

        Args:
            page_image: Page image to crop.
            page_active_mask: Mask cropped over the whole crop; used only for the
                active-region filter.
            page_char_mask: Char mask, cropped to the core region.
            page_char_height_score_map: Char height score map, cropped to the core.
            page_text_line_mask: Text line mask, cropped to the core region.
            page_text_line_height_score_map: Text line height score map, cropped
                to the core region.
            rng: Random generator handed to ``Cropper.create``.
            force_crop_center: When True, build the cropper from the center point
                of the page instead of using ``rng``.

        Returns:
            A ``CroppedPage``, or ``None`` when the crop is rejected by the text
            ratio filter or by the active-region filter.

        Raises:
            AssertionError: If downsample labeling is enabled and the crop size,
                pad size or core size is not divisible by the downsample factor,
                or the core box is not a ``core_size`` square.
        """
        config = self.config

        # Arguments shared by both Cropper constructors.
        cropper_kwargs = {
            'shape': page_image.shape,
            'core_size': config.core_size,
            'pad_size': config.pad_size,
            'pad_value': config.pad_value,
        }
        if force_crop_center:
            cropper = Cropper.create_from_center_point(
                center_point=Box.from_shapable(page_image).get_center_point(),
                **cropper_kwargs,
            )
        else:
            cropper = Cropper.create(rng=rng, **cropper_kwargs)

        cropped_image = cropper.crop_image(page_image)
        cropped_active_mask = cropper.crop_mask(page_active_mask)

        # Labels are only kept for the core region of the crop.
        core_char_mask = cropper.crop_mask(page_char_mask, core_only=True)
        core_char_height_score_map = cropper.crop_score_map(
            page_char_height_score_map,
            core_only=True,
        )
        core_text_line_mask = cropper.crop_mask(page_text_line_mask, core_only=True)
        core_text_line_height_score_map = cropper.crop_score_map(
            page_text_line_height_score_map,
            core_only=True,
        )

        if config.drop_cropped_page_with_small_text_ratio:
            text_ratio = (core_char_mask.mat > 0).sum() / cropper.core_box.area
            if text_ratio < config.text_ratio_min:
                return None

        if config.drop_cropped_page_with_small_active_region:
            # The denominator is the area of the cropped image, not of the core.
            active_region_ratio = int(cropped_active_mask.np_mask.sum()) / cropped_image.area
            if active_region_ratio < config.active_region_ratio_min:
                return None

        downsampled_label: Optional[DownsampledLabel] = None
        if config.enable_downsample_labeling:
            factor = config.downsample_labeling_factor

            # Every size must shrink without a remainder.
            assert cropper.crop_size % factor == 0
            assert config.pad_size % factor == 0
            assert config.core_size % factor == 0
            assert cropper.core_box.height == cropper.core_box.width == config.core_size

            small_crop_size = cropper.crop_size // factor
            small_core_size = config.core_size // factor

            # The downsampled core starts right after the downsampled padding;
            # the end index is inclusive.
            core_first = config.pad_size // factor
            core_last = core_first + small_core_size - 1
            small_core_box = Box(
                up=core_first,
                down=core_last,
                left=core_first,
                right=core_last,
            )

            # One set of resize arguments for all four labels.
            resize_kwargs = {
                'resized_height': small_core_size,
                'resized_width': small_core_size,
                'cv_resize_interpolation': cv.INTER_AREA,
            }
            small_char_mask = core_char_mask.to_box_detached().to_resized_mask(**resize_kwargs)
            small_char_height_score_map = \
                core_char_height_score_map.to_box_detached().to_resized_score_map(**resize_kwargs)
            small_text_line_mask = \
                core_text_line_mask.to_box_detached().to_resized_mask(**resize_kwargs)
            small_text_line_height_score_map = \
                core_text_line_height_score_map.to_box_detached().to_resized_score_map(
                    **resize_kwargs
                )

            downsampled_label = DownsampledLabel(
                shape=(small_crop_size, small_crop_size),
                page_char_mask=small_char_mask,
                page_char_height_score_map=small_char_height_score_map,
                page_text_line_mask=small_text_line_mask,
                page_text_line_height_score_map=small_text_line_height_score_map,
                core_box=small_core_box,
            )

        return CroppedPage(
            page_image=cropped_image,
            page_char_mask=core_char_mask,
            page_char_height_score_map=core_char_height_score_map,
            page_text_line_mask=core_text_line_mask,
            page_text_line_height_score_map=core_text_line_height_score_map,
            core_box=cropper.core_box,
            downsampled_label=downsampled_label,
        )

    def run(self, input: PageCroppingStepInput, rng: RandomGenerator):
        """Collect up to ``num_samples`` crops from the resized page.

        Args:
            input: Wraps the output of the page resizing step.
            rng: Random generator used to place every crop except the first.

        Returns:
            A ``PageCroppingStepOutput``. It may hold fewer crops than requested
            (even none) because rejected samples are not retried beyond
            ``max(3, 2 * num_samples)`` attempts.
        """
        config = self.config

        resized = input.page_resizing_step_output
        page_image = resized.page_image
        page_active_mask = resized.page_active_mask
        page_char_mask = resized.page_char_mask
        page_char_height_score_map = resized.page_char_height_score_map
        page_text_line_mask = resized.page_text_line_mask
        page_text_line_height_score_map = resized.page_text_line_height_score_map

        num_samples = config.num_samples
        if num_samples is None:
            # Estimate the sample count from the number of non-zero pixels.
            mat = page_image.mat
            if mat.ndim == 2:
                # A 2-D mat has no channel axis to reduce over.
                nonzero_pixels = mat > 0
            else:
                nonzero_pixels = np.amax(mat, axis=2) > 0
            page_image_area = int(nonzero_pixels.sum())
            core_area = config.core_size**2
            num_samples = max(
                1,
                round(page_image_area / core_area * config.num_samples_estimation_factor),
            )

        # Truthiness test kept on purpose: None and 0 both mean "no cap".
        if config.num_samples_max:
            num_samples = min(num_samples, config.num_samples_max)

        run_count_max = max(3, 2 * num_samples)
        cropped_pages: List[CroppedPage] = []

        for run_count in range(run_count_max):
            if len(cropped_pages) >= num_samples:
                break
            cropped_page = self.sample_cropped_page(
                page_image=page_image,
                page_active_mask=page_active_mask,
                page_char_mask=page_char_mask,
                page_char_height_score_map=page_char_height_score_map,
                page_text_line_mask=page_text_line_mask,
                page_text_line_height_score_map=page_text_line_height_score_map,
                rng=rng,
                # The first attempt always crops around the page center.
                force_crop_center=(run_count == 0),
            )
            if cropped_page is not None:
                cropped_pages.append(cropped_page)

        return PageCroppingStepOutput(cropped_pages=cropped_pages)
Abstract base class for generic types.
A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::
  class Mapping(Generic[KT, VT]):
      def __getitem__(self, key: KT) -> VT:
          ...
      # Etc.
This class can then be used as follows::
  def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
      try:
          return mapping[key]
      except KeyError:
          return default
def sample_cropped_page(
    self,
    page_image: Image,
    page_active_mask: Mask,
    page_char_mask: Mask,
    page_char_height_score_map: ScoreMap,
    page_text_line_mask: Mask,
    page_text_line_height_score_map: ScoreMap,
    rng: RandomGenerator,
    force_crop_center: bool = False,
):
    """Sample a single crop of the page and of its labels.

    Args:
        page_image: Page image to crop.
        page_active_mask: Mask cropped over the whole crop; used only for the
            active-region filter.
        page_char_mask: Char mask, cropped to the core region.
        page_char_height_score_map: Char height score map, cropped to the core.
        page_text_line_mask: Text line mask, cropped to the core region.
        page_text_line_height_score_map: Text line height score map, cropped
            to the core region.
        rng: Random generator handed to ``Cropper.create``.
        force_crop_center: When True, build the cropper from the center point
            of the page instead of using ``rng``.

    Returns:
        A ``CroppedPage``, or ``None`` when the crop is rejected by the text
        ratio filter or by the active-region filter.

    Raises:
        AssertionError: If downsample labeling is enabled and the crop size,
            pad size or core size is not divisible by the downsample factor,
            or the core box is not a ``core_size`` square.
    """
    config = self.config

    # Arguments shared by both Cropper constructors.
    cropper_kwargs = {
        'shape': page_image.shape,
        'core_size': config.core_size,
        'pad_size': config.pad_size,
        'pad_value': config.pad_value,
    }
    if force_crop_center:
        cropper = Cropper.create_from_center_point(
            center_point=Box.from_shapable(page_image).get_center_point(),
            **cropper_kwargs,
        )
    else:
        cropper = Cropper.create(rng=rng, **cropper_kwargs)

    cropped_image = cropper.crop_image(page_image)
    cropped_active_mask = cropper.crop_mask(page_active_mask)

    # Labels are only kept for the core region of the crop.
    core_char_mask = cropper.crop_mask(page_char_mask, core_only=True)
    core_char_height_score_map = cropper.crop_score_map(
        page_char_height_score_map,
        core_only=True,
    )
    core_text_line_mask = cropper.crop_mask(page_text_line_mask, core_only=True)
    core_text_line_height_score_map = cropper.crop_score_map(
        page_text_line_height_score_map,
        core_only=True,
    )

    if config.drop_cropped_page_with_small_text_ratio:
        text_ratio = (core_char_mask.mat > 0).sum() / cropper.core_box.area
        if text_ratio < config.text_ratio_min:
            return None

    if config.drop_cropped_page_with_small_active_region:
        # The denominator is the area of the cropped image, not of the core.
        active_region_ratio = int(cropped_active_mask.np_mask.sum()) / cropped_image.area
        if active_region_ratio < config.active_region_ratio_min:
            return None

    downsampled_label: Optional[DownsampledLabel] = None
    if config.enable_downsample_labeling:
        factor = config.downsample_labeling_factor

        # Every size must shrink without a remainder.
        assert cropper.crop_size % factor == 0
        assert config.pad_size % factor == 0
        assert config.core_size % factor == 0
        assert cropper.core_box.height == cropper.core_box.width == config.core_size

        small_crop_size = cropper.crop_size // factor
        small_core_size = config.core_size // factor

        # The downsampled core starts right after the downsampled padding;
        # the end index is inclusive.
        core_first = config.pad_size // factor
        core_last = core_first + small_core_size - 1
        small_core_box = Box(
            up=core_first,
            down=core_last,
            left=core_first,
            right=core_last,
        )

        # One set of resize arguments for all four labels.
        resize_kwargs = {
            'resized_height': small_core_size,
            'resized_width': small_core_size,
            'cv_resize_interpolation': cv.INTER_AREA,
        }
        small_char_mask = core_char_mask.to_box_detached().to_resized_mask(**resize_kwargs)
        small_char_height_score_map = \
            core_char_height_score_map.to_box_detached().to_resized_score_map(**resize_kwargs)
        small_text_line_mask = \
            core_text_line_mask.to_box_detached().to_resized_mask(**resize_kwargs)
        small_text_line_height_score_map = \
            core_text_line_height_score_map.to_box_detached().to_resized_score_map(
                **resize_kwargs
            )

        downsampled_label = DownsampledLabel(
            shape=(small_crop_size, small_crop_size),
            page_char_mask=small_char_mask,
            page_char_height_score_map=small_char_height_score_map,
            page_text_line_mask=small_text_line_mask,
            page_text_line_height_score_map=small_text_line_height_score_map,
            core_box=small_core_box,
        )

    return CroppedPage(
        page_image=cropped_image,
        page_char_mask=core_char_mask,
        page_char_height_score_map=core_char_height_score_map,
        page_text_line_mask=core_text_line_mask,
        page_text_line_height_score_map=core_text_line_height_score_map,
        core_box=cropper.core_box,
        downsampled_label=downsampled_label,
    )
def run(self, input: PageCroppingStepInput, rng: RandomGenerator):
    """Collect up to ``num_samples`` crops from the resized page.

    Args:
        input: Wraps the output of the page resizing step.
        rng: Random generator used to place every crop except the first.

    Returns:
        A ``PageCroppingStepOutput``. It may hold fewer crops than requested
        (even none) because rejected samples are not retried beyond
        ``max(3, 2 * num_samples)`` attempts.
    """
    config = self.config

    resized = input.page_resizing_step_output
    page_image = resized.page_image
    page_active_mask = resized.page_active_mask
    page_char_mask = resized.page_char_mask
    page_char_height_score_map = resized.page_char_height_score_map
    page_text_line_mask = resized.page_text_line_mask
    page_text_line_height_score_map = resized.page_text_line_height_score_map

    num_samples = config.num_samples
    if num_samples is None:
        # Estimate the sample count from the number of non-zero pixels.
        mat = page_image.mat
        if mat.ndim == 2:
            # A 2-D mat has no channel axis to reduce over.
            nonzero_pixels = mat > 0
        else:
            nonzero_pixels = np.amax(mat, axis=2) > 0
        page_image_area = int(nonzero_pixels.sum())
        core_area = config.core_size**2
        num_samples = max(
            1,
            round(page_image_area / core_area * config.num_samples_estimation_factor),
        )

    # Truthiness test kept on purpose: None and 0 both mean "no cap".
    if config.num_samples_max:
        num_samples = min(num_samples, config.num_samples_max)

    run_count_max = max(3, 2 * num_samples)
    cropped_pages: List[CroppedPage] = []

    for run_count in range(run_count_max):
        if len(cropped_pages) >= num_samples:
            break
        cropped_page = self.sample_cropped_page(
            page_image=page_image,
            page_active_mask=page_active_mask,
            page_char_mask=page_char_mask,
            page_char_height_score_map=page_char_height_score_map,
            page_text_line_mask=page_text_line_mask,
            page_text_line_height_score_map=page_text_line_height_score_map,
            rng=rng,
            # The first attempt always crops around the page center.
            force_crop_center=(run_count == 0),
        )
        if cropped_page is not None:
            cropped_pages.append(cropped_page)

    return PageCroppingStepOutput(cropped_pages=cropped_pages)