vkit.pipeline.text_detection.page_cropping

  1# Copyright 2022 vkit-x Administrator. All Rights Reserved.
  2#
  3# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
  4#
  5# The commercial license gives you the full rights to create and distribute software
  6# on your own terms without any SSPL license obligations. For more information,
  7# please see the "LICENSE_COMMERCIAL.txt" file.
  8#
  9# This project is also available under Server Side Public License (SSPL).
 10# The SSPL licensing is ideal for use cases such as open source projects with
 11# SSPL distribution, student/academic purposes, hobby projects, internal research
 12# projects without external distribution, or other projects where all SSPL
 13# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
 14from typing import Sequence, List, Optional, Tuple
 15
 16import attrs
 17from numpy.random import Generator as RandomGenerator
 18import numpy as np
 19import cv2 as cv
 20
 21from vkit.element import Box, Mask, ScoreMap, Image
 22from vkit.mechanism.cropper import Cropper
 23from .page_resizing import PageResizingStepOutput
 24from ..interface import PipelineStep, PipelineStepFactory
 25
 26
 27@attrs.define
 28class PageCroppingStepConfig:
 29    core_size: int
 30    pad_size: int
 31    num_samples: Optional[int] = None
 32    num_samples_max: Optional[int] = None
 33    num_samples_estimation_factor: float = 1.5
 34    pad_value: int = 0
 35    drop_cropped_page_with_small_text_ratio: bool = True
 36    text_ratio_min: float = 0.025
 37    drop_cropped_page_with_small_active_region: bool = True
 38    active_region_ratio_min: float = 0.4
 39    enable_downsample_labeling: bool = True
 40    downsample_labeling_factor: int = 2
 41
 42
 43@attrs.define
 44class PageCroppingStepInput:
 45    page_resizing_step_output: PageResizingStepOutput
 46
 47
 48@attrs.define
 49class DownsampledLabel:
 50    shape: Tuple[int, int]
 51    page_char_mask: Mask
 52    page_char_height_score_map: ScoreMap
 53    page_text_line_mask: Mask
 54    page_text_line_height_score_map: ScoreMap
 55    core_box: Box
 56
 57
 58@attrs.define
 59class CroppedPage:
 60    page_image: Image
 61    page_char_mask: Mask
 62    page_char_height_score_map: ScoreMap
 63    page_text_line_mask: Mask
 64    page_text_line_height_score_map: ScoreMap
 65    core_box: Box
 66    downsampled_label: Optional[DownsampledLabel]
 67
 68
 69@attrs.define
 70class PageCroppingStepOutput:
 71    cropped_pages: Sequence[CroppedPage]
 72
 73
 74class PageCroppingStep(
 75    PipelineStep[
 76        PageCroppingStepConfig,
 77        PageCroppingStepInput,
 78        PageCroppingStepOutput,
 79    ]
 80):  # yapf: disable
 81
 82    def __init__(self, config: PageCroppingStepConfig):
 83        super().__init__(config)
 84
 85    def sample_cropped_page(
 86        self,
 87        page_image: Image,
 88        page_active_mask: Mask,
 89        page_char_mask: Mask,
 90        page_char_height_score_map: ScoreMap,
 91        page_text_line_mask: Mask,
 92        page_text_line_height_score_map: ScoreMap,
 93        rng: RandomGenerator,
 94        force_crop_center: bool = False,
 95    ):
 96        if not force_crop_center:
 97            cropper = Cropper.create(
 98                shape=page_image.shape,
 99                core_size=self.config.core_size,
100                pad_size=self.config.pad_size,
101                pad_value=self.config.pad_value,
102                rng=rng,
103            )
104        else:
105            cropper = Cropper.create_from_center_point(
106                shape=page_image.shape,
107                core_size=self.config.core_size,
108                pad_size=self.config.pad_size,
109                pad_value=self.config.pad_value,
110                center_point=Box.from_shapable(page_image).get_center_point(),
111            )
112
113        page_image = cropper.crop_image(page_image)
114
115        page_active_mask = cropper.crop_mask(page_active_mask)
116
117        page_char_mask = cropper.crop_mask(
118            page_char_mask,
119            core_only=True,
120        )
121        page_char_height_score_map = cropper.crop_score_map(
122            page_char_height_score_map,
123            core_only=True,
124        )
125
126        page_text_line_mask = cropper.crop_mask(
127            page_text_line_mask,
128            core_only=True,
129        )
130        page_text_line_height_score_map = cropper.crop_score_map(
131            page_text_line_height_score_map,
132            core_only=True,
133        )
134
135        if self.config.drop_cropped_page_with_small_text_ratio:
136            num_text_pixels = (page_char_mask.mat > 0).sum()
137            text_ratio = num_text_pixels / cropper.core_box.area
138            if text_ratio < self.config.text_ratio_min:
139                return None
140
141        if self.config.drop_cropped_page_with_small_active_region:
142            num_active_pixels = int(page_active_mask.np_mask.sum())
143            active_region_ratio = num_active_pixels / page_image.area
144            if active_region_ratio < self.config.active_region_ratio_min:
145                return None
146
147        downsampled_label: Optional[DownsampledLabel] = None
148        if self.config.enable_downsample_labeling:
149            downsample_labeling_factor = self.config.downsample_labeling_factor
150
151            assert cropper.crop_size % downsample_labeling_factor == 0
152            downsampled_size = cropper.crop_size // downsample_labeling_factor
153            downsampled_shape = (downsampled_size, downsampled_size)
154
155            assert self.config.pad_size % downsample_labeling_factor == 0
156            assert self.config.core_size % downsample_labeling_factor == 0
157            assert cropper.core_box.height == cropper.core_box.width == self.config.core_size
158
159            downsampled_pad_size = self.config.pad_size // downsample_labeling_factor
160            downsampled_core_size = self.config.core_size // downsample_labeling_factor
161
162            downsampled_core_begin = downsampled_pad_size
163            downsampled_core_end = downsampled_core_begin + downsampled_core_size - 1
164            downsampled_core_box = Box(
165                up=downsampled_core_begin,
166                down=downsampled_core_end,
167                left=downsampled_core_begin,
168                right=downsampled_core_end,
169            )
170
171            downsampled_page_char_mask = page_char_mask.to_box_detached()
172            downsampled_page_char_mask = \
173                downsampled_page_char_mask.to_resized_mask(
174                    resized_height=downsampled_core_size,
175                    resized_width=downsampled_core_size,
176                    cv_resize_interpolation=cv.INTER_AREA,
177                )
178
179            downsampled_page_char_height_score_map = page_char_height_score_map.to_box_detached()
180            downsampled_page_char_height_score_map = \
181                downsampled_page_char_height_score_map.to_resized_score_map(
182                    resized_height=downsampled_core_size,
183                    resized_width=downsampled_core_size,
184                    cv_resize_interpolation=cv.INTER_AREA,
185                )
186
187            downsampled_page_text_line_mask = page_text_line_mask.to_box_detached()
188            downsampled_page_text_line_mask = \
189                downsampled_page_text_line_mask.to_resized_mask(
190                    resized_height=downsampled_core_size,
191                    resized_width=downsampled_core_size,
192                    cv_resize_interpolation=cv.INTER_AREA,
193                )
194
195            downsampled_page_text_line_height_score_map = \
196                page_text_line_height_score_map.to_box_detached()
197            downsampled_page_text_line_height_score_map = \
198                downsampled_page_text_line_height_score_map.to_resized_score_map(
199                    resized_height=downsampled_core_size,
200                    resized_width=downsampled_core_size,
201                    cv_resize_interpolation=cv.INTER_AREA,
202                )
203
204            downsampled_label = DownsampledLabel(
205                shape=downsampled_shape,
206                page_char_mask=downsampled_page_char_mask,
207                page_char_height_score_map=downsampled_page_char_height_score_map,
208                page_text_line_mask=downsampled_page_text_line_mask,
209                page_text_line_height_score_map=downsampled_page_text_line_height_score_map,
210                core_box=downsampled_core_box,
211            )
212
213        return CroppedPage(
214            page_image=page_image,
215            page_char_mask=page_char_mask,
216            page_char_height_score_map=page_char_height_score_map,
217            page_text_line_mask=page_text_line_mask,
218            page_text_line_height_score_map=page_text_line_height_score_map,
219            core_box=cropper.core_box,
220            downsampled_label=downsampled_label,
221        )
222
223    def run(self, input: PageCroppingStepInput, rng: RandomGenerator):
224        page_resizing_step_output = input.page_resizing_step_output
225        page_image = page_resizing_step_output.page_image
226        page_active_mask = page_resizing_step_output.page_active_mask
227        page_char_mask = page_resizing_step_output.page_char_mask
228        page_char_height_score_map = page_resizing_step_output.page_char_height_score_map
229        page_text_line_mask = page_resizing_step_output.page_text_line_mask
230        page_text_line_height_score_map = page_resizing_step_output.page_text_line_height_score_map
231
232        num_samples = self.config.num_samples
233
234        if num_samples is None:
235            page_image_area = int((np.amax(page_image.mat, axis=2) > 0).sum())
236            core_area = self.config.core_size**2
237            num_samples = max(
238                1,
239                round(page_image_area / core_area * self.config.num_samples_estimation_factor),
240            )
241
242        if self.config.num_samples_max:
243            num_samples = min(num_samples, self.config.num_samples_max)
244
245        run_count_max = max(3, 2 * num_samples)
246        run_count = 0
247
248        cropped_pages: List[CroppedPage] = []
249
250        while len(cropped_pages) < num_samples and run_count < run_count_max:
251            cropped_page = self.sample_cropped_page(
252                page_image=page_image,
253                page_active_mask=page_active_mask,
254                page_char_mask=page_char_mask,
255                page_char_height_score_map=page_char_height_score_map,
256                page_text_line_mask=page_text_line_mask,
257                page_text_line_height_score_map=page_text_line_height_score_map,
258                rng=rng,
259                force_crop_center=(run_count == 0),
260            )
261            if cropped_page:
262                cropped_pages.append(cropped_page)
263            run_count += 1
264
265        return PageCroppingStepOutput(cropped_pages=cropped_pages)
266
267
# Module-level factory object built by passing PageCroppingStep to PipelineStepFactory.
268page_cropping_step_factory = PipelineStepFactory(PageCroppingStep)
class PageCroppingStepConfig:
class PageCroppingStepConfig:
    """Settings read by PageCroppingStep when sampling crops from a page."""

    # Side length of the square core region; forwarded to Cropper.
    core_size: int
    # Padding size forwarded to Cropper alongside core_size.
    pad_size: int
    # Number of crops to collect per page. None makes the step estimate it from the
    # count of non-zero page pixels divided by core_size**2.
    num_samples: Optional[int] = None
    # Cap applied to the number of crops when set to a truthy value.
    num_samples_max: Optional[int] = None
    # Multiplier applied to the estimate used when num_samples is None.
    num_samples_estimation_factor: float = 1.5
    # Padding value forwarded to Cropper.
    pad_value: int = 0
    # When True, a crop is discarded if its positive char-mask pixels divided by the
    # core box area fall below text_ratio_min.
    drop_cropped_page_with_small_text_ratio: bool = True
    text_ratio_min: float = 0.025
    # When True, a crop is discarded if its active-mask pixels divided by the cropped
    # image area fall below active_region_ratio_min.
    drop_cropped_page_with_small_active_region: bool = True
    active_region_ratio_min: float = 0.4
    # When True, each crop also carries labels shrunk by downsample_labeling_factor.
    enable_downsample_labeling: bool = True
    # Must evenly divide the crop size, pad_size and core_size (asserted by the step).
    downsample_labeling_factor: int = 2
PageCroppingStepConfig( core_size: int, pad_size: int, num_samples: Union[int, NoneType] = None, num_samples_max: Union[int, NoneType] = None, num_samples_estimation_factor: float = 1.5, pad_value: int = 0, drop_cropped_page_with_small_text_ratio: bool = True, text_ratio_min: float = 0.025, drop_cropped_page_with_small_active_region: bool = True, active_region_ratio_min: float = 0.4, enable_downsample_labeling: bool = True, downsample_labeling_factor: int = 2)
 # Initializer generated by attrs for PageCroppingStepConfig: every argument is stored
 # unchanged on the same-named attribute; optional arguments take their defaults from attr_dict.
 2def __init__(self, core_size, pad_size, num_samples=attr_dict['num_samples'].default, num_samples_max=attr_dict['num_samples_max'].default, num_samples_estimation_factor=attr_dict['num_samples_estimation_factor'].default, pad_value=attr_dict['pad_value'].default, drop_cropped_page_with_small_text_ratio=attr_dict['drop_cropped_page_with_small_text_ratio'].default, text_ratio_min=attr_dict['text_ratio_min'].default, drop_cropped_page_with_small_active_region=attr_dict['drop_cropped_page_with_small_active_region'].default, active_region_ratio_min=attr_dict['active_region_ratio_min'].default, enable_downsample_labeling=attr_dict['enable_downsample_labeling'].default, downsample_labeling_factor=attr_dict['downsample_labeling_factor'].default):
 3    self.core_size = core_size
 4    self.pad_size = pad_size
 5    self.num_samples = num_samples
 6    self.num_samples_max = num_samples_max
 7    self.num_samples_estimation_factor = num_samples_estimation_factor
 8    self.pad_value = pad_value
 9    self.drop_cropped_page_with_small_text_ratio = drop_cropped_page_with_small_text_ratio
10    self.text_ratio_min = text_ratio_min
11    self.drop_cropped_page_with_small_active_region = drop_cropped_page_with_small_active_region
12    self.active_region_ratio_min = active_region_ratio_min
13    self.enable_downsample_labeling = enable_downsample_labeling
14    self.downsample_labeling_factor = downsample_labeling_factor

Method generated by attrs for class PageCroppingStepConfig.

class PageCroppingStepInput:
class PageCroppingStepInput:
    """Input of PageCroppingStep: the output of the page resizing step."""

    # Supplies the page image, active mask, char/text-line masks and height score maps.
    page_resizing_step_output: PageResizingStepOutput
PageCroppingStepInput( page_resizing_step_output: vkit.pipeline.text_detection.page_resizing.PageResizingStepOutput)
# Initializer generated by attrs for PageCroppingStepInput: stores the argument unchanged.
2def __init__(self, page_resizing_step_output):
3    self.page_resizing_step_output = page_resizing_step_output

Method generated by attrs for class PageCroppingStepInput.

class DownsampledLabel:
class DownsampledLabel:
    """Labels of one crop after shrinking by the configured downsample factor."""

    # (size, size), where size is the cropper's crop size divided by the factor.
    shape: Tuple[int, int]
    # Core-only char labels resized to the downsampled core size.
    page_char_mask: Mask
    page_char_height_score_map: ScoreMap
    # Core-only text-line labels resized to the downsampled core size.
    page_text_line_mask: Mask
    page_text_line_height_score_map: ScoreMap
    # Box of the core region inside ``shape``; it starts at the downsampled pad size.
    core_box: Box
DownsampledLabel( shape: Tuple[int, int], page_char_mask: vkit.element.mask.Mask, page_char_height_score_map: vkit.element.score_map.ScoreMap, page_text_line_mask: vkit.element.mask.Mask, page_text_line_height_score_map: vkit.element.score_map.ScoreMap, core_box: vkit.element.box.Box)
# Initializer generated by attrs for DownsampledLabel: every argument is stored unchanged
# on the same-named attribute; none has a default.
2def __init__(self, shape, page_char_mask, page_char_height_score_map, page_text_line_mask, page_text_line_height_score_map, core_box):
3    self.shape = shape
4    self.page_char_mask = page_char_mask
5    self.page_char_height_score_map = page_char_height_score_map
6    self.page_text_line_mask = page_text_line_mask
7    self.page_text_line_height_score_map = page_text_line_height_score_map
8    self.core_box = core_box

Method generated by attrs for class DownsampledLabel.

class CroppedPage:
class CroppedPage:
    """One crop sampled from a page, together with its labels."""

    # Result of Cropper.crop_image on the page image.
    page_image: Image
    # Char labels cropped with core_only=True.
    page_char_mask: Mask
    page_char_height_score_map: ScoreMap
    # Text-line labels cropped with core_only=True.
    page_text_line_mask: Mask
    page_text_line_height_score_map: ScoreMap
    # The cropper's core box for this crop.
    core_box: Box
    # None when downsample labeling is disabled in the config.
    downsampled_label: Optional[DownsampledLabel]
CroppedPage( page_image: vkit.element.image.Image, page_char_mask: vkit.element.mask.Mask, page_char_height_score_map: vkit.element.score_map.ScoreMap, page_text_line_mask: vkit.element.mask.Mask, page_text_line_height_score_map: vkit.element.score_map.ScoreMap, core_box: vkit.element.box.Box, downsampled_label: Union[vkit.pipeline.text_detection.page_cropping.DownsampledLabel, NoneType])
# Initializer generated by attrs for CroppedPage: every argument is stored unchanged on
# the same-named attribute; none has a default.
2def __init__(self, page_image, page_char_mask, page_char_height_score_map, page_text_line_mask, page_text_line_height_score_map, core_box, downsampled_label):
3    self.page_image = page_image
4    self.page_char_mask = page_char_mask
5    self.page_char_height_score_map = page_char_height_score_map
6    self.page_text_line_mask = page_text_line_mask
7    self.page_text_line_height_score_map = page_text_line_height_score_map
8    self.core_box = core_box
9    self.downsampled_label = downsampled_label

Method generated by attrs for class CroppedPage.

class PageCroppingStepOutput:
class PageCroppingStepOutput:
    """Output of PageCroppingStep."""

    # Crops that passed the configured filters; may hold fewer items than requested.
    cropped_pages: Sequence[CroppedPage]
PageCroppingStepOutput( cropped_pages: Sequence[vkit.pipeline.text_detection.page_cropping.CroppedPage])
# Initializer generated by attrs for PageCroppingStepOutput: stores the argument unchanged.
2def __init__(self, cropped_pages):
3    self.cropped_pages = cropped_pages

Method generated by attrs for class PageCroppingStepOutput.

 75class PageCroppingStep(
 76    PipelineStep[
 77        PageCroppingStepConfig,
 78        PageCroppingStepInput,
 79        PageCroppingStepOutput,
 80    ]
 81):  # yapf: disable
 82
 83    def __init__(self, config: PageCroppingStepConfig):
 84        super().__init__(config)
 85
 86    def sample_cropped_page(
 87        self,
 88        page_image: Image,
 89        page_active_mask: Mask,
 90        page_char_mask: Mask,
 91        page_char_height_score_map: ScoreMap,
 92        page_text_line_mask: Mask,
 93        page_text_line_height_score_map: ScoreMap,
 94        rng: RandomGenerator,
 95        force_crop_center: bool = False,
 96    ):
 97        if not force_crop_center:
 98            cropper = Cropper.create(
 99                shape=page_image.shape,
100                core_size=self.config.core_size,
101                pad_size=self.config.pad_size,
102                pad_value=self.config.pad_value,
103                rng=rng,
104            )
105        else:
106            cropper = Cropper.create_from_center_point(
107                shape=page_image.shape,
108                core_size=self.config.core_size,
109                pad_size=self.config.pad_size,
110                pad_value=self.config.pad_value,
111                center_point=Box.from_shapable(page_image).get_center_point(),
112            )
113
114        page_image = cropper.crop_image(page_image)
115
116        page_active_mask = cropper.crop_mask(page_active_mask)
117
118        page_char_mask = cropper.crop_mask(
119            page_char_mask,
120            core_only=True,
121        )
122        page_char_height_score_map = cropper.crop_score_map(
123            page_char_height_score_map,
124            core_only=True,
125        )
126
127        page_text_line_mask = cropper.crop_mask(
128            page_text_line_mask,
129            core_only=True,
130        )
131        page_text_line_height_score_map = cropper.crop_score_map(
132            page_text_line_height_score_map,
133            core_only=True,
134        )
135
136        if self.config.drop_cropped_page_with_small_text_ratio:
137            num_text_pixels = (page_char_mask.mat > 0).sum()
138            text_ratio = num_text_pixels / cropper.core_box.area
139            if text_ratio < self.config.text_ratio_min:
140                return None
141
142        if self.config.drop_cropped_page_with_small_active_region:
143            num_active_pixels = int(page_active_mask.np_mask.sum())
144            active_region_ratio = num_active_pixels / page_image.area
145            if active_region_ratio < self.config.active_region_ratio_min:
146                return None
147
148        downsampled_label: Optional[DownsampledLabel] = None
149        if self.config.enable_downsample_labeling:
150            downsample_labeling_factor = self.config.downsample_labeling_factor
151
152            assert cropper.crop_size % downsample_labeling_factor == 0
153            downsampled_size = cropper.crop_size // downsample_labeling_factor
154            downsampled_shape = (downsampled_size, downsampled_size)
155
156            assert self.config.pad_size % downsample_labeling_factor == 0
157            assert self.config.core_size % downsample_labeling_factor == 0
158            assert cropper.core_box.height == cropper.core_box.width == self.config.core_size
159
160            downsampled_pad_size = self.config.pad_size // downsample_labeling_factor
161            downsampled_core_size = self.config.core_size // downsample_labeling_factor
162
163            downsampled_core_begin = downsampled_pad_size
164            downsampled_core_end = downsampled_core_begin + downsampled_core_size - 1
165            downsampled_core_box = Box(
166                up=downsampled_core_begin,
167                down=downsampled_core_end,
168                left=downsampled_core_begin,
169                right=downsampled_core_end,
170            )
171
172            downsampled_page_char_mask = page_char_mask.to_box_detached()
173            downsampled_page_char_mask = \
174                downsampled_page_char_mask.to_resized_mask(
175                    resized_height=downsampled_core_size,
176                    resized_width=downsampled_core_size,
177                    cv_resize_interpolation=cv.INTER_AREA,
178                )
179
180            downsampled_page_char_height_score_map = page_char_height_score_map.to_box_detached()
181            downsampled_page_char_height_score_map = \
182                downsampled_page_char_height_score_map.to_resized_score_map(
183                    resized_height=downsampled_core_size,
184                    resized_width=downsampled_core_size,
185                    cv_resize_interpolation=cv.INTER_AREA,
186                )
187
188            downsampled_page_text_line_mask = page_text_line_mask.to_box_detached()
189            downsampled_page_text_line_mask = \
190                downsampled_page_text_line_mask.to_resized_mask(
191                    resized_height=downsampled_core_size,
192                    resized_width=downsampled_core_size,
193                    cv_resize_interpolation=cv.INTER_AREA,
194                )
195
196            downsampled_page_text_line_height_score_map = \
197                page_text_line_height_score_map.to_box_detached()
198            downsampled_page_text_line_height_score_map = \
199                downsampled_page_text_line_height_score_map.to_resized_score_map(
200                    resized_height=downsampled_core_size,
201                    resized_width=downsampled_core_size,
202                    cv_resize_interpolation=cv.INTER_AREA,
203                )
204
205            downsampled_label = DownsampledLabel(
206                shape=downsampled_shape,
207                page_char_mask=downsampled_page_char_mask,
208                page_char_height_score_map=downsampled_page_char_height_score_map,
209                page_text_line_mask=downsampled_page_text_line_mask,
210                page_text_line_height_score_map=downsampled_page_text_line_height_score_map,
211                core_box=downsampled_core_box,
212            )
213
214        return CroppedPage(
215            page_image=page_image,
216            page_char_mask=page_char_mask,
217            page_char_height_score_map=page_char_height_score_map,
218            page_text_line_mask=page_text_line_mask,
219            page_text_line_height_score_map=page_text_line_height_score_map,
220            core_box=cropper.core_box,
221            downsampled_label=downsampled_label,
222        )
223
224    def run(self, input: PageCroppingStepInput, rng: RandomGenerator):
225        page_resizing_step_output = input.page_resizing_step_output
226        page_image = page_resizing_step_output.page_image
227        page_active_mask = page_resizing_step_output.page_active_mask
228        page_char_mask = page_resizing_step_output.page_char_mask
229        page_char_height_score_map = page_resizing_step_output.page_char_height_score_map
230        page_text_line_mask = page_resizing_step_output.page_text_line_mask
231        page_text_line_height_score_map = page_resizing_step_output.page_text_line_height_score_map
232
233        num_samples = self.config.num_samples
234
235        if num_samples is None:
236            page_image_area = int((np.amax(page_image.mat, axis=2) > 0).sum())
237            core_area = self.config.core_size**2
238            num_samples = max(
239                1,
240                round(page_image_area / core_area * self.config.num_samples_estimation_factor),
241            )
242
243        if self.config.num_samples_max:
244            num_samples = min(num_samples, self.config.num_samples_max)
245
246        run_count_max = max(3, 2 * num_samples)
247        run_count = 0
248
249        cropped_pages: List[CroppedPage] = []
250
251        while len(cropped_pages) < num_samples and run_count < run_count_max:
252            cropped_page = self.sample_cropped_page(
253                page_image=page_image,
254                page_active_mask=page_active_mask,
255                page_char_mask=page_char_mask,
256                page_char_height_score_map=page_char_height_score_map,
257                page_text_line_mask=page_text_line_mask,
258                page_text_line_height_score_map=page_text_line_height_score_map,
259                rng=rng,
260                force_crop_center=(run_count == 0),
261            )
262            if cropped_page:
263                cropped_pages.append(cropped_page)
264            run_count += 1
265
266        return PageCroppingStepOutput(cropped_pages=cropped_pages)

Abstract base class for generic types.

A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::

    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
        # Etc.

This class can then be used as follows::

    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default

83    def __init__(self, config: PageCroppingStepConfig):
84        super().__init__(config)
def sample_cropped_page( self, page_image: vkit.element.image.Image, page_active_mask: vkit.element.mask.Mask, page_char_mask: vkit.element.mask.Mask, page_char_height_score_map: vkit.element.score_map.ScoreMap, page_text_line_mask: vkit.element.mask.Mask, page_text_line_height_score_map: vkit.element.score_map.ScoreMap, rng: numpy.random._generator.Generator, force_crop_center: bool = False):
 86    def sample_cropped_page(
 87        self,
 88        page_image: Image,
 89        page_active_mask: Mask,
 90        page_char_mask: Mask,
 91        page_char_height_score_map: ScoreMap,
 92        page_text_line_mask: Mask,
 93        page_text_line_height_score_map: ScoreMap,
 94        rng: RandomGenerator,
 95        force_crop_center: bool = False,
 96    ):
 97        if not force_crop_center:
 98            cropper = Cropper.create(
 99                shape=page_image.shape,
100                core_size=self.config.core_size,
101                pad_size=self.config.pad_size,
102                pad_value=self.config.pad_value,
103                rng=rng,
104            )
105        else:
106            cropper = Cropper.create_from_center_point(
107                shape=page_image.shape,
108                core_size=self.config.core_size,
109                pad_size=self.config.pad_size,
110                pad_value=self.config.pad_value,
111                center_point=Box.from_shapable(page_image).get_center_point(),
112            )
113
114        page_image = cropper.crop_image(page_image)
115
116        page_active_mask = cropper.crop_mask(page_active_mask)
117
118        page_char_mask = cropper.crop_mask(
119            page_char_mask,
120            core_only=True,
121        )
122        page_char_height_score_map = cropper.crop_score_map(
123            page_char_height_score_map,
124            core_only=True,
125        )
126
127        page_text_line_mask = cropper.crop_mask(
128            page_text_line_mask,
129            core_only=True,
130        )
131        page_text_line_height_score_map = cropper.crop_score_map(
132            page_text_line_height_score_map,
133            core_only=True,
134        )
135
136        if self.config.drop_cropped_page_with_small_text_ratio:
137            num_text_pixels = (page_char_mask.mat > 0).sum()
138            text_ratio = num_text_pixels / cropper.core_box.area
139            if text_ratio < self.config.text_ratio_min:
140                return None
141
142        if self.config.drop_cropped_page_with_small_active_region:
143            num_active_pixels = int(page_active_mask.np_mask.sum())
144            active_region_ratio = num_active_pixels / page_image.area
145            if active_region_ratio < self.config.active_region_ratio_min:
146                return None
147
148        downsampled_label: Optional[DownsampledLabel] = None
149        if self.config.enable_downsample_labeling:
150            downsample_labeling_factor = self.config.downsample_labeling_factor
151
152            assert cropper.crop_size % downsample_labeling_factor == 0
153            downsampled_size = cropper.crop_size // downsample_labeling_factor
154            downsampled_shape = (downsampled_size, downsampled_size)
155
156            assert self.config.pad_size % downsample_labeling_factor == 0
157            assert self.config.core_size % downsample_labeling_factor == 0
158            assert cropper.core_box.height == cropper.core_box.width == self.config.core_size
159
160            downsampled_pad_size = self.config.pad_size // downsample_labeling_factor
161            downsampled_core_size = self.config.core_size // downsample_labeling_factor
162
163            downsampled_core_begin = downsampled_pad_size
164            downsampled_core_end = downsampled_core_begin + downsampled_core_size - 1
165            downsampled_core_box = Box(
166                up=downsampled_core_begin,
167                down=downsampled_core_end,
168                left=downsampled_core_begin,
169                right=downsampled_core_end,
170            )
171
172            downsampled_page_char_mask = page_char_mask.to_box_detached()
173            downsampled_page_char_mask = \
174                downsampled_page_char_mask.to_resized_mask(
175                    resized_height=downsampled_core_size,
176                    resized_width=downsampled_core_size,
177                    cv_resize_interpolation=cv.INTER_AREA,
178                )
179
180            downsampled_page_char_height_score_map = page_char_height_score_map.to_box_detached()
181            downsampled_page_char_height_score_map = \
182                downsampled_page_char_height_score_map.to_resized_score_map(
183                    resized_height=downsampled_core_size,
184                    resized_width=downsampled_core_size,
185                    cv_resize_interpolation=cv.INTER_AREA,
186                )
187
188            downsampled_page_text_line_mask = page_text_line_mask.to_box_detached()
189            downsampled_page_text_line_mask = \
190                downsampled_page_text_line_mask.to_resized_mask(
191                    resized_height=downsampled_core_size,
192                    resized_width=downsampled_core_size,
193                    cv_resize_interpolation=cv.INTER_AREA,
194                )
195
196            downsampled_page_text_line_height_score_map = \
197                page_text_line_height_score_map.to_box_detached()
198            downsampled_page_text_line_height_score_map = \
199                downsampled_page_text_line_height_score_map.to_resized_score_map(
200                    resized_height=downsampled_core_size,
201                    resized_width=downsampled_core_size,
202                    cv_resize_interpolation=cv.INTER_AREA,
203                )
204
205            downsampled_label = DownsampledLabel(
206                shape=downsampled_shape,
207                page_char_mask=downsampled_page_char_mask,
208                page_char_height_score_map=downsampled_page_char_height_score_map,
209                page_text_line_mask=downsampled_page_text_line_mask,
210                page_text_line_height_score_map=downsampled_page_text_line_height_score_map,
211                core_box=downsampled_core_box,
212            )
213
214        return CroppedPage(
215            page_image=page_image,
216            page_char_mask=page_char_mask,
217            page_char_height_score_map=page_char_height_score_map,
218            page_text_line_mask=page_text_line_mask,
219            page_text_line_height_score_map=page_text_line_height_score_map,
220            core_box=cropper.core_box,
221            downsampled_label=downsampled_label,
222        )
def run( self, input: vkit.pipeline.text_detection.page_cropping.PageCroppingStepInput, rng: numpy.random._generator.Generator):
224    def run(self, input: PageCroppingStepInput, rng: RandomGenerator):
225        page_resizing_step_output = input.page_resizing_step_output
226        page_image = page_resizing_step_output.page_image
227        page_active_mask = page_resizing_step_output.page_active_mask
228        page_char_mask = page_resizing_step_output.page_char_mask
229        page_char_height_score_map = page_resizing_step_output.page_char_height_score_map
230        page_text_line_mask = page_resizing_step_output.page_text_line_mask
231        page_text_line_height_score_map = page_resizing_step_output.page_text_line_height_score_map
232
233        num_samples = self.config.num_samples
234
235        if num_samples is None:
236            page_image_area = int((np.amax(page_image.mat, axis=2) > 0).sum())
237            core_area = self.config.core_size**2
238            num_samples = max(
239                1,
240                round(page_image_area / core_area * self.config.num_samples_estimation_factor),
241            )
242
243        if self.config.num_samples_max:
244            num_samples = min(num_samples, self.config.num_samples_max)
245
246        run_count_max = max(3, 2 * num_samples)
247        run_count = 0
248
249        cropped_pages: List[CroppedPage] = []
250
251        while len(cropped_pages) < num_samples and run_count < run_count_max:
252            cropped_page = self.sample_cropped_page(
253                page_image=page_image,
254                page_active_mask=page_active_mask,
255                page_char_mask=page_char_mask,
256                page_char_height_score_map=page_char_height_score_map,
257                page_text_line_mask=page_text_line_mask,
258                page_text_line_height_score_map=page_text_line_height_score_map,
259                rng=rng,
260                force_crop_center=(run_count == 0),
261            )
262            if cropped_page:
263                cropped_pages.append(cropped_page)
264            run_count += 1
265
266        return PageCroppingStepOutput(cropped_pages=cropped_pages)