vkit.pipeline.text_detection.page_cropping

  1# Copyright 2022 vkit-x Administrator. All Rights Reserved.
  2#
  3# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
  4#
  5# The commercial license gives you the full rights to create and distribute software
  6# on your own terms without any SSPL license obligations. For more information,
  7# please see the "LICENSE_COMMERCIAL.txt" file.
  8#
  9# This project is also available under Server Side Public License (SSPL).
 10# The SSPL licensing is ideal for use cases such as open source projects with
 11# SSPL distribution, student/academic purposes, hobby projects, internal research
 12# projects without external distribution, or other projects where all SSPL
 13# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
 14from typing import Sequence, List, Optional, Tuple
 15
 16import attrs
 17from numpy.random import Generator as RandomGenerator
 18import numpy as np
 19import cv2 as cv
 20
 21from vkit.element import Box, Mask, ScoreMap, Image
 22from vkit.mechanism.cropper import Cropper
 23from .page_resizing import PageResizingStepOutput
 24from ..interface import PipelineStep, PipelineStepFactory
 25
 26
 27@attrs.define
 28class PageCroppingStepConfig:
 29    core_size: int
 30    pad_size: int
 31    num_samples: Optional[int] = None
 32    num_samples_max: Optional[int] = None
 33    num_samples_estimation_factor: float = 1.5
 34    pad_value: int = 0
 35    drop_cropped_page_with_small_text_ratio: bool = True
 36    text_ratio_min: float = 0.025
 37    drop_cropped_page_with_small_active_region: bool = True
 38    active_region_ratio_min: float = 0.4
 39    enable_downsample_labeling: bool = True
 40    downsample_labeling_factor: int = 2
 41
 42
 43@attrs.define
 44class PageCroppingStepInput:
 45    page_resizing_step_output: PageResizingStepOutput
 46
 47
 48@attrs.define
 49class DownsampledLabel:
 50    shape: Tuple[int, int]
 51    page_char_mask: Mask
 52    page_seal_impression_char_mask: Mask
 53    page_char_height_score_map: ScoreMap
 54    page_text_line_mask: Mask
 55    page_text_line_height_score_map: ScoreMap
 56    target_core_box: Box
 57
 58
 59@attrs.define
 60class CroppedPage:
 61    page_image: Image
 62    page_char_mask: Mask
 63    page_seal_impression_char_mask: Mask
 64    page_char_height_score_map: ScoreMap
 65    page_text_line_mask: Mask
 66    page_text_line_height_score_map: ScoreMap
 67    target_core_box: Box
 68    downsampled_label: Optional[DownsampledLabel]
 69
 70
 71@attrs.define
 72class PageCroppingStepOutput:
 73    cropped_pages: Sequence[CroppedPage]
 74
 75
 76class PageCroppingStep(
 77    PipelineStep[
 78        PageCroppingStepConfig,
 79        PageCroppingStepInput,
 80        PageCroppingStepOutput,
 81    ]
 82):  # yapf: disable
 83
 84    def __init__(self, config: PageCroppingStepConfig):
 85        super().__init__(config)
 86
 87    def sample_cropped_page(
 88        self,
 89        page_image: Image,
 90        page_active_mask: Mask,
 91        page_char_mask: Mask,
 92        page_seal_impression_char_mask: Mask,
 93        page_char_height_score_map: ScoreMap,
 94        page_text_line_mask: Mask,
 95        page_text_line_height_score_map: ScoreMap,
 96        rng: RandomGenerator,
 97        force_crop_center: bool = False,
 98    ):
 99        if not force_crop_center:
100            cropper = Cropper.create_from_random_proposal(
101                shape=page_image.shape,
102                core_size=self.config.core_size,
103                pad_size=self.config.pad_size,
104                pad_value=self.config.pad_value,
105                rng=rng,
106            )
107        else:
108            cropper = Cropper.create_from_center_point(
109                shape=page_image.shape,
110                core_size=self.config.core_size,
111                pad_size=self.config.pad_size,
112                pad_value=self.config.pad_value,
113                center_point=Box.from_shapable(page_image).get_center_point(),
114            )
115
116        page_image = cropper.crop_image(page_image)
117
118        page_active_mask = cropper.crop_mask(page_active_mask)
119
120        page_char_mask = cropper.crop_mask(
121            page_char_mask,
122            core_only=True,
123        )
124        page_seal_impression_char_mask = cropper.crop_mask(
125            page_seal_impression_char_mask,
126            core_only=True,
127        )
128        page_char_height_score_map = cropper.crop_score_map(
129            page_char_height_score_map,
130            core_only=True,
131        )
132
133        page_text_line_mask = cropper.crop_mask(
134            page_text_line_mask,
135            core_only=True,
136        )
137        page_text_line_height_score_map = cropper.crop_score_map(
138            page_text_line_height_score_map,
139            core_only=True,
140        )
141
142        if self.config.drop_cropped_page_with_small_text_ratio:
143            num_text_pixels = (page_char_mask.mat > 0).sum()
144            text_ratio = num_text_pixels / cropper.target_core_box.area
145            if text_ratio < self.config.text_ratio_min:
146                return None
147
148        if self.config.drop_cropped_page_with_small_active_region:
149            num_active_pixels = int(page_active_mask.np_mask.sum())
150            active_region_ratio = num_active_pixels / page_image.area
151            if active_region_ratio < self.config.active_region_ratio_min:
152                return None
153
154        downsampled_label: Optional[DownsampledLabel] = None
155        if self.config.enable_downsample_labeling:
156            downsample_labeling_factor = self.config.downsample_labeling_factor
157
158            assert cropper.crop_size % downsample_labeling_factor == 0
159            downsampled_size = cropper.crop_size // downsample_labeling_factor
160            downsampled_shape = (downsampled_size, downsampled_size)
161
162            assert self.config.pad_size % downsample_labeling_factor == 0
163            assert self.config.core_size % downsample_labeling_factor == 0
164            assert cropper.target_core_box.height \
165                == cropper.target_core_box.width \
166                == self.config.core_size
167
168            downsampled_pad_size = self.config.pad_size // downsample_labeling_factor
169            downsampled_core_size = self.config.core_size // downsample_labeling_factor
170
171            downsampled_target_core_begin = downsampled_pad_size
172            downsampled_target_core_end = downsampled_target_core_begin + downsampled_core_size - 1
173            downsampled_target_core_box = Box(
174                up=downsampled_target_core_begin,
175                down=downsampled_target_core_end,
176                left=downsampled_target_core_begin,
177                right=downsampled_target_core_end,
178            )
179
180            downsampled_page_char_mask = page_char_mask.to_box_detached()
181            downsampled_page_char_mask = \
182                downsampled_page_char_mask.to_resized_mask(
183                    resized_height=downsampled_core_size,
184                    resized_width=downsampled_core_size,
185                    cv_resize_interpolation=cv.INTER_AREA,
186                )
187
188            downsampled_page_seal_impression_char_mask = \
189                page_seal_impression_char_mask.to_box_detached()
190            downsampled_page_seal_impression_char_mask = \
191                downsampled_page_seal_impression_char_mask.to_resized_mask(
192                    resized_height=downsampled_core_size,
193                    resized_width=downsampled_core_size,
194                    cv_resize_interpolation=cv.INTER_AREA,
195                )
196
197            downsampled_page_char_height_score_map = page_char_height_score_map.to_box_detached()
198            downsampled_page_char_height_score_map = \
199                downsampled_page_char_height_score_map.to_resized_score_map(
200                    resized_height=downsampled_core_size,
201                    resized_width=downsampled_core_size,
202                    cv_resize_interpolation=cv.INTER_AREA,
203                )
204
205            downsampled_page_text_line_mask = page_text_line_mask.to_box_detached()
206            downsampled_page_text_line_mask = \
207                downsampled_page_text_line_mask.to_resized_mask(
208                    resized_height=downsampled_core_size,
209                    resized_width=downsampled_core_size,
210                    cv_resize_interpolation=cv.INTER_AREA,
211                )
212
213            downsampled_page_text_line_height_score_map = \
214                page_text_line_height_score_map.to_box_detached()
215            downsampled_page_text_line_height_score_map = \
216                downsampled_page_text_line_height_score_map.to_resized_score_map(
217                    resized_height=downsampled_core_size,
218                    resized_width=downsampled_core_size,
219                    cv_resize_interpolation=cv.INTER_AREA,
220                )
221
222            downsampled_label = DownsampledLabel(
223                shape=downsampled_shape,
224                page_char_mask=downsampled_page_char_mask,
225                page_seal_impression_char_mask=downsampled_page_seal_impression_char_mask,
226                page_char_height_score_map=downsampled_page_char_height_score_map,
227                page_text_line_mask=downsampled_page_text_line_mask,
228                page_text_line_height_score_map=downsampled_page_text_line_height_score_map,
229                target_core_box=downsampled_target_core_box,
230            )
231
232        return CroppedPage(
233            page_image=page_image,
234            page_char_mask=page_char_mask,
235            page_seal_impression_char_mask=page_seal_impression_char_mask,
236            page_char_height_score_map=page_char_height_score_map,
237            page_text_line_mask=page_text_line_mask,
238            page_text_line_height_score_map=page_text_line_height_score_map,
239            target_core_box=cropper.target_core_box,
240            downsampled_label=downsampled_label,
241        )
242
243    def run(self, input: PageCroppingStepInput, rng: RandomGenerator):
244        page_resizing_step_output = input.page_resizing_step_output
245        page_image = page_resizing_step_output.page_image
246        page_active_mask = page_resizing_step_output.page_active_mask
247        page_char_mask = page_resizing_step_output.page_char_mask
248        page_seal_impression_char_mask = page_resizing_step_output.page_seal_impression_char_mask
249        page_char_height_score_map = page_resizing_step_output.page_char_height_score_map
250        page_text_line_mask = page_resizing_step_output.page_text_line_mask
251        page_text_line_height_score_map = page_resizing_step_output.page_text_line_height_score_map
252
253        num_samples = self.config.num_samples
254
255        if num_samples is None:
256            page_image_area = int((np.amax(page_image.mat, axis=2) > 0).sum())
257            core_area = self.config.core_size**2
258            num_samples = max(
259                1,
260                round(page_image_area / core_area * self.config.num_samples_estimation_factor),
261            )
262
263        if self.config.num_samples_max:
264            num_samples = min(num_samples, self.config.num_samples_max)
265
266        run_count_max = max(3, 2 * num_samples)
267        run_count = 0
268
269        cropped_pages: List[CroppedPage] = []
270
271        while len(cropped_pages) < num_samples and run_count < run_count_max:
272            cropped_page = self.sample_cropped_page(
273                page_image=page_image,
274                page_active_mask=page_active_mask,
275                page_char_mask=page_char_mask,
276                page_seal_impression_char_mask=page_seal_impression_char_mask,
277                page_char_height_score_map=page_char_height_score_map,
278                page_text_line_mask=page_text_line_mask,
279                page_text_line_height_score_map=page_text_line_height_score_map,
280                rng=rng,
281                force_crop_center=(run_count == 0),
282            )
283            if cropped_page:
284                cropped_pages.append(cropped_page)
285            run_count += 1
286
287        return PageCroppingStepOutput(cropped_pages=cropped_pages)
288
289
# Module-level factory object built from the step class; how the factory creates
# steps is defined by ``PipelineStepFactory`` in ``..interface``.
page_cropping_step_factory = PipelineStepFactory(PageCroppingStep)
class PageCroppingStepConfig:
class PageCroppingStepConfig:
    """Settings consumed by ``PageCroppingStep``."""

    # Crop geometry, forwarded unchanged to the ``Cropper`` constructors.
    core_size: int
    pad_size: int

    # Number of crops to collect per page. When ``None`` the step estimates it as
    # (non-zero page pixels / core_size**2) * num_samples_estimation_factor, at least 1.
    num_samples: Optional[int] = None
    # Upper bound applied to the sample count; a falsy value (``None`` or 0) disables it.
    num_samples_max: Optional[int] = None
    num_samples_estimation_factor: float = 1.5

    # Fill value handed to the ``Cropper``.
    pad_value: int = 0

    # Reject a crop when (positive char-mask pixels / target core box area) is below
    # ``text_ratio_min``.
    drop_cropped_page_with_small_text_ratio: bool = True
    text_ratio_min: float = 0.025

    # Reject a crop when (sum of the cropped active mask / cropped image area) is below
    # ``active_region_ratio_min``.
    drop_cropped_page_with_small_active_region: bool = True
    active_region_ratio_min: float = 0.4

    # When enabled, every crop also carries a ``DownsampledLabel`` whose sizes are the
    # crop/core/pad sizes divided by ``downsample_labeling_factor``.
    enable_downsample_labeling: bool = True
    downsample_labeling_factor: int = 2
PageCroppingStepConfig( core_size: int, pad_size: int, num_samples: Union[int, NoneType] = None, num_samples_max: Union[int, NoneType] = None, num_samples_estimation_factor: float = 1.5, pad_value: int = 0, drop_cropped_page_with_small_text_ratio: bool = True, text_ratio_min: float = 0.025, drop_cropped_page_with_small_active_region: bool = True, active_region_ratio_min: float = 0.4, enable_downsample_labeling: bool = True, downsample_labeling_factor: int = 2)
 2def __init__(self, core_size, pad_size, num_samples=attr_dict['num_samples'].default, num_samples_max=attr_dict['num_samples_max'].default, num_samples_estimation_factor=attr_dict['num_samples_estimation_factor'].default, pad_value=attr_dict['pad_value'].default, drop_cropped_page_with_small_text_ratio=attr_dict['drop_cropped_page_with_small_text_ratio'].default, text_ratio_min=attr_dict['text_ratio_min'].default, drop_cropped_page_with_small_active_region=attr_dict['drop_cropped_page_with_small_active_region'].default, active_region_ratio_min=attr_dict['active_region_ratio_min'].default, enable_downsample_labeling=attr_dict['enable_downsample_labeling'].default, downsample_labeling_factor=attr_dict['downsample_labeling_factor'].default):
 3    self.core_size = core_size
 4    self.pad_size = pad_size
 5    self.num_samples = num_samples
 6    self.num_samples_max = num_samples_max
 7    self.num_samples_estimation_factor = num_samples_estimation_factor
 8    self.pad_value = pad_value
 9    self.drop_cropped_page_with_small_text_ratio = drop_cropped_page_with_small_text_ratio
10    self.text_ratio_min = text_ratio_min
11    self.drop_cropped_page_with_small_active_region = drop_cropped_page_with_small_active_region
12    self.active_region_ratio_min = active_region_ratio_min
13    self.enable_downsample_labeling = enable_downsample_labeling
14    self.downsample_labeling_factor = downsample_labeling_factor

Method generated by attrs for class PageCroppingStepConfig.

class PageCroppingStepInput:
class PageCroppingStepInput:
    """Input of ``PageCroppingStep``: the output of the page resizing step."""

    # Supplies the page image, active mask and label masks / score maps to crop from.
    page_resizing_step_output: PageResizingStepOutput
PageCroppingStepInput( page_resizing_step_output: vkit.pipeline.text_detection.page_resizing.PageResizingStepOutput)
def __init__(
    self,
    page_resizing_step_output,
):
    """Store the page resizing step output on the instance."""
    self.page_resizing_step_output = page_resizing_step_output

Method generated by attrs for class PageCroppingStepInput.

class DownsampledLabel:
class DownsampledLabel:
    """Reduced-resolution copy of the labels attached to a ``CroppedPage``."""

    # (crop_size // factor, crop_size // factor) of the downsampled crop.
    shape: Tuple[int, int]

    # Core-only labels, each resized to (core_size // factor) on both sides.
    page_char_mask: Mask
    page_seal_impression_char_mask: Mask
    page_char_height_score_map: ScoreMap
    page_text_line_mask: Mask
    page_text_line_height_score_map: ScoreMap

    # Position of the core inside the downsampled crop: starts at pad_size // factor
    # on both axes and spans core_size // factor pixels.
    target_core_box: Box
DownsampledLabel( shape: Tuple[int, int], page_char_mask: vkit.element.mask.Mask, page_seal_impression_char_mask: vkit.element.mask.Mask, page_char_height_score_map: vkit.element.score_map.ScoreMap, page_text_line_mask: vkit.element.mask.Mask, page_text_line_height_score_map: vkit.element.score_map.ScoreMap, target_core_box: vkit.element.box.Box)
def __init__(
    self,
    shape,
    page_char_mask,
    page_seal_impression_char_mask,
    page_char_height_score_map,
    page_text_line_mask,
    page_text_line_height_score_map,
    target_core_box,
):
    """Store every argument on the instance under the attribute of the same name."""
    self.shape = shape
    # Downsampled labels.
    self.page_char_mask = page_char_mask
    self.page_seal_impression_char_mask = page_seal_impression_char_mask
    self.page_char_height_score_map = page_char_height_score_map
    self.page_text_line_mask = page_text_line_mask
    self.page_text_line_height_score_map = page_text_line_height_score_map
    # Location of the core inside the downsampled crop.
    self.target_core_box = target_core_box

Method generated by attrs for class DownsampledLabel.

class CroppedPage:
class CroppedPage:
    """One crop produced by ``PageCroppingStep.sample_cropped_page``."""

    # Result of ``cropper.crop_image`` on the page image.
    page_image: Image

    # Labels cropped with ``core_only=True``.
    page_char_mask: Mask
    page_seal_impression_char_mask: Mask
    page_char_height_score_map: ScoreMap
    page_text_line_mask: Mask
    page_text_line_height_score_map: ScoreMap

    # ``cropper.target_core_box`` of the cropper that produced this crop.
    target_core_box: Box

    # ``None`` when ``enable_downsample_labeling`` is off in the step config.
    downsampled_label: Optional[DownsampledLabel]
CroppedPage( page_image: vkit.element.image.Image, page_char_mask: vkit.element.mask.Mask, page_seal_impression_char_mask: vkit.element.mask.Mask, page_char_height_score_map: vkit.element.score_map.ScoreMap, page_text_line_mask: vkit.element.mask.Mask, page_text_line_height_score_map: vkit.element.score_map.ScoreMap, target_core_box: vkit.element.box.Box, downsampled_label: Union[vkit.pipeline.text_detection.page_cropping.DownsampledLabel, NoneType])
 2def __init__(self, page_image, page_char_mask, page_seal_impression_char_mask, page_char_height_score_map, page_text_line_mask, page_text_line_height_score_map, target_core_box, downsampled_label):
 3    self.page_image = page_image
 4    self.page_char_mask = page_char_mask
 5    self.page_seal_impression_char_mask = page_seal_impression_char_mask
 6    self.page_char_height_score_map = page_char_height_score_map
 7    self.page_text_line_mask = page_text_line_mask
 8    self.page_text_line_height_score_map = page_text_line_height_score_map
 9    self.target_core_box = target_core_box
10    self.downsampled_label = downsampled_label

Method generated by attrs for class CroppedPage.

class PageCroppingStepOutput:
class PageCroppingStepOutput:
    """Output of ``PageCroppingStep``: the crops accepted for one page."""

    # May hold fewer crops than requested (even none) when candidates were rejected.
    cropped_pages: Sequence[CroppedPage]
PageCroppingStepOutput( cropped_pages: Sequence[vkit.pipeline.text_detection.page_cropping.CroppedPage])
def __init__(
    self,
    cropped_pages,
):
    """Store the sequence of cropped pages on the instance."""
    self.cropped_pages = cropped_pages

Method generated by attrs for class PageCroppingStepOutput.

 77class PageCroppingStep(
 78    PipelineStep[
 79        PageCroppingStepConfig,
 80        PageCroppingStepInput,
 81        PageCroppingStepOutput,
 82    ]
 83):  # yapf: disable
 84
 85    def __init__(self, config: PageCroppingStepConfig):
 86        super().__init__(config)
 87
 88    def sample_cropped_page(
 89        self,
 90        page_image: Image,
 91        page_active_mask: Mask,
 92        page_char_mask: Mask,
 93        page_seal_impression_char_mask: Mask,
 94        page_char_height_score_map: ScoreMap,
 95        page_text_line_mask: Mask,
 96        page_text_line_height_score_map: ScoreMap,
 97        rng: RandomGenerator,
 98        force_crop_center: bool = False,
 99    ):
100        if not force_crop_center:
101            cropper = Cropper.create_from_random_proposal(
102                shape=page_image.shape,
103                core_size=self.config.core_size,
104                pad_size=self.config.pad_size,
105                pad_value=self.config.pad_value,
106                rng=rng,
107            )
108        else:
109            cropper = Cropper.create_from_center_point(
110                shape=page_image.shape,
111                core_size=self.config.core_size,
112                pad_size=self.config.pad_size,
113                pad_value=self.config.pad_value,
114                center_point=Box.from_shapable(page_image).get_center_point(),
115            )
116
117        page_image = cropper.crop_image(page_image)
118
119        page_active_mask = cropper.crop_mask(page_active_mask)
120
121        page_char_mask = cropper.crop_mask(
122            page_char_mask,
123            core_only=True,
124        )
125        page_seal_impression_char_mask = cropper.crop_mask(
126            page_seal_impression_char_mask,
127            core_only=True,
128        )
129        page_char_height_score_map = cropper.crop_score_map(
130            page_char_height_score_map,
131            core_only=True,
132        )
133
134        page_text_line_mask = cropper.crop_mask(
135            page_text_line_mask,
136            core_only=True,
137        )
138        page_text_line_height_score_map = cropper.crop_score_map(
139            page_text_line_height_score_map,
140            core_only=True,
141        )
142
143        if self.config.drop_cropped_page_with_small_text_ratio:
144            num_text_pixels = (page_char_mask.mat > 0).sum()
145            text_ratio = num_text_pixels / cropper.target_core_box.area
146            if text_ratio < self.config.text_ratio_min:
147                return None
148
149        if self.config.drop_cropped_page_with_small_active_region:
150            num_active_pixels = int(page_active_mask.np_mask.sum())
151            active_region_ratio = num_active_pixels / page_image.area
152            if active_region_ratio < self.config.active_region_ratio_min:
153                return None
154
155        downsampled_label: Optional[DownsampledLabel] = None
156        if self.config.enable_downsample_labeling:
157            downsample_labeling_factor = self.config.downsample_labeling_factor
158
159            assert cropper.crop_size % downsample_labeling_factor == 0
160            downsampled_size = cropper.crop_size // downsample_labeling_factor
161            downsampled_shape = (downsampled_size, downsampled_size)
162
163            assert self.config.pad_size % downsample_labeling_factor == 0
164            assert self.config.core_size % downsample_labeling_factor == 0
165            assert cropper.target_core_box.height \
166                == cropper.target_core_box.width \
167                == self.config.core_size
168
169            downsampled_pad_size = self.config.pad_size // downsample_labeling_factor
170            downsampled_core_size = self.config.core_size // downsample_labeling_factor
171
172            downsampled_target_core_begin = downsampled_pad_size
173            downsampled_target_core_end = downsampled_target_core_begin + downsampled_core_size - 1
174            downsampled_target_core_box = Box(
175                up=downsampled_target_core_begin,
176                down=downsampled_target_core_end,
177                left=downsampled_target_core_begin,
178                right=downsampled_target_core_end,
179            )
180
181            downsampled_page_char_mask = page_char_mask.to_box_detached()
182            downsampled_page_char_mask = \
183                downsampled_page_char_mask.to_resized_mask(
184                    resized_height=downsampled_core_size,
185                    resized_width=downsampled_core_size,
186                    cv_resize_interpolation=cv.INTER_AREA,
187                )
188
189            downsampled_page_seal_impression_char_mask = \
190                page_seal_impression_char_mask.to_box_detached()
191            downsampled_page_seal_impression_char_mask = \
192                downsampled_page_seal_impression_char_mask.to_resized_mask(
193                    resized_height=downsampled_core_size,
194                    resized_width=downsampled_core_size,
195                    cv_resize_interpolation=cv.INTER_AREA,
196                )
197
198            downsampled_page_char_height_score_map = page_char_height_score_map.to_box_detached()
199            downsampled_page_char_height_score_map = \
200                downsampled_page_char_height_score_map.to_resized_score_map(
201                    resized_height=downsampled_core_size,
202                    resized_width=downsampled_core_size,
203                    cv_resize_interpolation=cv.INTER_AREA,
204                )
205
206            downsampled_page_text_line_mask = page_text_line_mask.to_box_detached()
207            downsampled_page_text_line_mask = \
208                downsampled_page_text_line_mask.to_resized_mask(
209                    resized_height=downsampled_core_size,
210                    resized_width=downsampled_core_size,
211                    cv_resize_interpolation=cv.INTER_AREA,
212                )
213
214            downsampled_page_text_line_height_score_map = \
215                page_text_line_height_score_map.to_box_detached()
216            downsampled_page_text_line_height_score_map = \
217                downsampled_page_text_line_height_score_map.to_resized_score_map(
218                    resized_height=downsampled_core_size,
219                    resized_width=downsampled_core_size,
220                    cv_resize_interpolation=cv.INTER_AREA,
221                )
222
223            downsampled_label = DownsampledLabel(
224                shape=downsampled_shape,
225                page_char_mask=downsampled_page_char_mask,
226                page_seal_impression_char_mask=downsampled_page_seal_impression_char_mask,
227                page_char_height_score_map=downsampled_page_char_height_score_map,
228                page_text_line_mask=downsampled_page_text_line_mask,
229                page_text_line_height_score_map=downsampled_page_text_line_height_score_map,
230                target_core_box=downsampled_target_core_box,
231            )
232
233        return CroppedPage(
234            page_image=page_image,
235            page_char_mask=page_char_mask,
236            page_seal_impression_char_mask=page_seal_impression_char_mask,
237            page_char_height_score_map=page_char_height_score_map,
238            page_text_line_mask=page_text_line_mask,
239            page_text_line_height_score_map=page_text_line_height_score_map,
240            target_core_box=cropper.target_core_box,
241            downsampled_label=downsampled_label,
242        )
243
244    def run(self, input: PageCroppingStepInput, rng: RandomGenerator):
245        page_resizing_step_output = input.page_resizing_step_output
246        page_image = page_resizing_step_output.page_image
247        page_active_mask = page_resizing_step_output.page_active_mask
248        page_char_mask = page_resizing_step_output.page_char_mask
249        page_seal_impression_char_mask = page_resizing_step_output.page_seal_impression_char_mask
250        page_char_height_score_map = page_resizing_step_output.page_char_height_score_map
251        page_text_line_mask = page_resizing_step_output.page_text_line_mask
252        page_text_line_height_score_map = page_resizing_step_output.page_text_line_height_score_map
253
254        num_samples = self.config.num_samples
255
256        if num_samples is None:
257            page_image_area = int((np.amax(page_image.mat, axis=2) > 0).sum())
258            core_area = self.config.core_size**2
259            num_samples = max(
260                1,
261                round(page_image_area / core_area * self.config.num_samples_estimation_factor),
262            )
263
264        if self.config.num_samples_max:
265            num_samples = min(num_samples, self.config.num_samples_max)
266
267        run_count_max = max(3, 2 * num_samples)
268        run_count = 0
269
270        cropped_pages: List[CroppedPage] = []
271
272        while len(cropped_pages) < num_samples and run_count < run_count_max:
273            cropped_page = self.sample_cropped_page(
274                page_image=page_image,
275                page_active_mask=page_active_mask,
276                page_char_mask=page_char_mask,
277                page_seal_impression_char_mask=page_seal_impression_char_mask,
278                page_char_height_score_map=page_char_height_score_map,
279                page_text_line_mask=page_text_line_mask,
280                page_text_line_height_score_map=page_text_line_height_score_map,
281                rng=rng,
282                force_crop_center=(run_count == 0),
283            )
284            if cropped_page:
285                cropped_pages.append(cropped_page)
286            run_count += 1
287
288        return PageCroppingStepOutput(cropped_pages=cropped_pages)

Abstract base class for generic types.

A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::

    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
            # Etc.

This class can then be used as follows::

    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default

    def __init__(self, config: PageCroppingStepConfig):
        # Nothing step-specific to set up: the config is handed straight to the base class.
        super().__init__(config)
def sample_cropped_page( self, page_image: vkit.element.image.Image, page_active_mask: vkit.element.mask.Mask, page_char_mask: vkit.element.mask.Mask, page_seal_impression_char_mask: vkit.element.mask.Mask, page_char_height_score_map: vkit.element.score_map.ScoreMap, page_text_line_mask: vkit.element.mask.Mask, page_text_line_height_score_map: vkit.element.score_map.ScoreMap, rng: numpy.random._generator.Generator, force_crop_center: bool = False):
 88    def sample_cropped_page(
 89        self,
 90        page_image: Image,
 91        page_active_mask: Mask,
 92        page_char_mask: Mask,
 93        page_seal_impression_char_mask: Mask,
 94        page_char_height_score_map: ScoreMap,
 95        page_text_line_mask: Mask,
 96        page_text_line_height_score_map: ScoreMap,
 97        rng: RandomGenerator,
 98        force_crop_center: bool = False,
 99    ):
100        if not force_crop_center:
101            cropper = Cropper.create_from_random_proposal(
102                shape=page_image.shape,
103                core_size=self.config.core_size,
104                pad_size=self.config.pad_size,
105                pad_value=self.config.pad_value,
106                rng=rng,
107            )
108        else:
109            cropper = Cropper.create_from_center_point(
110                shape=page_image.shape,
111                core_size=self.config.core_size,
112                pad_size=self.config.pad_size,
113                pad_value=self.config.pad_value,
114                center_point=Box.from_shapable(page_image).get_center_point(),
115            )
116
117        page_image = cropper.crop_image(page_image)
118
119        page_active_mask = cropper.crop_mask(page_active_mask)
120
121        page_char_mask = cropper.crop_mask(
122            page_char_mask,
123            core_only=True,
124        )
125        page_seal_impression_char_mask = cropper.crop_mask(
126            page_seal_impression_char_mask,
127            core_only=True,
128        )
129        page_char_height_score_map = cropper.crop_score_map(
130            page_char_height_score_map,
131            core_only=True,
132        )
133
134        page_text_line_mask = cropper.crop_mask(
135            page_text_line_mask,
136            core_only=True,
137        )
138        page_text_line_height_score_map = cropper.crop_score_map(
139            page_text_line_height_score_map,
140            core_only=True,
141        )
142
143        if self.config.drop_cropped_page_with_small_text_ratio:
144            num_text_pixels = (page_char_mask.mat > 0).sum()
145            text_ratio = num_text_pixels / cropper.target_core_box.area
146            if text_ratio < self.config.text_ratio_min:
147                return None
148
149        if self.config.drop_cropped_page_with_small_active_region:
150            num_active_pixels = int(page_active_mask.np_mask.sum())
151            active_region_ratio = num_active_pixels / page_image.area
152            if active_region_ratio < self.config.active_region_ratio_min:
153                return None
154
155        downsampled_label: Optional[DownsampledLabel] = None
156        if self.config.enable_downsample_labeling:
157            downsample_labeling_factor = self.config.downsample_labeling_factor
158
159            assert cropper.crop_size % downsample_labeling_factor == 0
160            downsampled_size = cropper.crop_size // downsample_labeling_factor
161            downsampled_shape = (downsampled_size, downsampled_size)
162
163            assert self.config.pad_size % downsample_labeling_factor == 0
164            assert self.config.core_size % downsample_labeling_factor == 0
165            assert cropper.target_core_box.height \
166                == cropper.target_core_box.width \
167                == self.config.core_size
168
169            downsampled_pad_size = self.config.pad_size // downsample_labeling_factor
170            downsampled_core_size = self.config.core_size // downsample_labeling_factor
171
172            downsampled_target_core_begin = downsampled_pad_size
173            downsampled_target_core_end = downsampled_target_core_begin + downsampled_core_size - 1
174            downsampled_target_core_box = Box(
175                up=downsampled_target_core_begin,
176                down=downsampled_target_core_end,
177                left=downsampled_target_core_begin,
178                right=downsampled_target_core_end,
179            )
180
181            downsampled_page_char_mask = page_char_mask.to_box_detached()
182            downsampled_page_char_mask = \
183                downsampled_page_char_mask.to_resized_mask(
184                    resized_height=downsampled_core_size,
185                    resized_width=downsampled_core_size,
186                    cv_resize_interpolation=cv.INTER_AREA,
187                )
188
189            downsampled_page_seal_impression_char_mask = \
190                page_seal_impression_char_mask.to_box_detached()
191            downsampled_page_seal_impression_char_mask = \
192                downsampled_page_seal_impression_char_mask.to_resized_mask(
193                    resized_height=downsampled_core_size,
194                    resized_width=downsampled_core_size,
195                    cv_resize_interpolation=cv.INTER_AREA,
196                )
197
198            downsampled_page_char_height_score_map = page_char_height_score_map.to_box_detached()
199            downsampled_page_char_height_score_map = \
200                downsampled_page_char_height_score_map.to_resized_score_map(
201                    resized_height=downsampled_core_size,
202                    resized_width=downsampled_core_size,
203                    cv_resize_interpolation=cv.INTER_AREA,
204                )
205
206            downsampled_page_text_line_mask = page_text_line_mask.to_box_detached()
207            downsampled_page_text_line_mask = \
208                downsampled_page_text_line_mask.to_resized_mask(
209                    resized_height=downsampled_core_size,
210                    resized_width=downsampled_core_size,
211                    cv_resize_interpolation=cv.INTER_AREA,
212                )
213
214            downsampled_page_text_line_height_score_map = \
215                page_text_line_height_score_map.to_box_detached()
216            downsampled_page_text_line_height_score_map = \
217                downsampled_page_text_line_height_score_map.to_resized_score_map(
218                    resized_height=downsampled_core_size,
219                    resized_width=downsampled_core_size,
220                    cv_resize_interpolation=cv.INTER_AREA,
221                )
222
223            downsampled_label = DownsampledLabel(
224                shape=downsampled_shape,
225                page_char_mask=downsampled_page_char_mask,
226                page_seal_impression_char_mask=downsampled_page_seal_impression_char_mask,
227                page_char_height_score_map=downsampled_page_char_height_score_map,
228                page_text_line_mask=downsampled_page_text_line_mask,
229                page_text_line_height_score_map=downsampled_page_text_line_height_score_map,
230                target_core_box=downsampled_target_core_box,
231            )
232
233        return CroppedPage(
234            page_image=page_image,
235            page_char_mask=page_char_mask,
236            page_seal_impression_char_mask=page_seal_impression_char_mask,
237            page_char_height_score_map=page_char_height_score_map,
238            page_text_line_mask=page_text_line_mask,
239            page_text_line_height_score_map=page_text_line_height_score_map,
240            target_core_box=cropper.target_core_box,
241            downsampled_label=downsampled_label,
242        )
def run( self, input: vkit.pipeline.text_detection.page_cropping.PageCroppingStepInput, rng: numpy.random._generator.Generator):
244    def run(self, input: PageCroppingStepInput, rng: RandomGenerator):
245        page_resizing_step_output = input.page_resizing_step_output
246        page_image = page_resizing_step_output.page_image
247        page_active_mask = page_resizing_step_output.page_active_mask
248        page_char_mask = page_resizing_step_output.page_char_mask
249        page_seal_impression_char_mask = page_resizing_step_output.page_seal_impression_char_mask
250        page_char_height_score_map = page_resizing_step_output.page_char_height_score_map
251        page_text_line_mask = page_resizing_step_output.page_text_line_mask
252        page_text_line_height_score_map = page_resizing_step_output.page_text_line_height_score_map
253
254        num_samples = self.config.num_samples
255
256        if num_samples is None:
257            page_image_area = int((np.amax(page_image.mat, axis=2) > 0).sum())
258            core_area = self.config.core_size**2
259            num_samples = max(
260                1,
261                round(page_image_area / core_area * self.config.num_samples_estimation_factor),
262            )
263
264        if self.config.num_samples_max:
265            num_samples = min(num_samples, self.config.num_samples_max)
266
267        run_count_max = max(3, 2 * num_samples)
268        run_count = 0
269
270        cropped_pages: List[CroppedPage] = []
271
272        while len(cropped_pages) < num_samples and run_count < run_count_max:
273            cropped_page = self.sample_cropped_page(
274                page_image=page_image,
275                page_active_mask=page_active_mask,
276                page_char_mask=page_char_mask,
277                page_seal_impression_char_mask=page_seal_impression_char_mask,
278                page_char_height_score_map=page_char_height_score_map,
279                page_text_line_mask=page_text_line_mask,
280                page_text_line_height_score_map=page_text_line_height_score_map,
281                rng=rng,
282                force_crop_center=(run_count == 0),
283            )
284            if cropped_page:
285                cropped_pages.append(cropped_page)
286            run_count += 1
287
288        return PageCroppingStepOutput(cropped_pages=cropped_pages)