vkit.pipeline.text_detection.page_distortion

  1# Copyright 2022 vkit-x Administrator. All Rights Reserved.
  2#
  3# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
  4#
  5# The commercial license gives you the full rights to create and distribute software
  6# on your own terms without any SSPL license obligations. For more information,
  7# please see the "LICENSE_COMMERCIAL.txt" file.
  8#
  9# This project is also available under Server Side Public License (SSPL).
 10# The SSPL licensing is ideal for use cases such as open source projects with
 11# SSPL distribution, student/academic purposes, hobby projects, internal research
 12# projects without external distribution, or other projects where all SSPL
 13# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
 14from typing import Optional, Union, Mapping, Any, List, Tuple, Sequence, TypeVar, Generic
 15import itertools
 16
 17import attrs
 18from numpy.random import Generator as RandomGenerator
 19import numpy as np
 20
 21from vkit.utility import PathType
 22from vkit.element import (
 23    Point,
 24    PointList,
 25    Polygon,
 26    Mask,
 27    ScoreMap,
 28    Image,
 29)
 30from vkit.mechanism.distortion_policy import (
 31    random_distortion_factory,
 32    RandomDistortionDebug,
 33)
 34from vkit.mechanism.painter import Painter
 35from vkit.engine.char_mask import (
 36    char_mask_engine_executor_aggregator_factory,
 37    CharMaskEngineRunConfig,
 38)
 39from ..interface import PipelineStep, PipelineStepFactory
 40from .page_layout import DisconnectedTextRegion, NonTextRegion
 41from .page_text_line_label import (
 42    PageCharPolygonCollection,
 43    PageTextLinePolygonCollection,
 44)
 45from .page_assembler import (
 46    PageAssemblerStepOutput,
 47    PageDisconnectedTextRegionCollection,
 48    PageNonTextRegionCollection,
 49    PageSealImpressionCharPolygonCollection,
 50)
 51
 52
@attrs.define
class PageDistortionStepConfig:
    """Configuration of PageDistortionStep.

    The ``enable_distorted_*`` switches select which labelings are generated
    for the distorted page; the output field of a disabled labeling is None.
    """
    # Passed as-is to random_distortion_factory.create(). The default only
    # disables the two blur policies listed below.
    random_distortion_factory_config: Optional[Union[Mapping[str, Any], PathType]] = attrs.field(
        factory=lambda: {
            # NOTE: defocus blur and zoom in blur introduce labeling noise.
            # TODO: enhance those blurring methods for page.
            'disabled_policy_names': [
                'defocus_blur',
                'zoom_in_blur',
            ],
        }
    )
    # When set, a RandomDistortionDebug object is created, handed to the
    # distortion call and returned in the step output.
    enable_debug_random_distortion: bool = False
    # Run the char mask engine on the distorted char polygons. The per-char
    # masks are also used to fill the char height score map when available.
    enable_distorted_char_mask: bool = True
    # Run the char mask engine on the distorted seal impression char polygons.
    enable_distorted_seal_impression_char_mask: bool = True
    # Passed as-is to char_mask_engine_executor_aggregator_factory.create_engine_executor().
    char_mask_engine_config: Mapping[str, Any] = attrs.field(factory=lambda: {'type': 'default'})
    # Generate the char height score map and the list of char heights.
    enable_distorted_char_height_score_map: bool = True
    # Paint a debug image of char polygons and heights. Only takes effect when
    # enable_distorted_char_height_score_map is also set.
    enable_debug_distorted_char_heights: bool = False
    # Generate the text line mask by filling the distorted text line polygons.
    enable_distorted_text_line_mask: bool = True
    # Generate the text line height score map and the list of text line heights.
    enable_distorted_text_line_height_score_map: bool = True
    # Paint a debug image of text line polygons and heights. Only takes effect
    # when enable_distorted_text_line_height_score_map is also set.
    enable_debug_distorted_text_line_heights: bool = False
 74
 75
@attrs.define
class PageDistortionStepInput:
    """Input of PageDistortionStep."""
    # Output of the page assembler step; its ``page`` attribute provides the
    # image and the polygon collections that get distorted.
    page_assembler_step_output: PageAssemblerStepOutput
 79
 80
@attrs.define
class PageDistortionStepOutput:
    """Output of PageDistortionStep: the distorted page and its labelings.

    Optional fields are None when the corresponding config switch is off.
    """
    # Distorted page image, with the inactive region filled from the page
    # bottom layer image.
    page_image: Image
    # Debug object passed to the distortion; None unless
    # enable_debug_random_distortion is set.
    page_random_distortion_debug: Optional[RandomDistortionDebug]
    # Distorted version of the page active mask.
    page_active_mask: Mask
    # Char polygons, adjusted char polygons and char height points after distortion.
    page_char_polygon_collection: PageCharPolygonCollection
    # Combined chars mask produced by the char mask engine.
    page_char_mask: Optional[Mask]
    # Combined chars mask of the seal impression chars.
    page_seal_impression_char_mask: Optional[Mask]
    # Score map filled with the height of each char.
    page_char_height_score_map: Optional[ScoreMap]
    # Char heights, indexed like the char polygons.
    page_char_heights: Optional[Sequence[float]]
    # Debug painting of char polygons annotated with their heights.
    page_char_heights_debug_image: Optional[Image]
    # Text line polygons and text line height points after distortion.
    page_text_line_polygon_collection: PageTextLinePolygonCollection
    # Mask filled with the distorted text line polygons.
    page_text_line_mask: Optional[Mask]
    # Score map filled with the mean height of each text line.
    page_text_line_height_score_map: Optional[ScoreMap]
    # Mean text line heights, one per text line polygon.
    page_text_line_heights: Optional[Sequence[float]]
    # Debug painting of text line polygons annotated with their heights.
    page_text_line_heights_debug_image: Optional[Image]
    # Regions rebuilt from the distorted disconnected text region polygons.
    page_disconnected_text_region_collection: PageDisconnectedTextRegionCollection
    # Regions rebuilt from the distorted non-text region polygons.
    page_non_text_region_collection: PageNonTextRegionCollection
    # Seal impression char polygons after distortion.
    page_seal_impression_char_polygon_collection: PageSealImpressionCharPolygonCollection
100
101
# Element type handled by ElementFlattener, constrained to Point or Polygon.
# TODO: More types.
_E = TypeVar('_E', Point, Polygon)
104
105
106# TODO: Move to distortion.
class ElementFlattener(Generic[_E]):
    """Flatten groups of elements into one sequence and restore the grouping.

    The size of every group is recorded at construction time, so a flat
    sequence of the same total length can later be cut back into groups of
    the original sizes.
    """

    def __init__(self, grouped_elements: Sequence[Sequence[_E]]):
        self.grouped_elements = grouped_elements
        self.group_sizes = [len(group) for group in grouped_elements]

    def flatten(self):
        """Return all elements of all groups, in order, as one tuple."""
        return tuple(element for group in self.grouped_elements for element in group)

    def unflatten(self, flattened_elements: Sequence[_E]) -> Sequence[Sequence[_E]]:
        """Slice ``flattened_elements`` into groups of the recorded sizes.

        The total length must equal the sum of the recorded group sizes.
        """
        assert len(flattened_elements) == sum(self.group_sizes)
        # Running totals of the group sizes are the exclusive end offsets;
        # each group begins where the previous one ended.
        ends = list(itertools.accumulate(self.group_sizes))
        begins = [0, *ends[:-1]]
        regrouped: List[Sequence[_E]] = [
            flattened_elements[begin:end] for begin, end in zip(begins, ends)
        ]
        return regrouped
125
126
127class PageDistortionStep(
128    PipelineStep[
129        PageDistortionStepConfig,
130        PageDistortionStepInput,
131        PageDistortionStepOutput,
132    ]
133):  # yapf: disable
134
135    def __init__(self, config: PageDistortionStepConfig):
136        super().__init__(config)
137
138        self.random_distortion = random_distortion_factory.create(
139            self.config.random_distortion_factory_config
140        )
141        self.char_mask_engine_executor = \
142            char_mask_engine_executor_aggregator_factory.create_engine_executor(
143                self.config.char_mask_engine_config
144            )
145
146    @classmethod
147    def fill_page_inactive_region(
148        cls,
149        page_image: Image,
150        page_active_mask: Mask,
151        page_bottom_layer_image: Image,
152    ):
153        assert page_image.shape == page_active_mask.shape
154
155        if page_bottom_layer_image.shape != page_image.shape:
156            page_bottom_layer_image = page_bottom_layer_image.to_resized_image(
157                resized_height=page_image.height,
158                resized_width=page_image.width,
159            )
160
161        page_active_mask.to_inverted_mask().fill_image(page_image, page_bottom_layer_image)
162
163    def generate_text_line_labelings(
164        self,
165        distorted_image: Image,
166        text_line_polygons: Sequence[Polygon],
167        text_line_height_points_up: PointList,
168        text_line_height_points_down: PointList,
169        text_line_height_points_group_sizes: Sequence[int],
170    ):
171        text_line_mask: Optional[Mask] = None
172        if self.config.enable_distorted_text_line_mask:
173            text_line_mask = Mask.from_shapable(distorted_image)
174            for polygon in text_line_polygons:
175                polygon.fill_mask(text_line_mask)
176
177        text_line_height_score_map: Optional[ScoreMap] = None
178        text_line_heights: Optional[List[float]] = None
179        text_line_heights_debug_image: Optional[Image] = None
180
181        if self.config.enable_distorted_text_line_height_score_map:
182            np_height_points_up = text_line_height_points_up.to_smooth_np_array()
183            np_height_points_down = text_line_height_points_down.to_smooth_np_array()
184            np_heights: np.ndarray = np.linalg.norm(
185                np_height_points_down - np_height_points_up,
186                axis=1,
187            )
188            # Add one to compensate.
189            np_heights += 1
190            assert sum(text_line_height_points_group_sizes) == np_heights.shape[0]
191
192            text_line_heights = []
193            text_line_height_score_map = ScoreMap.from_shapable(distorted_image, is_prob=False)
194            begin = 0
195            for polygon, group_size in zip(text_line_polygons, text_line_height_points_group_sizes):
196                end = begin + group_size - 1
197                text_line_height = float(np_heights[begin:end + 1].mean())
198                text_line_heights.append(text_line_height)
199                polygon.fill_score_map(
200                    score_map=text_line_height_score_map,
201                    value=text_line_height,
202                )
203                begin = end + 1
204
205            if self.config.enable_debug_distorted_text_line_heights:
206                painter = Painter.create(distorted_image)
207                painter.paint_polygons(text_line_polygons)
208
209                texts: List[str] = []
210                points = PointList()
211                for polygon, height in zip(text_line_polygons, text_line_heights):
212                    texts.append(f'{height:.1f}')
213                    points.append(polygon.get_center_point())
214                painter.paint_texts(texts, points, alpha=1.0)
215
216                text_line_heights_debug_image = painter.image
217
218        return (
219            text_line_mask,
220            text_line_height_score_map,
221            text_line_heights,
222            text_line_heights_debug_image,
223        )
224
225    def generate_char_labelings(
226        self,
227        distorted_image: Image,
228        char_polygons: Sequence[Polygon],
229        seal_impression_char_polygons: Sequence[Polygon],
230        char_height_points_up: PointList,
231        char_height_points_down: PointList,
232    ):
233        char_mask: Optional[Mask] = None
234        fill_char_height_score_map_masks: Optional[Sequence[Mask]] = None
235        if self.config.enable_distorted_char_mask:
236            result = self.char_mask_engine_executor.run(
237                CharMaskEngineRunConfig(
238                    height=distorted_image.height,
239                    width=distorted_image.width,
240                    char_polygons=char_polygons,
241                ),
242            )
243            char_mask = result.combined_chars_mask
244            fill_char_height_score_map_masks = result.char_masks
245
246        seal_impression_char_mask: Optional[Mask] = None
247        if self.config.enable_distorted_seal_impression_char_mask:
248            result = self.char_mask_engine_executor.run(
249                CharMaskEngineRunConfig(
250                    height=distorted_image.height,
251                    width=distorted_image.width,
252                    char_polygons=seal_impression_char_polygons,
253                ),
254            )
255            seal_impression_char_mask = result.combined_chars_mask
256
257        char_height_score_map: Optional[ScoreMap] = None
258        char_heights: Optional[List[float]] = None
259        char_heights_debug_image: Optional[Image] = None
260
261        if self.config.enable_distorted_char_height_score_map:
262            np_height_points_up = char_height_points_up.to_smooth_np_array()
263            np_height_points_down = char_height_points_down.to_smooth_np_array()
264            np_heights: np.ndarray = np.linalg.norm(
265                np_height_points_down - np_height_points_up,
266                axis=1,
267            )
268            # Add one to compensate.
269            np_heights += 1
270
271            # Fill from large height to small height,
272            # in order to preserve small height labeling when two char boxes overlapped.
273            sorted_char_polygon_indices: Tuple[int, ...] = tuple(reversed(np_heights.argsort()))
274
275            char_heights = [0.0] * len(char_polygons)
276            char_height_score_map = ScoreMap.from_shapable(distorted_image, is_prob=False)
277
278            for idx in sorted_char_polygon_indices:
279                polygon = char_polygons[idx]
280                char_height = float(np_heights[idx])
281                char_heights[idx] = char_height
282
283                if fill_char_height_score_map_masks is None:
284                    polygon.fill_score_map(
285                        score_map=char_height_score_map,
286                        value=char_height,
287                    )
288                else:
289                    fill_char_height_score_map_mask = fill_char_height_score_map_masks[idx]
290                    fill_char_height_score_map_mask.fill_score_map(
291                        score_map=char_height_score_map,
292                        value=char_height,
293                    )
294
295            if self.config.enable_debug_distorted_char_heights:
296                painter = Painter.create(distorted_image)
297                painter.paint_polygons(char_polygons)
298
299                texts: List[str] = []
300                points = PointList()
301                for polygon, height in zip(char_polygons, char_heights):
302                    texts.append(f'{height:.1f}')
303                    points.append(polygon.get_center_point())
304                painter.paint_texts(texts, points, alpha=1.0)
305
306                char_heights_debug_image = painter.image
307
308        return (
309            char_mask,
310            seal_impression_char_mask,
311            char_height_score_map,
312            char_heights,
313            char_heights_debug_image,
314        )
315
316    def run(self, input: PageDistortionStepInput, rng: RandomGenerator):
317        page_assembler_step_output = input.page_assembler_step_output
318        page = page_assembler_step_output.page
319        page_bottom_layer_image = page.page_bottom_layer_image
320        page_char_polygon_collection = page.page_char_polygon_collection
321        page_text_line_polygon_collection = page.page_text_line_polygon_collection
322        page_disconnected_text_region_collection = page.page_disconnected_text_region_collection
323        page_non_text_region_collection = page.page_non_text_region_collection
324        page_seal_impression_char_polygon_collection = \
325            page.page_seal_impression_char_polygon_collection
326
327        # Flatten.
328        polygon_flattener = ElementFlattener([
329            # Char level.
330            page_char_polygon_collection.char_polygons,
331            page_char_polygon_collection.adjusted_char_polygons,
332            # Text line level.
333            page_text_line_polygon_collection.polygons,
334            # For char-level polygon regression.
335            tuple(page_disconnected_text_region_collection.to_polygons()),
336            # For sampling negative text region area.
337            tuple(page_non_text_region_collection.to_polygons()),
338            # For generating char-level seal impression labeling.
339            page_seal_impression_char_polygon_collection.char_polygons,
340        ])
341        point_flattener = ElementFlattener([
342            # Char level.
343            page_char_polygon_collection.height_points_up,
344            page_char_polygon_collection.height_points_down,
345            # Text line level.
346            page_text_line_polygon_collection.height_points_up,
347            page_text_line_polygon_collection.height_points_down,
348        ])
349
350        # Distort.
351        page_random_distortion_debug = None
352        if self.config.enable_debug_random_distortion:
353            page_random_distortion_debug = RandomDistortionDebug()
354
355        page_active_mask = Mask.from_shapable(page.image, value=1)
356        # To mitigate a bug in cv.remap, in which the border interpolation is wrong.
357        # This mitigation DO remove 1-pixel width border, but it should be fine.
358        with page_active_mask.writable_context:
359            page_active_mask.mat[0] = 0
360            page_active_mask.mat[-1] = 0
361            page_active_mask.mat[:, 0] = 0
362            page_active_mask.mat[:, -1] = 0
363
364        result = self.random_distortion.distort(
365            image=page.image,
366            mask=page_active_mask,
367            polygons=polygon_flattener.flatten(),
368            points=PointList(point_flattener.flatten()),
369            rng=rng,
370            debug=page_random_distortion_debug,
371        )
372        assert result.image
373        assert result.mask
374        assert result.polygons
375        assert result.points
376
377        # Fill inplace the inactive (black) region with page_bottom_layer_image.
378        self.fill_page_inactive_region(
379            page_image=result.image,
380            page_active_mask=result.mask,
381            page_bottom_layer_image=page_bottom_layer_image,
382        )
383
384        # Unflatten.
385        (
386            # Char level.
387            char_polygons,
388            adjusted_char_polygons,
389            # Text line level.
390            text_line_polygons,
391            # For char-level polygon regression.
392            disconnected_text_region_polygons,
393            # For sampling negative text region area.
394            non_text_region_polygons,
395            # For generating char-level seal impression labeling.
396            seal_impression_char_polygons,
397        ) = polygon_flattener.unflatten(result.polygons)
398
399        (
400            # Char level.
401            char_height_points_up,
402            char_height_points_down,
403            # Text line level.
404            text_line_height_points_up,
405            text_line_height_points_down,
406        ) = map(PointList, point_flattener.unflatten(result.points))
407
408        text_line_height_points_group_sizes = \
409            page_text_line_polygon_collection.height_points_group_sizes
410        assert len(text_line_polygons) == len(text_line_height_points_group_sizes)
411        assert len(text_line_height_points_up) == len(text_line_height_points_down)
412
413        # Labelings.
414        (
415            text_line_mask,
416            text_line_height_score_map,
417            text_line_heights,
418            text_line_heights_debug_image,
419        ) = self.generate_text_line_labelings(
420            distorted_image=result.image,
421            text_line_polygons=text_line_polygons,
422            text_line_height_points_up=text_line_height_points_up,
423            text_line_height_points_down=text_line_height_points_down,
424            text_line_height_points_group_sizes=text_line_height_points_group_sizes,
425        )
426        (
427            char_mask,
428            seal_impression_char_mask,
429            char_height_score_map,
430            char_heights,
431            char_heights_debug_image,
432        ) = self.generate_char_labelings(
433            distorted_image=result.image,
434            char_polygons=char_polygons,
435            seal_impression_char_polygons=seal_impression_char_polygons,
436            char_height_points_up=char_height_points_up,
437            char_height_points_down=char_height_points_down,
438        )
439
440        return PageDistortionStepOutput(
441            page_image=result.image,
442            page_random_distortion_debug=page_random_distortion_debug,
443            page_active_mask=result.mask,
444            page_char_polygon_collection=PageCharPolygonCollection(
445                height=result.image.height,
446                width=result.image.width,
447                char_polygons=char_polygons,
448                adjusted_char_polygons=adjusted_char_polygons,
449                height_points_up=char_height_points_up,
450                height_points_down=char_height_points_down,
451            ),
452            page_char_mask=char_mask,
453            page_seal_impression_char_mask=seal_impression_char_mask,
454            page_char_height_score_map=char_height_score_map,
455            page_char_heights=char_heights,
456            page_char_heights_debug_image=char_heights_debug_image,
457            page_text_line_polygon_collection=PageTextLinePolygonCollection(
458                height=result.image.height,
459                width=result.image.width,
460                polygons=text_line_polygons,
461                height_points_group_sizes=text_line_height_points_group_sizes,
462                height_points_up=text_line_height_points_up,
463                height_points_down=text_line_height_points_down,
464            ),
465            page_text_line_mask=text_line_mask,
466            page_text_line_height_score_map=text_line_height_score_map,
467            page_text_line_heights=text_line_heights,
468            page_text_line_heights_debug_image=text_line_heights_debug_image,
469            page_disconnected_text_region_collection=PageDisconnectedTextRegionCollection(
470                disconnected_text_regions=[
471                    DisconnectedTextRegion(disconnected_text_region_polygon)
472                    for disconnected_text_region_polygon in disconnected_text_region_polygons
473                ],
474            ),
475            page_non_text_region_collection=PageNonTextRegionCollection(
476                non_text_regions=[
477                    NonTextRegion(non_text_region_polygon)
478                    for non_text_region_polygon in non_text_region_polygons
479                ],
480            ),
481            page_seal_impression_char_polygon_collection=PageSealImpressionCharPolygonCollection(
482                char_polygons=seal_impression_char_polygons,
483            ),
484        )
485
486
# Module-level PipelineStepFactory wrapping PageDistortionStep.
page_distortion_step_factory = PipelineStepFactory(PageDistortionStep)
class PageDistortionStepConfig:
55class PageDistortionStepConfig:
56    random_distortion_factory_config: Optional[Union[Mapping[str, Any], PathType]] = attrs.field(
57        factory=lambda: {
58            # NOTE: defocus blur and zoom in blur introduce labeling noise.
59            # TODO: enhance those blurring methods for page.
60            'disabled_policy_names': [
61                'defocus_blur',
62                'zoom_in_blur',
63            ],
64        }
65    )
66    enable_debug_random_distortion: bool = False
67    enable_distorted_char_mask: bool = True
68    enable_distorted_seal_impression_char_mask: bool = True
69    char_mask_engine_config: Mapping[str, Any] = attrs.field(factory=lambda: {'type': 'default'})
70    enable_distorted_char_height_score_map: bool = True
71    enable_debug_distorted_char_heights: bool = False
72    enable_distorted_text_line_mask: bool = True
73    enable_distorted_text_line_height_score_map: bool = True
74    enable_debug_distorted_text_line_heights: bool = False
PageDistortionStepConfig( random_distortion_factory_config: Union[Mapping[str, Any], str, os.PathLike, NoneType] = NOTHING, enable_debug_random_distortion: bool = False, enable_distorted_char_mask: bool = True, enable_distorted_seal_impression_char_mask: bool = True, char_mask_engine_config: Mapping[str, Any] = NOTHING, enable_distorted_char_height_score_map: bool = True, enable_debug_distorted_char_heights: bool = False, enable_distorted_text_line_mask: bool = True, enable_distorted_text_line_height_score_map: bool = True, enable_debug_distorted_text_line_heights: bool = False)
 2def __init__(self, random_distortion_factory_config=NOTHING, enable_debug_random_distortion=attr_dict['enable_debug_random_distortion'].default, enable_distorted_char_mask=attr_dict['enable_distorted_char_mask'].default, enable_distorted_seal_impression_char_mask=attr_dict['enable_distorted_seal_impression_char_mask'].default, char_mask_engine_config=NOTHING, enable_distorted_char_height_score_map=attr_dict['enable_distorted_char_height_score_map'].default, enable_debug_distorted_char_heights=attr_dict['enable_debug_distorted_char_heights'].default, enable_distorted_text_line_mask=attr_dict['enable_distorted_text_line_mask'].default, enable_distorted_text_line_height_score_map=attr_dict['enable_distorted_text_line_height_score_map'].default, enable_debug_distorted_text_line_heights=attr_dict['enable_debug_distorted_text_line_heights'].default):
 3    if random_distortion_factory_config is not NOTHING:
 4        self.random_distortion_factory_config = random_distortion_factory_config
 5    else:
 6        self.random_distortion_factory_config = __attr_factory_random_distortion_factory_config()
 7    self.enable_debug_random_distortion = enable_debug_random_distortion
 8    self.enable_distorted_char_mask = enable_distorted_char_mask
 9    self.enable_distorted_seal_impression_char_mask = enable_distorted_seal_impression_char_mask
10    if char_mask_engine_config is not NOTHING:
11        self.char_mask_engine_config = char_mask_engine_config
12    else:
13        self.char_mask_engine_config = __attr_factory_char_mask_engine_config()
14    self.enable_distorted_char_height_score_map = enable_distorted_char_height_score_map
15    self.enable_debug_distorted_char_heights = enable_debug_distorted_char_heights
16    self.enable_distorted_text_line_mask = enable_distorted_text_line_mask
17    self.enable_distorted_text_line_height_score_map = enable_distorted_text_line_height_score_map
18    self.enable_debug_distorted_text_line_heights = enable_debug_distorted_text_line_heights

Method generated by attrs for class PageDistortionStepConfig.

class PageDistortionStepInput:
78class PageDistortionStepInput:
79    page_assembler_step_output: PageAssemblerStepOutput
PageDistortionStepInput( page_assembler_step_output: vkit.pipeline.text_detection.page_assembler.PageAssemblerStepOutput)
2def __init__(self, page_assembler_step_output):
3    self.page_assembler_step_output = page_assembler_step_output

Method generated by attrs for class PageDistortionStepInput.

class PageDistortionStepOutput:
 83class PageDistortionStepOutput:
 84    page_image: Image
 85    page_random_distortion_debug: Optional[RandomDistortionDebug]
 86    page_active_mask: Mask
 87    page_char_polygon_collection: PageCharPolygonCollection
 88    page_char_mask: Optional[Mask]
 89    page_seal_impression_char_mask: Optional[Mask]
 90    page_char_height_score_map: Optional[ScoreMap]
 91    page_char_heights: Optional[Sequence[float]]
 92    page_char_heights_debug_image: Optional[Image]
 93    page_text_line_polygon_collection: PageTextLinePolygonCollection
 94    page_text_line_mask: Optional[Mask]
 95    page_text_line_height_score_map: Optional[ScoreMap]
 96    page_text_line_heights: Optional[Sequence[float]]
 97    page_text_line_heights_debug_image: Optional[Image]
 98    page_disconnected_text_region_collection: PageDisconnectedTextRegionCollection
 99    page_non_text_region_collection: PageNonTextRegionCollection
100    page_seal_impression_char_polygon_collection: PageSealImpressionCharPolygonCollection
PageDistortionStepOutput( page_image: vkit.element.image.Image, page_random_distortion_debug: Union[vkit.mechanism.distortion_policy.random_distortion.RandomDistortionDebug, NoneType], page_active_mask: vkit.element.mask.Mask, page_char_polygon_collection: vkit.pipeline.text_detection.page_text_line_label.PageCharPolygonCollection, page_char_mask: Union[vkit.element.mask.Mask, NoneType], page_seal_impression_char_mask: Union[vkit.element.mask.Mask, NoneType], page_char_height_score_map: Union[vkit.element.score_map.ScoreMap, NoneType], page_char_heights: Union[Sequence[float], NoneType], page_char_heights_debug_image: Union[vkit.element.image.Image, NoneType], page_text_line_polygon_collection: vkit.pipeline.text_detection.page_text_line_label.PageTextLinePolygonCollection, page_text_line_mask: Union[vkit.element.mask.Mask, NoneType], page_text_line_height_score_map: Union[vkit.element.score_map.ScoreMap, NoneType], page_text_line_heights: Union[Sequence[float], NoneType], page_text_line_heights_debug_image: Union[vkit.element.image.Image, NoneType], page_disconnected_text_region_collection: vkit.pipeline.text_detection.page_assembler.PageDisconnectedTextRegionCollection, page_non_text_region_collection: vkit.pipeline.text_detection.page_assembler.PageNonTextRegionCollection, page_seal_impression_char_polygon_collection: vkit.pipeline.text_detection.page_assembler.PageSealImpressionCharPolygonCollection)
 2def __init__(self, page_image, page_random_distortion_debug, page_active_mask, page_char_polygon_collection, page_char_mask, page_seal_impression_char_mask, page_char_height_score_map, page_char_heights, page_char_heights_debug_image, page_text_line_polygon_collection, page_text_line_mask, page_text_line_height_score_map, page_text_line_heights, page_text_line_heights_debug_image, page_disconnected_text_region_collection, page_non_text_region_collection, page_seal_impression_char_polygon_collection):
 3    self.page_image = page_image
 4    self.page_random_distortion_debug = page_random_distortion_debug
 5    self.page_active_mask = page_active_mask
 6    self.page_char_polygon_collection = page_char_polygon_collection
 7    self.page_char_mask = page_char_mask
 8    self.page_seal_impression_char_mask = page_seal_impression_char_mask
 9    self.page_char_height_score_map = page_char_height_score_map
10    self.page_char_heights = page_char_heights
11    self.page_char_heights_debug_image = page_char_heights_debug_image
12    self.page_text_line_polygon_collection = page_text_line_polygon_collection
13    self.page_text_line_mask = page_text_line_mask
14    self.page_text_line_height_score_map = page_text_line_height_score_map
15    self.page_text_line_heights = page_text_line_heights
16    self.page_text_line_heights_debug_image = page_text_line_heights_debug_image
17    self.page_disconnected_text_region_collection = page_disconnected_text_region_collection
18    self.page_non_text_region_collection = page_non_text_region_collection
19    self.page_seal_impression_char_polygon_collection = page_seal_impression_char_polygon_collection

Method generated by attrs for class PageDistortionStepOutput.

class ElementFlattener(typing.Generic[~_E]):
108class ElementFlattener(Generic[_E]):
109
110    def __init__(self, grouped_elements: Sequence[Sequence[_E]]):
111        self.grouped_elements = grouped_elements
112        self.group_sizes = [len(elements) for elements in grouped_elements]
113
114    def flatten(self):
115        return tuple(itertools.chain.from_iterable(self.grouped_elements))
116
117    def unflatten(self, flattened_elements: Sequence[_E]) -> Sequence[Sequence[_E]]:
118        assert len(flattened_elements) == sum(self.group_sizes)
119        grouped_elements: List[Sequence[_E]] = []
120        begin = 0
121        for group_size in self.group_sizes:
122            end = begin + group_size
123            grouped_elements.append(flattened_elements[begin:end])
124            begin = end
125        return grouped_elements

Abstract base class for generic types.

A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::

    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
        # Etc.

This class can then be used as follows::

    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default

ElementFlattener(grouped_elements: Sequence[Sequence[~_E]])
110    def __init__(self, grouped_elements: Sequence[Sequence[_E]]):
111        self.grouped_elements = grouped_elements
112        self.group_sizes = [len(elements) for elements in grouped_elements]
def flatten(self):
114    def flatten(self):
115        return tuple(itertools.chain.from_iterable(self.grouped_elements))
def unflatten(self, flattened_elements: Sequence[~_E]) -> Sequence[Sequence[~_E]]:
117    def unflatten(self, flattened_elements: Sequence[_E]) -> Sequence[Sequence[_E]]:
118        assert len(flattened_elements) == sum(self.group_sizes)
119        grouped_elements: List[Sequence[_E]] = []
120        begin = 0
121        for group_size in self.group_sizes:
122            end = begin + group_size
123            grouped_elements.append(flattened_elements[begin:end])
124            begin = end
125        return grouped_elements
class PageDistortionStep(
    PipelineStep[
        PageDistortionStepConfig,
        PageDistortionStepInput,
        PageDistortionStepOutput,
    ]
):  # yapf: disable
    """Pipeline step that randomly distorts an assembled page and rebuilds its labelings.

    The page image, an all-active mask, the page polygons and the height points
    are passed through one random distortion call. Masks and height score maps
    are then regenerated from the distorted geometry, depending on the
    `enable_*` switches of the step config.
    """

    def __init__(self, config: PageDistortionStepConfig):
        """Build the random distortion and the char mask engine executor from `config`."""
        super().__init__(config)

        self.random_distortion = random_distortion_factory.create(
            self.config.random_distortion_factory_config
        )
        self.char_mask_engine_executor = \
            char_mask_engine_executor_aggregator_factory.create_engine_executor(
                self.config.char_mask_engine_config
            )

    @classmethod
    def fill_page_inactive_region(
        cls,
        page_image: Image,
        page_active_mask: Mask,
        page_bottom_layer_image: Image,
    ) -> None:
        """Fill, in place, the inactive region of `page_image` with the bottom layer image.

        The inactive region is the inversion of `page_active_mask`. The bottom
        layer image is resized to the shape of `page_image` first if the shapes
        differ.

        Raises:
            AssertionError: if `page_image` and `page_active_mask` differ in shape.
        """
        assert page_image.shape == page_active_mask.shape

        if page_bottom_layer_image.shape != page_image.shape:
            page_bottom_layer_image = page_bottom_layer_image.to_resized_image(
                resized_height=page_image.height,
                resized_width=page_image.width,
            )

        page_active_mask.to_inverted_mask().fill_image(page_image, page_bottom_layer_image)

    def generate_text_line_labelings(
        self,
        distorted_image: Image,
        text_line_polygons: Sequence[Polygon],
        text_line_height_points_up: PointList,
        text_line_height_points_down: PointList,
        text_line_height_points_group_sizes: Sequence[int],
    ) -> Tuple[Optional[Mask], Optional[ScoreMap], Optional[List[float]], Optional[Image]]:
        """Generate text-line-level labelings for the distorted page.

        Args:
            distorted_image: distorted page image, used for the shape of the
                generated mask / score map and as the canvas of the debug image.
            text_line_polygons: distorted text line polygons.
            text_line_height_points_up: distorted upper height points.
            text_line_height_points_down: distorted lower height points, paired
                index-wise with the upper ones.
            text_line_height_points_group_sizes: number of height point pairs
                belonging to each text line polygon, in polygon order.

        Returns:
            `(text_line_mask, text_line_height_score_map, text_line_heights,
            text_line_heights_debug_image)`. An item is None if the corresponding
            config switch is off. The debug image is generated only if the
            height score map is enabled as well.
        """
        text_line_mask: Optional[Mask] = None
        if self.config.enable_distorted_text_line_mask:
            text_line_mask = Mask.from_shapable(distorted_image)
            for polygon in text_line_polygons:
                polygon.fill_mask(text_line_mask)

        text_line_height_score_map: Optional[ScoreMap] = None
        text_line_heights: Optional[List[float]] = None
        text_line_heights_debug_image: Optional[Image] = None

        if self.config.enable_distorted_text_line_height_score_map:
            # One height per (up, down) point pair: the row-wise Euclidean distance.
            np_height_points_up = text_line_height_points_up.to_smooth_np_array()
            np_height_points_down = text_line_height_points_down.to_smooth_np_array()
            np_heights: np.ndarray = np.linalg.norm(
                np_height_points_down - np_height_points_up,
                axis=1,
            )
            # Add one to compensate.
            np_heights += 1
            assert sum(text_line_height_points_group_sizes) == np_heights.shape[0]

            text_line_heights = []
            text_line_height_score_map = ScoreMap.from_shapable(distorted_image, is_prob=False)
            # Heights are laid out group by group; the height of a text line is
            # the mean over the half-open range [begin, end) of its group.
            begin = 0
            for polygon, group_size in zip(text_line_polygons, text_line_height_points_group_sizes):
                end = begin + group_size
                text_line_height = float(np_heights[begin:end].mean())
                text_line_heights.append(text_line_height)
                polygon.fill_score_map(
                    score_map=text_line_height_score_map,
                    value=text_line_height,
                )
                begin = end

            if self.config.enable_debug_distorted_text_line_heights:
                # Paint polygons and print the height at the center of each polygon.
                painter = Painter.create(distorted_image)
                painter.paint_polygons(text_line_polygons)

                texts: List[str] = []
                points = PointList()
                for polygon, height in zip(text_line_polygons, text_line_heights):
                    texts.append(f'{height:.1f}')
                    points.append(polygon.get_center_point())
                painter.paint_texts(texts, points, alpha=1.0)

                text_line_heights_debug_image = painter.image

        return (
            text_line_mask,
            text_line_height_score_map,
            text_line_heights,
            text_line_heights_debug_image,
        )

    def generate_char_labelings(
        self,
        distorted_image: Image,
        char_polygons: Sequence[Polygon],
        seal_impression_char_polygons: Sequence[Polygon],
        char_height_points_up: PointList,
        char_height_points_down: PointList,
    ) -> Tuple[
        Optional[Mask],
        Optional[Mask],
        Optional[ScoreMap],
        Optional[List[float]],
        Optional[Image],
    ]:  # yapf: disable
        """Generate char-level labelings for the distorted page.

        Args:
            distorted_image: distorted page image, used for the height / width of
                the generated labelings and as the canvas of the debug image.
            char_polygons: distorted char polygons.
            seal_impression_char_polygons: distorted seal impression char polygons.
            char_height_points_up: distorted upper height points.
            char_height_points_down: distorted lower height points, paired
                index-wise with the upper ones.

        Returns:
            `(char_mask, seal_impression_char_mask, char_height_score_map,
            char_heights, char_heights_debug_image)`. An item is None if the
            corresponding config switch is off. The debug image is generated only
            if the height score map is enabled as well.
        """
        char_mask: Optional[Mask] = None
        # Per-char masks. If available, they are used instead of the polygons
        # when filling the char height score map.
        fill_char_height_score_map_masks: Optional[Sequence[Mask]] = None
        if self.config.enable_distorted_char_mask:
            result = self.char_mask_engine_executor.run(
                CharMaskEngineRunConfig(
                    height=distorted_image.height,
                    width=distorted_image.width,
                    char_polygons=char_polygons,
                ),
            )
            char_mask = result.combined_chars_mask
            fill_char_height_score_map_masks = result.char_masks

        seal_impression_char_mask: Optional[Mask] = None
        if self.config.enable_distorted_seal_impression_char_mask:
            result = self.char_mask_engine_executor.run(
                CharMaskEngineRunConfig(
                    height=distorted_image.height,
                    width=distorted_image.width,
                    char_polygons=seal_impression_char_polygons,
                ),
            )
            seal_impression_char_mask = result.combined_chars_mask

        char_height_score_map: Optional[ScoreMap] = None
        char_heights: Optional[List[float]] = None
        char_heights_debug_image: Optional[Image] = None

        if self.config.enable_distorted_char_height_score_map:
            # One height per (up, down) point pair: the row-wise Euclidean distance.
            # NOTE(review): the loop below assumes one point pair per char polygon.
            np_height_points_up = char_height_points_up.to_smooth_np_array()
            np_height_points_down = char_height_points_down.to_smooth_np_array()
            np_heights: np.ndarray = np.linalg.norm(
                np_height_points_down - np_height_points_up,
                axis=1,
            )
            # Add one to compensate.
            np_heights += 1

            # Fill from large height to small height,
            # in order to preserve small height labeling when two char boxes overlapped.
            # `tolist` turns the numpy integers into plain ints.
            sorted_char_polygon_indices: List[int] = np_heights.argsort()[::-1].tolist()

            char_heights = [0.0] * len(char_polygons)
            char_height_score_map = ScoreMap.from_shapable(distorted_image, is_prob=False)

            for idx in sorted_char_polygon_indices:
                polygon = char_polygons[idx]
                char_height = float(np_heights[idx])
                char_heights[idx] = char_height

                if fill_char_height_score_map_masks is None:
                    polygon.fill_score_map(
                        score_map=char_height_score_map,
                        value=char_height,
                    )
                else:
                    fill_char_height_score_map_mask = fill_char_height_score_map_masks[idx]
                    fill_char_height_score_map_mask.fill_score_map(
                        score_map=char_height_score_map,
                        value=char_height,
                    )

            if self.config.enable_debug_distorted_char_heights:
                # Paint polygons and print the height at the center of each polygon.
                painter = Painter.create(distorted_image)
                painter.paint_polygons(char_polygons)

                texts: List[str] = []
                points = PointList()
                for polygon, height in zip(char_polygons, char_heights):
                    texts.append(f'{height:.1f}')
                    points.append(polygon.get_center_point())
                painter.paint_texts(texts, points, alpha=1.0)

                char_heights_debug_image = painter.image

        return (
            char_mask,
            seal_impression_char_mask,
            char_height_score_map,
            char_heights,
            char_heights_debug_image,
        )

    def run(self, input: PageDistortionStepInput, rng: RandomGenerator) -> PageDistortionStepOutput:
        """Distort the assembled page and generate the labelings of the distorted page.

        Args:
            input: step input carrying the output of the page assembler step.
            rng: random generator passed to the random distortion.

        Returns:
            The distorted page image, its active mask, the distorted polygon /
            point collections and the labelings generated from them.

        Raises:
            AssertionError: if the distortion result lacks the image, mask,
                polygons or points, or if the text line collections are inconsistent.
        """
        page_assembler_step_output = input.page_assembler_step_output
        page = page_assembler_step_output.page
        page_bottom_layer_image = page.page_bottom_layer_image
        page_char_polygon_collection = page.page_char_polygon_collection
        page_text_line_polygon_collection = page.page_text_line_polygon_collection
        page_disconnected_text_region_collection = page.page_disconnected_text_region_collection
        page_non_text_region_collection = page.page_non_text_region_collection
        page_seal_impression_char_polygon_collection = \
            page.page_seal_impression_char_polygon_collection

        # Flatten.
        # The group order here must match the unpacking order in the unflatten section.
        polygon_flattener = ElementFlattener([
            # Char level.
            page_char_polygon_collection.char_polygons,
            page_char_polygon_collection.adjusted_char_polygons,
            # Text line level.
            page_text_line_polygon_collection.polygons,
            # For char-level polygon regression.
            tuple(page_disconnected_text_region_collection.to_polygons()),
            # For sampling negative text region area.
            tuple(page_non_text_region_collection.to_polygons()),
            # For generating char-level seal impression labeling.
            page_seal_impression_char_polygon_collection.char_polygons,
        ])
        point_flattener = ElementFlattener([
            # Char level.
            page_char_polygon_collection.height_points_up,
            page_char_polygon_collection.height_points_down,
            # Text line level.
            page_text_line_polygon_collection.height_points_up,
            page_text_line_polygon_collection.height_points_down,
        ])

        # Distort.
        page_random_distortion_debug = None
        if self.config.enable_debug_random_distortion:
            page_random_distortion_debug = RandomDistortionDebug()

        page_active_mask = Mask.from_shapable(page.image, value=1)
        # To mitigate a bug in cv.remap, in which the border interpolation is wrong.
        # This mitigation DOES remove a 1-pixel-wide border, but it should be fine.
        with page_active_mask.writable_context:
            page_active_mask.mat[0] = 0
            page_active_mask.mat[-1] = 0
            page_active_mask.mat[:, 0] = 0
            page_active_mask.mat[:, -1] = 0

        result = self.random_distortion.distort(
            image=page.image,
            mask=page_active_mask,
            polygons=polygon_flattener.flatten(),
            points=PointList(point_flattener.flatten()),
            rng=rng,
            debug=page_random_distortion_debug,
        )
        # Check presence explicitly instead of truthiness, since an empty
        # (but present) sequence of polygons / points is falsy.
        assert result.image is not None
        assert result.mask is not None
        assert result.polygons is not None
        assert result.points is not None

        # Fill inplace the inactive (black) region with page_bottom_layer_image.
        self.fill_page_inactive_region(
            page_image=result.image,
            page_active_mask=result.mask,
            page_bottom_layer_image=page_bottom_layer_image,
        )

        # Unflatten.
        (
            # Char level.
            char_polygons,
            adjusted_char_polygons,
            # Text line level.
            text_line_polygons,
            # For char-level polygon regression.
            disconnected_text_region_polygons,
            # For sampling negative text region area.
            non_text_region_polygons,
            # For generating char-level seal impression labeling.
            seal_impression_char_polygons,
        ) = polygon_flattener.unflatten(result.polygons)

        (
            # Char level.
            char_height_points_up,
            char_height_points_down,
            # Text line level.
            text_line_height_points_up,
            text_line_height_points_down,
        ) = map(PointList, point_flattener.unflatten(result.points))

        # The group sizes are taken from the undistorted collection unchanged.
        text_line_height_points_group_sizes = \
            page_text_line_polygon_collection.height_points_group_sizes
        assert len(text_line_polygons) == len(text_line_height_points_group_sizes)
        assert len(text_line_height_points_up) == len(text_line_height_points_down)

        # Labelings.
        (
            text_line_mask,
            text_line_height_score_map,
            text_line_heights,
            text_line_heights_debug_image,
        ) = self.generate_text_line_labelings(
            distorted_image=result.image,
            text_line_polygons=text_line_polygons,
            text_line_height_points_up=text_line_height_points_up,
            text_line_height_points_down=text_line_height_points_down,
            text_line_height_points_group_sizes=text_line_height_points_group_sizes,
        )
        (
            char_mask,
            seal_impression_char_mask,
            char_height_score_map,
            char_heights,
            char_heights_debug_image,
        ) = self.generate_char_labelings(
            distorted_image=result.image,
            char_polygons=char_polygons,
            seal_impression_char_polygons=seal_impression_char_polygons,
            char_height_points_up=char_height_points_up,
            char_height_points_down=char_height_points_down,
        )

        return PageDistortionStepOutput(
            page_image=result.image,
            page_random_distortion_debug=page_random_distortion_debug,
            page_active_mask=result.mask,
            page_char_polygon_collection=PageCharPolygonCollection(
                height=result.image.height,
                width=result.image.width,
                char_polygons=char_polygons,
                adjusted_char_polygons=adjusted_char_polygons,
                height_points_up=char_height_points_up,
                height_points_down=char_height_points_down,
            ),
            page_char_mask=char_mask,
            page_seal_impression_char_mask=seal_impression_char_mask,
            page_char_height_score_map=char_height_score_map,
            page_char_heights=char_heights,
            page_char_heights_debug_image=char_heights_debug_image,
            page_text_line_polygon_collection=PageTextLinePolygonCollection(
                height=result.image.height,
                width=result.image.width,
                polygons=text_line_polygons,
                height_points_group_sizes=text_line_height_points_group_sizes,
                height_points_up=text_line_height_points_up,
                height_points_down=text_line_height_points_down,
            ),
            page_text_line_mask=text_line_mask,
            page_text_line_height_score_map=text_line_height_score_map,
            page_text_line_heights=text_line_heights,
            page_text_line_heights_debug_image=text_line_heights_debug_image,
            page_disconnected_text_region_collection=PageDisconnectedTextRegionCollection(
                disconnected_text_regions=[
                    DisconnectedTextRegion(disconnected_text_region_polygon)
                    for disconnected_text_region_polygon in disconnected_text_region_polygons
                ],
            ),
            page_non_text_region_collection=PageNonTextRegionCollection(
                non_text_regions=[
                    NonTextRegion(non_text_region_polygon)
                    for non_text_region_polygon in non_text_region_polygons
                ],
            ),
            page_seal_impression_char_polygon_collection=PageSealImpressionCharPolygonCollection(
                char_polygons=seal_impression_char_polygons,
            ),
        )

Abstract base class for generic types.

A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::

    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
        # Etc.

This class can then be used as follows::

    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default

136    def __init__(self, config: PageDistortionStepConfig):
137        super().__init__(config)
138
139        self.random_distortion = random_distortion_factory.create(
140            self.config.random_distortion_factory_config
141        )
142        self.char_mask_engine_executor = \
143            char_mask_engine_executor_aggregator_factory.create_engine_executor(
144                self.config.char_mask_engine_config
145            )
@classmethod
def fill_page_inactive_region( cls, page_image: vkit.element.image.Image, page_active_mask: vkit.element.mask.Mask, page_bottom_layer_image: vkit.element.image.Image):
147    @classmethod
148    def fill_page_inactive_region(
149        cls,
150        page_image: Image,
151        page_active_mask: Mask,
152        page_bottom_layer_image: Image,
153    ):
154        assert page_image.shape == page_active_mask.shape
155
156        if page_bottom_layer_image.shape != page_image.shape:
157            page_bottom_layer_image = page_bottom_layer_image.to_resized_image(
158                resized_height=page_image.height,
159                resized_width=page_image.width,
160            )
161
162        page_active_mask.to_inverted_mask().fill_image(page_image, page_bottom_layer_image)
def generate_text_line_labelings( self, distorted_image: vkit.element.image.Image, text_line_polygons: Sequence[vkit.element.polygon.Polygon], text_line_height_points_up: vkit.element.point.PointList, text_line_height_points_down: vkit.element.point.PointList, text_line_height_points_group_sizes: Sequence[int]):
164    def generate_text_line_labelings(
165        self,
166        distorted_image: Image,
167        text_line_polygons: Sequence[Polygon],
168        text_line_height_points_up: PointList,
169        text_line_height_points_down: PointList,
170        text_line_height_points_group_sizes: Sequence[int],
171    ):
172        text_line_mask: Optional[Mask] = None
173        if self.config.enable_distorted_text_line_mask:
174            text_line_mask = Mask.from_shapable(distorted_image)
175            for polygon in text_line_polygons:
176                polygon.fill_mask(text_line_mask)
177
178        text_line_height_score_map: Optional[ScoreMap] = None
179        text_line_heights: Optional[List[float]] = None
180        text_line_heights_debug_image: Optional[Image] = None
181
182        if self.config.enable_distorted_text_line_height_score_map:
183            np_height_points_up = text_line_height_points_up.to_smooth_np_array()
184            np_height_points_down = text_line_height_points_down.to_smooth_np_array()
185            np_heights: np.ndarray = np.linalg.norm(
186                np_height_points_down - np_height_points_up,
187                axis=1,
188            )
189            # Add one to compensate.
190            np_heights += 1
191            assert sum(text_line_height_points_group_sizes) == np_heights.shape[0]
192
193            text_line_heights = []
194            text_line_height_score_map = ScoreMap.from_shapable(distorted_image, is_prob=False)
195            begin = 0
196            for polygon, group_size in zip(text_line_polygons, text_line_height_points_group_sizes):
197                end = begin + group_size - 1
198                text_line_height = float(np_heights[begin:end + 1].mean())
199                text_line_heights.append(text_line_height)
200                polygon.fill_score_map(
201                    score_map=text_line_height_score_map,
202                    value=text_line_height,
203                )
204                begin = end + 1
205
206            if self.config.enable_debug_distorted_text_line_heights:
207                painter = Painter.create(distorted_image)
208                painter.paint_polygons(text_line_polygons)
209
210                texts: List[str] = []
211                points = PointList()
212                for polygon, height in zip(text_line_polygons, text_line_heights):
213                    texts.append(f'{height:.1f}')
214                    points.append(polygon.get_center_point())
215                painter.paint_texts(texts, points, alpha=1.0)
216
217                text_line_heights_debug_image = painter.image
218
219        return (
220            text_line_mask,
221            text_line_height_score_map,
222            text_line_heights,
223            text_line_heights_debug_image,
224        )
def generate_char_labelings( self, distorted_image: vkit.element.image.Image, char_polygons: Sequence[vkit.element.polygon.Polygon], seal_impression_char_polygons: Sequence[vkit.element.polygon.Polygon], char_height_points_up: vkit.element.point.PointList, char_height_points_down: vkit.element.point.PointList):
226    def generate_char_labelings(
227        self,
228        distorted_image: Image,
229        char_polygons: Sequence[Polygon],
230        seal_impression_char_polygons: Sequence[Polygon],
231        char_height_points_up: PointList,
232        char_height_points_down: PointList,
233    ):
234        char_mask: Optional[Mask] = None
235        fill_char_height_score_map_masks: Optional[Sequence[Mask]] = None
236        if self.config.enable_distorted_char_mask:
237            result = self.char_mask_engine_executor.run(
238                CharMaskEngineRunConfig(
239                    height=distorted_image.height,
240                    width=distorted_image.width,
241                    char_polygons=char_polygons,
242                ),
243            )
244            char_mask = result.combined_chars_mask
245            fill_char_height_score_map_masks = result.char_masks
246
247        seal_impression_char_mask: Optional[Mask] = None
248        if self.config.enable_distorted_seal_impression_char_mask:
249            result = self.char_mask_engine_executor.run(
250                CharMaskEngineRunConfig(
251                    height=distorted_image.height,
252                    width=distorted_image.width,
253                    char_polygons=seal_impression_char_polygons,
254                ),
255            )
256            seal_impression_char_mask = result.combined_chars_mask
257
258        char_height_score_map: Optional[ScoreMap] = None
259        char_heights: Optional[List[float]] = None
260        char_heights_debug_image: Optional[Image] = None
261
262        if self.config.enable_distorted_char_height_score_map:
263            np_height_points_up = char_height_points_up.to_smooth_np_array()
264            np_height_points_down = char_height_points_down.to_smooth_np_array()
265            np_heights: np.ndarray = np.linalg.norm(
266                np_height_points_down - np_height_points_up,
267                axis=1,
268            )
269            # Add one to compensate.
270            np_heights += 1
271
272            # Fill from large height to small height,
273            # in order to preserve small height labeling when two char boxes overlapped.
274            sorted_char_polygon_indices: Tuple[int, ...] = tuple(reversed(np_heights.argsort()))
275
276            char_heights = [0.0] * len(char_polygons)
277            char_height_score_map = ScoreMap.from_shapable(distorted_image, is_prob=False)
278
279            for idx in sorted_char_polygon_indices:
280                polygon = char_polygons[idx]
281                char_height = float(np_heights[idx])
282                char_heights[idx] = char_height
283
284                if fill_char_height_score_map_masks is None:
285                    polygon.fill_score_map(
286                        score_map=char_height_score_map,
287                        value=char_height,
288                    )
289                else:
290                    fill_char_height_score_map_mask = fill_char_height_score_map_masks[idx]
291                    fill_char_height_score_map_mask.fill_score_map(
292                        score_map=char_height_score_map,
293                        value=char_height,
294                    )
295
296            if self.config.enable_debug_distorted_char_heights:
297                painter = Painter.create(distorted_image)
298                painter.paint_polygons(char_polygons)
299
300                texts: List[str] = []
301                points = PointList()
302                for polygon, height in zip(char_polygons, char_heights):
303                    texts.append(f'{height:.1f}')
304                    points.append(polygon.get_center_point())
305                painter.paint_texts(texts, points, alpha=1.0)
306
307                char_heights_debug_image = painter.image
308
309        return (
310            char_mask,
311            seal_impression_char_mask,
312            char_height_score_map,
313            char_heights,
314            char_heights_debug_image,
315        )
def run( self, input: vkit.pipeline.text_detection.page_distortion.PageDistortionStepInput, rng: numpy.random._generator.Generator):
317    def run(self, input: PageDistortionStepInput, rng: RandomGenerator):
318        page_assembler_step_output = input.page_assembler_step_output
319        page = page_assembler_step_output.page
320        page_bottom_layer_image = page.page_bottom_layer_image
321        page_char_polygon_collection = page.page_char_polygon_collection
322        page_text_line_polygon_collection = page.page_text_line_polygon_collection
323        page_disconnected_text_region_collection = page.page_disconnected_text_region_collection
324        page_non_text_region_collection = page.page_non_text_region_collection
325        page_seal_impression_char_polygon_collection = \
326            page.page_seal_impression_char_polygon_collection
327
328        # Flatten.
329        polygon_flattener = ElementFlattener([
330            # Char level.
331            page_char_polygon_collection.char_polygons,
332            page_char_polygon_collection.adjusted_char_polygons,
333            # Text line level.
334            page_text_line_polygon_collection.polygons,
335            # For char-level polygon regression.
336            tuple(page_disconnected_text_region_collection.to_polygons()),
337            # For sampling negative text region area.
338            tuple(page_non_text_region_collection.to_polygons()),
339            # For generating char-level seal impression labeling.
340            page_seal_impression_char_polygon_collection.char_polygons,
341        ])
342        point_flattener = ElementFlattener([
343            # Char level.
344            page_char_polygon_collection.height_points_up,
345            page_char_polygon_collection.height_points_down,
346            # Text line level.
347            page_text_line_polygon_collection.height_points_up,
348            page_text_line_polygon_collection.height_points_down,
349        ])
350
351        # Distort.
352        page_random_distortion_debug = None
353        if self.config.enable_debug_random_distortion:
354            page_random_distortion_debug = RandomDistortionDebug()
355
356        page_active_mask = Mask.from_shapable(page.image, value=1)
357        # To mitigate a bug in cv.remap, in which the border interpolation is wrong.
358        # This mitigation DO remove 1-pixel width border, but it should be fine.
359        with page_active_mask.writable_context:
360            page_active_mask.mat[0] = 0
361            page_active_mask.mat[-1] = 0
362            page_active_mask.mat[:, 0] = 0
363            page_active_mask.mat[:, -1] = 0
364
365        result = self.random_distortion.distort(
366            image=page.image,
367            mask=page_active_mask,
368            polygons=polygon_flattener.flatten(),
369            points=PointList(point_flattener.flatten()),
370            rng=rng,
371            debug=page_random_distortion_debug,
372        )
373        assert result.image
374        assert result.mask
375        assert result.polygons
376        assert result.points
377
378        # Fill inplace the inactive (black) region with page_bottom_layer_image.
379        self.fill_page_inactive_region(
380            page_image=result.image,
381            page_active_mask=result.mask,
382            page_bottom_layer_image=page_bottom_layer_image,
383        )
384
385        # Unflatten.
386        (
387            # Char level.
388            char_polygons,
389            adjusted_char_polygons,
390            # Text line level.
391            text_line_polygons,
392            # For char-level polygon regression.
393            disconnected_text_region_polygons,
394            # For sampling negative text region area.
395            non_text_region_polygons,
396            # For generating char-level seal impression labeling.
397            seal_impression_char_polygons,
398        ) = polygon_flattener.unflatten(result.polygons)
399
400        (
401            # Char level.
402            char_height_points_up,
403            char_height_points_down,
404            # Text line level.
405            text_line_height_points_up,
406            text_line_height_points_down,
407        ) = map(PointList, point_flattener.unflatten(result.points))
408
409        text_line_height_points_group_sizes = \
410            page_text_line_polygon_collection.height_points_group_sizes
411        assert len(text_line_polygons) == len(text_line_height_points_group_sizes)
412        assert len(text_line_height_points_up) == len(text_line_height_points_down)
413
414        # Labelings.
415        (
416            text_line_mask,
417            text_line_height_score_map,
418            text_line_heights,
419            text_line_heights_debug_image,
420        ) = self.generate_text_line_labelings(
421            distorted_image=result.image,
422            text_line_polygons=text_line_polygons,
423            text_line_height_points_up=text_line_height_points_up,
424            text_line_height_points_down=text_line_height_points_down,
425            text_line_height_points_group_sizes=text_line_height_points_group_sizes,
426        )
427        (
428            char_mask,
429            seal_impression_char_mask,
430            char_height_score_map,
431            char_heights,
432            char_heights_debug_image,
433        ) = self.generate_char_labelings(
434            distorted_image=result.image,
435            char_polygons=char_polygons,
436            seal_impression_char_polygons=seal_impression_char_polygons,
437            char_height_points_up=char_height_points_up,
438            char_height_points_down=char_height_points_down,
439        )
440
441        return PageDistortionStepOutput(
442            page_image=result.image,
443            page_random_distortion_debug=page_random_distortion_debug,
444            page_active_mask=result.mask,
445            page_char_polygon_collection=PageCharPolygonCollection(
446                height=result.image.height,
447                width=result.image.width,
448                char_polygons=char_polygons,
449                adjusted_char_polygons=adjusted_char_polygons,
450                height_points_up=char_height_points_up,
451                height_points_down=char_height_points_down,
452            ),
453            page_char_mask=char_mask,
454            page_seal_impression_char_mask=seal_impression_char_mask,
455            page_char_height_score_map=char_height_score_map,
456            page_char_heights=char_heights,
457            page_char_heights_debug_image=char_heights_debug_image,
458            page_text_line_polygon_collection=PageTextLinePolygonCollection(
459                height=result.image.height,
460                width=result.image.width,
461                polygons=text_line_polygons,
462                height_points_group_sizes=text_line_height_points_group_sizes,
463                height_points_up=text_line_height_points_up,
464                height_points_down=text_line_height_points_down,
465            ),
466            page_text_line_mask=text_line_mask,
467            page_text_line_height_score_map=text_line_height_score_map,
468            page_text_line_heights=text_line_heights,
469            page_text_line_heights_debug_image=text_line_heights_debug_image,
470            page_disconnected_text_region_collection=PageDisconnectedTextRegionCollection(
471                disconnected_text_regions=[
472                    DisconnectedTextRegion(disconnected_text_region_polygon)
473                    for disconnected_text_region_polygon in disconnected_text_region_polygons
474                ],
475            ),
476            page_non_text_region_collection=PageNonTextRegionCollection(
477                non_text_regions=[
478                    NonTextRegion(non_text_region_polygon)
479                    for non_text_region_polygon in non_text_region_polygons
480                ],
481            ),
482            page_seal_impression_char_polygon_collection=PageSealImpressionCharPolygonCollection(
483                char_polygons=seal_impression_char_polygons,
484            ),
485        )