vkit.pipeline.text_detection.page_text_region_label

  1# Copyright 2022 vkit-x Administrator. All Rights Reserved.
  2#
  3# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
  4#
  5# The commercial license gives you the full rights to create and distribute software
  6# on your own terms without any SSPL license obligations. For more information,
  7# please see the "LICENSE_COMMERCIAL.txt" file.
  8#
  9# This project is also available under Server Side Public License (SSPL).
 10# The SSPL licensing is ideal for use cases such as open source projects with
 11# SSPL distribution, student/academic purposes, hobby projects, internal research
 12# projects without external distribution, or other projects where all SSPL
 13# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
 14from typing import Tuple, Sequence, List, Optional, Mapping, Any
 15from enum import Enum, unique
 16import math
 17import logging
 18
 19import attrs
 20from numpy.random import Generator as RandomGenerator
 21import numpy as np
 22import cv2 as cv
 23from sklearn.neighbors import KDTree
 24
 25from vkit.utility import attrs_lazy_field, unwrap_optional_field, normalize_to_probs
 26from vkit.element import Point, PointList, Box, Polygon, Mask, ScoreMap
 27from vkit.mechanism.distortion.geometric.affine import affine_points
 28from vkit.engine.char_heatmap import (
 29    char_heatmap_default_engine_executor_factory,
 30    CharHeatmapDefaultEngineInitConfig,
 31)
 32from vkit.engine.char_mask import (
 33    char_mask_engine_executor_aggregator_factory,
 34    CharMaskEngineRunConfig,
 35)
 36from ..interface import PipelineStep, PipelineStepFactory
 37from .page_text_region import PageTextRegionStepOutput
 38
# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
 40
 41
@attrs.define
class PageTextRegionLabelStepConfig:
    """Settings consumed by ``PageTextRegionLabelStep``."""
    # Init config handed to ``char_heatmap_default_engine_executor_factory.create``.
    char_heatmap_default_engine_init_config: CharHeatmapDefaultEngineInitConfig = \
        attrs.field(factory=CharHeatmapDefaultEngineInitConfig)
    # Engine config handed to
    # ``char_mask_engine_executor_aggregator_factory.create_engine_executor``.
    char_mask_engine_config: Mapping[str, Any] = attrs.field(factory=lambda: {'type': 'default'})

    # 1 centroid + n deviate points.
    # Number of deviate labels kept per char; a value <= 0 disables deviate labels.
    num_deviate_char_regression_labels: int = 1
    # Candidates sampled per char = this factor * num_deviate_char_regression_labels.
    num_deviate_char_regression_labels_candiates_factor: int = 3
 51
 52
@attrs.define
class PageTextRegionLabelStepInput:
    """Input of ``PageTextRegionLabelStep``: the output of the page text region step."""
    # Read by ``run`` for the page image, active mask, char polygons,
    # text region polygons and the char-to-text-region index mapping.
    page_text_region_step_output: PageTextRegionStepOutput
 56
 57
@unique
class PageCharRegressionLabelTag(Enum):
    """Kind of label point a ``PageCharRegressionLabel`` is anchored at."""
    # Label point is the center point of the char polygon.
    CENTROID = 'centroid'
    # Label point is a randomly sampled point other than the center point.
    DEVIATE = 'deviate'
 62
 63
 64PI = float(np.pi)
 65TWO_PI = float(2 * np.pi)
 66
 67
 68@attrs.define
 69class Vector:
 70    y: float
 71    x: float
 72
 73    _distance: Optional[float] = attrs_lazy_field()
 74    _theta: Optional[float] = attrs_lazy_field()
 75
 76    def lazy_post_init(self):
 77        initialized = (self._distance is not None)
 78        if initialized:
 79            return
 80
 81        self._distance = math.hypot(self.x, self.y)
 82        self._theta = float(np.arctan2(self.y, self.x)) % TWO_PI
 83
 84    @property
 85    def distance(self):
 86        self.lazy_post_init()
 87        return unwrap_optional_field(self._distance)
 88
 89    @property
 90    def theta(self):
 91        self.lazy_post_init()
 92        return unwrap_optional_field(self._theta)
 93
 94    @classmethod
 95    def calculate_theta_delta(
 96        cls,
 97        vector0: 'Vector',
 98        vector1: 'Vector',
 99        clockwise: bool = False,
100    ):
101        theta_delta = (vector1.theta - vector0.theta + PI) % TWO_PI - PI
102        if clockwise and theta_delta < 0:
103            theta_delta += TWO_PI
104        return theta_delta
105
106    def dot(self, other: 'Vector'):
107        return self.x * other.x + self.y * other.y
108
109
@attrs.define
class PageCharRegressionLabel:
    """Regression label relating one label point to the four corners of a char polygon.

    Derived quantities (axis-aligned bounds of the corners, vectors from the
    label point to each corner, the angles between consecutive corner vectors
    and the validity flag) are computed lazily and cached.
    """
    char_idx: int
    tag: PageCharRegressionLabelTag
    label_point_smooth_y: float
    label_point_smooth_x: float
    downsampled_label_point_y: int
    downsampled_label_point_x: int
    up_left: Point
    up_right: Point
    down_right: Point
    down_left: Point

    is_downsampled: bool = False
    downsample_labeling_factor: int = 1

    # Lazy group 1: axis-aligned bounds of the four corners and the orientation index.
    _bounding_smooth_up: Optional[float] = attrs_lazy_field()
    _bounding_smooth_down: Optional[float] = attrs_lazy_field()
    _bounding_smooth_left: Optional[float] = attrs_lazy_field()
    _bounding_smooth_right: Optional[float] = attrs_lazy_field()
    _bounding_orientation_idx: Optional[int] = attrs_lazy_field()

    # Lazy group 2: vectors from the label point to each corner.
    _up_left_vector: Optional[Vector] = attrs_lazy_field()
    _up_right_vector: Optional[Vector] = attrs_lazy_field()
    _down_right_vector: Optional[Vector] = attrs_lazy_field()
    _down_left_vector: Optional[Vector] = attrs_lazy_field()

    # Lazy group 2 (continued): angles between consecutive corner vectors and
    # the values derived from them.
    _up_left_to_up_right_angle: Optional[float] = attrs_lazy_field()
    _up_right_to_down_right_angle: Optional[float] = attrs_lazy_field()
    _down_right_to_down_left_angle: Optional[float] = attrs_lazy_field()
    _down_left_to_up_left_angle: Optional[float] = attrs_lazy_field()
    _valid: Optional[bool] = attrs_lazy_field()
    _clockwise_angle_distribution: Optional[Sequence[float]] = attrs_lazy_field()

    @property
    def corner_points(self):
        """Yield the corners in the order up-left, up-right, down-right, down-left."""
        for corner in (self.up_left, self.up_right, self.down_right, self.down_left):
            yield corner

    @classmethod
    def get_bounding_orientation_idx(cls, down_left: Point, down_right: Point):
        """Map the direction of the ``down_left -> down_right`` edge to a side index.

        The edge angle is split into four quarter-turn sectors centered on the
        axes; each sector maps to one of the side indices in the diagram below.
        Raises ``RuntimeError`` if no sector matches.
        """
        bottom_edge = Vector(
            y=down_right.smooth_y - down_left.smooth_y,
            x=down_right.smooth_x - down_left.smooth_x,
        )
        #        0
        #  ┌───────────┐
        #  │           │
        # 2│           │3
        #  │           │
        #  └───────────┘
        #        1
        half_turns = bottom_edge.theta / PI
        if half_turns >= 1.75 or half_turns < 0.25:
            return 1
        if 0.25 <= half_turns < 0.75:
            return 2
        if 0.75 <= half_turns < 1.25:
            return 0
        if half_turns >= 1.25:
            return 3
        raise RuntimeError()

    def lazy_post_init(self):
        """Compute whichever of the two lazy field groups is still missing.

        The bounding group and the vector/angle group are guarded separately
        because ``copy`` may carry over only the latter.
        """
        if self._bounding_smooth_up is None:
            corner_ys = [corner.smooth_y for corner in self.corner_points]
            corner_xs = [corner.smooth_x for corner in self.corner_points]
            self._bounding_smooth_up = min(corner_ys)
            self._bounding_smooth_down = max(corner_ys)
            self._bounding_smooth_left = min(corner_xs)
            self._bounding_smooth_right = max(corner_xs)
            self._bounding_orientation_idx = self.get_bounding_orientation_idx(
                down_left=self.down_left,
                down_right=self.down_right,
            )

        if self._up_left_vector is not None:
            return

        origin_y = self.label_point_smooth_y
        origin_x = self.label_point_smooth_x

        def vector_to(corner: Point):
            # Displacement from the label point to the given corner.
            return Vector(
                y=corner.smooth_y - origin_y,
                x=corner.smooth_x - origin_x,
            )

        self._up_left_vector = vector_to(self.up_left)
        self._up_right_vector = vector_to(self.up_right)
        self._down_right_vector = vector_to(self.down_right)
        self._down_left_vector = vector_to(self.down_left)

        corner_vectors = (
            self._up_left_vector,
            self._up_right_vector,
            self._down_right_vector,
            self._down_left_vector,
        )
        # Angle from each corner vector to the next one, wrapping around at the end.
        angles = [
            Vector.calculate_theta_delta(
                corner_vectors[idx],
                corner_vectors[(idx + 1) % 4],
                clockwise=True,
            ) for idx in range(4)
        ]
        (
            self._up_left_to_up_right_angle,
            self._up_right_to_down_right_angle,
            self._down_right_to_down_left_angle,
            self._down_left_to_up_left_angle,
        ) = angles

        # Consider valid if deviate within 4 degrees.
        self._valid = math.isclose(sum(angles), TWO_PI, rel_tol=0.012)

        self._clockwise_angle_distribution = normalize_to_probs(list(angles))

    def copy(self, with_non_bounding_related_lazy_fields: bool = False):
        """Return a copy built with ``attrs.evolve``.

        When ``with_non_bounding_related_lazy_fields`` is true, the cached
        vectors, angles, validity flag and angle distribution are carried over.
        """
        duplicate = attrs.evolve(self)
        if not with_non_bounding_related_lazy_fields:
            return duplicate

        # NOTE: Bounding box related properties are not copied.
        for field_name in (
            '_up_left_vector',
            '_up_right_vector',
            '_down_right_vector',
            '_down_left_vector',
            '_up_left_to_up_right_angle',
            '_up_right_to_down_right_angle',
            '_down_right_to_down_left_angle',
            '_down_left_to_up_left_angle',
            '_valid',
            '_clockwise_angle_distribution',
        ):
            setattr(duplicate, field_name, getattr(self, field_name))
        return duplicate

    def to_shifted_page_char_regression_label(self, offset_y: int, offset_x: int):
        """Return a copy with the label point and all corners moved by the offsets.

        Requires a valid label that has not been downsampled.
        """
        assert self.valid and not self.is_downsampled

        # The label point and corners move together, so the cached vectors and
        # angles stay correct and are carried over.
        moved = self.copy(with_non_bounding_related_lazy_fields=True)

        moved_y = self.label_point_smooth_y + offset_y
        moved_x = self.label_point_smooth_x + offset_x
        moved.label_point_smooth_y = moved_y
        moved.label_point_smooth_x = moved_x
        moved.downsampled_label_point_y = int(moved_y)
        moved.downsampled_label_point_x = int(moved_x)

        for corner_name in ('up_left', 'up_right', 'down_right', 'down_left'):
            corner = getattr(self, corner_name)
            setattr(
                moved,
                corner_name,
                corner.to_shifted_point(offset_y=offset_y, offset_x=offset_x),
            )

        return moved

    def to_downsampled_page_char_regression_label(self, downsample_labeling_factor: int):
        """Return a copy whose integer label point is floor-divided by the factor.

        Requires a valid label that has not been downsampled. The smooth label
        point and the corners are left untouched.
        """
        assert self.valid and not self.is_downsampled

        # Nothing the cached vectors/angles depend on changes, so carry them over.
        reduced = self.copy(with_non_bounding_related_lazy_fields=True)
        # Mark as downsampled hence disables shift & downsample opts.
        reduced.is_downsampled = True
        # Should be helpful in training.
        reduced.downsample_labeling_factor = downsample_labeling_factor

        reduced.downsampled_label_point_y = int(
            self.label_point_smooth_y // downsample_labeling_factor
        )
        reduced.downsampled_label_point_x = int(
            self.label_point_smooth_x // downsample_labeling_factor
        )

        return reduced

    @property
    def bounding_smooth_up(self):
        """Smallest ``smooth_y`` among the four corners."""
        self.lazy_post_init()
        return unwrap_optional_field(self._bounding_smooth_up)

    @property
    def bounding_smooth_down(self):
        """Largest ``smooth_y`` among the four corners."""
        self.lazy_post_init()
        return unwrap_optional_field(self._bounding_smooth_down)

    @property
    def bounding_smooth_left(self):
        """Smallest ``smooth_x`` among the four corners."""
        self.lazy_post_init()
        return unwrap_optional_field(self._bounding_smooth_left)

    @property
    def bounding_smooth_right(self):
        """Largest ``smooth_x`` among the four corners."""
        self.lazy_post_init()
        return unwrap_optional_field(self._bounding_smooth_right)

    @property
    def bounding_center_point(self):
        """Point at the middle of the axis-aligned bounds of the corners."""
        center_y = (self.bounding_smooth_up + self.bounding_smooth_down) / 2
        center_x = (self.bounding_smooth_left + self.bounding_smooth_right) / 2
        return Point.create(y=center_y, x=center_x)

    @property
    def bounding_smooth_shape(self):
        """``(height, width)`` of the axis-aligned bounds of the corners."""
        return (
            self.bounding_smooth_down - self.bounding_smooth_up,
            self.bounding_smooth_right - self.bounding_smooth_left,
        )

    @property
    def bounding_orientation_idx(self):
        """Side index computed by ``get_bounding_orientation_idx`` for this label."""
        self.lazy_post_init()
        return unwrap_optional_field(self._bounding_orientation_idx)

    @property
    def valid(self):
        """Whether the four corner-to-corner angles sum to about ``TWO_PI``."""
        self.lazy_post_init()
        return unwrap_optional_field(self._valid)

    def generate_up_left_offsets(self):
        """Return ``(y, x)`` of the vector from the label point to ``up_left``."""
        self.lazy_post_init()
        to_up_left = unwrap_optional_field(self._up_left_vector)
        return to_up_left.y, to_up_left.x

    def generate_clockwise_angle_distribution(self):
        """Return the four corner-to-corner angles passed through ``normalize_to_probs``."""
        self.lazy_post_init()
        return unwrap_optional_field(self._clockwise_angle_distribution)

    def generate_clockwise_distances(self):
        """Return the label-point-to-corner distances, starting at ``up_left``."""
        self.lazy_post_init()
        return tuple(
            unwrap_optional_field(corner_vector).distance for corner_vector in (
                self._up_left_vector,
                self._up_right_vector,
                self._down_right_vector,
                self._down_left_vector,
            )
        )
354
355
@attrs.define
class PageTextRegionLabelStepOutput:
    """Labels produced by ``PageTextRegionLabelStep.run`` for one page."""
    # Combined char mask returned by the char mask engine.
    page_char_mask: Mask
    # Score map filled with each char polygon's rectangular height.
    page_char_height_score_map: ScoreMap
    # Score map returned by the char heatmap engine.
    page_char_gaussian_score_map: ScoreMap
    # Centroid labels plus the accepted deviate labels of every char.
    page_char_regression_labels: Sequence[PageCharRegressionLabel]
    # Mask filled with the integer bounding box of every regression label.
    page_char_bounding_box_mask: Mask
362    page_char_bounding_box_mask: Mask
363
364
class PageTextRegionLabelStep(
    PipelineStep[
        PageTextRegionLabelStepConfig,
        PageTextRegionLabelStepInput,
        PageTextRegionLabelStepOutput,
    ]
):  # yapf: disable
    """Pipeline step that derives char-level labels from a page text region output.

    ``run`` produces a char mask, a char height score map, a char gaussian
    score map, char regression labels and a char bounding box mask.
    """

    def __init__(self, config: PageTextRegionLabelStepConfig):
        """Create the char heatmap and char mask engine executors from ``config``."""
        super().__init__(config)

        self.char_heatmap_default_engine_executor = \
            char_heatmap_default_engine_executor_factory.create(
                self.config.char_heatmap_default_engine_init_config
            )
        self.char_mask_engine_executor = \
            char_mask_engine_executor_aggregator_factory.create_engine_executor(
                self.config.char_mask_engine_config
            )

    def generate_page_char_mask(
        self,
        shape: Tuple[int, int],
        page_inactive_mask: Mask,
        page_char_polygons: Sequence[Polygon],
        page_text_region_polygons: Sequence[Polygon],
        page_char_polygon_text_region_polygon_indices: Sequence[int],
    ):
        """Run the char mask engine for the whole page.

        Each char polygon is paired with the text region polygon selected by
        ``page_char_polygon_text_region_polygon_indices``.

        Returns:
            ``(combined_chars_mask, char_masks)`` as reported by the engine.
        """
        height, width = shape
        result = self.char_mask_engine_executor.run(
            CharMaskEngineRunConfig(
                height=height,
                width=width,
                char_polygons=page_char_polygons,
                char_bounding_polygons=[
                    page_text_region_polygons[idx]
                    for idx in page_char_polygon_text_region_polygon_indices
                ],
            ),
        )

        # NOTE(review): presumably writes 0 into the combined mask wherever the
        # page is inactive; confirm against Mask.fill_mask.
        page_inactive_mask.fill_mask(result.combined_chars_mask, 0)

        return result.combined_chars_mask, result.char_masks

    @classmethod
    def generate_page_char_height_score_map(
        cls,
        shape: Tuple[int, int],
        page_inactive_mask: Mask,
        page_char_polygons: Sequence[Polygon],
        fill_score_map_char_masks: Optional[Sequence[Mask]],
    ):
        """Build a score map holding each char's rectangular height.

        Chars are filled from the tallest to the shortest. When
        ``fill_score_map_char_masks`` is given, the per-char mask is used for
        filling instead of the char polygon.
        """
        rectangular_heights = [
            char_polygon.get_rectangular_height() for char_polygon in page_char_polygons
        ]
        # Descending height order, so shorter chars are filled last.
        sorted_indices: Tuple[int, ...] = tuple(reversed(np.asarray(rectangular_heights).argsort()))

        page_char_height_score_map = ScoreMap.from_shape(shape, is_prob=False)
        for idx in sorted_indices:
            char_polygon = page_char_polygons[idx]
            rectangular_height = rectangular_heights[idx]
            if fill_score_map_char_masks is None:
                char_polygon.fill_score_map(
                    page_char_height_score_map,
                    value=rectangular_height,
                )
            else:
                char_mask = fill_score_map_char_masks[idx]
                char_mask.fill_score_map(
                    page_char_height_score_map,
                    value=rectangular_height,
                )

        # NOTE(review): presumably resets the score to 0.0 wherever the page is
        # inactive; confirm against Mask.fill_score_map.
        page_inactive_mask.fill_score_map(page_char_height_score_map, 0.0)

        return page_char_height_score_map

    def generate_page_char_gaussian_score_map(
        self,
        shape: Tuple[int, int],
        page_char_polygons: Sequence[Polygon],
    ):
        """Run the char heatmap engine and return the resulting score map."""
        height, width = shape
        char_heatmap = self.char_heatmap_default_engine_executor.run({
            'height': height,
            'width': width,
            'char_polygons': page_char_polygons,
        })
        return char_heatmap.score_map

    def generate_page_char_regression_labels(
        self,
        shape: Tuple[int, int],
        page_char_polygons: Sequence[Polygon],
        rng: RandomGenerator,
    ):
        """Build regression labels for every char polygon.

        For each char, one CENTROID label anchored at the polygon center point
        is emitted, followed by at most
        ``config.num_deviate_char_regression_labels`` DEVIATE labels anchored
        at random points. A deviate candidate is dropped when its nearest
        center point belongs to another char or when the resulting label is
        not valid.

        Returns:
            A list of ``PageCharRegressionLabel``; empty when there are no
            char polygons.
        """
        page_height, page_width = shape

        # A KD tree cannot be built from zero samples, and a page without chars
        # has no labels anyway.
        if len(page_char_polygons) == 0:
            return []

        # Build a KD tree for removing deviate points that are too close to another center point.
        center_points = PointList()
        for polygon in page_char_polygons:
            center_points.append(polygon.get_center_point())
        kd_tree = KDTree(center_points.to_np_array())

        page_char_regression_labels: List[PageCharRegressionLabel] = []

        for char_idx, (polygon, center_point) in enumerate(zip(page_char_polygons, center_points)):
            assert polygon.num_points == 4
            up_left, up_right, down_right, down_left = polygon.points

            # 1. The centroid of char polygon.
            label = PageCharRegressionLabel(
                char_idx=char_idx,
                tag=PageCharRegressionLabelTag.CENTROID,
                label_point_smooth_y=center_point.smooth_y,
                label_point_smooth_x=center_point.smooth_x,
                downsampled_label_point_y=center_point.y,
                downsampled_label_point_x=center_point.x,
                up_left=up_left,
                up_right=up_right,
                down_right=down_right,
                down_left=down_left,
            )
            # The centroid labeling must be valid.
            assert label.valid
            page_char_regression_labels.append(label)

            # 2. The deviate points.
            if self.config.num_deviate_char_regression_labels <= 0:
                # Generating deviate points are optional.
                continue

            bounding_box = polygon.bounding_box

            # ``rng.integers(1, size - 1)`` excludes the upper bound, hence needs
            # size >= 3 to have any non-border coordinate to draw. Smaller boxes
            # would raise, and would also make the source rectangle of the
            # perspective transform degenerate, so skip deviate sampling.
            if bounding_box.height < 3 or bounding_box.width < 3:
                logger.warning(f'Cannot sample enough deviate labels for char_polygon={polygon}')
                continue

            # Sample points in shifted bounding box space.
            deviate_points_in_bounding_box = PointList()
            # Some points are invalid, hence multiply the number of samplings by a factor.
            # Also not to sample the points lying on the border to increase the chance of valid.
            for _ in range(
                self.config.num_deviate_char_regression_labels_candiates_factor
                * self.config.num_deviate_char_regression_labels
            ):
                y = int(rng.integers(1, bounding_box.height - 1))
                x = int(rng.integers(1, bounding_box.width - 1))
                deviate_points_in_bounding_box.append(Point.create(y=y, x=x))

            # Then transform to the polygon space.
            np_src_points = np.asarray(
                [
                    (0, 0),
                    (bounding_box.width - 1, 0),
                    (bounding_box.width - 1, bounding_box.height - 1),
                    (0, bounding_box.height - 1),
                ],
                dtype=np.float32,
            )
            np_dst_points = polygon.internals.np_self_relative_points
            trans_mat = cv.getPerspectiveTransform(
                np_src_points,
                np_dst_points,
                cv.DECOMP_SVD,
            )

            deviate_points = PointList()
            for shifted_deviate_point in affine_points(
                trans_mat,
                deviate_points_in_bounding_box.to_point_tuple(),
            ):
                # Move from bounding-box-relative to page coordinates.
                y = bounding_box.up + shifted_deviate_point.smooth_y
                x = bounding_box.left + shifted_deviate_point.smooth_x
                assert 0 <= y < page_height
                assert 0 <= x < page_width
                deviate_points.append(Point.create(y=y, x=x))

            # Remove those are too close to another center point.
            _, np_kd_nbr_indices = kd_tree.query(deviate_points.to_np_array())
            preserve_flags: List[bool] = [
                idx == char_idx for idx in np_kd_nbr_indices[:, 0].tolist()
            ]

            # Build labels.
            num_valid_deviate_char_regression_labels = 0
            for deviate_point, preserve_flag in zip(deviate_points, preserve_flags):
                if num_valid_deviate_char_regression_labels \
                        >= self.config.num_deviate_char_regression_labels:
                    break

                if not preserve_flag:
                    continue

                label = PageCharRegressionLabel(
                    char_idx=char_idx,
                    tag=PageCharRegressionLabelTag.DEVIATE,
                    label_point_smooth_y=deviate_point.smooth_y,
                    label_point_smooth_x=deviate_point.smooth_x,
                    downsampled_label_point_y=deviate_point.y,
                    downsampled_label_point_x=deviate_point.x,
                    up_left=up_left,
                    up_right=up_right,
                    down_right=down_right,
                    down_left=down_left,
                )
                if label.valid:
                    page_char_regression_labels.append(label)
                    num_valid_deviate_char_regression_labels += 1

            if num_valid_deviate_char_regression_labels \
                    < self.config.num_deviate_char_regression_labels:
                logger.warning(f'Cannot sample enough deviate labels for char_polygon={polygon}')

        return page_char_regression_labels

    def generate_page_char_bounding_box_mask(
        self,
        shape: Tuple[int, int],
        page_char_regression_labels: Sequence[PageCharRegressionLabel],
    ):
        """Fill a page-sized mask with the bounding box of every regression label.

        Each box is the label's smooth bounds rounded outwards (floor for
        up/left, ceil for down/right).
        """
        page_char_bounding_box_mask = Mask.from_shape(shape)
        for page_char_regression_label in page_char_regression_labels:
            box = Box(
                up=math.floor(page_char_regression_label.bounding_smooth_up),
                down=math.ceil(page_char_regression_label.bounding_smooth_down),
                left=math.floor(page_char_regression_label.bounding_smooth_left),
                right=math.ceil(page_char_regression_label.bounding_smooth_right),
            )
            box.fill_mask(page_char_bounding_box_mask)
        return page_char_bounding_box_mask

    def run(self, input: PageTextRegionLabelStepInput, rng: RandomGenerator):
        """Generate all page-level char labels from the page text region step output."""
        page_text_region_step_output = input.page_text_region_step_output
        page_image = page_text_region_step_output.page_image
        page_active_mask = page_text_region_step_output.page_active_mask
        page_char_polygons = page_text_region_step_output.page_char_polygons
        page_text_region_polygons = page_text_region_step_output.page_text_region_polygons
        page_char_polygon_text_region_polygon_indices = \
            page_text_region_step_output.page_char_polygon_text_region_polygon_indices

        page_inactive_mask = page_active_mask.to_inverted_mask()
        page_char_mask, fill_score_map_char_masks = self.generate_page_char_mask(
            shape=page_image.shape,
            page_inactive_mask=page_inactive_mask,
            page_char_polygons=page_char_polygons,
            page_text_region_polygons=page_text_region_polygons,
            page_char_polygon_text_region_polygon_indices=(
                page_char_polygon_text_region_polygon_indices
            ),
        )

        # NOTE: page_char_height_score_map is different from the one defined in page distortion.
        # TODO: Resolve the inconsistency.
        page_char_height_score_map = self.generate_page_char_height_score_map(
            shape=page_image.shape,
            page_inactive_mask=page_inactive_mask,
            page_char_polygons=page_char_polygons,
            fill_score_map_char_masks=fill_score_map_char_masks,
        )

        page_char_gaussian_score_map = self.generate_page_char_gaussian_score_map(
            page_image.shape,
            page_char_polygons,
        )

        page_char_regression_labels = self.generate_page_char_regression_labels(
            page_image.shape,
            page_char_polygons,
            rng,
        )

        page_char_bounding_box_mask = self.generate_page_char_bounding_box_mask(
            page_image.shape,
            page_char_regression_labels,
        )

        return PageTextRegionLabelStepOutput(
            page_char_mask=page_char_mask,
            page_char_height_score_map=page_char_height_score_map,
            page_char_gaussian_score_map=page_char_gaussian_score_map,
            page_char_regression_labels=page_char_regression_labels,
            page_char_bounding_box_mask=page_char_bounding_box_mask,
        )
646
647
# Factory object wrapping PageTextRegionLabelStep.
page_text_region_label_step_factory = PipelineStepFactory(PageTextRegionLabelStep)
class PageTextRegionLabelStepConfig:
44class PageTextRegionLabelStepConfig:
45    char_heatmap_default_engine_init_config: CharHeatmapDefaultEngineInitConfig = \
46        attrs.field(factory=CharHeatmapDefaultEngineInitConfig)
47    char_mask_engine_config: Mapping[str, Any] = attrs.field(factory=lambda: {'type': 'default'})
48
49    # 1 centroid + n deviate points.
50    num_deviate_char_regression_labels: int = 1
51    num_deviate_char_regression_labels_candiates_factor: int = 3
PageTextRegionLabelStepConfig( char_heatmap_default_engine_init_config: vkit.engine.char_heatmap.default.CharHeatmapDefaultEngineInitConfig = NOTHING, char_mask_engine_config: Mapping[str, Any] = NOTHING, num_deviate_char_regression_labels: int = 1, num_deviate_char_regression_labels_candiates_factor: int = 3)
 2def __init__(self, char_heatmap_default_engine_init_config=NOTHING, char_mask_engine_config=NOTHING, num_deviate_char_regression_labels=attr_dict['num_deviate_char_regression_labels'].default, num_deviate_char_regression_labels_candiates_factor=attr_dict['num_deviate_char_regression_labels_candiates_factor'].default):
 3    if char_heatmap_default_engine_init_config is not NOTHING:
 4        self.char_heatmap_default_engine_init_config = char_heatmap_default_engine_init_config
 5    else:
 6        self.char_heatmap_default_engine_init_config = __attr_factory_char_heatmap_default_engine_init_config()
 7    if char_mask_engine_config is not NOTHING:
 8        self.char_mask_engine_config = char_mask_engine_config
 9    else:
10        self.char_mask_engine_config = __attr_factory_char_mask_engine_config()
11    self.num_deviate_char_regression_labels = num_deviate_char_regression_labels
12    self.num_deviate_char_regression_labels_candiates_factor = num_deviate_char_regression_labels_candiates_factor

Method generated by attrs for class PageTextRegionLabelStepConfig.

class PageTextRegionLabelStepInput:
55class PageTextRegionLabelStepInput:
56    page_text_region_step_output: PageTextRegionStepOutput
PageTextRegionLabelStepInput( page_text_region_step_output: vkit.pipeline.text_detection.page_text_region.PageTextRegionStepOutput)
2def __init__(self, page_text_region_step_output):
3    self.page_text_region_step_output = page_text_region_step_output

Method generated by attrs for class PageTextRegionLabelStepInput.

class PageCharRegressionLabelTag(enum.Enum):
60class PageCharRegressionLabelTag(Enum):
61    CENTROID = 'centroid'
62    DEVIATE = 'deviate'

An enumeration.

CENTROID = <PageCharRegressionLabelTag.CENTROID: 'centroid'>
DEVIATE = <PageCharRegressionLabelTag.DEVIATE: 'deviate'>
Inherited Members
enum.Enum
name
value
class Vector:
 70class Vector:
 71    y: float
 72    x: float
 73
 74    _distance: Optional[float] = attrs_lazy_field()
 75    _theta: Optional[float] = attrs_lazy_field()
 76
 77    def lazy_post_init(self):
 78        initialized = (self._distance is not None)
 79        if initialized:
 80            return
 81
 82        self._distance = math.hypot(self.x, self.y)
 83        self._theta = float(np.arctan2(self.y, self.x)) % TWO_PI
 84
 85    @property
 86    def distance(self):
 87        self.lazy_post_init()
 88        return unwrap_optional_field(self._distance)
 89
 90    @property
 91    def theta(self):
 92        self.lazy_post_init()
 93        return unwrap_optional_field(self._theta)
 94
 95    @classmethod
 96    def calculate_theta_delta(
 97        cls,
 98        vector0: 'Vector',
 99        vector1: 'Vector',
100        clockwise: bool = False,
101    ):
102        theta_delta = (vector1.theta - vector0.theta + PI) % TWO_PI - PI
103        if clockwise and theta_delta < 0:
104            theta_delta += TWO_PI
105        return theta_delta
106
107    def dot(self, other: 'Vector'):
108        return self.x * other.x + self.y * other.y
Vector(y: float, x: float)
2def __init__(self, y, x):
3    self.y = y
4    self.x = x
5    self._distance = attr_dict['_distance'].default
6    self._theta = attr_dict['_theta'].default

Method generated by attrs for class Vector.

def lazy_post_init(self):
77    def lazy_post_init(self):
78        initialized = (self._distance is not None)
79        if initialized:
80            return
81
82        self._distance = math.hypot(self.x, self.y)
83        self._theta = float(np.arctan2(self.y, self.x)) % TWO_PI
@classmethod
def calculate_theta_delta( cls, vector0: vkit.pipeline.text_detection.page_text_region_label.Vector, vector1: vkit.pipeline.text_detection.page_text_region_label.Vector, clockwise: bool = False):
 95    @classmethod
 96    def calculate_theta_delta(
 97        cls,
 98        vector0: 'Vector',
 99        vector1: 'Vector',
100        clockwise: bool = False,
101    ):
102        theta_delta = (vector1.theta - vector0.theta + PI) % TWO_PI - PI
103        if clockwise and theta_delta < 0:
104            theta_delta += TWO_PI
105        return theta_delta
107    def dot(self, other: 'Vector'):
108        return self.x * other.x + self.y * other.y
class PageCharRegressionLabel:
112class PageCharRegressionLabel:
113    char_idx: int
114    tag: PageCharRegressionLabelTag
115    label_point_smooth_y: float
116    label_point_smooth_x: float
117    downsampled_label_point_y: int
118    downsampled_label_point_x: int
119    up_left: Point
120    up_right: Point
121    down_right: Point
122    down_left: Point
123
124    is_downsampled: bool = False
125    downsample_labeling_factor: int = 1
126
127    _bounding_smooth_up: Optional[float] = attrs_lazy_field()
128    _bounding_smooth_down: Optional[float] = attrs_lazy_field()
129    _bounding_smooth_left: Optional[float] = attrs_lazy_field()
130    _bounding_smooth_right: Optional[float] = attrs_lazy_field()
131    _bounding_orientation_idx: Optional[int] = attrs_lazy_field()
132
133    _up_left_vector: Optional[Vector] = attrs_lazy_field()
134    _up_right_vector: Optional[Vector] = attrs_lazy_field()
135    _down_right_vector: Optional[Vector] = attrs_lazy_field()
136    _down_left_vector: Optional[Vector] = attrs_lazy_field()
137
138    _up_left_to_up_right_angle: Optional[float] = attrs_lazy_field()
139    _up_right_to_down_right_angle: Optional[float] = attrs_lazy_field()
140    _down_right_to_down_left_angle: Optional[float] = attrs_lazy_field()
141    _down_left_to_up_left_angle: Optional[float] = attrs_lazy_field()
142    _valid: Optional[bool] = attrs_lazy_field()
143    _clockwise_angle_distribution: Optional[Sequence[float]] = attrs_lazy_field()
144
145    @property
146    def corner_points(self):
147        yield from (self.up_left, self.up_right, self.down_right, self.down_left)
148
149    @classmethod
150    def get_bounding_orientation_idx(cls, down_left: Point, down_right: Point):
151        vector = Vector(
152            y=down_right.smooth_y - down_left.smooth_y,
153            x=down_right.smooth_x - down_left.smooth_x,
154        )
155        #        0
156        #  ┌───────────┐
157        #  │           │
158        # 2│           │3
159        #  │           │
160        #  └───────────┘
161        #        1
162        factor = vector.theta / PI
163        if 1.75 <= factor or factor < 0.25:
164            return 1
165        elif 0.25 <= factor < 0.75:
166            return 2
167        elif 0.75 <= factor < 1.25:
168            return 0
169        elif 1.25 <= factor:
170            return 3
171        else:
172            raise RuntimeError()
173
174    def lazy_post_init(self):
175        if self._bounding_smooth_up is None:
176            self._bounding_smooth_up = min(point.smooth_y for point in self.corner_points)
177            self._bounding_smooth_down = max(point.smooth_y for point in self.corner_points)
178            self._bounding_smooth_left = min(point.smooth_x for point in self.corner_points)
179            self._bounding_smooth_right = max(point.smooth_x for point in self.corner_points)
180            self._bounding_orientation_idx = self.get_bounding_orientation_idx(
181                down_left=self.down_left,
182                down_right=self.down_right,
183            )
184
185        initialized = (self._up_left_vector is not None)
186        if initialized:
187            return
188
189        self._up_left_vector = Vector(
190            y=self.up_left.smooth_y - self.label_point_smooth_y,
191            x=self.up_left.smooth_x - self.label_point_smooth_x,
192        )
193        self._up_right_vector = Vector(
194            y=self.up_right.smooth_y - self.label_point_smooth_y,
195            x=self.up_right.smooth_x - self.label_point_smooth_x,
196        )
197        self._down_right_vector = Vector(
198            y=self.down_right.smooth_y - self.label_point_smooth_y,
199            x=self.down_right.smooth_x - self.label_point_smooth_x,
200        )
201        self._down_left_vector = Vector(
202            y=self.down_left.smooth_y - self.label_point_smooth_y,
203            x=self.down_left.smooth_x - self.label_point_smooth_x,
204        )
205
206        self._up_left_to_up_right_angle = Vector.calculate_theta_delta(
207            self._up_left_vector,
208            self._up_right_vector,
209            clockwise=True,
210        )
211        self._up_right_to_down_right_angle = Vector.calculate_theta_delta(
212            self._up_right_vector,
213            self._down_right_vector,
214            clockwise=True,
215        )
216        self._down_right_to_down_left_angle = Vector.calculate_theta_delta(
217            self._down_right_vector,
218            self._down_left_vector,
219            clockwise=True,
220        )
221        self._down_left_to_up_left_angle = Vector.calculate_theta_delta(
222            self._down_left_vector,
223            self._up_left_vector,
224            clockwise=True,
225        )
226
227        sum_of_angles = sum([
228            self._up_left_to_up_right_angle,
229            self._up_right_to_down_right_angle,
230            self._down_right_to_down_left_angle,
231            self._down_left_to_up_left_angle,
232        ])
233        # Consider valid if deviate within 4 degrees.
234        self._valid = math.isclose(sum_of_angles, TWO_PI, rel_tol=0.012)
235
236        self._clockwise_angle_distribution = normalize_to_probs([
237            self._up_left_to_up_right_angle,
238            self._up_right_to_down_right_angle,
239            self._down_right_to_down_left_angle,
240            self._down_left_to_up_left_angle,
241        ])
242
243    def copy(self, with_non_bounding_related_lazy_fields: bool = False):
244        copied = attrs.evolve(self)
245
246        if with_non_bounding_related_lazy_fields:
247            # NOTE: Bounding box related properties are not copied.
248            copied._up_left_vector = self._up_left_vector
249            copied._up_right_vector = self._up_right_vector
250            copied._down_right_vector = self._down_right_vector
251            copied._down_left_vector = self._down_left_vector
252            copied._up_left_to_up_right_angle = self._up_left_to_up_right_angle
253            copied._up_right_to_down_right_angle = self._up_right_to_down_right_angle
254            copied._down_right_to_down_left_angle = self._down_right_to_down_left_angle
255            copied._down_left_to_up_left_angle = self._down_left_to_up_left_angle
256            copied._valid = self._valid
257            copied._clockwise_angle_distribution = self._clockwise_angle_distribution
258
259        return copied
260
261    def to_shifted_page_char_regression_label(self, offset_y: int, offset_x: int):
262        assert self.valid and not self.is_downsampled
263
264        # Shift operation doesn't change the lazy fields.
265        shifted = self.copy(with_non_bounding_related_lazy_fields=True)
266
267        shifted.label_point_smooth_y = self.label_point_smooth_y + offset_y
268        shifted.label_point_smooth_x = self.label_point_smooth_x + offset_x
269        shifted.downsampled_label_point_y = int(shifted.label_point_smooth_y)
270        shifted.downsampled_label_point_x = int(shifted.label_point_smooth_x)
271        shifted.up_left = self.up_left.to_shifted_point(offset_y=offset_y, offset_x=offset_x)
272        shifted.up_right = self.up_right.to_shifted_point(offset_y=offset_y, offset_x=offset_x)
273        shifted.down_right = self.down_right.to_shifted_point(offset_y=offset_y, offset_x=offset_x)
274        shifted.down_left = self.down_left.to_shifted_point(offset_y=offset_y, offset_x=offset_x)
275
276        return shifted
277
278    def to_downsampled_page_char_regression_label(self, downsample_labeling_factor: int):
279        assert self.valid and not self.is_downsampled
280
281        # Downsample operation doesn't change the lazy fields.
282        downsampled = self.copy(with_non_bounding_related_lazy_fields=True)
283        # Mark as downsampled hence disables shift & downsample opts.
284        downsampled.is_downsampled = True
285        # Should be helpful in training.
286        downsampled.downsample_labeling_factor = downsample_labeling_factor
287
288        downsampled.downsampled_label_point_y = \
289            int(self.label_point_smooth_y // downsample_labeling_factor)
290        downsampled.downsampled_label_point_x = \
291            int(self.label_point_smooth_x // downsample_labeling_factor)
292
293        return downsampled
294
295    @property
296    def bounding_smooth_up(self):
297        self.lazy_post_init()
298        return unwrap_optional_field(self._bounding_smooth_up)
299
300    @property
301    def bounding_smooth_down(self):
302        self.lazy_post_init()
303        return unwrap_optional_field(self._bounding_smooth_down)
304
305    @property
306    def bounding_smooth_left(self):
307        self.lazy_post_init()
308        return unwrap_optional_field(self._bounding_smooth_left)
309
310    @property
311    def bounding_smooth_right(self):
312        self.lazy_post_init()
313        return unwrap_optional_field(self._bounding_smooth_right)
314
315    @property
316    def bounding_center_point(self):
317        return Point.create(
318            y=(self.bounding_smooth_up + self.bounding_smooth_down) / 2,
319            x=(self.bounding_smooth_left + self.bounding_smooth_right) / 2,
320        )
321
322    @property
323    def bounding_smooth_shape(self):
324        height = self.bounding_smooth_down - self.bounding_smooth_up
325        width = self.bounding_smooth_right - self.bounding_smooth_left
326        return height, width
327
328    @property
329    def bounding_orientation_idx(self):
330        self.lazy_post_init()
331        return unwrap_optional_field(self._bounding_orientation_idx)
332
333    @property
334    def valid(self):
335        self.lazy_post_init()
336        return unwrap_optional_field(self._valid)
337
338    def generate_up_left_offsets(self):
339        self.lazy_post_init()
340        up_left_vector = unwrap_optional_field(self._up_left_vector)
341        return up_left_vector.y, up_left_vector.x
342
343    def generate_clockwise_angle_distribution(self):
344        self.lazy_post_init()
345        return unwrap_optional_field(self._clockwise_angle_distribution)
346
347    def generate_clockwise_distances(self):
348        self.lazy_post_init()
349        return (
350            unwrap_optional_field(self._up_left_vector).distance,
351            unwrap_optional_field(self._up_right_vector).distance,
352            unwrap_optional_field(self._down_right_vector).distance,
353            unwrap_optional_field(self._down_left_vector).distance,
354        )
PageCharRegressionLabel( char_idx: int, tag: vkit.pipeline.text_detection.page_text_region_label.PageCharRegressionLabelTag, label_point_smooth_y: float, label_point_smooth_x: float, downsampled_label_point_y: int, downsampled_label_point_x: int, up_left: vkit.element.point.Point, up_right: vkit.element.point.Point, down_right: vkit.element.point.Point, down_left: vkit.element.point.Point, is_downsampled: bool = False, downsample_labeling_factor: int = 1)
 2def __init__(self, char_idx, tag, label_point_smooth_y, label_point_smooth_x, downsampled_label_point_y, downsampled_label_point_x, up_left, up_right, down_right, down_left, is_downsampled=attr_dict['is_downsampled'].default, downsample_labeling_factor=attr_dict['downsample_labeling_factor'].default):
 3    self.char_idx = char_idx
 4    self.tag = tag
 5    self.label_point_smooth_y = label_point_smooth_y
 6    self.label_point_smooth_x = label_point_smooth_x
 7    self.downsampled_label_point_y = downsampled_label_point_y
 8    self.downsampled_label_point_x = downsampled_label_point_x
 9    self.up_left = up_left
10    self.up_right = up_right
11    self.down_right = down_right
12    self.down_left = down_left
13    self.is_downsampled = is_downsampled
14    self.downsample_labeling_factor = downsample_labeling_factor
15    self._bounding_smooth_up = attr_dict['_bounding_smooth_up'].default
16    self._bounding_smooth_down = attr_dict['_bounding_smooth_down'].default
17    self._bounding_smooth_left = attr_dict['_bounding_smooth_left'].default
18    self._bounding_smooth_right = attr_dict['_bounding_smooth_right'].default
19    self._bounding_orientation_idx = attr_dict['_bounding_orientation_idx'].default
20    self._up_left_vector = attr_dict['_up_left_vector'].default
21    self._up_right_vector = attr_dict['_up_right_vector'].default
22    self._down_right_vector = attr_dict['_down_right_vector'].default
23    self._down_left_vector = attr_dict['_down_left_vector'].default
24    self._up_left_to_up_right_angle = attr_dict['_up_left_to_up_right_angle'].default
25    self._up_right_to_down_right_angle = attr_dict['_up_right_to_down_right_angle'].default
26    self._down_right_to_down_left_angle = attr_dict['_down_right_to_down_left_angle'].default
27    self._down_left_to_up_left_angle = attr_dict['_down_left_to_up_left_angle'].default
28    self._valid = attr_dict['_valid'].default
29    self._clockwise_angle_distribution = attr_dict['_clockwise_angle_distribution'].default

Method generated by attrs for class PageCharRegressionLabel.

@classmethod
def get_bounding_orientation_idx( cls, down_left: vkit.element.point.Point, down_right: vkit.element.point.Point):
149    @classmethod
150    def get_bounding_orientation_idx(cls, down_left: Point, down_right: Point):
151        vector = Vector(
152            y=down_right.smooth_y - down_left.smooth_y,
153            x=down_right.smooth_x - down_left.smooth_x,
154        )
155        #        0
156        #  ┌───────────┐
157        #  │           │
158        # 2│           │3
159        #  │           │
160        #  └───────────┘
161        #        1
162        factor = vector.theta / PI
163        if 1.75 <= factor or factor < 0.25:
164            return 1
165        elif 0.25 <= factor < 0.75:
166            return 2
167        elif 0.75 <= factor < 1.25:
168            return 0
169        elif 1.25 <= factor:
170            return 3
171        else:
172            raise RuntimeError()
def lazy_post_init(self):
174    def lazy_post_init(self):
175        if self._bounding_smooth_up is None:
176            self._bounding_smooth_up = min(point.smooth_y for point in self.corner_points)
177            self._bounding_smooth_down = max(point.smooth_y for point in self.corner_points)
178            self._bounding_smooth_left = min(point.smooth_x for point in self.corner_points)
179            self._bounding_smooth_right = max(point.smooth_x for point in self.corner_points)
180            self._bounding_orientation_idx = self.get_bounding_orientation_idx(
181                down_left=self.down_left,
182                down_right=self.down_right,
183            )
184
185        initialized = (self._up_left_vector is not None)
186        if initialized:
187            return
188
189        self._up_left_vector = Vector(
190            y=self.up_left.smooth_y - self.label_point_smooth_y,
191            x=self.up_left.smooth_x - self.label_point_smooth_x,
192        )
193        self._up_right_vector = Vector(
194            y=self.up_right.smooth_y - self.label_point_smooth_y,
195            x=self.up_right.smooth_x - self.label_point_smooth_x,
196        )
197        self._down_right_vector = Vector(
198            y=self.down_right.smooth_y - self.label_point_smooth_y,
199            x=self.down_right.smooth_x - self.label_point_smooth_x,
200        )
201        self._down_left_vector = Vector(
202            y=self.down_left.smooth_y - self.label_point_smooth_y,
203            x=self.down_left.smooth_x - self.label_point_smooth_x,
204        )
205
206        self._up_left_to_up_right_angle = Vector.calculate_theta_delta(
207            self._up_left_vector,
208            self._up_right_vector,
209            clockwise=True,
210        )
211        self._up_right_to_down_right_angle = Vector.calculate_theta_delta(
212            self._up_right_vector,
213            self._down_right_vector,
214            clockwise=True,
215        )
216        self._down_right_to_down_left_angle = Vector.calculate_theta_delta(
217            self._down_right_vector,
218            self._down_left_vector,
219            clockwise=True,
220        )
221        self._down_left_to_up_left_angle = Vector.calculate_theta_delta(
222            self._down_left_vector,
223            self._up_left_vector,
224            clockwise=True,
225        )
226
227        sum_of_angles = sum([
228            self._up_left_to_up_right_angle,
229            self._up_right_to_down_right_angle,
230            self._down_right_to_down_left_angle,
231            self._down_left_to_up_left_angle,
232        ])
233        # Consider valid if deviate within 4 degrees.
234        self._valid = math.isclose(sum_of_angles, TWO_PI, rel_tol=0.012)
235
236        self._clockwise_angle_distribution = normalize_to_probs([
237            self._up_left_to_up_right_angle,
238            self._up_right_to_down_right_angle,
239            self._down_right_to_down_left_angle,
240            self._down_left_to_up_left_angle,
241        ])
def copy(self, with_non_bounding_related_lazy_fields: bool = False):
243    def copy(self, with_non_bounding_related_lazy_fields: bool = False):
244        copied = attrs.evolve(self)
245
246        if with_non_bounding_related_lazy_fields:
247            # NOTE: Bounding box related properties are not copied.
248            copied._up_left_vector = self._up_left_vector
249            copied._up_right_vector = self._up_right_vector
250            copied._down_right_vector = self._down_right_vector
251            copied._down_left_vector = self._down_left_vector
252            copied._up_left_to_up_right_angle = self._up_left_to_up_right_angle
253            copied._up_right_to_down_right_angle = self._up_right_to_down_right_angle
254            copied._down_right_to_down_left_angle = self._down_right_to_down_left_angle
255            copied._down_left_to_up_left_angle = self._down_left_to_up_left_angle
256            copied._valid = self._valid
257            copied._clockwise_angle_distribution = self._clockwise_angle_distribution
258
259        return copied
def to_shifted_page_char_regression_label(self, offset_y: int, offset_x: int):
261    def to_shifted_page_char_regression_label(self, offset_y: int, offset_x: int):
262        assert self.valid and not self.is_downsampled
263
264        # Shift operation doesn't change the lazy fields.
265        shifted = self.copy(with_non_bounding_related_lazy_fields=True)
266
267        shifted.label_point_smooth_y = self.label_point_smooth_y + offset_y
268        shifted.label_point_smooth_x = self.label_point_smooth_x + offset_x
269        shifted.downsampled_label_point_y = int(shifted.label_point_smooth_y)
270        shifted.downsampled_label_point_x = int(shifted.label_point_smooth_x)
271        shifted.up_left = self.up_left.to_shifted_point(offset_y=offset_y, offset_x=offset_x)
272        shifted.up_right = self.up_right.to_shifted_point(offset_y=offset_y, offset_x=offset_x)
273        shifted.down_right = self.down_right.to_shifted_point(offset_y=offset_y, offset_x=offset_x)
274        shifted.down_left = self.down_left.to_shifted_point(offset_y=offset_y, offset_x=offset_x)
275
276        return shifted
def to_downsampled_page_char_regression_label(self, downsample_labeling_factor: int):
278    def to_downsampled_page_char_regression_label(self, downsample_labeling_factor: int):
279        assert self.valid and not self.is_downsampled
280
281        # Downsample operation doesn't change the lazy fields.
282        downsampled = self.copy(with_non_bounding_related_lazy_fields=True)
283        # Mark as downsampled hence disables shift & downsample opts.
284        downsampled.is_downsampled = True
285        # Should be helpful in training.
286        downsampled.downsample_labeling_factor = downsample_labeling_factor
287
288        downsampled.downsampled_label_point_y = \
289            int(self.label_point_smooth_y // downsample_labeling_factor)
290        downsampled.downsampled_label_point_x = \
291            int(self.label_point_smooth_x // downsample_labeling_factor)
292
293        return downsampled
def generate_up_left_offsets(self):
338    def generate_up_left_offsets(self):
339        self.lazy_post_init()
340        up_left_vector = unwrap_optional_field(self._up_left_vector)
341        return up_left_vector.y, up_left_vector.x
def generate_clockwise_angle_distribution(self):
343    def generate_clockwise_angle_distribution(self):
344        self.lazy_post_init()
345        return unwrap_optional_field(self._clockwise_angle_distribution)
def generate_clockwise_distances(self):
347    def generate_clockwise_distances(self):
348        self.lazy_post_init()
349        return (
350            unwrap_optional_field(self._up_left_vector).distance,
351            unwrap_optional_field(self._up_right_vector).distance,
352            unwrap_optional_field(self._down_right_vector).distance,
353            unwrap_optional_field(self._down_left_vector).distance,
354        )
class PageTextRegionLabelStepOutput:
class PageTextRegionLabelStepOutput:
    """Bundle of the labels returned by ``PageTextRegionLabelStep.run``."""
    # Combined chars mask returned by ``generate_page_char_mask``.
    page_char_mask: Mask
    # Score map filled with each char's rectangular height
    # (see ``generate_page_char_height_score_map``).
    page_char_height_score_map: ScoreMap
    # Score map produced by the char heatmap engine
    # (see ``generate_page_char_gaussian_score_map``).
    page_char_gaussian_score_map: ScoreMap
    # Centroid labels and, optionally, deviate labels for every char polygon.
    page_char_regression_labels: Sequence[PageCharRegressionLabel]
    # Mask filled with the integer bounding box of every regression label.
    page_char_bounding_box_mask: Mask
PageTextRegionLabelStepOutput( page_char_mask: vkit.element.mask.Mask, page_char_height_score_map: vkit.element.score_map.ScoreMap, page_char_gaussian_score_map: vkit.element.score_map.ScoreMap, page_char_regression_labels: Sequence[vkit.pipeline.text_detection.page_text_region_label.PageCharRegressionLabel], page_char_bounding_box_mask: vkit.element.mask.Mask)
2def __init__(self, page_char_mask, page_char_height_score_map, page_char_gaussian_score_map, page_char_regression_labels, page_char_bounding_box_mask):
3    self.page_char_mask = page_char_mask
4    self.page_char_height_score_map = page_char_height_score_map
5    self.page_char_gaussian_score_map = page_char_gaussian_score_map
6    self.page_char_regression_labels = page_char_regression_labels
7    self.page_char_bounding_box_mask = page_char_bounding_box_mask

Method generated by attrs for class PageTextRegionLabelStepOutput.

366class PageTextRegionLabelStep(
367    PipelineStep[
368        PageTextRegionLabelStepConfig,
369        PageTextRegionLabelStepInput,
370        PageTextRegionLabelStepOutput,
371    ]
372):  # yapf: disable
373
374    def __init__(self, config: PageTextRegionLabelStepConfig):
375        super().__init__(config)
376
377        self.char_heatmap_default_engine_executor = \
378            char_heatmap_default_engine_executor_factory.create(
379                self.config.char_heatmap_default_engine_init_config
380            )
381        self.char_mask_engine_executor = \
382            char_mask_engine_executor_aggregator_factory.create_engine_executor(
383                self.config.char_mask_engine_config
384            )
385
386    def generate_page_char_mask(
387        self,
388        shape: Tuple[int, int],
389        page_inactive_mask: Mask,
390        page_char_polygons: Sequence[Polygon],
391        page_text_region_polygons: Sequence[Polygon],
392        page_char_polygon_text_region_polygon_indices: Sequence[int],
393    ):
394        height, width = shape
395        result = self.char_mask_engine_executor.run(
396            CharMaskEngineRunConfig(
397                height=height,
398                width=width,
399                char_polygons=page_char_polygons,
400                char_bounding_polygons=[
401                    page_text_region_polygons[idx]
402                    for idx in page_char_polygon_text_region_polygon_indices
403                ],
404            ),
405        )
406
407        page_inactive_mask.fill_mask(result.combined_chars_mask, 0)
408
409        return result.combined_chars_mask, result.char_masks
410
411    @classmethod
412    def generate_page_char_height_score_map(
413        cls,
414        shape: Tuple[int, int],
415        page_inactive_mask: Mask,
416        page_char_polygons: Sequence[Polygon],
417        fill_score_map_char_masks: Optional[Sequence[Mask]],
418    ):
419        rectangular_heights = [
420            char_polygon.get_rectangular_height() for char_polygon in page_char_polygons
421        ]
422        sorted_indices: Tuple[int, ...] = tuple(reversed(np.asarray(rectangular_heights).argsort()))
423
424        page_char_height_score_map = ScoreMap.from_shape(shape, is_prob=False)
425        for idx in sorted_indices:
426            char_polygon = page_char_polygons[idx]
427            rectangular_height = rectangular_heights[idx]
428            if fill_score_map_char_masks is None:
429                char_polygon.fill_score_map(
430                    page_char_height_score_map,
431                    value=rectangular_height,
432                )
433            else:
434                char_mask = fill_score_map_char_masks[idx]
435                char_mask.fill_score_map(
436                    page_char_height_score_map,
437                    value=rectangular_height,
438                )
439
440        page_inactive_mask.fill_score_map(page_char_height_score_map, 0.0)
441
442        return page_char_height_score_map
443
444    def generate_page_char_gaussian_score_map(
445        self,
446        shape: Tuple[int, int],
447        page_char_polygons: Sequence[Polygon],
448    ):
449        height, width = shape
450        char_heatmap = self.char_heatmap_default_engine_executor.run({
451            'height': height,
452            'width': width,
453            'char_polygons': page_char_polygons,
454        })
455        return char_heatmap.score_map
456
    def generate_page_char_regression_labels(
        self,
        shape: Tuple[int, int],
        page_char_polygons: Sequence[Polygon],
        rng: RandomGenerator,
    ):
        """Build the regression labels for every char polygon on the page.

        For each polygon (which must have exactly 4 points) one CENTROID label
        located at the polygon's center point is emitted. If
        ``config.num_deviate_char_regression_labels`` is positive, up to that
        many DEVIATE labels located at randomly sampled points are emitted as
        well.

        Args:
            shape: ``(height, width)`` of the page; used only to assert that
                the sampled points fall inside the page.
            page_char_polygons: The char polygons to label.
            rng: Random generator used for sampling the deviate points.

        Returns:
            The list of labels, grouped per char: the centroid label first,
            followed by that char's valid deviate labels.
        """
        page_height, page_width = shape

        # Build a KD tree for removing deviate points that are too close to another center point.
        center_points = PointList()
        for polygon in page_char_polygons:
            center_points.append(polygon.get_center_point())
        kd_tree = KDTree(center_points.to_np_array())

        page_char_regression_labels: List[PageCharRegressionLabel] = []

        for char_idx, (polygon, center_point) in enumerate(zip(page_char_polygons, center_points)):
            assert polygon.num_points == 4
            up_left, up_right, down_right, down_left = polygon.points

            # 1. The centroid of char polygon.
            label = PageCharRegressionLabel(
                char_idx=char_idx,
                tag=PageCharRegressionLabelTag.CENTROID,
                label_point_smooth_y=center_point.smooth_y,
                label_point_smooth_x=center_point.smooth_x,
                downsampled_label_point_y=center_point.y,
                downsampled_label_point_x=center_point.x,
                up_left=up_left,
                up_right=up_right,
                down_right=down_right,
                down_left=down_left,
            )
            # The centroid labeling must be valid.
            assert label.valid
            page_char_regression_labels.append(label)

            # 2. The deviate points.
            if self.config.num_deviate_char_regression_labels <= 0:
                # Generating deviate points is optional.
                continue

            bounding_box = polygon.bounding_box

            # Sample points in the shifted bounding box space.
            deviate_points_in_bounding_box = PointList()
            # Some points are invalid, hence multiply the number of samplings by a factor.
            # Also avoid sampling points lying on the border to increase the chance of validity.
            # NOTE(review): rng.integers(low, high) excludes `high` and raises when
            # low >= high, so this requires bounding_box.height/width >= 3. Confirm that
            # smaller char polygons are filtered out upstream.
            for _ in range(
                self.config.num_deviate_char_regression_labels_candiates_factor
                * self.config.num_deviate_char_regression_labels
            ):
                y = int(rng.integers(1, bounding_box.height - 1))
                x = int(rng.integers(1, bounding_box.width - 1))
                deviate_points_in_bounding_box.append(Point.create(y=y, x=x))

            # Then transform to the polygon space.
            # Source corners are the bounding box corners in (x, y) order, clockwise
            # starting from the up-left corner.
            np_src_points = np.asarray(
                [
                    (0, 0),
                    (bounding_box.width - 1, 0),
                    (bounding_box.width - 1, bounding_box.height - 1),
                    (0, bounding_box.height - 1),
                ],
                dtype=np.float32,
            )
            # Presumably the polygon points relative to its bounding box origin, since the
            # bounding box offset is added back below (verify against Polygon internals).
            np_dst_points = polygon.internals.np_self_relative_points
            trans_mat = cv.getPerspectiveTransform(
                np_src_points,
                np_dst_points,
                cv.DECOMP_SVD,
            )

            deviate_points = PointList()
            for shifted_deviate_point in affine_points(
                trans_mat,
                deviate_points_in_bounding_box.to_point_tuple(),
            ):
                # Move from bounding-box-relative coordinates to page coordinates.
                y = bounding_box.up + shifted_deviate_point.smooth_y
                x = bounding_box.left + shifted_deviate_point.smooth_x
                assert 0 <= y < page_height
                assert 0 <= x < page_width
                deviate_points.append(Point.create(y=y, x=x))

            # Remove those that are too close to another center point, i.e. keep a point
            # only if its nearest center point belongs to the current char.
            _, np_kd_nbr_indices = kd_tree.query(deviate_points.to_np_array())
            preserve_flags: List[bool] = [
                idx == char_idx for idx in np_kd_nbr_indices[:, 0].tolist()
            ]

            # Build labels.
            num_valid_deviate_char_regression_labels = 0
            for deviate_point, preserve_flag in zip(deviate_points, preserve_flags):
                # Stop as soon as enough valid deviate labels have been collected.
                if num_valid_deviate_char_regression_labels \
                        >= self.config.num_deviate_char_regression_labels:
                    break

                if not preserve_flag:
                    continue

                label = PageCharRegressionLabel(
                    char_idx=char_idx,
                    tag=PageCharRegressionLabelTag.DEVIATE,
                    label_point_smooth_y=deviate_point.smooth_y,
                    label_point_smooth_x=deviate_point.smooth_x,
                    downsampled_label_point_y=deviate_point.y,
                    downsampled_label_point_x=deviate_point.x,
                    up_left=up_left,
                    up_right=up_right,
                    down_right=down_right,
                    down_left=down_left,
                )
                # Unlike the centroid label, an invalid deviate label is silently dropped.
                if label.valid:
                    page_char_regression_labels.append(label)
                    num_valid_deviate_char_regression_labels += 1

            if num_valid_deviate_char_regression_labels \
                    < self.config.num_deviate_char_regression_labels:
                logger.warning(f'Cannot sample enough deviate labels for char_polygon={polygon}')

        return page_char_regression_labels
578
579    def generate_page_char_bounding_box_mask(
580        self,
581        shape: Tuple[int, int],
582        page_char_regression_labels: Sequence[PageCharRegressionLabel],
583    ):
584        page_char_bounding_box_mask = Mask.from_shape(shape)
585        for page_char_regression_label in page_char_regression_labels:
586            box = Box(
587                up=math.floor(page_char_regression_label.bounding_smooth_up),
588                down=math.ceil(page_char_regression_label.bounding_smooth_down),
589                left=math.floor(page_char_regression_label.bounding_smooth_left),
590                right=math.ceil(page_char_regression_label.bounding_smooth_right),
591            )
592            box.fill_mask(page_char_bounding_box_mask)
593        return page_char_bounding_box_mask
594
595    def run(self, input: PageTextRegionLabelStepInput, rng: RandomGenerator):
596        page_text_region_step_output = input.page_text_region_step_output
597        page_image = page_text_region_step_output.page_image
598        page_active_mask = page_text_region_step_output.page_active_mask
599        page_char_polygons = page_text_region_step_output.page_char_polygons
600        page_text_region_polygons = page_text_region_step_output.page_text_region_polygons
601        page_char_polygon_text_region_polygon_indices = \
602            page_text_region_step_output.page_char_polygon_text_region_polygon_indices
603
604        page_inactive_mask = page_active_mask.to_inverted_mask()
605        page_char_mask, fill_score_map_char_masks = self.generate_page_char_mask(
606            shape=page_image.shape,
607            page_inactive_mask=page_inactive_mask,
608            page_char_polygons=page_char_polygons,
609            page_text_region_polygons=page_text_region_polygons,
610            page_char_polygon_text_region_polygon_indices=(
611                page_char_polygon_text_region_polygon_indices
612            ),
613        )
614
615        # NOTE: page_char_height_score_map is different from the one defined in page distortion.
616        # TODO: Resolve the inconsistency.
617        page_char_height_score_map = self.generate_page_char_height_score_map(
618            shape=page_image.shape,
619            page_inactive_mask=page_inactive_mask,
620            page_char_polygons=page_char_polygons,
621            fill_score_map_char_masks=fill_score_map_char_masks,
622        )
623
624        page_char_gaussian_score_map = self.generate_page_char_gaussian_score_map(
625            page_image.shape,
626            page_char_polygons,
627        )
628
629        page_char_regression_labels = self.generate_page_char_regression_labels(
630            page_image.shape,
631            page_char_polygons,
632            rng,
633        )
634
635        page_char_bounding_box_mask = self.generate_page_char_bounding_box_mask(
636            page_image.shape,
637            page_char_regression_labels,
638        )
639
640        return PageTextRegionLabelStepOutput(
641            page_char_mask=page_char_mask,
642            page_char_height_score_map=page_char_height_score_map,
643            page_char_gaussian_score_map=page_char_gaussian_score_map,
644            page_char_regression_labels=page_char_regression_labels,
645            page_char_bounding_box_mask=page_char_bounding_box_mask,
646        )

Abstract base class for generic types.

A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::

    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
        # Etc.

This class can then be used as follows::

    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default

def __init__(self, config: PageTextRegionLabelStepConfig):
    """Initialize the step and create the engine executors it relies on.

    Args:
        config: Step configuration. It is forwarded to the parent initializer,
            and the engine settings are then read back from ``self.config``.
    """
    super().__init__(config)

    # Executor consumed by generate_page_char_gaussian_score_map.
    heatmap_init_config = self.config.char_heatmap_default_engine_init_config
    self.char_heatmap_default_engine_executor = (
        char_heatmap_default_engine_executor_factory.create(heatmap_init_config)
    )

    # Executor consumed by generate_page_char_mask.
    mask_engine_config = self.config.char_mask_engine_config
    self.char_mask_engine_executor = (
        char_mask_engine_executor_aggregator_factory.create_engine_executor(
            mask_engine_config
        )
    )
def generate_page_char_mask( self, shape: Tuple[int, int], page_inactive_mask: vkit.element.mask.Mask, page_char_polygons: Sequence[vkit.element.polygon.Polygon], page_text_region_polygons: Sequence[vkit.element.polygon.Polygon], page_char_polygon_text_region_polygon_indices: Sequence[int]):
def generate_page_char_mask(
    self,
    shape: Tuple[int, int],
    page_inactive_mask: Mask,
    page_char_polygons: Sequence[Polygon],
    page_text_region_polygons: Sequence[Polygon],
    page_char_polygon_text_region_polygon_indices: Sequence[int],
):
    """Run the char mask engine for the page and clear the inactive area.

    Args:
        shape: ``(height, width)`` of the page.
        page_inactive_mask: Mask used to fill the combined char mask with 0.
        page_char_polygons: Char polygons handed to the engine.
        page_text_region_polygons: Text region polygons, each char is paired with
            one of them as its bounding polygon.
        page_char_polygon_text_region_polygon_indices: For every char polygon, the
            index into ``page_text_region_polygons`` of its bounding polygon.

    Returns:
        A pair made of the engine's ``combined_chars_mask`` (after the inactive
        area has been filled with 0) and the engine's ``char_masks``.
    """
    page_height, page_width = shape

    # Look up the bounding text region polygon of every char.
    bounding_polygons = [
        page_text_region_polygons[region_idx]
        for region_idx in page_char_polygon_text_region_polygon_indices
    ]

    run_config = CharMaskEngineRunConfig(
        height=page_height,
        width=page_width,
        char_polygons=page_char_polygons,
        char_bounding_polygons=bounding_polygons,
    )
    engine_output = self.char_mask_engine_executor.run(run_config)

    # The return value of fill_mask is not used, the combined mask is updated in place.
    page_inactive_mask.fill_mask(engine_output.combined_chars_mask, 0)

    return engine_output.combined_chars_mask, engine_output.char_masks
@classmethod
def generate_page_char_height_score_map( cls, shape: Tuple[int, int], page_inactive_mask: vkit.element.mask.Mask, page_char_polygons: Sequence[vkit.element.polygon.Polygon], fill_score_map_char_masks: Union[Sequence[vkit.element.mask.Mask], NoneType]):
@classmethod
def generate_page_char_height_score_map(
    cls,
    shape: Tuple[int, int],
    page_inactive_mask: Mask,
    page_char_polygons: Sequence[Polygon],
    fill_score_map_char_masks: Optional[Sequence[Mask]],
):
    """Build a score map that stores the rectangular height of each char.

    Args:
        shape: Shape of the score map to create.
        page_inactive_mask: Mask used to fill the score map with 0.0 at the end.
        page_char_polygons: Char polygons, their rectangular heights are the values
            written to the score map.
        fill_score_map_char_masks: Optional per-char masks, indexed like
            ``page_char_polygons``. If given, the masks are used to fill the score
            map, otherwise the polygons themselves are used.

    Returns:
        The filled score map (created with ``is_prob=False``).
    """
    heights = [polygon.get_rectangular_height() for polygon in page_char_polygons]

    # Indices ordered from the tallest char to the shortest one, hence shorter chars are
    # written later (presumably overriding taller ones where they overlap).
    descending_indices = np.asarray(heights).argsort()[::-1]

    # Pick once which objects fill the score map: the char masks if provided,
    # else the char polygons.
    painters = (
        page_char_polygons if fill_score_map_char_masks is None else fill_score_map_char_masks
    )

    score_map = ScoreMap.from_shape(shape, is_prob=False)
    for char_idx in descending_indices:
        painters[char_idx].fill_score_map(score_map, value=heights[char_idx])

    page_inactive_mask.fill_score_map(score_map, 0.0)

    return score_map
def generate_page_char_gaussian_score_map( self, shape: Tuple[int, int], page_char_polygons: Sequence[vkit.element.polygon.Polygon]):
def generate_page_char_gaussian_score_map(
    self,
    shape: Tuple[int, int],
    page_char_polygons: Sequence[Polygon],
):
    """Run the default char heatmap engine and return its score map.

    Args:
        shape: ``(height, width)`` of the page.
        page_char_polygons: Char polygons passed to the engine.

    Returns:
        The ``score_map`` attribute of the engine output.
    """
    page_height, page_width = shape
    run_config = {
        'height': page_height,
        'width': page_width,
        'char_polygons': page_char_polygons,
    }
    return self.char_heatmap_default_engine_executor.run(run_config).score_map
def generate_page_char_regression_labels( self, shape: Tuple[int, int], page_char_polygons: Sequence[vkit.element.polygon.Polygon], rng: numpy.random._generator.Generator):
def generate_page_char_regression_labels(
    self,
    shape: Tuple[int, int],
    page_char_polygons: Sequence[Polygon],
    rng: RandomGenerator,
):
    """Generate the regression labels (centroid and optional deviate points) of chars.

    For every char polygon, one CENTROID label located at the polygon center is emitted.
    If ``self.config.num_deviate_char_regression_labels`` is positive, up to that many
    DEVIATE labels are additionally emitted. Deviate points are sampled in the bounding
    box space of the polygon, mapped into the polygon by a perspective transform, and
    dropped if their nearest char center belongs to another char or if the label is
    not valid.

    Args:
        shape: ``(height, width)`` of the page, used to assert that deviate points
            fall inside the page.
        page_char_polygons: Char polygons. Each one must have exactly 4 points, in the
            order up-left, up-right, down-right, down-left.
        rng: Random generator used to sample the deviate points.

    Returns:
        The list of labels. For each char, the centroid label comes first, followed
        by its deviate labels. Empty if there is no char polygon.
    """
    page_height, page_width = shape

    # Nothing to label. Return early since a KD tree cannot be built from zero points.
    if len(page_char_polygons) == 0:
        return []

    # Build a KD tree for removing deviate points that are too close to another center point.
    center_points = PointList()
    for polygon in page_char_polygons:
        center_points.append(polygon.get_center_point())
    kd_tree = KDTree(center_points.to_np_array())

    page_char_regression_labels: List[PageCharRegressionLabel] = []

    for char_idx, (polygon, center_point) in enumerate(zip(page_char_polygons, center_points)):
        assert polygon.num_points == 4
        up_left, up_right, down_right, down_left = polygon.points

        # 1. The centroid of char polygon.
        label = PageCharRegressionLabel(
            char_idx=char_idx,
            tag=PageCharRegressionLabelTag.CENTROID,
            label_point_smooth_y=center_point.smooth_y,
            label_point_smooth_x=center_point.smooth_x,
            downsampled_label_point_y=center_point.y,
            downsampled_label_point_x=center_point.x,
            up_left=up_left,
            up_right=up_right,
            down_right=down_right,
            down_left=down_left,
        )
        # The centroid labeling must be valid.
        assert label.valid
        page_char_regression_labels.append(label)

        # 2. The deviate points.
        if self.config.num_deviate_char_regression_labels <= 0:
            # Generating deviate points is optional.
            continue

        bounding_box = polygon.bounding_box

        # rng.integers(1, n - 1) draws from [1, n - 2] and raises ValueError if that range
        # is empty, i.e. n < 3. Such a box has no interior point to sample from, hence only
        # the centroid label is kept for this char.
        if bounding_box.height < 3 or bounding_box.width < 3:
            logger.warning(
                f'Cannot sample deviate labels for too small char_polygon={polygon}'
            )
            continue

        # Sample points in the shifted bounding box space.
        deviate_points_in_bounding_box = PointList()
        # Some points are invalid, hence multiply the number of samplings by a factor.
        # Also do not sample the points lying on the border, to increase the chance of
        # getting valid points.
        for _ in range(
            self.config.num_deviate_char_regression_labels_candiates_factor
            * self.config.num_deviate_char_regression_labels
        ):
            # NOTE: Keep the order (y, then x) to preserve the random stream.
            y = int(rng.integers(1, bounding_box.height - 1))
            x = int(rng.integers(1, bounding_box.width - 1))
            deviate_points_in_bounding_box.append(Point.create(y=y, x=x))

        # Then transform to the polygon space: map the corners of the bounding box
        # (clockwise, starting from up-left) to the corners of the polygon.
        np_src_points = np.asarray(
            [
                (0, 0),
                (bounding_box.width - 1, 0),
                (bounding_box.width - 1, bounding_box.height - 1),
                (0, bounding_box.height - 1),
            ],
            dtype=np.float32,
        )
        np_dst_points = polygon.internals.np_self_relative_points
        trans_mat = cv.getPerspectiveTransform(
            np_src_points,
            np_dst_points,
            cv.DECOMP_SVD,
        )

        deviate_points = PointList()
        for shifted_deviate_point in affine_points(
            trans_mat,
            deviate_points_in_bounding_box.to_point_tuple(),
        ):
            # Shift back from the bounding box space to the page space.
            y = bounding_box.up + shifted_deviate_point.smooth_y
            x = bounding_box.left + shifted_deviate_point.smooth_x
            assert 0 <= y < page_height
            assert 0 <= x < page_width
            deviate_points.append(Point.create(y=y, x=x))

        # Remove those that are too close to another center point: a deviate point is
        # preserved only if its nearest center point is the center of this char.
        _, np_kd_nbr_indices = kd_tree.query(deviate_points.to_np_array())
        preserve_flags: List[bool] = [
            idx == char_idx for idx in np_kd_nbr_indices[:, 0].tolist()
        ]

        # Build labels.
        num_valid_deviate_char_regression_labels = 0
        for deviate_point, preserve_flag in zip(deviate_points, preserve_flags):
            if num_valid_deviate_char_regression_labels \
                    >= self.config.num_deviate_char_regression_labels:
                break

            if not preserve_flag:
                continue

            label = PageCharRegressionLabel(
                char_idx=char_idx,
                tag=PageCharRegressionLabelTag.DEVIATE,
                label_point_smooth_y=deviate_point.smooth_y,
                label_point_smooth_x=deviate_point.smooth_x,
                downsampled_label_point_y=deviate_point.y,
                downsampled_label_point_x=deviate_point.x,
                up_left=up_left,
                up_right=up_right,
                down_right=down_right,
                down_left=down_left,
            )
            if label.valid:
                page_char_regression_labels.append(label)
                num_valid_deviate_char_regression_labels += 1

        if num_valid_deviate_char_regression_labels \
                < self.config.num_deviate_char_regression_labels:
            logger.warning(f'Cannot sample enough deviate labels for char_polygon={polygon}')

    return page_char_regression_labels
def generate_page_char_bounding_box_mask( self, shape: Tuple[int, int], page_char_regression_labels: Sequence[vkit.pipeline.text_detection.page_text_region_label.PageCharRegressionLabel]):
def generate_page_char_bounding_box_mask(
    self,
    shape: Tuple[int, int],
    page_char_regression_labels: Sequence[PageCharRegressionLabel],
):
    """Build a mask covering the bounding box of every regression label.

    Args:
        shape: Shape of the mask to create.
        page_char_regression_labels: Labels whose smooth bounding coordinates
            define the boxes to fill.

    Returns:
        The mask with all boxes filled.
    """
    bounding_box_mask = Mask.from_shape(shape)
    for label in page_char_regression_labels:
        # Round outwards (floor for up/left, ceil for down/right) so that the
        # integer box does not shrink relative to the smooth bounds.
        up = math.floor(label.bounding_smooth_up)
        down = math.ceil(label.bounding_smooth_down)
        left = math.floor(label.bounding_smooth_left)
        right = math.ceil(label.bounding_smooth_right)
        Box(up=up, down=down, left=left, right=right).fill_mask(bounding_box_mask)
    return bounding_box_mask
def run( self, input: vkit.pipeline.text_detection.page_text_region_label.PageTextRegionLabelStepInput, rng: numpy.random._generator.Generator):
def run(self, input: PageTextRegionLabelStepInput, rng: RandomGenerator):
    """Generate all char-level labels for the page produced by the text region step.

    Args:
        input: Step input carrying ``page_text_region_step_output``.
        rng: Random generator, forwarded to the regression label generation.

    Returns:
        A ``PageTextRegionLabelStepOutput`` holding the char mask, the char height
        score map, the char gaussian score map, the char regression labels and the
        char bounding box mask.
    """
    upstream = input.page_text_region_step_output
    page_shape = upstream.page_image.shape
    char_polygons = upstream.page_char_polygons

    # The inverted active mask is used to clear labels outside of the active area.
    inactive_mask = upstream.page_active_mask.to_inverted_mask()

    char_mask, per_char_masks = self.generate_page_char_mask(
        shape=page_shape,
        page_inactive_mask=inactive_mask,
        page_char_polygons=char_polygons,
        page_text_region_polygons=upstream.page_text_region_polygons,
        page_char_polygon_text_region_polygon_indices=(
            upstream.page_char_polygon_text_region_polygon_indices
        ),
    )

    # NOTE: page_char_height_score_map is different from the one defined in page distortion.
    # TODO: Resolve the inconsistency.
    height_score_map = self.generate_page_char_height_score_map(
        shape=page_shape,
        page_inactive_mask=inactive_mask,
        page_char_polygons=char_polygons,
        fill_score_map_char_masks=per_char_masks,
    )

    gaussian_score_map = self.generate_page_char_gaussian_score_map(
        page_shape,
        char_polygons,
    )

    regression_labels = self.generate_page_char_regression_labels(
        page_shape,
        char_polygons,
        rng,
    )

    # The bounding box mask is derived from the regression labels generated above.
    bounding_box_mask = self.generate_page_char_bounding_box_mask(
        page_shape,
        regression_labels,
    )

    return PageTextRegionLabelStepOutput(
        page_char_mask=char_mask,
        page_char_height_score_map=height_score_map,
        page_char_gaussian_score_map=gaussian_score_map,
        page_char_regression_labels=regression_labels,
        page_char_bounding_box_mask=bounding_box_mask,
    )