vkit.pipeline.text_detection.page_text_region_label

  1# Copyright 2022 vkit-x Administrator. All Rights Reserved.
  2#
  3# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
  4#
  5# The commercial license gives you the full rights to create and distribute software
  6# on your own terms without any SSPL license obligations. For more information,
  7# please see the "LICENSE_COMMERCIAL.txt" file.
  8#
  9# This project is also available under Server Side Public License (SSPL).
 10# The SSPL licensing is ideal for use cases such as open source projects with
 11# SSPL distribution, student/academic purposes, hobby projects, internal research
 12# projects without external distribution, or other projects where all SSPL
 13# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
 14from typing import cast, Tuple, Sequence, List, Optional
 15from enum import Enum, unique
 16import math
 17import logging
 18
 19import attrs
 20from numpy.random import Generator as RandomGenerator
 21import numpy as np
 22import cv2 as cv
 23from sklearn.neighbors import KDTree
 24
 25from vkit.utility import attrs_lazy_field, normalize_to_probs
 26from vkit.element import Point, PointList, Polygon, Mask, ScoreMap
 27from vkit.mechanism.distortion.geometric.affine import affine_points
 28from vkit.engine.char_heatmap import (
 29    char_heatmap_default_engine_executor_factory,
 30    CharHeatmapDefaultEngineInitConfig,
 31)
 32from ..interface import PipelineStep, PipelineStepFactory
 33from .page_text_region import PageTextRegionStepOutput
 34
# Module-level logger; this module uses it to warn when a char polygon does not
# yield enough valid deviate labels.
logger = logging.getLogger(__name__)
 36
 37
 38@attrs.define
 39class PageTextRegionLabelStepConfig:
 40    char_heatmap_default_engine_init_config: CharHeatmapDefaultEngineInitConfig = \
 41        attrs.field(factory=CharHeatmapDefaultEngineInitConfig)
 42
 43    # 1 centrod + n deviate points.
 44    num_deviate_char_regression_labels: int = 3
 45    num_deviate_char_regression_labels_candiates_factor: int = 5
 46
 47
@attrs.define
class PageTextRegionLabelStepInput:
    """Input of the label step: the output of the page text region step."""

    # The step's `run` reads `page_image` and `page_char_polygons` from this object.
    page_text_region_step_output: PageTextRegionStepOutput
 51
 52
@unique
class PageCharRegressionLabelTag(Enum):
    """Tells how the label point of a char regression label was chosen."""

    # The label point is the center point of the char polygon.
    CENTROID = 'centroid'
    # The label point is a randomly sampled point of the char polygon.
    DEVIATE = 'deviate'
 57
 58
 59PI = float(np.pi)
 60TWO_PI = float(2 * np.pi)
 61
 62
 63@attrs.define
 64class Vector:
 65    y: float
 66    x: float
 67
 68    _distance: Optional[float] = attrs_lazy_field()
 69    _theta: Optional[float] = attrs_lazy_field()
 70
 71    def lazy_post_init(self):
 72        initialized = (self._distance is not None)
 73        if initialized:
 74            return
 75
 76        self._distance = math.hypot(self.x, self.y)
 77        self._theta = float(np.arctan2(self.y, self.x)) % TWO_PI
 78
 79    @property
 80    def distance(self):
 81        self.lazy_post_init()
 82        assert self._distance is not None
 83        return self._distance
 84
 85    @property
 86    def theta(self):
 87        self.lazy_post_init()
 88        assert self._theta is not None
 89        return self._theta
 90
 91    @classmethod
 92    def calculate_theta_delta(
 93        cls,
 94        vector0: 'Vector',
 95        vector1: 'Vector',
 96        clockwise: bool = False,
 97    ):
 98        theta_delta = (vector1.theta - vector0.theta + PI) % TWO_PI - PI
 99        if clockwise and theta_delta < 0:
100            theta_delta += TWO_PI
101        return theta_delta
102
103    def dot(self, other: 'Vector'):
104        return self.x * other.x + self.y * other.y
105
106
@attrs.define
class PageCharRegressionLabel:
    """
    Regression label tying one label point to the four corners of a char polygon.

    Corner vectors, clockwise angles, the validity flag and the angle
    distribution are derived on first access and cached on the instance.
    """
    char_idx: int
    tag: PageCharRegressionLabelTag
    label_point_y: float
    label_point_x: float
    # Same as label_point_* until the label is downsampled.
    downsampled_label_point_y: int
    downsampled_label_point_x: int
    up_left: Point
    up_right: Point
    down_right: Point
    down_left: Point

    # Vectors pointing from the label point to each corner.
    _up_left_vector: Optional[Vector] = attrs_lazy_field()
    _up_right_vector: Optional[Vector] = attrs_lazy_field()
    _down_right_vector: Optional[Vector] = attrs_lazy_field()
    _down_left_vector: Optional[Vector] = attrs_lazy_field()

    # Clockwise angles between consecutive corner vectors.
    _up_left_to_up_right_angle: Optional[float] = attrs_lazy_field()
    _up_right_to_down_right_angle: Optional[float] = attrs_lazy_field()
    _down_right_to_down_left_angle: Optional[float] = attrs_lazy_field()
    _down_left_to_up_left_angle: Optional[float] = attrs_lazy_field()
    _valid: Optional[bool] = attrs_lazy_field()
    _clockwise_angle_distribution: Optional[Sequence[float]] = attrs_lazy_field()

    def lazy_post_init(self):
        """Derive and cache all lazily computed fields; no-op once cached."""
        if self._up_left_vector is not None:
            return

        origin_y = self.label_point_y
        origin_x = self.label_point_x
        corners = (self.up_left, self.up_right, self.down_right, self.down_left)
        (
            self._up_left_vector,
            self._up_right_vector,
            self._down_right_vector,
            self._down_left_vector,
        ) = [
            Vector(y=corner.smooth_y - origin_y, x=corner.smooth_x - origin_x)
            for corner in corners
        ]

        # Walk the corners in order: up-left -> up-right -> down-right ->
        # down-left -> back to up-left.
        consecutive_vectors = (
            (self._up_left_vector, self._up_right_vector),
            (self._up_right_vector, self._down_right_vector),
            (self._down_right_vector, self._down_left_vector),
            (self._down_left_vector, self._up_left_vector),
        )
        clockwise_angles = [
            Vector.calculate_theta_delta(vector_from, vector_to, clockwise=True)
            for vector_from, vector_to in consecutive_vectors
        ]
        (
            self._up_left_to_up_right_angle,
            self._up_right_to_down_right_angle,
            self._down_right_to_down_left_angle,
            self._down_left_to_up_left_angle,
        ) = clockwise_angles

        # The label is valid when the four angles add up to roughly a full turn
        # (relative tolerance 0.012, i.e. about 4 degrees).
        # NOTE(review): presumably this holds when the label point lies inside
        # the quadrilateral - confirm.
        self._valid = math.isclose(sum(clockwise_angles), TWO_PI, rel_tol=0.012)

        self._clockwise_angle_distribution = normalize_to_probs(clockwise_angles)

    def to_shifted_page_char_regression_label(self, offset_y: int, offset_x: int):
        """
        Return a copy whose label point and corners are moved by the offsets.

        Must be called on a valid label that has not been downsampled. The
        cached derived fields are carried over to the copy unchanged.
        """
        assert self.valid
        # Only can be called before downsampling.
        assert self.label_point_y == self.downsampled_label_point_y
        assert self.label_point_x == self.downsampled_label_point_x

        moved_label_point_y = cast(int, self.label_point_y + offset_y)
        moved_label_point_x = cast(int, self.label_point_x + offset_x)
        moved_up_left, moved_up_right, moved_down_right, moved_down_left = [
            corner.to_shifted_point(offset_y=offset_y, offset_x=offset_x)
            for corner in (self.up_left, self.up_right, self.down_right, self.down_left)
        ]

        shifted = PageCharRegressionLabel(
            char_idx=self.char_idx,
            tag=self.tag,
            label_point_y=moved_label_point_y,
            label_point_x=moved_label_point_x,
            downsampled_label_point_y=moved_label_point_y,
            downsampled_label_point_x=moved_label_point_x,
            up_left=moved_up_left,
            up_right=moved_up_right,
            down_right=moved_down_right,
            down_left=moved_down_left,
        )

        # Reuse the cached values instead of deriving them again for the copy.
        for cached_field in (
            '_up_left_vector',
            '_up_right_vector',
            '_down_right_vector',
            '_down_left_vector',
            '_up_left_to_up_right_angle',
            '_up_right_to_down_right_angle',
            '_down_right_to_down_left_angle',
            '_down_left_to_up_left_angle',
            '_valid',
            '_clockwise_angle_distribution',
        ):
            setattr(shifted, cached_field, getattr(self, cached_field))

        return shifted

    def to_downsampled_page_char_regression_label(self, downsample_labeling_factor: int):
        """
        Return a copy whose label point is snapped to the downsampled grid.

        Must be called on a valid label that has not been downsampled. The
        cached derived fields are not carried over, so the copy derives its own
        from the moved label point.
        """
        assert self.valid
        # Only can be called before downsampling.
        assert self.label_point_y == self.downsampled_label_point_y
        assert self.label_point_x == self.downsampled_label_point_x

        factor = downsample_labeling_factor
        grid_y = int(self.label_point_y // factor)
        grid_x = int(self.label_point_x // factor)

        # The label point moves to the center of the upsampled grid cell.
        center_offset = (factor - 1) / 2

        return PageCharRegressionLabel(
            char_idx=self.char_idx,
            tag=self.tag,
            label_point_y=grid_y * factor + center_offset,
            label_point_x=grid_x * factor + center_offset,
            downsampled_label_point_y=grid_y,
            downsampled_label_point_x=grid_x,
            up_left=self.up_left,
            up_right=self.up_right,
            down_right=self.down_right,
            down_left=self.down_left,
        )

    @property
    def valid(self):
        """Whether the four clockwise angles sum to roughly a full turn."""
        self.lazy_post_init()
        assert self._valid is not None
        return self._valid

    def generate_up_left_offsets(self):
        """Return the (y, x) offsets from the label point to the up-left corner."""
        self.lazy_post_init()
        assert self._up_left_vector is not None
        up_left_vector = self._up_left_vector
        return up_left_vector.y, up_left_vector.x

    def generate_clockwise_angle_distribution(self):
        """Return the four clockwise angles after `normalize_to_probs`."""
        self.lazy_post_init()
        assert self._clockwise_angle_distribution is not None
        return self._clockwise_angle_distribution

    def generate_non_up_left_distances(self):
        """Return the distances to the up-right, down-right and down-left corners."""
        self.lazy_post_init()
        assert self._up_right_vector is not None
        assert self._down_right_vector is not None
        assert self._down_left_vector is not None
        up_right_distance = self._up_right_vector.distance
        down_right_distance = self._down_right_vector.distance
        down_left_distance = self._down_left_vector.distance
        return (up_right_distance, down_right_distance, down_left_distance)
285
286
@attrs.define
class PageTextRegionLabelStepOutput:
    """Labels produced by the page text region label step."""

    # Mask filled by every char polygon of the page.
    page_char_mask: Mask
    # Score map filled, for each char polygon, with that polygon's rectangular height.
    page_char_height_score_map: ScoreMap
    # Score map taken from the char heatmap engine's result.
    page_char_gaussian_score_map: ScoreMap
    # One centroid label per char, plus the deviate labels that turned out valid.
    page_char_regression_labels: Sequence[PageCharRegressionLabel]
292    page_char_regression_labels: Sequence[PageCharRegressionLabel]
293
294
class PageTextRegionLabelStep(
    PipelineStep[
        PageTextRegionLabelStepConfig,
        PageTextRegionLabelStepInput,
        PageTextRegionLabelStepOutput,
    ]
):  # yapf: disable
    """
    Pipeline step that derives char-level labels from a page text region output.

    It produces a char mask, a char height score map, a char heatmap score map
    and a list of char regression labels.
    """

    def __init__(self, config: PageTextRegionLabelStepConfig):
        super().__init__(config)

        # Created once and reused by every `run` call.
        self.char_heatmap_default_engine_executor = \
            char_heatmap_default_engine_executor_factory.create(
                self.config.char_heatmap_default_engine_init_config
            )

    @classmethod
    def generate_page_char_mask(
        cls,
        shape: Tuple[int, int],
        page_char_polygons: Sequence[Polygon],
    ):
        """Return a mask of the given shape filled by every char polygon."""
        page_char_mask = Mask.from_shape(shape)
        for polygon in page_char_polygons:
            polygon.fill_mask(page_char_mask)
        return page_char_mask

    @classmethod
    def generate_page_char_height_score_map(
        cls,
        shape: Tuple[int, int],
        page_char_polygons: Sequence[Polygon],
    ):
        """Return a score map where each char polygon is filled with its rectangular height."""
        page_char_height_score_map = ScoreMap.from_shape(shape, is_prob=False)
        for polygon in page_char_polygons:
            polygon.fill_score_map(
                page_char_height_score_map,
                value=polygon.get_rectangular_height(),
            )
        return page_char_height_score_map

    def generate_page_char_gaussian_score_map(
        self,
        shape: Tuple[int, int],
        page_char_polygons: Sequence[Polygon],
        rng: RandomGenerator,
    ):
        """Run the char heatmap engine on the char polygons and return its score map."""
        height, width = shape
        char_heatmap = self.char_heatmap_default_engine_executor.run(
            {
                'height': height,
                'width': width,
                'char_polygons': page_char_polygons,
            },
            rng,
        )
        return char_heatmap.score_map

    def _sample_deviate_points(
        self,
        shape: Tuple[int, int],
        polygon: Polygon,
        num_candidates: int,
        rng: RandomGenerator,
    ):
        """
        Sample candidate deviate points for one char polygon, in page coordinates.

        Returns None when nothing can be sampled: either no candidates are
        requested, or the bounding box of the polygon has no position strictly
        inside its border.
        """
        page_height, page_width = shape
        bounding_box = polygon.bounding_box

        # Border positions are excluded from sampling, hence both sides need at
        # least 3 positions. Without this guard `rng.integers(1, size - 1)` raises
        # ValueError (low >= high) for a box side shorter than 3.
        if num_candidates <= 0 or bounding_box.height < 3 or bounding_box.width < 3:
            return None

        # Sample points in shifted bounding box space.
        # Some points are invalid, hence the caller asks for more candidates than labels.
        # Also not to sample the points lying on the border to increase the chance of valid.
        deviate_points_in_bounding_box = PointList()
        for _ in range(num_candidates):
            y = int(rng.integers(1, bounding_box.height - 1))
            x = int(rng.integers(1, bounding_box.width - 1))
            deviate_points_in_bounding_box.append(Point.create(y=y, x=x))

        # Then transform to the polygon space: map the bounding box corners to
        # the polygon corners.
        np_src_points = np.asarray(
            [
                (0, 0),
                (bounding_box.width - 1, 0),
                (bounding_box.width - 1, bounding_box.height - 1),
                (0, bounding_box.height - 1),
            ],
            dtype=np.float32,
        )
        np_dst_points = polygon.internals.np_self_relative_points
        trans_mat = cv.getPerspectiveTransform(
            np_src_points,
            np_dst_points,
            cv.DECOMP_SVD,
        )

        deviate_points = PointList()
        for shifted_deviate_point in affine_points(
            trans_mat,
            deviate_points_in_bounding_box.to_point_tuple(),
        ):
            # Move from bounding box relative coordinates back to page coordinates.
            y = bounding_box.up + shifted_deviate_point.y
            x = bounding_box.left + shifted_deviate_point.x
            assert 0 <= y < page_height
            assert 0 <= x < page_width
            deviate_points.append(Point.create(y=y, x=x))

        return deviate_points

    def generate_page_char_regression_labels(
        self,
        shape: Tuple[int, int],
        page_char_polygons: Sequence[Polygon],
        rng: RandomGenerator,
    ):
        """
        Build the regression labels of every char polygon.

        Each polygon yields one centroid label, followed by up to
        `config.num_deviate_char_regression_labels` deviate labels. A deviate
        candidate is kept only if its nearest char center is the center of its
        own polygon and the resulting label is valid. A warning is logged for a
        polygon that yields fewer deviate labels than requested.

        Returns an empty list when there are no char polygons.
        """
        page_char_regression_labels: List[PageCharRegressionLabel] = []

        # A KD tree cannot be built from zero center points, and there is
        # nothing to label in that case.
        if len(page_char_polygons) == 0:
            return page_char_regression_labels

        num_deviate_labels = self.config.num_deviate_char_regression_labels
        num_candidates = (
            self.config.num_deviate_char_regression_labels_candiates_factor * num_deviate_labels
        )

        # Build a KD tree for removing deviate points that are too close to another center point.
        center_points = PointList()
        for polygon in page_char_polygons:
            center_points.append(polygon.get_center_point())
        kd_tree = KDTree(center_points.to_np_array())

        for char_idx, (polygon, center_point) in enumerate(zip(page_char_polygons, center_points)):
            assert polygon.num_points == 4
            up_left, up_right, down_right, down_left = polygon.points

            # 1. The centroid of char polygon.
            label = PageCharRegressionLabel(
                char_idx=char_idx,
                tag=PageCharRegressionLabelTag.CENTROID,
                label_point_y=center_point.y,
                label_point_x=center_point.x,
                downsampled_label_point_y=center_point.y,
                downsampled_label_point_x=center_point.x,
                up_left=up_left,
                up_right=up_right,
                down_right=down_right,
                down_left=down_left,
            )
            # The centroid labeling must be valid.
            assert label.valid
            page_char_regression_labels.append(label)

            # 2. The deviate points.
            num_valid_deviate_char_regression_labels = 0
            deviate_points = self._sample_deviate_points(shape, polygon, num_candidates, rng)

            if deviate_points is not None:
                # Remove those are too close to another center point.
                _, np_kd_nbr_indices = kd_tree.query(deviate_points.to_np_array())
                preserve_flags: List[bool] = [
                    idx == char_idx for idx in np_kd_nbr_indices[:, 0].tolist()
                ]

                # Build labels.
                for deviate_point, preserve_flag in zip(deviate_points, preserve_flags):
                    if num_valid_deviate_char_regression_labels >= num_deviate_labels:
                        break

                    if not preserve_flag:
                        continue

                    label = PageCharRegressionLabel(
                        char_idx=char_idx,
                        tag=PageCharRegressionLabelTag.DEVIATE,
                        label_point_y=deviate_point.y,
                        label_point_x=deviate_point.x,
                        downsampled_label_point_y=deviate_point.y,
                        downsampled_label_point_x=deviate_point.x,
                        up_left=up_left,
                        up_right=up_right,
                        down_right=down_right,
                        down_left=down_left,
                    )
                    if label.valid:
                        page_char_regression_labels.append(label)
                        num_valid_deviate_char_regression_labels += 1

            if num_valid_deviate_char_regression_labels < num_deviate_labels:
                logger.warning(f'Cannot sample enough deviate labels for char_polygon={polygon}')

        return page_char_regression_labels

    def run(self, input: PageTextRegionLabelStepInput, rng: RandomGenerator):
        """Generate all labels for the page carried by `input` and bundle them."""
        page_text_region_step_output = input.page_text_region_step_output
        page_image = page_text_region_step_output.page_image
        page_char_polygons = page_text_region_step_output.page_char_polygons

        page_char_mask = self.generate_page_char_mask(
            page_image.shape,
            page_char_polygons,
        )
        page_char_height_score_map = self.generate_page_char_height_score_map(
            page_image.shape,
            page_char_polygons,
        )

        # The heatmap is generated before the regression labels; both consume `rng`.
        page_char_gaussian_score_map = self.generate_page_char_gaussian_score_map(
            page_image.shape,
            page_char_polygons,
            rng,
        )
        page_char_regression_labels = self.generate_page_char_regression_labels(
            page_image.shape,
            page_char_polygons,
            rng,
        )

        return PageTextRegionLabelStepOutput(
            page_char_mask=page_char_mask,
            page_char_height_score_map=page_char_height_score_map,
            page_char_gaussian_score_map=page_char_gaussian_score_map,
            page_char_regression_labels=page_char_regression_labels,
        )
502
503
# Factory object wrapping `PageTextRegionLabelStep`.
# NOTE(review): presumably used to build the step from its config - confirm
# against `PipelineStepFactory`.
page_text_region_label_step_factory = PipelineStepFactory(PageTextRegionLabelStep)
class PageTextRegionLabelStepConfig:
40class PageTextRegionLabelStepConfig:
41    char_heatmap_default_engine_init_config: CharHeatmapDefaultEngineInitConfig = \
42        attrs.field(factory=CharHeatmapDefaultEngineInitConfig)
43
44    # 1 centroid + n deviate points.
45    num_deviate_char_regression_labels: int = 3
46    num_deviate_char_regression_labels_candiates_factor: int = 5
PageTextRegionLabelStepConfig( char_heatmap_default_engine_init_config: vkit.engine.char_heatmap.default.CharHeatmapDefaultEngineInitConfig = NOTHING, num_deviate_char_regression_labels: int = 3, num_deviate_char_regression_labels_candiates_factor: int = 5)
2def __init__(self, char_heatmap_default_engine_init_config=NOTHING, num_deviate_char_regression_labels=attr_dict['num_deviate_char_regression_labels'].default, num_deviate_char_regression_labels_candiates_factor=attr_dict['num_deviate_char_regression_labels_candiates_factor'].default):
3    if char_heatmap_default_engine_init_config is not NOTHING:
4        self.char_heatmap_default_engine_init_config = char_heatmap_default_engine_init_config
5    else:
6        self.char_heatmap_default_engine_init_config = __attr_factory_char_heatmap_default_engine_init_config()
7    self.num_deviate_char_regression_labels = num_deviate_char_regression_labels
8    self.num_deviate_char_regression_labels_candiates_factor = num_deviate_char_regression_labels_candiates_factor

Method generated by attrs for class PageTextRegionLabelStepConfig.

class PageTextRegionLabelStepInput:
50class PageTextRegionLabelStepInput:
51    page_text_region_step_output: PageTextRegionStepOutput
PageTextRegionLabelStepInput( page_text_region_step_output: vkit.pipeline.text_detection.page_text_region.PageTextRegionStepOutput)
2def __init__(self, page_text_region_step_output):
3    self.page_text_region_step_output = page_text_region_step_output

Method generated by attrs for class PageTextRegionLabelStepInput.

class PageCharRegressionLabelTag(enum.Enum):
55class PageCharRegressionLabelTag(Enum):
56    CENTROID = 'centroid'
57    DEVIATE = 'deviate'

An enumeration.

Inherited Members
enum.Enum
name
value
class Vector:
 65class Vector:
 66    y: float
 67    x: float
 68
 69    _distance: Optional[float] = attrs_lazy_field()
 70    _theta: Optional[float] = attrs_lazy_field()
 71
 72    def lazy_post_init(self):
 73        initialized = (self._distance is not None)
 74        if initialized:
 75            return
 76
 77        self._distance = math.hypot(self.x, self.y)
 78        self._theta = float(np.arctan2(self.y, self.x)) % TWO_PI
 79
 80    @property
 81    def distance(self):
 82        self.lazy_post_init()
 83        assert self._distance is not None
 84        return self._distance
 85
 86    @property
 87    def theta(self):
 88        self.lazy_post_init()
 89        assert self._theta is not None
 90        return self._theta
 91
 92    @classmethod
 93    def calculate_theta_delta(
 94        cls,
 95        vector0: 'Vector',
 96        vector1: 'Vector',
 97        clockwise: bool = False,
 98    ):
 99        theta_delta = (vector1.theta - vector0.theta + PI) % TWO_PI - PI
100        if clockwise and theta_delta < 0:
101            theta_delta += TWO_PI
102        return theta_delta
103
104    def dot(self, other: 'Vector'):
105        return self.x * other.x + self.y * other.y
Vector(y: float, x: float)
2def __init__(self, y, x):
3    self.y = y
4    self.x = x
5    self._distance = attr_dict['_distance'].default
6    self._theta = attr_dict['_theta'].default

Method generated by attrs for class Vector.

def lazy_post_init(self):
72    def lazy_post_init(self):
73        initialized = (self._distance is not None)
74        if initialized:
75            return
76
77        self._distance = math.hypot(self.x, self.y)
78        self._theta = float(np.arctan2(self.y, self.x)) % TWO_PI
@classmethod
def calculate_theta_delta( cls, vector0: vkit.pipeline.text_detection.page_text_region_label.Vector, vector1: vkit.pipeline.text_detection.page_text_region_label.Vector, clockwise: bool = False):
 92    @classmethod
 93    def calculate_theta_delta(
 94        cls,
 95        vector0: 'Vector',
 96        vector1: 'Vector',
 97        clockwise: bool = False,
 98    ):
 99        theta_delta = (vector1.theta - vector0.theta + PI) % TWO_PI - PI
100        if clockwise and theta_delta < 0:
101            theta_delta += TWO_PI
102        return theta_delta
104    def dot(self, other: 'Vector'):
105        return self.x * other.x + self.y * other.y
class PageCharRegressionLabel:
109class PageCharRegressionLabel:
110    char_idx: int
111    tag: PageCharRegressionLabelTag
112    label_point_y: float
113    label_point_x: float
114    downsampled_label_point_y: int
115    downsampled_label_point_x: int
116    up_left: Point
117    up_right: Point
118    down_right: Point
119    down_left: Point
120
121    _up_left_vector: Optional[Vector] = attrs_lazy_field()
122    _up_right_vector: Optional[Vector] = attrs_lazy_field()
123    _down_right_vector: Optional[Vector] = attrs_lazy_field()
124    _down_left_vector: Optional[Vector] = attrs_lazy_field()
125
126    _up_left_to_up_right_angle: Optional[float] = attrs_lazy_field()
127    _up_right_to_down_right_angle: Optional[float] = attrs_lazy_field()
128    _down_right_to_down_left_angle: Optional[float] = attrs_lazy_field()
129    _down_left_to_up_left_angle: Optional[float] = attrs_lazy_field()
130    _valid: Optional[bool] = attrs_lazy_field()
131    _clockwise_angle_distribution: Optional[Sequence[float]] = attrs_lazy_field()
132
133    def lazy_post_init(self):
134        initialized = (self._up_left_vector is not None)
135        if initialized:
136            return
137
138        self._up_left_vector = Vector(
139            y=self.up_left.smooth_y - self.label_point_y,
140            x=self.up_left.smooth_x - self.label_point_x,
141        )
142        self._up_right_vector = Vector(
143            y=self.up_right.smooth_y - self.label_point_y,
144            x=self.up_right.smooth_x - self.label_point_x,
145        )
146        self._down_right_vector = Vector(
147            y=self.down_right.smooth_y - self.label_point_y,
148            x=self.down_right.smooth_x - self.label_point_x,
149        )
150        self._down_left_vector = Vector(
151            y=self.down_left.smooth_y - self.label_point_y,
152            x=self.down_left.smooth_x - self.label_point_x,
153        )
154
155        self._up_left_to_up_right_angle = Vector.calculate_theta_delta(
156            self._up_left_vector,
157            self._up_right_vector,
158            clockwise=True,
159        )
160        self._up_right_to_down_right_angle = Vector.calculate_theta_delta(
161            self._up_right_vector,
162            self._down_right_vector,
163            clockwise=True,
164        )
165        self._down_right_to_down_left_angle = Vector.calculate_theta_delta(
166            self._down_right_vector,
167            self._down_left_vector,
168            clockwise=True,
169        )
170        self._down_left_to_up_left_angle = Vector.calculate_theta_delta(
171            self._down_left_vector,
172            self._up_left_vector,
173            clockwise=True,
174        )
175
176        sum_of_angles = sum([
177            self._up_left_to_up_right_angle,
178            self._up_right_to_down_right_angle,
179            self._down_right_to_down_left_angle,
180            self._down_left_to_up_left_angle,
181        ])
182        # Consider valid if the sum of angles deviates from 2 * pi by roughly 4 degrees at most.
183        self._valid = math.isclose(sum_of_angles, TWO_PI, rel_tol=0.012)
184
185        self._clockwise_angle_distribution = normalize_to_probs([
186            self._up_left_to_up_right_angle,
187            self._up_right_to_down_right_angle,
188            self._down_right_to_down_left_angle,
189            self._down_left_to_up_left_angle,
190        ])
191
192    def to_shifted_page_char_regression_label(self, offset_y: int, offset_x: int):
193        assert self.valid
194        # Only can be called before downsampling.
195        assert self.label_point_y == self.downsampled_label_point_y
196        assert self.label_point_x == self.downsampled_label_point_x
197
198        label_point_y = cast(int, self.label_point_y + offset_y)
199        label_point_x = cast(int, self.label_point_x + offset_x)
200        up_left = self.up_left.to_shifted_point(offset_y=offset_y, offset_x=offset_x)
201        up_right = self.up_right.to_shifted_point(offset_y=offset_y, offset_x=offset_x)
202        down_right = self.down_right.to_shifted_point(offset_y=offset_y, offset_x=offset_x)
203        down_left = self.down_left.to_shifted_point(offset_y=offset_y, offset_x=offset_x)
204
205        shifted = PageCharRegressionLabel(
206            char_idx=self.char_idx,
207            tag=self.tag,
208            label_point_y=label_point_y,
209            label_point_x=label_point_x,
210            downsampled_label_point_y=label_point_y,
211            downsampled_label_point_x=label_point_x,
212            up_left=up_left,
213            up_right=up_right,
214            down_right=down_right,
215            down_left=down_left,
216        )
217
218        # Avoid recalculating the labelings.
219        shifted._up_left_vector = self._up_left_vector
220        shifted._up_right_vector = self._up_right_vector
221        shifted._down_right_vector = self._down_right_vector
222        shifted._down_left_vector = self._down_left_vector
223        shifted._up_left_to_up_right_angle = self._up_left_to_up_right_angle
224        shifted._up_right_to_down_right_angle = self._up_right_to_down_right_angle
225        shifted._down_right_to_down_left_angle = self._down_right_to_down_left_angle
226        shifted._down_left_to_up_left_angle = self._down_left_to_up_left_angle
227        shifted._valid = self._valid
228        shifted._clockwise_angle_distribution = self._clockwise_angle_distribution
229
230        return shifted
231
232    def to_downsampled_page_char_regression_label(self, downsample_labeling_factor: int):
233        assert self.valid
234        # Only can be called before downsampling.
235        assert self.label_point_y == self.downsampled_label_point_y
236        assert self.label_point_x == self.downsampled_label_point_x
237
238        downsampled_label_point_y = int(self.label_point_y // downsample_labeling_factor)
239        downsampled_label_point_x = int(self.label_point_x // downsample_labeling_factor)
240
241        # label_point_* are shifted to the center of upsampled positions.
242        offset = (downsample_labeling_factor - 1) / 2
243        label_point_y = downsampled_label_point_y * downsample_labeling_factor + offset
244        label_point_x = downsampled_label_point_x * downsample_labeling_factor + offset
245
246        downsampled_page_char_regression_label = PageCharRegressionLabel(
247            char_idx=self.char_idx,
248            tag=self.tag,
249            label_point_y=label_point_y,
250            label_point_x=label_point_x,
251            downsampled_label_point_y=downsampled_label_point_y,
252            downsampled_label_point_x=downsampled_label_point_x,
253            up_left=self.up_left,
254            up_right=self.up_right,
255            down_right=self.down_right,
256            down_left=self.down_left,
257        )
258        return downsampled_page_char_regression_label
259
260    @property
261    def valid(self):
262        self.lazy_post_init()
263        assert self._valid is not None
264        return self._valid
265
266    def generate_up_left_offsets(self):
267        self.lazy_post_init()
268        assert self._up_left_vector is not None
269        return self._up_left_vector.y, self._up_left_vector.x
270
271    def generate_clockwise_angle_distribution(self):
272        self.lazy_post_init()
273        assert self._clockwise_angle_distribution is not None
274        return self._clockwise_angle_distribution
275
276    def generate_non_up_left_distances(self):
277        self.lazy_post_init()
278        assert self._up_right_vector is not None
279        assert self._down_right_vector is not None
280        assert self._down_left_vector is not None
281        return (
282            self._up_right_vector.distance,
283            self._down_right_vector.distance,
284            self._down_left_vector.distance,
285        )
PageCharRegressionLabel( char_idx: int, tag: vkit.pipeline.text_detection.page_text_region_label.PageCharRegressionLabelTag, label_point_y: float, label_point_x: float, downsampled_label_point_y: int, downsampled_label_point_x: int, up_left: vkit.element.point.Point, up_right: vkit.element.point.Point, down_right: vkit.element.point.Point, down_left: vkit.element.point.Point)
 2def __init__(self, char_idx, tag, label_point_y, label_point_x, downsampled_label_point_y, downsampled_label_point_x, up_left, up_right, down_right, down_left):
 3    self.char_idx = char_idx
 4    self.tag = tag
 5    self.label_point_y = label_point_y
 6    self.label_point_x = label_point_x
 7    self.downsampled_label_point_y = downsampled_label_point_y
 8    self.downsampled_label_point_x = downsampled_label_point_x
 9    self.up_left = up_left
10    self.up_right = up_right
11    self.down_right = down_right
12    self.down_left = down_left
13    self._up_left_vector = attr_dict['_up_left_vector'].default
14    self._up_right_vector = attr_dict['_up_right_vector'].default
15    self._down_right_vector = attr_dict['_down_right_vector'].default
16    self._down_left_vector = attr_dict['_down_left_vector'].default
17    self._up_left_to_up_right_angle = attr_dict['_up_left_to_up_right_angle'].default
18    self._up_right_to_down_right_angle = attr_dict['_up_right_to_down_right_angle'].default
19    self._down_right_to_down_left_angle = attr_dict['_down_right_to_down_left_angle'].default
20    self._down_left_to_up_left_angle = attr_dict['_down_left_to_up_left_angle'].default
21    self._valid = attr_dict['_valid'].default
22    self._clockwise_angle_distribution = attr_dict['_clockwise_angle_distribution'].default

Method generated by attrs for class PageCharRegressionLabel.

def lazy_post_init(self):
133    def lazy_post_init(self):
134        initialized = (self._up_left_vector is not None)
135        if initialized:
136            return
137
138        self._up_left_vector = Vector(
139            y=self.up_left.smooth_y - self.label_point_y,
140            x=self.up_left.smooth_x - self.label_point_x,
141        )
142        self._up_right_vector = Vector(
143            y=self.up_right.smooth_y - self.label_point_y,
144            x=self.up_right.smooth_x - self.label_point_x,
145        )
146        self._down_right_vector = Vector(
147            y=self.down_right.smooth_y - self.label_point_y,
148            x=self.down_right.smooth_x - self.label_point_x,
149        )
150        self._down_left_vector = Vector(
151            y=self.down_left.smooth_y - self.label_point_y,
152            x=self.down_left.smooth_x - self.label_point_x,
153        )
154
155        self._up_left_to_up_right_angle = Vector.calculate_theta_delta(
156            self._up_left_vector,
157            self._up_right_vector,
158            clockwise=True,
159        )
160        self._up_right_to_down_right_angle = Vector.calculate_theta_delta(
161            self._up_right_vector,
162            self._down_right_vector,
163            clockwise=True,
164        )
165        self._down_right_to_down_left_angle = Vector.calculate_theta_delta(
166            self._down_right_vector,
167            self._down_left_vector,
168            clockwise=True,
169        )
170        self._down_left_to_up_left_angle = Vector.calculate_theta_delta(
171            self._down_left_vector,
172            self._up_left_vector,
173            clockwise=True,
174        )
175
176        sum_of_angles = sum([
177            self._up_left_to_up_right_angle,
178            self._up_right_to_down_right_angle,
179            self._down_right_to_down_left_angle,
180            self._down_left_to_up_left_angle,
181        ])
182        # Consider valid if deviate within 4 degrees.
183        self._valid = math.isclose(sum_of_angles, TWO_PI, rel_tol=0.012)
184
185        self._clockwise_angle_distribution = normalize_to_probs([
186            self._up_left_to_up_right_angle,
187            self._up_right_to_down_right_angle,
188            self._down_right_to_down_left_angle,
189            self._down_left_to_up_left_angle,
190        ])
def to_shifted_page_char_regression_label(self, offset_y: int, offset_x: int):
192    def to_shifted_page_char_regression_label(self, offset_y: int, offset_x: int):
193        assert self.valid
194        # Only can be called before downsampling.
195        assert self.label_point_y == self.downsampled_label_point_y
196        assert self.label_point_x == self.downsampled_label_point_x
197
198        label_point_y = cast(int, self.label_point_y + offset_y)
199        label_point_x = cast(int, self.label_point_x + offset_x)
200        up_left = self.up_left.to_shifted_point(offset_y=offset_y, offset_x=offset_x)
201        up_right = self.up_right.to_shifted_point(offset_y=offset_y, offset_x=offset_x)
202        down_right = self.down_right.to_shifted_point(offset_y=offset_y, offset_x=offset_x)
203        down_left = self.down_left.to_shifted_point(offset_y=offset_y, offset_x=offset_x)
204
205        shifted = PageCharRegressionLabel(
206            char_idx=self.char_idx,
207            tag=self.tag,
208            label_point_y=label_point_y,
209            label_point_x=label_point_x,
210            downsampled_label_point_y=label_point_y,
211            downsampled_label_point_x=label_point_x,
212            up_left=up_left,
213            up_right=up_right,
214            down_right=down_right,
215            down_left=down_left,
216        )
217
218        # Avoid recalculate the labelings.
219        shifted._up_left_vector = self._up_left_vector
220        shifted._up_right_vector = self._up_right_vector
221        shifted._down_right_vector = self._down_right_vector
222        shifted._down_left_vector = self._down_left_vector
223        shifted._up_left_to_up_right_angle = self._up_left_to_up_right_angle
224        shifted._up_right_to_down_right_angle = self._up_right_to_down_right_angle
225        shifted._down_right_to_down_left_angle = self._down_right_to_down_left_angle
226        shifted._down_left_to_up_left_angle = self._down_left_to_up_left_angle
227        shifted._valid = self._valid
228        shifted._clockwise_angle_distribution = self._clockwise_angle_distribution
229
230        return shifted
def to_downsampled_page_char_regression_label(self, downsample_labeling_factor: int):
232    def to_downsampled_page_char_regression_label(self, downsample_labeling_factor: int):
233        assert self.valid
234        # Only can be called before downsampling.
235        assert self.label_point_y == self.downsampled_label_point_y
236        assert self.label_point_x == self.downsampled_label_point_x
237
238        downsampled_label_point_y = int(self.label_point_y // downsample_labeling_factor)
239        downsampled_label_point_x = int(self.label_point_x // downsample_labeling_factor)
240
241        # label_point_* are shifted to the center of upsampled positions.
242        offset = (downsample_labeling_factor - 1) / 2
243        label_point_y = downsampled_label_point_y * downsample_labeling_factor + offset
244        label_point_x = downsampled_label_point_x * downsample_labeling_factor + offset
245
246        downsampled_page_char_regression_label = PageCharRegressionLabel(
247            char_idx=self.char_idx,
248            tag=self.tag,
249            label_point_y=label_point_y,
250            label_point_x=label_point_x,
251            downsampled_label_point_y=downsampled_label_point_y,
252            downsampled_label_point_x=downsampled_label_point_x,
253            up_left=self.up_left,
254            up_right=self.up_right,
255            down_right=self.down_right,
256            down_left=self.down_left,
257        )
258        return downsampled_page_char_regression_label
def generate_up_left_offsets(self):
266    def generate_up_left_offsets(self):
267        self.lazy_post_init()
268        assert self._up_left_vector is not None
269        return self._up_left_vector.y, self._up_left_vector.x
def generate_clockwise_angle_distribution(self):
271    def generate_clockwise_angle_distribution(self):
272        self.lazy_post_init()
273        assert self._clockwise_angle_distribution is not None
274        return self._clockwise_angle_distribution
def generate_non_up_left_distances(self):
276    def generate_non_up_left_distances(self):
277        self.lazy_post_init()
278        assert self._up_right_vector is not None
279        assert self._down_right_vector is not None
280        assert self._down_left_vector is not None
281        return (
282            self._up_right_vector.distance,
283            self._down_right_vector.distance,
284            self._down_left_vector.distance,
285        )
class PageTextRegionLabelStepOutput:
class PageTextRegionLabelStepOutput:
    # Output of PageTextRegionLabelStep.run: the labels derived from the page char
    # polygons of the upstream page text region step.

    # Mask of the page shape in which every page char polygon has been filled.
    page_char_mask: Mask
    # Non-probability score map in which every page char polygon is filled with
    # the value of its get_rectangular_height().
    page_char_height_score_map: ScoreMap
    # Score map produced by the char heatmap engine from the page char polygons.
    page_char_gaussian_score_map: ScoreMap
    # Regression labels (one centroid label per char plus its deviate labels).
    page_char_regression_labels: Sequence[PageCharRegressionLabel]
PageTextRegionLabelStepOutput( page_char_mask: vkit.element.mask.Mask, page_char_height_score_map: vkit.element.score_map.ScoreMap, page_char_gaussian_score_map: vkit.element.score_map.ScoreMap, page_char_regression_labels: Sequence[vkit.pipeline.text_detection.page_text_region_label.PageCharRegressionLabel])
def __init__(self, page_char_mask, page_char_height_score_map, page_char_gaussian_score_map, page_char_regression_labels):
    # Initializer generated by attrs for PageTextRegionLabelStepOutput.
    # Every field is stored exactly as passed in; no conversion or validation
    # happens here.
    self.page_char_mask = page_char_mask
    self.page_char_height_score_map = page_char_height_score_map
    self.page_char_gaussian_score_map = page_char_gaussian_score_map
    self.page_char_regression_labels = page_char_regression_labels

Method generated by attrs for class PageTextRegionLabelStepOutput.

class PageTextRegionLabelStep(
    PipelineStep[
        PageTextRegionLabelStepConfig,
        PageTextRegionLabelStepInput,
        PageTextRegionLabelStepOutput,
    ]
):  # yapf: disable
    """Pipeline step that generates the char-level labels of a page.

    From the page char polygons of the upstream page text region step it produces a
    char mask, a char height score map, a char gaussian score map and a list of char
    regression labels.
    """

    def __init__(self, config: PageTextRegionLabelStepConfig):
        """Store the config and create the char heatmap engine executor from it."""
        super().__init__(config)

        self.char_heatmap_default_engine_executor = \
            char_heatmap_default_engine_executor_factory.create(
                self.config.char_heatmap_default_engine_init_config
            )

    @classmethod
    def generate_page_char_mask(
        cls,
        shape: Tuple[int, int],
        page_char_polygons: Sequence[Polygon],
    ):
        """Return a mask of `shape` in which every char polygon has been filled."""
        page_char_mask = Mask.from_shape(shape)
        for polygon in page_char_polygons:
            polygon.fill_mask(page_char_mask)
        return page_char_mask

    @classmethod
    def generate_page_char_height_score_map(
        cls,
        shape: Tuple[int, int],
        page_char_polygons: Sequence[Polygon],
    ):
        """Return a non-probability score map of `shape` holding the char heights.

        Every char polygon is filled with the value of its `get_rectangular_height()`.
        Polygons are filled in input order.
        """
        page_char_height_score_map = ScoreMap.from_shape(shape, is_prob=False)
        for polygon in page_char_polygons:
            polygon.fill_score_map(
                page_char_height_score_map,
                value=polygon.get_rectangular_height(),
            )
        return page_char_height_score_map

    def generate_page_char_gaussian_score_map(
        self,
        shape: Tuple[int, int],
        page_char_polygons: Sequence[Polygon],
        rng: RandomGenerator,
    ):
        """Run the char heatmap engine on the char polygons and return its score map.

        Args:
            shape: (height, width) passed to the engine.
            page_char_polygons: Char polygons passed to the engine.
            rng: Random generator handed to the engine executor.
        """
        height, width = shape
        char_heatmap = self.char_heatmap_default_engine_executor.run(
            {
                'height': height,
                'width': width,
                'char_polygons': page_char_polygons,
            },
            rng,
        )
        return char_heatmap.score_map

    def generate_page_char_regression_labels(
        self,
        shape: Tuple[int, int],
        page_char_polygons: Sequence[Polygon],
        rng: RandomGenerator,
    ):
        """Build the centroid and deviate regression labels of every char polygon.

        For each 4-point char polygon, one CENTROID label is placed at the polygon
        center point. Candidate deviate points are then sampled inside the polygon, and
        up to `config.num_deviate_char_regression_labels` valid candidates become
        DEVIATE labels. A candidate is dropped if the char center nearest to it belongs
        to another char. A warning is logged for each polygon that ends up with fewer
        deviate labels than configured.

        Args:
            shape: (height, width) of the page.
            page_char_polygons: Char polygons, each with exactly 4 points ordered as
                up-left, up-right, down-right, down-left.
            rng: Random generator used to sample the deviate candidates.

        Returns:
            A list of PageCharRegressionLabel; empty if there is no char polygon.
        """
        # KDTree cannot be built from an empty point set, and there is nothing to label.
        if len(page_char_polygons) == 0:
            return []

        page_height, page_width = shape

        num_deviate_labels = self.config.num_deviate_char_regression_labels
        # Some candidate points are invalid, hence oversample by a factor.
        num_candidates = (
            self.config.num_deviate_char_regression_labels_candiates_factor * num_deviate_labels
        )

        # Build a KD tree for removing deviate points that are too close to another
        # center point.
        center_points = PointList()
        for polygon in page_char_polygons:
            center_points.append(polygon.get_center_point())
        kd_tree = KDTree(center_points.to_np_array())

        page_char_regression_labels: List[PageCharRegressionLabel] = []

        for char_idx, (polygon, center_point) in enumerate(zip(page_char_polygons, center_points)):
            assert polygon.num_points == 4
            up_left, up_right, down_right, down_left = polygon.points

            # 1. The centroid of char polygon.
            label = PageCharRegressionLabel(
                char_idx=char_idx,
                tag=PageCharRegressionLabelTag.CENTROID,
                label_point_y=center_point.y,
                label_point_x=center_point.x,
                downsampled_label_point_y=center_point.y,
                downsampled_label_point_x=center_point.x,
                up_left=up_left,
                up_right=up_right,
                down_right=down_right,
                down_left=down_left,
            )
            # The centroid labeling must be valid.
            assert label.valid
            page_char_regression_labels.append(label)

            # 2. The deviate points.
            bounding_box = polygon.bounding_box
            num_valid_deviate_char_regression_labels = 0

            # Candidates are drawn from [1, size - 2] on each axis (border excluded).
            # That range is empty for a bounding box thinner than 3 pixels, in which
            # case rng.integers would raise. Likewise, querying the KD tree with zero
            # candidates would raise.
            can_sample_deviate_points = (
                num_candidates > 0 and bounding_box.height >= 3 and bounding_box.width >= 3
            )

            if can_sample_deviate_points:
                # Sample points in the shifted bounding box space. Points lying on the
                # border are not sampled, to increase the chance of valid labels.
                deviate_points_in_bounding_box = PointList()
                for _ in range(num_candidates):
                    y = int(rng.integers(1, bounding_box.height - 1))
                    x = int(rng.integers(1, bounding_box.width - 1))
                    deviate_points_in_bounding_box.append(Point.create(y=y, x=x))

                # Then transform to the polygon space.
                np_src_points = np.asarray(
                    [
                        (0, 0),
                        (bounding_box.width - 1, 0),
                        (bounding_box.width - 1, bounding_box.height - 1),
                        (0, bounding_box.height - 1),
                    ],
                    dtype=np.float32,
                )
                np_dst_points = polygon.internals.np_self_relative_points
                trans_mat = cv.getPerspectiveTransform(
                    np_src_points,
                    np_dst_points,
                    cv.DECOMP_SVD,
                )

                deviate_points = PointList()
                for shifted_deviate_point in affine_points(
                    trans_mat,
                    deviate_points_in_bounding_box.to_point_tuple(),
                ):
                    # Move from bounding-box-relative back to page coordinates.
                    y = bounding_box.up + shifted_deviate_point.y
                    x = bounding_box.left + shifted_deviate_point.x
                    assert 0 <= y < page_height
                    assert 0 <= x < page_width
                    deviate_points.append(Point.create(y=y, x=x))

                # Remove those that are too close to another center point: keep a point
                # only if its nearest center is the center of the current char.
                _, np_kd_nbr_indices = kd_tree.query(deviate_points.to_np_array())
                preserve_flags: List[bool] = [
                    idx == char_idx for idx in np_kd_nbr_indices[:, 0].tolist()
                ]

                # Build labels.
                for deviate_point, preserve_flag in zip(deviate_points, preserve_flags):
                    if num_valid_deviate_char_regression_labels >= num_deviate_labels:
                        break

                    if not preserve_flag:
                        continue

                    label = PageCharRegressionLabel(
                        char_idx=char_idx,
                        tag=PageCharRegressionLabelTag.DEVIATE,
                        label_point_y=deviate_point.y,
                        label_point_x=deviate_point.x,
                        downsampled_label_point_y=deviate_point.y,
                        downsampled_label_point_x=deviate_point.x,
                        up_left=up_left,
                        up_right=up_right,
                        down_right=down_right,
                        down_left=down_left,
                    )
                    if label.valid:
                        page_char_regression_labels.append(label)
                        num_valid_deviate_char_regression_labels += 1

            if num_valid_deviate_char_regression_labels < num_deviate_labels:
                logger.warning(
                    'Cannot sample enough deviate labels for char_polygon=%s',
                    polygon,
                )

        return page_char_regression_labels

    def run(self, input: PageTextRegionLabelStepInput, rng: RandomGenerator):
        """Generate all char-level labels of the page and bundle them in the output.

        Args:
            input: Step input carrying the output of the page text region step.
            rng: Random generator, consumed first by the gaussian score map generation
                and then by the regression label generation.

        Returns:
            A PageTextRegionLabelStepOutput.
        """
        page_text_region_step_output = input.page_text_region_step_output
        page_image = page_text_region_step_output.page_image
        page_char_polygons = page_text_region_step_output.page_char_polygons

        page_char_mask = self.generate_page_char_mask(
            page_image.shape,
            page_char_polygons,
        )
        page_char_height_score_map = self.generate_page_char_height_score_map(
            page_image.shape,
            page_char_polygons,
        )

        page_char_gaussian_score_map = self.generate_page_char_gaussian_score_map(
            page_image.shape,
            page_char_polygons,
            rng,
        )
        page_char_regression_labels = self.generate_page_char_regression_labels(
            page_image.shape,
            page_char_polygons,
            rng,
        )

        return PageTextRegionLabelStepOutput(
            page_char_mask=page_char_mask,
            page_char_height_score_map=page_char_height_score_map,
            page_char_gaussian_score_map=page_char_gaussian_score_map,
            page_char_regression_labels=page_char_regression_labels,
        )

Abstract base class for generic types.

A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::

    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
        # Etc.

This class can then be used as follows::

    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default

304    def __init__(self, config: PageTextRegionLabelStepConfig):
305        super().__init__(config)
306
307        self.char_heatmap_default_engine_executor = \
308            char_heatmap_default_engine_executor_factory.create(
309                self.config.char_heatmap_default_engine_init_config
310            )
@classmethod
def generate_page_char_mask( cls, shape: Tuple[int, int], page_char_polygons: Sequence[vkit.element.polygon.Polygon]):
312    @classmethod
313    def generate_page_char_mask(
314        cls,
315        shape: Tuple[int, int],
316        page_char_polygons: Sequence[Polygon],
317    ):
318        page_char_mask = Mask.from_shape(shape)
319        for polygon in page_char_polygons:
320            polygon.fill_mask(page_char_mask)
321        return page_char_mask
@classmethod
def generate_page_char_height_score_map( cls, shape: Tuple[int, int], page_char_polygons: Sequence[vkit.element.polygon.Polygon]):
323    @classmethod
324    def generate_page_char_height_score_map(
325        cls,
326        shape: Tuple[int, int],
327        page_char_polygons: Sequence[Polygon],
328    ):
329        page_char_height_score_map = ScoreMap.from_shape(shape, is_prob=False)
330        for polygon in page_char_polygons:
331            polygon.fill_score_map(
332                page_char_height_score_map,
333                value=polygon.get_rectangular_height(),
334            )
335        return page_char_height_score_map
def generate_page_char_gaussian_score_map( self, shape: Tuple[int, int], page_char_polygons: Sequence[vkit.element.polygon.Polygon], rng: numpy.random._generator.Generator):
337    def generate_page_char_gaussian_score_map(
338        self,
339        shape: Tuple[int, int],
340        page_char_polygons: Sequence[Polygon],
341        rng: RandomGenerator,
342    ):
343        height, width = shape
344        char_heatmap = self.char_heatmap_default_engine_executor.run(
345            {
346                'height': height,
347                'width': width,
348                'char_polygons': page_char_polygons,
349            },
350            rng,
351        )
352        return char_heatmap.score_map
def generate_page_char_regression_labels( self, shape: Tuple[int, int], page_char_polygons: Sequence[vkit.element.polygon.Polygon], rng: numpy.random._generator.Generator):
354    def generate_page_char_regression_labels(
355        self,
356        shape: Tuple[int, int],
357        page_char_polygons: Sequence[Polygon],
358        rng: RandomGenerator,
359    ):
360        page_height, page_width = shape
361
362        # Build a KD tree to for removing deviate point that is too close to another center point.
363        center_points = PointList()
364        for polygon in page_char_polygons:
365            center_points.append(polygon.get_center_point())
366        kd_tree = KDTree(center_points.to_np_array())
367
368        page_char_regression_labels: List[PageCharRegressionLabel] = []
369
370        for char_idx, (polygon, center_point) in enumerate(zip(page_char_polygons, center_points)):
371            assert polygon.num_points == 4
372            up_left, up_right, down_right, down_left = polygon.points
373
374            # 1. The centroid of char polygon.
375            label = PageCharRegressionLabel(
376                char_idx=char_idx,
377                tag=PageCharRegressionLabelTag.CENTROID,
378                label_point_y=center_point.y,
379                label_point_x=center_point.x,
380                downsampled_label_point_y=center_point.y,
381                downsampled_label_point_x=center_point.x,
382                up_left=up_left,
383                up_right=up_right,
384                down_right=down_right,
385                down_left=down_left,
386            )
387            # The centroid labeling must be valid.
388            assert label.valid
389            page_char_regression_labels.append(label)
390
391            # 2. The deviate points.
392            bounding_box = polygon.bounding_box
393
394            # Sample points in shfited bounding box space.
395            deviate_points_in_bounding_box = PointList()
396            # Some points are invalid, hence multiply the number of samplings by a factor.
397            # Also not to sample the points lying on the border to increase the chance of valid.
398            for _ in range(
399                self.config.num_deviate_char_regression_labels_candiates_factor
400                * self.config.num_deviate_char_regression_labels
401            ):
402                y = int(rng.integers(1, bounding_box.height - 1))
403                x = int(rng.integers(1, bounding_box.width - 1))
404                deviate_points_in_bounding_box.append(Point.create(y=y, x=x))
405
406            # Then transform to the polygon space.
407            np_src_points = np.asarray(
408                [
409                    (0, 0),
410                    (bounding_box.width - 1, 0),
411                    (bounding_box.width - 1, bounding_box.height - 1),
412                    (0, bounding_box.height - 1),
413                ],
414                dtype=np.float32,
415            )
416            np_dst_points = polygon.internals.np_self_relative_points
417            trans_mat = cv.getPerspectiveTransform(
418                np_src_points,
419                np_dst_points,
420                cv.DECOMP_SVD,
421            )
422
423            deviate_points = PointList()
424            for shifted_deviate_point in affine_points(
425                trans_mat,
426                deviate_points_in_bounding_box.to_point_tuple(),
427            ):
428                y = bounding_box.up + shifted_deviate_point.y
429                x = bounding_box.left + shifted_deviate_point.x
430                assert 0 <= y < page_height
431                assert 0 <= x < page_width
432                deviate_points.append(Point.create(y=y, x=x))
433
434            # Remove those are too close to another center point.
435            _, np_kd_nbr_indices = kd_tree.query(deviate_points.to_np_array())
436            preserve_flags: List[bool] = [
437                idx == char_idx for idx in np_kd_nbr_indices[:, 0].tolist()
438            ]
439
440            # Build labels.
441            num_valid_deviate_char_regression_labels = 0
442            for deviate_point, preserve_flag in zip(deviate_points, preserve_flags):
443                if num_valid_deviate_char_regression_labels \
444                        >= self.config.num_deviate_char_regression_labels:
445                    break
446
447                if not preserve_flag:
448                    continue
449
450                label = PageCharRegressionLabel(
451                    char_idx=char_idx,
452                    tag=PageCharRegressionLabelTag.DEVIATE,
453                    label_point_y=deviate_point.y,
454                    label_point_x=deviate_point.x,
455                    downsampled_label_point_y=deviate_point.y,
456                    downsampled_label_point_x=deviate_point.x,
457                    up_left=up_left,
458                    up_right=up_right,
459                    down_right=down_right,
460                    down_left=down_left,
461                )
462                if label.valid:
463                    page_char_regression_labels.append(label)
464                    num_valid_deviate_char_regression_labels += 1
465
466            if num_valid_deviate_char_regression_labels \
467                    < self.config.num_deviate_char_regression_labels:
468                logger.warning(f'Cannot sample enough deviate labels for char_polygon={polygon}')
469
470        return page_char_regression_labels
def run( self, input: vkit.pipeline.text_detection.page_text_region_label.PageTextRegionLabelStepInput, rng: numpy.random._generator.Generator):
472    def run(self, input: PageTextRegionLabelStepInput, rng: RandomGenerator):
473        page_text_region_step_output = input.page_text_region_step_output
474        page_image = page_text_region_step_output.page_image
475        page_char_polygons = page_text_region_step_output.page_char_polygons
476
477        page_char_mask = self.generate_page_char_mask(
478            page_image.shape,
479            page_char_polygons,
480        )
481        page_char_height_score_map = self.generate_page_char_height_score_map(
482            page_image.shape,
483            page_char_polygons,
484        )
485
486        page_char_gaussian_score_map = self.generate_page_char_gaussian_score_map(
487            page_image.shape,
488            page_char_polygons,
489            rng,
490        )
491        page_char_regression_labels = self.generate_page_char_regression_labels(
492            page_image.shape,
493            page_char_polygons,
494            rng,
495        )
496
497        return PageTextRegionLabelStepOutput(
498            page_char_mask=page_char_mask,
499            page_char_height_score_map=page_char_height_score_map,
500            page_char_gaussian_score_map=page_char_gaussian_score_map,
501            page_char_regression_labels=page_char_regression_labels,
502        )