vkit.pipeline.text_detection.page_text_region

   1# Copyright 2022 vkit-x Administrator. All Rights Reserved.
   2#
   3# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
   4#
   5# The commercial license gives you the full rights to create and distribute software
   6# on your own terms without any SSPL license obligations. For more information,
   7# please see the "LICENSE_COMMERCIAL.txt" file.
   8#
   9# This project is also available under Server Side Public License (SSPL).
  10# The SSPL licensing is ideal for use cases such as open source projects with
  11# SSPL distribution, student/academic purposes, hobby projects, internal research
  12# projects without external distribution, or other projects where all SSPL
  13# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
  14from typing import List, Optional, Dict, DefaultDict, Sequence, Tuple, Set
  15from collections import defaultdict
  16import itertools
  17import math
  18import statistics
  19import logging
  20import warnings
  21
  22import attrs
  23from numpy.random import Generator as RandomGenerator
  24import numpy as np
  25from shapely.errors import ShapelyDeprecationWarning
  26from shapely.strtree import STRtree
  27from shapely.geometry import Polygon as ShapelyPolygon
  28from rectpack import newPacker as RectPacker
  29
  30from vkit.utility import rng_choice, rng_choice_with_size
  31from vkit.element import Box, Polygon, Mask, Image, ElementSetOperationMode
  32from vkit.mechanism.distortion import rotate
  33from ..interface import PipelineStep, PipelineStepFactory
  34from .page_distortion import PageDistortionStepOutput
  35from .page_resizing import PageResizingStepOutput
  36
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Shapely version has been explicitly locked under 2.0, hence ignore this warning.
# NOTE: The filter is registered at import time without a module restriction, hence it
# silences every ShapelyDeprecationWarning in the process, not only those raised here.
warnings.filterwarnings('ignore', category=ShapelyDeprecationWarning)
  41
  42
@attrs.define
class PageTextRegionStepConfig:
    """Configuration of the page text region step.

    NOTE(review): several options are consumed in code that is not part of the reviewed
    section; their descriptions are inferred from the names and marked as assumptions.
    """
    # Presumably the `typical_long_side_ratio_min` passed to `TextRegionFlattener`, i.e. the
    # minimum long side / short side ratio for a region to count as typical -- TODO confirm.
    text_region_flattener_typical_long_side_ratio_min: float = 3.0
    # Lower and upper bounds of the uniform distribution from which the text region polygon
    # dilate ratio is sampled.
    text_region_flattener_text_region_polygon_dilate_ratio_min: float = 0.85
    text_region_flattener_text_region_polygon_dilate_ratio_max: float = 1.0
    # Presumably the target range of the median char height used when resizing a flattened
    # text region -- TODO confirm against the resizing code.
    text_region_resize_char_height_median_min: int = 30
    text_region_resize_char_height_median_max: int = 45
    # Presumably the probabilities of post-rotating typical / untypical text regions
    # -- TODO confirm.
    text_region_typical_post_rotate_prob: float = 0.2
    text_region_untypical_post_rotate_prob: float = 0.2
    # Target fraction of negative (non-text) regions among all sampled regions. Used as
    # `ratio * num_text_regions / (1 - ratio)`, hence must be strictly less than 1.
    negative_text_region_ratio: float = 0.1
    # Presumably the probability of post-rotating a negative region -- TODO confirm.
    negative_text_region_post_rotate_prob: float = 0.2
    # Presumably the per-region padding handed to `stack_flattened_text_regions`
    # -- TODO confirm.
    stack_flattened_text_regions_pad: int = 2
    # Presumably the switch and the angle range of the post rotation -- TODO confirm.
    enable_post_rotate: bool = False
    post_rotate_angle_min: int = -10
    post_rotate_angle_max: int = 10
    # Presumably controls whether `PageTextRegionStepDebug` is filled -- TODO confirm.
    enable_debug: bool = False
  59
  60
@attrs.define
class PageTextRegionStepInput:
    """Input of the page text region step, made of the outputs of two other steps."""
    # Output of the page distortion step.
    page_distortion_step_output: PageDistortionStepOutput
    # Output of the page resizing step.
    page_resizing_step_output: PageResizingStepOutput
  65
  66
@attrs.define
class PageTextRegionInfo:
    """A text region polygon bundled with char polygons."""
    # Polygon of the text region.
    precise_text_region_polygon: Polygon
    # Char polygons associated with the region; presumably the chars located inside
    # `precise_text_region_polygon` -- TODO confirm against the code building this object.
    char_polygons: Sequence[Polygon]
  71
  72
  73@attrs.define
  74class FlattenedTextRegion:
  75    is_typical: bool
  76    text_region_polygon: Polygon
  77    text_region_image: Image
  78    bounding_extended_text_region_mask: Mask
  79    flattening_rotate_angle: int
  80    shape_before_trim: Tuple[int, int]
  81    rotated_trimmed_box: Box
  82    shape_before_resize: Tuple[int, int]
  83    post_rotate_angle: int
  84    flattened_image: Image
  85    flattened_mask: Mask
  86    flattened_char_polygons: Optional[Sequence[Polygon]]
  87
  88    @property
  89    def shape(self):
  90        return self.flattened_image.shape
  91
  92    @property
  93    def height(self):
  94        return self.flattened_image.height
  95
  96    @property
  97    def width(self):
  98        return self.flattened_image.width
  99
 100    @property
 101    def area(self):
 102        return self.flattened_image.area
 103
 104    def get_char_height_meidan(self):
 105        assert self.flattened_char_polygons
 106        return statistics.median(
 107            char_polygon.get_rectangular_height() for char_polygon in self.flattened_char_polygons
 108        )
 109
 110    def to_resized_flattened_text_region(
 111        self,
 112        resized_height: Optional[int] = None,
 113        resized_width: Optional[int] = None,
 114    ):
 115        resized_flattened_image = self.flattened_image.to_resized_image(
 116            resized_height=resized_height,
 117            resized_width=resized_width,
 118        )
 119
 120        resized_flattened_mask = self.flattened_mask.to_resized_mask(
 121            resized_height=resized_height,
 122            resized_width=resized_width,
 123        )
 124
 125        resized_flattened_char_polygons = None
 126        if self.flattened_char_polygons is not None:
 127            resized_flattened_char_polygons = [
 128                flattened_char_polygon.to_conducted_resized_polygon(
 129                    self.shape,
 130                    resized_height=resized_height,
 131                    resized_width=resized_width,
 132                ) for flattened_char_polygon in self.flattened_char_polygons
 133            ]
 134
 135        return attrs.evolve(
 136            self,
 137            flattened_image=resized_flattened_image,
 138            flattened_mask=resized_flattened_mask,
 139            flattened_char_polygons=resized_flattened_char_polygons,
 140        )
 141
 142    def to_post_rotated_flattened_text_region(
 143        self,
 144        post_rotate_angle: int,
 145    ):
 146        assert self.post_rotate_angle == 0
 147
 148        # NOTE: No need to trim.
 149        rotated_result = rotate.distort(
 150            {'angle': post_rotate_angle},
 151            image=self.flattened_image,
 152            mask=self.flattened_mask,
 153            polygons=self.flattened_char_polygons,
 154        )
 155        rotated_flattened_image = rotated_result.image
 156        assert rotated_flattened_image
 157        rotated_flattened_mask = rotated_result.mask
 158        assert rotated_flattened_mask
 159        rotated_flattened_char_polygons = rotated_result.polygons
 160
 161        return attrs.evolve(
 162            self,
 163            post_rotate_angle=post_rotate_angle,
 164            flattened_image=rotated_flattened_image,
 165            flattened_mask=rotated_flattened_mask,
 166            flattened_char_polygons=rotated_flattened_char_polygons,
 167        )
 168
 169
@attrs.define
class PageTextRegionStepDebug:
    """Container of intermediate results kept for debugging.

    NOTE: Every field defaults to None regardless of its annotation, hence an instance can
    be created empty and filled later.
    """
    # Page image; presumably the one the text regions are extracted from -- TODO confirm.
    page_image: Image = attrs.field(default=None)
    # Candidate polygons of precise text regions.
    precise_text_region_candidate_polygons: Sequence[Polygon] = attrs.field(default=None)
    # Text region infos of the page.
    page_text_region_infos: Sequence[PageTextRegionInfo] = attrs.field(default=None)
    # Flattened text regions.
    flattened_text_regions: Sequence[FlattenedTextRegion] = attrs.field(default=None)
 176
 177
@attrs.define
class PageTextRegionStepOutput:
    """Output of the page text region step.

    NOTE(review): the code filling this object is outside the reviewed section; the field
    descriptions are inferred from names and types and should be confirmed.
    """
    # Resulting page image; presumably the image of stacked flattened text regions.
    page_image: Image
    # Char polygons; presumably in the coordinates of `page_image`.
    page_char_polygons: Sequence[Polygon]
    # Presumably the shape of `page_image` before the rotation by `rotate_angle`.
    shape_before_rotate: Tuple[int, int]
    # Presumably the angle of the rotation applied to the page.
    rotate_angle: int
    # Debugging artifacts, or None.
    debug: Optional[PageTextRegionStepDebug]
 185
 186
 187def calculate_boxed_masks_intersected_ratio(
 188    anchor_mask: Mask,
 189    candidate_mask: Mask,
 190    use_candidate_as_base: bool = False,
 191):
 192    anchor_box = anchor_mask.box
 193    assert anchor_box
 194
 195    candidate_box = candidate_mask.box
 196    assert candidate_box
 197
 198    # Calculate intersection.
 199    up = max(anchor_box.up, candidate_box.up)
 200    down = min(anchor_box.down, candidate_box.down)
 201    left = max(anchor_box.left, candidate_box.left)
 202    right = min(anchor_box.right, candidate_box.right)
 203
 204    if up > down or left > right:
 205        return 0.0
 206
 207    np_intersected_anchor_mask = anchor_mask.mat[
 208        up - anchor_box.up:down - anchor_box.up + 1,
 209        left - anchor_box.left:right - anchor_box.left + 1,
 210    ]  # yapf: disable
 211    np_intersected_candidate_mask = candidate_mask.mat[
 212        up - candidate_box.up:down - candidate_box.up + 1,
 213        left - candidate_box.left:right - candidate_box.left + 1,
 214    ]  # yapf: disable
 215    np_intersected_mask = np_intersected_anchor_mask & np_intersected_candidate_mask
 216    intersected_area = int(np_intersected_mask.sum())
 217
 218    if use_candidate_as_base:
 219        base_area = int(candidate_mask.np_mask.sum())
 220    else:
 221        base_area = (
 222            int(anchor_mask.np_mask.sum()) + int(candidate_mask.np_mask.sum()) - intersected_area
 223        )
 224
 225    return intersected_area / base_area
 226
 227
 228class TextRegionFlattener:
 229
 230    @classmethod
 231    def patch_text_region_polygons(
 232        cls,
 233        text_region_polygons: Sequence[Polygon],
 234        grouped_char_polygons: Optional[Sequence[Sequence[Polygon]]],
 235    ):
 236        if grouped_char_polygons is None:
 237            return text_region_polygons
 238
 239        assert len(text_region_polygons) == len(grouped_char_polygons)
 240
 241        patched_text_region_polygons: List[Polygon] = []
 242        for text_region_polygon, char_polygons in zip(text_region_polygons, grouped_char_polygons):
 243            # Need to make sure all char polygons are included.
 244            unionized_polygons = [text_region_polygon]
 245            unionized_polygons.extend(char_polygons)
 246
 247            bounding_box = Box.from_boxes((polygon.bounding_box for polygon in unionized_polygons))
 248            mask = Mask.from_shapable(bounding_box).to_box_attached(bounding_box)
 249            for polygon in unionized_polygons:
 250                polygon.fill_mask(mask)
 251
 252            patched_text_region_polygons.append(mask.to_external_polygon())
 253
 254        return patched_text_region_polygons
 255
    @classmethod
    def process_text_region_polygons(
        cls,
        text_region_polygon_dilate_ratio: float,
        shape: Tuple[int, int],
        text_region_polygons: Sequence[Polygon],
        force_no_dilation_flags: Optional[Sequence[bool]] = None,
    ):
        """Build the extraction mask and the bounding rectangular polygon of each text region.

        For each text region polygon, the resulting mask is defined within the bounding box
        of the bounding rectangular polygon. It combines the (potentially dilated) text
        region, with the area of other text regions removed, and the non-text area.

        Args:
            text_region_polygon_dilate_ratio: Ratio passed to `Polygon.to_dilated_polygon`.
            shape: Shape of the page, used for building page masks and clipping polygons.
            text_region_polygons: The text region polygons.
            force_no_dilation_flags: Optional flags aligned with `text_region_polygons`.
                A true flag disables the dilation of the corresponding polygon.
                If None, all polygons are dilated.

        Returns:
            A tuple of (bounding_extended_text_region_masks, bounding_rectangular_polygons),
            both aligned with `text_region_polygons`.
        """
        # Page-level masks: the area covered by any text region, and its complement.
        text_mask = Mask.from_polygons(shape, text_region_polygons)
        non_text_mask = text_mask.to_inverted_mask()

        # Attach both masks to the box of the whole page.
        box = Box.from_shape(shape)
        text_mask = text_mask.to_box_attached(box)
        non_text_mask = non_text_mask.to_box_attached(box)

        bounding_extended_text_region_masks: List[Mask] = []
        bounding_rectangular_polygons: List[Polygon] = []

        if force_no_dilation_flags is None:
            # Endless stream of False, zip stops at the end of text_region_polygons.
            force_no_dilation_flags_iter = itertools.repeat(False)
        else:
            assert len(force_no_dilation_flags) == len(text_region_polygons)
            force_no_dilation_flags_iter = force_no_dilation_flags

        for text_region_polygon, force_no_dilation_flag in zip(
            text_region_polygons, force_no_dilation_flags_iter
        ):
            # Keep the undilated polygon, it is needed for unsetting the current text region.
            original_text_region_polygon = text_region_polygon

            if not force_no_dilation_flag:
                # Dilate.
                text_region_polygon = text_region_polygon.to_dilated_polygon(
                    ratio=text_region_polygon_dilate_ratio,
                )
                text_region_polygon = text_region_polygon.to_clipped_polygon(shape)

            # Get bounding rectangular box (polygon).
            bounding_rectangular_polygon = \
                text_region_polygon.to_bounding_rectangular_polygon(shape)

            bounding_box = bounding_rectangular_polygon.bounding_box

            # Get other text region.
            # NOTE: Copy before filling, since the extracted mask is modified below.
            bounding_other_text_mask = bounding_rectangular_polygon.extract_mask(text_mask).copy()
            # NOTE: Use the original text region polygon to unset the current text mask.
            original_text_region_polygon.fill_mask(bounding_other_text_mask, 0)

            # Get potentially dilated text region.
            # NOTE(review): Mask.from_shapable presumably creates an empty mask of the same
            # shape -- confirm against the Mask implementation.
            bounding_text_mask = Mask.from_shapable(bounding_other_text_mask)
            bounding_text_mask = bounding_text_mask.to_box_attached(bounding_box)
            # NOTE: Use the potentially dilated text region polygon to set the current text mask.
            text_region_polygon.fill_mask(bounding_text_mask, value=1)

            # Should not use the potentially dilated text region polygon anymore.
            del text_region_polygon

            # Trim potentially dilated text region polygon by eliminating other text region.
            bounding_trimmed_text_mask = Mask.from_masks(
                bounding_box,
                [
                    # Includes the potentially dilated text region.
                    bounding_text_mask,
                    # But does not include any other text regions.
                    bounding_other_text_mask.to_inverted_mask(),
                ],
                ElementSetOperationMode.INTERSECT,
            )

            # Get non-text region.
            bounding_non_text_mask = bounding_rectangular_polygon.extract_mask(non_text_mask)

            # Combine trimmed text region and non-text region.
            # NOTE(review): relies on the default mode of Mask.from_masks, presumably union.
            bounding_extended_text_region_mask = Mask.from_masks(
                bounding_box,
                [bounding_trimmed_text_mask, bounding_non_text_mask],
            )

            bounding_extended_text_region_masks.append(bounding_extended_text_region_mask)
            bounding_rectangular_polygons.append(bounding_rectangular_polygon)

        return bounding_extended_text_region_masks, bounding_rectangular_polygons
 337
 338    @classmethod
 339    def analyze_bounding_rectangular_polygons(
 340        cls,
 341        bounding_rectangular_polygons: Sequence[Polygon],
 342    ):
 343        long_side_ratios: List[float] = []
 344        long_side_angles: List[int] = []
 345
 346        for polygon in bounding_rectangular_polygons:
 347            # Get reference line.
 348            point0, point1, _, point3 = polygon.points
 349            side0_length = math.hypot(
 350                point0.smooth_y - point1.smooth_y,
 351                point0.smooth_x - point1.smooth_x,
 352            )
 353            side1_length = math.hypot(
 354                point0.smooth_y - point3.smooth_y,
 355                point0.smooth_x - point3.smooth_x,
 356            )
 357
 358            long_side_ratios.append(
 359                max(side0_length, side1_length) / min(side0_length, side1_length)
 360            )
 361
 362            point_a = point0
 363            if side0_length > side1_length:
 364                # Reference line (p0 -> p1).
 365                point_b = point1
 366            else:
 367                # Reference line (p0 -> p3).
 368                point_b = point3
 369
 370            # Get the angle of reference line, in [0, 180) degree.
 371            np_theta = np.arctan2(
 372                point_a.smooth_y - point_b.smooth_y,
 373                point_a.smooth_x - point_b.smooth_x,
 374            )
 375            np_theta = np_theta % np.pi
 376            long_side_angle = round(np_theta / np.pi * 180) % 180
 377            long_side_angles.append(long_side_angle)
 378
 379        return long_side_ratios, long_side_angles
 380
 381    @classmethod
 382    def get_typical_angle(
 383        cls,
 384        typical_long_side_ratio_min: float,
 385        long_side_ratios: Sequence[float],
 386        long_side_angles: Sequence[int],
 387    ):
 388        typical_indices: Set[int] = set()
 389        typical_long_side_angles: List[float] = []
 390
 391        for idx, (long_side_ratio, long_side_angle) in \
 392                enumerate(zip(long_side_ratios, long_side_angles)):
 393            if long_side_ratio < typical_long_side_ratio_min:
 394                continue
 395
 396            typical_indices.add(idx)
 397            typical_long_side_angles.append(long_side_angle)
 398
 399        if not typical_long_side_angles:
 400            return None, typical_indices
 401
 402        # NOTE: Due to the sudden change between 179 and 0 degree,
 403        # we need to normalize the range to [0, 360) before calculate the mean of angles.
 404        two_pi = 2 * np.pi
 405        np_angles = np.asarray(typical_long_side_angles) / 180 * two_pi
 406        np_sin_mean = np.sin(np_angles).mean()
 407        np_cos_mean = np.cos(np_angles).mean()
 408
 409        np_theta = np.arctan2(np_sin_mean, np_cos_mean)
 410        np_theta = np_theta % two_pi
 411        # Rescale the range back to [0, 180).
 412        typical_angle = round(np_theta / two_pi * 180)
 413
 414        return typical_angle, typical_indices
 415
 416    @classmethod
 417    def get_flattening_rotate_angles(
 418        cls,
 419        typical_angle: Optional[int],
 420        typical_indices: Set[int],
 421        long_side_angles: Sequence[int],
 422    ):
 423        if typical_angle is not None:
 424            assert typical_indices
 425
 426        flattening_rotate_angles: List[int] = []
 427
 428        for idx, long_side_angle in enumerate(long_side_angles):
 429            if typical_angle is None or idx in typical_indices:
 430                # Dominated by long_side_angle.
 431                main_angle = long_side_angle
 432
 433            else:
 434                # Dominated by typical_angle.
 435                short_side_angle = (long_side_angle + 90) % 180
 436                long_side_delta = abs((long_side_angle - typical_angle + 90) % 180 - 90)
 437                short_side_delta = abs((short_side_angle - typical_angle + 90) % 180 - 90)
 438
 439                if long_side_delta < short_side_delta:
 440                    main_angle = long_side_angle
 441                else:
 442                    main_angle = short_side_angle
 443
 444            # Angle for flattening.
 445            if main_angle <= 90:
 446                # [270, 360).
 447                flattening_rotate_angle = (360 - main_angle) % 360
 448            else:
 449                # [1, 90).
 450                flattening_rotate_angle = 180 - main_angle
 451            flattening_rotate_angles.append(flattening_rotate_angle)
 452
 453        return flattening_rotate_angles
 454
 455    @classmethod
 456    def build_flattened_text_regions(
 457        cls,
 458        image: Image,
 459        text_region_polygons: Sequence[Polygon],
 460        bounding_extended_text_region_masks: Sequence[Mask],
 461        typical_indices: Set[int],
 462        flattening_rotate_angles: Sequence[int],
 463        grouped_char_polygons: Optional[Sequence[Sequence[Polygon]]],
 464    ):
 465        flattened_text_regions: List[FlattenedTextRegion] = []
 466
 467        for idx, (
 468            text_region_polygon,
 469            bounding_extended_text_region_mask,
 470            flattening_rotate_angle,
 471        ) in enumerate(
 472            zip(
 473                text_region_polygons,
 474                bounding_extended_text_region_masks,
 475                flattening_rotate_angles,
 476            )
 477        ):
 478            bounding_box = bounding_extended_text_region_mask.box
 479            assert bounding_box
 480
 481            # Extract image.
 482            text_region_image = bounding_extended_text_region_mask.extract_image(image)
 483
 484            # Shift char polygons.
 485            relative_char_polygons = None
 486            if grouped_char_polygons is not None:
 487                char_polygons = grouped_char_polygons[idx]
 488                relative_char_polygons = [
 489                    char_polygon.to_relative_polygon(
 490                        origin_y=bounding_box.up,
 491                        origin_x=bounding_box.left,
 492                    ) for char_polygon in char_polygons
 493                ]
 494
 495            # Rotate.
 496            rotated_result = rotate.distort(
 497                {'angle': flattening_rotate_angle},
 498                image=text_region_image,
 499                mask=bounding_extended_text_region_mask,
 500                polygons=relative_char_polygons,
 501            )
 502            rotated_text_region_image = rotated_result.image
 503            assert rotated_text_region_image
 504            rotated_bounding_extended_text_region_mask = rotated_result.mask
 505            assert rotated_bounding_extended_text_region_mask
 506            # Could be None.
 507            rotated_char_polygons = rotated_result.polygons
 508
 509            # Trim.
 510            rotated_trimmed_box = rotated_bounding_extended_text_region_mask.to_external_box()
 511
 512            trimmed_text_region_image = rotated_text_region_image.to_cropped_image(
 513                up=rotated_trimmed_box.up,
 514                down=rotated_trimmed_box.down,
 515                left=rotated_trimmed_box.left,
 516                right=rotated_trimmed_box.right,
 517            )
 518
 519            trimmed_mask = rotated_trimmed_box.extract_mask(
 520                rotated_bounding_extended_text_region_mask
 521            )
 522
 523            trimmed_char_polygons = None
 524            if rotated_char_polygons:
 525                trimmed_char_polygons = [
 526                    rotated_char_polygon.to_relative_polygon(
 527                        origin_y=rotated_trimmed_box.up,
 528                        origin_x=rotated_trimmed_box.left,
 529                    ) for rotated_char_polygon in rotated_char_polygons
 530                ]
 531
 532            flattened_text_regions.append(
 533                FlattenedTextRegion(
 534                    is_typical=(idx in typical_indices),
 535                    text_region_polygon=text_region_polygon,
 536                    text_region_image=bounding_extended_text_region_mask.extract_image(image),
 537                    bounding_extended_text_region_mask=bounding_extended_text_region_mask,
 538                    flattening_rotate_angle=flattening_rotate_angle,
 539                    shape_before_trim=rotated_text_region_image.shape,
 540                    rotated_trimmed_box=rotated_trimmed_box,
 541                    shape_before_resize=trimmed_text_region_image.shape,
 542                    post_rotate_angle=0,
 543                    flattened_image=trimmed_text_region_image,
 544                    flattened_mask=trimmed_mask,
 545                    flattened_char_polygons=trimmed_char_polygons,
 546                )
 547            )
 548
 549        return flattened_text_regions
 550
 551    def __init__(
 552        self,
 553        typical_long_side_ratio_min: float,
 554        text_region_polygon_dilate_ratio: float,
 555        image: Image,
 556        text_region_polygons: Sequence[Polygon],
 557        grouped_char_polygons: Optional[Sequence[Sequence[Polygon]]] = None,
 558        is_training: bool = False,
 559    ):
 560        self.origional_text_region_polygons = text_region_polygons
 561
 562        self.text_region_polygons = self.patch_text_region_polygons(
 563            text_region_polygons=text_region_polygons,
 564            grouped_char_polygons=grouped_char_polygons,
 565        )
 566
 567        force_no_dilation_flags = None
 568        if is_training:
 569            assert grouped_char_polygons and len(text_region_polygons) == len(grouped_char_polygons)
 570            force_no_dilation_flags = []
 571            for char_polygons in grouped_char_polygons:
 572                force_no_dilation_flags.append(not char_polygons)
 573
 574        self.bounding_extended_text_region_masks, self.bounding_rectangular_polygons = \
 575            self.process_text_region_polygons(
 576                text_region_polygon_dilate_ratio=text_region_polygon_dilate_ratio,
 577                shape=image.shape,
 578                text_region_polygons=self.text_region_polygons,
 579                force_no_dilation_flags=force_no_dilation_flags,
 580            )
 581
 582        self.long_side_ratios, self.long_side_angles = \
 583            self.analyze_bounding_rectangular_polygons(self.bounding_rectangular_polygons)
 584
 585        self.typical_angle, self.typical_indices = self.get_typical_angle(
 586            typical_long_side_ratio_min=typical_long_side_ratio_min,
 587            long_side_ratios=self.long_side_ratios,
 588            long_side_angles=self.long_side_angles,
 589        )
 590
 591        self.flattening_rotate_angles = self.get_flattening_rotate_angles(
 592            typical_angle=self.typical_angle,
 593            typical_indices=self.typical_indices,
 594            long_side_angles=self.long_side_angles,
 595        )
 596
 597        self.flattened_text_regions = self.build_flattened_text_regions(
 598            image=image,
 599            text_region_polygons=self.origional_text_region_polygons,
 600            bounding_extended_text_region_masks=self.bounding_extended_text_region_masks,
 601            typical_indices=self.typical_indices,
 602            flattening_rotate_angles=self.flattening_rotate_angles,
 603            grouped_char_polygons=grouped_char_polygons,
 604        )
 605
 606
 607def build_background_image_for_stacking(height: int, width: int):
 608    np_rgb_rows = [np.zeros((width, 3), dtype=np.uint8) for _ in range(3)]
 609    rgb_tuples = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]
 610
 611    for color_offset, np_row in enumerate(np_rgb_rows):
 612        for color_idx in range(3):
 613            color_tuple = rgb_tuples[(color_offset + color_idx) % 3]
 614            np_row[color_idx::3] = color_tuple
 615
 616    np_image = np.zeros((height, width, 3), dtype=np.uint8)
 617    for row_offset, np_row in enumerate(np_rgb_rows):
 618        np_image[row_offset::3] = np_row
 619
 620    return Image(mat=np_image)
 621
 622
 623def stack_flattened_text_regions(
 624    page_pad: int,
 625    flattened_text_regions_pad: int,
 626    flattened_text_regions: Sequence[FlattenedTextRegion],
 627):
 628    page_double_pad = 2 * page_pad
 629    flattened_text_regions_double_pad = 2 * flattened_text_regions_pad
 630
 631    rect_packer = RectPacker(rotation=False)
 632
 633    # Add box and bin.
 634    # NOTE: Only one bin is added, that is, packing all text region into one image.
 635    bin_width = 0
 636    bin_height = 0
 637
 638    for ftr_idx, flattened_text_region in enumerate(flattened_text_regions):
 639        rect_packer.add_rect(
 640            width=flattened_text_region.width + flattened_text_regions_double_pad,
 641            height=flattened_text_region.height + flattened_text_regions_double_pad,
 642            rid=ftr_idx,
 643        )
 644
 645        bin_width = max(bin_width, flattened_text_region.width)
 646        bin_height += flattened_text_region.height
 647
 648    bin_width += flattened_text_regions_double_pad
 649    bin_height += flattened_text_regions_double_pad
 650
 651    rect_packer.add_bin(width=bin_width, height=bin_height)
 652
 653    # Pack boxes.
 654    rect_packer.pack()  # type: ignore
 655
 656    # Get packed boxes.
 657    unordered_boxes: List[Box] = []
 658    ftr_indices: List[int] = []
 659    for bin_idx, x, y, width, height, ftr_idx in rect_packer.rect_list():
 660        assert bin_idx == 0
 661        unordered_boxes.append(Box(
 662            up=y,
 663            down=y + height - 1,
 664            left=x,
 665            right=x + width - 1,
 666        ))
 667        ftr_indices.append(ftr_idx)
 668
 669    # Order boxes.
 670    inverse_ftr_indices = [-1] * len(ftr_indices)
 671    for inverse_ftr_idx, ftr_idx in enumerate(ftr_indices):
 672        inverse_ftr_indices[ftr_idx] = inverse_ftr_idx
 673    for inverse_ftr_idx in inverse_ftr_indices:
 674        assert inverse_ftr_idx >= 0
 675    padded_boxes = [unordered_boxes[inverse_ftr_idx] for inverse_ftr_idx in inverse_ftr_indices]
 676
 677    page_height = max(box.down for box in padded_boxes) + 1 + page_double_pad
 678    page_width = max(box.right for box in padded_boxes) + 1 + page_double_pad
 679
 680    image = build_background_image_for_stacking(page_height, page_width)
 681    boxes: List[Box] = []
 682    char_polygons: List[Polygon] = []
 683
 684    for padded_box, flattened_text_region in zip(padded_boxes, flattened_text_regions):
 685        assert flattened_text_region.height + flattened_text_regions_double_pad \
 686            == padded_box.height
 687        assert flattened_text_region.width + flattened_text_regions_double_pad \
 688            == padded_box.width
 689
 690        # Remove box padding.
 691        up = padded_box.up + flattened_text_regions_pad + page_pad
 692        left = padded_box.left + flattened_text_regions_pad + page_pad
 693
 694        box = Box(
 695            up=up,
 696            down=up + flattened_text_region.height - 1,
 697            left=left,
 698            right=left + flattened_text_region.width - 1,
 699        )
 700        boxes.append(box)
 701
 702        # Render.
 703        box.fill_image(
 704            image,
 705            flattened_text_region.flattened_image,
 706            image_mask=flattened_text_region.flattened_mask,
 707        )
 708
 709        if flattened_text_region.flattened_char_polygons:
 710            for char_polygon in flattened_text_region.flattened_char_polygons:
 711                char_polygons.append(char_polygon.to_shifted_polygon(
 712                    offset_y=up,
 713                    offset_x=left,
 714                ))
 715
 716    return image, boxes, char_polygons
 717
 718
 719class PageTextRegionStep(
 720    PipelineStep[
 721        PageTextRegionStepConfig,
 722        PageTextRegionStepInput,
 723        PageTextRegionStepOutput,
 724    ]
 725):  # yapf: disable
 726
 727    @classmethod
 728    def generate_precise_text_region_candidate_polygons(
 729        cls,
 730        precise_mask: Mask,
 731        disconnected_text_region_mask: Mask,
 732    ):
 733        assert precise_mask.box and disconnected_text_region_mask.box
 734
 735        # Get the intersection.
 736        intersected_box = Box(
 737            up=max(precise_mask.box.up, disconnected_text_region_mask.box.up),
 738            down=min(precise_mask.box.down, disconnected_text_region_mask.box.down),
 739            left=max(precise_mask.box.left, disconnected_text_region_mask.box.left),
 740            right=min(precise_mask.box.right, disconnected_text_region_mask.box.right),
 741        )
 742        assert intersected_box.up <= intersected_box.down
 743        assert intersected_box.left <= intersected_box.right
 744
 745        precise_mask = intersected_box.extract_mask(precise_mask)
 746        disconnected_text_region_mask = intersected_box.extract_mask(disconnected_text_region_mask)
 747
 748        # Apply mask bitwise-and operation.
 749        intersected_mask = Mask(
 750            mat=(disconnected_text_region_mask.mat & precise_mask.mat).astype(np.uint8)
 751        )
 752        intersected_mask = intersected_mask.to_box_attached(intersected_box)
 753
 754        # NOTE:
 755        # 1. Could extract more than one polygons.
 756        # 2. Some polygons are in border and should be removed later.
 757        return intersected_mask.to_disconnected_polygons()
 758
 759    @classmethod
 760    def strtree_query_intersected_polygons(
 761        cls,
 762        strtree: STRtree,
 763        id_to_anchor_polygon: Dict[int, Polygon],
 764        candidate_polygon: Polygon,
 765    ):
 766        candidate_shapely_polygon = candidate_polygon.to_shapely_polygon()
 767        candidate_mask = candidate_polygon.mask
 768
 769        for anchor_shapely_polygon in strtree.query(candidate_shapely_polygon):
 770            anchor_id = id(anchor_shapely_polygon)
 771            anchor_polygon = id_to_anchor_polygon[anchor_id]
 772            anchor_mask = anchor_polygon.mask
 773
 774            intersected_ratio = calculate_boxed_masks_intersected_ratio(
 775                anchor_mask=anchor_mask,
 776                candidate_mask=candidate_mask,
 777                use_candidate_as_base=True,
 778            )
 779
 780            yield (
 781                anchor_id,
 782                anchor_polygon,
 783                anchor_mask,
 784                candidate_mask,
 785                intersected_ratio,
 786            )
 787
 788    def sample_page_non_text_region_polygons(
 789        self,
 790        page_non_text_region_polygons: Sequence[Polygon],
 791        num_page_text_region_infos: int,
 792        rng: RandomGenerator,
 793    ):
 794        negative_ratio = self.config.negative_text_region_ratio
 795        num_page_non_text_region_polygons = round(
 796            negative_ratio * num_page_text_region_infos / (1 - negative_ratio)
 797        )
 798        return rng_choice_with_size(
 799            rng,
 800            page_non_text_region_polygons,
 801            size=min(
 802                num_page_non_text_region_polygons,
 803                len(page_non_text_region_polygons),
 804            ),
 805            replace=False,
 806        )
 807
 808    def build_flattened_text_regions(
 809        self,
 810        page_image: Image,
 811        page_text_region_infos: Sequence[PageTextRegionInfo],
 812        page_non_text_region_polygons: Sequence[Polygon],
 813        rng: RandomGenerator,
 814    ):
 815        text_region_polygon_dilate_ratio = float(
 816            rng.uniform(
 817                self.config.text_region_flattener_text_region_polygon_dilate_ratio_min,
 818                self.config.text_region_flattener_text_region_polygon_dilate_ratio_max,
 819            )
 820        )
 821        typical_long_side_ratio_min = \
 822            self.config.text_region_flattener_typical_long_side_ratio_min
 823
 824        text_region_polygons: List[Polygon] = []
 825        grouped_char_polygons: List[Sequence[Polygon]] = []
 826        for page_text_region_info in page_text_region_infos:
 827            text_region_polygons.append(page_text_region_info.precise_text_region_polygon)
 828            grouped_char_polygons.append(page_text_region_info.char_polygons)
 829
 830        # Inject nagative regions.
 831        for page_non_text_region_polygon in page_non_text_region_polygons:
 832            text_region_polygons.append(page_non_text_region_polygon)
 833            grouped_char_polygons.append(tuple())
 834
 835        text_region_flattener = TextRegionFlattener(
 836            typical_long_side_ratio_min=typical_long_side_ratio_min,
 837            text_region_polygon_dilate_ratio=text_region_polygon_dilate_ratio,
 838            image=page_image,
 839            text_region_polygons=text_region_polygons,
 840            grouped_char_polygons=grouped_char_polygons,
 841            is_training=True,
 842        )
 843
 844        # Resize positive ftr.
 845        positive_flattened_text_regions: List[FlattenedTextRegion] = []
 846        # For negative sampling.
 847        positive_reference_heights: List[float] = []
 848        positive_reference_widths: List[float] = []
 849        num_negative_flattened_text_regions = 0
 850
 851        for flattened_text_region in text_region_flattener.flattened_text_regions:
 852            if not flattened_text_region.flattened_char_polygons:
 853                num_negative_flattened_text_regions += 1
 854                continue
 855
 856            char_height_median = flattened_text_region.get_char_height_meidan()
 857
 858            text_region_resize_char_height_median = int(
 859                rng.integers(
 860                    self.config.text_region_resize_char_height_median_min,
 861                    self.config.text_region_resize_char_height_median_max + 1,
 862                )
 863            )
 864            scale = text_region_resize_char_height_median / char_height_median
 865
 866            height, width = flattened_text_region.shape
 867            resized_height = round(height * scale)
 868            resized_width = round(width * scale)
 869
 870            flattened_text_region = flattened_text_region.to_resized_flattened_text_region(
 871                resized_height=resized_height,
 872                resized_width=resized_width,
 873            )
 874
 875            positive_reference_heights.append(resized_height)
 876            positive_reference_widths.append(resized_width)
 877
 878            # Post rotate.
 879            post_rotate_angle = 0
 880            if flattened_text_region.is_typical:
 881                if rng.random() < self.config.text_region_typical_post_rotate_prob:
 882                    # Upside down only.
 883                    post_rotate_angle = 180
 884            else:
 885                if rng.random() < self.config.text_region_untypical_post_rotate_prob:
 886                    # 3-way rotate.
 887                    post_rotate_angle = rng_choice(rng, (180, 90, 270), probs=(0.5, 0.25, 0.25))
 888
 889            if post_rotate_angle != 0:
 890                flattened_text_region = \
 891                    flattened_text_region.to_post_rotated_flattened_text_region(post_rotate_angle)
 892
 893            positive_flattened_text_regions.append(flattened_text_region)
 894
 895        # Resize negative ftr.
 896        negative_reference_heights = list(
 897            rng_choice_with_size(
 898                rng,
 899                positive_reference_heights,
 900                size=num_negative_flattened_text_regions,
 901                replace=(num_negative_flattened_text_regions > len(positive_reference_heights)),
 902            )
 903        )
 904
 905        negative_height_max = max(positive_reference_heights)
 906        negative_width_max = max(positive_reference_widths)
 907
 908        negative_flattened_text_regions: List[FlattenedTextRegion] = []
 909
 910        for flattened_text_region in text_region_flattener.flattened_text_regions:
 911            if flattened_text_region.flattened_char_polygons:
 912                continue
 913
 914            reference_height = negative_reference_heights.pop()
 915            scale = reference_height / flattened_text_region.height
 916
 917            height, width = flattened_text_region.shape
 918            resized_height = round(height * scale)
 919            resized_width = round(width * scale)
 920
 921            # Remove negative region that is too large.
 922            if resized_height > negative_height_max or resized_width > negative_width_max:
 923                continue
 924
 925            flattened_text_region = flattened_text_region.to_resized_flattened_text_region(
 926                resized_height=resized_height,
 927                resized_width=resized_width,
 928            )
 929
 930            # Post rotate.
 931            post_rotate_angle = 0
 932            if flattened_text_region.is_typical:
 933                if rng.random() < self.config.text_region_typical_post_rotate_prob:
 934                    # Upside down only.
 935                    post_rotate_angle = 180
 936            else:
 937                if rng.random() < self.config.text_region_untypical_post_rotate_prob:
 938                    # 3-way rotate.
 939                    post_rotate_angle = rng_choice(rng, (180, 90, 270), probs=(0.5, 0.25, 0.25))
 940
 941            if post_rotate_angle != 0:
 942                flattened_text_region = \
 943                    flattened_text_region.to_post_rotated_flattened_text_region(post_rotate_angle)
 944
 945            negative_flattened_text_regions.append(flattened_text_region)
 946
 947        flattened_text_regions = (
 948            *positive_flattened_text_regions,
 949            *negative_flattened_text_regions,
 950        )
 951        return flattened_text_regions
 952
    def run(self, input: PageTextRegionStepInput, rng: RandomGenerator):
        """
        Build the stacked text-region page from the distorted and resized page outputs.

        Steps:
        1. Intersect each precise text-line polygon (scaled back to the distorted page
           size) with the disconnected text regions it touches, producing precise
           text region candidates.
        2. Assign every char polygon to the candidate it overlaps the most.
        3. Sample negative (non-text) regions, then flatten/resize/rotate all regions.
        4. Stack the flattened regions into one image and, if enabled, rotate it.

        Returns a PageTextRegionStepOutput; `debug` is populated only when
        `config.enable_debug` is set.
        """
        page_distortion_step_output = input.page_distortion_step_output
        page_image = page_distortion_step_output.page_image
        page_char_polygon_collection = page_distortion_step_output.page_char_polygon_collection
        page_disconnected_text_region_collection = \
            page_distortion_step_output.page_disconnected_text_region_collection
        page_non_text_region_collection = \
            page_distortion_step_output.page_non_text_region_collection

        page_resizing_step_output = input.page_resizing_step_output
        page_resized_text_line_mask = page_resizing_step_output.page_text_line_mask

        debug = None
        if self.config.enable_debug:
            debug = PageTextRegionStepDebug()

        # Build R-tree to track text regions.
        # https://github.com/shapely/shapely/issues/640
        # The tree only stores shapely geometries, so each polygon is looked up again
        # through the id() of its shapely counterpart. The list keeps those geometries
        # referenced while the ids are in use.
        id_to_disconnected_text_region_polygon: Dict[int, Polygon] = {}
        disconnected_text_region_shapely_polygons: List[ShapelyPolygon] = []

        for polygon in page_disconnected_text_region_collection.to_polygons():
            shapely_polygon = polygon.to_shapely_polygon()
            id_to_disconnected_text_region_polygon[id(shapely_polygon)] = polygon
            disconnected_text_region_shapely_polygons.append(shapely_polygon)

        disconnected_text_region_tree = STRtree(disconnected_text_region_shapely_polygons)

        # Get the precise text regions.
        precise_text_region_candidate_polygons: List[Polygon] = []
        for resized_precise_polygon in page_resized_text_line_mask.to_disconnected_polygons():
            # Resize back to the shape after distortion.
            precise_polygon = resized_precise_polygon.to_conducted_resized_polygon(
                page_resized_text_line_mask,
                resized_height=page_image.height,
                resized_width=page_image.width,
            )

            # Find and extract intersected text region.
            # NOTE: One precise_polygon could be overlapped with
            # more than one disconnected_text_region_polygon!
            # Only the anchor mask and candidate mask of each yielded tuple are used here.
            for _, _, disconnected_text_region_mask, precise_mask, _ in \
                    self.strtree_query_intersected_polygons(
                        strtree=disconnected_text_region_tree,
                        id_to_anchor_polygon=id_to_disconnected_text_region_polygon,
                        candidate_polygon=precise_polygon,
                    ):
                precise_text_region_candidate_polygons.extend(
                    self.generate_precise_text_region_candidate_polygons(
                        precise_mask=precise_mask,
                        disconnected_text_region_mask=disconnected_text_region_mask,
                    )
                )

        if debug:
            debug.page_image = page_image
            debug.precise_text_region_candidate_polygons = precise_text_region_candidate_polygons

        # Help gc.
        del id_to_disconnected_text_region_polygon
        del disconnected_text_region_shapely_polygons
        del disconnected_text_region_tree

        # Bind char-level polygon to precise text region.
        # Same id()-keyed lookup as above, this time over the candidate polygons.
        id_to_precise_text_region_polygon: Dict[int, Polygon] = {}
        precise_text_region_shapely_polygons: List[ShapelyPolygon] = []

        for polygon in precise_text_region_candidate_polygons:
            shapely_polygon = polygon.to_shapely_polygon()
            id_to_precise_text_region_polygon[id(shapely_polygon)] = polygon
            precise_text_region_shapely_polygons.append(shapely_polygon)

        precise_text_region_tree = STRtree(precise_text_region_shapely_polygons)

        id_to_char_polygons: DefaultDict[int, List[Polygon]] = defaultdict(list)
        for char_polygon in page_char_polygon_collection.polygons:
            # Pick the candidate covering the largest share of this char polygon.
            # A ratio of exactly 0 never wins because of the strict comparison.
            best_precise_text_region_id = None
            intersected_ratio_max = 0

            for (
                precise_text_region_id,
                _,
                _,
                _,
                intersected_ratio,
            ) in self.strtree_query_intersected_polygons(
                strtree=precise_text_region_tree,
                id_to_anchor_polygon=id_to_precise_text_region_polygon,
                candidate_polygon=char_polygon,
            ):
                if intersected_ratio > intersected_ratio_max:
                    intersected_ratio_max = intersected_ratio
                    best_precise_text_region_id = precise_text_region_id

            if best_precise_text_region_id is not None:
                id_to_char_polygons[best_precise_text_region_id].append(char_polygon)
            else:
                # NOTE: Text line with only a small char (i.e. delimiter) could enter this branch.
                # In such case, the text line bounding box is smaller than the char polygon, since
                # the leading/trailing char paddings are ignored during text line rendering.
                # It's acceptable for now since: 1) this case happens rarely, 2) and it won't
                # introduce labeling noise.
                logger.warning(f'Cannot assign a text region for char_polygon={char_polygon}')

        # Keep only the candidates that received at least one char polygon, in
        # candidate order.
        page_text_region_infos: List[PageTextRegionInfo] = []
        for precise_text_region_shapely_polygon in precise_text_region_shapely_polygons:
            ptrsp_id = id(precise_text_region_shapely_polygon)
            if ptrsp_id not in id_to_char_polygons:
                # Not related to any char polygons.
                continue
            assert id_to_char_polygons[ptrsp_id]
            page_text_region_infos.append(
                PageTextRegionInfo(
                    precise_text_region_polygon=id_to_precise_text_region_polygon[ptrsp_id],
                    char_polygons=id_to_char_polygons[ptrsp_id],
                )
            )

        # Help gc.
        del id_to_precise_text_region_polygon
        del precise_text_region_shapely_polygons
        del precise_text_region_tree

        if debug:
            debug.page_text_region_infos = page_text_region_infos

        # Negative sampling.
        page_non_text_region_polygons = self.sample_page_non_text_region_polygons(
            page_non_text_region_polygons=tuple(page_non_text_region_collection.to_polygons()),
            num_page_text_region_infos=len(page_text_region_infos),
            rng=rng,
        )

        flattened_text_regions = self.build_flattened_text_regions(
            page_image=page_image,
            page_text_region_infos=page_text_region_infos,
            page_non_text_region_polygons=page_non_text_region_polygons,
            rng=rng,
        )
        if debug:
            debug.flattened_text_regions = flattened_text_regions

        # Stack text regions.
        # The second element of the returned triple is not needed here.
        image, _, char_polygons = stack_flattened_text_regions(
            page_pad=0,
            flattened_text_regions_pad=self.config.stack_flattened_text_regions_pad,
            flattened_text_regions=flattened_text_regions,
        )

        # Post uniform rotation.
        # The pre-rotation shape is recorded so the output reports it even when the
        # image is rotated.
        shape_before_rotate = image.shape
        rotate_angle = 0

        if self.config.enable_post_rotate:
            # `rng.integers` excludes the upper bound, so `+ 1` makes the max inclusive.
            rotate_angle = int(
                rng.integers(
                    self.config.post_rotate_angle_min,
                    self.config.post_rotate_angle_max + 1,
                )
            )
            rotated_result = rotate.distort(
                {'angle': rotate_angle},
                image=image,
                polygons=char_polygons,
            )
            # NOTE(review): this is a truthiness check, so an empty polygon sequence
            # would also trip it. Confirm what `rotate.distort` returns in that case.
            assert rotated_result.image and rotated_result.polygons
            image = rotated_result.image
            char_polygons = rotated_result.polygons

        return PageTextRegionStepOutput(
            page_image=image,
            page_char_polygons=char_polygons,
            shape_before_rotate=shape_before_rotate,
            rotate_angle=rotate_angle,
            debug=debug,
        )
1129
1130
# Module-level factory object wrapping `PageTextRegionStep` in a `PipelineStepFactory`.
page_text_region_step_factory = PipelineStepFactory(PageTextRegionStep)
class PageTextRegionStepConfig:
class PageTextRegionStepConfig:
    # Configuration of PageTextRegionStep.

    # Forwarded to TextRegionFlattener as `typical_long_side_ratio_min`.
    text_region_flattener_typical_long_side_ratio_min: float = 3.0
    # Bounds of the uniform sample forwarded to TextRegionFlattener as
    # `text_region_polygon_dilate_ratio`.
    text_region_flattener_text_region_polygon_dilate_ratio_min: float = 0.85
    text_region_flattener_text_region_polygon_dilate_ratio_max: float = 1.0
    # Inclusive integer range of the target char height median used to scale
    # positive text regions.
    text_region_resize_char_height_median_min: int = 30
    text_region_resize_char_height_median_max: int = 45
    # Probability of rotating a typical region by 180 degrees.
    text_region_typical_post_rotate_prob: float = 0.2
    # Probability of rotating an untypical region by 180 / 90 / 270 degrees.
    text_region_untypical_post_rotate_prob: float = 0.2
    # Target share of negative (non-text) regions among all sampled regions.
    negative_text_region_ratio: float = 0.1
    # NOTE(review): not read by the step methods in this module as far as visible;
    # negative regions reuse the typical/untypical probabilities above. Confirm.
    negative_text_region_post_rotate_prob: float = 0.2
    # Forwarded to stack_flattened_text_regions as `flattened_text_regions_pad`.
    stack_flattened_text_regions_pad: int = 2
    # When enabled, the stacked page is rotated by an angle drawn from the
    # inclusive integer range [post_rotate_angle_min, post_rotate_angle_max].
    enable_post_rotate: bool = False
    post_rotate_angle_min: int = -10
    post_rotate_angle_max: int = 10
    # When enabled, the step output carries a PageTextRegionStepDebug instance.
    enable_debug: bool = False
PageTextRegionStepConfig( text_region_flattener_typical_long_side_ratio_min: float = 3.0, text_region_flattener_text_region_polygon_dilate_ratio_min: float = 0.85, text_region_flattener_text_region_polygon_dilate_ratio_max: float = 1.0, text_region_resize_char_height_median_min: int = 30, text_region_resize_char_height_median_max: int = 45, text_region_typical_post_rotate_prob: float = 0.2, text_region_untypical_post_rotate_prob: float = 0.2, negative_text_region_ratio: float = 0.1, negative_text_region_post_rotate_prob: float = 0.2, stack_flattened_text_regions_pad: int = 2, enable_post_rotate: bool = False, post_rotate_angle_min: int = -10, post_rotate_angle_max: int = 10, enable_debug: bool = False)
 2def __init__(self, text_region_flattener_typical_long_side_ratio_min=attr_dict['text_region_flattener_typical_long_side_ratio_min'].default, text_region_flattener_text_region_polygon_dilate_ratio_min=attr_dict['text_region_flattener_text_region_polygon_dilate_ratio_min'].default, text_region_flattener_text_region_polygon_dilate_ratio_max=attr_dict['text_region_flattener_text_region_polygon_dilate_ratio_max'].default, text_region_resize_char_height_median_min=attr_dict['text_region_resize_char_height_median_min'].default, text_region_resize_char_height_median_max=attr_dict['text_region_resize_char_height_median_max'].default, text_region_typical_post_rotate_prob=attr_dict['text_region_typical_post_rotate_prob'].default, text_region_untypical_post_rotate_prob=attr_dict['text_region_untypical_post_rotate_prob'].default, negative_text_region_ratio=attr_dict['negative_text_region_ratio'].default, negative_text_region_post_rotate_prob=attr_dict['negative_text_region_post_rotate_prob'].default, stack_flattened_text_regions_pad=attr_dict['stack_flattened_text_regions_pad'].default, enable_post_rotate=attr_dict['enable_post_rotate'].default, post_rotate_angle_min=attr_dict['post_rotate_angle_min'].default, post_rotate_angle_max=attr_dict['post_rotate_angle_max'].default, enable_debug=attr_dict['enable_debug'].default):
 3    self.text_region_flattener_typical_long_side_ratio_min = text_region_flattener_typical_long_side_ratio_min
 4    self.text_region_flattener_text_region_polygon_dilate_ratio_min = text_region_flattener_text_region_polygon_dilate_ratio_min
 5    self.text_region_flattener_text_region_polygon_dilate_ratio_max = text_region_flattener_text_region_polygon_dilate_ratio_max
 6    self.text_region_resize_char_height_median_min = text_region_resize_char_height_median_min
 7    self.text_region_resize_char_height_median_max = text_region_resize_char_height_median_max
 8    self.text_region_typical_post_rotate_prob = text_region_typical_post_rotate_prob
 9    self.text_region_untypical_post_rotate_prob = text_region_untypical_post_rotate_prob
10    self.negative_text_region_ratio = negative_text_region_ratio
11    self.negative_text_region_post_rotate_prob = negative_text_region_post_rotate_prob
12    self.stack_flattened_text_regions_pad = stack_flattened_text_regions_pad
13    self.enable_post_rotate = enable_post_rotate
14    self.post_rotate_angle_min = post_rotate_angle_min
15    self.post_rotate_angle_max = post_rotate_angle_max
16    self.enable_debug = enable_debug

Method generated by attrs for class PageTextRegionStepConfig.

class PageTextRegionStepInput:
class PageTextRegionStepInput:
    # Input of PageTextRegionStep.run.

    # Supplies the page image, char polygons, disconnected text regions and
    # non-text regions.
    page_distortion_step_output: PageDistortionStepOutput
    # Supplies the resized page text line mask.
    page_resizing_step_output: PageResizingStepOutput
PageTextRegionStepInput( page_distortion_step_output: vkit.pipeline.text_detection.page_distortion.PageDistortionStepOutput, page_resizing_step_output: vkit.pipeline.text_detection.page_resizing.PageResizingStepOutput)
2def __init__(self, page_distortion_step_output, page_resizing_step_output):
3    self.page_distortion_step_output = page_distortion_step_output
4    self.page_resizing_step_output = page_resizing_step_output

Method generated by attrs for class PageTextRegionStepInput.

class PageTextRegionInfo:
class PageTextRegionInfo:
    # A precise text region together with the char polygons assigned to it.

    # Polygon of the precise text region.
    precise_text_region_polygon: Polygon
    # Char polygons bound to this region (non-empty when built by PageTextRegionStep.run).
    char_polygons: Sequence[Polygon]
PageTextRegionInfo( precise_text_region_polygon: vkit.element.polygon.Polygon, char_polygons: Sequence[vkit.element.polygon.Polygon])
2def __init__(self, precise_text_region_polygon, char_polygons):
3    self.precise_text_region_polygon = precise_text_region_polygon
4    self.char_polygons = char_polygons

Method generated by attrs for class PageTextRegionInfo.

class FlattenedTextRegion:
 75class FlattenedTextRegion:
 76    is_typical: bool
 77    text_region_polygon: Polygon
 78    text_region_image: Image
 79    bounding_extended_text_region_mask: Mask
 80    flattening_rotate_angle: int
 81    shape_before_trim: Tuple[int, int]
 82    rotated_trimmed_box: Box
 83    shape_before_resize: Tuple[int, int]
 84    post_rotate_angle: int
 85    flattened_image: Image
 86    flattened_mask: Mask
 87    flattened_char_polygons: Optional[Sequence[Polygon]]
 88
 89    @property
 90    def shape(self):
 91        return self.flattened_image.shape
 92
 93    @property
 94    def height(self):
 95        return self.flattened_image.height
 96
 97    @property
 98    def width(self):
 99        return self.flattened_image.width
100
101    @property
102    def area(self):
103        return self.flattened_image.area
104
105    def get_char_height_meidan(self):
106        assert self.flattened_char_polygons
107        return statistics.median(
108            char_polygon.get_rectangular_height() for char_polygon in self.flattened_char_polygons
109        )
110
111    def to_resized_flattened_text_region(
112        self,
113        resized_height: Optional[int] = None,
114        resized_width: Optional[int] = None,
115    ):
116        resized_flattened_image = self.flattened_image.to_resized_image(
117            resized_height=resized_height,
118            resized_width=resized_width,
119        )
120
121        resized_flattened_mask = self.flattened_mask.to_resized_mask(
122            resized_height=resized_height,
123            resized_width=resized_width,
124        )
125
126        resized_flattened_char_polygons = None
127        if self.flattened_char_polygons is not None:
128            resized_flattened_char_polygons = [
129                flattened_char_polygon.to_conducted_resized_polygon(
130                    self.shape,
131                    resized_height=resized_height,
132                    resized_width=resized_width,
133                ) for flattened_char_polygon in self.flattened_char_polygons
134            ]
135
136        return attrs.evolve(
137            self,
138            flattened_image=resized_flattened_image,
139            flattened_mask=resized_flattened_mask,
140            flattened_char_polygons=resized_flattened_char_polygons,
141        )
142
143    def to_post_rotated_flattened_text_region(
144        self,
145        post_rotate_angle: int,
146    ):
147        assert self.post_rotate_angle == 0
148
149        # NOTE: No need to trim.
150        rotated_result = rotate.distort(
151            {'angle': post_rotate_angle},
152            image=self.flattened_image,
153            mask=self.flattened_mask,
154            polygons=self.flattened_char_polygons,
155        )
156        rotated_flattened_image = rotated_result.image
157        assert rotated_flattened_image
158        rotated_flattened_mask = rotated_result.mask
159        assert rotated_flattened_mask
160        rotated_flattened_char_polygons = rotated_result.polygons
161
162        return attrs.evolve(
163            self,
164            post_rotate_angle=post_rotate_angle,
165            flattened_image=rotated_flattened_image,
166            flattened_mask=rotated_flattened_mask,
167            flattened_char_polygons=rotated_flattened_char_polygons,
168        )
FlattenedTextRegion( is_typical: bool, text_region_polygon: vkit.element.polygon.Polygon, text_region_image: vkit.element.image.Image, bounding_extended_text_region_mask: vkit.element.mask.Mask, flattening_rotate_angle: int, shape_before_trim: Tuple[int, int], rotated_trimmed_box: vkit.element.box.Box, shape_before_resize: Tuple[int, int], post_rotate_angle: int, flattened_image: vkit.element.image.Image, flattened_mask: vkit.element.mask.Mask, flattened_char_polygons: Union[Sequence[vkit.element.polygon.Polygon], NoneType])
 2def __init__(self, is_typical, text_region_polygon, text_region_image, bounding_extended_text_region_mask, flattening_rotate_angle, shape_before_trim, rotated_trimmed_box, shape_before_resize, post_rotate_angle, flattened_image, flattened_mask, flattened_char_polygons):
 3    self.is_typical = is_typical
 4    self.text_region_polygon = text_region_polygon
 5    self.text_region_image = text_region_image
 6    self.bounding_extended_text_region_mask = bounding_extended_text_region_mask
 7    self.flattening_rotate_angle = flattening_rotate_angle
 8    self.shape_before_trim = shape_before_trim
 9    self.rotated_trimmed_box = rotated_trimmed_box
10    self.shape_before_resize = shape_before_resize
11    self.post_rotate_angle = post_rotate_angle
12    self.flattened_image = flattened_image
13    self.flattened_mask = flattened_mask
14    self.flattened_char_polygons = flattened_char_polygons

Method generated by attrs for class FlattenedTextRegion.

def get_char_height_meidan(self):
105    def get_char_height_meidan(self):
106        assert self.flattened_char_polygons
107        return statistics.median(
108            char_polygon.get_rectangular_height() for char_polygon in self.flattened_char_polygons
109        )
def to_resized_flattened_text_region( self, resized_height: Union[int, NoneType] = None, resized_width: Union[int, NoneType] = None):
111    def to_resized_flattened_text_region(
112        self,
113        resized_height: Optional[int] = None,
114        resized_width: Optional[int] = None,
115    ):
116        resized_flattened_image = self.flattened_image.to_resized_image(
117            resized_height=resized_height,
118            resized_width=resized_width,
119        )
120
121        resized_flattened_mask = self.flattened_mask.to_resized_mask(
122            resized_height=resized_height,
123            resized_width=resized_width,
124        )
125
126        resized_flattened_char_polygons = None
127        if self.flattened_char_polygons is not None:
128            resized_flattened_char_polygons = [
129                flattened_char_polygon.to_conducted_resized_polygon(
130                    self.shape,
131                    resized_height=resized_height,
132                    resized_width=resized_width,
133                ) for flattened_char_polygon in self.flattened_char_polygons
134            ]
135
136        return attrs.evolve(
137            self,
138            flattened_image=resized_flattened_image,
139            flattened_mask=resized_flattened_mask,
140            flattened_char_polygons=resized_flattened_char_polygons,
141        )
def to_post_rotated_flattened_text_region(self, post_rotate_angle: int):
143    def to_post_rotated_flattened_text_region(
144        self,
145        post_rotate_angle: int,
146    ):
147        assert self.post_rotate_angle == 0
148
149        # NOTE: No need to trim.
150        rotated_result = rotate.distort(
151            {'angle': post_rotate_angle},
152            image=self.flattened_image,
153            mask=self.flattened_mask,
154            polygons=self.flattened_char_polygons,
155        )
156        rotated_flattened_image = rotated_result.image
157        assert rotated_flattened_image
158        rotated_flattened_mask = rotated_result.mask
159        assert rotated_flattened_mask
160        rotated_flattened_char_polygons = rotated_result.polygons
161
162        return attrs.evolve(
163            self,
164            post_rotate_angle=post_rotate_angle,
165            flattened_image=rotated_flattened_image,
166            flattened_mask=rotated_flattened_mask,
167            flattened_char_polygons=rotated_flattened_char_polygons,
168        )
class PageTextRegionStepDebug:
class PageTextRegionStepDebug:
    # Intermediate results collected by PageTextRegionStep.run when
    # `enable_debug` is set. Every field defaults to None until it is assigned.

    # The page image taken from the page distortion step output.
    page_image: Image = attrs.field(default=None)
    # Candidate polygons from intersecting precise text lines with text regions.
    precise_text_region_candidate_polygons: Sequence[Polygon] = attrs.field(default=None)
    # Candidates that got at least one char polygon assigned.
    page_text_region_infos: Sequence[PageTextRegionInfo] = attrs.field(default=None)
    # Result of build_flattened_text_regions (positives followed by negatives).
    flattened_text_regions: Sequence[FlattenedTextRegion] = attrs.field(default=None)
PageTextRegionStepDebug( page_image: vkit.element.image.Image = None, precise_text_region_candidate_polygons: Sequence[vkit.element.polygon.Polygon] = None, page_text_region_infos: Sequence[vkit.pipeline.text_detection.page_text_region.PageTextRegionInfo] = None, flattened_text_regions: Sequence[vkit.pipeline.text_detection.page_text_region.FlattenedTextRegion] = None)
2def __init__(self, page_image=attr_dict['page_image'].default, precise_text_region_candidate_polygons=attr_dict['precise_text_region_candidate_polygons'].default, page_text_region_infos=attr_dict['page_text_region_infos'].default, flattened_text_regions=attr_dict['flattened_text_regions'].default):
3    self.page_image = page_image
4    self.precise_text_region_candidate_polygons = precise_text_region_candidate_polygons
5    self.page_text_region_infos = page_text_region_infos
6    self.flattened_text_regions = flattened_text_regions

Method generated by attrs for class PageTextRegionStepDebug.

class PageTextRegionStepOutput:
class PageTextRegionStepOutput:
    # Output of PageTextRegionStep.run.

    # Stacked text-region image, after the optional uniform rotation.
    page_image: Image
    # Char polygons in the coordinates of `page_image`.
    page_char_polygons: Sequence[Polygon]
    # Shape of the stacked image before the optional uniform rotation.
    shape_before_rotate: Tuple[int, int]
    # Applied rotation angle; 0 when post rotation is disabled.
    rotate_angle: int
    # Debug payload, or None when debugging is disabled.
    debug: Optional[PageTextRegionStepDebug]
PageTextRegionStepOutput( page_image: vkit.element.image.Image, page_char_polygons: Sequence[vkit.element.polygon.Polygon], shape_before_rotate: Tuple[int, int], rotate_angle: int, debug: Union[vkit.pipeline.text_detection.page_text_region.PageTextRegionStepDebug, NoneType])
2def __init__(self, page_image, page_char_polygons, shape_before_rotate, rotate_angle, debug):
3    self.page_image = page_image
4    self.page_char_polygons = page_char_polygons
5    self.shape_before_rotate = shape_before_rotate
6    self.rotate_angle = rotate_angle
7    self.debug = debug

Method generated by attrs for class PageTextRegionStepOutput.

def calculate_boxed_masks_intersected_ratio( anchor_mask: vkit.element.mask.Mask, candidate_mask: vkit.element.mask.Mask, use_candidate_as_base: bool = False):
def calculate_boxed_masks_intersected_ratio(
    anchor_mask: Mask,
    candidate_mask: Mask,
    use_candidate_as_base: bool = False,
):
    """Return the overlap ratio of two box-attached masks.

    Both masks must carry a box (``mask.box``); the overlap is evaluated only inside
    the intersection of the two boxes.

    Args:
        anchor_mask: The reference mask.
        candidate_mask: The mask compared against the anchor.
        use_candidate_as_base: If True, the ratio is
            ``intersection / area(candidate)``. Otherwise it is
            ``intersection / union`` (intersection over union).

    Returns:
        The ratio as a float. ``0.0`` is returned when the boxes do not overlap or
        when the base area is zero (e.g. an empty candidate mask).
    """
    anchor_box = anchor_mask.box
    assert anchor_box

    candidate_box = candidate_mask.box
    assert candidate_box

    # Intersection of the two boxes (all bounds are inclusive).
    up = max(anchor_box.up, candidate_box.up)
    down = min(anchor_box.down, candidate_box.down)
    left = max(anchor_box.left, candidate_box.left)
    right = min(anchor_box.right, candidate_box.right)

    if up > down or left > right:
        # The boxes are disjoint, hence nothing can overlap.
        return 0.0

    # Slice each mask with coordinates relative to its own box.
    np_intersected_anchor_mask = anchor_mask.mat[
        up - anchor_box.up:down - anchor_box.up + 1,
        left - anchor_box.left:right - anchor_box.left + 1,
    ]  # yapf: disable
    np_intersected_candidate_mask = candidate_mask.mat[
        up - candidate_box.up:down - candidate_box.up + 1,
        left - candidate_box.left:right - candidate_box.left + 1,
    ]  # yapf: disable
    np_intersected_mask = np_intersected_anchor_mask & np_intersected_candidate_mask
    intersected_area = int(np_intersected_mask.sum())

    if use_candidate_as_base:
        base_area = int(candidate_mask.np_mask.sum())
    else:
        # Area of the union.
        base_area = (
            int(anchor_mask.np_mask.sum()) + int(candidate_mask.np_mask.sum()) - intersected_area
        )

    if base_area == 0:
        # Empty base: avoid ZeroDivisionError, report "no overlap" like the disjoint case.
        return 0.0

    return intersected_area / base_area
class TextRegionFlattener:
class TextRegionFlattener:
    """Flatten text regions of an image.

    The constructor runs the whole procedure: patch the text region polygons with
    their char polygons, build a bounding mask for every region, measure the
    orientation of every region, decide a rotate angle per region, and finally
    rotate and trim every region into a ``FlattenedTextRegion``. Intermediate
    results are kept as instance attributes.
    """

    @classmethod
    def patch_text_region_polygons(
        cls,
        text_region_polygons: Sequence[Polygon],
        grouped_char_polygons: Optional[Sequence[Sequence[Polygon]]],
    ):
        """Enlarge each text region polygon so that it covers its char polygons.

        Args:
            text_region_polygons: The text region polygons.
            grouped_char_polygons: Char polygons grouped per text region, aligned
                with ``text_region_polygons``. If None, the input polygons are
                returned untouched.

        Returns:
            The input sequence itself (when ``grouped_char_polygons`` is None), or a
            new list holding one patched polygon per text region.
        """
        if grouped_char_polygons is None:
            return text_region_polygons

        assert len(text_region_polygons) == len(grouped_char_polygons)

        patched_text_region_polygons: List[Polygon] = []
        for text_region_polygon, char_polygons in zip(text_region_polygons, grouped_char_polygons):
            # Need to make sure all char polygons are included.
            unionized_polygons = [text_region_polygon]
            unionized_polygons.extend(char_polygons)

            # Draw all polygons into one mask and take the external polygon of the result.
            bounding_box = Box.from_boxes((polygon.bounding_box for polygon in unionized_polygons))
            mask = Mask.from_shapable(bounding_box).to_box_attached(bounding_box)
            for polygon in unionized_polygons:
                polygon.fill_mask(mask)

            patched_text_region_polygons.append(mask.to_external_polygon())

        return patched_text_region_polygons

    @classmethod
    def process_text_region_polygons(
        cls,
        text_region_polygon_dilate_ratio: float,
        shape: Tuple[int, int],
        text_region_polygons: Sequence[Polygon],
        force_no_dilation_flags: Optional[Sequence[bool]] = None,
    ):
        """Build, for every text region, its bounding rectangle and extended mask.

        For each region the polygon is dilated and clipped to ``shape`` (unless its
        no-dilation flag is set), and a bounding rectangular polygon is derived from
        it. Within the bounding box of that rectangle a mask is built from the
        (potentially dilated) region with the pixels of other text regions removed,
        then combined with the non-text pixels inside the rectangle.

        Args:
            text_region_polygon_dilate_ratio: Ratio passed to
                ``Polygon.to_dilated_polygon``.
            shape: Shape of the page the polygons live in.
            text_region_polygons: The text region polygons.
            force_no_dilation_flags: Optional per-region flags; a true flag skips
                the dilation of that region. Defaults to dilating every region.

        Returns:
            A pair ``(bounding_extended_text_region_masks, bounding_rectangular_polygons)``,
            both aligned with ``text_region_polygons``.
        """
        text_mask = Mask.from_polygons(shape, text_region_polygons)
        non_text_mask = text_mask.to_inverted_mask()

        box = Box.from_shape(shape)
        text_mask = text_mask.to_box_attached(box)
        non_text_mask = non_text_mask.to_box_attached(box)

        bounding_extended_text_region_masks: List[Mask] = []
        bounding_rectangular_polygons: List[Polygon] = []

        if force_no_dilation_flags is None:
            force_no_dilation_flags_iter = itertools.repeat(False)
        else:
            assert len(force_no_dilation_flags) == len(text_region_polygons)
            force_no_dilation_flags_iter = force_no_dilation_flags

        for text_region_polygon, force_no_dilation_flag in zip(
            text_region_polygons, force_no_dilation_flags_iter
        ):
            original_text_region_polygon = text_region_polygon

            if not force_no_dilation_flag:
                # Dilate.
                text_region_polygon = text_region_polygon.to_dilated_polygon(
                    ratio=text_region_polygon_dilate_ratio,
                )
                text_region_polygon = text_region_polygon.to_clipped_polygon(shape)

            # Get bounding rectangular box (polygon).
            bounding_rectangular_polygon = \
                text_region_polygon.to_bounding_rectangular_polygon(shape)

            bounding_box = bounding_rectangular_polygon.bounding_box

            # Get other text region.
            bounding_other_text_mask = bounding_rectangular_polygon.extract_mask(text_mask).copy()
            # NOTE: Use the original text region polygon to unset the current text mask.
            original_text_region_polygon.fill_mask(bounding_other_text_mask, 0)

            # Get potentially dilated text region.
            bounding_text_mask = Mask.from_shapable(bounding_other_text_mask)
            bounding_text_mask = bounding_text_mask.to_box_attached(bounding_box)
            # NOTE: Use the potentially dilated text region polygon to set the current text mask.
            text_region_polygon.fill_mask(bounding_text_mask, value=1)

            # Should not use the potentially dilated text region polygon anymore.
            del text_region_polygon

            # Trim potentially dilated text region polygon by eliminating other text region.
            bounding_trimmed_text_mask = Mask.from_masks(
                bounding_box,
                [
                    # Includes the potentially dilated text region.
                    bounding_text_mask,
                    # But not includes any other text regions.
                    bounding_other_text_mask.to_inverted_mask(),
                ],
                ElementSetOperationMode.INTERSECT,
            )

            # Get non-text region.
            bounding_non_text_mask = bounding_rectangular_polygon.extract_mask(non_text_mask)

            # Combine trimmed text region and non-text region.
            bounding_extended_text_region_mask = Mask.from_masks(
                bounding_box,
                [bounding_trimmed_text_mask, bounding_non_text_mask],
            )

            bounding_extended_text_region_masks.append(bounding_extended_text_region_mask)
            bounding_rectangular_polygons.append(bounding_rectangular_polygon)

        return bounding_extended_text_region_masks, bounding_rectangular_polygons

    @classmethod
    def analyze_bounding_rectangular_polygons(
        cls,
        bounding_rectangular_polygons: Sequence[Polygon],
    ):
        """Measure the aspect ratio and the long side angle of each rectangle.

        Args:
            bounding_rectangular_polygons: Polygons with exactly four points.

        Returns:
            A pair ``(long_side_ratios, long_side_angles)``. The ratio is the longer
            side divided by the shorter side. The angle is the direction of the
            longer side, as an integer degree in [0, 180).
        """
        long_side_ratios: List[float] = []
        long_side_angles: List[int] = []

        for polygon in bounding_rectangular_polygons:
            # Get reference line.
            point0, point1, _, point3 = polygon.points
            side0_length = math.hypot(
                point0.smooth_y - point1.smooth_y,
                point0.smooth_x - point1.smooth_x,
            )
            side1_length = math.hypot(
                point0.smooth_y - point3.smooth_y,
                point0.smooth_x - point3.smooth_x,
            )

            long_side_ratios.append(
                max(side0_length, side1_length) / min(side0_length, side1_length)
            )

            point_a = point0
            if side0_length > side1_length:
                # Reference line (p0 -> p1).
                point_b = point1
            else:
                # Reference line (p0 -> p3).
                point_b = point3

            # Get the angle of reference line, in [0, 180) degree.
            np_theta = np.arctan2(
                point_a.smooth_y - point_b.smooth_y,
                point_a.smooth_x - point_b.smooth_x,
            )
            np_theta = np_theta % np.pi
            # NOTE: The trailing modulo maps a rounded 180 back to 0.
            long_side_angle = round(np_theta / np.pi * 180) % 180
            long_side_angles.append(long_side_angle)

        return long_side_ratios, long_side_angles

    @classmethod
    def get_typical_angle(
        cls,
        typical_long_side_ratio_min: float,
        long_side_ratios: Sequence[float],
        long_side_angles: Sequence[int],
    ):
        """Estimate the dominant long side angle of the elongated regions.

        A region is "typical" if its long side ratio is not less than
        ``typical_long_side_ratio_min``.

        Args:
            typical_long_side_ratio_min: Minimum long side ratio of a typical region.
            long_side_ratios: Long side ratio per region.
            long_side_angles: Long side angle per region, in [0, 180) degree.

        Returns:
            A pair ``(typical_angle, typical_indices)``. ``typical_angle`` is the
            circular mean of the typical angles as an integer in [0, 180), or None
            if no region is typical. ``typical_indices`` holds the indices of the
            typical regions.
        """
        typical_indices: Set[int] = set()
        typical_long_side_angles: List[float] = []

        for idx, (long_side_ratio, long_side_angle) in \
                enumerate(zip(long_side_ratios, long_side_angles)):
            if long_side_ratio < typical_long_side_ratio_min:
                continue

            typical_indices.add(idx)
            typical_long_side_angles.append(long_side_angle)

        if not typical_long_side_angles:
            return None, typical_indices

        # NOTE: Due to the sudden change between 179 and 0 degree,
        # we need to normalize the range to [0, 360) before calculate the mean of angles.
        two_pi = 2 * np.pi
        np_angles = np.asarray(typical_long_side_angles) / 180 * two_pi
        np_sin_mean = np.sin(np_angles).mean()
        np_cos_mean = np.cos(np_angles).mean()

        np_theta = np.arctan2(np_sin_mean, np_cos_mean)
        np_theta = np_theta % two_pi
        # Rescale the range back to [0, 180).
        # NOTE: Rounding could produce 180 (e.g. 179.7), hence the trailing modulo.
        typical_angle = round(np_theta / two_pi * 180) % 180

        return typical_angle, typical_indices

    @classmethod
    def get_flattening_rotate_angles(
        cls,
        typical_angle: Optional[int],
        typical_indices: Set[int],
        long_side_angles: Sequence[int],
    ):
        """Decide the rotate angle that flattens each region.

        Typical regions (and all regions when ``typical_angle`` is None) are driven
        by their own long side angle. Any other region uses whichever of its long
        side / short side angles is closer to ``typical_angle``.

        Args:
            typical_angle: The dominant angle, or None if unavailable.
            typical_indices: Indices of typical regions; must be non-empty if
                ``typical_angle`` is given.
            long_side_angles: Long side angle per region, in [0, 180) degree.

        Returns:
            One integer rotate angle per region: 0 or a value in [270, 360) when the
            chosen angle is at most 90, otherwise a value in [1, 90).
        """
        if typical_angle is not None:
            assert typical_indices

        flattening_rotate_angles: List[int] = []

        for idx, long_side_angle in enumerate(long_side_angles):
            if typical_angle is None or idx in typical_indices:
                # Dominated by long_side_angle.
                main_angle = long_side_angle

            else:
                # Dominated by typical_angle.
                short_side_angle = (long_side_angle + 90) % 180
                # Distance between two directions on a circle of period 180, in [0, 90].
                long_side_delta = abs((long_side_angle - typical_angle + 90) % 180 - 90)
                short_side_delta = abs((short_side_angle - typical_angle + 90) % 180 - 90)

                if long_side_delta < short_side_delta:
                    main_angle = long_side_angle
                else:
                    main_angle = short_side_angle

            # Angle for flattening.
            if main_angle <= 90:
                # 0 or [270, 360).
                flattening_rotate_angle = (360 - main_angle) % 360
            else:
                # [1, 90).
                flattening_rotate_angle = 180 - main_angle
            flattening_rotate_angles.append(flattening_rotate_angle)

        return flattening_rotate_angles

    @classmethod
    def build_flattened_text_regions(
        cls,
        image: Image,
        text_region_polygons: Sequence[Polygon],
        bounding_extended_text_region_masks: Sequence[Mask],
        typical_indices: Set[int],
        flattening_rotate_angles: Sequence[int],
        grouped_char_polygons: Optional[Sequence[Sequence[Polygon]]],
    ):
        """Extract, rotate and trim every text region.

        Args:
            image: The page image.
            text_region_polygons: Text region polygons stored in the results.
            bounding_extended_text_region_masks: Box-attached mask per region.
            typical_indices: Indices of the typical regions.
            flattening_rotate_angles: Rotate angle per region.
            grouped_char_polygons: Optional char polygons grouped per region.

        Returns:
            A list with one ``FlattenedTextRegion`` per region.
        """
        flattened_text_regions: List[FlattenedTextRegion] = []

        for idx, (
            text_region_polygon,
            bounding_extended_text_region_mask,
            flattening_rotate_angle,
        ) in enumerate(
            zip(
                text_region_polygons,
                bounding_extended_text_region_masks,
                flattening_rotate_angles,
            )
        ):
            bounding_box = bounding_extended_text_region_mask.box
            assert bounding_box

            # Extract image.
            text_region_image = bounding_extended_text_region_mask.extract_image(image)

            # Shift char polygons, making them relative to the bounding box.
            relative_char_polygons = None
            if grouped_char_polygons is not None:
                char_polygons = grouped_char_polygons[idx]
                relative_char_polygons = [
                    char_polygon.to_relative_polygon(
                        origin_y=bounding_box.up,
                        origin_x=bounding_box.left,
                    ) for char_polygon in char_polygons
                ]

            # Rotate.
            rotated_result = rotate.distort(
                {'angle': flattening_rotate_angle},
                image=text_region_image,
                mask=bounding_extended_text_region_mask,
                polygons=relative_char_polygons,
            )
            rotated_text_region_image = rotated_result.image
            assert rotated_text_region_image
            rotated_bounding_extended_text_region_mask = rotated_result.mask
            assert rotated_bounding_extended_text_region_mask
            # Could be None.
            rotated_char_polygons = rotated_result.polygons

            # Trim to the external box of the rotated mask.
            rotated_trimmed_box = rotated_bounding_extended_text_region_mask.to_external_box()

            trimmed_text_region_image = rotated_text_region_image.to_cropped_image(
                up=rotated_trimmed_box.up,
                down=rotated_trimmed_box.down,
                left=rotated_trimmed_box.left,
                right=rotated_trimmed_box.right,
            )

            trimmed_mask = rotated_trimmed_box.extract_mask(
                rotated_bounding_extended_text_region_mask
            )

            # NOTE: An empty collection of rotated char polygons also ends up as None.
            trimmed_char_polygons = None
            if rotated_char_polygons:
                trimmed_char_polygons = [
                    rotated_char_polygon.to_relative_polygon(
                        origin_y=rotated_trimmed_box.up,
                        origin_x=rotated_trimmed_box.left,
                    ) for rotated_char_polygon in rotated_char_polygons
                ]

            flattened_text_regions.append(
                FlattenedTextRegion(
                    is_typical=(idx in typical_indices),
                    text_region_polygon=text_region_polygon,
                    text_region_image=bounding_extended_text_region_mask.extract_image(image),
                    bounding_extended_text_region_mask=bounding_extended_text_region_mask,
                    flattening_rotate_angle=flattening_rotate_angle,
                    shape_before_trim=rotated_text_region_image.shape,
                    rotated_trimmed_box=rotated_trimmed_box,
                    shape_before_resize=trimmed_text_region_image.shape,
                    post_rotate_angle=0,
                    flattened_image=trimmed_text_region_image,
                    flattened_mask=trimmed_mask,
                    flattened_char_polygons=trimmed_char_polygons,
                )
            )

        return flattened_text_regions

    def __init__(
        self,
        typical_long_side_ratio_min: float,
        text_region_polygon_dilate_ratio: float,
        image: Image,
        text_region_polygons: Sequence[Polygon],
        grouped_char_polygons: Optional[Sequence[Sequence[Polygon]]] = None,
        is_training: bool = False,
    ):
        """Run the flattening procedure and keep all intermediate results.

        Args:
            typical_long_side_ratio_min: Minimum long side ratio of a typical region.
            text_region_polygon_dilate_ratio: Dilate ratio of text region polygons.
            image: The page image.
            text_region_polygons: The text region polygons.
            grouped_char_polygons: Optional char polygons grouped per text region.
            is_training: If True, ``grouped_char_polygons`` is required, and regions
                without any char polygon are not dilated.
        """
        self.origional_text_region_polygons = text_region_polygons

        self.text_region_polygons = self.patch_text_region_polygons(
            text_region_polygons=text_region_polygons,
            grouped_char_polygons=grouped_char_polygons,
        )

        force_no_dilation_flags = None
        if is_training:
            assert grouped_char_polygons and len(text_region_polygons) == len(grouped_char_polygons)
            force_no_dilation_flags = []
            for char_polygons in grouped_char_polygons:
                # No dilation for a region without any char polygon.
                force_no_dilation_flags.append(not char_polygons)

        self.bounding_extended_text_region_masks, self.bounding_rectangular_polygons = \
            self.process_text_region_polygons(
                text_region_polygon_dilate_ratio=text_region_polygon_dilate_ratio,
                shape=image.shape,
                text_region_polygons=self.text_region_polygons,
                force_no_dilation_flags=force_no_dilation_flags,
            )

        self.long_side_ratios, self.long_side_angles = \
            self.analyze_bounding_rectangular_polygons(self.bounding_rectangular_polygons)

        self.typical_angle, self.typical_indices = self.get_typical_angle(
            typical_long_side_ratio_min=typical_long_side_ratio_min,
            long_side_ratios=self.long_side_ratios,
            long_side_angles=self.long_side_angles,
        )

        self.flattening_rotate_angles = self.get_flattening_rotate_angles(
            typical_angle=self.typical_angle,
            typical_indices=self.typical_indices,
            long_side_angles=self.long_side_angles,
        )

        # NOTE: The original (unpatched) text region polygons are stored in the results.
        self.flattened_text_regions = self.build_flattened_text_regions(
            image=image,
            text_region_polygons=self.origional_text_region_polygons,
            bounding_extended_text_region_masks=self.bounding_extended_text_region_masks,
            typical_indices=self.typical_indices,
            flattening_rotate_angles=self.flattening_rotate_angles,
            grouped_char_polygons=grouped_char_polygons,
        )
TextRegionFlattener( typical_long_side_ratio_min: float, text_region_polygon_dilate_ratio: float, image: vkit.element.image.Image, text_region_polygons: Sequence[vkit.element.polygon.Polygon], grouped_char_polygons: Union[Sequence[Sequence[vkit.element.polygon.Polygon]], NoneType] = None, is_training: bool = False)
552    def __init__(
553        self,
554        typical_long_side_ratio_min: float,
555        text_region_polygon_dilate_ratio: float,
556        image: Image,
557        text_region_polygons: Sequence[Polygon],
558        grouped_char_polygons: Optional[Sequence[Sequence[Polygon]]] = None,
559        is_training: bool = False,
560    ):
561        self.origional_text_region_polygons = text_region_polygons
562
563        self.text_region_polygons = self.patch_text_region_polygons(
564            text_region_polygons=text_region_polygons,
565            grouped_char_polygons=grouped_char_polygons,
566        )
567
568        force_no_dilation_flags = None
569        if is_training:
570            assert grouped_char_polygons and len(text_region_polygons) == len(grouped_char_polygons)
571            force_no_dilation_flags = []
572            for char_polygons in grouped_char_polygons:
573                force_no_dilation_flags.append(not char_polygons)
574
575        self.bounding_extended_text_region_masks, self.bounding_rectangular_polygons = \
576            self.process_text_region_polygons(
577                text_region_polygon_dilate_ratio=text_region_polygon_dilate_ratio,
578                shape=image.shape,
579                text_region_polygons=self.text_region_polygons,
580                force_no_dilation_flags=force_no_dilation_flags,
581            )
582
583        self.long_side_ratios, self.long_side_angles = \
584            self.analyze_bounding_rectangular_polygons(self.bounding_rectangular_polygons)
585
586        self.typical_angle, self.typical_indices = self.get_typical_angle(
587            typical_long_side_ratio_min=typical_long_side_ratio_min,
588            long_side_ratios=self.long_side_ratios,
589            long_side_angles=self.long_side_angles,
590        )
591
592        self.flattening_rotate_angles = self.get_flattening_rotate_angles(
593            typical_angle=self.typical_angle,
594            typical_indices=self.typical_indices,
595            long_side_angles=self.long_side_angles,
596        )
597
598        self.flattened_text_regions = self.build_flattened_text_regions(
599            image=image,
600            text_region_polygons=self.origional_text_region_polygons,
601            bounding_extended_text_region_masks=self.bounding_extended_text_region_masks,
602            typical_indices=self.typical_indices,
603            flattening_rotate_angles=self.flattening_rotate_angles,
604            grouped_char_polygons=grouped_char_polygons,
605        )
@classmethod
def patch_text_region_polygons( cls, text_region_polygons: Sequence[vkit.element.polygon.Polygon], grouped_char_polygons: Union[Sequence[Sequence[vkit.element.polygon.Polygon]], NoneType]):
231    @classmethod
232    def patch_text_region_polygons(
233        cls,
234        text_region_polygons: Sequence[Polygon],
235        grouped_char_polygons: Optional[Sequence[Sequence[Polygon]]],
236    ):
237        if grouped_char_polygons is None:
238            return text_region_polygons
239
240        assert len(text_region_polygons) == len(grouped_char_polygons)
241
242        patched_text_region_polygons: List[Polygon] = []
243        for text_region_polygon, char_polygons in zip(text_region_polygons, grouped_char_polygons):
244            # Need to make sure all char polygons are included.
245            unionized_polygons = [text_region_polygon]
246            unionized_polygons.extend(char_polygons)
247
248            bounding_box = Box.from_boxes((polygon.bounding_box for polygon in unionized_polygons))
249            mask = Mask.from_shapable(bounding_box).to_box_attached(bounding_box)
250            for polygon in unionized_polygons:
251                polygon.fill_mask(mask)
252
253            patched_text_region_polygons.append(mask.to_external_polygon())
254
255        return patched_text_region_polygons
@classmethod
def process_text_region_polygons( cls, text_region_polygon_dilate_ratio: float, shape: Tuple[int, int], text_region_polygons: Sequence[vkit.element.polygon.Polygon], force_no_dilation_flags: Union[Sequence[bool], NoneType] = None):
    @classmethod
    def process_text_region_polygons(
        cls,
        text_region_polygon_dilate_ratio: float,
        shape: Tuple[int, int],
        text_region_polygons: Sequence[Polygon],
        force_no_dilation_flags: Optional[Sequence[bool]] = None,
    ):
        """Build, for every text region, its bounding rectangle and extended mask.

        For each region the polygon is dilated and clipped to ``shape`` (unless its
        no-dilation flag is set), and a bounding rectangular polygon is derived from
        it. Within the bounding box of that rectangle a mask is built from the
        (potentially dilated) region with the pixels of other text regions removed,
        then combined with the non-text pixels inside the rectangle.

        Args:
            text_region_polygon_dilate_ratio: Ratio passed to
                ``Polygon.to_dilated_polygon``.
            shape: Shape of the page the polygons live in.
            text_region_polygons: The text region polygons.
            force_no_dilation_flags: Optional per-region flags; a true flag skips
                the dilation of that region. Defaults to dilating every region.

        Returns:
            A pair ``(bounding_extended_text_region_masks, bounding_rectangular_polygons)``,
            both aligned with ``text_region_polygons``.
        """
        # Page-level masks: pixels covered by any text region, and the complement.
        text_mask = Mask.from_polygons(shape, text_region_polygons)
        non_text_mask = text_mask.to_inverted_mask()

        box = Box.from_shape(shape)
        text_mask = text_mask.to_box_attached(box)
        non_text_mask = non_text_mask.to_box_attached(box)

        bounding_extended_text_region_masks: List[Mask] = []
        bounding_rectangular_polygons: List[Polygon] = []

        if force_no_dilation_flags is None:
            # Endless stream of False; zip() below stops at the last polygon.
            force_no_dilation_flags_iter = itertools.repeat(False)
        else:
            assert len(force_no_dilation_flags) == len(text_region_polygons)
            force_no_dilation_flags_iter = force_no_dilation_flags

        for text_region_polygon, force_no_dilation_flag in zip(
            text_region_polygons, force_no_dilation_flags_iter
        ):
            original_text_region_polygon = text_region_polygon

            if not force_no_dilation_flag:
                # Dilate.
                text_region_polygon = text_region_polygon.to_dilated_polygon(
                    ratio=text_region_polygon_dilate_ratio,
                )
                text_region_polygon = text_region_polygon.to_clipped_polygon(shape)

            # Get bounding rectangular box (polygon).
            bounding_rectangular_polygon = \
                text_region_polygon.to_bounding_rectangular_polygon(shape)

            bounding_box = bounding_rectangular_polygon.bounding_box

            # Get other text region.
            # NOTE: Work on a copy, since the mask is modified right below.
            bounding_other_text_mask = bounding_rectangular_polygon.extract_mask(text_mask).copy()
            # NOTE: Use the original text region polygon to unset the current text mask.
            original_text_region_polygon.fill_mask(bounding_other_text_mask, 0)

            # Get potentially dilated text region.
            bounding_text_mask = Mask.from_shapable(bounding_other_text_mask)
            bounding_text_mask = bounding_text_mask.to_box_attached(bounding_box)
            # NOTE: Use the potentially dilated text region polygon to set the current text mask.
            text_region_polygon.fill_mask(bounding_text_mask, value=1)

            # Should not use the potentially dilated text region polygon anymore.
            del text_region_polygon

            # Trim potentially dilated text region polygon by eliminating other text region.
            bounding_trimmed_text_mask = Mask.from_masks(
                bounding_box,
                [
                    # Includes the potentially dilated text region.
                    bounding_text_mask,
                    # But not includes any other text regions.
                    bounding_other_text_mask.to_inverted_mask(),
                ],
                ElementSetOperationMode.INTERSECT,
            )

            # Get non-text region.
            bounding_non_text_mask = bounding_rectangular_polygon.extract_mask(non_text_mask)

            # Combine trimmed text region and non-text region.
            # NOTE(review): relies on the default mode of Mask.from_masks, presumably union.
            bounding_extended_text_region_mask = Mask.from_masks(
                bounding_box,
                [bounding_trimmed_text_mask, bounding_non_text_mask],
            )

            bounding_extended_text_region_masks.append(bounding_extended_text_region_mask)
            bounding_rectangular_polygons.append(bounding_rectangular_polygon)

        return bounding_extended_text_region_masks, bounding_rectangular_polygons
@classmethod
def analyze_bounding_rectangular_polygons( cls, bounding_rectangular_polygons: Sequence[vkit.element.polygon.Polygon]):
339    @classmethod
340    def analyze_bounding_rectangular_polygons(
341        cls,
342        bounding_rectangular_polygons: Sequence[Polygon],
343    ):
344        long_side_ratios: List[float] = []
345        long_side_angles: List[int] = []
346
347        for polygon in bounding_rectangular_polygons:
348            # Get reference line.
349            point0, point1, _, point3 = polygon.points
350            side0_length = math.hypot(
351                point0.smooth_y - point1.smooth_y,
352                point0.smooth_x - point1.smooth_x,
353            )
354            side1_length = math.hypot(
355                point0.smooth_y - point3.smooth_y,
356                point0.smooth_x - point3.smooth_x,
357            )
358
359            long_side_ratios.append(
360                max(side0_length, side1_length) / min(side0_length, side1_length)
361            )
362
363            point_a = point0
364            if side0_length > side1_length:
365                # Reference line (p0 -> p1).
366                point_b = point1
367            else:
368                # Reference line (p0 -> p3).
369                point_b = point3
370
371            # Get the angle of reference line, in [0, 180) degree.
372            np_theta = np.arctan2(
373                point_a.smooth_y - point_b.smooth_y,
374                point_a.smooth_x - point_b.smooth_x,
375            )
376            np_theta = np_theta % np.pi
377            long_side_angle = round(np_theta / np.pi * 180) % 180
378            long_side_angles.append(long_side_angle)
379
380        return long_side_ratios, long_side_angles
@classmethod
def get_typical_angle( cls, typical_long_side_ratio_min: float, long_side_ratios: Sequence[float], long_side_angles: Sequence[int]):
382    @classmethod
383    def get_typical_angle(
384        cls,
385        typical_long_side_ratio_min: float,
386        long_side_ratios: Sequence[float],
387        long_side_angles: Sequence[int],
388    ):
389        typical_indices: Set[int] = set()
390        typical_long_side_angles: List[float] = []
391
392        for idx, (long_side_ratio, long_side_angle) in \
393                enumerate(zip(long_side_ratios, long_side_angles)):
394            if long_side_ratio < typical_long_side_ratio_min:
395                continue
396
397            typical_indices.add(idx)
398            typical_long_side_angles.append(long_side_angle)
399
400        if not typical_long_side_angles:
401            return None, typical_indices
402
403        # NOTE: Due to the sudden change between 179 and 0 degree,
404        # we need to normalize the range to [0, 360) before calculate the mean of angles.
405        two_pi = 2 * np.pi
406        np_angles = np.asarray(typical_long_side_angles) / 180 * two_pi
407        np_sin_mean = np.sin(np_angles).mean()
408        np_cos_mean = np.cos(np_angles).mean()
409
410        np_theta = np.arctan2(np_sin_mean, np_cos_mean)
411        np_theta = np_theta % two_pi
412        # Rescale the range back to [0, 180).
413        typical_angle = round(np_theta / two_pi * 180)
414
415        return typical_angle, typical_indices
@classmethod
def get_flattening_rotate_angles( cls, typical_angle: Union[int, NoneType], typical_indices: Set[int], long_side_angles: Sequence[int]):
417    @classmethod
418    def get_flattening_rotate_angles(
419        cls,
420        typical_angle: Optional[int],
421        typical_indices: Set[int],
422        long_side_angles: Sequence[int],
423    ):
424        if typical_angle is not None:
425            assert typical_indices
426
427        flattening_rotate_angles: List[int] = []
428
429        for idx, long_side_angle in enumerate(long_side_angles):
430            if typical_angle is None or idx in typical_indices:
431                # Dominated by long_side_angle.
432                main_angle = long_side_angle
433
434            else:
435                # Dominated by typical_angle.
436                short_side_angle = (long_side_angle + 90) % 180
437                long_side_delta = abs((long_side_angle - typical_angle + 90) % 180 - 90)
438                short_side_delta = abs((short_side_angle - typical_angle + 90) % 180 - 90)
439
440                if long_side_delta < short_side_delta:
441                    main_angle = long_side_angle
442                else:
443                    main_angle = short_side_angle
444
445            # Angle for flattening.
446            if main_angle <= 90:
447                # [270, 360).
448                flattening_rotate_angle = (360 - main_angle) % 360
449            else:
450                # [1, 90).
451                flattening_rotate_angle = 180 - main_angle
452            flattening_rotate_angles.append(flattening_rotate_angle)
453
454        return flattening_rotate_angles
@classmethod
def build_flattened_text_regions( cls, image: vkit.element.image.Image, text_region_polygons: Sequence[vkit.element.polygon.Polygon], bounding_extended_text_region_masks: Sequence[vkit.element.mask.Mask], typical_indices: Set[int], flattening_rotate_angles: Sequence[int], grouped_char_polygons: Union[Sequence[Sequence[vkit.element.polygon.Polygon]], NoneType]):
456    @classmethod
457    def build_flattened_text_regions(
458        cls,
459        image: Image,
460        text_region_polygons: Sequence[Polygon],
461        bounding_extended_text_region_masks: Sequence[Mask],
462        typical_indices: Set[int],
463        flattening_rotate_angles: Sequence[int],
464        grouped_char_polygons: Optional[Sequence[Sequence[Polygon]]],
465    ):
466        flattened_text_regions: List[FlattenedTextRegion] = []
467
468        for idx, (
469            text_region_polygon,
470            bounding_extended_text_region_mask,
471            flattening_rotate_angle,
472        ) in enumerate(
473            zip(
474                text_region_polygons,
475                bounding_extended_text_region_masks,
476                flattening_rotate_angles,
477            )
478        ):
479            bounding_box = bounding_extended_text_region_mask.box
480            assert bounding_box
481
482            # Extract image.
483            text_region_image = bounding_extended_text_region_mask.extract_image(image)
484
485            # Shift char polygons.
486            relative_char_polygons = None
487            if grouped_char_polygons is not None:
488                char_polygons = grouped_char_polygons[idx]
489                relative_char_polygons = [
490                    char_polygon.to_relative_polygon(
491                        origin_y=bounding_box.up,
492                        origin_x=bounding_box.left,
493                    ) for char_polygon in char_polygons
494                ]
495
496            # Rotate.
497            rotated_result = rotate.distort(
498                {'angle': flattening_rotate_angle},
499                image=text_region_image,
500                mask=bounding_extended_text_region_mask,
501                polygons=relative_char_polygons,
502            )
503            rotated_text_region_image = rotated_result.image
504            assert rotated_text_region_image
505            rotated_bounding_extended_text_region_mask = rotated_result.mask
506            assert rotated_bounding_extended_text_region_mask
507            # Could be None.
508            rotated_char_polygons = rotated_result.polygons
509
510            # Trim.
511            rotated_trimmed_box = rotated_bounding_extended_text_region_mask.to_external_box()
512
513            trimmed_text_region_image = rotated_text_region_image.to_cropped_image(
514                up=rotated_trimmed_box.up,
515                down=rotated_trimmed_box.down,
516                left=rotated_trimmed_box.left,
517                right=rotated_trimmed_box.right,
518            )
519
520            trimmed_mask = rotated_trimmed_box.extract_mask(
521                rotated_bounding_extended_text_region_mask
522            )
523
524            trimmed_char_polygons = None
525            if rotated_char_polygons:
526                trimmed_char_polygons = [
527                    rotated_char_polygon.to_relative_polygon(
528                        origin_y=rotated_trimmed_box.up,
529                        origin_x=rotated_trimmed_box.left,
530                    ) for rotated_char_polygon in rotated_char_polygons
531                ]
532
533            flattened_text_regions.append(
534                FlattenedTextRegion(
535                    is_typical=(idx in typical_indices),
536                    text_region_polygon=text_region_polygon,
537                    text_region_image=bounding_extended_text_region_mask.extract_image(image),
538                    bounding_extended_text_region_mask=bounding_extended_text_region_mask,
539                    flattening_rotate_angle=flattening_rotate_angle,
540                    shape_before_trim=rotated_text_region_image.shape,
541                    rotated_trimmed_box=rotated_trimmed_box,
542                    shape_before_resize=trimmed_text_region_image.shape,
543                    post_rotate_angle=0,
544                    flattened_image=trimmed_text_region_image,
545                    flattened_mask=trimmed_mask,
546                    flattened_char_polygons=trimmed_char_polygons,
547                )
548            )
549
550        return flattened_text_regions
def build_background_image_for_stacking(height: int, width: int):
def build_background_image_for_stacking(height: int, width: int):
    """
    Build the RGB background used when stacking flattened text regions.

    Every pixel is pure red, green or blue: the pixel at (row, col) has value 255
    in channel `(row + col) % 3` and 0 in the other two, giving a diagonal
    colour pattern that repeats every three rows and every three columns.

    Returns an `Image` wrapping a uint8 array of shape (height, width, 3).
    """
    np_image = np.zeros((height, width, 3), dtype=np.uint8)

    # Light up one channel per (row phase, column phase) pair using strided views.
    for row_phase in range(3):
        for col_phase in range(3):
            channel = (row_phase + col_phase) % 3
            np_image[row_phase::3, col_phase::3, channel] = 255

    return Image(mat=np_image)
def stack_flattened_text_regions( page_pad: int, flattened_text_regions_pad: int, flattened_text_regions: Sequence[vkit.pipeline.text_detection.page_text_region.FlattenedTextRegion]):
def stack_flattened_text_regions(
    page_pad: int,
    flattened_text_regions_pad: int,
    flattened_text_regions: Sequence[FlattenedTextRegion],
):
    """
    Pack all flattened text regions into one page image.

    Each region is padded by `flattened_text_regions_pad` on every side, packed
    (without rotation) into a single bin, and rendered on top of the background
    produced by `build_background_image_for_stacking`. The whole layout is then
    offset by `page_pad` on every side.

    Returns a tuple `(image, boxes, char_polygons)`:
    - `image`: the stacked page image.
    - `boxes`: the box of every region in the page, in input order (padding
      excluded).
    - `char_polygons`: the char polygons of all regions, shifted to page
      coordinates.

    Raises `ValueError` (from `max()`) when `flattened_text_regions` is empty.
    """
    page_double_pad = 2 * page_pad
    flattened_text_regions_double_pad = 2 * flattened_text_regions_pad

    rect_packer = RectPacker(rotation=False)

    # Add box and bin.
    # NOTE: Only one bin is added, that is, packing all text region into one image.
    # The bin is sized from the PADDED rectangles, so that stacking every rectangle
    # vertically is always a feasible layout. Sizing it from the unpadded heights
    # plus a single double pad is too small as soon as two or more rectangles have
    # to be stacked, in which case the packer leaves rectangles out.
    bin_width = 0
    bin_height = 0

    for ftr_idx, flattened_text_region in enumerate(flattened_text_regions):
        padded_width = flattened_text_region.width + flattened_text_regions_double_pad
        padded_height = flattened_text_region.height + flattened_text_regions_double_pad

        rect_packer.add_rect(
            width=padded_width,
            height=padded_height,
            rid=ftr_idx,
        )

        bin_width = max(bin_width, padded_width)
        bin_height += padded_height

    rect_packer.add_bin(width=bin_width, height=bin_height)

    # Pack boxes.
    rect_packer.pack()  # type: ignore

    # Get packed boxes, ordered by the region index they were registered with.
    num_flattened_text_regions = len(flattened_text_regions)
    ordered_boxes: List[Optional[Box]] = [None] * num_flattened_text_regions
    for bin_idx, x, y, width, height, ftr_idx in rect_packer.rect_list():
        assert bin_idx == 0
        ordered_boxes[ftr_idx] = Box(
            up=y,
            down=y + height - 1,
            left=x,
            right=x + width - 1,
        )

    # Every region must have been placed; otherwise regions would be silently
    # dropped (or misaligned) when zipped with the boxes below.
    padded_boxes: List[Box] = []
    for ftr_idx, ordered_box in enumerate(ordered_boxes):
        assert ordered_box is not None, \
            f'Failed to pack flattened text region with index={ftr_idx}'
        padded_boxes.append(ordered_box)

    page_height = max(box.down for box in padded_boxes) + 1 + page_double_pad
    page_width = max(box.right for box in padded_boxes) + 1 + page_double_pad

    image = build_background_image_for_stacking(page_height, page_width)
    boxes: List[Box] = []
    char_polygons: List[Polygon] = []

    for padded_box, flattened_text_region in zip(padded_boxes, flattened_text_regions):
        assert flattened_text_region.height + flattened_text_regions_double_pad \
            == padded_box.height
        assert flattened_text_region.width + flattened_text_regions_double_pad \
            == padded_box.width

        # Remove box padding and apply the page offset.
        up = padded_box.up + flattened_text_regions_pad + page_pad
        left = padded_box.left + flattened_text_regions_pad + page_pad

        box = Box(
            up=up,
            down=up + flattened_text_region.height - 1,
            left=left,
            right=left + flattened_text_region.width - 1,
        )
        boxes.append(box)

        # Render.
        box.fill_image(
            image,
            flattened_text_region.flattened_image,
            image_mask=flattened_text_region.flattened_mask,
        )

        # Shift char polygons from region coordinates to page coordinates.
        if flattened_text_region.flattened_char_polygons:
            for char_polygon in flattened_text_region.flattened_char_polygons:
                char_polygons.append(char_polygon.to_shifted_polygon(
                    offset_y=up,
                    offset_x=left,
                ))

    return image, boxes, char_polygons
 720class PageTextRegionStep(
 721    PipelineStep[
 722        PageTextRegionStepConfig,
 723        PageTextRegionStepInput,
 724        PageTextRegionStepOutput,
 725    ]
 726):  # yapf: disable
 727
 728    @classmethod
 729    def generate_precise_text_region_candidate_polygons(
 730        cls,
 731        precise_mask: Mask,
 732        disconnected_text_region_mask: Mask,
 733    ):
 734        assert precise_mask.box and disconnected_text_region_mask.box
 735
 736        # Get the intersection.
 737        intersected_box = Box(
 738            up=max(precise_mask.box.up, disconnected_text_region_mask.box.up),
 739            down=min(precise_mask.box.down, disconnected_text_region_mask.box.down),
 740            left=max(precise_mask.box.left, disconnected_text_region_mask.box.left),
 741            right=min(precise_mask.box.right, disconnected_text_region_mask.box.right),
 742        )
 743        assert intersected_box.up <= intersected_box.down
 744        assert intersected_box.left <= intersected_box.right
 745
 746        precise_mask = intersected_box.extract_mask(precise_mask)
 747        disconnected_text_region_mask = intersected_box.extract_mask(disconnected_text_region_mask)
 748
 749        # Apply mask bitwise-and operation.
 750        intersected_mask = Mask(
 751            mat=(disconnected_text_region_mask.mat & precise_mask.mat).astype(np.uint8)
 752        )
 753        intersected_mask = intersected_mask.to_box_attached(intersected_box)
 754
 755        # NOTE:
 756        # 1. Could extract more than one polygons.
 757        # 2. Some polygons are in border and should be removed later.
 758        return intersected_mask.to_disconnected_polygons()
 759
 760    @classmethod
 761    def strtree_query_intersected_polygons(
 762        cls,
 763        strtree: STRtree,
 764        id_to_anchor_polygon: Dict[int, Polygon],
 765        candidate_polygon: Polygon,
 766    ):
 767        candidate_shapely_polygon = candidate_polygon.to_shapely_polygon()
 768        candidate_mask = candidate_polygon.mask
 769
 770        for anchor_shapely_polygon in strtree.query(candidate_shapely_polygon):
 771            anchor_id = id(anchor_shapely_polygon)
 772            anchor_polygon = id_to_anchor_polygon[anchor_id]
 773            anchor_mask = anchor_polygon.mask
 774
 775            intersected_ratio = calculate_boxed_masks_intersected_ratio(
 776                anchor_mask=anchor_mask,
 777                candidate_mask=candidate_mask,
 778                use_candidate_as_base=True,
 779            )
 780
 781            yield (
 782                anchor_id,
 783                anchor_polygon,
 784                anchor_mask,
 785                candidate_mask,
 786                intersected_ratio,
 787            )
 788
 789    def sample_page_non_text_region_polygons(
 790        self,
 791        page_non_text_region_polygons: Sequence[Polygon],
 792        num_page_text_region_infos: int,
 793        rng: RandomGenerator,
 794    ):
 795        negative_ratio = self.config.negative_text_region_ratio
 796        num_page_non_text_region_polygons = round(
 797            negative_ratio * num_page_text_region_infos / (1 - negative_ratio)
 798        )
 799        return rng_choice_with_size(
 800            rng,
 801            page_non_text_region_polygons,
 802            size=min(
 803                num_page_non_text_region_polygons,
 804                len(page_non_text_region_polygons),
 805            ),
 806            replace=False,
 807        )
 808
 809    def build_flattened_text_regions(
 810        self,
 811        page_image: Image,
 812        page_text_region_infos: Sequence[PageTextRegionInfo],
 813        page_non_text_region_polygons: Sequence[Polygon],
 814        rng: RandomGenerator,
 815    ):
 816        text_region_polygon_dilate_ratio = float(
 817            rng.uniform(
 818                self.config.text_region_flattener_text_region_polygon_dilate_ratio_min,
 819                self.config.text_region_flattener_text_region_polygon_dilate_ratio_max,
 820            )
 821        )
 822        typical_long_side_ratio_min = \
 823            self.config.text_region_flattener_typical_long_side_ratio_min
 824
 825        text_region_polygons: List[Polygon] = []
 826        grouped_char_polygons: List[Sequence[Polygon]] = []
 827        for page_text_region_info in page_text_region_infos:
 828            text_region_polygons.append(page_text_region_info.precise_text_region_polygon)
 829            grouped_char_polygons.append(page_text_region_info.char_polygons)
 830
 831        # Inject nagative regions.
 832        for page_non_text_region_polygon in page_non_text_region_polygons:
 833            text_region_polygons.append(page_non_text_region_polygon)
 834            grouped_char_polygons.append(tuple())
 835
 836        text_region_flattener = TextRegionFlattener(
 837            typical_long_side_ratio_min=typical_long_side_ratio_min,
 838            text_region_polygon_dilate_ratio=text_region_polygon_dilate_ratio,
 839            image=page_image,
 840            text_region_polygons=text_region_polygons,
 841            grouped_char_polygons=grouped_char_polygons,
 842            is_training=True,
 843        )
 844
 845        # Resize positive ftr.
 846        positive_flattened_text_regions: List[FlattenedTextRegion] = []
 847        # For negative sampling.
 848        positive_reference_heights: List[float] = []
 849        positive_reference_widths: List[float] = []
 850        num_negative_flattened_text_regions = 0
 851
 852        for flattened_text_region in text_region_flattener.flattened_text_regions:
 853            if not flattened_text_region.flattened_char_polygons:
 854                num_negative_flattened_text_regions += 1
 855                continue
 856
 857            char_height_median = flattened_text_region.get_char_height_meidan()
 858
 859            text_region_resize_char_height_median = int(
 860                rng.integers(
 861                    self.config.text_region_resize_char_height_median_min,
 862                    self.config.text_region_resize_char_height_median_max + 1,
 863                )
 864            )
 865            scale = text_region_resize_char_height_median / char_height_median
 866
 867            height, width = flattened_text_region.shape
 868            resized_height = round(height * scale)
 869            resized_width = round(width * scale)
 870
 871            flattened_text_region = flattened_text_region.to_resized_flattened_text_region(
 872                resized_height=resized_height,
 873                resized_width=resized_width,
 874            )
 875
 876            positive_reference_heights.append(resized_height)
 877            positive_reference_widths.append(resized_width)
 878
 879            # Post rotate.
 880            post_rotate_angle = 0
 881            if flattened_text_region.is_typical:
 882                if rng.random() < self.config.text_region_typical_post_rotate_prob:
 883                    # Upside down only.
 884                    post_rotate_angle = 180
 885            else:
 886                if rng.random() < self.config.text_region_untypical_post_rotate_prob:
 887                    # 3-way rotate.
 888                    post_rotate_angle = rng_choice(rng, (180, 90, 270), probs=(0.5, 0.25, 0.25))
 889
 890            if post_rotate_angle != 0:
 891                flattened_text_region = \
 892                    flattened_text_region.to_post_rotated_flattened_text_region(post_rotate_angle)
 893
 894            positive_flattened_text_regions.append(flattened_text_region)
 895
 896        # Resize negative ftr.
 897        negative_reference_heights = list(
 898            rng_choice_with_size(
 899                rng,
 900                positive_reference_heights,
 901                size=num_negative_flattened_text_regions,
 902                replace=(num_negative_flattened_text_regions > len(positive_reference_heights)),
 903            )
 904        )
 905
 906        negative_height_max = max(positive_reference_heights)
 907        negative_width_max = max(positive_reference_widths)
 908
 909        negative_flattened_text_regions: List[FlattenedTextRegion] = []
 910
 911        for flattened_text_region in text_region_flattener.flattened_text_regions:
 912            if flattened_text_region.flattened_char_polygons:
 913                continue
 914
 915            reference_height = negative_reference_heights.pop()
 916            scale = reference_height / flattened_text_region.height
 917
 918            height, width = flattened_text_region.shape
 919            resized_height = round(height * scale)
 920            resized_width = round(width * scale)
 921
 922            # Remove negative region that is too large.
 923            if resized_height > negative_height_max or resized_width > negative_width_max:
 924                continue
 925
 926            flattened_text_region = flattened_text_region.to_resized_flattened_text_region(
 927                resized_height=resized_height,
 928                resized_width=resized_width,
 929            )
 930
 931            # Post rotate.
 932            post_rotate_angle = 0
 933            if flattened_text_region.is_typical:
 934                if rng.random() < self.config.text_region_typical_post_rotate_prob:
 935                    # Upside down only.
 936                    post_rotate_angle = 180
 937            else:
 938                if rng.random() < self.config.text_region_untypical_post_rotate_prob:
 939                    # 3-way rotate.
 940                    post_rotate_angle = rng_choice(rng, (180, 90, 270), probs=(0.5, 0.25, 0.25))
 941
 942            if post_rotate_angle != 0:
 943                flattened_text_region = \
 944                    flattened_text_region.to_post_rotated_flattened_text_region(post_rotate_angle)
 945
 946            negative_flattened_text_regions.append(flattened_text_region)
 947
 948        flattened_text_regions = (
 949            *positive_flattened_text_regions,
 950            *negative_flattened_text_regions,
 951        )
 952        return flattened_text_regions
 953
 954    def run(self, input: PageTextRegionStepInput, rng: RandomGenerator):
 955        page_distortion_step_output = input.page_distortion_step_output
 956        page_image = page_distortion_step_output.page_image
 957        page_char_polygon_collection = page_distortion_step_output.page_char_polygon_collection
 958        page_disconnected_text_region_collection = \
 959            page_distortion_step_output.page_disconnected_text_region_collection
 960        page_non_text_region_collection = \
 961            page_distortion_step_output.page_non_text_region_collection
 962
 963        page_resizing_step_output = input.page_resizing_step_output
 964        page_resized_text_line_mask = page_resizing_step_output.page_text_line_mask
 965
 966        debug = None
 967        if self.config.enable_debug:
 968            debug = PageTextRegionStepDebug()
 969
 970        # Build R-tree to track text regions.
 971        # https://github.com/shapely/shapely/issues/640
 972        id_to_disconnected_text_region_polygon: Dict[int, Polygon] = {}
 973        disconnected_text_region_shapely_polygons: List[ShapelyPolygon] = []
 974
 975        for polygon in page_disconnected_text_region_collection.to_polygons():
 976            shapely_polygon = polygon.to_shapely_polygon()
 977            id_to_disconnected_text_region_polygon[id(shapely_polygon)] = polygon
 978            disconnected_text_region_shapely_polygons.append(shapely_polygon)
 979
 980        disconnected_text_region_tree = STRtree(disconnected_text_region_shapely_polygons)
 981
 982        # Get the precise text regions.
 983        precise_text_region_candidate_polygons: List[Polygon] = []
 984        for resized_precise_polygon in page_resized_text_line_mask.to_disconnected_polygons():
 985            # Resize back to the shape after distortion.
 986            precise_polygon = resized_precise_polygon.to_conducted_resized_polygon(
 987                page_resized_text_line_mask,
 988                resized_height=page_image.height,
 989                resized_width=page_image.width,
 990            )
 991
 992            # Find and extract intersected text region.
 993            # NOTE: One precise_polygon could be overlapped with
 994            # more than one disconnected_text_region_polygon!
 995            for _, _, disconnected_text_region_mask, precise_mask, _ in \
 996                    self.strtree_query_intersected_polygons(
 997                        strtree=disconnected_text_region_tree,
 998                        id_to_anchor_polygon=id_to_disconnected_text_region_polygon,
 999                        candidate_polygon=precise_polygon,
1000                    ):
1001                precise_text_region_candidate_polygons.extend(
1002                    self.generate_precise_text_region_candidate_polygons(
1003                        precise_mask=precise_mask,
1004                        disconnected_text_region_mask=disconnected_text_region_mask,
1005                    )
1006                )
1007
1008        if debug:
1009            debug.page_image = page_image
1010            debug.precise_text_region_candidate_polygons = precise_text_region_candidate_polygons
1011
1012        # Help gc.
1013        del id_to_disconnected_text_region_polygon
1014        del disconnected_text_region_shapely_polygons
1015        del disconnected_text_region_tree
1016
1017        # Bind char-level polygon to precise text region.
1018        id_to_precise_text_region_polygon: Dict[int, Polygon] = {}
1019        precise_text_region_shapely_polygons: List[ShapelyPolygon] = []
1020
1021        for polygon in precise_text_region_candidate_polygons:
1022            shapely_polygon = polygon.to_shapely_polygon()
1023            id_to_precise_text_region_polygon[id(shapely_polygon)] = polygon
1024            precise_text_region_shapely_polygons.append(shapely_polygon)
1025
1026        precise_text_region_tree = STRtree(precise_text_region_shapely_polygons)
1027
1028        id_to_char_polygons: DefaultDict[int, List[Polygon]] = defaultdict(list)
1029        for char_polygon in page_char_polygon_collection.polygons:
1030            best_precise_text_region_id = None
1031            intersected_ratio_max = 0
1032
1033            for (
1034                precise_text_region_id,
1035                _,
1036                _,
1037                _,
1038                intersected_ratio,
1039            ) in self.strtree_query_intersected_polygons(
1040                strtree=precise_text_region_tree,
1041                id_to_anchor_polygon=id_to_precise_text_region_polygon,
1042                candidate_polygon=char_polygon,
1043            ):
1044                if intersected_ratio > intersected_ratio_max:
1045                    intersected_ratio_max = intersected_ratio
1046                    best_precise_text_region_id = precise_text_region_id
1047
1048            if best_precise_text_region_id is not None:
1049                id_to_char_polygons[best_precise_text_region_id].append(char_polygon)
1050            else:
1051                # NOTE: Text line with only a small char (i.e. delimiter) could enter this branch.
1052                # In such case, the text line bounding box is smaller than the char polygon, since
1053                # the leading/trailing char paddings are ignored during text line rendering.
1054                # It's acceptable for now since: 1) this case happens rarely, 2) and it won't
1055                # introduce labeling noise.
1056                logger.warning(f'Cannot assign a text region for char_polygon={char_polygon}')
1057
1058        page_text_region_infos: List[PageTextRegionInfo] = []
1059        for precise_text_region_shapely_polygon in precise_text_region_shapely_polygons:
1060            ptrsp_id = id(precise_text_region_shapely_polygon)
1061            if ptrsp_id not in id_to_char_polygons:
1062                # Not related to any char polygons.
1063                continue
1064            assert id_to_char_polygons[ptrsp_id]
1065            page_text_region_infos.append(
1066                PageTextRegionInfo(
1067                    precise_text_region_polygon=id_to_precise_text_region_polygon[ptrsp_id],
1068                    char_polygons=id_to_char_polygons[ptrsp_id],
1069                )
1070            )
1071
1072        # Help gc.
1073        del id_to_precise_text_region_polygon
1074        del precise_text_region_shapely_polygons
1075        del precise_text_region_tree
1076
1077        if debug:
1078            debug.page_text_region_infos = page_text_region_infos
1079
1080        # Negative sampling.
1081        page_non_text_region_polygons = self.sample_page_non_text_region_polygons(
1082            page_non_text_region_polygons=tuple(page_non_text_region_collection.to_polygons()),
1083            num_page_text_region_infos=len(page_text_region_infos),
1084            rng=rng,
1085        )
1086
1087        flattened_text_regions = self.build_flattened_text_regions(
1088            page_image=page_image,
1089            page_text_region_infos=page_text_region_infos,
1090            page_non_text_region_polygons=page_non_text_region_polygons,
1091            rng=rng,
1092        )
1093        if debug:
1094            debug.flattened_text_regions = flattened_text_regions
1095
1096        # Stack text regions.
1097        image, _, char_polygons = stack_flattened_text_regions(
1098            page_pad=0,
1099            flattened_text_regions_pad=self.config.stack_flattened_text_regions_pad,
1100            flattened_text_regions=flattened_text_regions,
1101        )
1102
1103        # Post uniform rotation.
1104        shape_before_rotate = image.shape
1105        rotate_angle = 0
1106
1107        if self.config.enable_post_rotate:
1108            rotate_angle = int(
1109                rng.integers(
1110                    self.config.post_rotate_angle_min,
1111                    self.config.post_rotate_angle_max + 1,
1112                )
1113            )
1114            rotated_result = rotate.distort(
1115                {'angle': rotate_angle},
1116                image=image,
1117                polygons=char_polygons,
1118            )
1119            assert rotated_result.image and rotated_result.polygons
1120            image = rotated_result.image
1121            char_polygons = rotated_result.polygons
1122
1123        return PageTextRegionStepOutput(
1124            page_image=image,
1125            page_char_polygons=char_polygons,
1126            shape_before_rotate=shape_before_rotate,
1127            rotate_angle=rotate_angle,
1128            debug=debug,
1129        )

Abstract base class for generic types.

A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::

    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
        # Etc.

This class can then be used as follows::

    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default

@classmethod
def generate_precise_text_region_candidate_polygons( cls, precise_mask: vkit.element.mask.Mask, disconnected_text_region_mask: vkit.element.mask.Mask):
728    @classmethod
729    def generate_precise_text_region_candidate_polygons(
730        cls,
731        precise_mask: Mask,
732        disconnected_text_region_mask: Mask,
733    ):
734        assert precise_mask.box and disconnected_text_region_mask.box
735
736        # Get the intersection.
737        intersected_box = Box(
738            up=max(precise_mask.box.up, disconnected_text_region_mask.box.up),
739            down=min(precise_mask.box.down, disconnected_text_region_mask.box.down),
740            left=max(precise_mask.box.left, disconnected_text_region_mask.box.left),
741            right=min(precise_mask.box.right, disconnected_text_region_mask.box.right),
742        )
743        assert intersected_box.up <= intersected_box.down
744        assert intersected_box.left <= intersected_box.right
745
746        precise_mask = intersected_box.extract_mask(precise_mask)
747        disconnected_text_region_mask = intersected_box.extract_mask(disconnected_text_region_mask)
748
749        # Apply mask bitwise-and operation.
750        intersected_mask = Mask(
751            mat=(disconnected_text_region_mask.mat & precise_mask.mat).astype(np.uint8)
752        )
753        intersected_mask = intersected_mask.to_box_attached(intersected_box)
754
755        # NOTE:
756        # 1. Could extract more than one polygons.
757        # 2. Some polygons are in border and should be removed later.
758        return intersected_mask.to_disconnected_polygons()
@classmethod
def strtree_query_intersected_polygons( cls, strtree: shapely.strtree.STRtree, id_to_anchor_polygon: Dict[int, vkit.element.polygon.Polygon], candidate_polygon: vkit.element.polygon.Polygon):
760    @classmethod
761    def strtree_query_intersected_polygons(
762        cls,
763        strtree: STRtree,
764        id_to_anchor_polygon: Dict[int, Polygon],
765        candidate_polygon: Polygon,
766    ):
767        candidate_shapely_polygon = candidate_polygon.to_shapely_polygon()
768        candidate_mask = candidate_polygon.mask
769
770        for anchor_shapely_polygon in strtree.query(candidate_shapely_polygon):
771            anchor_id = id(anchor_shapely_polygon)
772            anchor_polygon = id_to_anchor_polygon[anchor_id]
773            anchor_mask = anchor_polygon.mask
774
775            intersected_ratio = calculate_boxed_masks_intersected_ratio(
776                anchor_mask=anchor_mask,
777                candidate_mask=candidate_mask,
778                use_candidate_as_base=True,
779            )
780
781            yield (
782                anchor_id,
783                anchor_polygon,
784                anchor_mask,
785                candidate_mask,
786                intersected_ratio,
787            )
def sample_page_non_text_region_polygons( self, page_non_text_region_polygons: Sequence[vkit.element.polygon.Polygon], num_page_text_region_infos: int, rng: numpy.random._generator.Generator):
789    def sample_page_non_text_region_polygons(
790        self,
791        page_non_text_region_polygons: Sequence[Polygon],
792        num_page_text_region_infos: int,
793        rng: RandomGenerator,
794    ):
795        negative_ratio = self.config.negative_text_region_ratio
796        num_page_non_text_region_polygons = round(
797            negative_ratio * num_page_text_region_infos / (1 - negative_ratio)
798        )
799        return rng_choice_with_size(
800            rng,
801            page_non_text_region_polygons,
802            size=min(
803                num_page_non_text_region_polygons,
804                len(page_non_text_region_polygons),
805            ),
806            replace=False,
807        )
def build_flattened_text_regions( self, page_image: vkit.element.image.Image, page_text_region_infos: Sequence[vkit.pipeline.text_detection.page_text_region.PageTextRegionInfo], page_non_text_region_polygons: Sequence[vkit.element.polygon.Polygon], rng: numpy.random._generator.Generator):
809    def build_flattened_text_regions(
810        self,
811        page_image: Image,
812        page_text_region_infos: Sequence[PageTextRegionInfo],
813        page_non_text_region_polygons: Sequence[Polygon],
814        rng: RandomGenerator,
815    ):
816        text_region_polygon_dilate_ratio = float(
817            rng.uniform(
818                self.config.text_region_flattener_text_region_polygon_dilate_ratio_min,
819                self.config.text_region_flattener_text_region_polygon_dilate_ratio_max,
820            )
821        )
822        typical_long_side_ratio_min = \
823            self.config.text_region_flattener_typical_long_side_ratio_min
824
825        text_region_polygons: List[Polygon] = []
826        grouped_char_polygons: List[Sequence[Polygon]] = []
827        for page_text_region_info in page_text_region_infos:
828            text_region_polygons.append(page_text_region_info.precise_text_region_polygon)
829            grouped_char_polygons.append(page_text_region_info.char_polygons)
830
831        # Inject nagative regions.
832        for page_non_text_region_polygon in page_non_text_region_polygons:
833            text_region_polygons.append(page_non_text_region_polygon)
834            grouped_char_polygons.append(tuple())
835
836        text_region_flattener = TextRegionFlattener(
837            typical_long_side_ratio_min=typical_long_side_ratio_min,
838            text_region_polygon_dilate_ratio=text_region_polygon_dilate_ratio,
839            image=page_image,
840            text_region_polygons=text_region_polygons,
841            grouped_char_polygons=grouped_char_polygons,
842            is_training=True,
843        )
844
845        # Resize positive ftr.
846        positive_flattened_text_regions: List[FlattenedTextRegion] = []
847        # For negative sampling.
848        positive_reference_heights: List[float] = []
849        positive_reference_widths: List[float] = []
850        num_negative_flattened_text_regions = 0
851
852        for flattened_text_region in text_region_flattener.flattened_text_regions:
853            if not flattened_text_region.flattened_char_polygons:
854                num_negative_flattened_text_regions += 1
855                continue
856
857            char_height_median = flattened_text_region.get_char_height_meidan()
858
859            text_region_resize_char_height_median = int(
860                rng.integers(
861                    self.config.text_region_resize_char_height_median_min,
862                    self.config.text_region_resize_char_height_median_max + 1,
863                )
864            )
865            scale = text_region_resize_char_height_median / char_height_median
866
867            height, width = flattened_text_region.shape
868            resized_height = round(height * scale)
869            resized_width = round(width * scale)
870
871            flattened_text_region = flattened_text_region.to_resized_flattened_text_region(
872                resized_height=resized_height,
873                resized_width=resized_width,
874            )
875
876            positive_reference_heights.append(resized_height)
877            positive_reference_widths.append(resized_width)
878
879            # Post rotate.
880            post_rotate_angle = 0
881            if flattened_text_region.is_typical:
882                if rng.random() < self.config.text_region_typical_post_rotate_prob:
883                    # Upside down only.
884                    post_rotate_angle = 180
885            else:
886                if rng.random() < self.config.text_region_untypical_post_rotate_prob:
887                    # 3-way rotate.
888                    post_rotate_angle = rng_choice(rng, (180, 90, 270), probs=(0.5, 0.25, 0.25))
889
890            if post_rotate_angle != 0:
891                flattened_text_region = \
892                    flattened_text_region.to_post_rotated_flattened_text_region(post_rotate_angle)
893
894            positive_flattened_text_regions.append(flattened_text_region)
895
896        # Resize negative ftr.
897        negative_reference_heights = list(
898            rng_choice_with_size(
899                rng,
900                positive_reference_heights,
901                size=num_negative_flattened_text_regions,
902                replace=(num_negative_flattened_text_regions > len(positive_reference_heights)),
903            )
904        )
905
906        negative_height_max = max(positive_reference_heights)
907        negative_width_max = max(positive_reference_widths)
908
909        negative_flattened_text_regions: List[FlattenedTextRegion] = []
910
911        for flattened_text_region in text_region_flattener.flattened_text_regions:
912            if flattened_text_region.flattened_char_polygons:
913                continue
914
915            reference_height = negative_reference_heights.pop()
916            scale = reference_height / flattened_text_region.height
917
918            height, width = flattened_text_region.shape
919            resized_height = round(height * scale)
920            resized_width = round(width * scale)
921
922            # Remove negative region that is too large.
923            if resized_height > negative_height_max or resized_width > negative_width_max:
924                continue
925
926            flattened_text_region = flattened_text_region.to_resized_flattened_text_region(
927                resized_height=resized_height,
928                resized_width=resized_width,
929            )
930
931            # Post rotate.
932            post_rotate_angle = 0
933            if flattened_text_region.is_typical:
934                if rng.random() < self.config.text_region_typical_post_rotate_prob:
935                    # Upside down only.
936                    post_rotate_angle = 180
937            else:
938                if rng.random() < self.config.text_region_untypical_post_rotate_prob:
939                    # 3-way rotate.
940                    post_rotate_angle = rng_choice(rng, (180, 90, 270), probs=(0.5, 0.25, 0.25))
941
942            if post_rotate_angle != 0:
943                flattened_text_region = \
944                    flattened_text_region.to_post_rotated_flattened_text_region(post_rotate_angle)
945
946            negative_flattened_text_regions.append(flattened_text_region)
947
948        flattened_text_regions = (
949            *positive_flattened_text_regions,
950            *negative_flattened_text_regions,
951        )
952        return flattened_text_regions
def run( self, input: vkit.pipeline.text_detection.page_text_region.PageTextRegionStepInput, rng: numpy.random._generator.Generator):
 954    def run(self, input: PageTextRegionStepInput, rng: RandomGenerator):
 955        page_distortion_step_output = input.page_distortion_step_output
 956        page_image = page_distortion_step_output.page_image
 957        page_char_polygon_collection = page_distortion_step_output.page_char_polygon_collection
 958        page_disconnected_text_region_collection = \
 959            page_distortion_step_output.page_disconnected_text_region_collection
 960        page_non_text_region_collection = \
 961            page_distortion_step_output.page_non_text_region_collection
 962
 963        page_resizing_step_output = input.page_resizing_step_output
 964        page_resized_text_line_mask = page_resizing_step_output.page_text_line_mask
 965
 966        debug = None
 967        if self.config.enable_debug:
 968            debug = PageTextRegionStepDebug()
 969
 970        # Build R-tree to track text regions.
 971        # https://github.com/shapely/shapely/issues/640
 972        id_to_disconnected_text_region_polygon: Dict[int, Polygon] = {}
 973        disconnected_text_region_shapely_polygons: List[ShapelyPolygon] = []
 974
 975        for polygon in page_disconnected_text_region_collection.to_polygons():
 976            shapely_polygon = polygon.to_shapely_polygon()
 977            id_to_disconnected_text_region_polygon[id(shapely_polygon)] = polygon
 978            disconnected_text_region_shapely_polygons.append(shapely_polygon)
 979
 980        disconnected_text_region_tree = STRtree(disconnected_text_region_shapely_polygons)
 981
 982        # Get the precise text regions.
 983        precise_text_region_candidate_polygons: List[Polygon] = []
 984        for resized_precise_polygon in page_resized_text_line_mask.to_disconnected_polygons():
 985            # Resize back to the shape after distortion.
 986            precise_polygon = resized_precise_polygon.to_conducted_resized_polygon(
 987                page_resized_text_line_mask,
 988                resized_height=page_image.height,
 989                resized_width=page_image.width,
 990            )
 991
 992            # Find and extract intersected text region.
 993            # NOTE: One precise_polygon could be overlapped with
 994            # more than one disconnected_text_region_polygon!
 995            for _, _, disconnected_text_region_mask, precise_mask, _ in \
 996                    self.strtree_query_intersected_polygons(
 997                        strtree=disconnected_text_region_tree,
 998                        id_to_anchor_polygon=id_to_disconnected_text_region_polygon,
 999                        candidate_polygon=precise_polygon,
1000                    ):
1001                precise_text_region_candidate_polygons.extend(
1002                    self.generate_precise_text_region_candidate_polygons(
1003                        precise_mask=precise_mask,
1004                        disconnected_text_region_mask=disconnected_text_region_mask,
1005                    )
1006                )
1007
1008        if debug:
1009            debug.page_image = page_image
1010            debug.precise_text_region_candidate_polygons = precise_text_region_candidate_polygons
1011
1012        # Help gc.
1013        del id_to_disconnected_text_region_polygon
1014        del disconnected_text_region_shapely_polygons
1015        del disconnected_text_region_tree
1016
1017        # Bind char-level polygon to precise text region.
1018        id_to_precise_text_region_polygon: Dict[int, Polygon] = {}
1019        precise_text_region_shapely_polygons: List[ShapelyPolygon] = []
1020
1021        for polygon in precise_text_region_candidate_polygons:
1022            shapely_polygon = polygon.to_shapely_polygon()
1023            id_to_precise_text_region_polygon[id(shapely_polygon)] = polygon
1024            precise_text_region_shapely_polygons.append(shapely_polygon)
1025
1026        precise_text_region_tree = STRtree(precise_text_region_shapely_polygons)
1027
1028        id_to_char_polygons: DefaultDict[int, List[Polygon]] = defaultdict(list)
1029        for char_polygon in page_char_polygon_collection.polygons:
1030            best_precise_text_region_id = None
1031            intersected_ratio_max = 0
1032
1033            for (
1034                precise_text_region_id,
1035                _,
1036                _,
1037                _,
1038                intersected_ratio,
1039            ) in self.strtree_query_intersected_polygons(
1040                strtree=precise_text_region_tree,
1041                id_to_anchor_polygon=id_to_precise_text_region_polygon,
1042                candidate_polygon=char_polygon,
1043            ):
1044                if intersected_ratio > intersected_ratio_max:
1045                    intersected_ratio_max = intersected_ratio
1046                    best_precise_text_region_id = precise_text_region_id
1047
1048            if best_precise_text_region_id is not None:
1049                id_to_char_polygons[best_precise_text_region_id].append(char_polygon)
1050            else:
1051                # NOTE: Text line with only a small char (i.e. delimiter) could enter this branch.
1052                # In such case, the text line bounding box is smaller than the char polygon, since
1053                # the leading/trailing char paddings are ignored during text line rendering.
1054                # It's acceptable for now since: 1) this case happens rarely, 2) and it won't
1055                # introduce labeling noise.
1056                logger.warning(f'Cannot assign a text region for char_polygon={char_polygon}')
1057
1058        page_text_region_infos: List[PageTextRegionInfo] = []
1059        for precise_text_region_shapely_polygon in precise_text_region_shapely_polygons:
1060            ptrsp_id = id(precise_text_region_shapely_polygon)
1061            if ptrsp_id not in id_to_char_polygons:
1062                # Not related to any char polygons.
1063                continue
1064            assert id_to_char_polygons[ptrsp_id]
1065            page_text_region_infos.append(
1066                PageTextRegionInfo(
1067                    precise_text_region_polygon=id_to_precise_text_region_polygon[ptrsp_id],
1068                    char_polygons=id_to_char_polygons[ptrsp_id],
1069                )
1070            )
1071
1072        # Help gc.
1073        del id_to_precise_text_region_polygon
1074        del precise_text_region_shapely_polygons
1075        del precise_text_region_tree
1076
1077        if debug:
1078            debug.page_text_region_infos = page_text_region_infos
1079
1080        # Negative sampling.
1081        page_non_text_region_polygons = self.sample_page_non_text_region_polygons(
1082            page_non_text_region_polygons=tuple(page_non_text_region_collection.to_polygons()),
1083            num_page_text_region_infos=len(page_text_region_infos),
1084            rng=rng,
1085        )
1086
1087        flattened_text_regions = self.build_flattened_text_regions(
1088            page_image=page_image,
1089            page_text_region_infos=page_text_region_infos,
1090            page_non_text_region_polygons=page_non_text_region_polygons,
1091            rng=rng,
1092        )
1093        if debug:
1094            debug.flattened_text_regions = flattened_text_regions
1095
1096        # Stack text regions.
1097        image, _, char_polygons = stack_flattened_text_regions(
1098            page_pad=0,
1099            flattened_text_regions_pad=self.config.stack_flattened_text_regions_pad,
1100            flattened_text_regions=flattened_text_regions,
1101        )
1102
1103        # Post uniform rotation.
1104        shape_before_rotate = image.shape
1105        rotate_angle = 0
1106
1107        if self.config.enable_post_rotate:
1108            rotate_angle = int(
1109                rng.integers(
1110                    self.config.post_rotate_angle_min,
1111                    self.config.post_rotate_angle_max + 1,
1112                )
1113            )
1114            rotated_result = rotate.distort(
1115                {'angle': rotate_angle},
1116                image=image,
1117                polygons=char_polygons,
1118            )
1119            assert rotated_result.image and rotated_result.polygons
1120            image = rotated_result.image
1121            char_polygons = rotated_result.polygons
1122
1123        return PageTextRegionStepOutput(
1124            page_image=image,
1125            page_char_polygons=char_polygons,
1126            shape_before_rotate=shape_before_rotate,
1127            rotate_angle=rotate_angle,
1128            debug=debug,
1129        )