vkit.pipeline.text_detection.page_text_region

   1# Copyright 2022 vkit-x Administrator. All Rights Reserved.
   2#
   3# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
   4#
   5# The commercial license gives you the full rights to create and distribute software
   6# on your own terms without any SSPL license obligations. For more information,
   7# please see the "LICENSE_COMMERCIAL.txt" file.
   8#
   9# This project is also available under Server Side Public License (SSPL).
  10# The SSPL licensing is ideal for use cases such as open source projects with
  11# SSPL distribution, student/academic purposes, hobby projects, internal research
  12# projects without external distribution, or other projects where all SSPL
  13# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
  14from typing import cast, List, Optional, DefaultDict, Sequence, Tuple
  15from collections import defaultdict
  16import itertools
  17import math
  18import statistics
  19import logging
  20
  21import attrs
  22from numpy.random import Generator as RandomGenerator
  23import numpy as np
  24from sklearn.neighbors import KDTree
  25from shapely.strtree import STRtree
  26from shapely.geometry import Polygon as ShapelyPolygon
  27from rectpack import newPacker as RectPacker
  28
  29from vkit.utility import rng_choice, rng_choice_with_size
  30from vkit.element import PointList, Box, Polygon, Mask, Image, ElementSetOperationMode
  31from vkit.mechanism.distortion import rotate
  32from ..interface import PipelineStep, PipelineStepFactory
  33from .page_distortion import PageDistortionStepOutput
  34from .page_resizing import PageResizingStepOutput
  35
  36logger = logging.getLogger(__name__)
  37
  38
@attrs.define
class PageTextRegionStepConfig:
    """Settings of the page text region pipeline step.

    NOTE(review): the code that reads these fields is not part of this view.
    The per-field comments are inferred from the field names only and should
    be confirmed against the step implementation.
    """
    # Presumably selects the adjusted char polygons over the raw ones -- TODO confirm.
    use_adjusted_char_polygons: bool = False
    # By name: probability of dropping a text region info that holds a single char.
    prob_drop_single_char_page_text_region_info: float = 0.5
    # Presumably forwarded to TextRegionFlattener as `typical_long_side_ratio_min`.
    text_region_flattener_typical_long_side_ratio_min: float = 3.0
    # Presumably the sampling range for TextRegionFlattener's
    # `text_region_polygon_dilate_ratio` -- TODO confirm.
    text_region_flattener_text_region_polygon_dilate_ratio_min: float = 0.85
    text_region_flattener_text_region_polygon_dilate_ratio_max: float = 1.0
    # By name: target range of the median char height used when resizing text regions.
    text_region_resize_char_height_median_min: int = 32
    text_region_resize_char_height_median_max: int = 46
    # By name: probabilities of post-rotating typical / untypical text regions.
    prob_text_region_typical_post_rotate: float = 0.2
    prob_text_region_untypical_post_rotate: float = 0.2
    # By name: ratio of negative text regions and their post-rotate probability.
    negative_text_region_ratio: float = 0.1
    prob_negative_text_region_post_rotate: float = 0.2
    # Presumably the `flattened_text_regions_pad` of stack_flattened_text_regions.
    stack_flattened_text_regions_pad: int = 2
    # By name: how a post-rotate angle is picked (90-degree step vs random angle).
    prob_post_rotate_90_angle: float = 0.5
    prob_post_rotate_random_angle: float = 0.0
    # By name: bounds of the random post-rotate angle -- presumably in degrees.
    post_rotate_random_angle_min: int = -5
    post_rotate_random_angle_max: int = 5
    # Presumably enables filling PageTextRegionStepOutput.debug -- TODO confirm.
    enable_debug: bool = False
  58
  59
@attrs.define
class PageTextRegionStepInput:
    """Inputs of the page text region step: outputs of two earlier steps."""
    # Output of the page distortion step.
    page_distortion_step_output: PageDistortionStepOutput
    # Output of the page resizing step.
    page_resizing_step_output: PageResizingStepOutput
  64
  65
@attrs.define
class PageTextRegionInfo:
    """Pairs one text region polygon with a sequence of char polygons.

    NOTE(review): the producer of this record is not visible here; presumably
    `char_polygons` are the chars located inside the region -- confirm.
    """
    # Polygon of the text region.
    precise_text_region_polygon: Polygon
    # Char polygons associated with the region.
    char_polygons: Sequence[Polygon]
  70
  71
  72@attrs.define
  73class FlattenedTextRegion:
  74    is_typical: bool
  75    text_region_polygon: Polygon
  76    text_region_image: Image
  77    bounding_extended_text_region_mask: Mask
  78    flattening_rotate_angle: int
  79    shape_before_trim: Tuple[int, int]
  80    rotated_trimmed_box: Box
  81    shape_before_resize: Tuple[int, int]
  82    post_rotate_angle: int
  83    flattened_image: Image
  84    flattened_mask: Mask
  85    flattened_char_polygons: Optional[Sequence[Polygon]]
  86
  87    @property
  88    def shape(self):
  89        return self.flattened_image.shape
  90
  91    @property
  92    def height(self):
  93        return self.flattened_image.height
  94
  95    @property
  96    def width(self):
  97        return self.flattened_image.width
  98
  99    @property
 100    def area(self):
 101        return self.flattened_image.area
 102
 103    def get_char_height_meidan(self):
 104        assert self.flattened_char_polygons
 105        return statistics.median(
 106            char_polygon.get_rectangular_height() for char_polygon in self.flattened_char_polygons
 107        )
 108
 109    def to_resized_flattened_text_region(
 110        self,
 111        resized_height: Optional[int] = None,
 112        resized_width: Optional[int] = None,
 113    ):
 114        resized_flattened_image = self.flattened_image.to_resized_image(
 115            resized_height=resized_height,
 116            resized_width=resized_width,
 117        )
 118
 119        resized_flattened_mask = self.flattened_mask.to_resized_mask(
 120            resized_height=resized_height,
 121            resized_width=resized_width,
 122        )
 123
 124        resized_flattened_char_polygons = None
 125        if self.flattened_char_polygons is not None:
 126            resized_flattened_char_polygons = [
 127                flattened_char_polygon.to_conducted_resized_polygon(
 128                    self.shape,
 129                    resized_height=resized_height,
 130                    resized_width=resized_width,
 131                ) for flattened_char_polygon in self.flattened_char_polygons
 132            ]
 133
 134        return attrs.evolve(
 135            self,
 136            flattened_image=resized_flattened_image,
 137            flattened_mask=resized_flattened_mask,
 138            flattened_char_polygons=resized_flattened_char_polygons,
 139        )
 140
 141    def to_post_rotated_flattened_text_region(
 142        self,
 143        post_rotate_angle: int,
 144    ):
 145        assert self.post_rotate_angle == 0
 146
 147        # NOTE: No need to trim.
 148        rotated_result = rotate.distort(
 149            {'angle': post_rotate_angle},
 150            image=self.flattened_image,
 151            mask=self.flattened_mask,
 152            polygons=self.flattened_char_polygons,
 153        )
 154        rotated_flattened_image = rotated_result.image
 155        assert rotated_flattened_image
 156        rotated_flattened_mask = rotated_result.mask
 157        assert rotated_flattened_mask
 158        rotated_flattened_char_polygons = rotated_result.polygons
 159
 160        return attrs.evolve(
 161            self,
 162            post_rotate_angle=post_rotate_angle,
 163            flattened_image=rotated_flattened_image,
 164            flattened_mask=rotated_flattened_mask,
 165            flattened_char_polygons=rotated_flattened_char_polygons,
 166        )
 167
 168
@attrs.define
class PageTextRegionStepDebug:
    """Optional debugging artifacts of the page text region step.

    Every field defaults to None (although the annotations are not Optional),
    so an instance can be created empty and filled in afterwards.
    """
    # Page image captured for debugging.
    page_image: Image = attrs.field(default=None)
    # Candidate polygons of the precise text regions.
    precise_text_region_candidate_polygons: Sequence[Polygon] = attrs.field(default=None)
    # Text region infos captured for debugging.
    page_text_region_infos: Sequence[PageTextRegionInfo] = attrs.field(default=None)
    # Flattened text regions captured for debugging.
    flattened_text_regions: Sequence[FlattenedTextRegion] = attrs.field(default=None)
 174    flattened_text_regions: Sequence[FlattenedTextRegion] = attrs.field(default=None)
 175
 176
 177@attrs.define
 178class PageTextRegionStepOutput:
 179    page_image: Image
 180    page_active_mask: Mask
 181    page_char_polygons: Sequence[Polygon]
 182    page_text_region_polygons: Sequence[Polygon]
 183    page_char_polygon_text_region_polygon_indices: Sequence[int]
 184    shape_before_rotate: Tuple[int, int]
 185    rotate_angle: int
 186    debug: Optional[PageTextRegionStepDebug]
 187
 188
 189def calculate_boxed_masks_intersected_ratio(
 190    anchor_mask: Mask,
 191    candidate_mask: Mask,
 192    use_candidate_as_base: bool = False,
 193):
 194    anchor_box = anchor_mask.box
 195    assert anchor_box
 196
 197    candidate_box = candidate_mask.box
 198    assert candidate_box
 199
 200    # Calculate intersection.
 201    up = max(anchor_box.up, candidate_box.up)
 202    down = min(anchor_box.down, candidate_box.down)
 203    left = max(anchor_box.left, candidate_box.left)
 204    right = min(anchor_box.right, candidate_box.right)
 205
 206    if up > down or left > right:
 207        return 0.0
 208
 209    np_intersected_anchor_mask = anchor_mask.mat[
 210        up - anchor_box.up:down - anchor_box.up + 1,
 211        left - anchor_box.left:right - anchor_box.left + 1,
 212    ]  # yapf: disable
 213    np_intersected_candidate_mask = candidate_mask.mat[
 214        up - candidate_box.up:down - candidate_box.up + 1,
 215        left - candidate_box.left:right - candidate_box.left + 1,
 216    ]  # yapf: disable
 217    np_intersected_mask = np_intersected_anchor_mask & np_intersected_candidate_mask
 218    intersected_area = int(np_intersected_mask.sum())
 219
 220    if use_candidate_as_base:
 221        base_area = int(candidate_mask.np_mask.sum())
 222    else:
 223        base_area = (
 224            int(anchor_mask.np_mask.sum()) + int(candidate_mask.np_mask.sum()) - intersected_area
 225        )
 226
 227    return intersected_area / base_area
 228
 229
 230class TextRegionFlattener:
 231
 232    @classmethod
 233    def patch_text_region_polygons(
 234        cls,
 235        text_region_polygons: Sequence[Polygon],
 236        grouped_char_polygons: Optional[Sequence[Sequence[Polygon]]],
 237    ):
 238        if grouped_char_polygons is None:
 239            return text_region_polygons
 240
 241        assert len(text_region_polygons) == len(grouped_char_polygons)
 242
 243        patched_text_region_polygons: List[Polygon] = []
 244        for text_region_polygon, char_polygons in zip(text_region_polygons, grouped_char_polygons):
 245            # Need to make sure all char polygons are included.
 246            unionized_polygons = [text_region_polygon]
 247            unionized_polygons.extend(char_polygons)
 248
 249            bounding_box = Box.from_boxes((polygon.bounding_box for polygon in unionized_polygons))
 250            mask = Mask.from_shapable(bounding_box).to_box_attached(bounding_box)
 251            for polygon in unionized_polygons:
 252                polygon.fill_mask(mask)
 253
 254            patched_text_region_polygons.append(mask.to_external_polygon())
 255
 256        return patched_text_region_polygons
 257
 258    @classmethod
 259    def get_dilated_and_bounding_rectangular_polygons(
 260        cls,
 261        text_region_polygon_dilate_ratio: float,
 262        shape: Tuple[int, int],
 263        text_region_polygons: Sequence[Polygon],
 264        force_no_dilation_flags: Optional[Sequence[bool]] = None,
 265    ):
 266        dilated_text_region_polygons: List[Polygon] = []
 267        bounding_rectangular_polygons: List[Polygon] = []
 268
 269        if force_no_dilation_flags is None:
 270            force_no_dilation_flags_iter = itertools.repeat(False)
 271        else:
 272            assert len(force_no_dilation_flags) == len(text_region_polygons)
 273            force_no_dilation_flags_iter = force_no_dilation_flags
 274
 275        for text_region_polygon, force_no_dilation_flag in zip(
 276            text_region_polygons, force_no_dilation_flags_iter
 277        ):
 278
 279            if not force_no_dilation_flag:
 280                # Dilate.
 281                text_region_polygon = text_region_polygon.to_dilated_polygon(
 282                    ratio=text_region_polygon_dilate_ratio,
 283                )
 284                text_region_polygon = text_region_polygon.to_clipped_polygon(shape)
 285
 286            dilated_text_region_polygons.append(text_region_polygon)
 287            bounding_rectangular_polygons.append(
 288                text_region_polygon.to_bounding_rectangular_polygon(shape)
 289            )
 290
 291        return dilated_text_region_polygons, bounding_rectangular_polygons
 292
 293    @classmethod
 294    def analyze_bounding_rectangular_polygons(
 295        cls,
 296        bounding_rectangular_polygons: Sequence[Polygon],
 297    ):
 298        short_side_lengths: List[float] = []
 299        long_side_ratios: List[float] = []
 300        long_side_angles: List[int] = []
 301
 302        for polygon in bounding_rectangular_polygons:
 303            # Get reference line.
 304            point0, point1, _, point3 = polygon.points
 305            side0_length = math.hypot(
 306                point0.smooth_y - point1.smooth_y,
 307                point0.smooth_x - point1.smooth_x,
 308            )
 309            side1_length = math.hypot(
 310                point0.smooth_y - point3.smooth_y,
 311                point0.smooth_x - point3.smooth_x,
 312            )
 313
 314            # Get the short side length.
 315            short_side_lengths.append(min(side0_length, side1_length))
 316
 317            long_side_ratios.append(
 318                max(side0_length, side1_length) / min(side0_length, side1_length)
 319            )
 320
 321            point_a = point0
 322            if side0_length > side1_length:
 323                # Reference line (p0 -> p1).
 324                point_b = point1
 325            else:
 326                # Reference line (p0 -> p3).
 327                point_b = point3
 328
 329            # Get the angle of reference line, in [0, 180) degree.
 330            np_theta = np.arctan2(
 331                point_a.smooth_y - point_b.smooth_y,
 332                point_a.smooth_x - point_b.smooth_x,
 333            )
 334            np_theta = np_theta % np.pi
 335            long_side_angle = round(np_theta / np.pi * 180) % 180
 336            long_side_angles.append(long_side_angle)
 337
 338        return short_side_lengths, long_side_ratios, long_side_angles
 339
 340    @classmethod
 341    def get_typical_indices(
 342        cls,
 343        typical_long_side_ratio_min: float,
 344        long_side_ratios: Sequence[float],
 345    ):
 346        return tuple(
 347            idx for idx, long_side_ratio in enumerate(long_side_ratios)
 348            if long_side_ratio >= typical_long_side_ratio_min
 349        )
 350
 351    @classmethod
 352    def check_first_text_region_polygon_is_larger(
 353        cls,
 354        text_region_polygons: Sequence[Polygon],
 355        short_side_lengths: Sequence[float],
 356        first_idx: int,
 357        second_idx: int,
 358    ):
 359        first_text_region_polygon = text_region_polygons[first_idx]
 360        second_text_region_polygon = text_region_polygons[second_idx]
 361
 362        # The short side indicates the text line height.
 363        first_short_side_length = short_side_lengths[first_idx]
 364        second_short_side_length = short_side_lengths[second_idx]
 365
 366        return (
 367            first_text_region_polygon.area >= second_text_region_polygon.area
 368            and first_short_side_length >= second_short_side_length
 369        )
 370
 371    @classmethod
 372    def get_main_and_flattening_rotate_angles(
 373        cls,
 374        text_region_polygons: Sequence[Polygon],
 375        typical_indices: Sequence[int],
 376        short_side_lengths: Sequence[float],
 377        long_side_angles: Sequence[int],
 378    ):
 379        typical_indices_set = set(typical_indices)
 380        text_region_center_points = [
 381            text_region_polygon.get_center_point() for text_region_polygon in text_region_polygons
 382        ]
 383
 384        main_angles: List[Optional[int]] = [None] * len(long_side_angles)
 385
 386        # 1. For typical indices, or if no typical indices.
 387        for idx, long_side_angle in enumerate(long_side_angles):
 388            if not typical_indices_set or idx in typical_indices_set:
 389                main_angles[idx] = long_side_angle
 390
 391        # 2. For nontypcial indices.
 392        if typical_indices_set:
 393            typical_center_points = PointList(
 394                text_region_center_points[idx] for idx in typical_indices
 395            )
 396            kd_tree = KDTree(typical_center_points.to_np_array())
 397
 398            nontypical_indices = tuple(
 399                idx for idx, _ in enumerate(long_side_angles) if idx not in typical_indices_set
 400            )
 401            nontypical_center_points = PointList(
 402                text_region_center_points[idx] for idx in nontypical_indices
 403            )
 404
 405            # Set main angle as the closest typical angle.
 406            # Round 1: Set if the closest typical polygon is large enough.
 407            _, np_kd_nbr_indices = kd_tree.query(nontypical_center_points.to_np_array())
 408            round2_nontypical_indices: List[int] = []
 409            for nontypical_idx, typical_indices_idx in zip(
 410                nontypical_indices,
 411                np_kd_nbr_indices[:, 0].tolist(),
 412            ):
 413                typical_idx = typical_indices[typical_indices_idx]
 414                if cls.check_first_text_region_polygon_is_larger(
 415                    text_region_polygons=text_region_polygons,
 416                    short_side_lengths=short_side_lengths,
 417                    first_idx=typical_idx,
 418                    second_idx=nontypical_idx,
 419                ):
 420                    main_angles[nontypical_idx] = main_angles[typical_idx]
 421                else:
 422                    round2_nontypical_indices.append(nontypical_idx)
 423
 424            # Round 2: Searching the closest typical polygon that has larger area.
 425            round3_nontypical_indices: List[int] = []
 426            if round2_nontypical_indices:
 427                round2_nontypical_center_points = PointList(
 428                    text_region_center_points[idx] for idx in round2_nontypical_indices
 429                )
 430                _, np_kd_nbr_indices = kd_tree.query(
 431                    round2_nontypical_center_points.to_np_array(),
 432                    k=len(typical_center_points),
 433                )
 434                for nontypical_idx, typical_indices_indices in zip(
 435                    round2_nontypical_indices,
 436                    np_kd_nbr_indices.tolist(),
 437                ):
 438                    hit_typical_idx = None
 439                    for typical_indices_idx in typical_indices_indices:
 440                        typical_idx = typical_indices[typical_indices_idx]
 441                        if cls.check_first_text_region_polygon_is_larger(
 442                            text_region_polygons=text_region_polygons,
 443                            short_side_lengths=short_side_lengths,
 444                            first_idx=typical_idx,
 445                            second_idx=nontypical_idx,
 446                        ):
 447                            hit_typical_idx = typical_idx
 448                            break
 449
 450                    if hit_typical_idx is not None:
 451                        main_angles[nontypical_idx] = main_angles[hit_typical_idx]
 452                    else:
 453                        round3_nontypical_indices.append(nontypical_idx)
 454
 455            # Round 3: Last resort. Set to the median of typical angles.
 456            if round3_nontypical_indices:
 457                main_angles_median = statistics.median_low(
 458                    long_side_angles[typical_idx] for typical_idx in typical_indices
 459                )
 460                for nontypical_idx in round3_nontypical_indices:
 461                    main_angles[nontypical_idx] = main_angles_median
 462
 463        # 3. Get angle for flattening.
 464        flattening_rotate_angles: List[int] = []
 465        for main_angle in main_angles:
 466            assert main_angle is not None
 467            if main_angle <= 90:
 468                # [270, 360).
 469                flattening_rotate_angle = (360 - main_angle) % 360
 470            else:
 471                # [1, 90).
 472                flattening_rotate_angle = 180 - main_angle
 473            flattening_rotate_angles.append(flattening_rotate_angle)
 474
 475        return cast(List[int], main_angles), flattening_rotate_angles
 476
 477    @classmethod
 478    def get_bounding_extended_text_region_masks(
 479        cls,
 480        shape: Tuple[int, int],
 481        text_region_polygons: Sequence[Polygon],
 482        dilated_text_region_polygons: Sequence[Polygon],
 483        bounding_rectangular_polygons: Sequence[Polygon],
 484        typical_indices: Sequence[int],
 485        main_angles: Sequence[int],
 486    ):
 487        typical_indices_set = set(typical_indices)
 488
 489        text_mask = Mask.from_polygons(shape, text_region_polygons)
 490        non_text_mask = text_mask.to_inverted_mask()
 491
 492        box = Box.from_shape(shape)
 493        text_mask = text_mask.to_box_attached(box)
 494        non_text_mask = non_text_mask.to_box_attached(box)
 495
 496        bounding_extended_text_region_masks: List[Mask] = []
 497
 498        num_text_region_polygons = len(text_region_polygons)
 499        for idx in range(num_text_region_polygons):
 500            text_region_polygon = text_region_polygons[idx]
 501            dilated_text_region_polygon = dilated_text_region_polygons[idx]
 502            bounding_rectangular_polygon = bounding_rectangular_polygons[idx]
 503
 504            if typical_indices_set and idx not in typical_indices_set:
 505                # Patch bounding rectangular polygon if is nontypical.
 506                main_angle = main_angles[idx]
 507                bounding_rectangular_polygon = \
 508                    dilated_text_region_polygon.to_bounding_rectangular_polygon(
 509                        shape=shape,
 510                        angle=main_angle,
 511                    )
 512
 513            # See the comment in Polygon.to_bounding_rectangular_polygon.
 514            bounding_box = Box.from_boxes((
 515                dilated_text_region_polygon.bounding_box,
 516                bounding_rectangular_polygon.bounding_box,
 517            ))
 518
 519            # Fill other text region.
 520            bounding_other_text_mask = \
 521                Mask.from_shapable(bounding_box).to_box_attached(bounding_box)
 522            # Copy from text mask.
 523            bounding_rectangular_polygon.fill_mask(bounding_other_text_mask, text_mask)
 524            # Use the original text region polygon to unset the current text mask.
 525            text_region_polygon.fill_mask(bounding_other_text_mask, 0)
 526
 527            # Fill protentially dilated text region.
 528            bounding_text_mask = \
 529                Mask.from_shapable(bounding_other_text_mask).to_box_attached(bounding_box)
 530            # Use the protentially dilated text region polygon to set the current text mask.
 531            dilated_text_region_polygon.fill_mask(bounding_text_mask, value=1)
 532
 533            del dilated_text_region_polygon
 534
 535            # Trim protentially dilated text region polygon by eliminating other text region.
 536            bounding_trimmed_text_mask = Mask.from_masks(
 537                bounding_box,
 538                [
 539                    # Includes the protentially dilated text region.
 540                    bounding_text_mask,
 541                    # But not includes any other text regions.
 542                    bounding_other_text_mask.to_inverted_mask(),
 543                ],
 544                ElementSetOperationMode.INTERSECT,
 545            )
 546
 547            # Extract non-text region.
 548            bounding_non_text_mask = bounding_rectangular_polygon.extract_mask(non_text_mask)
 549
 550            # Unionize trimmed text region and non-text region.
 551            bounding_extended_text_region_mask = Mask.from_masks(
 552                bounding_box,
 553                [bounding_trimmed_text_mask, bounding_non_text_mask],
 554            )
 555
 556            bounding_extended_text_region_masks.append(bounding_extended_text_region_mask)
 557
 558        return bounding_extended_text_region_masks
 559
 560    @classmethod
 561    def build_flattened_text_regions(
 562        cls,
 563        image: Image,
 564        text_region_polygons: Sequence[Polygon],
 565        bounding_extended_text_region_masks: Sequence[Mask],
 566        typical_indices: Sequence[int],
 567        flattening_rotate_angles: Sequence[int],
 568        grouped_char_polygons: Optional[Sequence[Sequence[Polygon]]],
 569    ):
 570        typical_indices_set = set(typical_indices)
 571
 572        flattened_text_regions: List[FlattenedTextRegion] = []
 573
 574        for idx, (
 575            text_region_polygon,
 576            bounding_extended_text_region_mask,
 577            flattening_rotate_angle,
 578        ) in enumerate(
 579            zip(
 580                text_region_polygons,
 581                bounding_extended_text_region_masks,
 582                flattening_rotate_angles,
 583            )
 584        ):
 585            bounding_box = bounding_extended_text_region_mask.box
 586            assert bounding_box
 587
 588            # Extract image.
 589            text_region_image = bounding_extended_text_region_mask.extract_image(image)
 590
 591            # Shift char polygons.
 592            relative_char_polygons = None
 593            if grouped_char_polygons is not None:
 594                char_polygons = grouped_char_polygons[idx]
 595                relative_char_polygons = [
 596                    char_polygon.to_relative_polygon(
 597                        origin_y=bounding_box.up,
 598                        origin_x=bounding_box.left,
 599                    ) for char_polygon in char_polygons
 600                ]
 601
 602            # Rotate.
 603            rotated_result = rotate.distort(
 604                {'angle': flattening_rotate_angle},
 605                image=text_region_image,
 606                mask=bounding_extended_text_region_mask,
 607                polygons=relative_char_polygons,
 608            )
 609            rotated_text_region_image = rotated_result.image
 610            assert rotated_text_region_image
 611            rotated_bounding_extended_text_region_mask = rotated_result.mask
 612            assert rotated_bounding_extended_text_region_mask
 613            # Could be None.
 614            rotated_char_polygons = rotated_result.polygons
 615
 616            # Trim.
 617            rotated_trimmed_box = rotated_bounding_extended_text_region_mask.to_external_box()
 618
 619            trimmed_text_region_image = rotated_text_region_image.to_cropped_image(
 620                up=rotated_trimmed_box.up,
 621                down=rotated_trimmed_box.down,
 622                left=rotated_trimmed_box.left,
 623                right=rotated_trimmed_box.right,
 624            )
 625
 626            trimmed_mask = rotated_trimmed_box.extract_mask(
 627                rotated_bounding_extended_text_region_mask
 628            )
 629
 630            trimmed_char_polygons = None
 631            if rotated_char_polygons:
 632                trimmed_char_polygons = [
 633                    rotated_char_polygon.to_relative_polygon(
 634                        origin_y=rotated_trimmed_box.up,
 635                        origin_x=rotated_trimmed_box.left,
 636                    ) for rotated_char_polygon in rotated_char_polygons
 637                ]
 638
 639            flattened_text_regions.append(
 640                FlattenedTextRegion(
 641                    is_typical=(idx in typical_indices_set),
 642                    text_region_polygon=text_region_polygon,
 643                    text_region_image=bounding_extended_text_region_mask.extract_image(image),
 644                    bounding_extended_text_region_mask=bounding_extended_text_region_mask,
 645                    flattening_rotate_angle=flattening_rotate_angle,
 646                    shape_before_trim=rotated_text_region_image.shape,
 647                    rotated_trimmed_box=rotated_trimmed_box,
 648                    shape_before_resize=trimmed_text_region_image.shape,
 649                    post_rotate_angle=0,
 650                    flattened_image=trimmed_text_region_image,
 651                    flattened_mask=trimmed_mask,
 652                    flattened_char_polygons=trimmed_char_polygons,
 653                )
 654            )
 655
 656        return flattened_text_regions
 657
 658    def __init__(
 659        self,
 660        typical_long_side_ratio_min: float,
 661        text_region_polygon_dilate_ratio: float,
 662        image: Image,
 663        text_region_polygons: Sequence[Polygon],
 664        grouped_char_polygons: Optional[Sequence[Sequence[Polygon]]] = None,
 665        is_training: bool = False,
 666    ):
 667        self.original_text_region_polygons = text_region_polygons
 668
 669        self.text_region_polygons = self.patch_text_region_polygons(
 670            text_region_polygons=text_region_polygons,
 671            grouped_char_polygons=grouped_char_polygons,
 672        )
 673
 674        force_no_dilation_flags = None
 675        if is_training:
 676            assert grouped_char_polygons and len(text_region_polygons) == len(grouped_char_polygons)
 677            force_no_dilation_flags = []
 678            for char_polygons in grouped_char_polygons:
 679                force_no_dilation_flags.append(not char_polygons)
 680
 681        (
 682            self.dilated_text_region_polygons,
 683            self.bounding_rectangular_polygons,
 684        ) = self.get_dilated_and_bounding_rectangular_polygons(
 685            text_region_polygon_dilate_ratio=text_region_polygon_dilate_ratio,
 686            shape=image.shape,
 687            text_region_polygons=self.text_region_polygons,
 688            force_no_dilation_flags=force_no_dilation_flags,
 689        )
 690
 691        (
 692            self.short_side_lengths,
 693            self.long_side_ratios,
 694            self.long_side_angles,
 695        ) = self.analyze_bounding_rectangular_polygons(self.bounding_rectangular_polygons)
 696
 697        self.typical_indices = self.get_typical_indices(
 698            typical_long_side_ratio_min=typical_long_side_ratio_min,
 699            long_side_ratios=self.long_side_ratios,
 700        )
 701
 702        (
 703            self.main_angles,
 704            self.flattening_rotate_angles,
 705        ) = self.get_main_and_flattening_rotate_angles(
 706            text_region_polygons=self.text_region_polygons,
 707            typical_indices=self.typical_indices,
 708            short_side_lengths=self.short_side_lengths,
 709            long_side_angles=self.long_side_angles,
 710        )
 711
 712        self.bounding_extended_text_region_masks = self.get_bounding_extended_text_region_masks(
 713            shape=image.shape,
 714            text_region_polygons=self.text_region_polygons,
 715            dilated_text_region_polygons=self.dilated_text_region_polygons,
 716            bounding_rectangular_polygons=self.bounding_rectangular_polygons,
 717            typical_indices=self.typical_indices,
 718            main_angles=self.main_angles,
 719        )
 720
 721        self.flattened_text_regions = self.build_flattened_text_regions(
 722            image=image,
 723            # NOTE: need to use the original text region polygons for reversed opts.
 724            text_region_polygons=self.original_text_region_polygons,
 725            bounding_extended_text_region_masks=self.bounding_extended_text_region_masks,
 726            typical_indices=self.typical_indices,
 727            flattening_rotate_angles=self.flattening_rotate_angles,
 728            grouped_char_polygons=grouped_char_polygons,
 729        )
 730
 731
 732def build_background_image_for_stacking(height: int, width: int):
 733    np_rgb_rows = [np.zeros((width, 3), dtype=np.uint8) for _ in range(3)]
 734    rgb_tuples = [(255, 0, 0), (0, 255, 0), (0, 0, 255)]
 735
 736    for color_offset, np_row in enumerate(np_rgb_rows):
 737        for color_idx in range(3):
 738            color_tuple = rgb_tuples[(color_offset + color_idx) % 3]
 739            np_row[color_idx::3] = color_tuple
 740
 741    np_image = np.zeros((height, width, 3), dtype=np.uint8)
 742    for row_offset, np_row in enumerate(np_rgb_rows):
 743        np_image[row_offset::3] = np_row
 744
 745    return Image(mat=np_image)
 746
 747
 748def stack_flattened_text_regions(
 749    page_pad: int,
 750    flattened_text_regions_pad: int,
 751    flattened_text_regions: Sequence[FlattenedTextRegion],
 752):
 753    page_double_pad = 2 * page_pad
 754    flattened_text_regions_double_pad = 2 * flattened_text_regions_pad
 755
 756    rect_packer = RectPacker(rotation=False)
 757
 758    # Add box and bin.
 759    # NOTE: Only one bin is added, that is, packing all text region into one image.
 760    bin_width = 0
 761    bin_height = 0
 762
 763    for ftr_idx, flattened_text_region in enumerate(flattened_text_regions):
 764        rect_packer.add_rect(
 765            width=flattened_text_region.width + flattened_text_regions_double_pad,
 766            height=flattened_text_region.height + flattened_text_regions_double_pad,
 767            rid=ftr_idx,
 768        )
 769
 770        bin_width = max(bin_width, flattened_text_region.width)
 771        bin_height += flattened_text_region.height
 772
 773    bin_width += flattened_text_regions_double_pad
 774    bin_height += flattened_text_regions_double_pad
 775
 776    rect_packer.add_bin(width=bin_width, height=bin_height)
 777
 778    # Pack boxes.
 779    rect_packer.pack()  # type: ignore
 780
 781    # Get packed boxes.
 782    unordered_boxes: List[Box] = []
 783    ftr_indices: List[int] = []
 784    for bin_idx, x, y, width, height, ftr_idx in rect_packer.rect_list():
 785        assert bin_idx == 0
 786        unordered_boxes.append(Box(
 787            up=y,
 788            down=y + height - 1,
 789            left=x,
 790            right=x + width - 1,
 791        ))
 792        ftr_indices.append(ftr_idx)
 793
 794    # Order boxes.
 795    inverse_ftr_indices = [-1] * len(ftr_indices)
 796    for inverse_ftr_idx, ftr_idx in enumerate(ftr_indices):
 797        inverse_ftr_indices[ftr_idx] = inverse_ftr_idx
 798    for inverse_ftr_idx in inverse_ftr_indices:
 799        assert inverse_ftr_idx >= 0
 800    padded_boxes = [unordered_boxes[inverse_ftr_idx] for inverse_ftr_idx in inverse_ftr_indices]
 801
 802    page_height = max(box.down for box in padded_boxes) + 1 + page_double_pad
 803    page_width = max(box.right for box in padded_boxes) + 1 + page_double_pad
 804
 805    image = build_background_image_for_stacking(page_height, page_width)
 806    active_mask = Mask.from_shapable(image)
 807    text_region_boxes: List[Box] = []
 808    char_polygons: List[Polygon] = []
 809    char_polygon_text_region_box_indices: List[int] = []
 810
 811    for padded_box, flattened_text_region in zip(padded_boxes, flattened_text_regions):
 812        assert flattened_text_region.height + flattened_text_regions_double_pad \
 813            == padded_box.height
 814        assert flattened_text_region.width + flattened_text_regions_double_pad \
 815            == padded_box.width
 816
 817        # Remove box padding.
 818        up = padded_box.up + flattened_text_regions_pad + page_pad
 819        left = padded_box.left + flattened_text_regions_pad + page_pad
 820
 821        text_region_box = Box(
 822            up=up,
 823            down=up + flattened_text_region.height - 1,
 824            left=left,
 825            right=left + flattened_text_region.width - 1,
 826        )
 827        text_region_boxes.append(text_region_box)
 828        text_region_box_idx = len(text_region_boxes) - 1
 829
 830        # Render.
 831        text_region_box.fill_image(
 832            image,
 833            flattened_text_region.flattened_image,
 834            image_mask=flattened_text_region.flattened_mask,
 835        )
 836        text_region_box.fill_mask(
 837            active_mask,
 838            value=1,
 839            mask_mask=flattened_text_region.flattened_mask,
 840        )
 841
 842        if flattened_text_region.flattened_char_polygons:
 843            for char_polygon in flattened_text_region.flattened_char_polygons:
 844                char_polygons.append(char_polygon.to_shifted_polygon(
 845                    offset_y=up,
 846                    offset_x=left,
 847                ))
 848                char_polygon_text_region_box_indices.append(text_region_box_idx)
 849
 850    return (
 851        image,
 852        active_mask,
 853        text_region_boxes,
 854        char_polygons,
 855        char_polygon_text_region_box_indices,
 856    )
 857
 858
 859class PageTextRegionStep(
 860    PipelineStep[
 861        PageTextRegionStepConfig,
 862        PageTextRegionStepInput,
 863        PageTextRegionStepOutput,
 864    ]
 865):  # yapf: disable
 866
 867    @classmethod
 868    def generate_precise_text_region_candidate_polygons(
 869        cls,
 870        precise_mask: Mask,
 871        disconnected_text_region_mask: Mask,
 872    ):
 873        assert precise_mask.box and disconnected_text_region_mask.box
 874
 875        # Get the intersection.
 876        intersected_box = Box(
 877            up=max(precise_mask.box.up, disconnected_text_region_mask.box.up),
 878            down=min(precise_mask.box.down, disconnected_text_region_mask.box.down),
 879            left=max(precise_mask.box.left, disconnected_text_region_mask.box.left),
 880            right=min(precise_mask.box.right, disconnected_text_region_mask.box.right),
 881        )
 882        assert intersected_box.up <= intersected_box.down
 883        assert intersected_box.left <= intersected_box.right
 884
 885        precise_mask = intersected_box.extract_mask(precise_mask)
 886        disconnected_text_region_mask = intersected_box.extract_mask(disconnected_text_region_mask)
 887
 888        # Apply mask bitwise-and operation.
 889        intersected_mask = Mask(
 890            mat=(disconnected_text_region_mask.mat & precise_mask.mat).astype(np.uint8)
 891        )
 892        intersected_mask = intersected_mask.to_box_attached(intersected_box)
 893
 894        # NOTE:
 895        # 1. Could extract more than one polygons.
 896        # 2. Some polygons are in border and should be removed later.
 897        return intersected_mask.to_disconnected_polygons()
 898
 899    @classmethod
 900    def strtree_query_intersected_polygons(
 901        cls,
 902        strtree: STRtree,
 903        anchor_polygons: Sequence[Polygon],
 904        candidate_polygon: Polygon,
 905    ):
 906        candidate_shapely_polygon = candidate_polygon.to_shapely_polygon()
 907        candidate_mask = candidate_polygon.mask
 908
 909        for anchor_idx in sorted(strtree.query(candidate_shapely_polygon)):
 910            anchor_polygon = anchor_polygons[anchor_idx]
 911            anchor_mask = anchor_polygon.mask
 912
 913            intersected_ratio = calculate_boxed_masks_intersected_ratio(
 914                anchor_mask=anchor_mask,
 915                candidate_mask=candidate_mask,
 916                use_candidate_as_base=True,
 917            )
 918
 919            yield (
 920                anchor_idx,
 921                anchor_polygon,
 922                anchor_mask,
 923                candidate_mask,
 924                intersected_ratio,
 925            )
 926
 927    def sample_page_non_text_region_polygons(
 928        self,
 929        page_non_text_region_polygons: Sequence[Polygon],
 930        num_page_text_region_infos: int,
 931        rng: RandomGenerator,
 932    ):
 933        negative_ratio = self.config.negative_text_region_ratio
 934        num_page_non_text_region_polygons = round(
 935            negative_ratio * num_page_text_region_infos / (1 - negative_ratio)
 936        )
 937        return rng_choice_with_size(
 938            rng,
 939            page_non_text_region_polygons,
 940            size=min(
 941                num_page_non_text_region_polygons,
 942                len(page_non_text_region_polygons),
 943            ),
 944            replace=False,
 945        )
 946
 947    def build_flattened_text_regions(
 948        self,
 949        page_image: Image,
 950        page_text_region_infos: Sequence[PageTextRegionInfo],
 951        page_non_text_region_polygons: Sequence[Polygon],
 952        rng: RandomGenerator,
 953    ):
 954        text_region_polygon_dilate_ratio = float(
 955            rng.uniform(
 956                self.config.text_region_flattener_text_region_polygon_dilate_ratio_min,
 957                self.config.text_region_flattener_text_region_polygon_dilate_ratio_max,
 958            )
 959        )
 960        typical_long_side_ratio_min = \
 961            self.config.text_region_flattener_typical_long_side_ratio_min
 962
 963        text_region_polygons: List[Polygon] = []
 964        grouped_char_polygons: List[Sequence[Polygon]] = []
 965        for page_text_region_info in page_text_region_infos:
 966            text_region_polygons.append(page_text_region_info.precise_text_region_polygon)
 967            grouped_char_polygons.append(page_text_region_info.char_polygons)
 968
 969        # Inject nagative regions.
 970        for page_non_text_region_polygon in page_non_text_region_polygons:
 971            # NOTE: Don't drop any text region here, otherwise will introduce labeling confusion,
 972            # since dropped text region will be considered as non-text region.
 973            text_region_polygons.append(page_non_text_region_polygon)
 974            grouped_char_polygons.append(tuple())
 975
 976        text_region_flattener = TextRegionFlattener(
 977            typical_long_side_ratio_min=typical_long_side_ratio_min,
 978            text_region_polygon_dilate_ratio=text_region_polygon_dilate_ratio,
 979            image=page_image,
 980            text_region_polygons=text_region_polygons,
 981            grouped_char_polygons=grouped_char_polygons,
 982            is_training=True,
 983        )
 984
 985        # Resize positive ftr.
 986        positive_flattened_text_regions: List[FlattenedTextRegion] = []
 987        # For negative sampling.
 988        positive_reference_heights: List[float] = []
 989        positive_reference_widths: List[float] = []
 990        num_negative_flattened_text_regions = 0
 991
 992        for flattened_text_region in text_region_flattener.flattened_text_regions:
 993            if not flattened_text_region.flattened_char_polygons:
 994                num_negative_flattened_text_regions += 1
 995                continue
 996
 997            if len(flattened_text_region.flattened_char_polygons) == 1 \
 998                    and rng.random() < self.config.prob_drop_single_char_page_text_region_info:
 999                # Ignore some single-char text region for reducing label confusion.
1000                continue
1001
1002            char_height_median = flattened_text_region.get_char_height_meidan()
1003
1004            text_region_resize_char_height_median = int(
1005                rng.integers(
1006                    self.config.text_region_resize_char_height_median_min,
1007                    self.config.text_region_resize_char_height_median_max + 1,
1008                )
1009            )
1010            scale = text_region_resize_char_height_median / char_height_median
1011
1012            height, width = flattened_text_region.shape
1013            resized_height = round(height * scale)
1014            resized_width = round(width * scale)
1015
1016            flattened_text_region = flattened_text_region.to_resized_flattened_text_region(
1017                resized_height=resized_height,
1018                resized_width=resized_width,
1019            )
1020
1021            positive_reference_heights.append(resized_height)
1022            positive_reference_widths.append(resized_width)
1023
1024            # Post rotate.
1025            post_rotate_angle = 0
1026            if flattened_text_region.is_typical:
1027                if rng.random() < self.config.prob_text_region_typical_post_rotate:
1028                    # Upside down only.
1029                    post_rotate_angle = 180
1030            else:
1031                if rng.random() < self.config.prob_text_region_untypical_post_rotate:
1032                    # 3-way rotate.
1033                    post_rotate_angle = rng_choice(rng, (180, 90, 270), probs=(0.5, 0.25, 0.25))
1034
1035            if post_rotate_angle != 0:
1036                flattened_text_region = \
1037                    flattened_text_region.to_post_rotated_flattened_text_region(post_rotate_angle)
1038
1039            positive_flattened_text_regions.append(flattened_text_region)
1040
1041        # Resize negative ftr.
1042        negative_reference_heights = list(
1043            rng_choice_with_size(
1044                rng,
1045                positive_reference_heights,
1046                size=num_negative_flattened_text_regions,
1047                replace=(num_negative_flattened_text_regions > len(positive_reference_heights)),
1048            )
1049        )
1050
1051        negative_height_max = max(positive_reference_heights)
1052        negative_width_max = max(positive_reference_widths)
1053
1054        negative_flattened_text_regions: List[FlattenedTextRegion] = []
1055
1056        for flattened_text_region in text_region_flattener.flattened_text_regions:
1057            if flattened_text_region.flattened_char_polygons:
1058                continue
1059
1060            reference_height = negative_reference_heights.pop()
1061            scale = reference_height / flattened_text_region.height
1062
1063            height, width = flattened_text_region.shape
1064            resized_height = round(height * scale)
1065            resized_width = round(width * scale)
1066
1067            # Remove negative region that is too large.
1068            if resized_height > negative_height_max or resized_width > negative_width_max:
1069                continue
1070
1071            flattened_text_region = flattened_text_region.to_resized_flattened_text_region(
1072                resized_height=resized_height,
1073                resized_width=resized_width,
1074            )
1075
1076            # Post rotate.
1077            post_rotate_angle = 0
1078            if flattened_text_region.is_typical:
1079                if rng.random() < self.config.prob_text_region_typical_post_rotate:
1080                    # Upside down only.
1081                    post_rotate_angle = 180
1082            else:
1083                if rng.random() < self.config.prob_text_region_untypical_post_rotate:
1084                    # 3-way rotate.
1085                    post_rotate_angle = rng_choice(rng, (180, 90, 270), probs=(0.5, 0.25, 0.25))
1086
1087            if post_rotate_angle != 0:
1088                flattened_text_region = \
1089                    flattened_text_region.to_post_rotated_flattened_text_region(post_rotate_angle)
1090
1091            negative_flattened_text_regions.append(flattened_text_region)
1092
1093        flattened_text_regions = (
1094            *positive_flattened_text_regions,
1095            *negative_flattened_text_regions,
1096        )
1097        return flattened_text_regions
1098
1099    def run(self, input: PageTextRegionStepInput, rng: RandomGenerator):
1100        page_distortion_step_output = input.page_distortion_step_output
1101        page_image = page_distortion_step_output.page_image
1102        page_char_polygon_collection = page_distortion_step_output.page_char_polygon_collection
1103        page_disconnected_text_region_collection = \
1104            page_distortion_step_output.page_disconnected_text_region_collection
1105        page_non_text_region_collection = \
1106            page_distortion_step_output.page_non_text_region_collection
1107
1108        page_resizing_step_output = input.page_resizing_step_output
1109        page_resized_text_line_mask = page_resizing_step_output.page_text_line_mask
1110
1111        debug = None
1112        if self.config.enable_debug:
1113            debug = PageTextRegionStepDebug()
1114
1115        # Build R-tree to track text regions.
1116        disconnected_text_region_polygons: List[Polygon] = []
1117        disconnected_text_region_shapely_polygons: List[ShapelyPolygon] = []
1118        for polygon in page_disconnected_text_region_collection.to_polygons():
1119            disconnected_text_region_polygons.append(polygon)
1120            shapely_polygon = polygon.to_shapely_polygon()
1121            disconnected_text_region_shapely_polygons.append(shapely_polygon)
1122
1123        disconnected_text_region_tree = STRtree(disconnected_text_region_shapely_polygons)
1124
1125        # Get the precise text regions.
1126        precise_text_region_candidate_polygons: List[Polygon] = []
1127        for resized_precise_polygon in page_resized_text_line_mask.to_disconnected_polygons():
1128            # Resize back to the shape after distortion.
1129            precise_polygon = resized_precise_polygon.to_conducted_resized_polygon(
1130                page_resized_text_line_mask,
1131                resized_height=page_image.height,
1132                resized_width=page_image.width,
1133            )
1134
1135            # Find and extract intersected text region.
1136            # NOTE: One precise_polygon could be overlapped with
1137            # more than one disconnected_text_region_polygon!
1138            for _, _, disconnected_text_region_mask, precise_mask, _ in \
1139                    self.strtree_query_intersected_polygons(
1140                        strtree=disconnected_text_region_tree,
1141                        anchor_polygons=disconnected_text_region_polygons,
1142                        candidate_polygon=precise_polygon,
1143                    ):
1144                precise_text_region_candidate_polygons.extend(
1145                    self.generate_precise_text_region_candidate_polygons(
1146                        precise_mask=precise_mask,
1147                        disconnected_text_region_mask=disconnected_text_region_mask,
1148                    )
1149                )
1150
1151        if debug:
1152            debug.page_image = page_image
1153            debug.precise_text_region_candidate_polygons = precise_text_region_candidate_polygons
1154
1155        # Help gc.
1156        del disconnected_text_region_polygons
1157        del disconnected_text_region_shapely_polygons
1158        del disconnected_text_region_tree
1159
1160        # Bind char-level polygon to precise text region.
1161        precise_text_region_polygons: List[Polygon] = []
1162        precise_text_region_shapely_polygons: List[ShapelyPolygon] = []
1163
1164        for polygon in precise_text_region_candidate_polygons:
1165            precise_text_region_polygons.append(polygon)
1166            shapely_polygon = polygon.to_shapely_polygon()
1167            precise_text_region_shapely_polygons.append(shapely_polygon)
1168
1169        precise_text_region_tree = STRtree(precise_text_region_shapely_polygons)
1170
1171        if not self.config.use_adjusted_char_polygons:
1172            selected_char_polygons = page_char_polygon_collection.char_polygons
1173        else:
1174            selected_char_polygons = page_char_polygon_collection.adjusted_char_polygons
1175
1176        ptrp_idx_to_char_polygons: DefaultDict[int, List[Polygon]] = defaultdict(list)
1177
1178        for char_polygon in selected_char_polygons:
1179            best_precise_text_region_polygon_idx = None
1180            intersected_ratio_max = 0
1181
1182            for (
1183                precise_text_region_polygon_idx,
1184                _,
1185                _,
1186                _,
1187                intersected_ratio,
1188            ) in self.strtree_query_intersected_polygons(
1189                strtree=precise_text_region_tree,
1190                anchor_polygons=precise_text_region_polygons,
1191                candidate_polygon=char_polygon,
1192            ):
1193                if intersected_ratio > intersected_ratio_max:
1194                    intersected_ratio_max = intersected_ratio
1195                    best_precise_text_region_polygon_idx = precise_text_region_polygon_idx
1196
1197            if best_precise_text_region_polygon_idx is not None:
1198                ptrp_idx_to_char_polygons[best_precise_text_region_polygon_idx].append(char_polygon)
1199            else:
1200                # NOTE: Text line with only a small char (i.e. delimiter) could enter this branch.
1201                # In such case, the text line bounding box is smaller than the char polygon, since
1202                # the leading/trailing char paddings are ignored during text line rendering.
1203                # It's acceptable for now since: 1) this case happens rarely, 2) and it won't
1204                # introduce labeling noise.
1205                logger.warning(f'Cannot assign a text region for char_polygon={char_polygon}')
1206
1207        page_text_region_infos: List[PageTextRegionInfo] = []
1208        for ptrp_idx, precise_text_region_polygon in enumerate(precise_text_region_polygons):
1209            if ptrp_idx not in ptrp_idx_to_char_polygons:
1210                continue
1211            page_text_region_infos.append(
1212                PageTextRegionInfo(
1213                    precise_text_region_polygon=precise_text_region_polygon,
1214                    char_polygons=ptrp_idx_to_char_polygons[ptrp_idx],
1215                )
1216            )
1217
1218        # Help gc.
1219        del precise_text_region_polygons
1220        del precise_text_region_shapely_polygons
1221        del precise_text_region_tree
1222
1223        if debug:
1224            debug.page_text_region_infos = page_text_region_infos
1225
1226        # Negative sampling.
1227        page_non_text_region_polygons = self.sample_page_non_text_region_polygons(
1228            page_non_text_region_polygons=tuple(page_non_text_region_collection.to_polygons()),
1229            num_page_text_region_infos=len(page_text_region_infos),
1230            rng=rng,
1231        )
1232
1233        flattened_text_regions = self.build_flattened_text_regions(
1234            page_image=page_image,
1235            page_text_region_infos=page_text_region_infos,
1236            page_non_text_region_polygons=page_non_text_region_polygons,
1237            rng=rng,
1238        )
1239        if debug:
1240            debug.flattened_text_regions = flattened_text_regions
1241
1242        # Stack text regions.
1243        (
1244            image,
1245            active_mask,
1246            text_region_boxes,
1247            char_polygons,
1248            char_polygon_text_region_box_indices,
1249        ) = stack_flattened_text_regions(
1250            page_pad=0,
1251            flattened_text_regions_pad=self.config.stack_flattened_text_regions_pad,
1252            flattened_text_regions=flattened_text_regions,
1253        )
1254
1255        text_region_polygons = [
1256            text_region_box.to_polygon() for text_region_box in text_region_boxes
1257        ]
1258
1259        # Post uniform rotation.
1260        shape_before_rotate = image.shape
1261        rotate_angle = 0
1262
1263        if rng.random() < self.config.prob_post_rotate_90_angle:
1264            rotate_angle = 90
1265
1266        if rng.random() < self.config.prob_post_rotate_random_angle:
1267            rotate_angle += int(
1268                rng.integers(
1269                    self.config.post_rotate_random_angle_min,
1270                    self.config.post_rotate_random_angle_max + 1,
1271                )
1272            )
1273
1274        if rotate_angle != 0:
1275            # For unpacking.
1276            num_char_polygons = len(char_polygons)
1277            rotated_result = rotate.distort(
1278                {'angle': rotate_angle},
1279                image=image,
1280                mask=active_mask,
1281                polygons=(*char_polygons, *text_region_polygons),
1282            )
1283            assert rotated_result.image and rotated_result.mask and rotated_result.polygons
1284            image = rotated_result.image
1285            active_mask = rotated_result.mask
1286            char_polygons = rotated_result.polygons[:num_char_polygons]
1287            text_region_polygons = rotated_result.polygons[num_char_polygons:]
1288
1289        return PageTextRegionStepOutput(
1290            page_image=image,
1291            page_active_mask=active_mask,
1292            page_char_polygons=char_polygons,
1293            page_text_region_polygons=text_region_polygons,
1294            page_char_polygon_text_region_polygon_indices=char_polygon_text_region_box_indices,
1295            shape_before_rotate=shape_before_rotate,
1296            rotate_angle=rotate_angle,
1297            debug=debug,
1298        )
1299
1300
# Module-level factory object built by passing the PageTextRegionStep class to
# PipelineStepFactory.
page_text_region_step_factory = PipelineStepFactory(PageTextRegionStep)
class PageTextRegionStepConfig:
class PageTextRegionStepConfig:
    """Options read by PageTextRegionStep through `self.config`."""
    # If set, chars are taken from `adjusted_char_polygons` of the page char polygon
    # collection instead of `char_polygons`.
    use_adjusted_char_polygons: bool = False
    # Probability of dropping a flattened text region that holds exactly one char.
    prob_drop_single_char_page_text_region_info: float = 0.5
    # Forwarded to TextRegionFlattener as `typical_long_side_ratio_min`.
    text_region_flattener_typical_long_side_ratio_min: float = 3.0
    # Range of the uniformly sampled `text_region_polygon_dilate_ratio` forwarded to
    # TextRegionFlattener.
    text_region_flattener_text_region_polygon_dilate_ratio_min: float = 0.85
    text_region_flattener_text_region_polygon_dilate_ratio_max: float = 1.0
    # Inclusive integer range of the char height median a positive region is resized to.
    text_region_resize_char_height_median_min: int = 32
    text_region_resize_char_height_median_max: int = 46
    # Probability of rotating a typical flattened region by 180 degrees.
    prob_text_region_typical_post_rotate: float = 0.2
    # Probability of rotating an untypical flattened region by 180, 90 or 270 degrees.
    prob_text_region_untypical_post_rotate: float = 0.2
    # With ratio r and n text regions, round(r * n / (1 - r)) non-text regions are
    # requested as negative samples.
    negative_text_region_ratio: float = 0.1
    # NOTE(review): not referenced by the step code visible in this file section;
    # confirm whether it is still in use.
    prob_negative_text_region_post_rotate: float = 0.2
    # Forwarded to stack_flattened_text_regions as `flattened_text_regions_pad`.
    stack_flattened_text_regions_pad: int = 2
    # Probability of starting the post uniform rotation angle at 90 instead of 0.
    prob_post_rotate_90_angle: float = 0.5
    # Probability of adding a random integer angle, drawn from the inclusive range
    # below, to the post uniform rotation angle.
    prob_post_rotate_random_angle: float = 0.0
    post_rotate_random_angle_min: int = -5
    post_rotate_random_angle_max: int = 5
    # If set, a PageTextRegionStepDebug is created, filled and attached to the output.
    enable_debug: bool = False
PageTextRegionStepConfig( use_adjusted_char_polygons: bool = False, prob_drop_single_char_page_text_region_info: float = 0.5, text_region_flattener_typical_long_side_ratio_min: float = 3.0, text_region_flattener_text_region_polygon_dilate_ratio_min: float = 0.85, text_region_flattener_text_region_polygon_dilate_ratio_max: float = 1.0, text_region_resize_char_height_median_min: int = 32, text_region_resize_char_height_median_max: int = 46, prob_text_region_typical_post_rotate: float = 0.2, prob_text_region_untypical_post_rotate: float = 0.2, negative_text_region_ratio: float = 0.1, prob_negative_text_region_post_rotate: float = 0.2, stack_flattened_text_regions_pad: int = 2, prob_post_rotate_90_angle: float = 0.5, prob_post_rotate_random_angle: float = 0.0, post_rotate_random_angle_min: int = -5, post_rotate_random_angle_max: int = 5, enable_debug: bool = False)
# Initializer generated by attrs for PageTextRegionStepConfig: every parameter defaults
# to the `default` of the matching entry in `attr_dict` and is stored on the instance
# under the same name.
def __init__(self, use_adjusted_char_polygons=attr_dict['use_adjusted_char_polygons'].default, prob_drop_single_char_page_text_region_info=attr_dict['prob_drop_single_char_page_text_region_info'].default, text_region_flattener_typical_long_side_ratio_min=attr_dict['text_region_flattener_typical_long_side_ratio_min'].default, text_region_flattener_text_region_polygon_dilate_ratio_min=attr_dict['text_region_flattener_text_region_polygon_dilate_ratio_min'].default, text_region_flattener_text_region_polygon_dilate_ratio_max=attr_dict['text_region_flattener_text_region_polygon_dilate_ratio_max'].default, text_region_resize_char_height_median_min=attr_dict['text_region_resize_char_height_median_min'].default, text_region_resize_char_height_median_max=attr_dict['text_region_resize_char_height_median_max'].default, prob_text_region_typical_post_rotate=attr_dict['prob_text_region_typical_post_rotate'].default, prob_text_region_untypical_post_rotate=attr_dict['prob_text_region_untypical_post_rotate'].default, negative_text_region_ratio=attr_dict['negative_text_region_ratio'].default, prob_negative_text_region_post_rotate=attr_dict['prob_negative_text_region_post_rotate'].default, stack_flattened_text_regions_pad=attr_dict['stack_flattened_text_regions_pad'].default, prob_post_rotate_90_angle=attr_dict['prob_post_rotate_90_angle'].default, prob_post_rotate_random_angle=attr_dict['prob_post_rotate_random_angle'].default, post_rotate_random_angle_min=attr_dict['post_rotate_random_angle_min'].default, post_rotate_random_angle_max=attr_dict['post_rotate_random_angle_max'].default, enable_debug=attr_dict['enable_debug'].default):
    self.use_adjusted_char_polygons = use_adjusted_char_polygons
    self.prob_drop_single_char_page_text_region_info = prob_drop_single_char_page_text_region_info
    self.text_region_flattener_typical_long_side_ratio_min = text_region_flattener_typical_long_side_ratio_min
    self.text_region_flattener_text_region_polygon_dilate_ratio_min = text_region_flattener_text_region_polygon_dilate_ratio_min
    self.text_region_flattener_text_region_polygon_dilate_ratio_max = text_region_flattener_text_region_polygon_dilate_ratio_max
    self.text_region_resize_char_height_median_min = text_region_resize_char_height_median_min
    self.text_region_resize_char_height_median_max = text_region_resize_char_height_median_max
    self.prob_text_region_typical_post_rotate = prob_text_region_typical_post_rotate
    self.prob_text_region_untypical_post_rotate = prob_text_region_untypical_post_rotate
    self.negative_text_region_ratio = negative_text_region_ratio
    self.prob_negative_text_region_post_rotate = prob_negative_text_region_post_rotate
    self.stack_flattened_text_regions_pad = stack_flattened_text_regions_pad
    self.prob_post_rotate_90_angle = prob_post_rotate_90_angle
    self.prob_post_rotate_random_angle = prob_post_rotate_random_angle
    self.post_rotate_random_angle_min = post_rotate_random_angle_min
    self.post_rotate_random_angle_max = post_rotate_random_angle_max
    self.enable_debug = enable_debug

Method generated by attrs for class PageTextRegionStepConfig.

class PageTextRegionStepInput:
class PageTextRegionStepInput:
    """Input of PageTextRegionStep.run."""
    # Output of the page distortion step. The step reads the page image, the char
    # polygon collection, and the disconnected text region / non-text region
    # collections from it.
    page_distortion_step_output: PageDistortionStepOutput
    # Output of the page resizing step. The step reads `page_text_line_mask` from it.
    page_resizing_step_output: PageResizingStepOutput
PageTextRegionStepInput( page_distortion_step_output: vkit.pipeline.text_detection.page_distortion.PageDistortionStepOutput, page_resizing_step_output: vkit.pipeline.text_detection.page_resizing.PageResizingStepOutput)
# Initializer generated by attrs for PageTextRegionStepInput: both arguments are
# required and stored on the instance under the same name.
def __init__(self, page_distortion_step_output, page_resizing_step_output):
    self.page_distortion_step_output = page_distortion_step_output
    self.page_resizing_step_output = page_resizing_step_output

Method generated by attrs for class PageTextRegionStepInput.

class PageTextRegionInfo:
class PageTextRegionInfo:
    """A precise text region of the page along with the chars bound to it."""
    # Polygon of the precise text region.
    precise_text_region_polygon: Polygon
    # Char polygons assigned to this region. PageTextRegionStep.run assigns each char
    # to the region with the largest intersected ratio.
    char_polygons: Sequence[Polygon]
PageTextRegionInfo( precise_text_region_polygon: vkit.element.polygon.Polygon, char_polygons: Sequence[vkit.element.polygon.Polygon])
# Initializer generated by attrs for PageTextRegionInfo: both arguments are required
# and stored on the instance under the same name.
def __init__(self, precise_text_region_polygon, char_polygons):
    self.precise_text_region_polygon = precise_text_region_polygon
    self.char_polygons = char_polygons

Method generated by attrs for class PageTextRegionInfo.

class FlattenedTextRegion:
 74class FlattenedTextRegion:
 75    is_typical: bool
 76    text_region_polygon: Polygon
 77    text_region_image: Image
 78    bounding_extended_text_region_mask: Mask
 79    flattening_rotate_angle: int
 80    shape_before_trim: Tuple[int, int]
 81    rotated_trimmed_box: Box
 82    shape_before_resize: Tuple[int, int]
 83    post_rotate_angle: int
 84    flattened_image: Image
 85    flattened_mask: Mask
 86    flattened_char_polygons: Optional[Sequence[Polygon]]
 87
 88    @property
 89    def shape(self):
 90        return self.flattened_image.shape
 91
 92    @property
 93    def height(self):
 94        return self.flattened_image.height
 95
 96    @property
 97    def width(self):
 98        return self.flattened_image.width
 99
100    @property
101    def area(self):
102        return self.flattened_image.area
103
104    def get_char_height_meidan(self):
105        assert self.flattened_char_polygons
106        return statistics.median(
107            char_polygon.get_rectangular_height() for char_polygon in self.flattened_char_polygons
108        )
109
110    def to_resized_flattened_text_region(
111        self,
112        resized_height: Optional[int] = None,
113        resized_width: Optional[int] = None,
114    ):
115        resized_flattened_image = self.flattened_image.to_resized_image(
116            resized_height=resized_height,
117            resized_width=resized_width,
118        )
119
120        resized_flattened_mask = self.flattened_mask.to_resized_mask(
121            resized_height=resized_height,
122            resized_width=resized_width,
123        )
124
125        resized_flattened_char_polygons = None
126        if self.flattened_char_polygons is not None:
127            resized_flattened_char_polygons = [
128                flattened_char_polygon.to_conducted_resized_polygon(
129                    self.shape,
130                    resized_height=resized_height,
131                    resized_width=resized_width,
132                ) for flattened_char_polygon in self.flattened_char_polygons
133            ]
134
135        return attrs.evolve(
136            self,
137            flattened_image=resized_flattened_image,
138            flattened_mask=resized_flattened_mask,
139            flattened_char_polygons=resized_flattened_char_polygons,
140        )
141
142    def to_post_rotated_flattened_text_region(
143        self,
144        post_rotate_angle: int,
145    ):
146        assert self.post_rotate_angle == 0
147
148        # NOTE: No need to trim.
149        rotated_result = rotate.distort(
150            {'angle': post_rotate_angle},
151            image=self.flattened_image,
152            mask=self.flattened_mask,
153            polygons=self.flattened_char_polygons,
154        )
155        rotated_flattened_image = rotated_result.image
156        assert rotated_flattened_image
157        rotated_flattened_mask = rotated_result.mask
158        assert rotated_flattened_mask
159        rotated_flattened_char_polygons = rotated_result.polygons
160
161        return attrs.evolve(
162            self,
163            post_rotate_angle=post_rotate_angle,
164            flattened_image=rotated_flattened_image,
165            flattened_mask=rotated_flattened_mask,
166            flattened_char_polygons=rotated_flattened_char_polygons,
167        )
FlattenedTextRegion( is_typical: bool, text_region_polygon: vkit.element.polygon.Polygon, text_region_image: vkit.element.image.Image, bounding_extended_text_region_mask: vkit.element.mask.Mask, flattening_rotate_angle: int, shape_before_trim: Tuple[int, int], rotated_trimmed_box: vkit.element.box.Box, shape_before_resize: Tuple[int, int], post_rotate_angle: int, flattened_image: vkit.element.image.Image, flattened_mask: vkit.element.mask.Mask, flattened_char_polygons: Union[Sequence[vkit.element.polygon.Polygon], NoneType])
 2def __init__(self, is_typical, text_region_polygon, text_region_image, bounding_extended_text_region_mask, flattening_rotate_angle, shape_before_trim, rotated_trimmed_box, shape_before_resize, post_rotate_angle, flattened_image, flattened_mask, flattened_char_polygons):
 3    self.is_typical = is_typical
 4    self.text_region_polygon = text_region_polygon
 5    self.text_region_image = text_region_image
 6    self.bounding_extended_text_region_mask = bounding_extended_text_region_mask
 7    self.flattening_rotate_angle = flattening_rotate_angle
 8    self.shape_before_trim = shape_before_trim
 9    self.rotated_trimmed_box = rotated_trimmed_box
10    self.shape_before_resize = shape_before_resize
11    self.post_rotate_angle = post_rotate_angle
12    self.flattened_image = flattened_image
13    self.flattened_mask = flattened_mask
14    self.flattened_char_polygons = flattened_char_polygons

Method generated by attrs for class FlattenedTextRegion.

def get_char_height_meidan(self):
104    def get_char_height_meidan(self):
105        assert self.flattened_char_polygons
106        return statistics.median(
107            char_polygon.get_rectangular_height() for char_polygon in self.flattened_char_polygons
108        )
def to_resized_flattened_text_region( self, resized_height: Union[int, NoneType] = None, resized_width: Union[int, NoneType] = None):
110    def to_resized_flattened_text_region(
111        self,
112        resized_height: Optional[int] = None,
113        resized_width: Optional[int] = None,
114    ):
115        resized_flattened_image = self.flattened_image.to_resized_image(
116            resized_height=resized_height,
117            resized_width=resized_width,
118        )
119
120        resized_flattened_mask = self.flattened_mask.to_resized_mask(
121            resized_height=resized_height,
122            resized_width=resized_width,
123        )
124
125        resized_flattened_char_polygons = None
126        if self.flattened_char_polygons is not None:
127            resized_flattened_char_polygons = [
128                flattened_char_polygon.to_conducted_resized_polygon(
129                    self.shape,
130                    resized_height=resized_height,
131                    resized_width=resized_width,
132                ) for flattened_char_polygon in self.flattened_char_polygons
133            ]
134
135        return attrs.evolve(
136            self,
137            flattened_image=resized_flattened_image,
138            flattened_mask=resized_flattened_mask,
139            flattened_char_polygons=resized_flattened_char_polygons,
140        )
def to_post_rotated_flattened_text_region(self, post_rotate_angle: int):
142    def to_post_rotated_flattened_text_region(
143        self,
144        post_rotate_angle: int,
145    ):
146        assert self.post_rotate_angle == 0
147
148        # NOTE: No need to trim.
149        rotated_result = rotate.distort(
150            {'angle': post_rotate_angle},
151            image=self.flattened_image,
152            mask=self.flattened_mask,
153            polygons=self.flattened_char_polygons,
154        )
155        rotated_flattened_image = rotated_result.image
156        assert rotated_flattened_image
157        rotated_flattened_mask = rotated_result.mask
158        assert rotated_flattened_mask
159        rotated_flattened_char_polygons = rotated_result.polygons
160
161        return attrs.evolve(
162            self,
163            post_rotate_angle=post_rotate_angle,
164            flattened_image=rotated_flattened_image,
165            flattened_mask=rotated_flattened_mask,
166            flattened_char_polygons=rotated_flattened_char_polygons,
167        )
class PageTextRegionStepDebug:
class PageTextRegionStepDebug:
    """Debug artifacts of the page text region step.

    NOTE: Every field defaults to None although its annotation is not Optional,
    so a field can still be None when read.
    """
    # Page image.
    page_image: Image = attrs.field(default=None)
    # Candidate polygons for the precise text regions.
    precise_text_region_candidate_polygons: Sequence[Polygon] = attrs.field(default=None)
    # Text region infos of the page.
    page_text_region_infos: Sequence[PageTextRegionInfo] = attrs.field(default=None)
    # Flattened text regions.
    flattened_text_regions: Sequence[FlattenedTextRegion] = attrs.field(default=None)
PageTextRegionStepDebug( page_image: vkit.element.image.Image = None, precise_text_region_candidate_polygons: Sequence[vkit.element.polygon.Polygon] = None, page_text_region_infos: Sequence[vkit.pipeline.text_detection.page_text_region.PageTextRegionInfo] = None, flattened_text_regions: Sequence[vkit.pipeline.text_detection.page_text_region.FlattenedTextRegion] = None)
2def __init__(self, page_image=attr_dict['page_image'].default, precise_text_region_candidate_polygons=attr_dict['precise_text_region_candidate_polygons'].default, page_text_region_infos=attr_dict['page_text_region_infos'].default, flattened_text_regions=attr_dict['flattened_text_regions'].default):
3    self.page_image = page_image
4    self.precise_text_region_candidate_polygons = precise_text_region_candidate_polygons
5    self.page_text_region_infos = page_text_region_infos
6    self.flattened_text_regions = flattened_text_regions

Method generated by attrs for class PageTextRegionStepDebug.

class PageTextRegionStepOutput:
class PageTextRegionStepOutput:
    """Output of the page text region step."""
    # Resulting page image and its active mask.
    page_image: Image
    page_active_mask: Mask
    # Char polygons and text region polygons of the resulting page.
    page_char_polygons: Sequence[Polygon]
    page_text_region_polygons: Sequence[Polygon]
    # NOTE(review): presumably, for each char polygon, the index of its text region
    # polygon in `page_text_region_polygons`; confirm with the producer.
    page_char_polygon_text_region_polygon_indices: Sequence[int]
    # Page shape before rotation and the rotate angle.
    shape_before_rotate: Tuple[int, int]
    rotate_angle: int
    # Debug artifacts, or None.
    debug: Optional[PageTextRegionStepDebug]
PageTextRegionStepOutput( page_image: vkit.element.image.Image, page_active_mask: vkit.element.mask.Mask, page_char_polygons: Sequence[vkit.element.polygon.Polygon], page_text_region_polygons: Sequence[vkit.element.polygon.Polygon], page_char_polygon_text_region_polygon_indices: Sequence[int], shape_before_rotate: Tuple[int, int], rotate_angle: int, debug: Union[vkit.pipeline.text_detection.page_text_region.PageTextRegionStepDebug, NoneType])
 2def __init__(self, page_image, page_active_mask, page_char_polygons, page_text_region_polygons, page_char_polygon_text_region_polygon_indices, shape_before_rotate, rotate_angle, debug):
 3    self.page_image = page_image
 4    self.page_active_mask = page_active_mask
 5    self.page_char_polygons = page_char_polygons
 6    self.page_text_region_polygons = page_text_region_polygons
 7    self.page_char_polygon_text_region_polygon_indices = page_char_polygon_text_region_polygon_indices
 8    self.shape_before_rotate = shape_before_rotate
 9    self.rotate_angle = rotate_angle
10    self.debug = debug

Method generated by attrs for class PageTextRegionStepOutput.

def calculate_boxed_masks_intersected_ratio( anchor_mask: vkit.element.mask.Mask, candidate_mask: vkit.element.mask.Mask, use_candidate_as_base: bool = False):
def calculate_boxed_masks_intersected_ratio(
    anchor_mask: Mask,
    candidate_mask: Mask,
    use_candidate_as_base: bool = False,
):
    """Return the ratio of the area shared by two box-attached masks.

    Both masks must carry a box. The shared area is divided by the candidate
    area if `use_candidate_as_base` is set, otherwise by the area of the union
    of both masks. Returns 0.0 if the boxes do not overlap or if the base area
    is empty.
    """
    anchor_box = anchor_mask.box
    assert anchor_box

    candidate_box = candidate_mask.box
    assert candidate_box

    # Calculate intersection. NOTE: `down` and `right` are inclusive.
    up = max(anchor_box.up, candidate_box.up)
    down = min(anchor_box.down, candidate_box.down)
    left = max(anchor_box.left, candidate_box.left)
    right = min(anchor_box.right, candidate_box.right)

    if up > down or left > right:
        return 0.0

    # Slice both mats with coordinates relative to their own boxes.
    np_intersected_anchor_mask = anchor_mask.mat[
        up - anchor_box.up:down - anchor_box.up + 1,
        left - anchor_box.left:right - anchor_box.left + 1,
    ]  # yapf: disable
    np_intersected_candidate_mask = candidate_mask.mat[
        up - candidate_box.up:down - candidate_box.up + 1,
        left - candidate_box.left:right - candidate_box.left + 1,
    ]  # yapf: disable
    np_intersected_mask = np_intersected_anchor_mask & np_intersected_candidate_mask
    intersected_area = int(np_intersected_mask.sum())

    if use_candidate_as_base:
        base_area = int(candidate_mask.np_mask.sum())
    else:
        base_area = (
            int(anchor_mask.np_mask.sum()) + int(candidate_mask.np_mask.sum()) - intersected_area
        )

    if base_area == 0:
        # Nothing to compare against; avoid ZeroDivisionError.
        return 0.0

    return intersected_area / base_area
class TextRegionFlattener:
231class TextRegionFlattener:
232
233    @classmethod
234    def patch_text_region_polygons(
235        cls,
236        text_region_polygons: Sequence[Polygon],
237        grouped_char_polygons: Optional[Sequence[Sequence[Polygon]]],
238    ):
239        if grouped_char_polygons is None:
240            return text_region_polygons
241
242        assert len(text_region_polygons) == len(grouped_char_polygons)
243
244        patched_text_region_polygons: List[Polygon] = []
245        for text_region_polygon, char_polygons in zip(text_region_polygons, grouped_char_polygons):
246            # Need to make sure all char polygons are included.
247            unionized_polygons = [text_region_polygon]
248            unionized_polygons.extend(char_polygons)
249
250            bounding_box = Box.from_boxes((polygon.bounding_box for polygon in unionized_polygons))
251            mask = Mask.from_shapable(bounding_box).to_box_attached(bounding_box)
252            for polygon in unionized_polygons:
253                polygon.fill_mask(mask)
254
255            patched_text_region_polygons.append(mask.to_external_polygon())
256
257        return patched_text_region_polygons
258
259    @classmethod
260    def get_dilated_and_bounding_rectangular_polygons(
261        cls,
262        text_region_polygon_dilate_ratio: float,
263        shape: Tuple[int, int],
264        text_region_polygons: Sequence[Polygon],
265        force_no_dilation_flags: Optional[Sequence[bool]] = None,
266    ):
267        dilated_text_region_polygons: List[Polygon] = []
268        bounding_rectangular_polygons: List[Polygon] = []
269
270        if force_no_dilation_flags is None:
271            force_no_dilation_flags_iter = itertools.repeat(False)
272        else:
273            assert len(force_no_dilation_flags) == len(text_region_polygons)
274            force_no_dilation_flags_iter = force_no_dilation_flags
275
276        for text_region_polygon, force_no_dilation_flag in zip(
277            text_region_polygons, force_no_dilation_flags_iter
278        ):
279
280            if not force_no_dilation_flag:
281                # Dilate.
282                text_region_polygon = text_region_polygon.to_dilated_polygon(
283                    ratio=text_region_polygon_dilate_ratio,
284                )
285                text_region_polygon = text_region_polygon.to_clipped_polygon(shape)
286
287            dilated_text_region_polygons.append(text_region_polygon)
288            bounding_rectangular_polygons.append(
289                text_region_polygon.to_bounding_rectangular_polygon(shape)
290            )
291
292        return dilated_text_region_polygons, bounding_rectangular_polygons
293
294    @classmethod
295    def analyze_bounding_rectangular_polygons(
296        cls,
297        bounding_rectangular_polygons: Sequence[Polygon],
298    ):
299        short_side_lengths: List[float] = []
300        long_side_ratios: List[float] = []
301        long_side_angles: List[int] = []
302
303        for polygon in bounding_rectangular_polygons:
304            # Get reference line.
305            point0, point1, _, point3 = polygon.points
306            side0_length = math.hypot(
307                point0.smooth_y - point1.smooth_y,
308                point0.smooth_x - point1.smooth_x,
309            )
310            side1_length = math.hypot(
311                point0.smooth_y - point3.smooth_y,
312                point0.smooth_x - point3.smooth_x,
313            )
314
315            # Get the short side length.
316            short_side_lengths.append(min(side0_length, side1_length))
317
318            long_side_ratios.append(
319                max(side0_length, side1_length) / min(side0_length, side1_length)
320            )
321
322            point_a = point0
323            if side0_length > side1_length:
324                # Reference line (p0 -> p1).
325                point_b = point1
326            else:
327                # Reference line (p0 -> p3).
328                point_b = point3
329
330            # Get the angle of reference line, in [0, 180) degree.
331            np_theta = np.arctan2(
332                point_a.smooth_y - point_b.smooth_y,
333                point_a.smooth_x - point_b.smooth_x,
334            )
335            np_theta = np_theta % np.pi
336            long_side_angle = round(np_theta / np.pi * 180) % 180
337            long_side_angles.append(long_side_angle)
338
339        return short_side_lengths, long_side_ratios, long_side_angles
340
341    @classmethod
342    def get_typical_indices(
343        cls,
344        typical_long_side_ratio_min: float,
345        long_side_ratios: Sequence[float],
346    ):
347        return tuple(
348            idx for idx, long_side_ratio in enumerate(long_side_ratios)
349            if long_side_ratio >= typical_long_side_ratio_min
350        )
351
352    @classmethod
353    def check_first_text_region_polygon_is_larger(
354        cls,
355        text_region_polygons: Sequence[Polygon],
356        short_side_lengths: Sequence[float],
357        first_idx: int,
358        second_idx: int,
359    ):
360        first_text_region_polygon = text_region_polygons[first_idx]
361        second_text_region_polygon = text_region_polygons[second_idx]
362
363        # The short side indicates the text line height.
364        first_short_side_length = short_side_lengths[first_idx]
365        second_short_side_length = short_side_lengths[second_idx]
366
367        return (
368            first_text_region_polygon.area >= second_text_region_polygon.area
369            and first_short_side_length >= second_short_side_length
370        )
371
372    @classmethod
373    def get_main_and_flattening_rotate_angles(
374        cls,
375        text_region_polygons: Sequence[Polygon],
376        typical_indices: Sequence[int],
377        short_side_lengths: Sequence[float],
378        long_side_angles: Sequence[int],
379    ):
380        typical_indices_set = set(typical_indices)
381        text_region_center_points = [
382            text_region_polygon.get_center_point() for text_region_polygon in text_region_polygons
383        ]
384
385        main_angles: List[Optional[int]] = [None] * len(long_side_angles)
386
387        # 1. For typical indices, or if no typical indices.
388        for idx, long_side_angle in enumerate(long_side_angles):
389            if not typical_indices_set or idx in typical_indices_set:
390                main_angles[idx] = long_side_angle
391
392        # 2. For nontypcial indices.
393        if typical_indices_set:
394            typical_center_points = PointList(
395                text_region_center_points[idx] for idx in typical_indices
396            )
397            kd_tree = KDTree(typical_center_points.to_np_array())
398
399            nontypical_indices = tuple(
400                idx for idx, _ in enumerate(long_side_angles) if idx not in typical_indices_set
401            )
402            nontypical_center_points = PointList(
403                text_region_center_points[idx] for idx in nontypical_indices
404            )
405
406            # Set main angle as the closest typical angle.
407            # Round 1: Set if the closest typical polygon is large enough.
408            _, np_kd_nbr_indices = kd_tree.query(nontypical_center_points.to_np_array())
409            round2_nontypical_indices: List[int] = []
410            for nontypical_idx, typical_indices_idx in zip(
411                nontypical_indices,
412                np_kd_nbr_indices[:, 0].tolist(),
413            ):
414                typical_idx = typical_indices[typical_indices_idx]
415                if cls.check_first_text_region_polygon_is_larger(
416                    text_region_polygons=text_region_polygons,
417                    short_side_lengths=short_side_lengths,
418                    first_idx=typical_idx,
419                    second_idx=nontypical_idx,
420                ):
421                    main_angles[nontypical_idx] = main_angles[typical_idx]
422                else:
423                    round2_nontypical_indices.append(nontypical_idx)
424
425            # Round 2: Searching the closest typical polygon that has larger area.
426            round3_nontypical_indices: List[int] = []
427            if round2_nontypical_indices:
428                round2_nontypical_center_points = PointList(
429                    text_region_center_points[idx] for idx in round2_nontypical_indices
430                )
431                _, np_kd_nbr_indices = kd_tree.query(
432                    round2_nontypical_center_points.to_np_array(),
433                    k=len(typical_center_points),
434                )
435                for nontypical_idx, typical_indices_indices in zip(
436                    round2_nontypical_indices,
437                    np_kd_nbr_indices.tolist(),
438                ):
439                    hit_typical_idx = None
440                    for typical_indices_idx in typical_indices_indices:
441                        typical_idx = typical_indices[typical_indices_idx]
442                        if cls.check_first_text_region_polygon_is_larger(
443                            text_region_polygons=text_region_polygons,
444                            short_side_lengths=short_side_lengths,
445                            first_idx=typical_idx,
446                            second_idx=nontypical_idx,
447                        ):
448                            hit_typical_idx = typical_idx
449                            break
450
451                    if hit_typical_idx is not None:
452                        main_angles[nontypical_idx] = main_angles[hit_typical_idx]
453                    else:
454                        round3_nontypical_indices.append(nontypical_idx)
455
456            # Round 3: Last resort. Set to the median of typical angles.
457            if round3_nontypical_indices:
458                main_angles_median = statistics.median_low(
459                    long_side_angles[typical_idx] for typical_idx in typical_indices
460                )
461                for nontypical_idx in round3_nontypical_indices:
462                    main_angles[nontypical_idx] = main_angles_median
463
464        # 3. Get angle for flattening.
465        flattening_rotate_angles: List[int] = []
466        for main_angle in main_angles:
467            assert main_angle is not None
468            if main_angle <= 90:
469                # [270, 360).
470                flattening_rotate_angle = (360 - main_angle) % 360
471            else:
472                # [1, 90).
473                flattening_rotate_angle = 180 - main_angle
474            flattening_rotate_angles.append(flattening_rotate_angle)
475
476        return cast(List[int], main_angles), flattening_rotate_angles
477
478    @classmethod
479    def get_bounding_extended_text_region_masks(
480        cls,
481        shape: Tuple[int, int],
482        text_region_polygons: Sequence[Polygon],
483        dilated_text_region_polygons: Sequence[Polygon],
484        bounding_rectangular_polygons: Sequence[Polygon],
485        typical_indices: Sequence[int],
486        main_angles: Sequence[int],
487    ):
488        typical_indices_set = set(typical_indices)
489
490        text_mask = Mask.from_polygons(shape, text_region_polygons)
491        non_text_mask = text_mask.to_inverted_mask()
492
493        box = Box.from_shape(shape)
494        text_mask = text_mask.to_box_attached(box)
495        non_text_mask = non_text_mask.to_box_attached(box)
496
497        bounding_extended_text_region_masks: List[Mask] = []
498
499        num_text_region_polygons = len(text_region_polygons)
500        for idx in range(num_text_region_polygons):
501            text_region_polygon = text_region_polygons[idx]
502            dilated_text_region_polygon = dilated_text_region_polygons[idx]
503            bounding_rectangular_polygon = bounding_rectangular_polygons[idx]
504
505            if typical_indices_set and idx not in typical_indices_set:
506                # Patch bounding rectangular polygon if is nontypical.
507                main_angle = main_angles[idx]
508                bounding_rectangular_polygon = \
509                    dilated_text_region_polygon.to_bounding_rectangular_polygon(
510                        shape=shape,
511                        angle=main_angle,
512                    )
513
514            # See the comment in Polygon.to_bounding_rectangular_polygon.
515            bounding_box = Box.from_boxes((
516                dilated_text_region_polygon.bounding_box,
517                bounding_rectangular_polygon.bounding_box,
518            ))
519
520            # Fill other text region.
521            bounding_other_text_mask = \
522                Mask.from_shapable(bounding_box).to_box_attached(bounding_box)
523            # Copy from text mask.
524            bounding_rectangular_polygon.fill_mask(bounding_other_text_mask, text_mask)
525            # Use the original text region polygon to unset the current text mask.
526            text_region_polygon.fill_mask(bounding_other_text_mask, 0)
527
528            # Fill protentially dilated text region.
529            bounding_text_mask = \
530                Mask.from_shapable(bounding_other_text_mask).to_box_attached(bounding_box)
531            # Use the protentially dilated text region polygon to set the current text mask.
532            dilated_text_region_polygon.fill_mask(bounding_text_mask, value=1)
533
534            del dilated_text_region_polygon
535
536            # Trim protentially dilated text region polygon by eliminating other text region.
537            bounding_trimmed_text_mask = Mask.from_masks(
538                bounding_box,
539                [
540                    # Includes the protentially dilated text region.
541                    bounding_text_mask,
542                    # But not includes any other text regions.
543                    bounding_other_text_mask.to_inverted_mask(),
544                ],
545                ElementSetOperationMode.INTERSECT,
546            )
547
548            # Extract non-text region.
549            bounding_non_text_mask = bounding_rectangular_polygon.extract_mask(non_text_mask)
550
551            # Unionize trimmed text region and non-text region.
552            bounding_extended_text_region_mask = Mask.from_masks(
553                bounding_box,
554                [bounding_trimmed_text_mask, bounding_non_text_mask],
555            )
556
557            bounding_extended_text_region_masks.append(bounding_extended_text_region_mask)
558
559        return bounding_extended_text_region_masks
560
561    @classmethod
562    def build_flattened_text_regions(
563        cls,
564        image: Image,
565        text_region_polygons: Sequence[Polygon],
566        bounding_extended_text_region_masks: Sequence[Mask],
567        typical_indices: Sequence[int],
568        flattening_rotate_angles: Sequence[int],
569        grouped_char_polygons: Optional[Sequence[Sequence[Polygon]]],
570    ):
571        typical_indices_set = set(typical_indices)
572
573        flattened_text_regions: List[FlattenedTextRegion] = []
574
575        for idx, (
576            text_region_polygon,
577            bounding_extended_text_region_mask,
578            flattening_rotate_angle,
579        ) in enumerate(
580            zip(
581                text_region_polygons,
582                bounding_extended_text_region_masks,
583                flattening_rotate_angles,
584            )
585        ):
586            bounding_box = bounding_extended_text_region_mask.box
587            assert bounding_box
588
589            # Extract image.
590            text_region_image = bounding_extended_text_region_mask.extract_image(image)
591
592            # Shift char polygons.
593            relative_char_polygons = None
594            if grouped_char_polygons is not None:
595                char_polygons = grouped_char_polygons[idx]
596                relative_char_polygons = [
597                    char_polygon.to_relative_polygon(
598                        origin_y=bounding_box.up,
599                        origin_x=bounding_box.left,
600                    ) for char_polygon in char_polygons
601                ]
602
603            # Rotate.
604            rotated_result = rotate.distort(
605                {'angle': flattening_rotate_angle},
606                image=text_region_image,
607                mask=bounding_extended_text_region_mask,
608                polygons=relative_char_polygons,
609            )
610            rotated_text_region_image = rotated_result.image
611            assert rotated_text_region_image
612            rotated_bounding_extended_text_region_mask = rotated_result.mask
613            assert rotated_bounding_extended_text_region_mask
614            # Could be None.
615            rotated_char_polygons = rotated_result.polygons
616
617            # Trim.
618            rotated_trimmed_box = rotated_bounding_extended_text_region_mask.to_external_box()
619
620            trimmed_text_region_image = rotated_text_region_image.to_cropped_image(
621                up=rotated_trimmed_box.up,
622                down=rotated_trimmed_box.down,
623                left=rotated_trimmed_box.left,
624                right=rotated_trimmed_box.right,
625            )
626
627            trimmed_mask = rotated_trimmed_box.extract_mask(
628                rotated_bounding_extended_text_region_mask
629            )
630
631            trimmed_char_polygons = None
632            if rotated_char_polygons:
633                trimmed_char_polygons = [
634                    rotated_char_polygon.to_relative_polygon(
635                        origin_y=rotated_trimmed_box.up,
636                        origin_x=rotated_trimmed_box.left,
637                    ) for rotated_char_polygon in rotated_char_polygons
638                ]
639
640            flattened_text_regions.append(
641                FlattenedTextRegion(
642                    is_typical=(idx in typical_indices_set),
643                    text_region_polygon=text_region_polygon,
644                    text_region_image=bounding_extended_text_region_mask.extract_image(image),
645                    bounding_extended_text_region_mask=bounding_extended_text_region_mask,
646                    flattening_rotate_angle=flattening_rotate_angle,
647                    shape_before_trim=rotated_text_region_image.shape,
648                    rotated_trimmed_box=rotated_trimmed_box,
649                    shape_before_resize=trimmed_text_region_image.shape,
650                    post_rotate_angle=0,
651                    flattened_image=trimmed_text_region_image,
652                    flattened_mask=trimmed_mask,
653                    flattened_char_polygons=trimmed_char_polygons,
654                )
655            )
656
657        return flattened_text_regions
658
659    def __init__(
660        self,
661        typical_long_side_ratio_min: float,
662        text_region_polygon_dilate_ratio: float,
663        image: Image,
664        text_region_polygons: Sequence[Polygon],
665        grouped_char_polygons: Optional[Sequence[Sequence[Polygon]]] = None,
666        is_training: bool = False,
667    ):
668        self.original_text_region_polygons = text_region_polygons
669
670        self.text_region_polygons = self.patch_text_region_polygons(
671            text_region_polygons=text_region_polygons,
672            grouped_char_polygons=grouped_char_polygons,
673        )
674
675        force_no_dilation_flags = None
676        if is_training:
677            assert grouped_char_polygons and len(text_region_polygons) == len(grouped_char_polygons)
678            force_no_dilation_flags = []
679            for char_polygons in grouped_char_polygons:
680                force_no_dilation_flags.append(not char_polygons)
681
682        (
683            self.dilated_text_region_polygons,
684            self.bounding_rectangular_polygons,
685        ) = self.get_dilated_and_bounding_rectangular_polygons(
686            text_region_polygon_dilate_ratio=text_region_polygon_dilate_ratio,
687            shape=image.shape,
688            text_region_polygons=self.text_region_polygons,
689            force_no_dilation_flags=force_no_dilation_flags,
690        )
691
692        (
693            self.short_side_lengths,
694            self.long_side_ratios,
695            self.long_side_angles,
696        ) = self.analyze_bounding_rectangular_polygons(self.bounding_rectangular_polygons)
697
698        self.typical_indices = self.get_typical_indices(
699            typical_long_side_ratio_min=typical_long_side_ratio_min,
700            long_side_ratios=self.long_side_ratios,
701        )
702
703        (
704            self.main_angles,
705            self.flattening_rotate_angles,
706        ) = self.get_main_and_flattening_rotate_angles(
707            text_region_polygons=self.text_region_polygons,
708            typical_indices=self.typical_indices,
709            short_side_lengths=self.short_side_lengths,
710            long_side_angles=self.long_side_angles,
711        )
712
713        self.bounding_extended_text_region_masks = self.get_bounding_extended_text_region_masks(
714            shape=image.shape,
715            text_region_polygons=self.text_region_polygons,
716            dilated_text_region_polygons=self.dilated_text_region_polygons,
717            bounding_rectangular_polygons=self.bounding_rectangular_polygons,
718            typical_indices=self.typical_indices,
719            main_angles=self.main_angles,
720        )
721
722        self.flattened_text_regions = self.build_flattened_text_regions(
723            image=image,
724            # NOTE: need to use the original text region polygons for reversed opts.
725            text_region_polygons=self.original_text_region_polygons,
726            bounding_extended_text_region_masks=self.bounding_extended_text_region_masks,
727            typical_indices=self.typical_indices,
728            flattening_rotate_angles=self.flattening_rotate_angles,
729            grouped_char_polygons=grouped_char_polygons,
730        )
TextRegionFlattener( typical_long_side_ratio_min: float, text_region_polygon_dilate_ratio: float, image: vkit.element.image.Image, text_region_polygons: Sequence[vkit.element.polygon.Polygon], grouped_char_polygons: Union[Sequence[Sequence[vkit.element.polygon.Polygon]], NoneType] = None, is_training: bool = False)
659    def __init__(
660        self,
661        typical_long_side_ratio_min: float,
662        text_region_polygon_dilate_ratio: float,
663        image: Image,
664        text_region_polygons: Sequence[Polygon],
665        grouped_char_polygons: Optional[Sequence[Sequence[Polygon]]] = None,
666        is_training: bool = False,
667    ):
668        self.original_text_region_polygons = text_region_polygons
669
670        self.text_region_polygons = self.patch_text_region_polygons(
671            text_region_polygons=text_region_polygons,
672            grouped_char_polygons=grouped_char_polygons,
673        )
674
675        force_no_dilation_flags = None
676        if is_training:
677            assert grouped_char_polygons and len(text_region_polygons) == len(grouped_char_polygons)
678            force_no_dilation_flags = []
679            for char_polygons in grouped_char_polygons:
680                force_no_dilation_flags.append(not char_polygons)
681
682        (
683            self.dilated_text_region_polygons,
684            self.bounding_rectangular_polygons,
685        ) = self.get_dilated_and_bounding_rectangular_polygons(
686            text_region_polygon_dilate_ratio=text_region_polygon_dilate_ratio,
687            shape=image.shape,
688            text_region_polygons=self.text_region_polygons,
689            force_no_dilation_flags=force_no_dilation_flags,
690        )
691
692        (
693            self.short_side_lengths,
694            self.long_side_ratios,
695            self.long_side_angles,
696        ) = self.analyze_bounding_rectangular_polygons(self.bounding_rectangular_polygons)
697
698        self.typical_indices = self.get_typical_indices(
699            typical_long_side_ratio_min=typical_long_side_ratio_min,
700            long_side_ratios=self.long_side_ratios,
701        )
702
703        (
704            self.main_angles,
705            self.flattening_rotate_angles,
706        ) = self.get_main_and_flattening_rotate_angles(
707            text_region_polygons=self.text_region_polygons,
708            typical_indices=self.typical_indices,
709            short_side_lengths=self.short_side_lengths,
710            long_side_angles=self.long_side_angles,
711        )
712
713        self.bounding_extended_text_region_masks = self.get_bounding_extended_text_region_masks(
714            shape=image.shape,
715            text_region_polygons=self.text_region_polygons,
716            dilated_text_region_polygons=self.dilated_text_region_polygons,
717            bounding_rectangular_polygons=self.bounding_rectangular_polygons,
718            typical_indices=self.typical_indices,
719            main_angles=self.main_angles,
720        )
721
722        self.flattened_text_regions = self.build_flattened_text_regions(
723            image=image,
724            # NOTE: need to use the original text region polygons for reversed opts.
725            text_region_polygons=self.original_text_region_polygons,
726            bounding_extended_text_region_masks=self.bounding_extended_text_region_masks,
727            typical_indices=self.typical_indices,
728            flattening_rotate_angles=self.flattening_rotate_angles,
729            grouped_char_polygons=grouped_char_polygons,
730        )
@classmethod
def patch_text_region_polygons( cls, text_region_polygons: Sequence[vkit.element.polygon.Polygon], grouped_char_polygons: Union[Sequence[Sequence[vkit.element.polygon.Polygon]], NoneType]):
233    @classmethod
234    def patch_text_region_polygons(
235        cls,
236        text_region_polygons: Sequence[Polygon],
237        grouped_char_polygons: Optional[Sequence[Sequence[Polygon]]],
238    ):
239        if grouped_char_polygons is None:
240            return text_region_polygons
241
242        assert len(text_region_polygons) == len(grouped_char_polygons)
243
244        patched_text_region_polygons: List[Polygon] = []
245        for text_region_polygon, char_polygons in zip(text_region_polygons, grouped_char_polygons):
246            # Need to make sure all char polygons are included.
247            unionized_polygons = [text_region_polygon]
248            unionized_polygons.extend(char_polygons)
249
250            bounding_box = Box.from_boxes((polygon.bounding_box for polygon in unionized_polygons))
251            mask = Mask.from_shapable(bounding_box).to_box_attached(bounding_box)
252            for polygon in unionized_polygons:
253                polygon.fill_mask(mask)
254
255            patched_text_region_polygons.append(mask.to_external_polygon())
256
257        return patched_text_region_polygons
@classmethod
def get_dilated_and_bounding_rectangular_polygons( cls, text_region_polygon_dilate_ratio: float, shape: Tuple[int, int], text_region_polygons: Sequence[vkit.element.polygon.Polygon], force_no_dilation_flags: Union[Sequence[bool], NoneType] = None):
259    @classmethod
260    def get_dilated_and_bounding_rectangular_polygons(
261        cls,
262        text_region_polygon_dilate_ratio: float,
263        shape: Tuple[int, int],
264        text_region_polygons: Sequence[Polygon],
265        force_no_dilation_flags: Optional[Sequence[bool]] = None,
266    ):
267        dilated_text_region_polygons: List[Polygon] = []
268        bounding_rectangular_polygons: List[Polygon] = []
269
270        if force_no_dilation_flags is None:
271            force_no_dilation_flags_iter = itertools.repeat(False)
272        else:
273            assert len(force_no_dilation_flags) == len(text_region_polygons)
274            force_no_dilation_flags_iter = force_no_dilation_flags
275
276        for text_region_polygon, force_no_dilation_flag in zip(
277            text_region_polygons, force_no_dilation_flags_iter
278        ):
279
280            if not force_no_dilation_flag:
281                # Dilate.
282                text_region_polygon = text_region_polygon.to_dilated_polygon(
283                    ratio=text_region_polygon_dilate_ratio,
284                )
285                text_region_polygon = text_region_polygon.to_clipped_polygon(shape)
286
287            dilated_text_region_polygons.append(text_region_polygon)
288            bounding_rectangular_polygons.append(
289                text_region_polygon.to_bounding_rectangular_polygon(shape)
290            )
291
292        return dilated_text_region_polygons, bounding_rectangular_polygons
@classmethod
def analyze_bounding_rectangular_polygons( cls, bounding_rectangular_polygons: Sequence[vkit.element.polygon.Polygon]):
294    @classmethod
295    def analyze_bounding_rectangular_polygons(
296        cls,
297        bounding_rectangular_polygons: Sequence[Polygon],
298    ):
299        short_side_lengths: List[float] = []
300        long_side_ratios: List[float] = []
301        long_side_angles: List[int] = []
302
303        for polygon in bounding_rectangular_polygons:
304            # Get reference line.
305            point0, point1, _, point3 = polygon.points
306            side0_length = math.hypot(
307                point0.smooth_y - point1.smooth_y,
308                point0.smooth_x - point1.smooth_x,
309            )
310            side1_length = math.hypot(
311                point0.smooth_y - point3.smooth_y,
312                point0.smooth_x - point3.smooth_x,
313            )
314
315            # Get the short side length.
316            short_side_lengths.append(min(side0_length, side1_length))
317
318            long_side_ratios.append(
319                max(side0_length, side1_length) / min(side0_length, side1_length)
320            )
321
322            point_a = point0
323            if side0_length > side1_length:
324                # Reference line (p0 -> p1).
325                point_b = point1
326            else:
327                # Reference line (p0 -> p3).
328                point_b = point3
329
330            # Get the angle of reference line, in [0, 180) degree.
331            np_theta = np.arctan2(
332                point_a.smooth_y - point_b.smooth_y,
333                point_a.smooth_x - point_b.smooth_x,
334            )
335            np_theta = np_theta % np.pi
336            long_side_angle = round(np_theta / np.pi * 180) % 180
337            long_side_angles.append(long_side_angle)
338
339        return short_side_lengths, long_side_ratios, long_side_angles
@classmethod
def get_typical_indices( cls, typical_long_side_ratio_min: float, long_side_ratios: Sequence[float]):
341    @classmethod
342    def get_typical_indices(
343        cls,
344        typical_long_side_ratio_min: float,
345        long_side_ratios: Sequence[float],
346    ):
347        return tuple(
348            idx for idx, long_side_ratio in enumerate(long_side_ratios)
349            if long_side_ratio >= typical_long_side_ratio_min
350        )
@classmethod
def check_first_text_region_polygon_is_larger( cls, text_region_polygons: Sequence[vkit.element.polygon.Polygon], short_side_lengths: Sequence[float], first_idx: int, second_idx: int):
352    @classmethod
353    def check_first_text_region_polygon_is_larger(
354        cls,
355        text_region_polygons: Sequence[Polygon],
356        short_side_lengths: Sequence[float],
357        first_idx: int,
358        second_idx: int,
359    ):
360        first_text_region_polygon = text_region_polygons[first_idx]
361        second_text_region_polygon = text_region_polygons[second_idx]
362
363        # The short side indicates the text line height.
364        first_short_side_length = short_side_lengths[first_idx]
365        second_short_side_length = short_side_lengths[second_idx]
366
367        return (
368            first_text_region_polygon.area >= second_text_region_polygon.area
369            and first_short_side_length >= second_short_side_length
370        )
@classmethod
def get_main_and_flattening_rotate_angles( cls, text_region_polygons: Sequence[vkit.element.polygon.Polygon], typical_indices: Sequence[int], short_side_lengths: Sequence[float], long_side_angles: Sequence[int]):
372    @classmethod
373    def get_main_and_flattening_rotate_angles(
374        cls,
375        text_region_polygons: Sequence[Polygon],
376        typical_indices: Sequence[int],
377        short_side_lengths: Sequence[float],
378        long_side_angles: Sequence[int],
379    ):
380        typical_indices_set = set(typical_indices)
381        text_region_center_points = [
382            text_region_polygon.get_center_point() for text_region_polygon in text_region_polygons
383        ]
384
385        main_angles: List[Optional[int]] = [None] * len(long_side_angles)
386
387        # 1. For typical indices, or if no typical indices.
388        for idx, long_side_angle in enumerate(long_side_angles):
389            if not typical_indices_set or idx in typical_indices_set:
390                main_angles[idx] = long_side_angle
391
392        # 2. For nontypcial indices.
393        if typical_indices_set:
394            typical_center_points = PointList(
395                text_region_center_points[idx] for idx in typical_indices
396            )
397            kd_tree = KDTree(typical_center_points.to_np_array())
398
399            nontypical_indices = tuple(
400                idx for idx, _ in enumerate(long_side_angles) if idx not in typical_indices_set
401            )
402            nontypical_center_points = PointList(
403                text_region_center_points[idx] for idx in nontypical_indices
404            )
405
406            # Set main angle as the closest typical angle.
407            # Round 1: Set if the closest typical polygon is large enough.
408            _, np_kd_nbr_indices = kd_tree.query(nontypical_center_points.to_np_array())
409            round2_nontypical_indices: List[int] = []
410            for nontypical_idx, typical_indices_idx in zip(
411                nontypical_indices,
412                np_kd_nbr_indices[:, 0].tolist(),
413            ):
414                typical_idx = typical_indices[typical_indices_idx]
415                if cls.check_first_text_region_polygon_is_larger(
416                    text_region_polygons=text_region_polygons,
417                    short_side_lengths=short_side_lengths,
418                    first_idx=typical_idx,
419                    second_idx=nontypical_idx,
420                ):
421                    main_angles[nontypical_idx] = main_angles[typical_idx]
422                else:
423                    round2_nontypical_indices.append(nontypical_idx)
424
425            # Round 2: Searching the closest typical polygon that has larger area.
426            round3_nontypical_indices: List[int] = []
427            if round2_nontypical_indices:
428                round2_nontypical_center_points = PointList(
429                    text_region_center_points[idx] for idx in round2_nontypical_indices
430                )
431                _, np_kd_nbr_indices = kd_tree.query(
432                    round2_nontypical_center_points.to_np_array(),
433                    k=len(typical_center_points),
434                )
435                for nontypical_idx, typical_indices_indices in zip(
436                    round2_nontypical_indices,
437                    np_kd_nbr_indices.tolist(),
438                ):
439                    hit_typical_idx = None
440                    for typical_indices_idx in typical_indices_indices:
441                        typical_idx = typical_indices[typical_indices_idx]
442                        if cls.check_first_text_region_polygon_is_larger(
443                            text_region_polygons=text_region_polygons,
444                            short_side_lengths=short_side_lengths,
445                            first_idx=typical_idx,
446                            second_idx=nontypical_idx,
447                        ):
448                            hit_typical_idx = typical_idx
449                            break
450
451                    if hit_typical_idx is not None:
452                        main_angles[nontypical_idx] = main_angles[hit_typical_idx]
453                    else:
454                        round3_nontypical_indices.append(nontypical_idx)
455
456            # Round 3: Last resort. Set to the median of typical angles.
457            if round3_nontypical_indices:
458                main_angles_median = statistics.median_low(
459                    long_side_angles[typical_idx] for typical_idx in typical_indices
460                )
461                for nontypical_idx in round3_nontypical_indices:
462                    main_angles[nontypical_idx] = main_angles_median
463
464        # 3. Get angle for flattening.
465        flattening_rotate_angles: List[int] = []
466        for main_angle in main_angles:
467            assert main_angle is not None
468            if main_angle <= 90:
469                # [270, 360).
470                flattening_rotate_angle = (360 - main_angle) % 360
471            else:
472                # [1, 90).
473                flattening_rotate_angle = 180 - main_angle
474            flattening_rotate_angles.append(flattening_rotate_angle)
475
476        return cast(List[int], main_angles), flattening_rotate_angles
@classmethod
def get_bounding_extended_text_region_masks( cls, shape: Tuple[int, int], text_region_polygons: Sequence[vkit.element.polygon.Polygon], dilated_text_region_polygons: Sequence[vkit.element.polygon.Polygon], bounding_rectangular_polygons: Sequence[vkit.element.polygon.Polygon], typical_indices: Sequence[int], main_angles: Sequence[int]):
478    @classmethod
479    def get_bounding_extended_text_region_masks(
480        cls,
481        shape: Tuple[int, int],
482        text_region_polygons: Sequence[Polygon],
483        dilated_text_region_polygons: Sequence[Polygon],
484        bounding_rectangular_polygons: Sequence[Polygon],
485        typical_indices: Sequence[int],
486        main_angles: Sequence[int],
487    ):
488        typical_indices_set = set(typical_indices)
489
490        text_mask = Mask.from_polygons(shape, text_region_polygons)
491        non_text_mask = text_mask.to_inverted_mask()
492
493        box = Box.from_shape(shape)
494        text_mask = text_mask.to_box_attached(box)
495        non_text_mask = non_text_mask.to_box_attached(box)
496
497        bounding_extended_text_region_masks: List[Mask] = []
498
499        num_text_region_polygons = len(text_region_polygons)
500        for idx in range(num_text_region_polygons):
501            text_region_polygon = text_region_polygons[idx]
502            dilated_text_region_polygon = dilated_text_region_polygons[idx]
503            bounding_rectangular_polygon = bounding_rectangular_polygons[idx]
504
505            if typical_indices_set and idx not in typical_indices_set:
506                # Patch bounding rectangular polygon if is nontypical.
507                main_angle = main_angles[idx]
508                bounding_rectangular_polygon = \
509                    dilated_text_region_polygon.to_bounding_rectangular_polygon(
510                        shape=shape,
511                        angle=main_angle,
512                    )
513
514            # See the comment in Polygon.to_bounding_rectangular_polygon.
515            bounding_box = Box.from_boxes((
516                dilated_text_region_polygon.bounding_box,
517                bounding_rectangular_polygon.bounding_box,
518            ))
519
520            # Fill other text region.
521            bounding_other_text_mask = \
522                Mask.from_shapable(bounding_box).to_box_attached(bounding_box)
523            # Copy from text mask.
524            bounding_rectangular_polygon.fill_mask(bounding_other_text_mask, text_mask)
525            # Use the original text region polygon to unset the current text mask.
526            text_region_polygon.fill_mask(bounding_other_text_mask, 0)
527
528            # Fill protentially dilated text region.
529            bounding_text_mask = \
530                Mask.from_shapable(bounding_other_text_mask).to_box_attached(bounding_box)
531            # Use the protentially dilated text region polygon to set the current text mask.
532            dilated_text_region_polygon.fill_mask(bounding_text_mask, value=1)
533
534            del dilated_text_region_polygon
535
536            # Trim protentially dilated text region polygon by eliminating other text region.
537            bounding_trimmed_text_mask = Mask.from_masks(
538                bounding_box,
539                [
540                    # Includes the protentially dilated text region.
541                    bounding_text_mask,
542                    # But not includes any other text regions.
543                    bounding_other_text_mask.to_inverted_mask(),
544                ],
545                ElementSetOperationMode.INTERSECT,
546            )
547
548            # Extract non-text region.
549            bounding_non_text_mask = bounding_rectangular_polygon.extract_mask(non_text_mask)
550
551            # Unionize trimmed text region and non-text region.
552            bounding_extended_text_region_mask = Mask.from_masks(
553                bounding_box,
554                [bounding_trimmed_text_mask, bounding_non_text_mask],
555            )
556
557            bounding_extended_text_region_masks.append(bounding_extended_text_region_mask)
558
559        return bounding_extended_text_region_masks
@classmethod
def build_flattened_text_regions( cls, image: vkit.element.image.Image, text_region_polygons: Sequence[vkit.element.polygon.Polygon], bounding_extended_text_region_masks: Sequence[vkit.element.mask.Mask], typical_indices: Sequence[int], flattening_rotate_angles: Sequence[int], grouped_char_polygons: Union[Sequence[Sequence[vkit.element.polygon.Polygon]], NoneType]):
561    @classmethod
562    def build_flattened_text_regions(
563        cls,
564        image: Image,
565        text_region_polygons: Sequence[Polygon],
566        bounding_extended_text_region_masks: Sequence[Mask],
567        typical_indices: Sequence[int],
568        flattening_rotate_angles: Sequence[int],
569        grouped_char_polygons: Optional[Sequence[Sequence[Polygon]]],
570    ):
571        typical_indices_set = set(typical_indices)
572
573        flattened_text_regions: List[FlattenedTextRegion] = []
574
575        for idx, (
576            text_region_polygon,
577            bounding_extended_text_region_mask,
578            flattening_rotate_angle,
579        ) in enumerate(
580            zip(
581                text_region_polygons,
582                bounding_extended_text_region_masks,
583                flattening_rotate_angles,
584            )
585        ):
586            bounding_box = bounding_extended_text_region_mask.box
587            assert bounding_box
588
589            # Extract image.
590            text_region_image = bounding_extended_text_region_mask.extract_image(image)
591
592            # Shift char polygons.
593            relative_char_polygons = None
594            if grouped_char_polygons is not None:
595                char_polygons = grouped_char_polygons[idx]
596                relative_char_polygons = [
597                    char_polygon.to_relative_polygon(
598                        origin_y=bounding_box.up,
599                        origin_x=bounding_box.left,
600                    ) for char_polygon in char_polygons
601                ]
602
603            # Rotate.
604            rotated_result = rotate.distort(
605                {'angle': flattening_rotate_angle},
606                image=text_region_image,
607                mask=bounding_extended_text_region_mask,
608                polygons=relative_char_polygons,
609            )
610            rotated_text_region_image = rotated_result.image
611            assert rotated_text_region_image
612            rotated_bounding_extended_text_region_mask = rotated_result.mask
613            assert rotated_bounding_extended_text_region_mask
614            # Could be None.
615            rotated_char_polygons = rotated_result.polygons
616
617            # Trim.
618            rotated_trimmed_box = rotated_bounding_extended_text_region_mask.to_external_box()
619
620            trimmed_text_region_image = rotated_text_region_image.to_cropped_image(
621                up=rotated_trimmed_box.up,
622                down=rotated_trimmed_box.down,
623                left=rotated_trimmed_box.left,
624                right=rotated_trimmed_box.right,
625            )
626
627            trimmed_mask = rotated_trimmed_box.extract_mask(
628                rotated_bounding_extended_text_region_mask
629            )
630
631            trimmed_char_polygons = None
632            if rotated_char_polygons:
633                trimmed_char_polygons = [
634                    rotated_char_polygon.to_relative_polygon(
635                        origin_y=rotated_trimmed_box.up,
636                        origin_x=rotated_trimmed_box.left,
637                    ) for rotated_char_polygon in rotated_char_polygons
638                ]
639
640            flattened_text_regions.append(
641                FlattenedTextRegion(
642                    is_typical=(idx in typical_indices_set),
643                    text_region_polygon=text_region_polygon,
644                    text_region_image=bounding_extended_text_region_mask.extract_image(image),
645                    bounding_extended_text_region_mask=bounding_extended_text_region_mask,
646                    flattening_rotate_angle=flattening_rotate_angle,
647                    shape_before_trim=rotated_text_region_image.shape,
648                    rotated_trimmed_box=rotated_trimmed_box,
649                    shape_before_resize=trimmed_text_region_image.shape,
650                    post_rotate_angle=0,
651                    flattened_image=trimmed_text_region_image,
652                    flattened_mask=trimmed_mask,
653                    flattened_char_polygons=trimmed_char_polygons,
654                )
655            )
656
657        return flattened_text_regions
def build_background_image_for_stacking(height: int, width: int):
def build_background_image_for_stacking(height: int, width: int):
    """Build a (height, width, 3) uint8 image tiled with pure red, green and blue pixels.

    The pixel at (y, x) takes the color at position (y % 3 + x % 3) % 3 of
    (red, green, blue), so the pattern repeats every three rows and columns.
    """
    palette = ((255, 0, 0), (0, 255, 0), (0, 0, 255))

    np_image = np.zeros((height, width, 3), dtype=np.uint8)
    for row_phase in range(3):
        for col_phase in range(3):
            # Paint every third row / column starting at this phase in one assignment.
            np_image[row_phase::3, col_phase::3] = palette[(row_phase + col_phase) % 3]

    return Image(mat=np_image)
def stack_flattened_text_regions( page_pad: int, flattened_text_regions_pad: int, flattened_text_regions: Sequence[vkit.pipeline.text_detection.page_text_region.FlattenedTextRegion]):
def stack_flattened_text_regions(
    page_pad: int,
    flattened_text_regions_pad: int,
    flattened_text_regions: Sequence[FlattenedTextRegion],
):
    """Pack flattened text regions into a single page image.

    Every region is padded by ``flattened_text_regions_pad`` pixels on each side for
    packing, and the packed area is shifted by ``page_pad`` pixels and enlarged by
    ``2 * page_pad`` in both dimensions.

    Returns a 5-tuple:
      1. the page image: the background from ``build_background_image_for_stacking``
         with each region's ``flattened_image`` filled in through its ``flattened_mask``;
      2. the active mask, filled with 1 through each region's ``flattened_mask``;
      3. the (unpadded) box of each region, in the order of ``flattened_text_regions``;
      4. all char polygons, shifted by the top-left corner of their region box;
      5. for each char polygon, the index of its region box in (3).

    Raises:
        ValueError: If ``flattened_text_regions`` is empty.
    """
    if not flattened_text_regions:
        raise ValueError('flattened_text_regions must not be empty.')

    page_double_pad = 2 * page_pad
    flattened_text_regions_double_pad = 2 * flattened_text_regions_pad

    rect_packer = RectPacker(rotation=False)

    # Add box and bin.
    # NOTE: Only one bin is added, that is, packing all text region into one image.
    bin_width = 0
    bin_height = 0

    for ftr_idx, flattened_text_region in enumerate(flattened_text_regions):
        padded_width = flattened_text_region.width + flattened_text_regions_double_pad
        padded_height = flattened_text_region.height + flattened_text_regions_double_pad
        rect_packer.add_rect(width=padded_width, height=padded_height, rid=ftr_idx)

        # NOTE: The bin must be able to hold all *padded* boxes stacked vertically,
        # hence the padding of every box (not just one) is counted in the bin height.
        bin_width = max(bin_width, padded_width)
        bin_height += padded_height

    rect_packer.add_bin(width=bin_width, height=bin_height)

    # Pack boxes.
    rect_packer.pack()  # type: ignore

    # Get packed boxes, keyed by the index of the flattened text region.
    ftr_idx_to_padded_box = {}
    for bin_idx, x, y, width, height, ftr_idx in rect_packer.rect_list():
        assert bin_idx == 0
        ftr_idx_to_padded_box[ftr_idx] = Box(
            up=y,
            down=y + height - 1,
            left=x,
            right=x + width - 1,
        )

    # Order boxes. Every region must have been packed, otherwise the regions and
    # the boxes would be misaligned (or silently truncated) below.
    num_flattened_text_regions = len(flattened_text_regions)
    assert len(ftr_idx_to_padded_box) == num_flattened_text_regions, \
        'Some flattened text regions cannot be packed.'
    padded_boxes: List[Box] = [
        ftr_idx_to_padded_box[ftr_idx] for ftr_idx in range(num_flattened_text_regions)
    ]

    page_height = max(box.down for box in padded_boxes) + 1 + page_double_pad
    page_width = max(box.right for box in padded_boxes) + 1 + page_double_pad

    image = build_background_image_for_stacking(page_height, page_width)
    active_mask = Mask.from_shapable(image)
    text_region_boxes: List[Box] = []
    char_polygons: List[Polygon] = []
    char_polygon_text_region_box_indices: List[int] = []

    for padded_box, flattened_text_region in zip(padded_boxes, flattened_text_regions):
        assert flattened_text_region.height + flattened_text_regions_double_pad \
            == padded_box.height
        assert flattened_text_region.width + flattened_text_regions_double_pad \
            == padded_box.width

        # Remove box padding.
        up = padded_box.up + flattened_text_regions_pad + page_pad
        left = padded_box.left + flattened_text_regions_pad + page_pad

        text_region_box = Box(
            up=up,
            down=up + flattened_text_region.height - 1,
            left=left,
            right=left + flattened_text_region.width - 1,
        )
        text_region_boxes.append(text_region_box)
        text_region_box_idx = len(text_region_boxes) - 1

        # Render.
        text_region_box.fill_image(
            image,
            flattened_text_region.flattened_image,
            image_mask=flattened_text_region.flattened_mask,
        )
        text_region_box.fill_mask(
            active_mask,
            value=1,
            mask_mask=flattened_text_region.flattened_mask,
        )

        # Negative regions carry no char polygons and contribute nothing here.
        if flattened_text_region.flattened_char_polygons:
            for char_polygon in flattened_text_region.flattened_char_polygons:
                char_polygons.append(char_polygon.to_shifted_polygon(
                    offset_y=up,
                    offset_x=left,
                ))
                char_polygon_text_region_box_indices.append(text_region_box_idx)

    return (
        image,
        active_mask,
        text_region_boxes,
        char_polygons,
        char_polygon_text_region_box_indices,
    )
 860class PageTextRegionStep(
 861    PipelineStep[
 862        PageTextRegionStepConfig,
 863        PageTextRegionStepInput,
 864        PageTextRegionStepOutput,
 865    ]
 866):  # yapf: disable
 867
 868    @classmethod
 869    def generate_precise_text_region_candidate_polygons(
 870        cls,
 871        precise_mask: Mask,
 872        disconnected_text_region_mask: Mask,
 873    ):
 874        assert precise_mask.box and disconnected_text_region_mask.box
 875
 876        # Get the intersection.
 877        intersected_box = Box(
 878            up=max(precise_mask.box.up, disconnected_text_region_mask.box.up),
 879            down=min(precise_mask.box.down, disconnected_text_region_mask.box.down),
 880            left=max(precise_mask.box.left, disconnected_text_region_mask.box.left),
 881            right=min(precise_mask.box.right, disconnected_text_region_mask.box.right),
 882        )
 883        assert intersected_box.up <= intersected_box.down
 884        assert intersected_box.left <= intersected_box.right
 885
 886        precise_mask = intersected_box.extract_mask(precise_mask)
 887        disconnected_text_region_mask = intersected_box.extract_mask(disconnected_text_region_mask)
 888
 889        # Apply mask bitwise-and operation.
 890        intersected_mask = Mask(
 891            mat=(disconnected_text_region_mask.mat & precise_mask.mat).astype(np.uint8)
 892        )
 893        intersected_mask = intersected_mask.to_box_attached(intersected_box)
 894
 895        # NOTE:
 896        # 1. Could extract more than one polygons.
 897        # 2. Some polygons are in border and should be removed later.
 898        return intersected_mask.to_disconnected_polygons()
 899
 900    @classmethod
 901    def strtree_query_intersected_polygons(
 902        cls,
 903        strtree: STRtree,
 904        anchor_polygons: Sequence[Polygon],
 905        candidate_polygon: Polygon,
 906    ):
 907        candidate_shapely_polygon = candidate_polygon.to_shapely_polygon()
 908        candidate_mask = candidate_polygon.mask
 909
 910        for anchor_idx in sorted(strtree.query(candidate_shapely_polygon)):
 911            anchor_polygon = anchor_polygons[anchor_idx]
 912            anchor_mask = anchor_polygon.mask
 913
 914            intersected_ratio = calculate_boxed_masks_intersected_ratio(
 915                anchor_mask=anchor_mask,
 916                candidate_mask=candidate_mask,
 917                use_candidate_as_base=True,
 918            )
 919
 920            yield (
 921                anchor_idx,
 922                anchor_polygon,
 923                anchor_mask,
 924                candidate_mask,
 925                intersected_ratio,
 926            )
 927
 928    def sample_page_non_text_region_polygons(
 929        self,
 930        page_non_text_region_polygons: Sequence[Polygon],
 931        num_page_text_region_infos: int,
 932        rng: RandomGenerator,
 933    ):
 934        negative_ratio = self.config.negative_text_region_ratio
 935        num_page_non_text_region_polygons = round(
 936            negative_ratio * num_page_text_region_infos / (1 - negative_ratio)
 937        )
 938        return rng_choice_with_size(
 939            rng,
 940            page_non_text_region_polygons,
 941            size=min(
 942                num_page_non_text_region_polygons,
 943                len(page_non_text_region_polygons),
 944            ),
 945            replace=False,
 946        )
 947
 948    def build_flattened_text_regions(
 949        self,
 950        page_image: Image,
 951        page_text_region_infos: Sequence[PageTextRegionInfo],
 952        page_non_text_region_polygons: Sequence[Polygon],
 953        rng: RandomGenerator,
 954    ):
 955        text_region_polygon_dilate_ratio = float(
 956            rng.uniform(
 957                self.config.text_region_flattener_text_region_polygon_dilate_ratio_min,
 958                self.config.text_region_flattener_text_region_polygon_dilate_ratio_max,
 959            )
 960        )
 961        typical_long_side_ratio_min = \
 962            self.config.text_region_flattener_typical_long_side_ratio_min
 963
 964        text_region_polygons: List[Polygon] = []
 965        grouped_char_polygons: List[Sequence[Polygon]] = []
 966        for page_text_region_info in page_text_region_infos:
 967            text_region_polygons.append(page_text_region_info.precise_text_region_polygon)
 968            grouped_char_polygons.append(page_text_region_info.char_polygons)
 969
 970        # Inject nagative regions.
 971        for page_non_text_region_polygon in page_non_text_region_polygons:
 972            # NOTE: Don't drop any text region here, otherwise will introduce labeling confusion,
 973            # since dropped text region will be considered as non-text region.
 974            text_region_polygons.append(page_non_text_region_polygon)
 975            grouped_char_polygons.append(tuple())
 976
 977        text_region_flattener = TextRegionFlattener(
 978            typical_long_side_ratio_min=typical_long_side_ratio_min,
 979            text_region_polygon_dilate_ratio=text_region_polygon_dilate_ratio,
 980            image=page_image,
 981            text_region_polygons=text_region_polygons,
 982            grouped_char_polygons=grouped_char_polygons,
 983            is_training=True,
 984        )
 985
 986        # Resize positive ftr.
 987        positive_flattened_text_regions: List[FlattenedTextRegion] = []
 988        # For negative sampling.
 989        positive_reference_heights: List[float] = []
 990        positive_reference_widths: List[float] = []
 991        num_negative_flattened_text_regions = 0
 992
 993        for flattened_text_region in text_region_flattener.flattened_text_regions:
 994            if not flattened_text_region.flattened_char_polygons:
 995                num_negative_flattened_text_regions += 1
 996                continue
 997
 998            if len(flattened_text_region.flattened_char_polygons) == 1 \
 999                    and rng.random() < self.config.prob_drop_single_char_page_text_region_info:
1000                # Ignore some single-char text region for reducing label confusion.
1001                continue
1002
1003            char_height_median = flattened_text_region.get_char_height_meidan()
1004
1005            text_region_resize_char_height_median = int(
1006                rng.integers(
1007                    self.config.text_region_resize_char_height_median_min,
1008                    self.config.text_region_resize_char_height_median_max + 1,
1009                )
1010            )
1011            scale = text_region_resize_char_height_median / char_height_median
1012
1013            height, width = flattened_text_region.shape
1014            resized_height = round(height * scale)
1015            resized_width = round(width * scale)
1016
1017            flattened_text_region = flattened_text_region.to_resized_flattened_text_region(
1018                resized_height=resized_height,
1019                resized_width=resized_width,
1020            )
1021
1022            positive_reference_heights.append(resized_height)
1023            positive_reference_widths.append(resized_width)
1024
1025            # Post rotate.
1026            post_rotate_angle = 0
1027            if flattened_text_region.is_typical:
1028                if rng.random() < self.config.prob_text_region_typical_post_rotate:
1029                    # Upside down only.
1030                    post_rotate_angle = 180
1031            else:
1032                if rng.random() < self.config.prob_text_region_untypical_post_rotate:
1033                    # 3-way rotate.
1034                    post_rotate_angle = rng_choice(rng, (180, 90, 270), probs=(0.5, 0.25, 0.25))
1035
1036            if post_rotate_angle != 0:
1037                flattened_text_region = \
1038                    flattened_text_region.to_post_rotated_flattened_text_region(post_rotate_angle)
1039
1040            positive_flattened_text_regions.append(flattened_text_region)
1041
1042        # Resize negative ftr.
1043        negative_reference_heights = list(
1044            rng_choice_with_size(
1045                rng,
1046                positive_reference_heights,
1047                size=num_negative_flattened_text_regions,
1048                replace=(num_negative_flattened_text_regions > len(positive_reference_heights)),
1049            )
1050        )
1051
1052        negative_height_max = max(positive_reference_heights)
1053        negative_width_max = max(positive_reference_widths)
1054
1055        negative_flattened_text_regions: List[FlattenedTextRegion] = []
1056
1057        for flattened_text_region in text_region_flattener.flattened_text_regions:
1058            if flattened_text_region.flattened_char_polygons:
1059                continue
1060
1061            reference_height = negative_reference_heights.pop()
1062            scale = reference_height / flattened_text_region.height
1063
1064            height, width = flattened_text_region.shape
1065            resized_height = round(height * scale)
1066            resized_width = round(width * scale)
1067
1068            # Remove negative region that is too large.
1069            if resized_height > negative_height_max or resized_width > negative_width_max:
1070                continue
1071
1072            flattened_text_region = flattened_text_region.to_resized_flattened_text_region(
1073                resized_height=resized_height,
1074                resized_width=resized_width,
1075            )
1076
1077            # Post rotate.
1078            post_rotate_angle = 0
1079            if flattened_text_region.is_typical:
1080                if rng.random() < self.config.prob_text_region_typical_post_rotate:
1081                    # Upside down only.
1082                    post_rotate_angle = 180
1083            else:
1084                if rng.random() < self.config.prob_text_region_untypical_post_rotate:
1085                    # 3-way rotate.
1086                    post_rotate_angle = rng_choice(rng, (180, 90, 270), probs=(0.5, 0.25, 0.25))
1087
1088            if post_rotate_angle != 0:
1089                flattened_text_region = \
1090                    flattened_text_region.to_post_rotated_flattened_text_region(post_rotate_angle)
1091
1092            negative_flattened_text_regions.append(flattened_text_region)
1093
1094        flattened_text_regions = (
1095            *positive_flattened_text_regions,
1096            *negative_flattened_text_regions,
1097        )
1098        return flattened_text_regions
1099
1100    def run(self, input: PageTextRegionStepInput, rng: RandomGenerator):
1101        page_distortion_step_output = input.page_distortion_step_output
1102        page_image = page_distortion_step_output.page_image
1103        page_char_polygon_collection = page_distortion_step_output.page_char_polygon_collection
1104        page_disconnected_text_region_collection = \
1105            page_distortion_step_output.page_disconnected_text_region_collection
1106        page_non_text_region_collection = \
1107            page_distortion_step_output.page_non_text_region_collection
1108
1109        page_resizing_step_output = input.page_resizing_step_output
1110        page_resized_text_line_mask = page_resizing_step_output.page_text_line_mask
1111
1112        debug = None
1113        if self.config.enable_debug:
1114            debug = PageTextRegionStepDebug()
1115
1116        # Build R-tree to track text regions.
1117        disconnected_text_region_polygons: List[Polygon] = []
1118        disconnected_text_region_shapely_polygons: List[ShapelyPolygon] = []
1119        for polygon in page_disconnected_text_region_collection.to_polygons():
1120            disconnected_text_region_polygons.append(polygon)
1121            shapely_polygon = polygon.to_shapely_polygon()
1122            disconnected_text_region_shapely_polygons.append(shapely_polygon)
1123
1124        disconnected_text_region_tree = STRtree(disconnected_text_region_shapely_polygons)
1125
1126        # Get the precise text regions.
1127        precise_text_region_candidate_polygons: List[Polygon] = []
1128        for resized_precise_polygon in page_resized_text_line_mask.to_disconnected_polygons():
1129            # Resize back to the shape after distortion.
1130            precise_polygon = resized_precise_polygon.to_conducted_resized_polygon(
1131                page_resized_text_line_mask,
1132                resized_height=page_image.height,
1133                resized_width=page_image.width,
1134            )
1135
1136            # Find and extract intersected text region.
1137            # NOTE: One precise_polygon could be overlapped with
1138            # more than one disconnected_text_region_polygon!
1139            for _, _, disconnected_text_region_mask, precise_mask, _ in \
1140                    self.strtree_query_intersected_polygons(
1141                        strtree=disconnected_text_region_tree,
1142                        anchor_polygons=disconnected_text_region_polygons,
1143                        candidate_polygon=precise_polygon,
1144                    ):
1145                precise_text_region_candidate_polygons.extend(
1146                    self.generate_precise_text_region_candidate_polygons(
1147                        precise_mask=precise_mask,
1148                        disconnected_text_region_mask=disconnected_text_region_mask,
1149                    )
1150                )
1151
1152        if debug:
1153            debug.page_image = page_image
1154            debug.precise_text_region_candidate_polygons = precise_text_region_candidate_polygons
1155
1156        # Help gc.
1157        del disconnected_text_region_polygons
1158        del disconnected_text_region_shapely_polygons
1159        del disconnected_text_region_tree
1160
1161        # Bind char-level polygon to precise text region.
1162        precise_text_region_polygons: List[Polygon] = []
1163        precise_text_region_shapely_polygons: List[ShapelyPolygon] = []
1164
1165        for polygon in precise_text_region_candidate_polygons:
1166            precise_text_region_polygons.append(polygon)
1167            shapely_polygon = polygon.to_shapely_polygon()
1168            precise_text_region_shapely_polygons.append(shapely_polygon)
1169
1170        precise_text_region_tree = STRtree(precise_text_region_shapely_polygons)
1171
1172        if not self.config.use_adjusted_char_polygons:
1173            selected_char_polygons = page_char_polygon_collection.char_polygons
1174        else:
1175            selected_char_polygons = page_char_polygon_collection.adjusted_char_polygons
1176
1177        ptrp_idx_to_char_polygons: DefaultDict[int, List[Polygon]] = defaultdict(list)
1178
1179        for char_polygon in selected_char_polygons:
1180            best_precise_text_region_polygon_idx = None
1181            intersected_ratio_max = 0
1182
1183            for (
1184                precise_text_region_polygon_idx,
1185                _,
1186                _,
1187                _,
1188                intersected_ratio,
1189            ) in self.strtree_query_intersected_polygons(
1190                strtree=precise_text_region_tree,
1191                anchor_polygons=precise_text_region_polygons,
1192                candidate_polygon=char_polygon,
1193            ):
1194                if intersected_ratio > intersected_ratio_max:
1195                    intersected_ratio_max = intersected_ratio
1196                    best_precise_text_region_polygon_idx = precise_text_region_polygon_idx
1197
1198            if best_precise_text_region_polygon_idx is not None:
1199                ptrp_idx_to_char_polygons[best_precise_text_region_polygon_idx].append(char_polygon)
1200            else:
1201                # NOTE: Text line with only a small char (i.e. delimiter) could enter this branch.
1202                # In such case, the text line bounding box is smaller than the char polygon, since
1203                # the leading/trailing char paddings are ignored during text line rendering.
1204                # It's acceptable for now since: 1) this case happens rarely, 2) and it won't
1205                # introduce labeling noise.
1206                logger.warning(f'Cannot assign a text region for char_polygon={char_polygon}')
1207
1208        page_text_region_infos: List[PageTextRegionInfo] = []
1209        for ptrp_idx, precise_text_region_polygon in enumerate(precise_text_region_polygons):
1210            if ptrp_idx not in ptrp_idx_to_char_polygons:
1211                continue
1212            page_text_region_infos.append(
1213                PageTextRegionInfo(
1214                    precise_text_region_polygon=precise_text_region_polygon,
1215                    char_polygons=ptrp_idx_to_char_polygons[ptrp_idx],
1216                )
1217            )
1218
1219        # Help gc.
1220        del precise_text_region_polygons
1221        del precise_text_region_shapely_polygons
1222        del precise_text_region_tree
1223
1224        if debug:
1225            debug.page_text_region_infos = page_text_region_infos
1226
1227        # Negative sampling.
1228        page_non_text_region_polygons = self.sample_page_non_text_region_polygons(
1229            page_non_text_region_polygons=tuple(page_non_text_region_collection.to_polygons()),
1230            num_page_text_region_infos=len(page_text_region_infos),
1231            rng=rng,
1232        )
1233
1234        flattened_text_regions = self.build_flattened_text_regions(
1235            page_image=page_image,
1236            page_text_region_infos=page_text_region_infos,
1237            page_non_text_region_polygons=page_non_text_region_polygons,
1238            rng=rng,
1239        )
1240        if debug:
1241            debug.flattened_text_regions = flattened_text_regions
1242
1243        # Stack text regions.
1244        (
1245            image,
1246            active_mask,
1247            text_region_boxes,
1248            char_polygons,
1249            char_polygon_text_region_box_indices,
1250        ) = stack_flattened_text_regions(
1251            page_pad=0,
1252            flattened_text_regions_pad=self.config.stack_flattened_text_regions_pad,
1253            flattened_text_regions=flattened_text_regions,
1254        )
1255
1256        text_region_polygons = [
1257            text_region_box.to_polygon() for text_region_box in text_region_boxes
1258        ]
1259
1260        # Post uniform rotation.
1261        shape_before_rotate = image.shape
1262        rotate_angle = 0
1263
1264        if rng.random() < self.config.prob_post_rotate_90_angle:
1265            rotate_angle = 90
1266
1267        if rng.random() < self.config.prob_post_rotate_random_angle:
1268            rotate_angle += int(
1269                rng.integers(
1270                    self.config.post_rotate_random_angle_min,
1271                    self.config.post_rotate_random_angle_max + 1,
1272                )
1273            )
1274
1275        if rotate_angle != 0:
1276            # For unpacking.
1277            num_char_polygons = len(char_polygons)
1278            rotated_result = rotate.distort(
1279                {'angle': rotate_angle},
1280                image=image,
1281                mask=active_mask,
1282                polygons=(*char_polygons, *text_region_polygons),
1283            )
1284            assert rotated_result.image and rotated_result.mask and rotated_result.polygons
1285            image = rotated_result.image
1286            active_mask = rotated_result.mask
1287            char_polygons = rotated_result.polygons[:num_char_polygons]
1288            text_region_polygons = rotated_result.polygons[num_char_polygons:]
1289
1290        return PageTextRegionStepOutput(
1291            page_image=image,
1292            page_active_mask=active_mask,
1293            page_char_polygons=char_polygons,
1294            page_text_region_polygons=text_region_polygons,
1295            page_char_polygon_text_region_polygon_indices=char_polygon_text_region_box_indices,
1296            shape_before_rotate=shape_before_rotate,
1297            rotate_angle=rotate_angle,
1298            debug=debug,
1299        )

Abstract base class for generic types.

A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::

    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
        # Etc.

This class can then be used as follows::

    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default

@classmethod
def generate_precise_text_region_candidate_polygons( cls, precise_mask: vkit.element.mask.Mask, disconnected_text_region_mask: vkit.element.mask.Mask):
868    @classmethod
869    def generate_precise_text_region_candidate_polygons(
870        cls,
871        precise_mask: Mask,
872        disconnected_text_region_mask: Mask,
873    ):
874        assert precise_mask.box and disconnected_text_region_mask.box
875
876        # Get the intersection.
877        intersected_box = Box(
878            up=max(precise_mask.box.up, disconnected_text_region_mask.box.up),
879            down=min(precise_mask.box.down, disconnected_text_region_mask.box.down),
880            left=max(precise_mask.box.left, disconnected_text_region_mask.box.left),
881            right=min(precise_mask.box.right, disconnected_text_region_mask.box.right),
882        )
883        assert intersected_box.up <= intersected_box.down
884        assert intersected_box.left <= intersected_box.right
885
886        precise_mask = intersected_box.extract_mask(precise_mask)
887        disconnected_text_region_mask = intersected_box.extract_mask(disconnected_text_region_mask)
888
889        # Apply mask bitwise-and operation.
890        intersected_mask = Mask(
891            mat=(disconnected_text_region_mask.mat & precise_mask.mat).astype(np.uint8)
892        )
893        intersected_mask = intersected_mask.to_box_attached(intersected_box)
894
895        # NOTE:
896        # 1. Could extract more than one polygons.
897        # 2. Some polygons are in border and should be removed later.
898        return intersected_mask.to_disconnected_polygons()
@classmethod
def strtree_query_intersected_polygons( cls, strtree: shapely.strtree.STRtree, anchor_polygons: Sequence[vkit.element.polygon.Polygon], candidate_polygon: vkit.element.polygon.Polygon):
900    @classmethod
901    def strtree_query_intersected_polygons(
902        cls,
903        strtree: STRtree,
904        anchor_polygons: Sequence[Polygon],
905        candidate_polygon: Polygon,
906    ):
907        candidate_shapely_polygon = candidate_polygon.to_shapely_polygon()
908        candidate_mask = candidate_polygon.mask
909
910        for anchor_idx in sorted(strtree.query(candidate_shapely_polygon)):
911            anchor_polygon = anchor_polygons[anchor_idx]
912            anchor_mask = anchor_polygon.mask
913
914            intersected_ratio = calculate_boxed_masks_intersected_ratio(
915                anchor_mask=anchor_mask,
916                candidate_mask=candidate_mask,
917                use_candidate_as_base=True,
918            )
919
920            yield (
921                anchor_idx,
922                anchor_polygon,
923                anchor_mask,
924                candidate_mask,
925                intersected_ratio,
926            )
def sample_page_non_text_region_polygons( self, page_non_text_region_polygons: Sequence[vkit.element.polygon.Polygon], num_page_text_region_infos: int, rng: numpy.random._generator.Generator):
928    def sample_page_non_text_region_polygons(
929        self,
930        page_non_text_region_polygons: Sequence[Polygon],
931        num_page_text_region_infos: int,
932        rng: RandomGenerator,
933    ):
934        negative_ratio = self.config.negative_text_region_ratio
935        num_page_non_text_region_polygons = round(
936            negative_ratio * num_page_text_region_infos / (1 - negative_ratio)
937        )
938        return rng_choice_with_size(
939            rng,
940            page_non_text_region_polygons,
941            size=min(
942                num_page_non_text_region_polygons,
943                len(page_non_text_region_polygons),
944            ),
945            replace=False,
946        )
def build_flattened_text_regions( self, page_image: vkit.element.image.Image, page_text_region_infos: Sequence[vkit.pipeline.text_detection.page_text_region.PageTextRegionInfo], page_non_text_region_polygons: Sequence[vkit.element.polygon.Polygon], rng: numpy.random._generator.Generator):
 948    def build_flattened_text_regions(
 949        self,
 950        page_image: Image,
 951        page_text_region_infos: Sequence[PageTextRegionInfo],
 952        page_non_text_region_polygons: Sequence[Polygon],
 953        rng: RandomGenerator,
 954    ):
 955        text_region_polygon_dilate_ratio = float(
 956            rng.uniform(
 957                self.config.text_region_flattener_text_region_polygon_dilate_ratio_min,
 958                self.config.text_region_flattener_text_region_polygon_dilate_ratio_max,
 959            )
 960        )
 961        typical_long_side_ratio_min = \
 962            self.config.text_region_flattener_typical_long_side_ratio_min
 963
 964        text_region_polygons: List[Polygon] = []
 965        grouped_char_polygons: List[Sequence[Polygon]] = []
 966        for page_text_region_info in page_text_region_infos:
 967            text_region_polygons.append(page_text_region_info.precise_text_region_polygon)
 968            grouped_char_polygons.append(page_text_region_info.char_polygons)
 969
 970        # Inject nagative regions.
 971        for page_non_text_region_polygon in page_non_text_region_polygons:
 972            # NOTE: Don't drop any text region here, otherwise will introduce labeling confusion,
 973            # since dropped text region will be considered as non-text region.
 974            text_region_polygons.append(page_non_text_region_polygon)
 975            grouped_char_polygons.append(tuple())
 976
 977        text_region_flattener = TextRegionFlattener(
 978            typical_long_side_ratio_min=typical_long_side_ratio_min,
 979            text_region_polygon_dilate_ratio=text_region_polygon_dilate_ratio,
 980            image=page_image,
 981            text_region_polygons=text_region_polygons,
 982            grouped_char_polygons=grouped_char_polygons,
 983            is_training=True,
 984        )
 985
 986        # Resize positive ftr.
 987        positive_flattened_text_regions: List[FlattenedTextRegion] = []
 988        # For negative sampling.
 989        positive_reference_heights: List[float] = []
 990        positive_reference_widths: List[float] = []
 991        num_negative_flattened_text_regions = 0
 992
 993        for flattened_text_region in text_region_flattener.flattened_text_regions:
 994            if not flattened_text_region.flattened_char_polygons:
 995                num_negative_flattened_text_regions += 1
 996                continue
 997
 998            if len(flattened_text_region.flattened_char_polygons) == 1 \
 999                    and rng.random() < self.config.prob_drop_single_char_page_text_region_info:
1000                # Ignore some single-char text region for reducing label confusion.
1001                continue
1002
1003            char_height_median = flattened_text_region.get_char_height_meidan()
1004
1005            text_region_resize_char_height_median = int(
1006                rng.integers(
1007                    self.config.text_region_resize_char_height_median_min,
1008                    self.config.text_region_resize_char_height_median_max + 1,
1009                )
1010            )
1011            scale = text_region_resize_char_height_median / char_height_median
1012
1013            height, width = flattened_text_region.shape
1014            resized_height = round(height * scale)
1015            resized_width = round(width * scale)
1016
1017            flattened_text_region = flattened_text_region.to_resized_flattened_text_region(
1018                resized_height=resized_height,
1019                resized_width=resized_width,
1020            )
1021
1022            positive_reference_heights.append(resized_height)
1023            positive_reference_widths.append(resized_width)
1024
1025            # Post rotate.
1026            post_rotate_angle = 0
1027            if flattened_text_region.is_typical:
1028                if rng.random() < self.config.prob_text_region_typical_post_rotate:
1029                    # Upside down only.
1030                    post_rotate_angle = 180
1031            else:
1032                if rng.random() < self.config.prob_text_region_untypical_post_rotate:
1033                    # 3-way rotate.
1034                    post_rotate_angle = rng_choice(rng, (180, 90, 270), probs=(0.5, 0.25, 0.25))
1035
1036            if post_rotate_angle != 0:
1037                flattened_text_region = \
1038                    flattened_text_region.to_post_rotated_flattened_text_region(post_rotate_angle)
1039
1040            positive_flattened_text_regions.append(flattened_text_region)
1041
1042        # Resize negative ftr.
1043        negative_reference_heights = list(
1044            rng_choice_with_size(
1045                rng,
1046                positive_reference_heights,
1047                size=num_negative_flattened_text_regions,
1048                replace=(num_negative_flattened_text_regions > len(positive_reference_heights)),
1049            )
1050        )
1051
1052        negative_height_max = max(positive_reference_heights)
1053        negative_width_max = max(positive_reference_widths)
1054
1055        negative_flattened_text_regions: List[FlattenedTextRegion] = []
1056
1057        for flattened_text_region in text_region_flattener.flattened_text_regions:
1058            if flattened_text_region.flattened_char_polygons:
1059                continue
1060
1061            reference_height = negative_reference_heights.pop()
1062            scale = reference_height / flattened_text_region.height
1063
1064            height, width = flattened_text_region.shape
1065            resized_height = round(height * scale)
1066            resized_width = round(width * scale)
1067
1068            # Remove negative region that is too large.
1069            if resized_height > negative_height_max or resized_width > negative_width_max:
1070                continue
1071
1072            flattened_text_region = flattened_text_region.to_resized_flattened_text_region(
1073                resized_height=resized_height,
1074                resized_width=resized_width,
1075            )
1076
1077            # Post rotate.
1078            post_rotate_angle = 0
1079            if flattened_text_region.is_typical:
1080                if rng.random() < self.config.prob_text_region_typical_post_rotate:
1081                    # Upside down only.
1082                    post_rotate_angle = 180
1083            else:
1084                if rng.random() < self.config.prob_text_region_untypical_post_rotate:
1085                    # 3-way rotate.
1086                    post_rotate_angle = rng_choice(rng, (180, 90, 270), probs=(0.5, 0.25, 0.25))
1087
1088            if post_rotate_angle != 0:
1089                flattened_text_region = \
1090                    flattened_text_region.to_post_rotated_flattened_text_region(post_rotate_angle)
1091
1092            negative_flattened_text_regions.append(flattened_text_region)
1093
1094        flattened_text_regions = (
1095            *positive_flattened_text_regions,
1096            *negative_flattened_text_regions,
1097        )
1098        return flattened_text_regions
def run( self, input: vkit.pipeline.text_detection.page_text_region.PageTextRegionStepInput, rng: numpy.random._generator.Generator):
1100    def run(self, input: PageTextRegionStepInput, rng: RandomGenerator):
1101        page_distortion_step_output = input.page_distortion_step_output
1102        page_image = page_distortion_step_output.page_image
1103        page_char_polygon_collection = page_distortion_step_output.page_char_polygon_collection
1104        page_disconnected_text_region_collection = \
1105            page_distortion_step_output.page_disconnected_text_region_collection
1106        page_non_text_region_collection = \
1107            page_distortion_step_output.page_non_text_region_collection
1108
1109        page_resizing_step_output = input.page_resizing_step_output
1110        page_resized_text_line_mask = page_resizing_step_output.page_text_line_mask
1111
1112        debug = None
1113        if self.config.enable_debug:
1114            debug = PageTextRegionStepDebug()
1115
1116        # Build R-tree to track text regions.
1117        disconnected_text_region_polygons: List[Polygon] = []
1118        disconnected_text_region_shapely_polygons: List[ShapelyPolygon] = []
1119        for polygon in page_disconnected_text_region_collection.to_polygons():
1120            disconnected_text_region_polygons.append(polygon)
1121            shapely_polygon = polygon.to_shapely_polygon()
1122            disconnected_text_region_shapely_polygons.append(shapely_polygon)
1123
1124        disconnected_text_region_tree = STRtree(disconnected_text_region_shapely_polygons)
1125
1126        # Get the precise text regions.
1127        precise_text_region_candidate_polygons: List[Polygon] = []
1128        for resized_precise_polygon in page_resized_text_line_mask.to_disconnected_polygons():
1129            # Resize back to the shape after distortion.
1130            precise_polygon = resized_precise_polygon.to_conducted_resized_polygon(
1131                page_resized_text_line_mask,
1132                resized_height=page_image.height,
1133                resized_width=page_image.width,
1134            )
1135
1136            # Find and extract intersected text region.
1137            # NOTE: One precise_polygon could be overlapped with
1138            # more than one disconnected_text_region_polygon!
1139            for _, _, disconnected_text_region_mask, precise_mask, _ in \
1140                    self.strtree_query_intersected_polygons(
1141                        strtree=disconnected_text_region_tree,
1142                        anchor_polygons=disconnected_text_region_polygons,
1143                        candidate_polygon=precise_polygon,
1144                    ):
1145                precise_text_region_candidate_polygons.extend(
1146                    self.generate_precise_text_region_candidate_polygons(
1147                        precise_mask=precise_mask,
1148                        disconnected_text_region_mask=disconnected_text_region_mask,
1149                    )
1150                )
1151
1152        if debug:
1153            debug.page_image = page_image
1154            debug.precise_text_region_candidate_polygons = precise_text_region_candidate_polygons
1155
1156        # Help gc.
1157        del disconnected_text_region_polygons
1158        del disconnected_text_region_shapely_polygons
1159        del disconnected_text_region_tree
1160
1161        # Bind char-level polygon to precise text region.
1162        precise_text_region_polygons: List[Polygon] = []
1163        precise_text_region_shapely_polygons: List[ShapelyPolygon] = []
1164
1165        for polygon in precise_text_region_candidate_polygons:
1166            precise_text_region_polygons.append(polygon)
1167            shapely_polygon = polygon.to_shapely_polygon()
1168            precise_text_region_shapely_polygons.append(shapely_polygon)
1169
1170        precise_text_region_tree = STRtree(precise_text_region_shapely_polygons)
1171
1172        if not self.config.use_adjusted_char_polygons:
1173            selected_char_polygons = page_char_polygon_collection.char_polygons
1174        else:
1175            selected_char_polygons = page_char_polygon_collection.adjusted_char_polygons
1176
1177        ptrp_idx_to_char_polygons: DefaultDict[int, List[Polygon]] = defaultdict(list)
1178
1179        for char_polygon in selected_char_polygons:
1180            best_precise_text_region_polygon_idx = None
1181            intersected_ratio_max = 0
1182
1183            for (
1184                precise_text_region_polygon_idx,
1185                _,
1186                _,
1187                _,
1188                intersected_ratio,
1189            ) in self.strtree_query_intersected_polygons(
1190                strtree=precise_text_region_tree,
1191                anchor_polygons=precise_text_region_polygons,
1192                candidate_polygon=char_polygon,
1193            ):
1194                if intersected_ratio > intersected_ratio_max:
1195                    intersected_ratio_max = intersected_ratio
1196                    best_precise_text_region_polygon_idx = precise_text_region_polygon_idx
1197
1198            if best_precise_text_region_polygon_idx is not None:
1199                ptrp_idx_to_char_polygons[best_precise_text_region_polygon_idx].append(char_polygon)
1200            else:
1201                # NOTE: Text line with only a small char (i.e. delimiter) could enter this branch.
1202                # In such case, the text line bounding box is smaller than the char polygon, since
1203                # the leading/trailing char paddings are ignored during text line rendering.
1204                # It's acceptable for now since: 1) this case happens rarely, 2) and it won't
1205                # introduce labeling noise.
1206                logger.warning(f'Cannot assign a text region for char_polygon={char_polygon}')
1207
1208        page_text_region_infos: List[PageTextRegionInfo] = []
1209        for ptrp_idx, precise_text_region_polygon in enumerate(precise_text_region_polygons):
1210            if ptrp_idx not in ptrp_idx_to_char_polygons:
1211                continue
1212            page_text_region_infos.append(
1213                PageTextRegionInfo(
1214                    precise_text_region_polygon=precise_text_region_polygon,
1215                    char_polygons=ptrp_idx_to_char_polygons[ptrp_idx],
1216                )
1217            )
1218
1219        # Help gc.
1220        del precise_text_region_polygons
1221        del precise_text_region_shapely_polygons
1222        del precise_text_region_tree
1223
1224        if debug:
1225            debug.page_text_region_infos = page_text_region_infos
1226
1227        # Negative sampling.
1228        page_non_text_region_polygons = self.sample_page_non_text_region_polygons(
1229            page_non_text_region_polygons=tuple(page_non_text_region_collection.to_polygons()),
1230            num_page_text_region_infos=len(page_text_region_infos),
1231            rng=rng,
1232        )
1233
1234        flattened_text_regions = self.build_flattened_text_regions(
1235            page_image=page_image,
1236            page_text_region_infos=page_text_region_infos,
1237            page_non_text_region_polygons=page_non_text_region_polygons,
1238            rng=rng,
1239        )
1240        if debug:
1241            debug.flattened_text_regions = flattened_text_regions
1242
1243        # Stack text regions.
1244        (
1245            image,
1246            active_mask,
1247            text_region_boxes,
1248            char_polygons,
1249            char_polygon_text_region_box_indices,
1250        ) = stack_flattened_text_regions(
1251            page_pad=0,
1252            flattened_text_regions_pad=self.config.stack_flattened_text_regions_pad,
1253            flattened_text_regions=flattened_text_regions,
1254        )
1255
1256        text_region_polygons = [
1257            text_region_box.to_polygon() for text_region_box in text_region_boxes
1258        ]
1259
1260        # Post uniform rotation.
1261        shape_before_rotate = image.shape
1262        rotate_angle = 0
1263
1264        if rng.random() < self.config.prob_post_rotate_90_angle:
1265            rotate_angle = 90
1266
1267        if rng.random() < self.config.prob_post_rotate_random_angle:
1268            rotate_angle += int(
1269                rng.integers(
1270                    self.config.post_rotate_random_angle_min,
1271                    self.config.post_rotate_random_angle_max + 1,
1272                )
1273            )
1274
1275        if rotate_angle != 0:
1276            # For unpacking.
1277            num_char_polygons = len(char_polygons)
1278            rotated_result = rotate.distort(
1279                {'angle': rotate_angle},
1280                image=image,
1281                mask=active_mask,
1282                polygons=(*char_polygons, *text_region_polygons),
1283            )
1284            assert rotated_result.image and rotated_result.mask and rotated_result.polygons
1285            image = rotated_result.image
1286            active_mask = rotated_result.mask
1287            char_polygons = rotated_result.polygons[:num_char_polygons]
1288            text_region_polygons = rotated_result.polygons[num_char_polygons:]
1289
1290        return PageTextRegionStepOutput(
1291            page_image=image,
1292            page_active_mask=active_mask,
1293            page_char_polygons=char_polygons,
1294            page_text_region_polygons=text_region_polygons,
1295            page_char_polygon_text_region_polygon_indices=char_polygon_text_region_box_indices,
1296            shape_before_rotate=shape_before_rotate,
1297            rotate_angle=rotate_angle,
1298            debug=debug,
1299        )