vkit.pipeline.text_detection.page_layout

   1# Copyright 2022 vkit-x Administrator. All Rights Reserved.
   2#
   3# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
   4#
   5# The commercial license gives you the full rights to create and distribute software
   6# on your own terms without any SSPL license obligations. For more information,
   7# please see the "LICENSE_COMMERCIAL.txt" file.
   8#
   9# This project is also available under Server Side Public License (SSPL).
  10# The SSPL licensing is ideal for use cases such as open source projects with
  11# SSPL distribution, student/academic purposes, hobby projects, internal research
  12# projects without external distribution, or other projects where all SSPL
  13# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
  14from typing import Optional, Sequence, List, DefaultDict
  15import math
  16import heapq
  17from enum import Enum, unique
  18import itertools
  19from collections import defaultdict
  20
  21import attrs
  22from numpy.random import Generator as RandomGenerator
  23
  24from vkit.utility import rng_choice, normalize_to_probs, normalize_to_keys_and_probs
  25from vkit.element import Box, BoxOverlappingValidator, Polygon
  26from vkit.engine.font import FontEngineRunConfigGlyphSequence
  27from .page_shape import PageShapeStepOutput
  28from ..interface import PipelineStep, PipelineStepFactory
  29
  30
@attrs.define
class PageLayoutStepConfig:
    """Tunable parameters of the page layout sampling step.

    Most ``*_ratio_*`` fields are multiplied by a base length (the reference height,
    a page side, a grid side or a text line height) and rounded to get pixel sizes.
    ``*_min`` / ``*_max`` pairs are the bounds of a random draw.
    """

    # Reference height.
    # The reference height is ceil(sqrt(height * width / reference_aspect_ratio)).
    reference_aspect_ratio: float = 1 / 1.4142

    # Grid points.
    # grid_pad_ratio: fraction of the page side used as the initial padding estimate.
    # grid_step_ratio: grid cell size, as a multiple of the tallest normal text line height.
    # grid_vert_gap_ratio: vertical gap between cells, as a multiple of the tallest normal
    #   text line height; each gap is drawn between the min and the max.
    # grid_hori_gap_ratio: horizontal gap between cells, as a multiple of the tallest normal
    #   text line height; drawn once and never smaller than that height.
    grid_pad_ratio_min: float = 0.01
    grid_pad_ratio_max: float = 0.05
    grid_step_ratio_min: float = 1.0
    grid_step_ratio_max: float = 1.1
    grid_vert_gap_ratio_min: float = 0.0
    grid_vert_gap_ratio_max: float = 0.5
    grid_hori_gap_ratio_min: float = 1.0
    grid_hori_gap_ratio_max: float = 1.15

    # Large text line.
    # Added with probability prob_add_large_text_line. The height ratio is relative to the
    # reference height, the length ratio is relative to the width of the large text line grid.
    prob_add_large_text_line: float = 0.25
    large_text_line_height_ratio_min: float = 0.05
    large_text_line_height_ratio_max: float = 0.075
    large_text_line_length_ratio_min: float = 0.5
    large_text_line_length_ratio_max: float = 1.0

    # Normal text line.
    # Number of candidate heights to sample and their range (relative to the reference
    # height). If force_add_normal_text_line_height_ratio_min is set, the minimum height
    # is always added as an extra candidate.
    num_normal_text_line_heights_min: int = 2
    num_normal_text_line_heights_max: int = 4
    normal_text_line_height_ratio_min: float = 0.006
    normal_text_line_height_ratio_max: float = 0.036
    force_add_normal_text_line_height_ratio_min: bool = True

    # Non-text symbol.
    num_non_text_symbols_min: int = 0
    num_non_text_symbols_max: int = 5
    num_retries_to_get_non_overlapped_non_text_symbol: int = 5
    non_text_symbol_height_ratio_min: float = 0.018
    non_text_symbol_height_ratio_max: float = 0.064
    non_text_symbol_aspect_ratio_min: float = 0.9
    non_text_symbol_aspect_ratio_max: float = 1.111
    non_text_symbol_non_overlapped_alpha_min: float = 0.8
    non_text_symbol_non_overlapped_alpha_max: float = 1.0
    non_text_symbol_overlapped_alpha_min: float = 0.15
    non_text_symbol_overlapped_alpha_max: float = 0.55

    # Normal text line placement inside a grid.
    # prob_normal_text_line_diff_heights_gap: probability of inserting a vertical gap when
    #   the previous text line has a different height.
    # prob_normal_text_line_gap: same, when the previous text line has the same height.
    # normal_text_line_gap_ratio: gap size, as a multiple of the current text line height.
    # normal_text_line_length_ratio: text line length, as a fraction of the grid width.
    prob_normal_text_line_diff_heights_gap: float = 0.5
    prob_normal_text_line_gap: float = 0.5
    normal_text_line_gap_ratio_min: float = 0.05
    normal_text_line_gap_ratio_max: float = 1.25
    normal_text_line_length_ratio_min: float = 0.5
    normal_text_line_length_ratio_max: float = 1.0

    # Image.
    # Height / width ratios are relative to the page height / width.
    num_images_min: int = 0
    num_images_max: int = 3
    image_height_ratio_min: float = 0.1
    image_height_ratio_max: float = 0.35
    image_width_ratio_min: float = 0.1
    image_width_ratio_max: float = 0.35

    # Barcode (qr).
    # The side length ratio is relative to the reference height.
    num_barcode_qrs_min: int = 0
    num_barcode_qrs_max: int = 2
    barcode_qr_length_ratio_min: float = 0.05
    barcode_qr_length_ratio_max: float = 0.15

    # Barcode (code39).
    num_barcode_code39s_min: int = 0
    num_barcode_code39s_max: int = 2
    barcode_code39_height_ratio_min: float = 0.025
    barcode_code39_height_ratio_max: float = 0.05
    barcode_code39_aspect_ratio: float = 0.2854396602149411
    barcode_code39_num_chars_min: int = 9
    barcode_code39_num_chars_max: int = 13

    # Seal impression.
    # The two seal_impression_weight_* values are normalized into sampling probabilities
    # of the ellipse shape modes when the step is constructed.
    num_seal_impressions_min: int = 1
    num_seal_impressions_max: int = 3
    seal_impression_angle_min: int = -45
    seal_impression_angle_max: int = 45
    seal_impression_height_ratio_min: float = 0.1
    seal_impression_height_ratio_max: float = 0.2
    seal_impression_weight_circle: float = 1
    seal_impression_weight_general_ellipse: float = 1
    seal_impression_general_ellipse_aspect_ratio_min: float = 0.75
    seal_impression_general_ellipse_aspect_ratio_max: float = 1.333

    # For char-level polygon regression.
    disconnected_text_region_polygons_height_ratio_max: float = 2.0
 117
 118
@attrs.define
class PageLayoutStepInput:
    """Input of the page layout step."""

    # Output produced by the page shape step.
    page_shape_step_output: PageShapeStepOutput
 122
 123
@attrs.define
class LayoutTextLine:
    """A text line slot placed on the page."""

    # grid_idx:
    #   == -1: for large text line.
    #   >= 0: for normal text lines.
    grid_idx: int
    # text_line_idx: index within a grid.
    text_line_idx: int
    # Height of the text line in pixels.
    text_line_height: int
    # Area reserved for the text line.
    box: Box
    # Glyph sequence setting attached to the text line; the layout code in this
    # file always sets HORI_DEFAULT.
    glyph_sequence: FontEngineRunConfigGlyphSequence
 135
 136
@attrs.define
class LayoutNonTextSymbol:
    """A non-text symbol slot: where it goes and the alpha value attached to it."""

    box: Box
    # NOTE(review): presumably the blending opacity in [0, 1] -- confirm against the renderer.
    alpha: float
 141
 142
@attrs.define
class LayoutSealImpression:
    """A seal impression slot: where it goes and its angle."""

    box: Box
    # NOTE(review): presumably a rotation in degrees (config range is -45..45) -- confirm.
    angle: int
 147
 148
@attrs.define
class LayoutImage:
    """An image slot on the page."""

    # Area reserved for the image.
    box: Box
 152
 153
@attrs.define
class LayoutBarcodeQr:
    """A QR code slot on the page."""

    # Area reserved for the QR code; the sampler in this file makes it square.
    box: Box
 157
 158
@attrs.define
class LayoutBarcodeCode39:
    """A Code 39 barcode slot on the page."""

    # Area reserved for the barcode.
    box: Box
 162
 163
@unique
class LayoutXcodePlacement(Enum):
    """Side of an anchor text line on which a barcode / QR code is placed."""

    # Directly above the anchor text line.
    NEXT_TO_UP = 'next_to_up'
    # Directly below the anchor text line.
    NEXT_TO_DOWN = 'next_to_down'
    # Directly to the left of the anchor text line.
    NEXT_TO_LEFT = 'next_to_left'
    # Directly to the right of the anchor text line.
    NEXT_TO_RIGHT = 'next_to_right'
 170
 171
@attrs.define
class DisconnectedTextRegion:
    """A disconnected text region, described by its polygon."""

    polygon: Polygon
 175
 176
@attrs.define
class NonTextRegion:
    """A non-text region, described by its polygon."""

    polygon: Polygon
 180
 181
@unique
class LayoutNonTextLineDirection(Enum):
    """The four axis-aligned directions used for non-text lines."""

    UP = 'up'
    DOWN = 'down'
    LEFT = 'left'
    RIGHT = 'right'
 188
 189
@attrs.define
class PageLayout:
    """Complete layout of one page: its size plus every placed element."""

    # Page size in pixels.
    height: int
    width: int
    # Placed elements, grouped by kind.
    layout_text_lines: Sequence[LayoutTextLine]
    layout_non_text_symbols: Sequence[LayoutNonTextSymbol]
    layout_seal_impressions: Sequence[LayoutSealImpression]
    layout_images: Sequence[LayoutImage]
    layout_barcode_qrs: Sequence[LayoutBarcodeQr]
    layout_barcode_code39s: Sequence[LayoutBarcodeCode39]
    # Polygon regions attached to the layout.
    disconnected_text_regions: Sequence[DisconnectedTextRegion]
    non_text_regions: Sequence[NonTextRegion]
 202
 203
@attrs.define
class PageLayoutStepOutput:
    """Output of the page layout step."""

    page_layout: PageLayout
    # Debug only: the grid reserved for the large text line, if any.
    # NOTE: "gird" is a misspelling of "grid"; the name is part of the public interface.
    debug_large_text_line_gird: Optional[Box]
    # Debug only: grids used to place normal text lines.
    debug_grids: Sequence[Box]
 209
 210
@attrs.define(order=True)
class PrioritizedSegment:
    """A horizontal run of grid columns that starts at a given grid row.

    Used as a heap item: only ``vert_begin_idx`` takes part in ordering, so the
    heap always yields the topmost pending segment first.
    """

    # Index of the first grid row of the segment (the ordering key).
    vert_begin_idx: int = attrs.field(order=True)
    # Inclusive range of grid column indices covered by the segment.
    hori_begin_idx: int = attrs.field(order=False)
    hori_end_idx: int = attrs.field(order=False)
 216
 217
@unique
class SealImpressionEllipseShapeMode(Enum):
    """Shape modes of a seal impression ellipse; sampling weights come from the config."""

    CIRCLE = 'circle'
    GENERAL_ELLIPSE = 'general_ellipse'
 222
 223
 224class PageLayoutStep(
 225    PipelineStep[
 226        PageLayoutStepConfig,
 227        PageLayoutStepInput,
 228        PageLayoutStepOutput,
 229    ]
 230):  # yapf: disable
 231
 232    def __init__(self, config: PageLayoutStepConfig):
 233        super().__init__(config)
 234
 235        (
 236            self.seal_impression_ellipse_shape_modes,
 237            self.seal_impression_ellipse_shape_modes_probs,
 238        ) = normalize_to_keys_and_probs([
 239            (
 240                SealImpressionEllipseShapeMode.CIRCLE,
 241                self.config.seal_impression_weight_circle,
 242            ),
 243            (
 244                SealImpressionEllipseShapeMode.GENERAL_ELLIPSE,
 245                self.config.seal_impression_weight_general_ellipse,
 246            ),
 247        ])
 248
 249    def sample_large_text_line_height(self, reference_height: int, rng: RandomGenerator):
 250        if rng.random() < self.config.prob_add_large_text_line:
 251            large_text_line_height_ratio = rng.uniform(
 252                self.config.large_text_line_height_ratio_min,
 253                self.config.large_text_line_height_ratio_max,
 254            )
 255            return round(large_text_line_height_ratio * reference_height)
 256
 257        else:
 258            return None
 259
 260    def sample_normal_text_line_heights(self, reference_height: int, rng: RandomGenerator):
 261        normal_text_line_heights: List[int] = []
 262
 263        if self.config.force_add_normal_text_line_height_ratio_min:
 264            normal_text_line_heights.append(
 265                round(self.config.normal_text_line_height_ratio_min * reference_height)
 266            )
 267
 268        num_normal_text_line_heights = rng.integers(
 269            self.config.num_normal_text_line_heights_min,
 270            self.config.num_normal_text_line_heights_max + 1,
 271        )
 272        ratio_step = (
 273            self.config.normal_text_line_height_ratio_max
 274            - self.config.normal_text_line_height_ratio_min
 275        ) / num_normal_text_line_heights
 276        for step_idx in range(num_normal_text_line_heights):
 277            ratio_min = self.config.normal_text_line_height_ratio_min + step_idx * ratio_step
 278            ratio_max = ratio_min + ratio_step
 279            ratio = rng.uniform(ratio_min, ratio_max)
 280            normal_text_line_heights.append(round(ratio * reference_height))
 281
 282        assert normal_text_line_heights
 283        return sorted(normal_text_line_heights)
 284
 285    @classmethod
 286    def generate_grid_points(
 287        cls,
 288        grid_pad_ratio: float,
 289        grid_step: int,
 290        grid_gap: int,
 291        grid_gap_min: Optional[int],
 292        length: int,
 293        rng: RandomGenerator,
 294    ):
 295        grid_pad = min(length - grid_step, length * grid_pad_ratio)
 296        assert grid_pad > 0
 297
 298        num_steps = (length - grid_pad + grid_gap) / (grid_step + grid_gap)
 299        if not num_steps.is_integer():
 300            num_steps = math.floor(num_steps)
 301        num_steps = int(num_steps)
 302
 303        grid_pad = length - grid_step * num_steps - grid_gap * (num_steps - 1)
 304        assert grid_pad > 0
 305        grid_pad = grid_pad // 2
 306
 307        begin = grid_pad
 308        end = grid_pad + grid_step - 1
 309        assert end < length - grid_pad
 310
 311        begins: List[int] = []
 312        ends: List[int] = []
 313
 314        while end < length - grid_pad:
 315            begins.append(begin)
 316            ends.append(end)
 317
 318            cur_gap = grid_gap
 319            if grid_gap_min is not None:
 320                cur_gap = rng.integers(grid_gap_min, grid_gap + 1)
 321
 322            begin = end + cur_gap
 323            end = begin + grid_step - 1
 324
 325        return begins, ends
 326
 327    def sample_grid_points(
 328        self,
 329        height: int,
 330        width: int,
 331        normal_text_line_heights_max: int,
 332        rng: RandomGenerator,
 333    ):
 334        grid_pad_ratio = rng.uniform(
 335            self.config.grid_pad_ratio_min,
 336            self.config.grid_pad_ratio_max,
 337        )
 338
 339        grid_step_ratio = rng.uniform(
 340            self.config.grid_step_ratio_min,
 341            self.config.grid_step_ratio_max,
 342        )
 343        grid_step = round(normal_text_line_heights_max * grid_step_ratio)
 344
 345        grid_vert_gap_min = round(
 346            normal_text_line_heights_max * self.config.grid_vert_gap_ratio_min
 347        )
 348        grid_vert_gap_max = round(
 349            normal_text_line_heights_max * self.config.grid_vert_gap_ratio_max
 350        )
 351        vert_begins, vert_ends = self.generate_grid_points(
 352            grid_pad_ratio=grid_pad_ratio,
 353            grid_step=grid_step,
 354            grid_gap=grid_vert_gap_max,
 355            grid_gap_min=grid_vert_gap_min,
 356            length=height,
 357            rng=rng,
 358        )
 359
 360        grid_hori_gap_ratio = rng.uniform(
 361            self.config.grid_hori_gap_ratio_min,
 362            self.config.grid_hori_gap_ratio_max,
 363        )
 364        grid_hori_gap = round(normal_text_line_heights_max * grid_hori_gap_ratio)
 365        grid_hori_gap = max(normal_text_line_heights_max, grid_hori_gap)
 366        hori_begins, hori_ends = self.generate_grid_points(
 367            grid_pad_ratio=grid_pad_ratio,
 368            grid_step=grid_step,
 369            grid_gap=grid_hori_gap,
 370            grid_gap_min=None,
 371            length=width,
 372            rng=rng,
 373        )
 374        return (vert_begins, vert_ends), (hori_begins, hori_ends)
 375
 376    def trim_grid_points_for_large_text_line(
 377        self,
 378        large_text_line_height: int,
 379        vert_begins: Sequence[int],
 380        vert_ends: Sequence[int],
 381        hori_begins_min: int,
 382        hori_ends_max: int,
 383    ):
 384        idx = 0
 385        while idx < len(vert_begins) \
 386                and vert_ends[idx] + 1 - vert_begins[0] < large_text_line_height:
 387            idx += 1
 388
 389        if idx >= len(vert_begins) - 1:
 390            return None, 0
 391
 392        large_text_line_gird = Box(
 393            up=vert_ends[idx] - large_text_line_height + 1,
 394            down=vert_ends[idx],
 395            left=hori_begins_min,
 396            right=hori_ends_max,
 397        )
 398        return large_text_line_gird, idx + 1
 399
    def sample_grids(
        self,
        vert_begins: Sequence[int],
        vert_ends: Sequence[int],
        hori_begins: Sequence[int],
        hori_ends: Sequence[int],
        rng: RandomGenerator,
    ):
        """Randomly partition the grid cells into rectangular grids (boxes).

        The cells are processed top-down. A pending "segment" is a run of grid columns
        whose next free row is ``vert_begin_idx``. Each segment is cut from left to right
        into grids with a random width (in columns) and a random height (in rows). The
        space below every grid becomes a new pending segment.

        Args:
            vert_begins: Inclusive top coordinate of every grid row.
            vert_ends: Inclusive bottom coordinate of every grid row.
            hori_begins: Inclusive left coordinate of every grid column.
            hori_ends: Inclusive right coordinate of every grid column.
            rng: Random generator.

        Returns:
            The list of sampled grids as ``Box`` objects.
        """
        num_vert_ends = len(vert_ends)
        assert num_vert_ends == len(vert_begins)

        num_hori_ends = len(hori_ends)
        assert num_hori_ends == len(hori_begins)

        # Min-heap of pending segments ordered by their first free row. Start with one
        # segment covering all columns from the first row.
        priority_queue = [
            PrioritizedSegment(
                vert_begin_idx=0,
                hori_begin_idx=0,
                hori_end_idx=num_hori_ends - 1,
            )
        ]
        grids: List[Box] = []
        while priority_queue:
            cur_segment = heapq.heappop(priority_queue)

            # Deal with segments in the same level.
            same_vert_segments: List[PrioritizedSegment] = []
            while priority_queue \
                    and priority_queue[0].vert_begin_idx == cur_segment.vert_begin_idx:
                same_vert_segments.append(heapq.heappop(priority_queue))

            if same_vert_segments:
                # Rebuild segments: merge runs of horizontally adjacent segments that start
                # at the same row into one wider segment.
                same_vert_segments.append(cur_segment)
                same_vert_segments = sorted(
                    same_vert_segments,
                    key=lambda segment: segment.hori_begin_idx,
                )

                rebuilt_segments: List[PrioritizedSegment] = []
                rebuilt_begin = 0
                while rebuilt_begin < len(same_vert_segments):
                    # Extend the run while the next segment starts right after the
                    # previous one ends.
                    rebuilt_end = rebuilt_begin
                    while rebuilt_end + 1 < len(same_vert_segments) \
                            and (same_vert_segments[rebuilt_end + 1].hori_begin_idx
                                 == same_vert_segments[rebuilt_end].hori_end_idx + 1):
                        rebuilt_end += 1
                    rebuilt_segments.append(
                        PrioritizedSegment(
                            vert_begin_idx=cur_segment.vert_begin_idx,
                            hori_begin_idx=same_vert_segments[rebuilt_begin].hori_begin_idx,
                            hori_end_idx=same_vert_segments[rebuilt_end].hori_end_idx,
                        )
                    )
                    rebuilt_begin = rebuilt_end + 1

                # Re-pick the first segment; the others go back to the queue.
                cur_segment = rebuilt_segments[0]
                for other_segment in rebuilt_segments[1:]:
                    heapq.heappush(priority_queue, other_segment)

            # Generate grids for the current segment.
            vert_begin_idx = cur_segment.vert_begin_idx

            hori_begin_idx = cur_segment.hori_begin_idx
            hori_end_idx = cur_segment.hori_end_idx
            while hori_begin_idx <= hori_end_idx:
                # Randomly generate grid: pick the last row of the grid.
                cur_vert_end_idx = rng.integers(vert_begin_idx, num_vert_ends)

                # Try to sample segment with length >= 2: with at most 3 columns left the
                # grid takes all of them, otherwise it takes at least 2 columns.
                if hori_end_idx + 1 - hori_begin_idx <= 3:
                    cur_hori_end_idx = hori_end_idx
                else:
                    cur_hori_end_idx = rng.integers(hori_begin_idx + 1, hori_end_idx + 1)

                grids.append(
                    Box(
                        up=vert_begins[vert_begin_idx],
                        down=vert_ends[cur_vert_end_idx],
                        left=hori_begins[hori_begin_idx],
                        right=hori_ends[cur_hori_end_idx],
                    )
                )
                # The rows below the grid (if any) form a new pending segment.
                next_vert_begin_idx = cur_vert_end_idx + 1
                if next_vert_begin_idx < num_vert_ends:
                    heapq.heappush(
                        priority_queue,
                        PrioritizedSegment(
                            vert_begin_idx=next_vert_begin_idx,
                            hori_begin_idx=hori_begin_idx,
                            hori_end_idx=cur_hori_end_idx,
                        ),
                    )

                # Continue with the columns to the right of the grid.
                hori_begin_idx = cur_hori_end_idx + 1

        return grids
 498
 499    @classmethod
 500    def calculate_normal_text_line_heights_probs(
 501        cls,
 502        normal_text_line_heights_expected_probs: Sequence[float],
 503        normal_text_line_heights_acc_areas: List[int],
 504    ):
 505        if sum(normal_text_line_heights_acc_areas) == 0:
 506            normal_text_line_heights_cur_probs = [0.0] * len(normal_text_line_heights_acc_areas)
 507        else:
 508            normal_text_line_heights_cur_probs = normalize_to_probs(
 509                normal_text_line_heights_acc_areas
 510            )
 511
 512        probs = normalize_to_probs([
 513            max(0.0, expected_prob - cur_prob) for cur_prob, expected_prob in zip(
 514                normal_text_line_heights_cur_probs,
 515                normal_text_line_heights_expected_probs,
 516            )
 517        ])
 518        return probs
 519
 520    def fill_normal_text_lines_to_grid(
 521        self,
 522        normal_text_line_heights: Sequence[int],
 523        normal_text_line_heights_expected_probs: Sequence[float],
 524        normal_text_line_heights_acc_areas: List[int],
 525        grid_idx: int,
 526        grid: Box,
 527        rng: RandomGenerator,
 528    ):
 529        normal_text_line_heights_indices = list(range(len(normal_text_line_heights)))
 530        normal_text_line_heights_max = normal_text_line_heights[-1]
 531
 532        layout_text_lines: List[LayoutTextLine] = []
 533        up = grid.up
 534        prev_text_line_height: Optional[int] = None
 535
 536        while up + normal_text_line_heights_max - 1 <= grid.down:
 537            normal_text_line_heights_probs = self.calculate_normal_text_line_heights_probs(
 538                normal_text_line_heights_expected_probs=normal_text_line_heights_expected_probs,
 539                normal_text_line_heights_acc_areas=normal_text_line_heights_acc_areas,
 540            )
 541            normal_text_line_height_idx = rng_choice(
 542                rng=rng,
 543                items=normal_text_line_heights_indices,
 544                probs=normal_text_line_heights_probs,
 545            )
 546            normal_text_line_height = normal_text_line_heights[normal_text_line_height_idx]
 547
 548            add_gap = False
 549            if prev_text_line_height:
 550                if prev_text_line_height != normal_text_line_height:
 551                    add_gap = (rng.random() < self.config.prob_normal_text_line_diff_heights_gap)
 552                else:
 553                    add_gap = (rng.random() < self.config.prob_normal_text_line_gap)
 554            if add_gap:
 555                gap_ratio = rng.uniform(
 556                    self.config.normal_text_line_gap_ratio_min,
 557                    self.config.normal_text_line_gap_ratio_max,
 558                )
 559                gap = round(gap_ratio * normal_text_line_height)
 560                gap = min(grid.down - (up + normal_text_line_height - 1), gap)
 561                up += gap
 562            down = up + normal_text_line_height - 1
 563            assert down <= grid.down
 564
 565            length_ratio = rng.uniform(
 566                self.config.normal_text_line_length_ratio_min,
 567                self.config.normal_text_line_length_ratio_max,
 568            )
 569            normal_text_line_length = round(grid.width * length_ratio)
 570            normal_text_line_length = max(normal_text_line_height, normal_text_line_length)
 571
 572            pad_max = grid.width - normal_text_line_length
 573            pad = rng.integers(0, pad_max + 1)
 574            left = grid.left + pad
 575            right = left + normal_text_line_length - 1
 576            assert right <= grid.right
 577
 578            text_line_idx = len(layout_text_lines)
 579            layout_text_lines.append(
 580                LayoutTextLine(
 581                    grid_idx=grid_idx,
 582                    text_line_idx=text_line_idx,
 583                    text_line_height=normal_text_line_height,
 584                    box=Box(up=up, down=down, left=left, right=right),
 585                    glyph_sequence=FontEngineRunConfigGlyphSequence.HORI_DEFAULT,
 586                )
 587            )
 588
 589            prev_text_line_height = normal_text_line_height
 590            normal_text_line_heights_acc_areas[normal_text_line_height_idx] \
 591                += normal_text_line_length * normal_text_line_height
 592            up = down + 1
 593
 594        return layout_text_lines
 595
 596    def fill_large_text_line_to_grid(
 597        self,
 598        large_text_line_gird: Box,
 599        rng: RandomGenerator,
 600    ):
 601        length_ratio = rng.uniform(
 602            self.config.large_text_line_length_ratio_min,
 603            self.config.large_text_line_length_ratio_max,
 604        )
 605        large_text_line_length = round(large_text_line_gird.width * length_ratio)
 606        large_text_line_length = max(large_text_line_gird.height, large_text_line_length)
 607
 608        pad_max = large_text_line_gird.width - large_text_line_length
 609        pad = rng.integers(0, pad_max + 1)
 610        left = large_text_line_gird.left + pad
 611        right = left + large_text_line_length - 1
 612        assert right <= large_text_line_gird.right
 613
 614        return LayoutTextLine(
 615            grid_idx=-1,
 616            text_line_idx=0,
 617            text_line_height=large_text_line_gird.height,
 618            box=attrs.evolve(large_text_line_gird, left=left, right=right),
 619            glyph_sequence=FontEngineRunConfigGlyphSequence.HORI_DEFAULT,
 620        )
 621
 622    def get_reference_height(self, height: int, width: int):
 623        area = height * width
 624        reference_height = math.ceil(math.sqrt(area / self.config.reference_aspect_ratio))
 625        return reference_height
 626
 627    def sample_layout_text_lines(self, height: int, width: int, rng: RandomGenerator):
 628        reference_height = self.get_reference_height(height=height, width=width)
 629
 630        normal_text_line_heights = self.sample_normal_text_line_heights(reference_height, rng)
 631        (vert_begins, vert_ends), (hori_begins, hori_ends) = self.sample_grid_points(
 632            height=height,
 633            width=width,
 634            normal_text_line_heights_max=normal_text_line_heights[-1],
 635            rng=rng,
 636        )
 637
 638        large_text_line_height = self.sample_large_text_line_height(reference_height, rng)
 639        large_text_line_gird: Optional[Box] = None
 640        if large_text_line_height is not None:
 641            large_text_line_gird, vert_trim_idx = self.trim_grid_points_for_large_text_line(
 642                large_text_line_height=large_text_line_height,
 643                vert_begins=vert_begins,
 644                vert_ends=vert_ends,
 645                hori_begins_min=hori_begins[0],
 646                hori_ends_max=hori_ends[-1],
 647            )
 648            if large_text_line_gird is not None:
 649                vert_begins = vert_begins[vert_trim_idx:]
 650                vert_ends = vert_ends[vert_trim_idx:]
 651
 652        grids = self.sample_grids(
 653            vert_begins=vert_begins,
 654            vert_ends=vert_ends,
 655            hori_begins=hori_begins,
 656            hori_ends=hori_ends,
 657            rng=rng,
 658        )
 659        normal_text_line_heights_expected_probs = normalize_to_probs([
 660            1 / normal_text_line_height for normal_text_line_height in normal_text_line_heights
 661        ])
 662        normal_text_line_heights_acc_areas = [0] * len(normal_text_line_heights)
 663        layout_text_lines: List[LayoutTextLine] = []
 664        for grid_idx, grid in enumerate(grids):
 665            layout_text_lines.extend(
 666                self.fill_normal_text_lines_to_grid(
 667                    normal_text_line_heights=normal_text_line_heights,
 668                    normal_text_line_heights_expected_probs=normal_text_line_heights_expected_probs,
 669                    normal_text_line_heights_acc_areas=normal_text_line_heights_acc_areas,
 670                    grid_idx=grid_idx,
 671                    grid=grid,
 672                    rng=rng,
 673                )
 674            )
 675
 676        if large_text_line_gird:
 677            layout_text_lines.append(self.fill_large_text_line_to_grid(large_text_line_gird, rng))
 678
 679        # Must place text line.
 680        assert layout_text_lines
 681
 682        return (
 683            layout_text_lines,
 684            large_text_line_gird,
 685            grids,
 686        )
 687
 688    def sample_layout_images(self, height: int, width: int, rng: RandomGenerator):
 689        # Image could be overlapped with text lines.
 690        layout_images: List[LayoutImage] = []
 691
 692        num_layout_images = rng.integers(
 693            self.config.num_images_min,
 694            self.config.num_images_max + 1,
 695        )
 696        for _ in range(num_layout_images):
 697            # NOTE: It's ok to have overlapping images.
 698            image_height_ratio = rng.uniform(
 699                self.config.image_height_ratio_min,
 700                self.config.image_height_ratio_max,
 701            )
 702            image_height = round(height * image_height_ratio)
 703
 704            image_width_ratio = rng.uniform(
 705                self.config.image_width_ratio_min,
 706                self.config.image_width_ratio_max,
 707            )
 708            image_width = round(width * image_width_ratio)
 709
 710            up = rng.integers(0, height - image_height + 1)
 711            down = up + image_height - 1
 712            left = rng.integers(0, width - image_width + 1)
 713            right = left + image_width - 1
 714            layout_images.append(LayoutImage(box=Box(up=up, down=down, left=left, right=right)))
 715
 716        return layout_images
 717
 718    @classmethod
 719    def boxes_are_overlapped(cls, box0: Box, box1: Box):
 720        vert_overlapped = (box0.down >= box1.up and box1.down >= box0.up)
 721        hori_overlapped = (box0.right >= box1.left and box1.right >= box0.left)
 722        return vert_overlapped and hori_overlapped
 723
    def sample_layout_barcode_qrs(
        self,
        height: int,
        width: int,
        layout_text_lines: Sequence[LayoutTextLine],
        rng: RandomGenerator,
    ):
        """Sample square QR code slots, each placed right next to a random text line.

        For every QR code a random anchor text line and a random side (up / down / left /
        right) are picked. The code touches the anchor on that side and is shifted randomly
        along the other axis, within one side length of the anchor's center. A placement
        that would leave the page is dropped and consumes one retry.

        Args:
            height: Page height.
            width: Page width.
            layout_text_lines: Candidate anchor text lines.
            rng: Random generator.

        Returns:
            The sampled QR code slots. May contain fewer slots than initially drawn if the
            retries are used up.
        """
        reference_height = self.get_reference_height(height=height, width=width)

        layout_barcode_qrs: List[LayoutBarcodeQr] = []

        num_layout_barcode_qrs = rng.integers(
            self.config.num_barcode_qrs_min,
            self.config.num_barcode_qrs_max + 1,
        )
        # Retry budget shared by all QR codes of the page.
        num_retries = 3
        while num_layout_barcode_qrs > 0 and num_retries > 0:
            barcode_qr_length_ratio = rng.uniform(
                self.config.barcode_qr_length_ratio_min,
                self.config.barcode_qr_length_ratio_max,
            )
            barcode_qr_length = round(barcode_qr_length_ratio * reference_height)
            # The code must not be larger than the page.
            barcode_qr_length = min(height, width, barcode_qr_length)

            # Place QR code next to text line.
            anchor_layout_text_line_box = rng_choice(rng, layout_text_lines).box
            anchor_layout_text_line_box_center = anchor_layout_text_line_box.get_center_point()
            placement = rng_choice(rng, tuple(LayoutXcodePlacement))

            if placement in (LayoutXcodePlacement.NEXT_TO_DOWN, LayoutXcodePlacement.NEXT_TO_UP):
                # Vertical position is fixed by the anchor; give up if it leaves the page.
                if placement == LayoutXcodePlacement.NEXT_TO_DOWN:
                    up = anchor_layout_text_line_box.down + 1
                    down = up + barcode_qr_length - 1
                    if down >= height:
                        num_retries -= 1
                        continue
                else:
                    assert placement == LayoutXcodePlacement.NEXT_TO_UP
                    down = anchor_layout_text_line_box.up - 1
                    up = down + 1 - barcode_qr_length
                    if up < 0:
                        num_retries -= 1
                        continue

                # Horizontal position: the left edge lies within one side length to the
                # left of the anchor's center, clamped to the page.
                left_min = max(
                    0,
                    anchor_layout_text_line_box_center.x - barcode_qr_length,
                )
                left_max = min(
                    width - barcode_qr_length,
                    anchor_layout_text_line_box_center.x,
                )
                if left_min > left_max:
                    num_retries -= 1
                    continue
                left = int(rng.integers(left_min, left_max + 1))
                right = left + barcode_qr_length - 1

            else:
                assert placement in (
                    LayoutXcodePlacement.NEXT_TO_RIGHT,
                    LayoutXcodePlacement.NEXT_TO_LEFT,
                )

                # Horizontal position is fixed by the anchor; give up if it leaves the page.
                if placement == LayoutXcodePlacement.NEXT_TO_RIGHT:
                    left = anchor_layout_text_line_box.right + 1
                    right = left + barcode_qr_length - 1
                    if right >= width:
                        num_retries -= 1
                        continue
                else:
                    assert placement == LayoutXcodePlacement.NEXT_TO_LEFT
                    right = anchor_layout_text_line_box.left - 1
                    left = right + 1 - barcode_qr_length
                    if left < 0:
                        num_retries -= 1
                        continue

                # Vertical position: the top edge lies within one side length above the
                # anchor's center, clamped to the page.
                up_min = max(
                    0,
                    anchor_layout_text_line_box_center.y - barcode_qr_length,
                )
                up_max = min(
                    height - barcode_qr_length,
                    anchor_layout_text_line_box_center.y,
                )
                if up_min > up_max:
                    num_retries -= 1
                    continue

                up = int(rng.integers(up_min, up_max + 1))
                down = up + barcode_qr_length - 1

            # Placement succeeded.
            # NOTE(review): no overlap check against other text lines or previously placed
            # codes is done here -- confirm this is intended.
            num_layout_barcode_qrs -= 1
            layout_barcode_qrs.append(
                LayoutBarcodeQr(box=Box(
                    up=up,
                    down=down,
                    left=left,
                    right=right,
                ))
            )

        return layout_barcode_qrs
 828
 829    def sample_layout_barcode_code39s(
 830        self,
 831        height: int,
 832        width: int,
 833        layout_text_lines: Sequence[LayoutTextLine],
 834        rng: RandomGenerator,
 835    ):
 836        reference_height = self.get_reference_height(height=height, width=width)
 837
 838        layout_barcode_code39s: List[LayoutBarcodeCode39] = []
 839
 840        num_layout_barcode_code39s = rng.integers(
 841            self.config.num_barcode_code39s_min,
 842            self.config.num_barcode_code39s_max + 1,
 843        )
 844        num_retries = 3
 845        while num_layout_barcode_code39s > 0 and num_retries > 0:
 846            barcode_code39_height_ratio = rng.uniform(
 847                self.config.barcode_code39_height_ratio_min,
 848                self.config.barcode_code39_height_ratio_max,
 849            )
 850            barcode_code39_height = round(barcode_code39_height_ratio * reference_height)
 851            barcode_code39_height = min(height, width, barcode_code39_height)
 852
 853            barcode_code39_num_chars = int(
 854                rng.integers(
 855                    self.config.barcode_code39_num_chars_min,
 856                    self.config.barcode_code39_num_chars_max + 1,
 857                )
 858            )
 859            barcode_code39_width = round(
 860                barcode_code39_height * self.config.barcode_code39_aspect_ratio
 861                * barcode_code39_num_chars
 862            )
 863
 864            # Place Bar code next to text line.
 865            anchor_layout_text_line_box = rng_choice(rng, layout_text_lines).box
 866            anchor_layout_text_line_box_center = anchor_layout_text_line_box.get_center_point()
 867            placement = rng_choice(rng, tuple(LayoutXcodePlacement))
 868
 869            if placement in (LayoutXcodePlacement.NEXT_TO_DOWN, LayoutXcodePlacement.NEXT_TO_UP):
 870                if placement == LayoutXcodePlacement.NEXT_TO_DOWN:
 871                    up = anchor_layout_text_line_box.down + 1
 872                    down = up + barcode_code39_height - 1
 873                    if down >= height:
 874                        num_retries -= 1
 875                        continue
 876                else:
 877                    assert placement == LayoutXcodePlacement.NEXT_TO_UP
 878                    down = anchor_layout_text_line_box.up - 1
 879                    up = down + 1 - barcode_code39_height
 880                    if up < 0:
 881                        num_retries -= 1
 882                        continue
 883
 884                left_min = max(
 885                    0,
 886                    anchor_layout_text_line_box_center.x - barcode_code39_width,
 887                )
 888                left_max = min(
 889                    width - barcode_code39_width,
 890                    anchor_layout_text_line_box_center.x,
 891                )
 892                if left_min > left_max:
 893                    num_retries -= 1
 894                    continue
 895                left = int(rng.integers(left_min, left_max + 1))
 896                right = left + barcode_code39_width - 1
 897
 898            else:
 899                assert placement in (
 900                    LayoutXcodePlacement.NEXT_TO_RIGHT,
 901                    LayoutXcodePlacement.NEXT_TO_LEFT,
 902                )
 903
 904                if placement == LayoutXcodePlacement.NEXT_TO_RIGHT:
 905                    left = anchor_layout_text_line_box.right + 1
 906                    right = left + barcode_code39_width - 1
 907                    if right >= width:
 908                        num_retries -= 1
 909                        continue
 910                else:
 911                    assert placement == LayoutXcodePlacement.NEXT_TO_LEFT
 912                    right = anchor_layout_text_line_box.left - 1
 913                    left = right + 1 - barcode_code39_width
 914                    if left < 0:
 915                        num_retries -= 1
 916                        continue
 917
 918                up_min = max(
 919                    0,
 920                    anchor_layout_text_line_box_center.y - barcode_code39_height,
 921                )
 922                up_max = min(
 923                    height - barcode_code39_height,
 924                    anchor_layout_text_line_box_center.y,
 925                )
 926                if up_min > up_max:
 927                    num_retries -= 1
 928                    continue
 929
 930                up = int(rng.integers(up_min, up_max + 1))
 931                down = up + barcode_code39_height - 1
 932
 933            num_layout_barcode_code39s -= 1
 934            layout_barcode_code39s.append(
 935                LayoutBarcodeCode39(box=Box(
 936                    up=up,
 937                    down=down,
 938                    left=left,
 939                    right=right,
 940                ))
 941            )
 942
 943        return layout_barcode_code39s
 944
 945    def sample_layout_barcode_qrs_and_layout_barcode_code39s(
 946        self,
 947        height: int,
 948        width: int,
 949        layout_text_lines: Sequence[LayoutTextLine],
 950        rng: RandomGenerator,
 951    ):
 952        layout_barcode_qrs = self.sample_layout_barcode_qrs(
 953            height=height,
 954            width=width,
 955            layout_text_lines=layout_text_lines,
 956            rng=rng,
 957        )
 958
 959        layout_barcode_code39s = self.sample_layout_barcode_code39s(
 960            height=height,
 961            width=width,
 962            layout_text_lines=layout_text_lines,
 963            rng=rng,
 964        )
 965
 966        if layout_barcode_qrs or layout_barcode_code39s:
 967            # Barcode could not be overlapped with text lines.
 968            # Hence need to remove the overlapped text lines.
 969            box_overlapping_validator = BoxOverlappingValidator(
 970                itertools.chain(
 971                    (layout_barcode_qr.box for layout_barcode_qr in layout_barcode_qrs),
 972                    (layout_barcode_code39.box for layout_barcode_code39 in layout_barcode_code39s),
 973                )
 974            )
 975
 976            keep_layout_text_lines: List[LayoutTextLine] = []
 977            for layout_text_line in layout_text_lines:
 978                if not box_overlapping_validator.is_overlapped(layout_text_line.box):
 979                    keep_layout_text_lines.append(layout_text_line)
 980            layout_text_lines = keep_layout_text_lines
 981
 982        return layout_barcode_qrs, layout_barcode_code39s, layout_text_lines
 983
 984    @classmethod
 985    def get_text_line_area(cls, layout_text_lines: Sequence[LayoutTextLine]):
 986        # Sample within the text line area.
 987        text_line_up = min(layout_text_line.box.up for layout_text_line in layout_text_lines)
 988        text_line_down = max(layout_text_line.box.down for layout_text_line in layout_text_lines)
 989        text_line_left = min(layout_text_line.box.left for layout_text_line in layout_text_lines)
 990        text_line_right = max(layout_text_line.box.right for layout_text_line in layout_text_lines)
 991        return (
 992            text_line_up,
 993            text_line_down,
 994            text_line_left,
 995            text_line_right,
 996        )
 997
 998    def sample_layout_non_text_symbols(
 999        self,
1000        height: int,
1001        width: int,
1002        layout_text_lines: Sequence[LayoutTextLine],
1003        rng: RandomGenerator,
1004    ):
1005        reference_height = self.get_reference_height(height=height, width=width)
1006
1007        text_line_up = 0
1008        text_line_down = height - 1
1009        text_line_left = 0
1010        text_line_right = width - 1
1011
1012        layout_non_text_symbols: List[LayoutNonTextSymbol] = []
1013
1014        num_non_text_symbols = int(
1015            rng.integers(
1016                self.config.num_non_text_symbols_min,
1017                self.config.num_non_text_symbols_max + 1,
1018            )
1019        )
1020        for _ in range(num_non_text_symbols):
1021            non_text_symbol_height_ratio = rng.uniform(
1022                self.config.non_text_symbol_height_ratio_min,
1023                self.config.non_text_symbol_height_ratio_max,
1024            )
1025            non_text_symbol_height = round(non_text_symbol_height_ratio * reference_height)
1026
1027            non_text_symbol_aspect_ratio = rng.uniform(
1028                self.config.non_text_symbol_aspect_ratio_min,
1029                self.config.non_text_symbol_aspect_ratio_max,
1030            )
1031            non_text_symbol_width = round(non_text_symbol_aspect_ratio * non_text_symbol_height)
1032
1033            box = None
1034            overlapped = True
1035            for _ in range(self.config.num_retries_to_get_non_overlapped_non_text_symbol):
1036                up_max = text_line_down + 1 - non_text_symbol_height
1037                up = int(rng.integers(text_line_up, up_max + 1))
1038                down = up + non_text_symbol_height - 1
1039                assert up < down
1040
1041                left_max = text_line_right + 1 - non_text_symbol_width
1042                left = int(rng.integers(text_line_left, left_max + 1))
1043                right = left + non_text_symbol_width - 1
1044                assert left < right
1045
1046                box = Box(up=up, down=down, left=left, right=right)
1047
1048                cur_overlapped = False
1049                for layout_text_line in layout_text_lines:
1050                    if self.boxes_are_overlapped(box, layout_text_line.box):
1051                        cur_overlapped = True
1052                        break
1053
1054                if not cur_overlapped:
1055                    overlapped = False
1056                    break
1057
1058            assert box
1059
1060            if not overlapped:
1061                alpha = float(
1062                    rng.uniform(
1063                        self.config.non_text_symbol_non_overlapped_alpha_min,
1064                        self.config.non_text_symbol_non_overlapped_alpha_max,
1065                    )
1066                )
1067            else:
1068                alpha = float(
1069                    rng.uniform(
1070                        self.config.non_text_symbol_overlapped_alpha_min,
1071                        self.config.non_text_symbol_overlapped_alpha_max,
1072                    )
1073                )
1074
1075            layout_non_text_symbols.append(LayoutNonTextSymbol(
1076                box=box,
1077                alpha=alpha,
1078            ))
1079
1080        return layout_non_text_symbols
1081
1082    def sample_layout_seal_impressions(
1083        self,
1084        height: int,
1085        width: int,
1086        layout_text_lines: Sequence[LayoutTextLine],
1087        rng: RandomGenerator,
1088    ):
1089        reference_height = self.get_reference_height(height=height, width=width)
1090
1091        (
1092            text_line_up,
1093            text_line_down,
1094            text_line_left,
1095            text_line_right,
1096        ) = self.get_text_line_area(layout_text_lines)
1097
1098        # Place seal impressions.
1099        layout_seal_impressions: List[LayoutSealImpression] = []
1100
1101        num_seal_impressions = int(
1102            rng.integers(
1103                self.config.num_seal_impressions_min,
1104                self.config.num_seal_impressions_max + 1,
1105            )
1106        )
1107        for _ in range(num_seal_impressions):
1108            # Sample height.
1109            seal_impression_height_ratio = float(
1110                rng.uniform(
1111                    self.config.seal_impression_height_ratio_min,
1112                    self.config.seal_impression_height_ratio_max,
1113                )
1114            )
1115            seal_impression_height = round(seal_impression_height_ratio * reference_height)
1116            seal_impression_height = min(text_line_down + 1 - text_line_up, seal_impression_height)
1117
1118            # Make sure even.
1119            if seal_impression_height % 2 != 0:
1120                seal_impression_height -= 1
1121
1122            # Sample width.
1123            shape_mode = rng_choice(
1124                rng,
1125                self.seal_impression_ellipse_shape_modes,
1126                probs=self.seal_impression_ellipse_shape_modes_probs,
1127            )
1128            if shape_mode == SealImpressionEllipseShapeMode.CIRCLE:
1129                seal_impression_width = seal_impression_height
1130
1131            elif shape_mode == SealImpressionEllipseShapeMode.GENERAL_ELLIPSE:
1132                aspect_ratio = float(
1133                    rng.uniform(
1134                        self.config.seal_impression_general_ellipse_aspect_ratio_min,
1135                        self.config.seal_impression_general_ellipse_aspect_ratio_max,
1136                    )
1137                )
1138                seal_impression_width = round(aspect_ratio * seal_impression_height)
1139
1140            else:
1141                raise NotImplementedError()
1142
1143            seal_impression_width = min(text_line_right + 1 - text_line_left, seal_impression_width)
1144
1145            # Make sure even.
1146            if seal_impression_width % 2 != 0:
1147                seal_impression_width -= 1
1148
1149            seal_impression_up_max = text_line_down + 1 - seal_impression_height
1150            seal_impression_up = int(rng.integers(
1151                text_line_up,
1152                seal_impression_up_max + 1,
1153            ))
1154            seal_impression_down = seal_impression_up + seal_impression_height - 1
1155
1156            seal_impression_left_max = text_line_right + 1 - seal_impression_width
1157            seal_impression_left = int(rng.integers(
1158                text_line_left,
1159                seal_impression_left_max + 1,
1160            ))
1161            seal_impression_right = seal_impression_left + seal_impression_width - 1
1162
1163            angle = int(
1164                rng.integers(
1165                    self.config.seal_impression_angle_min,
1166                    self.config.seal_impression_angle_max + 1,
1167                )
1168            )
1169            angle = angle % 360
1170
1171            layout_seal_impressions.append(
1172                LayoutSealImpression(
1173                    box=Box(
1174                        up=seal_impression_up,
1175                        down=seal_impression_down,
1176                        left=seal_impression_left,
1177                        right=seal_impression_right,
1178                    ),
1179                    angle=angle,
1180                )
1181            )
1182
1183        return layout_seal_impressions
1184
1185    def generate_disconnected_text_regions(
1186        self,
1187        layout_text_lines: Sequence[LayoutTextLine],
1188    ):
1189        grid_idx_to_layout_text_lines: DefaultDict[int, List[LayoutTextLine]] = defaultdict(list)
1190        for layout_text_line in layout_text_lines:
1191            grid_idx_to_layout_text_lines[layout_text_line.grid_idx].append(layout_text_line)
1192
1193        disconnected_text_regions: List[DisconnectedTextRegion] = []
1194
1195        for _, layout_text_lines in sorted(
1196            grid_idx_to_layout_text_lines.items(),
1197            key=lambda p: p[0],
1198        ):
1199            layout_text_lines = sorted(layout_text_lines, key=lambda ltl: ltl.text_line_idx)
1200
1201            begin = 0
1202            while begin < len(layout_text_lines):
1203                text_line_height_min = layout_text_lines[begin].text_line_height
1204                text_line_height_max = text_line_height_min
1205
1206                # Find [begin, end) interval satisfying the condition.
1207                end = begin + 1
1208                while end < len(layout_text_lines):
1209                    text_line_height = layout_text_lines[end].text_line_height
1210                    text_line_height_min = min(text_line_height_min, text_line_height)
1211                    text_line_height_max = max(text_line_height_max, text_line_height)
1212                    if text_line_height_max / text_line_height_min \
1213                            > self.config.disconnected_text_region_polygons_height_ratio_max:
1214                        break
1215                    else:
1216                        end += 1
1217
1218                # To polygon.
1219                # NOTE: Simply using a bounding box is enough.
1220                # This method is common to all glyph sequences.
1221                cur_layout_text_lines = layout_text_lines[begin:end]
1222                bounding_box = Box(
1223                    up=min(ltl.box.up for ltl in cur_layout_text_lines),
1224                    down=max(ltl.box.down for ltl in cur_layout_text_lines),
1225                    left=min(ltl.box.left for ltl in cur_layout_text_lines),
1226                    right=max(ltl.box.right for ltl in cur_layout_text_lines),
1227                )
1228                step = min(
1229                    itertools.chain.from_iterable(ltl.box.shape for ltl in cur_layout_text_lines)
1230                )
1231                disconnected_text_regions.append(
1232                    DisconnectedTextRegion(polygon=bounding_box.to_polygon(step=step))
1233                )
1234
1235                # Move to next.
1236                begin = end
1237
1238        return disconnected_text_regions
1239
1240    def generate_non_text_regions(
1241        self,
1242        height: int,
1243        width: int,
1244        layout_text_lines: Sequence[LayoutTextLine],
1245        rng: RandomGenerator,
1246    ):
1247        box_overlapping_validator = BoxOverlappingValidator(
1248            layout_text_line.box for layout_text_line in layout_text_lines
1249        )
1250        directions = [
1251            LayoutNonTextLineDirection.UP,
1252            LayoutNonTextLineDirection.DOWN,
1253            LayoutNonTextLineDirection.LEFT,
1254            LayoutNonTextLineDirection.RIGHT,
1255        ]
1256
1257        lntl_boxes: List[Box] = []
1258        for layout_text_line in layout_text_lines:
1259            ltl_box = layout_text_line.box
1260
1261            for direction_idx in rng.permutation(len(directions)):
1262                direction = directions[direction_idx]
1263
1264                if direction == LayoutNonTextLineDirection.UP:
1265                    lntl_box = Box(
1266                        up=ltl_box.up - ltl_box.height,
1267                        down=ltl_box.up - 1,
1268                        left=ltl_box.left,
1269                        right=ltl_box.right,
1270                    )
1271
1272                elif direction == LayoutNonTextLineDirection.DOWN:
1273                    lntl_box = Box(
1274                        up=ltl_box.down + 1,
1275                        down=ltl_box.down + ltl_box.height,
1276                        left=ltl_box.left,
1277                        right=ltl_box.right,
1278                    )
1279
1280                elif direction == LayoutNonTextLineDirection.LEFT:
1281                    lntl_box = Box(
1282                        up=ltl_box.up,
1283                        down=ltl_box.down,
1284                        left=ltl_box.left - ltl_box.width,
1285                        right=ltl_box.left - 1,
1286                    )
1287
1288                elif direction == LayoutNonTextLineDirection.RIGHT:
1289                    lntl_box = Box(
1290                        up=ltl_box.up,
1291                        down=ltl_box.down,
1292                        left=ltl_box.right + 1,
1293                        right=ltl_box.right + ltl_box.width,
1294                    )
1295
1296                else:
1297                    raise NotImplementedError()
1298
1299                # Ignore invalid box.
1300                if not lntl_box.valid:
1301                    continue
1302                if lntl_box.down >= height or lntl_box.right >= width:
1303                    continue
1304
1305                assert ltl_box.shape == lntl_box.shape
1306
1307                # Ignore box that is overlapped with any text lines.
1308                if box_overlapping_validator.is_overlapped(lntl_box):
1309                    continue
1310
1311                # Keep only the first valid direction.
1312                lntl_boxes.append(lntl_box)
1313                break
1314
1315        step = max(
1316            1,
1317            min(itertools.chain.from_iterable(lntl_box.shape for lntl_box in lntl_boxes)),
1318        )
1319        non_text_regions = [
1320            NonTextRegion(polygon=lntl_box.to_polygon(step=step)) for lntl_box in lntl_boxes
1321        ]
1322        return non_text_regions
1323
1324    def run(self, input: PageLayoutStepInput, rng: RandomGenerator):
1325        page_shape_step_output = input.page_shape_step_output
1326        height = page_shape_step_output.height
1327        width = page_shape_step_output.width
1328
1329        # Text lines.
1330        (
1331            layout_text_lines,
1332            large_text_line_gird,
1333            grids,
1334        ) = self.sample_layout_text_lines(height=height, width=width, rng=rng)
1335
1336        # Images.
1337        layout_images = self.sample_layout_images(height=height, width=width, rng=rng)
1338
1339        # QR codes & Bar codes.
1340        # NOTE: Some layout_text_lines could be dropped.
1341        (
1342            layout_barcode_qrs,
1343            layout_barcode_code39s,
1344            layout_text_lines,
1345        ) = self.sample_layout_barcode_qrs_and_layout_barcode_code39s(
1346            height=height,
1347            width=width,
1348            layout_text_lines=layout_text_lines,
1349            rng=rng,
1350        )
1351
1352        # Non-text symbols.
1353        layout_non_text_symbols = self.sample_layout_non_text_symbols(
1354            height=height,
1355            width=width,
1356            layout_text_lines=layout_text_lines,
1357            rng=rng,
1358        )
1359
1360        # Seal impressions.
1361        layout_seal_impressions = self.sample_layout_seal_impressions(
1362            height=height,
1363            width=width,
1364            layout_text_lines=layout_text_lines,
1365            rng=rng,
1366        )
1367
1368        # For char-level polygon regression.
1369        disconnected_text_regions = self.generate_disconnected_text_regions(
1370            layout_text_lines=layout_text_lines,
1371        )
1372
1373        # For sampling negative text region area.
1374        non_text_regions = self.generate_non_text_regions(
1375            height=height,
1376            width=width,
1377            layout_text_lines=layout_text_lines,
1378            rng=rng,
1379        )
1380
1381        return PageLayoutStepOutput(
1382            page_layout=PageLayout(
1383                height=height,
1384                width=width,
1385                layout_text_lines=layout_text_lines,
1386                layout_non_text_symbols=layout_non_text_symbols,
1387                layout_seal_impressions=layout_seal_impressions,
1388                layout_images=layout_images,
1389                layout_barcode_qrs=layout_barcode_qrs,
1390                layout_barcode_code39s=layout_barcode_code39s,
1391                disconnected_text_regions=disconnected_text_regions,
1392                non_text_regions=non_text_regions,
1393            ),
1394            debug_large_text_line_gird=large_text_line_gird,
1395            debug_grids=grids,
1396        )
1397
1398
# Module-level factory wrapping the PageLayoutStep class.
page_layout_step_factory = PipelineStepFactory(PageLayoutStep)
class PageLayoutStepConfig:
class PageLayoutStepConfig:
    """Tunable parameters of the page layout step.

    `*_min` / `*_max` pairs of the barcode (code39), non-text symbol and seal
    impression groups are sampled as inclusive integer ranges or uniform float
    ranges by the step; `*_ratio_*` heights in those groups are multiplied
    with the step's reference height.
    """
    # Text line heights.
    # NOTE(review): presumably consumed by `get_reference_height` — confirm.
    reference_aspect_ratio: float = 1 / 1.4142

    # Grid points.
    grid_pad_ratio_min: float = 0.01
    grid_pad_ratio_max: float = 0.05
    grid_step_ratio_min: float = 1.0
    grid_step_ratio_max: float = 1.1
    grid_vert_gap_ratio_min: float = 0.0
    grid_vert_gap_ratio_max: float = 0.5
    grid_hori_gap_ratio_min: float = 1.0
    grid_hori_gap_ratio_max: float = 1.15

    # Large text line.
    prob_add_large_text_line: float = 0.25
    large_text_line_height_ratio_min: float = 0.05
    large_text_line_height_ratio_max: float = 0.075
    large_text_line_length_ratio_min: float = 0.5
    large_text_line_length_ratio_max: float = 1.0

    # Normal text line.
    num_normal_text_line_heights_min: int = 2
    num_normal_text_line_heights_max: int = 4
    normal_text_line_height_ratio_min: float = 0.006
    normal_text_line_height_ratio_max: float = 0.036
    force_add_normal_text_line_height_ratio_min: bool = True

    # Non-text symbol.
    # Number of symbols per page (inclusive range).
    num_non_text_symbols_min: int = 0
    num_non_text_symbols_max: int = 5
    # Number of positions tried per symbol to avoid overlapping a text line.
    num_retries_to_get_non_overlapped_non_text_symbol: int = 5
    # Symbol height as a ratio of the reference height.
    non_text_symbol_height_ratio_min: float = 0.018
    non_text_symbol_height_ratio_max: float = 0.064
    # Symbol width = aspect ratio * symbol height.
    non_text_symbol_aspect_ratio_min: float = 0.9
    non_text_symbol_aspect_ratio_max: float = 1.111
    # Alpha range when the kept position overlaps no text line.
    non_text_symbol_non_overlapped_alpha_min: float = 0.8
    non_text_symbol_non_overlapped_alpha_max: float = 1.0
    # Alpha range when the kept position still overlaps a text line.
    non_text_symbol_overlapped_alpha_min: float = 0.15
    non_text_symbol_overlapped_alpha_max: float = 0.55

    # NOTE(review): the following `normal_text_line_*` settings are declared
    # apart from the "Normal text line" group above — presumably they control
    # gaps between and lengths of normal text lines; confirm against the sampler.
    prob_normal_text_line_diff_heights_gap: float = 0.5
    prob_normal_text_line_gap: float = 0.5
    normal_text_line_gap_ratio_min: float = 0.05
    normal_text_line_gap_ratio_max: float = 1.25
    normal_text_line_length_ratio_min: float = 0.5
    normal_text_line_length_ratio_max: float = 1.0

    # Image.
    num_images_min: int = 0
    num_images_max: int = 3
    image_height_ratio_min: float = 0.1
    image_height_ratio_max: float = 0.35
    image_width_ratio_min: float = 0.1
    image_width_ratio_max: float = 0.35

    # Barcode (qr).
    num_barcode_qrs_min: int = 0
    num_barcode_qrs_max: int = 2
    barcode_qr_length_ratio_min: float = 0.05
    barcode_qr_length_ratio_max: float = 0.15

    # Barcode (code39).
    # Number of barcodes per page (inclusive range).
    num_barcode_code39s_min: int = 0
    num_barcode_code39s_max: int = 2
    # Barcode height as a ratio of the reference height.
    barcode_code39_height_ratio_min: float = 0.025
    barcode_code39_height_ratio_max: float = 0.05
    # Barcode width = height * aspect ratio * number of chars.
    barcode_code39_aspect_ratio: float = 0.2854396602149411
    # Number of chars per barcode (inclusive range).
    barcode_code39_num_chars_min: int = 9
    barcode_code39_num_chars_max: int = 13

    # Seal impression.
    # Number of seal impressions per page (inclusive range).
    num_seal_impressions_min: int = 1
    num_seal_impressions_max: int = 3
    # Rotation angle (inclusive range); the sampled value is taken modulo 360.
    seal_impression_angle_min: int = -45
    seal_impression_angle_max: int = 45
    # Seal impression height as a ratio of the reference height.
    seal_impression_height_ratio_min: float = 0.1
    seal_impression_height_ratio_max: float = 0.2
    # NOTE(review): presumably the sampling weights of the ellipse shape modes — confirm.
    seal_impression_weight_circle: float = 1
    seal_impression_weight_general_ellipse: float = 1
    # General ellipse only: width = aspect ratio * height.
    seal_impression_general_ellipse_aspect_ratio_min: float = 0.75
    seal_impression_general_ellipse_aspect_ratio_max: float = 1.333

    # For char-level polygon regression.
    # Max allowed (max / min) text line height within one disconnected text region.
    disconnected_text_region_polygons_height_ratio_max: float = 2.0
PageLayoutStepConfig( reference_aspect_ratio: float = 0.7071135624381276, grid_pad_ratio_min: float = 0.01, grid_pad_ratio_max: float = 0.05, grid_step_ratio_min: float = 1.0, grid_step_ratio_max: float = 1.1, grid_vert_gap_ratio_min: float = 0.0, grid_vert_gap_ratio_max: float = 0.5, grid_hori_gap_ratio_min: float = 1.0, grid_hori_gap_ratio_max: float = 1.15, prob_add_large_text_line: float = 0.25, large_text_line_height_ratio_min: float = 0.05, large_text_line_height_ratio_max: float = 0.075, large_text_line_length_ratio_min: float = 0.5, large_text_line_length_ratio_max: float = 1.0, num_normal_text_line_heights_min: int = 2, num_normal_text_line_heights_max: int = 4, normal_text_line_height_ratio_min: float = 0.006, normal_text_line_height_ratio_max: float = 0.036, force_add_normal_text_line_height_ratio_min: bool = True, num_non_text_symbols_min: int = 0, num_non_text_symbols_max: int = 5, num_retries_to_get_non_overlapped_non_text_symbol: int = 5, non_text_symbol_height_ratio_min: float = 0.018, non_text_symbol_height_ratio_max: float = 0.064, non_text_symbol_aspect_ratio_min: float = 0.9, non_text_symbol_aspect_ratio_max: float = 1.111, non_text_symbol_non_overlapped_alpha_min: float = 0.8, non_text_symbol_non_overlapped_alpha_max: float = 1.0, non_text_symbol_overlapped_alpha_min: float = 0.15, non_text_symbol_overlapped_alpha_max: float = 0.55, prob_normal_text_line_diff_heights_gap: float = 0.5, prob_normal_text_line_gap: float = 0.5, normal_text_line_gap_ratio_min: float = 0.05, normal_text_line_gap_ratio_max: float = 1.25, normal_text_line_length_ratio_min: float = 0.5, normal_text_line_length_ratio_max: float = 1.0, num_images_min: int = 0, num_images_max: int = 3, image_height_ratio_min: float = 0.1, image_height_ratio_max: float = 0.35, image_width_ratio_min: float = 0.1, image_width_ratio_max: float = 0.35, num_barcode_qrs_min: int = 0, num_barcode_qrs_max: int = 2, barcode_qr_length_ratio_min: float = 0.05, barcode_qr_length_ratio_max: float = 
0.15, num_barcode_code39s_min: int = 0, num_barcode_code39s_max: int = 2, barcode_code39_height_ratio_min: float = 0.025, barcode_code39_height_ratio_max: float = 0.05, barcode_code39_aspect_ratio: float = 0.2854396602149411, barcode_code39_num_chars_min: int = 9, barcode_code39_num_chars_max: int = 13, num_seal_impressions_min: int = 1, num_seal_impressions_max: int = 3, seal_impression_angle_min: int = -45, seal_impression_angle_max: int = 45, seal_impression_height_ratio_min: float = 0.1, seal_impression_height_ratio_max: float = 0.2, seal_impression_weight_circle: float = 1, seal_impression_weight_general_ellipse: float = 1, seal_impression_general_ellipse_aspect_ratio_min: float = 0.75, seal_impression_general_ellipse_aspect_ratio_max: float = 1.333, disconnected_text_region_polygons_height_ratio_max: float = 2.0)
 2def __init__(self, reference_aspect_ratio=attr_dict['reference_aspect_ratio'].default, grid_pad_ratio_min=attr_dict['grid_pad_ratio_min'].default, grid_pad_ratio_max=attr_dict['grid_pad_ratio_max'].default, grid_step_ratio_min=attr_dict['grid_step_ratio_min'].default, grid_step_ratio_max=attr_dict['grid_step_ratio_max'].default, grid_vert_gap_ratio_min=attr_dict['grid_vert_gap_ratio_min'].default, grid_vert_gap_ratio_max=attr_dict['grid_vert_gap_ratio_max'].default, grid_hori_gap_ratio_min=attr_dict['grid_hori_gap_ratio_min'].default, grid_hori_gap_ratio_max=attr_dict['grid_hori_gap_ratio_max'].default, prob_add_large_text_line=attr_dict['prob_add_large_text_line'].default, large_text_line_height_ratio_min=attr_dict['large_text_line_height_ratio_min'].default, large_text_line_height_ratio_max=attr_dict['large_text_line_height_ratio_max'].default, large_text_line_length_ratio_min=attr_dict['large_text_line_length_ratio_min'].default, large_text_line_length_ratio_max=attr_dict['large_text_line_length_ratio_max'].default, num_normal_text_line_heights_min=attr_dict['num_normal_text_line_heights_min'].default, num_normal_text_line_heights_max=attr_dict['num_normal_text_line_heights_max'].default, normal_text_line_height_ratio_min=attr_dict['normal_text_line_height_ratio_min'].default, normal_text_line_height_ratio_max=attr_dict['normal_text_line_height_ratio_max'].default, force_add_normal_text_line_height_ratio_min=attr_dict['force_add_normal_text_line_height_ratio_min'].default, num_non_text_symbols_min=attr_dict['num_non_text_symbols_min'].default, num_non_text_symbols_max=attr_dict['num_non_text_symbols_max'].default, num_retries_to_get_non_overlapped_non_text_symbol=attr_dict['num_retries_to_get_non_overlapped_non_text_symbol'].default, non_text_symbol_height_ratio_min=attr_dict['non_text_symbol_height_ratio_min'].default, non_text_symbol_height_ratio_max=attr_dict['non_text_symbol_height_ratio_max'].default, 
non_text_symbol_aspect_ratio_min=attr_dict['non_text_symbol_aspect_ratio_min'].default, non_text_symbol_aspect_ratio_max=attr_dict['non_text_symbol_aspect_ratio_max'].default, non_text_symbol_non_overlapped_alpha_min=attr_dict['non_text_symbol_non_overlapped_alpha_min'].default, non_text_symbol_non_overlapped_alpha_max=attr_dict['non_text_symbol_non_overlapped_alpha_max'].default, non_text_symbol_overlapped_alpha_min=attr_dict['non_text_symbol_overlapped_alpha_min'].default, non_text_symbol_overlapped_alpha_max=attr_dict['non_text_symbol_overlapped_alpha_max'].default, prob_normal_text_line_diff_heights_gap=attr_dict['prob_normal_text_line_diff_heights_gap'].default, prob_normal_text_line_gap=attr_dict['prob_normal_text_line_gap'].default, normal_text_line_gap_ratio_min=attr_dict['normal_text_line_gap_ratio_min'].default, normal_text_line_gap_ratio_max=attr_dict['normal_text_line_gap_ratio_max'].default, normal_text_line_length_ratio_min=attr_dict['normal_text_line_length_ratio_min'].default, normal_text_line_length_ratio_max=attr_dict['normal_text_line_length_ratio_max'].default, num_images_min=attr_dict['num_images_min'].default, num_images_max=attr_dict['num_images_max'].default, image_height_ratio_min=attr_dict['image_height_ratio_min'].default, image_height_ratio_max=attr_dict['image_height_ratio_max'].default, image_width_ratio_min=attr_dict['image_width_ratio_min'].default, image_width_ratio_max=attr_dict['image_width_ratio_max'].default, num_barcode_qrs_min=attr_dict['num_barcode_qrs_min'].default, num_barcode_qrs_max=attr_dict['num_barcode_qrs_max'].default, barcode_qr_length_ratio_min=attr_dict['barcode_qr_length_ratio_min'].default, barcode_qr_length_ratio_max=attr_dict['barcode_qr_length_ratio_max'].default, num_barcode_code39s_min=attr_dict['num_barcode_code39s_min'].default, num_barcode_code39s_max=attr_dict['num_barcode_code39s_max'].default, barcode_code39_height_ratio_min=attr_dict['barcode_code39_height_ratio_min'].default, 
barcode_code39_height_ratio_max=attr_dict['barcode_code39_height_ratio_max'].default, barcode_code39_aspect_ratio=attr_dict['barcode_code39_aspect_ratio'].default, barcode_code39_num_chars_min=attr_dict['barcode_code39_num_chars_min'].default, barcode_code39_num_chars_max=attr_dict['barcode_code39_num_chars_max'].default, num_seal_impressions_min=attr_dict['num_seal_impressions_min'].default, num_seal_impressions_max=attr_dict['num_seal_impressions_max'].default, seal_impression_angle_min=attr_dict['seal_impression_angle_min'].default, seal_impression_angle_max=attr_dict['seal_impression_angle_max'].default, seal_impression_height_ratio_min=attr_dict['seal_impression_height_ratio_min'].default, seal_impression_height_ratio_max=attr_dict['seal_impression_height_ratio_max'].default, seal_impression_weight_circle=attr_dict['seal_impression_weight_circle'].default, seal_impression_weight_general_ellipse=attr_dict['seal_impression_weight_general_ellipse'].default, seal_impression_general_ellipse_aspect_ratio_min=attr_dict['seal_impression_general_ellipse_aspect_ratio_min'].default, seal_impression_general_ellipse_aspect_ratio_max=attr_dict['seal_impression_general_ellipse_aspect_ratio_max'].default, disconnected_text_region_polygons_height_ratio_max=attr_dict['disconnected_text_region_polygons_height_ratio_max'].default):
 3    self.reference_aspect_ratio = reference_aspect_ratio
 4    self.grid_pad_ratio_min = grid_pad_ratio_min
 5    self.grid_pad_ratio_max = grid_pad_ratio_max
 6    self.grid_step_ratio_min = grid_step_ratio_min
 7    self.grid_step_ratio_max = grid_step_ratio_max
 8    self.grid_vert_gap_ratio_min = grid_vert_gap_ratio_min
 9    self.grid_vert_gap_ratio_max = grid_vert_gap_ratio_max
10    self.grid_hori_gap_ratio_min = grid_hori_gap_ratio_min
11    self.grid_hori_gap_ratio_max = grid_hori_gap_ratio_max
12    self.prob_add_large_text_line = prob_add_large_text_line
13    self.large_text_line_height_ratio_min = large_text_line_height_ratio_min
14    self.large_text_line_height_ratio_max = large_text_line_height_ratio_max
15    self.large_text_line_length_ratio_min = large_text_line_length_ratio_min
16    self.large_text_line_length_ratio_max = large_text_line_length_ratio_max
17    self.num_normal_text_line_heights_min = num_normal_text_line_heights_min
18    self.num_normal_text_line_heights_max = num_normal_text_line_heights_max
19    self.normal_text_line_height_ratio_min = normal_text_line_height_ratio_min
20    self.normal_text_line_height_ratio_max = normal_text_line_height_ratio_max
21    self.force_add_normal_text_line_height_ratio_min = force_add_normal_text_line_height_ratio_min
22    self.num_non_text_symbols_min = num_non_text_symbols_min
23    self.num_non_text_symbols_max = num_non_text_symbols_max
24    self.num_retries_to_get_non_overlapped_non_text_symbol = num_retries_to_get_non_overlapped_non_text_symbol
25    self.non_text_symbol_height_ratio_min = non_text_symbol_height_ratio_min
26    self.non_text_symbol_height_ratio_max = non_text_symbol_height_ratio_max
27    self.non_text_symbol_aspect_ratio_min = non_text_symbol_aspect_ratio_min
28    self.non_text_symbol_aspect_ratio_max = non_text_symbol_aspect_ratio_max
29    self.non_text_symbol_non_overlapped_alpha_min = non_text_symbol_non_overlapped_alpha_min
30    self.non_text_symbol_non_overlapped_alpha_max = non_text_symbol_non_overlapped_alpha_max
31    self.non_text_symbol_overlapped_alpha_min = non_text_symbol_overlapped_alpha_min
32    self.non_text_symbol_overlapped_alpha_max = non_text_symbol_overlapped_alpha_max
33    self.prob_normal_text_line_diff_heights_gap = prob_normal_text_line_diff_heights_gap
34    self.prob_normal_text_line_gap = prob_normal_text_line_gap
35    self.normal_text_line_gap_ratio_min = normal_text_line_gap_ratio_min
36    self.normal_text_line_gap_ratio_max = normal_text_line_gap_ratio_max
37    self.normal_text_line_length_ratio_min = normal_text_line_length_ratio_min
38    self.normal_text_line_length_ratio_max = normal_text_line_length_ratio_max
39    self.num_images_min = num_images_min
40    self.num_images_max = num_images_max
41    self.image_height_ratio_min = image_height_ratio_min
42    self.image_height_ratio_max = image_height_ratio_max
43    self.image_width_ratio_min = image_width_ratio_min
44    self.image_width_ratio_max = image_width_ratio_max
45    self.num_barcode_qrs_min = num_barcode_qrs_min
46    self.num_barcode_qrs_max = num_barcode_qrs_max
47    self.barcode_qr_length_ratio_min = barcode_qr_length_ratio_min
48    self.barcode_qr_length_ratio_max = barcode_qr_length_ratio_max
49    self.num_barcode_code39s_min = num_barcode_code39s_min
50    self.num_barcode_code39s_max = num_barcode_code39s_max
51    self.barcode_code39_height_ratio_min = barcode_code39_height_ratio_min
52    self.barcode_code39_height_ratio_max = barcode_code39_height_ratio_max
53    self.barcode_code39_aspect_ratio = barcode_code39_aspect_ratio
54    self.barcode_code39_num_chars_min = barcode_code39_num_chars_min
55    self.barcode_code39_num_chars_max = barcode_code39_num_chars_max
56    self.num_seal_impressions_min = num_seal_impressions_min
57    self.num_seal_impressions_max = num_seal_impressions_max
58    self.seal_impression_angle_min = seal_impression_angle_min
59    self.seal_impression_angle_max = seal_impression_angle_max
60    self.seal_impression_height_ratio_min = seal_impression_height_ratio_min
61    self.seal_impression_height_ratio_max = seal_impression_height_ratio_max
62    self.seal_impression_weight_circle = seal_impression_weight_circle
63    self.seal_impression_weight_general_ellipse = seal_impression_weight_general_ellipse
64    self.seal_impression_general_ellipse_aspect_ratio_min = seal_impression_general_ellipse_aspect_ratio_min
65    self.seal_impression_general_ellipse_aspect_ratio_max = seal_impression_general_ellipse_aspect_ratio_max
66    self.disconnected_text_region_polygons_height_ratio_max = disconnected_text_region_polygons_height_ratio_max

Method generated by attrs for class PageLayoutStepConfig.

class PageLayoutStepInput:
121class PageLayoutStepInput:
122    page_shape_step_output: PageShapeStepOutput
PageLayoutStepInput( page_shape_step_output: vkit.pipeline.text_detection.page_shape.PageShapeStepOutput)
2def __init__(self, page_shape_step_output):
3    self.page_shape_step_output = page_shape_step_output

Method generated by attrs for class PageLayoutStepInput.

class LayoutTextLine:
126class LayoutTextLine:
127    # grid_idx:
128    #   == -1: for large text line.
129    #   >= 0: for normal text lines.
130    grid_idx: int
131    # text_line_idx: index within a grid.
132    text_line_idx: int
133    text_line_height: int
134    box: Box
135    glyph_sequence: FontEngineRunConfigGlyphSequence
LayoutTextLine( grid_idx: int, text_line_idx: int, text_line_height: int, box: vkit.element.box.Box, glyph_sequence: vkit.engine.font.type.FontEngineRunConfigGlyphSequence)
2def __init__(self, grid_idx, text_line_idx, text_line_height, box, glyph_sequence):
3    self.grid_idx = grid_idx
4    self.text_line_idx = text_line_idx
5    self.text_line_height = text_line_height
6    self.box = box
7    self.glyph_sequence = glyph_sequence

Method generated by attrs for class LayoutTextLine.

class LayoutNonTextSymbol:
139class LayoutNonTextSymbol:
140    box: Box
141    alpha: float
LayoutNonTextSymbol(box: vkit.element.box.Box, alpha: float)
2def __init__(self, box, alpha):
3    self.box = box
4    self.alpha = alpha

Method generated by attrs for class LayoutNonTextSymbol.

class LayoutSealImpression:
145class LayoutSealImpression:
146    box: Box
147    angle: int
LayoutSealImpression(box: vkit.element.box.Box, angle: int)
2def __init__(self, box, angle):
3    self.box = box
4    self.angle = angle

Method generated by attrs for class LayoutSealImpression.

class LayoutImage:
151class LayoutImage:
152    box: Box
LayoutImage(box: vkit.element.box.Box)
2def __init__(self, box):
3    self.box = box

Method generated by attrs for class LayoutImage.

class LayoutBarcodeQr:
156class LayoutBarcodeQr:
157    box: Box
LayoutBarcodeQr(box: vkit.element.box.Box)
2def __init__(self, box):
3    self.box = box

Method generated by attrs for class LayoutBarcodeQr.

class LayoutBarcodeCode39:
161class LayoutBarcodeCode39:
162    box: Box
LayoutBarcodeCode39(box: vkit.element.box.Box)
2def __init__(self, box):
3    self.box = box

Method generated by attrs for class LayoutBarcodeCode39.

class LayoutXcodePlacement(enum.Enum):
166class LayoutXcodePlacement(Enum):
167    NEXT_TO_UP = 'next_to_up'
168    NEXT_TO_DOWN = 'next_to_down'
169    NEXT_TO_LEFT = 'next_to_left'
170    NEXT_TO_RIGHT = 'next_to_right'

An enumeration.

Inherited Members
enum.Enum
name
value
class DisconnectedTextRegion:
174class DisconnectedTextRegion:
175    polygon: Polygon
DisconnectedTextRegion(polygon: vkit.element.polygon.Polygon)
2def __init__(self, polygon):
3    self.polygon = polygon

Method generated by attrs for class DisconnectedTextRegion.

class NonTextRegion:
179class NonTextRegion:
180    polygon: Polygon
NonTextRegion(polygon: vkit.element.polygon.Polygon)
2def __init__(self, polygon):
3    self.polygon = polygon

Method generated by attrs for class NonTextRegion.

class LayoutNonTextLineDirection(enum.Enum):
184class LayoutNonTextLineDirection(Enum):
185    UP = 'up'
186    DOWN = 'down'
187    LEFT = 'left'
188    RIGHT = 'right'

An enumeration.

Inherited Members
enum.Enum
name
value
class PageLayout:
192class PageLayout:
193    height: int
194    width: int
195    layout_text_lines: Sequence[LayoutTextLine]
196    layout_non_text_symbols: Sequence[LayoutNonTextSymbol]
197    layout_seal_impressions: Sequence[LayoutSealImpression]
198    layout_images: Sequence[LayoutImage]
199    layout_barcode_qrs: Sequence[LayoutBarcodeQr]
200    layout_barcode_code39s: Sequence[LayoutBarcodeCode39]
201    disconnected_text_regions: Sequence[DisconnectedTextRegion]
202    non_text_regions: Sequence[NonTextRegion]
PageLayout( height: int, width: int, layout_text_lines: Sequence[vkit.pipeline.text_detection.page_layout.LayoutTextLine], layout_non_text_symbols: Sequence[vkit.pipeline.text_detection.page_layout.LayoutNonTextSymbol], layout_seal_impressions: Sequence[vkit.pipeline.text_detection.page_layout.LayoutSealImpression], layout_images: Sequence[vkit.pipeline.text_detection.page_layout.LayoutImage], layout_barcode_qrs: Sequence[vkit.pipeline.text_detection.page_layout.LayoutBarcodeQr], layout_barcode_code39s: Sequence[vkit.pipeline.text_detection.page_layout.LayoutBarcodeCode39], disconnected_text_regions: Sequence[vkit.pipeline.text_detection.page_layout.DisconnectedTextRegion], non_text_regions: Sequence[vkit.pipeline.text_detection.page_layout.NonTextRegion])
 2def __init__(self, height, width, layout_text_lines, layout_non_text_symbols, layout_seal_impressions, layout_images, layout_barcode_qrs, layout_barcode_code39s, disconnected_text_regions, non_text_regions):
 3    self.height = height
 4    self.width = width
 5    self.layout_text_lines = layout_text_lines
 6    self.layout_non_text_symbols = layout_non_text_symbols
 7    self.layout_seal_impressions = layout_seal_impressions
 8    self.layout_images = layout_images
 9    self.layout_barcode_qrs = layout_barcode_qrs
10    self.layout_barcode_code39s = layout_barcode_code39s
11    self.disconnected_text_regions = disconnected_text_regions
12    self.non_text_regions = non_text_regions

Method generated by attrs for class PageLayout.

class PageLayoutStepOutput:
206class PageLayoutStepOutput:
207    page_layout: PageLayout
208    debug_large_text_line_gird: Optional[Box]
209    debug_grids: Sequence[Box]
PageLayoutStepOutput( page_layout: vkit.pipeline.text_detection.page_layout.PageLayout, debug_large_text_line_gird: Union[vkit.element.box.Box, NoneType], debug_grids: Sequence[vkit.element.box.Box])
2def __init__(self, page_layout, debug_large_text_line_gird, debug_grids):
3    self.page_layout = page_layout
4    self.debug_large_text_line_gird = debug_large_text_line_gird
5    self.debug_grids = debug_grids

Method generated by attrs for class PageLayoutStepOutput.

class PrioritizedSegment:
213class PrioritizedSegment:
214    vert_begin_idx: int = attrs.field(order=True)
215    hori_begin_idx: int = attrs.field(order=False)
216    hori_end_idx: int = attrs.field(order=False)
PrioritizedSegment(vert_begin_idx: int, hori_begin_idx: int, hori_end_idx: int)
2def __init__(self, vert_begin_idx, hori_begin_idx, hori_end_idx):
3    self.vert_begin_idx = vert_begin_idx
4    self.hori_begin_idx = hori_begin_idx
5    self.hori_end_idx = hori_end_idx

Method generated by attrs for class PrioritizedSegment.

class SealImpressionEllipseShapeMode(enum.Enum):
220class SealImpressionEllipseShapeMode(Enum):
221    CIRCLE = 'circle'
222    GENERAL_ELLIPSE = 'general_ellipse'

An enumeration.

Inherited Members
enum.Enum
name
value
 225class PageLayoutStep(
 226    PipelineStep[
 227        PageLayoutStepConfig,
 228        PageLayoutStepInput,
 229        PageLayoutStepOutput,
 230    ]
 231):  # yapf: disable
 232
 233    def __init__(self, config: PageLayoutStepConfig):
 234        super().__init__(config)
 235
 236        (
 237            self.seal_impression_ellipse_shape_modes,
 238            self.seal_impression_ellipse_shape_modes_probs,
 239        ) = normalize_to_keys_and_probs([
 240            (
 241                SealImpressionEllipseShapeMode.CIRCLE,
 242                self.config.seal_impression_weight_circle,
 243            ),
 244            (
 245                SealImpressionEllipseShapeMode.GENERAL_ELLIPSE,
 246                self.config.seal_impression_weight_general_ellipse,
 247            ),
 248        ])
 249
 250    def sample_large_text_line_height(self, reference_height: int, rng: RandomGenerator):
 251        if rng.random() < self.config.prob_add_large_text_line:
 252            large_text_line_height_ratio = rng.uniform(
 253                self.config.large_text_line_height_ratio_min,
 254                self.config.large_text_line_height_ratio_max,
 255            )
 256            return round(large_text_line_height_ratio * reference_height)
 257
 258        else:
 259            return None
 260
 261    def sample_normal_text_line_heights(self, reference_height: int, rng: RandomGenerator):
 262        normal_text_line_heights: List[int] = []
 263
 264        if self.config.force_add_normal_text_line_height_ratio_min:
 265            normal_text_line_heights.append(
 266                round(self.config.normal_text_line_height_ratio_min * reference_height)
 267            )
 268
 269        num_normal_text_line_heights = rng.integers(
 270            self.config.num_normal_text_line_heights_min,
 271            self.config.num_normal_text_line_heights_max + 1,
 272        )
 273        ratio_step = (
 274            self.config.normal_text_line_height_ratio_max
 275            - self.config.normal_text_line_height_ratio_min
 276        ) / num_normal_text_line_heights
 277        for step_idx in range(num_normal_text_line_heights):
 278            ratio_min = self.config.normal_text_line_height_ratio_min + step_idx * ratio_step
 279            ratio_max = ratio_min + ratio_step
 280            ratio = rng.uniform(ratio_min, ratio_max)
 281            normal_text_line_heights.append(round(ratio * reference_height))
 282
 283        assert normal_text_line_heights
 284        return sorted(normal_text_line_heights)
 285
 286    @classmethod
 287    def generate_grid_points(
 288        cls,
 289        grid_pad_ratio: float,
 290        grid_step: int,
 291        grid_gap: int,
 292        grid_gap_min: Optional[int],
 293        length: int,
 294        rng: RandomGenerator,
 295    ):
 296        grid_pad = min(length - grid_step, length * grid_pad_ratio)
 297        assert grid_pad > 0
 298
 299        num_steps = (length - grid_pad + grid_gap) / (grid_step + grid_gap)
 300        if not num_steps.is_integer():
 301            num_steps = math.floor(num_steps)
 302        num_steps = int(num_steps)
 303
 304        grid_pad = length - grid_step * num_steps - grid_gap * (num_steps - 1)
 305        assert grid_pad > 0
 306        grid_pad = grid_pad // 2
 307
 308        begin = grid_pad
 309        end = grid_pad + grid_step - 1
 310        assert end < length - grid_pad
 311
 312        begins: List[int] = []
 313        ends: List[int] = []
 314
 315        while end < length - grid_pad:
 316            begins.append(begin)
 317            ends.append(end)
 318
 319            cur_gap = grid_gap
 320            if grid_gap_min is not None:
 321                cur_gap = rng.integers(grid_gap_min, grid_gap + 1)
 322
 323            begin = end + cur_gap
 324            end = begin + grid_step - 1
 325
 326        return begins, ends
 327
 328    def sample_grid_points(
 329        self,
 330        height: int,
 331        width: int,
 332        normal_text_line_heights_max: int,
 333        rng: RandomGenerator,
 334    ):
 335        grid_pad_ratio = rng.uniform(
 336            self.config.grid_pad_ratio_min,
 337            self.config.grid_pad_ratio_max,
 338        )
 339
 340        grid_step_ratio = rng.uniform(
 341            self.config.grid_step_ratio_min,
 342            self.config.grid_step_ratio_max,
 343        )
 344        grid_step = round(normal_text_line_heights_max * grid_step_ratio)
 345
 346        grid_vert_gap_min = round(
 347            normal_text_line_heights_max * self.config.grid_vert_gap_ratio_min
 348        )
 349        grid_vert_gap_max = round(
 350            normal_text_line_heights_max * self.config.grid_vert_gap_ratio_max
 351        )
 352        vert_begins, vert_ends = self.generate_grid_points(
 353            grid_pad_ratio=grid_pad_ratio,
 354            grid_step=grid_step,
 355            grid_gap=grid_vert_gap_max,
 356            grid_gap_min=grid_vert_gap_min,
 357            length=height,
 358            rng=rng,
 359        )
 360
 361        grid_hori_gap_ratio = rng.uniform(
 362            self.config.grid_hori_gap_ratio_min,
 363            self.config.grid_hori_gap_ratio_max,
 364        )
 365        grid_hori_gap = round(normal_text_line_heights_max * grid_hori_gap_ratio)
 366        grid_hori_gap = max(normal_text_line_heights_max, grid_hori_gap)
 367        hori_begins, hori_ends = self.generate_grid_points(
 368            grid_pad_ratio=grid_pad_ratio,
 369            grid_step=grid_step,
 370            grid_gap=grid_hori_gap,
 371            grid_gap_min=None,
 372            length=width,
 373            rng=rng,
 374        )
 375        return (vert_begins, vert_ends), (hori_begins, hori_ends)
 376
 377    def trim_grid_points_for_large_text_line(
 378        self,
 379        large_text_line_height: int,
 380        vert_begins: Sequence[int],
 381        vert_ends: Sequence[int],
 382        hori_begins_min: int,
 383        hori_ends_max: int,
 384    ):
 385        idx = 0
 386        while idx < len(vert_begins) \
 387                and vert_ends[idx] + 1 - vert_begins[0] < large_text_line_height:
 388            idx += 1
 389
 390        if idx >= len(vert_begins) - 1:
 391            return None, 0
 392
 393        large_text_line_gird = Box(
 394            up=vert_ends[idx] - large_text_line_height + 1,
 395            down=vert_ends[idx],
 396            left=hori_begins_min,
 397            right=hori_ends_max,
 398        )
 399        return large_text_line_gird, idx + 1
 400
 401    def sample_grids(
 402        self,
 403        vert_begins: Sequence[int],
 404        vert_ends: Sequence[int],
 405        hori_begins: Sequence[int],
 406        hori_ends: Sequence[int],
 407        rng: RandomGenerator,
 408    ):
 409        num_vert_ends = len(vert_ends)
 410        assert num_vert_ends == len(vert_begins)
 411
 412        num_hori_ends = len(hori_ends)
 413        assert num_hori_ends == len(hori_begins)
 414
 415        priority_queue = [
 416            PrioritizedSegment(
 417                vert_begin_idx=0,
 418                hori_begin_idx=0,
 419                hori_end_idx=num_hori_ends - 1,
 420            )
 421        ]
 422        grids: List[Box] = []
 423        while priority_queue:
 424            cur_segment = heapq.heappop(priority_queue)
 425
 426            # Deal with segments in the same level.
 427            same_vert_segments: List[PrioritizedSegment] = []
 428            while priority_queue \
 429                    and priority_queue[0].vert_begin_idx == cur_segment.vert_begin_idx:
 430                same_vert_segments.append(heapq.heappop(priority_queue))
 431
 432            if same_vert_segments:
 433                # Rebuid segments.
 434                same_vert_segments.append(cur_segment)
 435                same_vert_segments = sorted(
 436                    same_vert_segments,
 437                    key=lambda segment: segment.hori_begin_idx,
 438                )
 439
 440                rebuilt_segments: List[PrioritizedSegment] = []
 441                rebuilt_begin = 0
 442                while rebuilt_begin < len(same_vert_segments):
 443                    rebuilt_end = rebuilt_begin
 444                    while rebuilt_end + 1 < len(same_vert_segments) \
 445                            and (same_vert_segments[rebuilt_end + 1].hori_begin_idx
 446                                 == same_vert_segments[rebuilt_end].hori_end_idx + 1):
 447                        rebuilt_end += 1
 448                    rebuilt_segments.append(
 449                        PrioritizedSegment(
 450                            vert_begin_idx=cur_segment.vert_begin_idx,
 451                            hori_begin_idx=same_vert_segments[rebuilt_begin].hori_begin_idx,
 452                            hori_end_idx=same_vert_segments[rebuilt_end].hori_end_idx,
 453                        )
 454                    )
 455                    rebuilt_begin = rebuilt_end + 1
 456
 457                # Re-pick the first segment.
 458                cur_segment = rebuilt_segments[0]
 459                for other_segment in rebuilt_segments[1:]:
 460                    heapq.heappush(priority_queue, other_segment)
 461
 462            # Generate grids for the current segment.
 463            vert_begin_idx = cur_segment.vert_begin_idx
 464
 465            hori_begin_idx = cur_segment.hori_begin_idx
 466            hori_end_idx = cur_segment.hori_end_idx
 467            while hori_begin_idx <= hori_end_idx:
 468                # Randomly generate grid.
 469                cur_vert_end_idx = rng.integers(vert_begin_idx, num_vert_ends)
 470
 471                # Try to sample segment with length >= 2.
 472                if hori_end_idx + 1 - hori_begin_idx <= 3:
 473                    cur_hori_end_idx = hori_end_idx
 474                else:
 475                    cur_hori_end_idx = rng.integers(hori_begin_idx + 1, hori_end_idx + 1)
 476
 477                grids.append(
 478                    Box(
 479                        up=vert_begins[vert_begin_idx],
 480                        down=vert_ends[cur_vert_end_idx],
 481                        left=hori_begins[hori_begin_idx],
 482                        right=hori_ends[cur_hori_end_idx],
 483                    )
 484                )
 485                next_vert_begin_idx = cur_vert_end_idx + 1
 486                if next_vert_begin_idx < num_vert_ends:
 487                    heapq.heappush(
 488                        priority_queue,
 489                        PrioritizedSegment(
 490                            vert_begin_idx=next_vert_begin_idx,
 491                            hori_begin_idx=hori_begin_idx,
 492                            hori_end_idx=cur_hori_end_idx,
 493                        ),
 494                    )
 495
 496                hori_begin_idx = cur_hori_end_idx + 1
 497
 498        return grids
 499
 500    @classmethod
 501    def calculate_normal_text_line_heights_probs(
 502        cls,
 503        normal_text_line_heights_expected_probs: Sequence[float],
 504        normal_text_line_heights_acc_areas: List[int],
 505    ):
 506        if sum(normal_text_line_heights_acc_areas) == 0:
 507            normal_text_line_heights_cur_probs = [0.0] * len(normal_text_line_heights_acc_areas)
 508        else:
 509            normal_text_line_heights_cur_probs = normalize_to_probs(
 510                normal_text_line_heights_acc_areas
 511            )
 512
 513        probs = normalize_to_probs([
 514            max(0.0, expected_prob - cur_prob) for cur_prob, expected_prob in zip(
 515                normal_text_line_heights_cur_probs,
 516                normal_text_line_heights_expected_probs,
 517            )
 518        ])
 519        return probs
 520
 521    def fill_normal_text_lines_to_grid(
 522        self,
 523        normal_text_line_heights: Sequence[int],
 524        normal_text_line_heights_expected_probs: Sequence[float],
 525        normal_text_line_heights_acc_areas: List[int],
 526        grid_idx: int,
 527        grid: Box,
 528        rng: RandomGenerator,
 529    ):
 530        normal_text_line_heights_indices = list(range(len(normal_text_line_heights)))
 531        normal_text_line_heights_max = normal_text_line_heights[-1]
 532
 533        layout_text_lines: List[LayoutTextLine] = []
 534        up = grid.up
 535        prev_text_line_height: Optional[int] = None
 536
 537        while up + normal_text_line_heights_max - 1 <= grid.down:
 538            normal_text_line_heights_probs = self.calculate_normal_text_line_heights_probs(
 539                normal_text_line_heights_expected_probs=normal_text_line_heights_expected_probs,
 540                normal_text_line_heights_acc_areas=normal_text_line_heights_acc_areas,
 541            )
 542            normal_text_line_height_idx = rng_choice(
 543                rng=rng,
 544                items=normal_text_line_heights_indices,
 545                probs=normal_text_line_heights_probs,
 546            )
 547            normal_text_line_height = normal_text_line_heights[normal_text_line_height_idx]
 548
 549            add_gap = False
 550            if prev_text_line_height:
 551                if prev_text_line_height != normal_text_line_height:
 552                    add_gap = (rng.random() < self.config.prob_normal_text_line_diff_heights_gap)
 553                else:
 554                    add_gap = (rng.random() < self.config.prob_normal_text_line_gap)
 555            if add_gap:
 556                gap_ratio = rng.uniform(
 557                    self.config.normal_text_line_gap_ratio_min,
 558                    self.config.normal_text_line_gap_ratio_max,
 559                )
 560                gap = round(gap_ratio * normal_text_line_height)
 561                gap = min(grid.down - (up + normal_text_line_height - 1), gap)
 562                up += gap
 563            down = up + normal_text_line_height - 1
 564            assert down <= grid.down
 565
 566            length_ratio = rng.uniform(
 567                self.config.normal_text_line_length_ratio_min,
 568                self.config.normal_text_line_length_ratio_max,
 569            )
 570            normal_text_line_length = round(grid.width * length_ratio)
 571            normal_text_line_length = max(normal_text_line_height, normal_text_line_length)
 572
 573            pad_max = grid.width - normal_text_line_length
 574            pad = rng.integers(0, pad_max + 1)
 575            left = grid.left + pad
 576            right = left + normal_text_line_length - 1
 577            assert right <= grid.right
 578
 579            text_line_idx = len(layout_text_lines)
 580            layout_text_lines.append(
 581                LayoutTextLine(
 582                    grid_idx=grid_idx,
 583                    text_line_idx=text_line_idx,
 584                    text_line_height=normal_text_line_height,
 585                    box=Box(up=up, down=down, left=left, right=right),
 586                    glyph_sequence=FontEngineRunConfigGlyphSequence.HORI_DEFAULT,
 587                )
 588            )
 589
 590            prev_text_line_height = normal_text_line_height
 591            normal_text_line_heights_acc_areas[normal_text_line_height_idx] \
 592                += normal_text_line_length * normal_text_line_height
 593            up = down + 1
 594
 595        return layout_text_lines
 596
 597    def fill_large_text_line_to_grid(
 598        self,
 599        large_text_line_gird: Box,
 600        rng: RandomGenerator,
 601    ):
 602        length_ratio = rng.uniform(
 603            self.config.large_text_line_length_ratio_min,
 604            self.config.large_text_line_length_ratio_max,
 605        )
 606        large_text_line_length = round(large_text_line_gird.width * length_ratio)
 607        large_text_line_length = max(large_text_line_gird.height, large_text_line_length)
 608
 609        pad_max = large_text_line_gird.width - large_text_line_length
 610        pad = rng.integers(0, pad_max + 1)
 611        left = large_text_line_gird.left + pad
 612        right = left + large_text_line_length - 1
 613        assert right <= large_text_line_gird.right
 614
 615        return LayoutTextLine(
 616            grid_idx=-1,
 617            text_line_idx=0,
 618            text_line_height=large_text_line_gird.height,
 619            box=attrs.evolve(large_text_line_gird, left=left, right=right),
 620            glyph_sequence=FontEngineRunConfigGlyphSequence.HORI_DEFAULT,
 621        )
 622
 623    def get_reference_height(self, height: int, width: int):
 624        area = height * width
 625        reference_height = math.ceil(math.sqrt(area / self.config.reference_aspect_ratio))
 626        return reference_height
 627
 628    def sample_layout_text_lines(self, height: int, width: int, rng: RandomGenerator):
 629        reference_height = self.get_reference_height(height=height, width=width)
 630
 631        normal_text_line_heights = self.sample_normal_text_line_heights(reference_height, rng)
 632        (vert_begins, vert_ends), (hori_begins, hori_ends) = self.sample_grid_points(
 633            height=height,
 634            width=width,
 635            normal_text_line_heights_max=normal_text_line_heights[-1],
 636            rng=rng,
 637        )
 638
 639        large_text_line_height = self.sample_large_text_line_height(reference_height, rng)
 640        large_text_line_gird: Optional[Box] = None
 641        if large_text_line_height is not None:
 642            large_text_line_gird, vert_trim_idx = self.trim_grid_points_for_large_text_line(
 643                large_text_line_height=large_text_line_height,
 644                vert_begins=vert_begins,
 645                vert_ends=vert_ends,
 646                hori_begins_min=hori_begins[0],
 647                hori_ends_max=hori_ends[-1],
 648            )
 649            if large_text_line_gird is not None:
 650                vert_begins = vert_begins[vert_trim_idx:]
 651                vert_ends = vert_ends[vert_trim_idx:]
 652
 653        grids = self.sample_grids(
 654            vert_begins=vert_begins,
 655            vert_ends=vert_ends,
 656            hori_begins=hori_begins,
 657            hori_ends=hori_ends,
 658            rng=rng,
 659        )
 660        normal_text_line_heights_expected_probs = normalize_to_probs([
 661            1 / normal_text_line_height for normal_text_line_height in normal_text_line_heights
 662        ])
 663        normal_text_line_heights_acc_areas = [0] * len(normal_text_line_heights)
 664        layout_text_lines: List[LayoutTextLine] = []
 665        for grid_idx, grid in enumerate(grids):
 666            layout_text_lines.extend(
 667                self.fill_normal_text_lines_to_grid(
 668                    normal_text_line_heights=normal_text_line_heights,
 669                    normal_text_line_heights_expected_probs=normal_text_line_heights_expected_probs,
 670                    normal_text_line_heights_acc_areas=normal_text_line_heights_acc_areas,
 671                    grid_idx=grid_idx,
 672                    grid=grid,
 673                    rng=rng,
 674                )
 675            )
 676
 677        if large_text_line_gird:
 678            layout_text_lines.append(self.fill_large_text_line_to_grid(large_text_line_gird, rng))
 679
 680        # Must place text line.
 681        assert layout_text_lines
 682
 683        return (
 684            layout_text_lines,
 685            large_text_line_gird,
 686            grids,
 687        )
 688
 689    def sample_layout_images(self, height: int, width: int, rng: RandomGenerator):
 690        # Image could be overlapped with text lines.
 691        layout_images: List[LayoutImage] = []
 692
 693        num_layout_images = rng.integers(
 694            self.config.num_images_min,
 695            self.config.num_images_max + 1,
 696        )
 697        for _ in range(num_layout_images):
 698            # NOTE: It's ok to have overlapping images.
 699            image_height_ratio = rng.uniform(
 700                self.config.image_height_ratio_min,
 701                self.config.image_height_ratio_max,
 702            )
 703            image_height = round(height * image_height_ratio)
 704
 705            image_width_ratio = rng.uniform(
 706                self.config.image_width_ratio_min,
 707                self.config.image_width_ratio_max,
 708            )
 709            image_width = round(width * image_width_ratio)
 710
 711            up = rng.integers(0, height - image_height + 1)
 712            down = up + image_height - 1
 713            left = rng.integers(0, width - image_width + 1)
 714            right = left + image_width - 1
 715            layout_images.append(LayoutImage(box=Box(up=up, down=down, left=left, right=right)))
 716
 717        return layout_images
 718
 719    @classmethod
 720    def boxes_are_overlapped(cls, box0: Box, box1: Box):
 721        vert_overlapped = (box0.down >= box1.up and box1.down >= box0.up)
 722        hori_overlapped = (box0.right >= box1.left and box1.right >= box0.left)
 723        return vert_overlapped and hori_overlapped
 724
 725    def sample_layout_barcode_qrs(
 726        self,
 727        height: int,
 728        width: int,
 729        layout_text_lines: Sequence[LayoutTextLine],
 730        rng: RandomGenerator,
 731    ):
 732        reference_height = self.get_reference_height(height=height, width=width)
 733
 734        layout_barcode_qrs: List[LayoutBarcodeQr] = []
 735
 736        num_layout_barcode_qrs = rng.integers(
 737            self.config.num_barcode_qrs_min,
 738            self.config.num_barcode_qrs_max + 1,
 739        )
 740        num_retries = 3
 741        while num_layout_barcode_qrs > 0 and num_retries > 0:
 742            barcode_qr_length_ratio = rng.uniform(
 743                self.config.barcode_qr_length_ratio_min,
 744                self.config.barcode_qr_length_ratio_max,
 745            )
 746            barcode_qr_length = round(barcode_qr_length_ratio * reference_height)
 747            barcode_qr_length = min(height, width, barcode_qr_length)
 748
 749            # Place QR code next to text line.
 750            anchor_layout_text_line_box = rng_choice(rng, layout_text_lines).box
 751            anchor_layout_text_line_box_center = anchor_layout_text_line_box.get_center_point()
 752            placement = rng_choice(rng, tuple(LayoutXcodePlacement))
 753
 754            if placement in (LayoutXcodePlacement.NEXT_TO_DOWN, LayoutXcodePlacement.NEXT_TO_UP):
 755                if placement == LayoutXcodePlacement.NEXT_TO_DOWN:
 756                    up = anchor_layout_text_line_box.down + 1
 757                    down = up + barcode_qr_length - 1
 758                    if down >= height:
 759                        num_retries -= 1
 760                        continue
 761                else:
 762                    assert placement == LayoutXcodePlacement.NEXT_TO_UP
 763                    down = anchor_layout_text_line_box.up - 1
 764                    up = down + 1 - barcode_qr_length
 765                    if up < 0:
 766                        num_retries -= 1
 767                        continue
 768
 769                left_min = max(
 770                    0,
 771                    anchor_layout_text_line_box_center.x - barcode_qr_length,
 772                )
 773                left_max = min(
 774                    width - barcode_qr_length,
 775                    anchor_layout_text_line_box_center.x,
 776                )
 777                if left_min > left_max:
 778                    num_retries -= 1
 779                    continue
 780                left = int(rng.integers(left_min, left_max + 1))
 781                right = left + barcode_qr_length - 1
 782
 783            else:
 784                assert placement in (
 785                    LayoutXcodePlacement.NEXT_TO_RIGHT,
 786                    LayoutXcodePlacement.NEXT_TO_LEFT,
 787                )
 788
 789                if placement == LayoutXcodePlacement.NEXT_TO_RIGHT:
 790                    left = anchor_layout_text_line_box.right + 1
 791                    right = left + barcode_qr_length - 1
 792                    if right >= width:
 793                        num_retries -= 1
 794                        continue
 795                else:
 796                    assert placement == LayoutXcodePlacement.NEXT_TO_LEFT
 797                    right = anchor_layout_text_line_box.left - 1
 798                    left = right + 1 - barcode_qr_length
 799                    if left < 0:
 800                        num_retries -= 1
 801                        continue
 802
 803                up_min = max(
 804                    0,
 805                    anchor_layout_text_line_box_center.y - barcode_qr_length,
 806                )
 807                up_max = min(
 808                    height - barcode_qr_length,
 809                    anchor_layout_text_line_box_center.y,
 810                )
 811                if up_min > up_max:
 812                    num_retries -= 1
 813                    continue
 814
 815                up = int(rng.integers(up_min, up_max + 1))
 816                down = up + barcode_qr_length - 1
 817
 818            num_layout_barcode_qrs -= 1
 819            layout_barcode_qrs.append(
 820                LayoutBarcodeQr(box=Box(
 821                    up=up,
 822                    down=down,
 823                    left=left,
 824                    right=right,
 825                ))
 826            )
 827
 828        return layout_barcode_qrs
 829
 830    def sample_layout_barcode_code39s(
 831        self,
 832        height: int,
 833        width: int,
 834        layout_text_lines: Sequence[LayoutTextLine],
 835        rng: RandomGenerator,
 836    ):
 837        reference_height = self.get_reference_height(height=height, width=width)
 838
 839        layout_barcode_code39s: List[LayoutBarcodeCode39] = []
 840
 841        num_layout_barcode_code39s = rng.integers(
 842            self.config.num_barcode_code39s_min,
 843            self.config.num_barcode_code39s_max + 1,
 844        )
 845        num_retries = 3
 846        while num_layout_barcode_code39s > 0 and num_retries > 0:
 847            barcode_code39_height_ratio = rng.uniform(
 848                self.config.barcode_code39_height_ratio_min,
 849                self.config.barcode_code39_height_ratio_max,
 850            )
 851            barcode_code39_height = round(barcode_code39_height_ratio * reference_height)
 852            barcode_code39_height = min(height, width, barcode_code39_height)
 853
 854            barcode_code39_num_chars = int(
 855                rng.integers(
 856                    self.config.barcode_code39_num_chars_min,
 857                    self.config.barcode_code39_num_chars_max + 1,
 858                )
 859            )
 860            barcode_code39_width = round(
 861                barcode_code39_height * self.config.barcode_code39_aspect_ratio
 862                * barcode_code39_num_chars
 863            )
 864
 865            # Place Bar code next to text line.
 866            anchor_layout_text_line_box = rng_choice(rng, layout_text_lines).box
 867            anchor_layout_text_line_box_center = anchor_layout_text_line_box.get_center_point()
 868            placement = rng_choice(rng, tuple(LayoutXcodePlacement))
 869
 870            if placement in (LayoutXcodePlacement.NEXT_TO_DOWN, LayoutXcodePlacement.NEXT_TO_UP):
 871                if placement == LayoutXcodePlacement.NEXT_TO_DOWN:
 872                    up = anchor_layout_text_line_box.down + 1
 873                    down = up + barcode_code39_height - 1
 874                    if down >= height:
 875                        num_retries -= 1
 876                        continue
 877                else:
 878                    assert placement == LayoutXcodePlacement.NEXT_TO_UP
 879                    down = anchor_layout_text_line_box.up - 1
 880                    up = down + 1 - barcode_code39_height
 881                    if up < 0:
 882                        num_retries -= 1
 883                        continue
 884
 885                left_min = max(
 886                    0,
 887                    anchor_layout_text_line_box_center.x - barcode_code39_width,
 888                )
 889                left_max = min(
 890                    width - barcode_code39_width,
 891                    anchor_layout_text_line_box_center.x,
 892                )
 893                if left_min > left_max:
 894                    num_retries -= 1
 895                    continue
 896                left = int(rng.integers(left_min, left_max + 1))
 897                right = left + barcode_code39_width - 1
 898
 899            else:
 900                assert placement in (
 901                    LayoutXcodePlacement.NEXT_TO_RIGHT,
 902                    LayoutXcodePlacement.NEXT_TO_LEFT,
 903                )
 904
 905                if placement == LayoutXcodePlacement.NEXT_TO_RIGHT:
 906                    left = anchor_layout_text_line_box.right + 1
 907                    right = left + barcode_code39_width - 1
 908                    if right >= width:
 909                        num_retries -= 1
 910                        continue
 911                else:
 912                    assert placement == LayoutXcodePlacement.NEXT_TO_LEFT
 913                    right = anchor_layout_text_line_box.left - 1
 914                    left = right + 1 - barcode_code39_width
 915                    if left < 0:
 916                        num_retries -= 1
 917                        continue
 918
 919                up_min = max(
 920                    0,
 921                    anchor_layout_text_line_box_center.y - barcode_code39_height,
 922                )
 923                up_max = min(
 924                    height - barcode_code39_height,
 925                    anchor_layout_text_line_box_center.y,
 926                )
 927                if up_min > up_max:
 928                    num_retries -= 1
 929                    continue
 930
 931                up = int(rng.integers(up_min, up_max + 1))
 932                down = up + barcode_code39_height - 1
 933
 934            num_layout_barcode_code39s -= 1
 935            layout_barcode_code39s.append(
 936                LayoutBarcodeCode39(box=Box(
 937                    up=up,
 938                    down=down,
 939                    left=left,
 940                    right=right,
 941                ))
 942            )
 943
 944        return layout_barcode_code39s
 945
 946    def sample_layout_barcode_qrs_and_layout_barcode_code39s(
 947        self,
 948        height: int,
 949        width: int,
 950        layout_text_lines: Sequence[LayoutTextLine],
 951        rng: RandomGenerator,
 952    ):
 953        layout_barcode_qrs = self.sample_layout_barcode_qrs(
 954            height=height,
 955            width=width,
 956            layout_text_lines=layout_text_lines,
 957            rng=rng,
 958        )
 959
 960        layout_barcode_code39s = self.sample_layout_barcode_code39s(
 961            height=height,
 962            width=width,
 963            layout_text_lines=layout_text_lines,
 964            rng=rng,
 965        )
 966
 967        if layout_barcode_qrs or layout_barcode_code39s:
 968            # Barcode could not be overlapped with text lines.
 969            # Hence need to remove the overlapped text lines.
 970            box_overlapping_validator = BoxOverlappingValidator(
 971                itertools.chain(
 972                    (layout_barcode_qr.box for layout_barcode_qr in layout_barcode_qrs),
 973                    (layout_barcode_code39.box for layout_barcode_code39 in layout_barcode_code39s),
 974                )
 975            )
 976
 977            keep_layout_text_lines: List[LayoutTextLine] = []
 978            for layout_text_line in layout_text_lines:
 979                if not box_overlapping_validator.is_overlapped(layout_text_line.box):
 980                    keep_layout_text_lines.append(layout_text_line)
 981            layout_text_lines = keep_layout_text_lines
 982
 983        return layout_barcode_qrs, layout_barcode_code39s, layout_text_lines
 984
 985    @classmethod
 986    def get_text_line_area(cls, layout_text_lines: Sequence[LayoutTextLine]):
 987        # Sample within the text line area.
 988        text_line_up = min(layout_text_line.box.up for layout_text_line in layout_text_lines)
 989        text_line_down = max(layout_text_line.box.down for layout_text_line in layout_text_lines)
 990        text_line_left = min(layout_text_line.box.left for layout_text_line in layout_text_lines)
 991        text_line_right = max(layout_text_line.box.right for layout_text_line in layout_text_lines)
 992        return (
 993            text_line_up,
 994            text_line_down,
 995            text_line_left,
 996            text_line_right,
 997        )
 998
 999    def sample_layout_non_text_symbols(
1000        self,
1001        height: int,
1002        width: int,
1003        layout_text_lines: Sequence[LayoutTextLine],
1004        rng: RandomGenerator,
1005    ):
1006        reference_height = self.get_reference_height(height=height, width=width)
1007
1008        text_line_up = 0
1009        text_line_down = height - 1
1010        text_line_left = 0
1011        text_line_right = width - 1
1012
1013        layout_non_text_symbols: List[LayoutNonTextSymbol] = []
1014
1015        num_non_text_symbols = int(
1016            rng.integers(
1017                self.config.num_non_text_symbols_min,
1018                self.config.num_non_text_symbols_max + 1,
1019            )
1020        )
1021        for _ in range(num_non_text_symbols):
1022            non_text_symbol_height_ratio = rng.uniform(
1023                self.config.non_text_symbol_height_ratio_min,
1024                self.config.non_text_symbol_height_ratio_max,
1025            )
1026            non_text_symbol_height = round(non_text_symbol_height_ratio * reference_height)
1027
1028            non_text_symbol_aspect_ratio = rng.uniform(
1029                self.config.non_text_symbol_aspect_ratio_min,
1030                self.config.non_text_symbol_aspect_ratio_max,
1031            )
1032            non_text_symbol_width = round(non_text_symbol_aspect_ratio * non_text_symbol_height)
1033
1034            box = None
1035            overlapped = True
1036            for _ in range(self.config.num_retries_to_get_non_overlapped_non_text_symbol):
1037                up_max = text_line_down + 1 - non_text_symbol_height
1038                up = int(rng.integers(text_line_up, up_max + 1))
1039                down = up + non_text_symbol_height - 1
1040                assert up < down
1041
1042                left_max = text_line_right + 1 - non_text_symbol_width
1043                left = int(rng.integers(text_line_left, left_max + 1))
1044                right = left + non_text_symbol_width - 1
1045                assert left < right
1046
1047                box = Box(up=up, down=down, left=left, right=right)
1048
1049                cur_overlapped = False
1050                for layout_text_line in layout_text_lines:
1051                    if self.boxes_are_overlapped(box, layout_text_line.box):
1052                        cur_overlapped = True
1053                        break
1054
1055                if not cur_overlapped:
1056                    overlapped = False
1057                    break
1058
1059            assert box
1060
1061            if not overlapped:
1062                alpha = float(
1063                    rng.uniform(
1064                        self.config.non_text_symbol_non_overlapped_alpha_min,
1065                        self.config.non_text_symbol_non_overlapped_alpha_max,
1066                    )
1067                )
1068            else:
1069                alpha = float(
1070                    rng.uniform(
1071                        self.config.non_text_symbol_overlapped_alpha_min,
1072                        self.config.non_text_symbol_overlapped_alpha_max,
1073                    )
1074                )
1075
1076            layout_non_text_symbols.append(LayoutNonTextSymbol(
1077                box=box,
1078                alpha=alpha,
1079            ))
1080
1081        return layout_non_text_symbols
1082
1083    def sample_layout_seal_impressions(
1084        self,
1085        height: int,
1086        width: int,
1087        layout_text_lines: Sequence[LayoutTextLine],
1088        rng: RandomGenerator,
1089    ):
1090        reference_height = self.get_reference_height(height=height, width=width)
1091
1092        (
1093            text_line_up,
1094            text_line_down,
1095            text_line_left,
1096            text_line_right,
1097        ) = self.get_text_line_area(layout_text_lines)
1098
1099        # Place seal impressions.
1100        layout_seal_impressions: List[LayoutSealImpression] = []
1101
1102        num_seal_impressions = int(
1103            rng.integers(
1104                self.config.num_seal_impressions_min,
1105                self.config.num_seal_impressions_max + 1,
1106            )
1107        )
1108        for _ in range(num_seal_impressions):
1109            # Sample height.
1110            seal_impression_height_ratio = float(
1111                rng.uniform(
1112                    self.config.seal_impression_height_ratio_min,
1113                    self.config.seal_impression_height_ratio_max,
1114                )
1115            )
1116            seal_impression_height = round(seal_impression_height_ratio * reference_height)
1117            seal_impression_height = min(text_line_down + 1 - text_line_up, seal_impression_height)
1118
1119            # Make sure even.
1120            if seal_impression_height % 2 != 0:
1121                seal_impression_height -= 1
1122
1123            # Sample width.
1124            shape_mode = rng_choice(
1125                rng,
1126                self.seal_impression_ellipse_shape_modes,
1127                probs=self.seal_impression_ellipse_shape_modes_probs,
1128            )
1129            if shape_mode == SealImpressionEllipseShapeMode.CIRCLE:
1130                seal_impression_width = seal_impression_height
1131
1132            elif shape_mode == SealImpressionEllipseShapeMode.GENERAL_ELLIPSE:
1133                aspect_ratio = float(
1134                    rng.uniform(
1135                        self.config.seal_impression_general_ellipse_aspect_ratio_min,
1136                        self.config.seal_impression_general_ellipse_aspect_ratio_max,
1137                    )
1138                )
1139                seal_impression_width = round(aspect_ratio * seal_impression_height)
1140
1141            else:
1142                raise NotImplementedError()
1143
1144            seal_impression_width = min(text_line_right + 1 - text_line_left, seal_impression_width)
1145
1146            # Make sure even.
1147            if seal_impression_width % 2 != 0:
1148                seal_impression_width -= 1
1149
1150            seal_impression_up_max = text_line_down + 1 - seal_impression_height
1151            seal_impression_up = int(rng.integers(
1152                text_line_up,
1153                seal_impression_up_max + 1,
1154            ))
1155            seal_impression_down = seal_impression_up + seal_impression_height - 1
1156
1157            seal_impression_left_max = text_line_right + 1 - seal_impression_width
1158            seal_impression_left = int(rng.integers(
1159                text_line_left,
1160                seal_impression_left_max + 1,
1161            ))
1162            seal_impression_right = seal_impression_left + seal_impression_width - 1
1163
1164            angle = int(
1165                rng.integers(
1166                    self.config.seal_impression_angle_min,
1167                    self.config.seal_impression_angle_max + 1,
1168                )
1169            )
1170            angle = angle % 360
1171
1172            layout_seal_impressions.append(
1173                LayoutSealImpression(
1174                    box=Box(
1175                        up=seal_impression_up,
1176                        down=seal_impression_down,
1177                        left=seal_impression_left,
1178                        right=seal_impression_right,
1179                    ),
1180                    angle=angle,
1181                )
1182            )
1183
1184        return layout_seal_impressions
1185
1186    def generate_disconnected_text_regions(
1187        self,
1188        layout_text_lines: Sequence[LayoutTextLine],
1189    ):
1190        grid_idx_to_layout_text_lines: DefaultDict[int, List[LayoutTextLine]] = defaultdict(list)
1191        for layout_text_line in layout_text_lines:
1192            grid_idx_to_layout_text_lines[layout_text_line.grid_idx].append(layout_text_line)
1193
1194        disconnected_text_regions: List[DisconnectedTextRegion] = []
1195
1196        for _, layout_text_lines in sorted(
1197            grid_idx_to_layout_text_lines.items(),
1198            key=lambda p: p[0],
1199        ):
1200            layout_text_lines = sorted(layout_text_lines, key=lambda ltl: ltl.text_line_idx)
1201
1202            begin = 0
1203            while begin < len(layout_text_lines):
1204                text_line_height_min = layout_text_lines[begin].text_line_height
1205                text_line_height_max = text_line_height_min
1206
1207                # Find [begin, end) interval satisfying the condition.
1208                end = begin + 1
1209                while end < len(layout_text_lines):
1210                    text_line_height = layout_text_lines[end].text_line_height
1211                    text_line_height_min = min(text_line_height_min, text_line_height)
1212                    text_line_height_max = max(text_line_height_max, text_line_height)
1213                    if text_line_height_max / text_line_height_min \
1214                            > self.config.disconnected_text_region_polygons_height_ratio_max:
1215                        break
1216                    else:
1217                        end += 1
1218
1219                # To polygon.
1220                # NOTE: Simply using a bounding box is enough.
1221                # This method is common to all glyph sequences.
1222                cur_layout_text_lines = layout_text_lines[begin:end]
1223                bounding_box = Box(
1224                    up=min(ltl.box.up for ltl in cur_layout_text_lines),
1225                    down=max(ltl.box.down for ltl in cur_layout_text_lines),
1226                    left=min(ltl.box.left for ltl in cur_layout_text_lines),
1227                    right=max(ltl.box.right for ltl in cur_layout_text_lines),
1228                )
1229                step = min(
1230                    itertools.chain.from_iterable(ltl.box.shape for ltl in cur_layout_text_lines)
1231                )
1232                disconnected_text_regions.append(
1233                    DisconnectedTextRegion(polygon=bounding_box.to_polygon(step=step))
1234                )
1235
1236                # Move to next.
1237                begin = end
1238
1239        return disconnected_text_regions
1240
1241    def generate_non_text_regions(
1242        self,
1243        height: int,
1244        width: int,
1245        layout_text_lines: Sequence[LayoutTextLine],
1246        rng: RandomGenerator,
1247    ):
1248        box_overlapping_validator = BoxOverlappingValidator(
1249            layout_text_line.box for layout_text_line in layout_text_lines
1250        )
1251        directions = [
1252            LayoutNonTextLineDirection.UP,
1253            LayoutNonTextLineDirection.DOWN,
1254            LayoutNonTextLineDirection.LEFT,
1255            LayoutNonTextLineDirection.RIGHT,
1256        ]
1257
1258        lntl_boxes: List[Box] = []
1259        for layout_text_line in layout_text_lines:
1260            ltl_box = layout_text_line.box
1261
1262            for direction_idx in rng.permutation(len(directions)):
1263                direction = directions[direction_idx]
1264
1265                if direction == LayoutNonTextLineDirection.UP:
1266                    lntl_box = Box(
1267                        up=ltl_box.up - ltl_box.height,
1268                        down=ltl_box.up - 1,
1269                        left=ltl_box.left,
1270                        right=ltl_box.right,
1271                    )
1272
1273                elif direction == LayoutNonTextLineDirection.DOWN:
1274                    lntl_box = Box(
1275                        up=ltl_box.down + 1,
1276                        down=ltl_box.down + ltl_box.height,
1277                        left=ltl_box.left,
1278                        right=ltl_box.right,
1279                    )
1280
1281                elif direction == LayoutNonTextLineDirection.LEFT:
1282                    lntl_box = Box(
1283                        up=ltl_box.up,
1284                        down=ltl_box.down,
1285                        left=ltl_box.left - ltl_box.width,
1286                        right=ltl_box.left - 1,
1287                    )
1288
1289                elif direction == LayoutNonTextLineDirection.RIGHT:
1290                    lntl_box = Box(
1291                        up=ltl_box.up,
1292                        down=ltl_box.down,
1293                        left=ltl_box.right + 1,
1294                        right=ltl_box.right + ltl_box.width,
1295                    )
1296
1297                else:
1298                    raise NotImplementedError()
1299
1300                # Ignore invalid box.
1301                if not lntl_box.valid:
1302                    continue
1303                if lntl_box.down >= height or lntl_box.right >= width:
1304                    continue
1305
1306                assert ltl_box.shape == lntl_box.shape
1307
1308                # Ignore box that is overlapped with any text lines.
1309                if box_overlapping_validator.is_overlapped(lntl_box):
1310                    continue
1311
1312                # Keep only the first valid direction.
1313                lntl_boxes.append(lntl_box)
1314                break
1315
1316        step = max(
1317            1,
1318            min(itertools.chain.from_iterable(lntl_box.shape for lntl_box in lntl_boxes)),
1319        )
1320        non_text_regions = [
1321            NonTextRegion(polygon=lntl_box.to_polygon(step=step)) for lntl_box in lntl_boxes
1322        ]
1323        return non_text_regions
1324
1325    def run(self, input: PageLayoutStepInput, rng: RandomGenerator):
1326        page_shape_step_output = input.page_shape_step_output
1327        height = page_shape_step_output.height
1328        width = page_shape_step_output.width
1329
1330        # Text lines.
1331        (
1332            layout_text_lines,
1333            large_text_line_gird,
1334            grids,
1335        ) = self.sample_layout_text_lines(height=height, width=width, rng=rng)
1336
1337        # Images.
1338        layout_images = self.sample_layout_images(height=height, width=width, rng=rng)
1339
1340        # QR codes & Bar codes.
1341        # NOTE: Some layout_text_lines could be dropped.
1342        (
1343            layout_barcode_qrs,
1344            layout_barcode_code39s,
1345            layout_text_lines,
1346        ) = self.sample_layout_barcode_qrs_and_layout_barcode_code39s(
1347            height=height,
1348            width=width,
1349            layout_text_lines=layout_text_lines,
1350            rng=rng,
1351        )
1352
1353        # Non-text symbols.
1354        layout_non_text_symbols = self.sample_layout_non_text_symbols(
1355            height=height,
1356            width=width,
1357            layout_text_lines=layout_text_lines,
1358            rng=rng,
1359        )
1360
1361        # Seal impressions.
1362        layout_seal_impressions = self.sample_layout_seal_impressions(
1363            height=height,
1364            width=width,
1365            layout_text_lines=layout_text_lines,
1366            rng=rng,
1367        )
1368
1369        # For char-level polygon regression.
1370        disconnected_text_regions = self.generate_disconnected_text_regions(
1371            layout_text_lines=layout_text_lines,
1372        )
1373
1374        # For sampling negative text region area.
1375        non_text_regions = self.generate_non_text_regions(
1376            height=height,
1377            width=width,
1378            layout_text_lines=layout_text_lines,
1379            rng=rng,
1380        )
1381
1382        return PageLayoutStepOutput(
1383            page_layout=PageLayout(
1384                height=height,
1385                width=width,
1386                layout_text_lines=layout_text_lines,
1387                layout_non_text_symbols=layout_non_text_symbols,
1388                layout_seal_impressions=layout_seal_impressions,
1389                layout_images=layout_images,
1390                layout_barcode_qrs=layout_barcode_qrs,
1391                layout_barcode_code39s=layout_barcode_code39s,
1392                disconnected_text_regions=disconnected_text_regions,
1393                non_text_regions=non_text_regions,
1394            ),
1395            debug_large_text_line_gird=large_text_line_gird,
1396            debug_grids=grids,
1397        )

Abstract base class for generic types.

A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::

    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
        # Etc.

This class can then be used as follows::

    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default

233    def __init__(self, config: PageLayoutStepConfig):
234        super().__init__(config)
235
236        (
237            self.seal_impression_ellipse_shape_modes,
238            self.seal_impression_ellipse_shape_modes_probs,
239        ) = normalize_to_keys_and_probs([
240            (
241                SealImpressionEllipseShapeMode.CIRCLE,
242                self.config.seal_impression_weight_circle,
243            ),
244            (
245                SealImpressionEllipseShapeMode.GENERAL_ELLIPSE,
246                self.config.seal_impression_weight_general_ellipse,
247            ),
248        ])
def sample_large_text_line_height(self, reference_height: int, rng: numpy.random._generator.Generator):
250    def sample_large_text_line_height(self, reference_height: int, rng: RandomGenerator):
251        if rng.random() < self.config.prob_add_large_text_line:
252            large_text_line_height_ratio = rng.uniform(
253                self.config.large_text_line_height_ratio_min,
254                self.config.large_text_line_height_ratio_max,
255            )
256            return round(large_text_line_height_ratio * reference_height)
257
258        else:
259            return None
def sample_normal_text_line_heights(self, reference_height: int, rng: numpy.random._generator.Generator):
261    def sample_normal_text_line_heights(self, reference_height: int, rng: RandomGenerator):
262        normal_text_line_heights: List[int] = []
263
264        if self.config.force_add_normal_text_line_height_ratio_min:
265            normal_text_line_heights.append(
266                round(self.config.normal_text_line_height_ratio_min * reference_height)
267            )
268
269        num_normal_text_line_heights = rng.integers(
270            self.config.num_normal_text_line_heights_min,
271            self.config.num_normal_text_line_heights_max + 1,
272        )
273        ratio_step = (
274            self.config.normal_text_line_height_ratio_max
275            - self.config.normal_text_line_height_ratio_min
276        ) / num_normal_text_line_heights
277        for step_idx in range(num_normal_text_line_heights):
278            ratio_min = self.config.normal_text_line_height_ratio_min + step_idx * ratio_step
279            ratio_max = ratio_min + ratio_step
280            ratio = rng.uniform(ratio_min, ratio_max)
281            normal_text_line_heights.append(round(ratio * reference_height))
282
283        assert normal_text_line_heights
284        return sorted(normal_text_line_heights)
@classmethod
def generate_grid_points( cls, grid_pad_ratio: float, grid_step: int, grid_gap: int, grid_gap_min: Union[int, NoneType], length: int, rng: numpy.random._generator.Generator):
286    @classmethod
287    def generate_grid_points(
288        cls,
289        grid_pad_ratio: float,
290        grid_step: int,
291        grid_gap: int,
292        grid_gap_min: Optional[int],
293        length: int,
294        rng: RandomGenerator,
295    ):
296        grid_pad = min(length - grid_step, length * grid_pad_ratio)
297        assert grid_pad > 0
298
299        num_steps = (length - grid_pad + grid_gap) / (grid_step + grid_gap)
300        if not num_steps.is_integer():
301            num_steps = math.floor(num_steps)
302        num_steps = int(num_steps)
303
304        grid_pad = length - grid_step * num_steps - grid_gap * (num_steps - 1)
305        assert grid_pad > 0
306        grid_pad = grid_pad // 2
307
308        begin = grid_pad
309        end = grid_pad + grid_step - 1
310        assert end < length - grid_pad
311
312        begins: List[int] = []
313        ends: List[int] = []
314
315        while end < length - grid_pad:
316            begins.append(begin)
317            ends.append(end)
318
319            cur_gap = grid_gap
320            if grid_gap_min is not None:
321                cur_gap = rng.integers(grid_gap_min, grid_gap + 1)
322
323            begin = end + cur_gap
324            end = begin + grid_step - 1
325
326        return begins, ends
def sample_grid_points( self, height: int, width: int, normal_text_line_heights_max: int, rng: numpy.random._generator.Generator):
328    def sample_grid_points(
329        self,
330        height: int,
331        width: int,
332        normal_text_line_heights_max: int,
333        rng: RandomGenerator,
334    ):
335        grid_pad_ratio = rng.uniform(
336            self.config.grid_pad_ratio_min,
337            self.config.grid_pad_ratio_max,
338        )
339
340        grid_step_ratio = rng.uniform(
341            self.config.grid_step_ratio_min,
342            self.config.grid_step_ratio_max,
343        )
344        grid_step = round(normal_text_line_heights_max * grid_step_ratio)
345
346        grid_vert_gap_min = round(
347            normal_text_line_heights_max * self.config.grid_vert_gap_ratio_min
348        )
349        grid_vert_gap_max = round(
350            normal_text_line_heights_max * self.config.grid_vert_gap_ratio_max
351        )
352        vert_begins, vert_ends = self.generate_grid_points(
353            grid_pad_ratio=grid_pad_ratio,
354            grid_step=grid_step,
355            grid_gap=grid_vert_gap_max,
356            grid_gap_min=grid_vert_gap_min,
357            length=height,
358            rng=rng,
359        )
360
361        grid_hori_gap_ratio = rng.uniform(
362            self.config.grid_hori_gap_ratio_min,
363            self.config.grid_hori_gap_ratio_max,
364        )
365        grid_hori_gap = round(normal_text_line_heights_max * grid_hori_gap_ratio)
366        grid_hori_gap = max(normal_text_line_heights_max, grid_hori_gap)
367        hori_begins, hori_ends = self.generate_grid_points(
368            grid_pad_ratio=grid_pad_ratio,
369            grid_step=grid_step,
370            grid_gap=grid_hori_gap,
371            grid_gap_min=None,
372            length=width,
373            rng=rng,
374        )
375        return (vert_begins, vert_ends), (hori_begins, hori_ends)
def trim_grid_points_for_large_text_line( self, large_text_line_height: int, vert_begins: Sequence[int], vert_ends: Sequence[int], hori_begins_min: int, hori_ends_max: int):
377    def trim_grid_points_for_large_text_line(
378        self,
379        large_text_line_height: int,
380        vert_begins: Sequence[int],
381        vert_ends: Sequence[int],
382        hori_begins_min: int,
383        hori_ends_max: int,
384    ):
385        idx = 0
386        while idx < len(vert_begins) \
387                and vert_ends[idx] + 1 - vert_begins[0] < large_text_line_height:
388            idx += 1
389
390        if idx >= len(vert_begins) - 1:
391            return None, 0
392
393        large_text_line_gird = Box(
394            up=vert_ends[idx] - large_text_line_height + 1,
395            down=vert_ends[idx],
396            left=hori_begins_min,
397            right=hori_ends_max,
398        )
399        return large_text_line_gird, idx + 1
def sample_grids( self, vert_begins: Sequence[int], vert_ends: Sequence[int], hori_begins: Sequence[int], hori_ends: Sequence[int], rng: numpy.random._generator.Generator):
401    def sample_grids(
402        self,
403        vert_begins: Sequence[int],
404        vert_ends: Sequence[int],
405        hori_begins: Sequence[int],
406        hori_ends: Sequence[int],
407        rng: RandomGenerator,
408    ):
409        num_vert_ends = len(vert_ends)
410        assert num_vert_ends == len(vert_begins)
411
412        num_hori_ends = len(hori_ends)
413        assert num_hori_ends == len(hori_begins)
414
415        priority_queue = [
416            PrioritizedSegment(
417                vert_begin_idx=0,
418                hori_begin_idx=0,
419                hori_end_idx=num_hori_ends - 1,
420            )
421        ]
422        grids: List[Box] = []
423        while priority_queue:
424            cur_segment = heapq.heappop(priority_queue)
425
426            # Deal with segments in the same level.
427            same_vert_segments: List[PrioritizedSegment] = []
428            while priority_queue \
429                    and priority_queue[0].vert_begin_idx == cur_segment.vert_begin_idx:
430                same_vert_segments.append(heapq.heappop(priority_queue))
431
432            if same_vert_segments:
433                # Rebuid segments.
434                same_vert_segments.append(cur_segment)
435                same_vert_segments = sorted(
436                    same_vert_segments,
437                    key=lambda segment: segment.hori_begin_idx,
438                )
439
440                rebuilt_segments: List[PrioritizedSegment] = []
441                rebuilt_begin = 0
442                while rebuilt_begin < len(same_vert_segments):
443                    rebuilt_end = rebuilt_begin
444                    while rebuilt_end + 1 < len(same_vert_segments) \
445                            and (same_vert_segments[rebuilt_end + 1].hori_begin_idx
446                                 == same_vert_segments[rebuilt_end].hori_end_idx + 1):
447                        rebuilt_end += 1
448                    rebuilt_segments.append(
449                        PrioritizedSegment(
450                            vert_begin_idx=cur_segment.vert_begin_idx,
451                            hori_begin_idx=same_vert_segments[rebuilt_begin].hori_begin_idx,
452                            hori_end_idx=same_vert_segments[rebuilt_end].hori_end_idx,
453                        )
454                    )
455                    rebuilt_begin = rebuilt_end + 1
456
457                # Re-pick the first segment.
458                cur_segment = rebuilt_segments[0]
459                for other_segment in rebuilt_segments[1:]:
460                    heapq.heappush(priority_queue, other_segment)
461
462            # Generate grids for the current segment.
463            vert_begin_idx = cur_segment.vert_begin_idx
464
465            hori_begin_idx = cur_segment.hori_begin_idx
466            hori_end_idx = cur_segment.hori_end_idx
467            while hori_begin_idx <= hori_end_idx:
468                # Randomly generate grid.
469                cur_vert_end_idx = rng.integers(vert_begin_idx, num_vert_ends)
470
471                # Try to sample segment with length >= 2.
472                if hori_end_idx + 1 - hori_begin_idx <= 3:
473                    cur_hori_end_idx = hori_end_idx
474                else:
475                    cur_hori_end_idx = rng.integers(hori_begin_idx + 1, hori_end_idx + 1)
476
477                grids.append(
478                    Box(
479                        up=vert_begins[vert_begin_idx],
480                        down=vert_ends[cur_vert_end_idx],
481                        left=hori_begins[hori_begin_idx],
482                        right=hori_ends[cur_hori_end_idx],
483                    )
484                )
485                next_vert_begin_idx = cur_vert_end_idx + 1
486                if next_vert_begin_idx < num_vert_ends:
487                    heapq.heappush(
488                        priority_queue,
489                        PrioritizedSegment(
490                            vert_begin_idx=next_vert_begin_idx,
491                            hori_begin_idx=hori_begin_idx,
492                            hori_end_idx=cur_hori_end_idx,
493                        ),
494                    )
495
496                hori_begin_idx = cur_hori_end_idx + 1
497
498        return grids
@classmethod
def calculate_normal_text_line_heights_probs( cls, normal_text_line_heights_expected_probs: Sequence[float], normal_text_line_heights_acc_areas: List[int]):
500    @classmethod
501    def calculate_normal_text_line_heights_probs(
502        cls,
503        normal_text_line_heights_expected_probs: Sequence[float],
504        normal_text_line_heights_acc_areas: List[int],
505    ):
506        if sum(normal_text_line_heights_acc_areas) == 0:
507            normal_text_line_heights_cur_probs = [0.0] * len(normal_text_line_heights_acc_areas)
508        else:
509            normal_text_line_heights_cur_probs = normalize_to_probs(
510                normal_text_line_heights_acc_areas
511            )
512
513        probs = normalize_to_probs([
514            max(0.0, expected_prob - cur_prob) for cur_prob, expected_prob in zip(
515                normal_text_line_heights_cur_probs,
516                normal_text_line_heights_expected_probs,
517            )
518        ])
519        return probs
def fill_normal_text_lines_to_grid( self, normal_text_line_heights: Sequence[int], normal_text_line_heights_expected_probs: Sequence[float], normal_text_line_heights_acc_areas: List[int], grid_idx: int, grid: vkit.element.box.Box, rng: numpy.random._generator.Generator):
521    def fill_normal_text_lines_to_grid(
522        self,
523        normal_text_line_heights: Sequence[int],
524        normal_text_line_heights_expected_probs: Sequence[float],
525        normal_text_line_heights_acc_areas: List[int],
526        grid_idx: int,
527        grid: Box,
528        rng: RandomGenerator,
529    ):
530        normal_text_line_heights_indices = list(range(len(normal_text_line_heights)))
531        normal_text_line_heights_max = normal_text_line_heights[-1]
532
533        layout_text_lines: List[LayoutTextLine] = []
534        up = grid.up
535        prev_text_line_height: Optional[int] = None
536
537        while up + normal_text_line_heights_max - 1 <= grid.down:
538            normal_text_line_heights_probs = self.calculate_normal_text_line_heights_probs(
539                normal_text_line_heights_expected_probs=normal_text_line_heights_expected_probs,
540                normal_text_line_heights_acc_areas=normal_text_line_heights_acc_areas,
541            )
542            normal_text_line_height_idx = rng_choice(
543                rng=rng,
544                items=normal_text_line_heights_indices,
545                probs=normal_text_line_heights_probs,
546            )
547            normal_text_line_height = normal_text_line_heights[normal_text_line_height_idx]
548
549            add_gap = False
550            if prev_text_line_height:
551                if prev_text_line_height != normal_text_line_height:
552                    add_gap = (rng.random() < self.config.prob_normal_text_line_diff_heights_gap)
553                else:
554                    add_gap = (rng.random() < self.config.prob_normal_text_line_gap)
555            if add_gap:
556                gap_ratio = rng.uniform(
557                    self.config.normal_text_line_gap_ratio_min,
558                    self.config.normal_text_line_gap_ratio_max,
559                )
560                gap = round(gap_ratio * normal_text_line_height)
561                gap = min(grid.down - (up + normal_text_line_height - 1), gap)
562                up += gap
563            down = up + normal_text_line_height - 1
564            assert down <= grid.down
565
566            length_ratio = rng.uniform(
567                self.config.normal_text_line_length_ratio_min,
568                self.config.normal_text_line_length_ratio_max,
569            )
570            normal_text_line_length = round(grid.width * length_ratio)
571            normal_text_line_length = max(normal_text_line_height, normal_text_line_length)
572
573            pad_max = grid.width - normal_text_line_length
574            pad = rng.integers(0, pad_max + 1)
575            left = grid.left + pad
576            right = left + normal_text_line_length - 1
577            assert right <= grid.right
578
579            text_line_idx = len(layout_text_lines)
580            layout_text_lines.append(
581                LayoutTextLine(
582                    grid_idx=grid_idx,
583                    text_line_idx=text_line_idx,
584                    text_line_height=normal_text_line_height,
585                    box=Box(up=up, down=down, left=left, right=right),
586                    glyph_sequence=FontEngineRunConfigGlyphSequence.HORI_DEFAULT,
587                )
588            )
589
590            prev_text_line_height = normal_text_line_height
591            normal_text_line_heights_acc_areas[normal_text_line_height_idx] \
592                += normal_text_line_length * normal_text_line_height
593            up = down + 1
594
595        return layout_text_lines
def fill_large_text_line_to_grid( self, large_text_line_gird: vkit.element.box.Box, rng: numpy.random._generator.Generator):
597    def fill_large_text_line_to_grid(
598        self,
599        large_text_line_gird: Box,
600        rng: RandomGenerator,
601    ):
602        length_ratio = rng.uniform(
603            self.config.large_text_line_length_ratio_min,
604            self.config.large_text_line_length_ratio_max,
605        )
606        large_text_line_length = round(large_text_line_gird.width * length_ratio)
607        large_text_line_length = max(large_text_line_gird.height, large_text_line_length)
608
609        pad_max = large_text_line_gird.width - large_text_line_length
610        pad = rng.integers(0, pad_max + 1)
611        left = large_text_line_gird.left + pad
612        right = left + large_text_line_length - 1
613        assert right <= large_text_line_gird.right
614
615        return LayoutTextLine(
616            grid_idx=-1,
617            text_line_idx=0,
618            text_line_height=large_text_line_gird.height,
619            box=attrs.evolve(large_text_line_gird, left=left, right=right),
620            glyph_sequence=FontEngineRunConfigGlyphSequence.HORI_DEFAULT,
621        )
def get_reference_height(self, height: int, width: int):
623    def get_reference_height(self, height: int, width: int):
624        area = height * width
625        reference_height = math.ceil(math.sqrt(area / self.config.reference_aspect_ratio))
626        return reference_height
def sample_layout_text_lines( self, height: int, width: int, rng: numpy.random._generator.Generator):
628    def sample_layout_text_lines(self, height: int, width: int, rng: RandomGenerator):
629        reference_height = self.get_reference_height(height=height, width=width)
630
631        normal_text_line_heights = self.sample_normal_text_line_heights(reference_height, rng)
632        (vert_begins, vert_ends), (hori_begins, hori_ends) = self.sample_grid_points(
633            height=height,
634            width=width,
635            normal_text_line_heights_max=normal_text_line_heights[-1],
636            rng=rng,
637        )
638
639        large_text_line_height = self.sample_large_text_line_height(reference_height, rng)
640        large_text_line_gird: Optional[Box] = None
641        if large_text_line_height is not None:
642            large_text_line_gird, vert_trim_idx = self.trim_grid_points_for_large_text_line(
643                large_text_line_height=large_text_line_height,
644                vert_begins=vert_begins,
645                vert_ends=vert_ends,
646                hori_begins_min=hori_begins[0],
647                hori_ends_max=hori_ends[-1],
648            )
649            if large_text_line_gird is not None:
650                vert_begins = vert_begins[vert_trim_idx:]
651                vert_ends = vert_ends[vert_trim_idx:]
652
653        grids = self.sample_grids(
654            vert_begins=vert_begins,
655            vert_ends=vert_ends,
656            hori_begins=hori_begins,
657            hori_ends=hori_ends,
658            rng=rng,
659        )
660        normal_text_line_heights_expected_probs = normalize_to_probs([
661            1 / normal_text_line_height for normal_text_line_height in normal_text_line_heights
662        ])
663        normal_text_line_heights_acc_areas = [0] * len(normal_text_line_heights)
664        layout_text_lines: List[LayoutTextLine] = []
665        for grid_idx, grid in enumerate(grids):
666            layout_text_lines.extend(
667                self.fill_normal_text_lines_to_grid(
668                    normal_text_line_heights=normal_text_line_heights,
669                    normal_text_line_heights_expected_probs=normal_text_line_heights_expected_probs,
670                    normal_text_line_heights_acc_areas=normal_text_line_heights_acc_areas,
671                    grid_idx=grid_idx,
672                    grid=grid,
673                    rng=rng,
674                )
675            )
676
677        if large_text_line_gird:
678            layout_text_lines.append(self.fill_large_text_line_to_grid(large_text_line_gird, rng))
679
680        # Must place text line.
681        assert layout_text_lines
682
683        return (
684            layout_text_lines,
685            large_text_line_gird,
686            grids,
687        )
def sample_layout_images( self, height: int, width: int, rng: numpy.random._generator.Generator):
689    def sample_layout_images(self, height: int, width: int, rng: RandomGenerator):
690        # Image could be overlapped with text lines.
691        layout_images: List[LayoutImage] = []
692
693        num_layout_images = rng.integers(
694            self.config.num_images_min,
695            self.config.num_images_max + 1,
696        )
697        for _ in range(num_layout_images):
698            # NOTE: It's ok to have overlapping images.
699            image_height_ratio = rng.uniform(
700                self.config.image_height_ratio_min,
701                self.config.image_height_ratio_max,
702            )
703            image_height = round(height * image_height_ratio)
704
705            image_width_ratio = rng.uniform(
706                self.config.image_width_ratio_min,
707                self.config.image_width_ratio_max,
708            )
709            image_width = round(width * image_width_ratio)
710
711            up = rng.integers(0, height - image_height + 1)
712            down = up + image_height - 1
713            left = rng.integers(0, width - image_width + 1)
714            right = left + image_width - 1
715            layout_images.append(LayoutImage(box=Box(up=up, down=down, left=left, right=right)))
716
717        return layout_images
@classmethod
def boxes_are_overlapped(cls, box0: vkit.element.box.Box, box1: vkit.element.box.Box):
719    @classmethod
720    def boxes_are_overlapped(cls, box0: Box, box1: Box):
721        vert_overlapped = (box0.down >= box1.up and box1.down >= box0.up)
722        hori_overlapped = (box0.right >= box1.left and box1.right >= box0.left)
723        return vert_overlapped and hori_overlapped
def sample_layout_barcode_qrs( self, height: int, width: int, layout_text_lines: Sequence[vkit.pipeline.text_detection.page_layout.LayoutTextLine], rng: numpy.random._generator.Generator):
725    def sample_layout_barcode_qrs(
726        self,
727        height: int,
728        width: int,
729        layout_text_lines: Sequence[LayoutTextLine],
730        rng: RandomGenerator,
731    ):
732        reference_height = self.get_reference_height(height=height, width=width)
733
734        layout_barcode_qrs: List[LayoutBarcodeQr] = []
735
736        num_layout_barcode_qrs = rng.integers(
737            self.config.num_barcode_qrs_min,
738            self.config.num_barcode_qrs_max + 1,
739        )
740        num_retries = 3
741        while num_layout_barcode_qrs > 0 and num_retries > 0:
742            barcode_qr_length_ratio = rng.uniform(
743                self.config.barcode_qr_length_ratio_min,
744                self.config.barcode_qr_length_ratio_max,
745            )
746            barcode_qr_length = round(barcode_qr_length_ratio * reference_height)
747            barcode_qr_length = min(height, width, barcode_qr_length)
748
749            # Place QR code next to text line.
750            anchor_layout_text_line_box = rng_choice(rng, layout_text_lines).box
751            anchor_layout_text_line_box_center = anchor_layout_text_line_box.get_center_point()
752            placement = rng_choice(rng, tuple(LayoutXcodePlacement))
753
754            if placement in (LayoutXcodePlacement.NEXT_TO_DOWN, LayoutXcodePlacement.NEXT_TO_UP):
755                if placement == LayoutXcodePlacement.NEXT_TO_DOWN:
756                    up = anchor_layout_text_line_box.down + 1
757                    down = up + barcode_qr_length - 1
758                    if down >= height:
759                        num_retries -= 1
760                        continue
761                else:
762                    assert placement == LayoutXcodePlacement.NEXT_TO_UP
763                    down = anchor_layout_text_line_box.up - 1
764                    up = down + 1 - barcode_qr_length
765                    if up < 0:
766                        num_retries -= 1
767                        continue
768
769                left_min = max(
770                    0,
771                    anchor_layout_text_line_box_center.x - barcode_qr_length,
772                )
773                left_max = min(
774                    width - barcode_qr_length,
775                    anchor_layout_text_line_box_center.x,
776                )
777                if left_min > left_max:
778                    num_retries -= 1
779                    continue
780                left = int(rng.integers(left_min, left_max + 1))
781                right = left + barcode_qr_length - 1
782
783            else:
784                assert placement in (
785                    LayoutXcodePlacement.NEXT_TO_RIGHT,
786                    LayoutXcodePlacement.NEXT_TO_LEFT,
787                )
788
789                if placement == LayoutXcodePlacement.NEXT_TO_RIGHT:
790                    left = anchor_layout_text_line_box.right + 1
791                    right = left + barcode_qr_length - 1
792                    if right >= width:
793                        num_retries -= 1
794                        continue
795                else:
796                    assert placement == LayoutXcodePlacement.NEXT_TO_LEFT
797                    right = anchor_layout_text_line_box.left - 1
798                    left = right + 1 - barcode_qr_length
799                    if left < 0:
800                        num_retries -= 1
801                        continue
802
803                up_min = max(
804                    0,
805                    anchor_layout_text_line_box_center.y - barcode_qr_length,
806                )
807                up_max = min(
808                    height - barcode_qr_length,
809                    anchor_layout_text_line_box_center.y,
810                )
811                if up_min > up_max:
812                    num_retries -= 1
813                    continue
814
815                up = int(rng.integers(up_min, up_max + 1))
816                down = up + barcode_qr_length - 1
817
818            num_layout_barcode_qrs -= 1
819            layout_barcode_qrs.append(
820                LayoutBarcodeQr(box=Box(
821                    up=up,
822                    down=down,
823                    left=left,
824                    right=right,
825                ))
826            )
827
828        return layout_barcode_qrs
def sample_layout_barcode_code39s( self, height: int, width: int, layout_text_lines: Sequence[vkit.pipeline.text_detection.page_layout.LayoutTextLine], rng: numpy.random._generator.Generator):
830    def sample_layout_barcode_code39s(
831        self,
832        height: int,
833        width: int,
834        layout_text_lines: Sequence[LayoutTextLine],
835        rng: RandomGenerator,
836    ):
837        reference_height = self.get_reference_height(height=height, width=width)
838
839        layout_barcode_code39s: List[LayoutBarcodeCode39] = []
840
841        num_layout_barcode_code39s = rng.integers(
842            self.config.num_barcode_code39s_min,
843            self.config.num_barcode_code39s_max + 1,
844        )
845        num_retries = 3
846        while num_layout_barcode_code39s > 0 and num_retries > 0:
847            barcode_code39_height_ratio = rng.uniform(
848                self.config.barcode_code39_height_ratio_min,
849                self.config.barcode_code39_height_ratio_max,
850            )
851            barcode_code39_height = round(barcode_code39_height_ratio * reference_height)
852            barcode_code39_height = min(height, width, barcode_code39_height)
853
854            barcode_code39_num_chars = int(
855                rng.integers(
856                    self.config.barcode_code39_num_chars_min,
857                    self.config.barcode_code39_num_chars_max + 1,
858                )
859            )
860            barcode_code39_width = round(
861                barcode_code39_height * self.config.barcode_code39_aspect_ratio
862                * barcode_code39_num_chars
863            )
864
865            # Place Bar code next to text line.
866            anchor_layout_text_line_box = rng_choice(rng, layout_text_lines).box
867            anchor_layout_text_line_box_center = anchor_layout_text_line_box.get_center_point()
868            placement = rng_choice(rng, tuple(LayoutXcodePlacement))
869
870            if placement in (LayoutXcodePlacement.NEXT_TO_DOWN, LayoutXcodePlacement.NEXT_TO_UP):
871                if placement == LayoutXcodePlacement.NEXT_TO_DOWN:
872                    up = anchor_layout_text_line_box.down + 1
873                    down = up + barcode_code39_height - 1
874                    if down >= height:
875                        num_retries -= 1
876                        continue
877                else:
878                    assert placement == LayoutXcodePlacement.NEXT_TO_UP
879                    down = anchor_layout_text_line_box.up - 1
880                    up = down + 1 - barcode_code39_height
881                    if up < 0:
882                        num_retries -= 1
883                        continue
884
885                left_min = max(
886                    0,
887                    anchor_layout_text_line_box_center.x - barcode_code39_width,
888                )
889                left_max = min(
890                    width - barcode_code39_width,
891                    anchor_layout_text_line_box_center.x,
892                )
893                if left_min > left_max:
894                    num_retries -= 1
895                    continue
896                left = int(rng.integers(left_min, left_max + 1))
897                right = left + barcode_code39_width - 1
898
899            else:
900                assert placement in (
901                    LayoutXcodePlacement.NEXT_TO_RIGHT,
902                    LayoutXcodePlacement.NEXT_TO_LEFT,
903                )
904
905                if placement == LayoutXcodePlacement.NEXT_TO_RIGHT:
906                    left = anchor_layout_text_line_box.right + 1
907                    right = left + barcode_code39_width - 1
908                    if right >= width:
909                        num_retries -= 1
910                        continue
911                else:
912                    assert placement == LayoutXcodePlacement.NEXT_TO_LEFT
913                    right = anchor_layout_text_line_box.left - 1
914                    left = right + 1 - barcode_code39_width
915                    if left < 0:
916                        num_retries -= 1
917                        continue
918
919                up_min = max(
920                    0,
921                    anchor_layout_text_line_box_center.y - barcode_code39_height,
922                )
923                up_max = min(
924                    height - barcode_code39_height,
925                    anchor_layout_text_line_box_center.y,
926                )
927                if up_min > up_max:
928                    num_retries -= 1
929                    continue
930
931                up = int(rng.integers(up_min, up_max + 1))
932                down = up + barcode_code39_height - 1
933
934            num_layout_barcode_code39s -= 1
935            layout_barcode_code39s.append(
936                LayoutBarcodeCode39(box=Box(
937                    up=up,
938                    down=down,
939                    left=left,
940                    right=right,
941                ))
942            )
943
944        return layout_barcode_code39s
def sample_layout_barcode_qrs_and_layout_barcode_code39s( self, height: int, width: int, layout_text_lines: Sequence[vkit.pipeline.text_detection.page_layout.LayoutTextLine], rng: numpy.random._generator.Generator):
946    def sample_layout_barcode_qrs_and_layout_barcode_code39s(
947        self,
948        height: int,
949        width: int,
950        layout_text_lines: Sequence[LayoutTextLine],
951        rng: RandomGenerator,
952    ):
953        layout_barcode_qrs = self.sample_layout_barcode_qrs(
954            height=height,
955            width=width,
956            layout_text_lines=layout_text_lines,
957            rng=rng,
958        )
959
960        layout_barcode_code39s = self.sample_layout_barcode_code39s(
961            height=height,
962            width=width,
963            layout_text_lines=layout_text_lines,
964            rng=rng,
965        )
966
967        if layout_barcode_qrs or layout_barcode_code39s:
968            # Barcode could not be overlapped with text lines.
969            # Hence need to remove the overlapped text lines.
970            box_overlapping_validator = BoxOverlappingValidator(
971                itertools.chain(
972                    (layout_barcode_qr.box for layout_barcode_qr in layout_barcode_qrs),
973                    (layout_barcode_code39.box for layout_barcode_code39 in layout_barcode_code39s),
974                )
975            )
976
977            keep_layout_text_lines: List[LayoutTextLine] = []
978            for layout_text_line in layout_text_lines:
979                if not box_overlapping_validator.is_overlapped(layout_text_line.box):
980                    keep_layout_text_lines.append(layout_text_line)
981            layout_text_lines = keep_layout_text_lines
982
983        return layout_barcode_qrs, layout_barcode_code39s, layout_text_lines
@classmethod
def get_text_line_area( cls, layout_text_lines: Sequence[vkit.pipeline.text_detection.page_layout.LayoutTextLine]):
985    @classmethod
986    def get_text_line_area(cls, layout_text_lines: Sequence[LayoutTextLine]):
987        # Sample within the text line area.
988        text_line_up = min(layout_text_line.box.up for layout_text_line in layout_text_lines)
989        text_line_down = max(layout_text_line.box.down for layout_text_line in layout_text_lines)
990        text_line_left = min(layout_text_line.box.left for layout_text_line in layout_text_lines)
991        text_line_right = max(layout_text_line.box.right for layout_text_line in layout_text_lines)
992        return (
993            text_line_up,
994            text_line_down,
995            text_line_left,
996            text_line_right,
997        )
def sample_layout_non_text_symbols( self, height: int, width: int, layout_text_lines: Sequence[vkit.pipeline.text_detection.page_layout.LayoutTextLine], rng: numpy.random._generator.Generator):
 999    def sample_layout_non_text_symbols(
1000        self,
1001        height: int,
1002        width: int,
1003        layout_text_lines: Sequence[LayoutTextLine],
1004        rng: RandomGenerator,
1005    ):
1006        reference_height = self.get_reference_height(height=height, width=width)
1007
1008        text_line_up = 0
1009        text_line_down = height - 1
1010        text_line_left = 0
1011        text_line_right = width - 1
1012
1013        layout_non_text_symbols: List[LayoutNonTextSymbol] = []
1014
1015        num_non_text_symbols = int(
1016            rng.integers(
1017                self.config.num_non_text_symbols_min,
1018                self.config.num_non_text_symbols_max + 1,
1019            )
1020        )
1021        for _ in range(num_non_text_symbols):
1022            non_text_symbol_height_ratio = rng.uniform(
1023                self.config.non_text_symbol_height_ratio_min,
1024                self.config.non_text_symbol_height_ratio_max,
1025            )
1026            non_text_symbol_height = round(non_text_symbol_height_ratio * reference_height)
1027
1028            non_text_symbol_aspect_ratio = rng.uniform(
1029                self.config.non_text_symbol_aspect_ratio_min,
1030                self.config.non_text_symbol_aspect_ratio_max,
1031            )
1032            non_text_symbol_width = round(non_text_symbol_aspect_ratio * non_text_symbol_height)
1033
1034            box = None
1035            overlapped = True
1036            for _ in range(self.config.num_retries_to_get_non_overlapped_non_text_symbol):
1037                up_max = text_line_down + 1 - non_text_symbol_height
1038                up = int(rng.integers(text_line_up, up_max + 1))
1039                down = up + non_text_symbol_height - 1
1040                assert up < down
1041
1042                left_max = text_line_right + 1 - non_text_symbol_width
1043                left = int(rng.integers(text_line_left, left_max + 1))
1044                right = left + non_text_symbol_width - 1
1045                assert left < right
1046
1047                box = Box(up=up, down=down, left=left, right=right)
1048
1049                cur_overlapped = False
1050                for layout_text_line in layout_text_lines:
1051                    if self.boxes_are_overlapped(box, layout_text_line.box):
1052                        cur_overlapped = True
1053                        break
1054
1055                if not cur_overlapped:
1056                    overlapped = False
1057                    break
1058
1059            assert box
1060
1061            if not overlapped:
1062                alpha = float(
1063                    rng.uniform(
1064                        self.config.non_text_symbol_non_overlapped_alpha_min,
1065                        self.config.non_text_symbol_non_overlapped_alpha_max,
1066                    )
1067                )
1068            else:
1069                alpha = float(
1070                    rng.uniform(
1071                        self.config.non_text_symbol_overlapped_alpha_min,
1072                        self.config.non_text_symbol_overlapped_alpha_max,
1073                    )
1074                )
1075
1076            layout_non_text_symbols.append(LayoutNonTextSymbol(
1077                box=box,
1078                alpha=alpha,
1079            ))
1080
1081        return layout_non_text_symbols
def sample_layout_seal_impressions( self, height: int, width: int, layout_text_lines: Sequence[vkit.pipeline.text_detection.page_layout.LayoutTextLine], rng: numpy.random._generator.Generator):
1083    def sample_layout_seal_impressions(
1084        self,
1085        height: int,
1086        width: int,
1087        layout_text_lines: Sequence[LayoutTextLine],
1088        rng: RandomGenerator,
1089    ):
1090        reference_height = self.get_reference_height(height=height, width=width)
1091
1092        (
1093            text_line_up,
1094            text_line_down,
1095            text_line_left,
1096            text_line_right,
1097        ) = self.get_text_line_area(layout_text_lines)
1098
1099        # Place seal impressions.
1100        layout_seal_impressions: List[LayoutSealImpression] = []
1101
1102        num_seal_impressions = int(
1103            rng.integers(
1104                self.config.num_seal_impressions_min,
1105                self.config.num_seal_impressions_max + 1,
1106            )
1107        )
1108        for _ in range(num_seal_impressions):
1109            # Sample height.
1110            seal_impression_height_ratio = float(
1111                rng.uniform(
1112                    self.config.seal_impression_height_ratio_min,
1113                    self.config.seal_impression_height_ratio_max,
1114                )
1115            )
1116            seal_impression_height = round(seal_impression_height_ratio * reference_height)
1117            seal_impression_height = min(text_line_down + 1 - text_line_up, seal_impression_height)
1118
1119            # Make sure even.
1120            if seal_impression_height % 2 != 0:
1121                seal_impression_height -= 1
1122
1123            # Sample width.
1124            shape_mode = rng_choice(
1125                rng,
1126                self.seal_impression_ellipse_shape_modes,
1127                probs=self.seal_impression_ellipse_shape_modes_probs,
1128            )
1129            if shape_mode == SealImpressionEllipseShapeMode.CIRCLE:
1130                seal_impression_width = seal_impression_height
1131
1132            elif shape_mode == SealImpressionEllipseShapeMode.GENERAL_ELLIPSE:
1133                aspect_ratio = float(
1134                    rng.uniform(
1135                        self.config.seal_impression_general_ellipse_aspect_ratio_min,
1136                        self.config.seal_impression_general_ellipse_aspect_ratio_max,
1137                    )
1138                )
1139                seal_impression_width = round(aspect_ratio * seal_impression_height)
1140
1141            else:
1142                raise NotImplementedError()
1143
1144            seal_impression_width = min(text_line_right + 1 - text_line_left, seal_impression_width)
1145
1146            # Make sure even.
1147            if seal_impression_width % 2 != 0:
1148                seal_impression_width -= 1
1149
1150            seal_impression_up_max = text_line_down + 1 - seal_impression_height
1151            seal_impression_up = int(rng.integers(
1152                text_line_up,
1153                seal_impression_up_max + 1,
1154            ))
1155            seal_impression_down = seal_impression_up + seal_impression_height - 1
1156
1157            seal_impression_left_max = text_line_right + 1 - seal_impression_width
1158            seal_impression_left = int(rng.integers(
1159                text_line_left,
1160                seal_impression_left_max + 1,
1161            ))
1162            seal_impression_right = seal_impression_left + seal_impression_width - 1
1163
1164            angle = int(
1165                rng.integers(
1166                    self.config.seal_impression_angle_min,
1167                    self.config.seal_impression_angle_max + 1,
1168                )
1169            )
1170            angle = angle % 360
1171
1172            layout_seal_impressions.append(
1173                LayoutSealImpression(
1174                    box=Box(
1175                        up=seal_impression_up,
1176                        down=seal_impression_down,
1177                        left=seal_impression_left,
1178                        right=seal_impression_right,
1179                    ),
1180                    angle=angle,
1181                )
1182            )
1183
1184        return layout_seal_impressions
def generate_disconnected_text_regions( self, layout_text_lines: Sequence[vkit.pipeline.text_detection.page_layout.LayoutTextLine]):
1186    def generate_disconnected_text_regions(
1187        self,
1188        layout_text_lines: Sequence[LayoutTextLine],
1189    ):
1190        grid_idx_to_layout_text_lines: DefaultDict[int, List[LayoutTextLine]] = defaultdict(list)
1191        for layout_text_line in layout_text_lines:
1192            grid_idx_to_layout_text_lines[layout_text_line.grid_idx].append(layout_text_line)
1193
1194        disconnected_text_regions: List[DisconnectedTextRegion] = []
1195
1196        for _, layout_text_lines in sorted(
1197            grid_idx_to_layout_text_lines.items(),
1198            key=lambda p: p[0],
1199        ):
1200            layout_text_lines = sorted(layout_text_lines, key=lambda ltl: ltl.text_line_idx)
1201
1202            begin = 0
1203            while begin < len(layout_text_lines):
1204                text_line_height_min = layout_text_lines[begin].text_line_height
1205                text_line_height_max = text_line_height_min
1206
1207                # Find [begin, end) interval satisfying the condition.
1208                end = begin + 1
1209                while end < len(layout_text_lines):
1210                    text_line_height = layout_text_lines[end].text_line_height
1211                    text_line_height_min = min(text_line_height_min, text_line_height)
1212                    text_line_height_max = max(text_line_height_max, text_line_height)
1213                    if text_line_height_max / text_line_height_min \
1214                            > self.config.disconnected_text_region_polygons_height_ratio_max:
1215                        break
1216                    else:
1217                        end += 1
1218
1219                # To polygon.
1220                # NOTE: Simply using a bounding box is enough.
1221                # This method is common to all glyph sequences.
1222                cur_layout_text_lines = layout_text_lines[begin:end]
1223                bounding_box = Box(
1224                    up=min(ltl.box.up for ltl in cur_layout_text_lines),
1225                    down=max(ltl.box.down for ltl in cur_layout_text_lines),
1226                    left=min(ltl.box.left for ltl in cur_layout_text_lines),
1227                    right=max(ltl.box.right for ltl in cur_layout_text_lines),
1228                )
1229                step = min(
1230                    itertools.chain.from_iterable(ltl.box.shape for ltl in cur_layout_text_lines)
1231                )
1232                disconnected_text_regions.append(
1233                    DisconnectedTextRegion(polygon=bounding_box.to_polygon(step=step))
1234                )
1235
1236                # Move to next.
1237                begin = end
1238
1239        return disconnected_text_regions
def generate_non_text_regions( self, height: int, width: int, layout_text_lines: Sequence[vkit.pipeline.text_detection.page_layout.LayoutTextLine], rng: numpy.random._generator.Generator):
1241    def generate_non_text_regions(
1242        self,
1243        height: int,
1244        width: int,
1245        layout_text_lines: Sequence[LayoutTextLine],
1246        rng: RandomGenerator,
1247    ):
1248        box_overlapping_validator = BoxOverlappingValidator(
1249            layout_text_line.box for layout_text_line in layout_text_lines
1250        )
1251        directions = [
1252            LayoutNonTextLineDirection.UP,
1253            LayoutNonTextLineDirection.DOWN,
1254            LayoutNonTextLineDirection.LEFT,
1255            LayoutNonTextLineDirection.RIGHT,
1256        ]
1257
1258        lntl_boxes: List[Box] = []
1259        for layout_text_line in layout_text_lines:
1260            ltl_box = layout_text_line.box
1261
1262            for direction_idx in rng.permutation(len(directions)):
1263                direction = directions[direction_idx]
1264
1265                if direction == LayoutNonTextLineDirection.UP:
1266                    lntl_box = Box(
1267                        up=ltl_box.up - ltl_box.height,
1268                        down=ltl_box.up - 1,
1269                        left=ltl_box.left,
1270                        right=ltl_box.right,
1271                    )
1272
1273                elif direction == LayoutNonTextLineDirection.DOWN:
1274                    lntl_box = Box(
1275                        up=ltl_box.down + 1,
1276                        down=ltl_box.down + ltl_box.height,
1277                        left=ltl_box.left,
1278                        right=ltl_box.right,
1279                    )
1280
1281                elif direction == LayoutNonTextLineDirection.LEFT:
1282                    lntl_box = Box(
1283                        up=ltl_box.up,
1284                        down=ltl_box.down,
1285                        left=ltl_box.left - ltl_box.width,
1286                        right=ltl_box.left - 1,
1287                    )
1288
1289                elif direction == LayoutNonTextLineDirection.RIGHT:
1290                    lntl_box = Box(
1291                        up=ltl_box.up,
1292                        down=ltl_box.down,
1293                        left=ltl_box.right + 1,
1294                        right=ltl_box.right + ltl_box.width,
1295                    )
1296
1297                else:
1298                    raise NotImplementedError()
1299
1300                # Ignore invalid box.
1301                if not lntl_box.valid:
1302                    continue
1303                if lntl_box.down >= height or lntl_box.right >= width:
1304                    continue
1305
1306                assert ltl_box.shape == lntl_box.shape
1307
1308                # Ignore box that is overlapped with any text lines.
1309                if box_overlapping_validator.is_overlapped(lntl_box):
1310                    continue
1311
1312                # Keep only the first valid direction.
1313                lntl_boxes.append(lntl_box)
1314                break
1315
1316        step = max(
1317            1,
1318            min(itertools.chain.from_iterable(lntl_box.shape for lntl_box in lntl_boxes)),
1319        )
1320        non_text_regions = [
1321            NonTextRegion(polygon=lntl_box.to_polygon(step=step)) for lntl_box in lntl_boxes
1322        ]
1323        return non_text_regions
def run( self, input: vkit.pipeline.text_detection.page_layout.PageLayoutStepInput, rng: numpy.random._generator.Generator):
1325    def run(self, input: PageLayoutStepInput, rng: RandomGenerator):
1326        page_shape_step_output = input.page_shape_step_output
1327        height = page_shape_step_output.height
1328        width = page_shape_step_output.width
1329
1330        # Text lines.
1331        (
1332            layout_text_lines,
1333            large_text_line_gird,
1334            grids,
1335        ) = self.sample_layout_text_lines(height=height, width=width, rng=rng)
1336
1337        # Images.
1338        layout_images = self.sample_layout_images(height=height, width=width, rng=rng)
1339
1340        # QR codes & Bar codes.
1341        # NOTE: Some layout_text_lines could be dropped.
1342        (
1343            layout_barcode_qrs,
1344            layout_barcode_code39s,
1345            layout_text_lines,
1346        ) = self.sample_layout_barcode_qrs_and_layout_barcode_code39s(
1347            height=height,
1348            width=width,
1349            layout_text_lines=layout_text_lines,
1350            rng=rng,
1351        )
1352
1353        # Non-text symbols.
1354        layout_non_text_symbols = self.sample_layout_non_text_symbols(
1355            height=height,
1356            width=width,
1357            layout_text_lines=layout_text_lines,
1358            rng=rng,
1359        )
1360
1361        # Seal impressions.
1362        layout_seal_impressions = self.sample_layout_seal_impressions(
1363            height=height,
1364            width=width,
1365            layout_text_lines=layout_text_lines,
1366            rng=rng,
1367        )
1368
1369        # For char-level polygon regression.
1370        disconnected_text_regions = self.generate_disconnected_text_regions(
1371            layout_text_lines=layout_text_lines,
1372        )
1373
1374        # For sampling negative text region area.
1375        non_text_regions = self.generate_non_text_regions(
1376            height=height,
1377            width=width,
1378            layout_text_lines=layout_text_lines,
1379            rng=rng,
1380        )
1381
1382        return PageLayoutStepOutput(
1383            page_layout=PageLayout(
1384                height=height,
1385                width=width,
1386                layout_text_lines=layout_text_lines,
1387                layout_non_text_symbols=layout_non_text_symbols,
1388                layout_seal_impressions=layout_seal_impressions,
1389                layout_images=layout_images,
1390                layout_barcode_qrs=layout_barcode_qrs,
1391                layout_barcode_code39s=layout_barcode_code39s,
1392                disconnected_text_regions=disconnected_text_regions,
1393                non_text_regions=non_text_regions,
1394            ),
1395            debug_large_text_line_gird=large_text_line_gird,
1396            debug_grids=grids,
1397        )