vkit.pipeline.text_detection.page_layout
1# Copyright 2022 vkit-x Administrator. All Rights Reserved. 2# 3# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses. 4# 5# The commercial license gives you the full rights to create and distribute software 6# on your own terms without any SSPL license obligations. For more information, 7# please see the "LICENSE_COMMERCIAL.txt" file. 8# 9# This project is also available under Server Side Public License (SSPL). 10# The SSPL licensing is ideal for use cases such as open source projects with 11# SSPL distribution, student/academic purposes, hobby projects, internal research 12# projects without external distribution, or other projects where all SSPL 13# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file. 14from typing import Optional, Sequence, List, DefaultDict 15import math 16import heapq 17from enum import Enum, unique 18import itertools 19from collections import defaultdict 20 21import attrs 22from numpy.random import Generator as RandomGenerator 23 24from vkit.utility import rng_choice, normalize_to_probs, normalize_to_keys_and_probs 25from vkit.element import Box, BoxOverlappingValidator, Polygon 26from vkit.engine.font import FontEngineRunConfigGlyphSequence 27from .page_shape import PageShapeStepOutput 28from ..interface import PipelineStep, PipelineStepFactory 29 30 31@attrs.define 32class PageLayoutStepConfig: 33 # Text line heights. 34 reference_aspect_ratio: float = 1 / 1.4142 35 36 # Grid points. 37 grid_pad_ratio_min: float = 0.01 38 grid_pad_ratio_max: float = 0.05 39 grid_step_ratio_min: float = 1.0 40 grid_step_ratio_max: float = 1.1 41 grid_vert_gap_ratio_min: float = 0.0 42 grid_vert_gap_ratio_max: float = 0.5 43 grid_hori_gap_ratio_min: float = 1.0 44 grid_hori_gap_ratio_max: float = 1.15 45 46 # Large text line. 47 prob_add_large_text_line: float = 0.25 48 large_text_line_height_ratio_min: float = 0.05 49 large_text_line_height_ratio_max: float = 0.075 50 large_text_line_length_ratio_min: float = 0.5 51 large_text_line_length_ratio_max: float = 1.0 52 53 # Normal text line. 54 num_normal_text_line_heights_min: int = 2 55 num_normal_text_line_heights_max: int = 4 56 normal_text_line_height_ratio_min: float = 0.006 57 normal_text_line_height_ratio_max: float = 0.036 58 force_add_normal_text_line_height_ratio_min: bool = True 59 60 # Non-text symbol. 61 num_non_text_symbols_min: int = 0 62 num_non_text_symbols_max: int = 5 63 num_retries_to_get_non_overlapped_non_text_symbol: int = 5 64 non_text_symbol_height_ratio_min: float = 0.018 65 non_text_symbol_height_ratio_max: float = 0.064 66 non_text_symbol_aspect_ratio_min: float = 0.9 67 non_text_symbol_aspect_ratio_max: float = 1.111 68 non_text_symbol_non_overlapped_alpha_min: float = 0.8 69 non_text_symbol_non_overlapped_alpha_max: float = 1.0 70 non_text_symbol_overlapped_alpha_min: float = 0.15 71 non_text_symbol_overlapped_alpha_max: float = 0.55 72 73 prob_normal_text_line_diff_heights_gap: float = 0.5 74 prob_normal_text_line_gap: float = 0.5 75 normal_text_line_gap_ratio_min: float = 0.05 76 normal_text_line_gap_ratio_max: float = 1.25 77 normal_text_line_length_ratio_min: float = 0.5 78 normal_text_line_length_ratio_max: float = 1.0 79 80 # Image. 81 num_images_min: int = 0 82 num_images_max: int = 3 83 image_height_ratio_min: float = 0.1 84 image_height_ratio_max: float = 0.35 85 image_width_ratio_min: float = 0.1 86 image_width_ratio_max: float = 0.35 87 88 # Barcode (qr). 89 num_barcode_qrs_min: int = 0 90 num_barcode_qrs_max: int = 2 91 barcode_qr_length_ratio_min: float = 0.05 92 barcode_qr_length_ratio_max: float = 0.15 93 94 # Barcode (code39). 95 num_barcode_code39s_min: int = 0 96 num_barcode_code39s_max: int = 2 97 barcode_code39_height_ratio_min: float = 0.025 98 barcode_code39_height_ratio_max: float = 0.05 99 barcode_code39_aspect_ratio: float = 0.2854396602149411 100 barcode_code39_num_chars_min: int = 9 101 barcode_code39_num_chars_max: int = 13 102 103 # Seal impression. 104 num_seal_impressions_min: int = 1 105 num_seal_impressions_max: int = 3 106 seal_impression_angle_min: int = -45 107 seal_impression_angle_max: int = 45 108 seal_impression_height_ratio_min: float = 0.1 109 seal_impression_height_ratio_max: float = 0.2 110 seal_impression_weight_circle: float = 1 111 seal_impression_weight_general_ellipse: float = 1 112 seal_impression_general_ellipse_aspect_ratio_min: float = 0.75 113 seal_impression_general_ellipse_aspect_ratio_max: float = 1.333 114 115 # For char-level polygon regression. 116 disconnected_text_region_polygons_height_ratio_max: float = 2.0 117 118 119@attrs.define 120class PageLayoutStepInput: 121 page_shape_step_output: PageShapeStepOutput 122 123 124@attrs.define 125class LayoutTextLine: 126 # grid_idx: 127 # == -1: for large text line. 128 # >= 0: for normal text lines. 129 grid_idx: int 130 # text_line_idx: index within a grid. 131 text_line_idx: int 132 text_line_height: int 133 box: Box 134 glyph_sequence: FontEngineRunConfigGlyphSequence 135 136 137@attrs.define 138class LayoutNonTextSymbol: 139 box: Box 140 alpha: float 141 142 143@attrs.define 144class LayoutSealImpression: 145 box: Box 146 angle: int 147 148 149@attrs.define 150class LayoutImage: 151 box: Box 152 153 154@attrs.define 155class LayoutBarcodeQr: 156 box: Box 157 158 159@attrs.define 160class LayoutBarcodeCode39: 161 box: Box 162 163 164@unique 165class LayoutXcodePlacement(Enum): 166 NEXT_TO_UP = 'next_to_up' 167 NEXT_TO_DOWN = 'next_to_down' 168 NEXT_TO_LEFT = 'next_to_left' 169 NEXT_TO_RIGHT = 'next_to_right' 170 171 172@attrs.define 173class DisconnectedTextRegion: 174 polygon: Polygon 175 176 177@attrs.define 178class NonTextRegion: 179 polygon: Polygon 180 181 182@unique 183class LayoutNonTextLineDirection(Enum): 184 UP = 'up' 185 DOWN = 'down' 186 LEFT = 'left' 187 RIGHT = 'right' 188 189 190@attrs.define 191class PageLayout: 192 height: int 193 width: int 194 layout_text_lines: Sequence[LayoutTextLine] 195 layout_non_text_symbols: Sequence[LayoutNonTextSymbol] 196 layout_seal_impressions: Sequence[LayoutSealImpression] 197 layout_images: Sequence[LayoutImage] 198 layout_barcode_qrs: Sequence[LayoutBarcodeQr] 199 layout_barcode_code39s: Sequence[LayoutBarcodeCode39] 200 disconnected_text_regions: Sequence[DisconnectedTextRegion] 201 non_text_regions: Sequence[NonTextRegion] 202 203 204@attrs.define 205class PageLayoutStepOutput: 206 page_layout: PageLayout 207 debug_large_text_line_gird: Optional[Box] 208 debug_grids: Sequence[Box] 209 210 211@attrs.define(order=True) 212class PrioritizedSegment: 213 vert_begin_idx: int = attrs.field(order=True) 214 hori_begin_idx: int = attrs.field(order=False) 215 hori_end_idx: int = attrs.field(order=False) 216 217 218@unique 219class SealImpressionEllipseShapeMode(Enum): 220 CIRCLE = 'circle' 221 GENERAL_ELLIPSE = 'general_ellipse' 222 223 224class PageLayoutStep( 225 PipelineStep[ 226 PageLayoutStepConfig, 227 PageLayoutStepInput, 228 PageLayoutStepOutput, 229 ] 230): # yapf: disable 231 232 def __init__(self, config: PageLayoutStepConfig): 233 super().__init__(config) 234 235 ( 236 self.seal_impression_ellipse_shape_modes, 237 self.seal_impression_ellipse_shape_modes_probs, 238 ) = normalize_to_keys_and_probs([ 239 ( 240 SealImpressionEllipseShapeMode.CIRCLE, 241 self.config.seal_impression_weight_circle, 242 ), 243 ( 244 SealImpressionEllipseShapeMode.GENERAL_ELLIPSE, 245 self.config.seal_impression_weight_general_ellipse, 246 ), 247 ]) 248 249 def sample_large_text_line_height(self, reference_height: int, rng: RandomGenerator): 250 if rng.random() < self.config.prob_add_large_text_line: 251 large_text_line_height_ratio = rng.uniform( 252 self.config.large_text_line_height_ratio_min, 253 self.config.large_text_line_height_ratio_max, 254 ) 255 return round(large_text_line_height_ratio * reference_height) 256 257 else: 258 return None 259 260 def sample_normal_text_line_heights(self, reference_height: int, rng: RandomGenerator): 261 normal_text_line_heights: List[int] = [] 262 263 if self.config.force_add_normal_text_line_height_ratio_min: 264 normal_text_line_heights.append( 265 round(self.config.normal_text_line_height_ratio_min * reference_height) 266 ) 267 268 num_normal_text_line_heights = rng.integers( 269 self.config.num_normal_text_line_heights_min, 270 self.config.num_normal_text_line_heights_max + 1, 271 ) 272 ratio_step = ( 273 self.config.normal_text_line_height_ratio_max 274 - self.config.normal_text_line_height_ratio_min 275 ) / num_normal_text_line_heights 276 for step_idx in range(num_normal_text_line_heights): 277 ratio_min = self.config.normal_text_line_height_ratio_min + step_idx * ratio_step 278 ratio_max = ratio_min + ratio_step 279 ratio = rng.uniform(ratio_min, ratio_max) 280 normal_text_line_heights.append(round(ratio * reference_height)) 281 282 assert normal_text_line_heights 283 return sorted(normal_text_line_heights) 284 285 @classmethod 286 def generate_grid_points( 287 cls, 288 grid_pad_ratio: float, 289 grid_step: int, 290 grid_gap: int, 291 grid_gap_min: Optional[int], 292 length: int, 293 rng: RandomGenerator, 294 ): 295 grid_pad = min(length - grid_step, length * grid_pad_ratio) 296 assert grid_pad > 0 297 298 num_steps = (length - grid_pad + grid_gap) / (grid_step + grid_gap) 299 if not num_steps.is_integer(): 300 num_steps = math.floor(num_steps) 301 num_steps = int(num_steps) 302 303 grid_pad = length - grid_step * num_steps - grid_gap * (num_steps - 1) 304 assert grid_pad > 0 305 grid_pad = grid_pad // 2 306 307 begin = grid_pad 308 end = grid_pad + grid_step - 1 309 assert end < length - grid_pad 310 311 begins: List[int] = [] 312 ends: List[int] = [] 313 314 while end < length - grid_pad: 315 begins.append(begin) 316 ends.append(end) 317 318 cur_gap = grid_gap 319 if grid_gap_min is not None: 320 cur_gap = rng.integers(grid_gap_min, grid_gap + 1) 321 322 begin = end + cur_gap 323 end = begin + grid_step - 1 324 325 return begins, ends 326 327 def sample_grid_points( 328 self, 329 height: int, 330 width: int, 331 normal_text_line_heights_max: int, 332 rng: RandomGenerator, 333 ): 334 grid_pad_ratio = rng.uniform( 335 self.config.grid_pad_ratio_min, 336 self.config.grid_pad_ratio_max, 337 ) 338 339 grid_step_ratio = rng.uniform( 340 self.config.grid_step_ratio_min, 341 self.config.grid_step_ratio_max, 342 ) 343 grid_step = round(normal_text_line_heights_max * grid_step_ratio) 344 345 grid_vert_gap_min = round( 346 normal_text_line_heights_max * self.config.grid_vert_gap_ratio_min 347 ) 348 grid_vert_gap_max = round( 349 normal_text_line_heights_max * self.config.grid_vert_gap_ratio_max 350 ) 351 vert_begins, vert_ends = self.generate_grid_points( 352 grid_pad_ratio=grid_pad_ratio, 353 grid_step=grid_step, 354 grid_gap=grid_vert_gap_max, 355 grid_gap_min=grid_vert_gap_min, 356 length=height, 357 rng=rng, 358 ) 359 360 grid_hori_gap_ratio = rng.uniform( 361 self.config.grid_hori_gap_ratio_min, 362 self.config.grid_hori_gap_ratio_max, 363 ) 364 grid_hori_gap = round(normal_text_line_heights_max * grid_hori_gap_ratio) 365 grid_hori_gap = max(normal_text_line_heights_max, grid_hori_gap) 366 hori_begins, hori_ends = self.generate_grid_points( 367 grid_pad_ratio=grid_pad_ratio, 368 grid_step=grid_step, 369 grid_gap=grid_hori_gap, 370 grid_gap_min=None, 371 length=width, 372 rng=rng, 373 ) 374 return (vert_begins, vert_ends), (hori_begins, hori_ends) 375 376 def trim_grid_points_for_large_text_line( 377 self, 378 large_text_line_height: int, 379 vert_begins: Sequence[int], 380 vert_ends: Sequence[int], 381 hori_begins_min: int, 382 hori_ends_max: int, 383 ): 384 idx = 0 385 while idx < len(vert_begins) \ 386 and vert_ends[idx] + 1 - vert_begins[0] < large_text_line_height: 387 idx += 1 388 389 if idx >= len(vert_begins) - 1: 390 return None, 0 391 392 large_text_line_gird = Box( 393 up=vert_ends[idx] - large_text_line_height + 1, 394 down=vert_ends[idx], 395 left=hori_begins_min, 396 right=hori_ends_max, 397 ) 398 return large_text_line_gird, idx + 1 399 400 def sample_grids( 401 self, 402 vert_begins: Sequence[int], 403 vert_ends: Sequence[int], 404 hori_begins: Sequence[int], 405 hori_ends: Sequence[int], 406 rng: RandomGenerator, 407 ): 408 num_vert_ends = len(vert_ends) 409 assert num_vert_ends == len(vert_begins) 410 411 num_hori_ends = len(hori_ends) 412 assert num_hori_ends == len(hori_begins) 413 414 priority_queue = [ 415 PrioritizedSegment( 416 vert_begin_idx=0, 417 hori_begin_idx=0, 418 hori_end_idx=num_hori_ends - 1, 419 ) 420 ] 421 grids: List[Box] = [] 422 while priority_queue: 423 cur_segment = heapq.heappop(priority_queue) 424 425 # Deal with segments in the same level. 426 same_vert_segments: List[PrioritizedSegment] = [] 427 while priority_queue \ 428 and priority_queue[0].vert_begin_idx == cur_segment.vert_begin_idx: 429 same_vert_segments.append(heapq.heappop(priority_queue)) 430 431 if same_vert_segments: 432 # Rebuid segments. 433 same_vert_segments.append(cur_segment) 434 same_vert_segments = sorted( 435 same_vert_segments, 436 key=lambda segment: segment.hori_begin_idx, 437 ) 438 439 rebuilt_segments: List[PrioritizedSegment] = [] 440 rebuilt_begin = 0 441 while rebuilt_begin < len(same_vert_segments): 442 rebuilt_end = rebuilt_begin 443 while rebuilt_end + 1 < len(same_vert_segments) \ 444 and (same_vert_segments[rebuilt_end + 1].hori_begin_idx 445 == same_vert_segments[rebuilt_end].hori_end_idx + 1): 446 rebuilt_end += 1 447 rebuilt_segments.append( 448 PrioritizedSegment( 449 vert_begin_idx=cur_segment.vert_begin_idx, 450 hori_begin_idx=same_vert_segments[rebuilt_begin].hori_begin_idx, 451 hori_end_idx=same_vert_segments[rebuilt_end].hori_end_idx, 452 ) 453 ) 454 rebuilt_begin = rebuilt_end + 1 455 456 # Re-pick the first segment. 457 cur_segment = rebuilt_segments[0] 458 for other_segment in rebuilt_segments[1:]: 459 heapq.heappush(priority_queue, other_segment) 460 461 # Generate grids for the current segment. 462 vert_begin_idx = cur_segment.vert_begin_idx 463 464 hori_begin_idx = cur_segment.hori_begin_idx 465 hori_end_idx = cur_segment.hori_end_idx 466 while hori_begin_idx <= hori_end_idx: 467 # Randomly generate grid. 468 cur_vert_end_idx = rng.integers(vert_begin_idx, num_vert_ends) 469 470 # Try to sample segment with length >= 2. 471 if hori_end_idx + 1 - hori_begin_idx <= 3: 472 cur_hori_end_idx = hori_end_idx 473 else: 474 cur_hori_end_idx = rng.integers(hori_begin_idx + 1, hori_end_idx + 1) 475 476 grids.append( 477 Box( 478 up=vert_begins[vert_begin_idx], 479 down=vert_ends[cur_vert_end_idx], 480 left=hori_begins[hori_begin_idx], 481 right=hori_ends[cur_hori_end_idx], 482 ) 483 ) 484 next_vert_begin_idx = cur_vert_end_idx + 1 485 if next_vert_begin_idx < num_vert_ends: 486 heapq.heappush( 487 priority_queue, 488 PrioritizedSegment( 489 vert_begin_idx=next_vert_begin_idx, 490 hori_begin_idx=hori_begin_idx, 491 hori_end_idx=cur_hori_end_idx, 492 ), 493 ) 494 495 hori_begin_idx = cur_hori_end_idx + 1 496 497 return grids 498 499 @classmethod 500 def calculate_normal_text_line_heights_probs( 501 cls, 502 normal_text_line_heights_expected_probs: Sequence[float], 503 normal_text_line_heights_acc_areas: List[int], 504 ): 505 if sum(normal_text_line_heights_acc_areas) == 0: 506 normal_text_line_heights_cur_probs = [0.0] * len(normal_text_line_heights_acc_areas) 507 else: 508 normal_text_line_heights_cur_probs = normalize_to_probs( 509 normal_text_line_heights_acc_areas 510 ) 511 512 probs = normalize_to_probs([ 513 max(0.0, expected_prob - cur_prob) for cur_prob, expected_prob in zip( 514 normal_text_line_heights_cur_probs, 515 normal_text_line_heights_expected_probs, 516 ) 517 ]) 518 return probs 519 520 def fill_normal_text_lines_to_grid( 521 self, 522 normal_text_line_heights: Sequence[int], 523 normal_text_line_heights_expected_probs: Sequence[float], 524 normal_text_line_heights_acc_areas: List[int], 525 grid_idx: int, 526 grid: Box, 527 rng: RandomGenerator, 528 ): 529 normal_text_line_heights_indices = list(range(len(normal_text_line_heights))) 530 normal_text_line_heights_max = normal_text_line_heights[-1] 531 532 layout_text_lines: List[LayoutTextLine] = [] 533 up = grid.up 534 prev_text_line_height: Optional[int] = None 535 536 while up + normal_text_line_heights_max - 1 <= grid.down: 537 normal_text_line_heights_probs = self.calculate_normal_text_line_heights_probs( 538 normal_text_line_heights_expected_probs=normal_text_line_heights_expected_probs, 539 normal_text_line_heights_acc_areas=normal_text_line_heights_acc_areas, 540 ) 541 normal_text_line_height_idx = rng_choice( 542 rng=rng, 543 items=normal_text_line_heights_indices, 544 probs=normal_text_line_heights_probs, 545 ) 546 normal_text_line_height = normal_text_line_heights[normal_text_line_height_idx] 547 548 add_gap = False 549 if prev_text_line_height: 550 if prev_text_line_height != normal_text_line_height: 551 add_gap = (rng.random() < self.config.prob_normal_text_line_diff_heights_gap) 552 else: 553 add_gap = (rng.random() < self.config.prob_normal_text_line_gap) 554 if add_gap: 555 gap_ratio = rng.uniform( 556 self.config.normal_text_line_gap_ratio_min, 557 self.config.normal_text_line_gap_ratio_max, 558 ) 559 gap = round(gap_ratio * normal_text_line_height) 560 gap = min(grid.down - (up + normal_text_line_height - 1), gap) 561 up += gap 562 down = up + normal_text_line_height - 1 563 assert down <= grid.down 564 565 length_ratio = rng.uniform( 566 self.config.normal_text_line_length_ratio_min, 567 self.config.normal_text_line_length_ratio_max, 568 ) 569 normal_text_line_length = round(grid.width * length_ratio) 570 normal_text_line_length = max(normal_text_line_height, normal_text_line_length) 571 572 pad_max = grid.width - normal_text_line_length 573 pad = rng.integers(0, pad_max + 1) 574 left = grid.left + pad 575 right = left + normal_text_line_length - 1 576 assert right <= grid.right 577 578 text_line_idx = len(layout_text_lines) 579 layout_text_lines.append( 580 LayoutTextLine( 581 grid_idx=grid_idx, 582 text_line_idx=text_line_idx, 583 text_line_height=normal_text_line_height, 584 box=Box(up=up, down=down, left=left, right=right), 585 glyph_sequence=FontEngineRunConfigGlyphSequence.HORI_DEFAULT, 586 ) 587 ) 588 589 prev_text_line_height = normal_text_line_height 590 normal_text_line_heights_acc_areas[normal_text_line_height_idx] \ 591 += normal_text_line_length * normal_text_line_height 592 up = down + 1 593 594 return layout_text_lines 595 596 def fill_large_text_line_to_grid( 597 self, 598 large_text_line_gird: Box, 599 rng: RandomGenerator, 600 ): 601 length_ratio = rng.uniform( 602 self.config.large_text_line_length_ratio_min, 603 self.config.large_text_line_length_ratio_max, 604 ) 605 large_text_line_length = round(large_text_line_gird.width * length_ratio) 606 large_text_line_length = max(large_text_line_gird.height, large_text_line_length) 607 608 pad_max = large_text_line_gird.width - large_text_line_length 609 pad = rng.integers(0, pad_max + 1) 610 left = large_text_line_gird.left + pad 611 right = left + large_text_line_length - 1 612 assert right <= large_text_line_gird.right 613 614 return LayoutTextLine( 615 grid_idx=-1, 616 text_line_idx=0, 617 text_line_height=large_text_line_gird.height, 618 box=attrs.evolve(large_text_line_gird, left=left, right=right), 619 glyph_sequence=FontEngineRunConfigGlyphSequence.HORI_DEFAULT, 620 ) 621 622 def get_reference_height(self, height: int, width: int): 623 area = height * width 624 reference_height = math.ceil(math.sqrt(area / self.config.reference_aspect_ratio)) 625 return reference_height 626 627 def sample_layout_text_lines(self, height: int, width: int, rng: RandomGenerator): 628 reference_height = self.get_reference_height(height=height, width=width) 629 630 normal_text_line_heights = self.sample_normal_text_line_heights(reference_height, rng) 631 (vert_begins, vert_ends), (hori_begins, hori_ends) = self.sample_grid_points( 632 height=height, 633 width=width, 634 normal_text_line_heights_max=normal_text_line_heights[-1], 635 rng=rng, 636 ) 637 638 large_text_line_height = self.sample_large_text_line_height(reference_height, rng) 639 large_text_line_gird: Optional[Box] = None 640 if large_text_line_height is not None: 641 large_text_line_gird, vert_trim_idx = self.trim_grid_points_for_large_text_line( 642 large_text_line_height=large_text_line_height, 643 vert_begins=vert_begins, 644 vert_ends=vert_ends, 645 hori_begins_min=hori_begins[0], 646 hori_ends_max=hori_ends[-1], 647 ) 648 if large_text_line_gird is not None: 649 vert_begins = vert_begins[vert_trim_idx:] 650 vert_ends = vert_ends[vert_trim_idx:] 651 652 grids = self.sample_grids( 653 vert_begins=vert_begins, 654 vert_ends=vert_ends, 655 hori_begins=hori_begins, 656 hori_ends=hori_ends, 657 rng=rng, 658 ) 659 normal_text_line_heights_expected_probs = normalize_to_probs([ 660 1 / normal_text_line_height for normal_text_line_height in normal_text_line_heights 661 ]) 662 normal_text_line_heights_acc_areas = [0] * len(normal_text_line_heights) 663 layout_text_lines: List[LayoutTextLine] = [] 664 for grid_idx, grid in enumerate(grids): 665 layout_text_lines.extend( 666 self.fill_normal_text_lines_to_grid( 667 normal_text_line_heights=normal_text_line_heights, 668 normal_text_line_heights_expected_probs=normal_text_line_heights_expected_probs, 669 normal_text_line_heights_acc_areas=normal_text_line_heights_acc_areas, 670 grid_idx=grid_idx, 671 grid=grid, 672 rng=rng, 673 ) 674 ) 675 676 if large_text_line_gird: 677 layout_text_lines.append(self.fill_large_text_line_to_grid(large_text_line_gird, rng)) 678 679 # Must place text line. 680 assert layout_text_lines 681 682 return ( 683 layout_text_lines, 684 large_text_line_gird, 685 grids, 686 ) 687 688 def sample_layout_images(self, height: int, width: int, rng: RandomGenerator): 689 # Image could be overlapped with text lines. 690 layout_images: List[LayoutImage] = [] 691 692 num_layout_images = rng.integers( 693 self.config.num_images_min, 694 self.config.num_images_max + 1, 695 ) 696 for _ in range(num_layout_images): 697 # NOTE: It's ok to have overlapping images. 698 image_height_ratio = rng.uniform( 699 self.config.image_height_ratio_min, 700 self.config.image_height_ratio_max, 701 ) 702 image_height = round(height * image_height_ratio) 703 704 image_width_ratio = rng.uniform( 705 self.config.image_width_ratio_min, 706 self.config.image_width_ratio_max, 707 ) 708 image_width = round(width * image_width_ratio) 709 710 up = rng.integers(0, height - image_height + 1) 711 down = up + image_height - 1 712 left = rng.integers(0, width - image_width + 1) 713 right = left + image_width - 1 714 layout_images.append(LayoutImage(box=Box(up=up, down=down, left=left, right=right))) 715 716 return layout_images 717 718 @classmethod 719 def boxes_are_overlapped(cls, box0: Box, box1: Box): 720 vert_overlapped = (box0.down >= box1.up and box1.down >= box0.up) 721 hori_overlapped = (box0.right >= box1.left and box1.right >= box0.left) 722 return vert_overlapped and hori_overlapped 723 724 def sample_layout_barcode_qrs( 725 self, 726 height: int, 727 width: int, 728 layout_text_lines: Sequence[LayoutTextLine], 729 rng: RandomGenerator, 730 ): 731 reference_height = self.get_reference_height(height=height, width=width) 732 733 layout_barcode_qrs: List[LayoutBarcodeQr] = [] 734 735 num_layout_barcode_qrs = rng.integers( 736 self.config.num_barcode_qrs_min, 737 self.config.num_barcode_qrs_max + 1, 738 ) 739 num_retries = 3 740 while num_layout_barcode_qrs > 0 and num_retries > 0: 741 barcode_qr_length_ratio = rng.uniform( 742 self.config.barcode_qr_length_ratio_min, 743 self.config.barcode_qr_length_ratio_max, 744 ) 745 barcode_qr_length = round(barcode_qr_length_ratio * reference_height) 746 barcode_qr_length = min(height, width, barcode_qr_length) 747 748 # Place QR code next to text line. 749 anchor_layout_text_line_box = rng_choice(rng, layout_text_lines).box 750 anchor_layout_text_line_box_center = anchor_layout_text_line_box.get_center_point() 751 placement = rng_choice(rng, tuple(LayoutXcodePlacement)) 752 753 if placement in (LayoutXcodePlacement.NEXT_TO_DOWN, LayoutXcodePlacement.NEXT_TO_UP): 754 if placement == LayoutXcodePlacement.NEXT_TO_DOWN: 755 up = anchor_layout_text_line_box.down + 1 756 down = up + barcode_qr_length - 1 757 if down >= height: 758 num_retries -= 1 759 continue 760 else: 761 assert placement == LayoutXcodePlacement.NEXT_TO_UP 762 down = anchor_layout_text_line_box.up - 1 763 up = down + 1 - barcode_qr_length 764 if up < 0: 765 num_retries -= 1 766 continue 767 768 left_min = max( 769 0, 770 anchor_layout_text_line_box_center.x - barcode_qr_length, 771 ) 772 left_max = min( 773 width - barcode_qr_length, 774 anchor_layout_text_line_box_center.x, 775 ) 776 if left_min > left_max: 777 num_retries -= 1 778 continue 779 left = int(rng.integers(left_min, left_max + 1)) 780 right = left + barcode_qr_length - 1 781 782 else: 783 assert placement in ( 784 LayoutXcodePlacement.NEXT_TO_RIGHT, 785 LayoutXcodePlacement.NEXT_TO_LEFT, 786 ) 787 788 if placement == LayoutXcodePlacement.NEXT_TO_RIGHT: 789 left = anchor_layout_text_line_box.right + 1 790 right = left + barcode_qr_length - 1 791 if right >= width: 792 num_retries -= 1 793 continue 794 else: 795 assert placement == LayoutXcodePlacement.NEXT_TO_LEFT 796 right = anchor_layout_text_line_box.left - 1 797 left = right + 1 - barcode_qr_length 798 if left < 0: 799 num_retries -= 1 800 continue 801 802 up_min = max( 803 0, 804 anchor_layout_text_line_box_center.y - barcode_qr_length, 805 ) 806 up_max = min( 807 height - barcode_qr_length, 808 anchor_layout_text_line_box_center.y, 809 ) 810 if up_min > up_max: 811 num_retries -= 1 812 continue 813 814 up = int(rng.integers(up_min, up_max + 1)) 815 down = up + barcode_qr_length - 1 816 817 num_layout_barcode_qrs -= 1 818 layout_barcode_qrs.append( 819 LayoutBarcodeQr(box=Box( 820 up=up, 821 down=down, 822 left=left, 823 right=right, 824 )) 825 ) 826 827 return layout_barcode_qrs 828 829 def sample_layout_barcode_code39s( 830 self, 831 height: int, 832 width: int, 833 layout_text_lines: Sequence[LayoutTextLine], 834 rng: RandomGenerator, 835 ): 836 reference_height = self.get_reference_height(height=height, width=width) 837 838 layout_barcode_code39s: List[LayoutBarcodeCode39] = [] 839 840 num_layout_barcode_code39s = rng.integers( 841 self.config.num_barcode_code39s_min, 842 self.config.num_barcode_code39s_max + 1, 843 ) 844 num_retries = 3 845 while num_layout_barcode_code39s > 0 and num_retries > 0: 846 barcode_code39_height_ratio = rng.uniform( 847 self.config.barcode_code39_height_ratio_min, 848 self.config.barcode_code39_height_ratio_max, 849 ) 850 barcode_code39_height = round(barcode_code39_height_ratio * reference_height) 851 barcode_code39_height = min(height, width, barcode_code39_height) 852 853 barcode_code39_num_chars = int( 854 rng.integers( 855 self.config.barcode_code39_num_chars_min, 856 self.config.barcode_code39_num_chars_max + 1, 857 ) 858 ) 859 barcode_code39_width = round( 860 barcode_code39_height * self.config.barcode_code39_aspect_ratio 861 * barcode_code39_num_chars 862 ) 863 864 # Place Bar code next to text line. 865 anchor_layout_text_line_box = rng_choice(rng, layout_text_lines).box 866 anchor_layout_text_line_box_center = anchor_layout_text_line_box.get_center_point() 867 placement = rng_choice(rng, tuple(LayoutXcodePlacement)) 868 869 if placement in (LayoutXcodePlacement.NEXT_TO_DOWN, LayoutXcodePlacement.NEXT_TO_UP): 870 if placement == LayoutXcodePlacement.NEXT_TO_DOWN: 871 up = anchor_layout_text_line_box.down + 1 872 down = up + barcode_code39_height - 1 873 if down >= height: 874 num_retries -= 1 875 continue 876 else: 877 assert placement == LayoutXcodePlacement.NEXT_TO_UP 878 down = anchor_layout_text_line_box.up - 1 879 up = down + 1 - barcode_code39_height 880 if up < 0: 881 num_retries -= 1 882 continue 883 884 left_min = max( 885 0, 886 anchor_layout_text_line_box_center.x - barcode_code39_width, 887 ) 888 left_max = min( 889 width - barcode_code39_width, 890 anchor_layout_text_line_box_center.x, 891 ) 892 if left_min > left_max: 893 num_retries -= 1 894 continue 895 left = int(rng.integers(left_min, left_max + 1)) 896 right = left + barcode_code39_width - 1 897 898 else: 899 assert placement in ( 900 LayoutXcodePlacement.NEXT_TO_RIGHT, 901 LayoutXcodePlacement.NEXT_TO_LEFT, 902 ) 903 904 if placement == LayoutXcodePlacement.NEXT_TO_RIGHT: 905 left = anchor_layout_text_line_box.right + 1 906 right = left + barcode_code39_width - 1 907 if right >= width: 908 num_retries -= 1 909 continue 910 else: 911 assert placement == LayoutXcodePlacement.NEXT_TO_LEFT 912 right = anchor_layout_text_line_box.left - 1 913 left = right + 1 - barcode_code39_width 914 if left < 0: 915 num_retries -= 1 916 continue 917 918 up_min = max( 919 0, 920 anchor_layout_text_line_box_center.y - barcode_code39_height, 921 ) 922 up_max = min( 923 height - barcode_code39_height, 924 anchor_layout_text_line_box_center.y, 925 ) 926 if up_min > up_max: 927 num_retries -= 1 928 continue 929 930 up = int(rng.integers(up_min, up_max + 1)) 931 down = up + barcode_code39_height - 1 932 933 num_layout_barcode_code39s -= 1 934 layout_barcode_code39s.append( 935 LayoutBarcodeCode39(box=Box( 936 up=up, 937 down=down, 938 left=left, 939 right=right, 940 )) 941 ) 942 943 return layout_barcode_code39s 944 945 def sample_layout_barcode_qrs_and_layout_barcode_code39s( 946 self, 947 height: int, 948 width: int, 949 layout_text_lines: Sequence[LayoutTextLine], 950 rng: RandomGenerator, 951 ): 952 layout_barcode_qrs = self.sample_layout_barcode_qrs( 953 height=height, 954 width=width, 955 layout_text_lines=layout_text_lines, 956 rng=rng, 957 ) 958 959 layout_barcode_code39s = self.sample_layout_barcode_code39s( 960 height=height, 961 width=width, 962 layout_text_lines=layout_text_lines, 963 rng=rng, 964 ) 965 966 if layout_barcode_qrs or layout_barcode_code39s: 967 # Barcode could not be overlapped with text lines. 968 # Hence need to remove the overlapped text lines. 969 box_overlapping_validator = BoxOverlappingValidator( 970 itertools.chain( 971 (layout_barcode_qr.box for layout_barcode_qr in layout_barcode_qrs), 972 (layout_barcode_code39.box for layout_barcode_code39 in layout_barcode_code39s), 973 ) 974 ) 975 976 keep_layout_text_lines: List[LayoutTextLine] = [] 977 for layout_text_line in layout_text_lines: 978 if not box_overlapping_validator.is_overlapped(layout_text_line.box): 979 keep_layout_text_lines.append(layout_text_line) 980 layout_text_lines = keep_layout_text_lines 981 982 return layout_barcode_qrs, layout_barcode_code39s, layout_text_lines 983 984 @classmethod 985 def get_text_line_area(cls, layout_text_lines: Sequence[LayoutTextLine]): 986 # Sample within the text line area. 987 text_line_up = min(layout_text_line.box.up for layout_text_line in layout_text_lines) 988 text_line_down = max(layout_text_line.box.down for layout_text_line in layout_text_lines) 989 text_line_left = min(layout_text_line.box.left for layout_text_line in layout_text_lines) 990 text_line_right = max(layout_text_line.box.right for layout_text_line in layout_text_lines) 991 return ( 992 text_line_up, 993 text_line_down, 994 text_line_left, 995 text_line_right, 996 ) 997 998 def sample_layout_non_text_symbols( 999 self, 1000 height: int, 1001 width: int, 1002 layout_text_lines: Sequence[LayoutTextLine], 1003 rng: RandomGenerator, 1004 ): 1005 reference_height = self.get_reference_height(height=height, width=width) 1006 1007 text_line_up = 0 1008 text_line_down = height - 1 1009 text_line_left = 0 1010 text_line_right = width - 1 1011 1012 layout_non_text_symbols: List[LayoutNonTextSymbol] = [] 1013 1014 num_non_text_symbols = int( 1015 rng.integers( 1016 self.config.num_non_text_symbols_min, 1017 self.config.num_non_text_symbols_max + 1, 1018 ) 1019 ) 1020 for _ in range(num_non_text_symbols): 1021 non_text_symbol_height_ratio = rng.uniform( 1022 self.config.non_text_symbol_height_ratio_min, 1023 self.config.non_text_symbol_height_ratio_max, 1024 ) 1025 non_text_symbol_height = round(non_text_symbol_height_ratio * reference_height) 1026 1027 non_text_symbol_aspect_ratio = rng.uniform( 1028 self.config.non_text_symbol_aspect_ratio_min, 1029 self.config.non_text_symbol_aspect_ratio_max, 1030 ) 1031 non_text_symbol_width = round(non_text_symbol_aspect_ratio * non_text_symbol_height) 1032 1033 box = None 1034 overlapped = True 1035 for _ in range(self.config.num_retries_to_get_non_overlapped_non_text_symbol): 1036 up_max = text_line_down + 1 - non_text_symbol_height 1037 up = int(rng.integers(text_line_up, up_max + 1)) 1038 down = up + non_text_symbol_height - 1 1039 assert up < down 1040 1041 left_max = text_line_right + 1 - non_text_symbol_width 1042 left = int(rng.integers(text_line_left, left_max + 1)) 1043 right = left + non_text_symbol_width - 1 1044 assert left < right 1045 1046 box = Box(up=up, down=down, left=left, right=right) 1047 1048 cur_overlapped = False 1049 for layout_text_line in layout_text_lines: 1050 if self.boxes_are_overlapped(box, layout_text_line.box): 1051 cur_overlapped = True 1052 break 1053 1054 if not cur_overlapped: 1055 overlapped = False 1056 break 1057 1058 assert box 1059 1060 if not overlapped: 1061 alpha = float( 1062 rng.uniform( 1063 self.config.non_text_symbol_non_overlapped_alpha_min, 1064 self.config.non_text_symbol_non_overlapped_alpha_max, 1065 ) 1066 ) 1067 else: 1068 alpha = float( 1069 rng.uniform( 1070 self.config.non_text_symbol_overlapped_alpha_min, 1071 self.config.non_text_symbol_overlapped_alpha_max, 1072 ) 1073 ) 1074 1075 layout_non_text_symbols.append(LayoutNonTextSymbol( 1076 box=box, 1077 alpha=alpha, 1078 )) 1079 1080 return layout_non_text_symbols 1081 1082 def sample_layout_seal_impressions( 1083 self, 1084 height: int, 1085 width: int, 1086 layout_text_lines: Sequence[LayoutTextLine], 1087 rng: RandomGenerator, 1088 ): 1089 reference_height = self.get_reference_height(height=height, width=width) 1090 1091 ( 1092 text_line_up, 1093 text_line_down, 1094 text_line_left, 1095 text_line_right, 1096 ) = self.get_text_line_area(layout_text_lines) 1097 1098 # Place seal impressions. 1099 layout_seal_impressions: List[LayoutSealImpression] = [] 1100 1101 num_seal_impressions = int( 1102 rng.integers( 1103 self.config.num_seal_impressions_min, 1104 self.config.num_seal_impressions_max + 1, 1105 ) 1106 ) 1107 for _ in range(num_seal_impressions): 1108 # Sample height. 1109 seal_impression_height_ratio = float( 1110 rng.uniform( 1111 self.config.seal_impression_height_ratio_min, 1112 self.config.seal_impression_height_ratio_max, 1113 ) 1114 ) 1115 seal_impression_height = round(seal_impression_height_ratio * reference_height) 1116 seal_impression_height = min(text_line_down + 1 - text_line_up, seal_impression_height) 1117 1118 # Make sure even. 1119 if seal_impression_height % 2 != 0: 1120 seal_impression_height -= 1 1121 1122 # Sample width. 1123 shape_mode = rng_choice( 1124 rng, 1125 self.seal_impression_ellipse_shape_modes, 1126 probs=self.seal_impression_ellipse_shape_modes_probs, 1127 ) 1128 if shape_mode == SealImpressionEllipseShapeMode.CIRCLE: 1129 seal_impression_width = seal_impression_height 1130 1131 elif shape_mode == SealImpressionEllipseShapeMode.GENERAL_ELLIPSE: 1132 aspect_ratio = float( 1133 rng.uniform( 1134 self.config.seal_impression_general_ellipse_aspect_ratio_min, 1135 self.config.seal_impression_general_ellipse_aspect_ratio_max, 1136 ) 1137 ) 1138 seal_impression_width = round(aspect_ratio * seal_impression_height) 1139 1140 else: 1141 raise NotImplementedError() 1142 1143 seal_impression_width = min(text_line_right + 1 - text_line_left, seal_impression_width) 1144 1145 # Make sure even. 1146 if seal_impression_width % 2 != 0: 1147 seal_impression_width -= 1 1148 1149 seal_impression_up_max = text_line_down + 1 - seal_impression_height 1150 seal_impression_up = int(rng.integers( 1151 text_line_up, 1152 seal_impression_up_max + 1, 1153 )) 1154 seal_impression_down = seal_impression_up + seal_impression_height - 1 1155 1156 seal_impression_left_max = text_line_right + 1 - seal_impression_width 1157 seal_impression_left = int(rng.integers( 1158 text_line_left, 1159 seal_impression_left_max + 1, 1160 )) 1161 seal_impression_right = seal_impression_left + seal_impression_width - 1 1162 1163 angle = int( 1164 rng.integers( 1165 self.config.seal_impression_angle_min, 1166 self.config.seal_impression_angle_max + 1, 1167 ) 1168 ) 1169 angle = angle % 360 1170 1171 layout_seal_impressions.append( 1172 LayoutSealImpression( 1173 box=Box( 1174 up=seal_impression_up, 1175 down=seal_impression_down, 1176 left=seal_impression_left, 1177 right=seal_impression_right, 1178 ), 1179 angle=angle, 1180 ) 1181 ) 1182 1183 return layout_seal_impressions 1184 1185 def generate_disconnected_text_regions( 1186 self, 1187 layout_text_lines: Sequence[LayoutTextLine], 1188 ): 1189 grid_idx_to_layout_text_lines: DefaultDict[int, List[LayoutTextLine]] = defaultdict(list) 1190 for layout_text_line in layout_text_lines: 1191 grid_idx_to_layout_text_lines[layout_text_line.grid_idx].append(layout_text_line) 1192 1193 disconnected_text_regions: List[DisconnectedTextRegion] = [] 1194 1195 for _, layout_text_lines in sorted( 1196 grid_idx_to_layout_text_lines.items(), 1197 key=lambda p: p[0], 1198 ): 1199 layout_text_lines = sorted(layout_text_lines, key=lambda ltl: ltl.text_line_idx) 1200 1201 begin = 0 1202 while begin < len(layout_text_lines): 1203 text_line_height_min = layout_text_lines[begin].text_line_height 1204 text_line_height_max = text_line_height_min 1205 1206 # Find [begin, end) interval satisfying the condition. 1207 end = begin + 1 1208 while end < len(layout_text_lines): 1209 text_line_height = layout_text_lines[end].text_line_height 1210 text_line_height_min = min(text_line_height_min, text_line_height) 1211 text_line_height_max = max(text_line_height_max, text_line_height) 1212 if text_line_height_max / text_line_height_min \ 1213 > self.config.disconnected_text_region_polygons_height_ratio_max: 1214 break 1215 else: 1216 end += 1 1217 1218 # To polygon. 1219 # NOTE: Simply using a bounding box is enough. 1220 # This method is common to all glyph sequences. 1221 cur_layout_text_lines = layout_text_lines[begin:end] 1222 bounding_box = Box( 1223 up=min(ltl.box.up for ltl in cur_layout_text_lines), 1224 down=max(ltl.box.down for ltl in cur_layout_text_lines), 1225 left=min(ltl.box.left for ltl in cur_layout_text_lines), 1226 right=max(ltl.box.right for ltl in cur_layout_text_lines), 1227 ) 1228 step = min( 1229 itertools.chain.from_iterable(ltl.box.shape for ltl in cur_layout_text_lines) 1230 ) 1231 disconnected_text_regions.append( 1232 DisconnectedTextRegion(polygon=bounding_box.to_polygon(step=step)) 1233 ) 1234 1235 # Move to next. 1236 begin = end 1237 1238 return disconnected_text_regions 1239 1240 def generate_non_text_regions( 1241 self, 1242 height: int, 1243 width: int, 1244 layout_text_lines: Sequence[LayoutTextLine], 1245 rng: RandomGenerator, 1246 ): 1247 box_overlapping_validator = BoxOverlappingValidator( 1248 layout_text_line.box for layout_text_line in layout_text_lines 1249 ) 1250 directions = [ 1251 LayoutNonTextLineDirection.UP, 1252 LayoutNonTextLineDirection.DOWN, 1253 LayoutNonTextLineDirection.LEFT, 1254 LayoutNonTextLineDirection.RIGHT, 1255 ] 1256 1257 lntl_boxes: List[Box] = [] 1258 for layout_text_line in layout_text_lines: 1259 ltl_box = layout_text_line.box 1260 1261 for direction_idx in rng.permutation(len(directions)): 1262 direction = directions[direction_idx] 1263 1264 if direction == LayoutNonTextLineDirection.UP: 1265 lntl_box = Box( 1266 up=ltl_box.up - ltl_box.height, 1267 down=ltl_box.up - 1, 1268 left=ltl_box.left, 1269 right=ltl_box.right, 1270 ) 1271 1272 elif direction == LayoutNonTextLineDirection.DOWN: 1273 lntl_box = Box( 1274 up=ltl_box.down + 1, 1275 down=ltl_box.down + ltl_box.height, 1276 left=ltl_box.left, 1277 right=ltl_box.right, 1278 ) 1279 1280 elif direction == LayoutNonTextLineDirection.LEFT: 1281 lntl_box = Box( 1282 up=ltl_box.up, 1283 down=ltl_box.down, 1284 left=ltl_box.left - ltl_box.width, 1285 right=ltl_box.left - 1, 1286 ) 1287 1288 elif direction == LayoutNonTextLineDirection.RIGHT: 1289 lntl_box = Box( 1290 up=ltl_box.up, 1291 down=ltl_box.down, 1292 left=ltl_box.right + 1, 1293 right=ltl_box.right + ltl_box.width, 1294 ) 1295 1296 else: 1297 raise NotImplementedError() 1298 1299 # Ignore invalid box. 1300 if not lntl_box.valid: 1301 continue 1302 if lntl_box.down >= height or lntl_box.right >= width: 1303 continue 1304 1305 assert ltl_box.shape == lntl_box.shape 1306 1307 # Ignore box that is overlapped with any text lines. 1308 if box_overlapping_validator.is_overlapped(lntl_box): 1309 continue 1310 1311 # Keep only the first valid direction. 1312 lntl_boxes.append(lntl_box) 1313 break 1314 1315 step = max( 1316 1, 1317 min(itertools.chain.from_iterable(lntl_box.shape for lntl_box in lntl_boxes)), 1318 ) 1319 non_text_regions = [ 1320 NonTextRegion(polygon=lntl_box.to_polygon(step=step)) for lntl_box in lntl_boxes 1321 ] 1322 return non_text_regions 1323 1324 def run(self, input: PageLayoutStepInput, rng: RandomGenerator): 1325 page_shape_step_output = input.page_shape_step_output 1326 height = page_shape_step_output.height 1327 width = page_shape_step_output.width 1328 1329 # Text lines. 1330 ( 1331 layout_text_lines, 1332 large_text_line_gird, 1333 grids, 1334 ) = self.sample_layout_text_lines(height=height, width=width, rng=rng) 1335 1336 # Images. 1337 layout_images = self.sample_layout_images(height=height, width=width, rng=rng) 1338 1339 # QR codes & Bar codes. 1340 # NOTE: Some layout_text_lines could be dropped. 1341 ( 1342 layout_barcode_qrs, 1343 layout_barcode_code39s, 1344 layout_text_lines, 1345 ) = self.sample_layout_barcode_qrs_and_layout_barcode_code39s( 1346 height=height, 1347 width=width, 1348 layout_text_lines=layout_text_lines, 1349 rng=rng, 1350 ) 1351 1352 # Non-text symbols. 1353 layout_non_text_symbols = self.sample_layout_non_text_symbols( 1354 height=height, 1355 width=width, 1356 layout_text_lines=layout_text_lines, 1357 rng=rng, 1358 ) 1359 1360 # Seal impressions. 1361 layout_seal_impressions = self.sample_layout_seal_impressions( 1362 height=height, 1363 width=width, 1364 layout_text_lines=layout_text_lines, 1365 rng=rng, 1366 ) 1367 1368 # For char-level polygon regression. 1369 disconnected_text_regions = self.generate_disconnected_text_regions( 1370 layout_text_lines=layout_text_lines, 1371 ) 1372 1373 # For sampling negative text region area. 1374 non_text_regions = self.generate_non_text_regions( 1375 height=height, 1376 width=width, 1377 layout_text_lines=layout_text_lines, 1378 rng=rng, 1379 ) 1380 1381 return PageLayoutStepOutput( 1382 page_layout=PageLayout( 1383 height=height, 1384 width=width, 1385 layout_text_lines=layout_text_lines, 1386 layout_non_text_symbols=layout_non_text_symbols, 1387 layout_seal_impressions=layout_seal_impressions, 1388 layout_images=layout_images, 1389 layout_barcode_qrs=layout_barcode_qrs, 1390 layout_barcode_code39s=layout_barcode_code39s, 1391 disconnected_text_regions=disconnected_text_regions, 1392 non_text_regions=non_text_regions, 1393 ), 1394 debug_large_text_line_gird=large_text_line_gird, 1395 debug_grids=grids, 1396 ) 1397 1398 1399page_layout_step_factory = PipelineStepFactory(PageLayoutStep)
33class PageLayoutStepConfig: 34 # Text line heights. 35 reference_aspect_ratio: float = 1 / 1.4142 36 37 # Grid points. 38 grid_pad_ratio_min: float = 0.01 39 grid_pad_ratio_max: float = 0.05 40 grid_step_ratio_min: float = 1.0 41 grid_step_ratio_max: float = 1.1 42 grid_vert_gap_ratio_min: float = 0.0 43 grid_vert_gap_ratio_max: float = 0.5 44 grid_hori_gap_ratio_min: float = 1.0 45 grid_hori_gap_ratio_max: float = 1.15 46 47 # Large text line. 48 prob_add_large_text_line: float = 0.25 49 large_text_line_height_ratio_min: float = 0.05 50 large_text_line_height_ratio_max: float = 0.075 51 large_text_line_length_ratio_min: float = 0.5 52 large_text_line_length_ratio_max: float = 1.0 53 54 # Normal text line. 55 num_normal_text_line_heights_min: int = 2 56 num_normal_text_line_heights_max: int = 4 57 normal_text_line_height_ratio_min: float = 0.006 58 normal_text_line_height_ratio_max: float = 0.036 59 force_add_normal_text_line_height_ratio_min: bool = True 60 61 # Non-text symbol. 62 num_non_text_symbols_min: int = 0 63 num_non_text_symbols_max: int = 5 64 num_retries_to_get_non_overlapped_non_text_symbol: int = 5 65 non_text_symbol_height_ratio_min: float = 0.018 66 non_text_symbol_height_ratio_max: float = 0.064 67 non_text_symbol_aspect_ratio_min: float = 0.9 68 non_text_symbol_aspect_ratio_max: float = 1.111 69 non_text_symbol_non_overlapped_alpha_min: float = 0.8 70 non_text_symbol_non_overlapped_alpha_max: float = 1.0 71 non_text_symbol_overlapped_alpha_min: float = 0.15 72 non_text_symbol_overlapped_alpha_max: float = 0.55 73 74 prob_normal_text_line_diff_heights_gap: float = 0.5 75 prob_normal_text_line_gap: float = 0.5 76 normal_text_line_gap_ratio_min: float = 0.05 77 normal_text_line_gap_ratio_max: float = 1.25 78 normal_text_line_length_ratio_min: float = 0.5 79 normal_text_line_length_ratio_max: float = 1.0 80 81 # Image. 82 num_images_min: int = 0 83 num_images_max: int = 3 84 image_height_ratio_min: float = 0.1 85 image_height_ratio_max: float = 0.35 86 image_width_ratio_min: float = 0.1 87 image_width_ratio_max: float = 0.35 88 89 # Barcode (qr). 90 num_barcode_qrs_min: int = 0 91 num_barcode_qrs_max: int = 2 92 barcode_qr_length_ratio_min: float = 0.05 93 barcode_qr_length_ratio_max: float = 0.15 94 95 # Barcode (code39). 96 num_barcode_code39s_min: int = 0 97 num_barcode_code39s_max: int = 2 98 barcode_code39_height_ratio_min: float = 0.025 99 barcode_code39_height_ratio_max: float = 0.05 100 barcode_code39_aspect_ratio: float = 0.2854396602149411 101 barcode_code39_num_chars_min: int = 9 102 barcode_code39_num_chars_max: int = 13 103 104 # Seal impression. 105 num_seal_impressions_min: int = 1 106 num_seal_impressions_max: int = 3 107 seal_impression_angle_min: int = -45 108 seal_impression_angle_max: int = 45 109 seal_impression_height_ratio_min: float = 0.1 110 seal_impression_height_ratio_max: float = 0.2 111 seal_impression_weight_circle: float = 1 112 seal_impression_weight_general_ellipse: float = 1 113 seal_impression_general_ellipse_aspect_ratio_min: float = 0.75 114 seal_impression_general_ellipse_aspect_ratio_max: float = 1.333 115 116 # For char-level polygon regression. 117 disconnected_text_region_polygons_height_ratio_max: float = 2.0
2def __init__(self, reference_aspect_ratio=attr_dict['reference_aspect_ratio'].default, grid_pad_ratio_min=attr_dict['grid_pad_ratio_min'].default, grid_pad_ratio_max=attr_dict['grid_pad_ratio_max'].default, grid_step_ratio_min=attr_dict['grid_step_ratio_min'].default, grid_step_ratio_max=attr_dict['grid_step_ratio_max'].default, grid_vert_gap_ratio_min=attr_dict['grid_vert_gap_ratio_min'].default, grid_vert_gap_ratio_max=attr_dict['grid_vert_gap_ratio_max'].default, grid_hori_gap_ratio_min=attr_dict['grid_hori_gap_ratio_min'].default, grid_hori_gap_ratio_max=attr_dict['grid_hori_gap_ratio_max'].default, prob_add_large_text_line=attr_dict['prob_add_large_text_line'].default, large_text_line_height_ratio_min=attr_dict['large_text_line_height_ratio_min'].default, large_text_line_height_ratio_max=attr_dict['large_text_line_height_ratio_max'].default, large_text_line_length_ratio_min=attr_dict['large_text_line_length_ratio_min'].default, large_text_line_length_ratio_max=attr_dict['large_text_line_length_ratio_max'].default, num_normal_text_line_heights_min=attr_dict['num_normal_text_line_heights_min'].default, num_normal_text_line_heights_max=attr_dict['num_normal_text_line_heights_max'].default, normal_text_line_height_ratio_min=attr_dict['normal_text_line_height_ratio_min'].default, normal_text_line_height_ratio_max=attr_dict['normal_text_line_height_ratio_max'].default, force_add_normal_text_line_height_ratio_min=attr_dict['force_add_normal_text_line_height_ratio_min'].default, num_non_text_symbols_min=attr_dict['num_non_text_symbols_min'].default, num_non_text_symbols_max=attr_dict['num_non_text_symbols_max'].default, num_retries_to_get_non_overlapped_non_text_symbol=attr_dict['num_retries_to_get_non_overlapped_non_text_symbol'].default, non_text_symbol_height_ratio_min=attr_dict['non_text_symbol_height_ratio_min'].default, non_text_symbol_height_ratio_max=attr_dict['non_text_symbol_height_ratio_max'].default, non_text_symbol_aspect_ratio_min=attr_dict['non_text_symbol_aspect_ratio_min'].default, non_text_symbol_aspect_ratio_max=attr_dict['non_text_symbol_aspect_ratio_max'].default, non_text_symbol_non_overlapped_alpha_min=attr_dict['non_text_symbol_non_overlapped_alpha_min'].default, non_text_symbol_non_overlapped_alpha_max=attr_dict['non_text_symbol_non_overlapped_alpha_max'].default, non_text_symbol_overlapped_alpha_min=attr_dict['non_text_symbol_overlapped_alpha_min'].default, non_text_symbol_overlapped_alpha_max=attr_dict['non_text_symbol_overlapped_alpha_max'].default, prob_normal_text_line_diff_heights_gap=attr_dict['prob_normal_text_line_diff_heights_gap'].default, prob_normal_text_line_gap=attr_dict['prob_normal_text_line_gap'].default, normal_text_line_gap_ratio_min=attr_dict['normal_text_line_gap_ratio_min'].default, normal_text_line_gap_ratio_max=attr_dict['normal_text_line_gap_ratio_max'].default, normal_text_line_length_ratio_min=attr_dict['normal_text_line_length_ratio_min'].default, normal_text_line_length_ratio_max=attr_dict['normal_text_line_length_ratio_max'].default, num_images_min=attr_dict['num_images_min'].default, num_images_max=attr_dict['num_images_max'].default, image_height_ratio_min=attr_dict['image_height_ratio_min'].default, image_height_ratio_max=attr_dict['image_height_ratio_max'].default, image_width_ratio_min=attr_dict['image_width_ratio_min'].default, image_width_ratio_max=attr_dict['image_width_ratio_max'].default, num_barcode_qrs_min=attr_dict['num_barcode_qrs_min'].default, num_barcode_qrs_max=attr_dict['num_barcode_qrs_max'].default, barcode_qr_length_ratio_min=attr_dict['barcode_qr_length_ratio_min'].default, barcode_qr_length_ratio_max=attr_dict['barcode_qr_length_ratio_max'].default, num_barcode_code39s_min=attr_dict['num_barcode_code39s_min'].default, num_barcode_code39s_max=attr_dict['num_barcode_code39s_max'].default, barcode_code39_height_ratio_min=attr_dict['barcode_code39_height_ratio_min'].default, barcode_code39_height_ratio_max=attr_dict['barcode_code39_height_ratio_max'].default, barcode_code39_aspect_ratio=attr_dict['barcode_code39_aspect_ratio'].default, barcode_code39_num_chars_min=attr_dict['barcode_code39_num_chars_min'].default, barcode_code39_num_chars_max=attr_dict['barcode_code39_num_chars_max'].default, num_seal_impressions_min=attr_dict['num_seal_impressions_min'].default, num_seal_impressions_max=attr_dict['num_seal_impressions_max'].default, seal_impression_angle_min=attr_dict['seal_impression_angle_min'].default, seal_impression_angle_max=attr_dict['seal_impression_angle_max'].default, seal_impression_height_ratio_min=attr_dict['seal_impression_height_ratio_min'].default, seal_impression_height_ratio_max=attr_dict['seal_impression_height_ratio_max'].default, seal_impression_weight_circle=attr_dict['seal_impression_weight_circle'].default, seal_impression_weight_general_ellipse=attr_dict['seal_impression_weight_general_ellipse'].default, seal_impression_general_ellipse_aspect_ratio_min=attr_dict['seal_impression_general_ellipse_aspect_ratio_min'].default, seal_impression_general_ellipse_aspect_ratio_max=attr_dict['seal_impression_general_ellipse_aspect_ratio_max'].default, disconnected_text_region_polygons_height_ratio_max=attr_dict['disconnected_text_region_polygons_height_ratio_max'].default): 3 self.reference_aspect_ratio = reference_aspect_ratio 4 self.grid_pad_ratio_min = grid_pad_ratio_min 5 self.grid_pad_ratio_max = grid_pad_ratio_max 6 self.grid_step_ratio_min = grid_step_ratio_min 7 self.grid_step_ratio_max = grid_step_ratio_max 8 self.grid_vert_gap_ratio_min = grid_vert_gap_ratio_min 9 self.grid_vert_gap_ratio_max = grid_vert_gap_ratio_max 10 self.grid_hori_gap_ratio_min = grid_hori_gap_ratio_min 11 self.grid_hori_gap_ratio_max = grid_hori_gap_ratio_max 12 self.prob_add_large_text_line = prob_add_large_text_line 13 self.large_text_line_height_ratio_min = large_text_line_height_ratio_min 14 self.large_text_line_height_ratio_max = large_text_line_height_ratio_max 15 self.large_text_line_length_ratio_min = large_text_line_length_ratio_min 16 self.large_text_line_length_ratio_max = large_text_line_length_ratio_max 17 self.num_normal_text_line_heights_min = num_normal_text_line_heights_min 18 self.num_normal_text_line_heights_max = num_normal_text_line_heights_max 19 self.normal_text_line_height_ratio_min = normal_text_line_height_ratio_min 20 self.normal_text_line_height_ratio_max = normal_text_line_height_ratio_max 21 self.force_add_normal_text_line_height_ratio_min = force_add_normal_text_line_height_ratio_min 22 self.num_non_text_symbols_min = num_non_text_symbols_min 23 self.num_non_text_symbols_max = num_non_text_symbols_max 24 self.num_retries_to_get_non_overlapped_non_text_symbol = num_retries_to_get_non_overlapped_non_text_symbol 25 self.non_text_symbol_height_ratio_min = non_text_symbol_height_ratio_min 26 self.non_text_symbol_height_ratio_max = non_text_symbol_height_ratio_max 27 self.non_text_symbol_aspect_ratio_min = non_text_symbol_aspect_ratio_min 28 self.non_text_symbol_aspect_ratio_max = non_text_symbol_aspect_ratio_max 29 self.non_text_symbol_non_overlapped_alpha_min = non_text_symbol_non_overlapped_alpha_min 30 self.non_text_symbol_non_overlapped_alpha_max = non_text_symbol_non_overlapped_alpha_max 31 self.non_text_symbol_overlapped_alpha_min = non_text_symbol_overlapped_alpha_min 32 self.non_text_symbol_overlapped_alpha_max = non_text_symbol_overlapped_alpha_max 33 self.prob_normal_text_line_diff_heights_gap = prob_normal_text_line_diff_heights_gap 34 self.prob_normal_text_line_gap = prob_normal_text_line_gap 35 self.normal_text_line_gap_ratio_min = normal_text_line_gap_ratio_min 36 self.normal_text_line_gap_ratio_max = normal_text_line_gap_ratio_max 37 self.normal_text_line_length_ratio_min = normal_text_line_length_ratio_min 38 self.normal_text_line_length_ratio_max = normal_text_line_length_ratio_max 39 self.num_images_min = num_images_min 40 self.num_images_max = num_images_max 41 self.image_height_ratio_min = image_height_ratio_min 42 self.image_height_ratio_max = image_height_ratio_max 43 self.image_width_ratio_min = image_width_ratio_min 44 self.image_width_ratio_max = image_width_ratio_max 45 self.num_barcode_qrs_min = num_barcode_qrs_min 46 self.num_barcode_qrs_max = num_barcode_qrs_max 47 self.barcode_qr_length_ratio_min = barcode_qr_length_ratio_min 48 self.barcode_qr_length_ratio_max = barcode_qr_length_ratio_max 49 self.num_barcode_code39s_min = num_barcode_code39s_min 50 self.num_barcode_code39s_max = num_barcode_code39s_max 51 self.barcode_code39_height_ratio_min = barcode_code39_height_ratio_min 52 self.barcode_code39_height_ratio_max = barcode_code39_height_ratio_max 53 self.barcode_code39_aspect_ratio = barcode_code39_aspect_ratio 54 self.barcode_code39_num_chars_min = barcode_code39_num_chars_min 55 self.barcode_code39_num_chars_max = barcode_code39_num_chars_max 56 self.num_seal_impressions_min = num_seal_impressions_min 57 self.num_seal_impressions_max = num_seal_impressions_max 58 self.seal_impression_angle_min = seal_impression_angle_min 59 self.seal_impression_angle_max = seal_impression_angle_max 60 self.seal_impression_height_ratio_min = seal_impression_height_ratio_min 61 self.seal_impression_height_ratio_max = seal_impression_height_ratio_max 62 self.seal_impression_weight_circle = seal_impression_weight_circle 63 self.seal_impression_weight_general_ellipse = seal_impression_weight_general_ellipse 64 self.seal_impression_general_ellipse_aspect_ratio_min = seal_impression_general_ellipse_aspect_ratio_min 65 self.seal_impression_general_ellipse_aspect_ratio_max = seal_impression_general_ellipse_aspect_ratio_max 66 self.disconnected_text_region_polygons_height_ratio_max = disconnected_text_region_polygons_height_ratio_max
Method generated by attrs for class PageLayoutStepConfig.
Method generated by attrs for class PageLayoutStepInput.
126class LayoutTextLine: 127 # grid_idx: 128 # == -1: for large text line. 129 # >= 0: for normal text lines. 130 grid_idx: int 131 # text_line_idx: index within a grid. 132 text_line_idx: int 133 text_line_height: int 134 box: Box 135 glyph_sequence: FontEngineRunConfigGlyphSequence
2def __init__(self, grid_idx, text_line_idx, text_line_height, box, glyph_sequence): 3 self.grid_idx = grid_idx 4 self.text_line_idx = text_line_idx 5 self.text_line_height = text_line_height 6 self.box = box 7 self.glyph_sequence = glyph_sequence
Method generated by attrs for class LayoutTextLine.
Method generated by attrs for class LayoutNonTextSymbol.
Method generated by attrs for class LayoutSealImpression.
Method generated by attrs for class LayoutBarcodeCode39.
166class LayoutXcodePlacement(Enum): 167 NEXT_TO_UP = 'next_to_up' 168 NEXT_TO_DOWN = 'next_to_down' 169 NEXT_TO_LEFT = 'next_to_left' 170 NEXT_TO_RIGHT = 'next_to_right'
An enumeration.
Inherited Members
- enum.Enum
- name
- value
Method generated by attrs for class DisconnectedTextRegion.
Method generated by attrs for class NonTextRegion.
184class LayoutNonTextLineDirection(Enum): 185 UP = 'up' 186 DOWN = 'down' 187 LEFT = 'left' 188 RIGHT = 'right'
An enumeration.
Inherited Members
- enum.Enum
- name
- value
192class PageLayout: 193 height: int 194 width: int 195 layout_text_lines: Sequence[LayoutTextLine] 196 layout_non_text_symbols: Sequence[LayoutNonTextSymbol] 197 layout_seal_impressions: Sequence[LayoutSealImpression] 198 layout_images: Sequence[LayoutImage] 199 layout_barcode_qrs: Sequence[LayoutBarcodeQr] 200 layout_barcode_code39s: Sequence[LayoutBarcodeCode39] 201 disconnected_text_regions: Sequence[DisconnectedTextRegion] 202 non_text_regions: Sequence[NonTextRegion]
2def __init__(self, height, width, layout_text_lines, layout_non_text_symbols, layout_seal_impressions, layout_images, layout_barcode_qrs, layout_barcode_code39s, disconnected_text_regions, non_text_regions): 3 self.height = height 4 self.width = width 5 self.layout_text_lines = layout_text_lines 6 self.layout_non_text_symbols = layout_non_text_symbols 7 self.layout_seal_impressions = layout_seal_impressions 8 self.layout_images = layout_images 9 self.layout_barcode_qrs = layout_barcode_qrs 10 self.layout_barcode_code39s = layout_barcode_code39s 11 self.disconnected_text_regions = disconnected_text_regions 12 self.non_text_regions = non_text_regions
Method generated by attrs for class PageLayout.
206class PageLayoutStepOutput: 207 page_layout: PageLayout 208 debug_large_text_line_gird: Optional[Box] 209 debug_grids: Sequence[Box]
2def __init__(self, page_layout, debug_large_text_line_gird, debug_grids): 3 self.page_layout = page_layout 4 self.debug_large_text_line_gird = debug_large_text_line_gird 5 self.debug_grids = debug_grids
Method generated by attrs for class PageLayoutStepOutput.
213class PrioritizedSegment: 214 vert_begin_idx: int = attrs.field(order=True) 215 hori_begin_idx: int = attrs.field(order=False) 216 hori_end_idx: int = attrs.field(order=False)
2def __init__(self, vert_begin_idx, hori_begin_idx, hori_end_idx): 3 self.vert_begin_idx = vert_begin_idx 4 self.hori_begin_idx = hori_begin_idx 5 self.hori_end_idx = hori_end_idx
Method generated by attrs for class PrioritizedSegment.
220class SealImpressionEllipseShapeMode(Enum): 221 CIRCLE = 'circle' 222 GENERAL_ELLIPSE = 'general_ellipse'
An enumeration.
Inherited Members
- enum.Enum
- name
- value
225class PageLayoutStep( 226 PipelineStep[ 227 PageLayoutStepConfig, 228 PageLayoutStepInput, 229 PageLayoutStepOutput, 230 ] 231): # yapf: disable 232 233 def __init__(self, config: PageLayoutStepConfig): 234 super().__init__(config) 235 236 ( 237 self.seal_impression_ellipse_shape_modes, 238 self.seal_impression_ellipse_shape_modes_probs, 239 ) = normalize_to_keys_and_probs([ 240 ( 241 SealImpressionEllipseShapeMode.CIRCLE, 242 self.config.seal_impression_weight_circle, 243 ), 244 ( 245 SealImpressionEllipseShapeMode.GENERAL_ELLIPSE, 246 self.config.seal_impression_weight_general_ellipse, 247 ), 248 ]) 249 250 def sample_large_text_line_height(self, reference_height: int, rng: RandomGenerator): 251 if rng.random() < self.config.prob_add_large_text_line: 252 large_text_line_height_ratio = rng.uniform( 253 self.config.large_text_line_height_ratio_min, 254 self.config.large_text_line_height_ratio_max, 255 ) 256 return round(large_text_line_height_ratio * reference_height) 257 258 else: 259 return None 260 261 def sample_normal_text_line_heights(self, reference_height: int, rng: RandomGenerator): 262 normal_text_line_heights: List[int] = [] 263 264 if self.config.force_add_normal_text_line_height_ratio_min: 265 normal_text_line_heights.append( 266 round(self.config.normal_text_line_height_ratio_min * reference_height) 267 ) 268 269 num_normal_text_line_heights = rng.integers( 270 self.config.num_normal_text_line_heights_min, 271 self.config.num_normal_text_line_heights_max + 1, 272 ) 273 ratio_step = ( 274 self.config.normal_text_line_height_ratio_max 275 - self.config.normal_text_line_height_ratio_min 276 ) / num_normal_text_line_heights 277 for step_idx in range(num_normal_text_line_heights): 278 ratio_min = self.config.normal_text_line_height_ratio_min + step_idx * ratio_step 279 ratio_max = ratio_min + ratio_step 280 ratio = rng.uniform(ratio_min, ratio_max) 281 normal_text_line_heights.append(round(ratio * reference_height)) 282 283 assert normal_text_line_heights 284 return sorted(normal_text_line_heights) 285 286 @classmethod 287 def generate_grid_points( 288 cls, 289 grid_pad_ratio: float, 290 grid_step: int, 291 grid_gap: int, 292 grid_gap_min: Optional[int], 293 length: int, 294 rng: RandomGenerator, 295 ): 296 grid_pad = min(length - grid_step, length * grid_pad_ratio) 297 assert grid_pad > 0 298 299 num_steps = (length - grid_pad + grid_gap) / (grid_step + grid_gap) 300 if not num_steps.is_integer(): 301 num_steps = math.floor(num_steps) 302 num_steps = int(num_steps) 303 304 grid_pad = length - grid_step * num_steps - grid_gap * (num_steps - 1) 305 assert grid_pad > 0 306 grid_pad = grid_pad // 2 307 308 begin = grid_pad 309 end = grid_pad + grid_step - 1 310 assert end < length - grid_pad 311 312 begins: List[int] = [] 313 ends: List[int] = [] 314 315 while end < length - grid_pad: 316 begins.append(begin) 317 ends.append(end) 318 319 cur_gap = grid_gap 320 if grid_gap_min is not None: 321 cur_gap = rng.integers(grid_gap_min, grid_gap + 1) 322 323 begin = end + cur_gap 324 end = begin + grid_step - 1 325 326 return begins, ends 327 328 def sample_grid_points( 329 self, 330 height: int, 331 width: int, 332 normal_text_line_heights_max: int, 333 rng: RandomGenerator, 334 ): 335 grid_pad_ratio = rng.uniform( 336 self.config.grid_pad_ratio_min, 337 self.config.grid_pad_ratio_max, 338 ) 339 340 grid_step_ratio = rng.uniform( 341 self.config.grid_step_ratio_min, 342 self.config.grid_step_ratio_max, 343 ) 344 grid_step = round(normal_text_line_heights_max * grid_step_ratio) 345 346 grid_vert_gap_min = round( 347 normal_text_line_heights_max * self.config.grid_vert_gap_ratio_min 348 ) 349 grid_vert_gap_max = round( 350 normal_text_line_heights_max * self.config.grid_vert_gap_ratio_max 351 ) 352 vert_begins, vert_ends = self.generate_grid_points( 353 grid_pad_ratio=grid_pad_ratio, 354 grid_step=grid_step, 355 grid_gap=grid_vert_gap_max, 356 grid_gap_min=grid_vert_gap_min, 357 length=height, 358 rng=rng, 359 ) 360 361 grid_hori_gap_ratio = rng.uniform( 362 self.config.grid_hori_gap_ratio_min, 363 self.config.grid_hori_gap_ratio_max, 364 ) 365 grid_hori_gap = round(normal_text_line_heights_max * grid_hori_gap_ratio) 366 grid_hori_gap = max(normal_text_line_heights_max, grid_hori_gap) 367 hori_begins, hori_ends = self.generate_grid_points( 368 grid_pad_ratio=grid_pad_ratio, 369 grid_step=grid_step, 370 grid_gap=grid_hori_gap, 371 grid_gap_min=None, 372 length=width, 373 rng=rng, 374 ) 375 return (vert_begins, vert_ends), (hori_begins, hori_ends) 376 377 def trim_grid_points_for_large_text_line( 378 self, 379 large_text_line_height: int, 380 vert_begins: Sequence[int], 381 vert_ends: Sequence[int], 382 hori_begins_min: int, 383 hori_ends_max: int, 384 ): 385 idx = 0 386 while idx < len(vert_begins) \ 387 and vert_ends[idx] + 1 - vert_begins[0] < large_text_line_height: 388 idx += 1 389 390 if idx >= len(vert_begins) - 1: 391 return None, 0 392 393 large_text_line_gird = Box( 394 up=vert_ends[idx] - large_text_line_height + 1, 395 down=vert_ends[idx], 396 left=hori_begins_min, 397 right=hori_ends_max, 398 ) 399 return large_text_line_gird, idx + 1 400 401 def sample_grids( 402 self, 403 vert_begins: Sequence[int], 404 vert_ends: Sequence[int], 405 hori_begins: Sequence[int], 406 hori_ends: Sequence[int], 407 rng: RandomGenerator, 408 ): 409 num_vert_ends = len(vert_ends) 410 assert num_vert_ends == len(vert_begins) 411 412 num_hori_ends = len(hori_ends) 413 assert num_hori_ends == len(hori_begins) 414 415 priority_queue = [ 416 PrioritizedSegment( 417 vert_begin_idx=0, 418 hori_begin_idx=0, 419 hori_end_idx=num_hori_ends - 1, 420 ) 421 ] 422 grids: List[Box] = [] 423 while priority_queue: 424 cur_segment = heapq.heappop(priority_queue) 425 426 # Deal with segments in the same level. 427 same_vert_segments: List[PrioritizedSegment] = [] 428 while priority_queue \ 429 and priority_queue[0].vert_begin_idx == cur_segment.vert_begin_idx: 430 same_vert_segments.append(heapq.heappop(priority_queue)) 431 432 if same_vert_segments: 433 # Rebuid segments. 434 same_vert_segments.append(cur_segment) 435 same_vert_segments = sorted( 436 same_vert_segments, 437 key=lambda segment: segment.hori_begin_idx, 438 ) 439 440 rebuilt_segments: List[PrioritizedSegment] = [] 441 rebuilt_begin = 0 442 while rebuilt_begin < len(same_vert_segments): 443 rebuilt_end = rebuilt_begin 444 while rebuilt_end + 1 < len(same_vert_segments) \ 445 and (same_vert_segments[rebuilt_end + 1].hori_begin_idx 446 == same_vert_segments[rebuilt_end].hori_end_idx + 1): 447 rebuilt_end += 1 448 rebuilt_segments.append( 449 PrioritizedSegment( 450 vert_begin_idx=cur_segment.vert_begin_idx, 451 hori_begin_idx=same_vert_segments[rebuilt_begin].hori_begin_idx, 452 hori_end_idx=same_vert_segments[rebuilt_end].hori_end_idx, 453 ) 454 ) 455 rebuilt_begin = rebuilt_end + 1 456 457 # Re-pick the first segment. 458 cur_segment = rebuilt_segments[0] 459 for other_segment in rebuilt_segments[1:]: 460 heapq.heappush(priority_queue, other_segment) 461 462 # Generate grids for the current segment. 463 vert_begin_idx = cur_segment.vert_begin_idx 464 465 hori_begin_idx = cur_segment.hori_begin_idx 466 hori_end_idx = cur_segment.hori_end_idx 467 while hori_begin_idx <= hori_end_idx: 468 # Randomly generate grid. 469 cur_vert_end_idx = rng.integers(vert_begin_idx, num_vert_ends) 470 471 # Try to sample segment with length >= 2. 472 if hori_end_idx + 1 - hori_begin_idx <= 3: 473 cur_hori_end_idx = hori_end_idx 474 else: 475 cur_hori_end_idx = rng.integers(hori_begin_idx + 1, hori_end_idx + 1) 476 477 grids.append( 478 Box( 479 up=vert_begins[vert_begin_idx], 480 down=vert_ends[cur_vert_end_idx], 481 left=hori_begins[hori_begin_idx], 482 right=hori_ends[cur_hori_end_idx], 483 ) 484 ) 485 next_vert_begin_idx = cur_vert_end_idx + 1 486 if next_vert_begin_idx < num_vert_ends: 487 heapq.heappush( 488 priority_queue, 489 PrioritizedSegment( 490 vert_begin_idx=next_vert_begin_idx, 491 hori_begin_idx=hori_begin_idx, 492 hori_end_idx=cur_hori_end_idx, 493 ), 494 ) 495 496 hori_begin_idx = cur_hori_end_idx + 1 497 498 return grids 499 500 @classmethod 501 def calculate_normal_text_line_heights_probs( 502 cls, 503 normal_text_line_heights_expected_probs: Sequence[float], 504 normal_text_line_heights_acc_areas: List[int], 505 ): 506 if sum(normal_text_line_heights_acc_areas) == 0: 507 normal_text_line_heights_cur_probs = [0.0] * len(normal_text_line_heights_acc_areas) 508 else: 509 normal_text_line_heights_cur_probs = normalize_to_probs( 510 normal_text_line_heights_acc_areas 511 ) 512 513 probs = normalize_to_probs([ 514 max(0.0, expected_prob - cur_prob) for cur_prob, expected_prob in zip( 515 normal_text_line_heights_cur_probs, 516 normal_text_line_heights_expected_probs, 517 ) 518 ]) 519 return probs 520 521 def fill_normal_text_lines_to_grid( 522 self, 523 normal_text_line_heights: Sequence[int], 524 normal_text_line_heights_expected_probs: Sequence[float], 525 normal_text_line_heights_acc_areas: List[int], 526 grid_idx: int, 527 grid: Box, 528 rng: RandomGenerator, 529 ): 530 normal_text_line_heights_indices = list(range(len(normal_text_line_heights))) 531 normal_text_line_heights_max = normal_text_line_heights[-1] 532 533 layout_text_lines: List[LayoutTextLine] = [] 534 up = grid.up 535 prev_text_line_height: Optional[int] = None 536 537 while up + normal_text_line_heights_max - 1 <= grid.down: 538 normal_text_line_heights_probs = self.calculate_normal_text_line_heights_probs( 539 normal_text_line_heights_expected_probs=normal_text_line_heights_expected_probs, 540 normal_text_line_heights_acc_areas=normal_text_line_heights_acc_areas, 541 ) 542 normal_text_line_height_idx = rng_choice( 543 rng=rng, 544 items=normal_text_line_heights_indices, 545 probs=normal_text_line_heights_probs, 546 ) 547 normal_text_line_height = normal_text_line_heights[normal_text_line_height_idx] 548 549 add_gap = False 550 if prev_text_line_height: 551 if prev_text_line_height != normal_text_line_height: 552 add_gap = (rng.random() < self.config.prob_normal_text_line_diff_heights_gap) 553 else: 554 add_gap = (rng.random() < self.config.prob_normal_text_line_gap) 555 if add_gap: 556 gap_ratio = rng.uniform( 557 self.config.normal_text_line_gap_ratio_min, 558 self.config.normal_text_line_gap_ratio_max, 559 ) 560 gap = round(gap_ratio * normal_text_line_height) 561 gap = min(grid.down - (up + normal_text_line_height - 1), gap) 562 up += gap 563 down = up + normal_text_line_height - 1 564 assert down <= grid.down 565 566 length_ratio = rng.uniform( 567 self.config.normal_text_line_length_ratio_min, 568 self.config.normal_text_line_length_ratio_max, 569 ) 570 normal_text_line_length = round(grid.width * length_ratio) 571 normal_text_line_length = max(normal_text_line_height, normal_text_line_length) 572 573 pad_max = grid.width - normal_text_line_length 574 pad = rng.integers(0, pad_max + 1) 575 left = grid.left + pad 576 right = left + normal_text_line_length - 1 577 assert right <= grid.right 578 579 text_line_idx = len(layout_text_lines) 580 layout_text_lines.append( 581 LayoutTextLine( 582 grid_idx=grid_idx, 583 text_line_idx=text_line_idx, 584 text_line_height=normal_text_line_height, 585 box=Box(up=up, down=down, left=left, right=right), 586 glyph_sequence=FontEngineRunConfigGlyphSequence.HORI_DEFAULT, 587 ) 588 ) 589 590 prev_text_line_height = normal_text_line_height 591 normal_text_line_heights_acc_areas[normal_text_line_height_idx] \ 592 += normal_text_line_length * normal_text_line_height 593 up = down + 1 594 595 return layout_text_lines 596 597 def fill_large_text_line_to_grid( 598 self, 599 large_text_line_gird: Box, 600 rng: RandomGenerator, 601 ): 602 length_ratio = rng.uniform( 603 self.config.large_text_line_length_ratio_min, 604 self.config.large_text_line_length_ratio_max, 605 ) 606 large_text_line_length = round(large_text_line_gird.width * length_ratio) 607 large_text_line_length = max(large_text_line_gird.height, large_text_line_length) 608 609 pad_max = large_text_line_gird.width - large_text_line_length 610 pad = rng.integers(0, pad_max + 1) 611 left = large_text_line_gird.left + pad 612 right = left + large_text_line_length - 1 613 assert right <= large_text_line_gird.right 614 615 return LayoutTextLine( 616 grid_idx=-1, 617 text_line_idx=0, 618 text_line_height=large_text_line_gird.height, 619 box=attrs.evolve(large_text_line_gird, left=left, right=right), 620 glyph_sequence=FontEngineRunConfigGlyphSequence.HORI_DEFAULT, 621 ) 622 623 def get_reference_height(self, height: int, width: int): 624 area = height * width 625 reference_height = math.ceil(math.sqrt(area / self.config.reference_aspect_ratio)) 626 return reference_height 627 628 def sample_layout_text_lines(self, height: int, width: int, rng: RandomGenerator): 629 reference_height = self.get_reference_height(height=height, width=width) 630 631 normal_text_line_heights = self.sample_normal_text_line_heights(reference_height, rng) 632 (vert_begins, vert_ends), (hori_begins, hori_ends) = self.sample_grid_points( 633 height=height, 634 width=width, 635 normal_text_line_heights_max=normal_text_line_heights[-1], 636 rng=rng, 637 ) 638 639 large_text_line_height = self.sample_large_text_line_height(reference_height, rng) 640 large_text_line_gird: Optional[Box] = None 641 if large_text_line_height is not None: 642 large_text_line_gird, vert_trim_idx = self.trim_grid_points_for_large_text_line( 643 large_text_line_height=large_text_line_height, 644 vert_begins=vert_begins, 645 vert_ends=vert_ends, 646 hori_begins_min=hori_begins[0], 647 hori_ends_max=hori_ends[-1], 648 ) 649 if large_text_line_gird is not None: 650 vert_begins = vert_begins[vert_trim_idx:] 651 vert_ends = vert_ends[vert_trim_idx:] 652 653 grids = self.sample_grids( 654 vert_begins=vert_begins, 655 vert_ends=vert_ends, 656 hori_begins=hori_begins, 657 hori_ends=hori_ends, 658 rng=rng, 659 ) 660 normal_text_line_heights_expected_probs = normalize_to_probs([ 661 1 / normal_text_line_height for normal_text_line_height in normal_text_line_heights 662 ]) 663 normal_text_line_heights_acc_areas = [0] * len(normal_text_line_heights) 664 layout_text_lines: List[LayoutTextLine] = [] 665 for grid_idx, grid in enumerate(grids): 666 layout_text_lines.extend( 667 self.fill_normal_text_lines_to_grid( 668 normal_text_line_heights=normal_text_line_heights, 669 normal_text_line_heights_expected_probs=normal_text_line_heights_expected_probs, 670 normal_text_line_heights_acc_areas=normal_text_line_heights_acc_areas, 671 grid_idx=grid_idx, 672 grid=grid, 673 rng=rng, 674 ) 675 ) 676 677 if large_text_line_gird: 678 layout_text_lines.append(self.fill_large_text_line_to_grid(large_text_line_gird, rng)) 679 680 # Must place text line. 681 assert layout_text_lines 682 683 return ( 684 layout_text_lines, 685 large_text_line_gird, 686 grids, 687 ) 688 689 def sample_layout_images(self, height: int, width: int, rng: RandomGenerator): 690 # Image could be overlapped with text lines. 691 layout_images: List[LayoutImage] = [] 692 693 num_layout_images = rng.integers( 694 self.config.num_images_min, 695 self.config.num_images_max + 1, 696 ) 697 for _ in range(num_layout_images): 698 # NOTE: It's ok to have overlapping images. 699 image_height_ratio = rng.uniform( 700 self.config.image_height_ratio_min, 701 self.config.image_height_ratio_max, 702 ) 703 image_height = round(height * image_height_ratio) 704 705 image_width_ratio = rng.uniform( 706 self.config.image_width_ratio_min, 707 self.config.image_width_ratio_max, 708 ) 709 image_width = round(width * image_width_ratio) 710 711 up = rng.integers(0, height - image_height + 1) 712 down = up + image_height - 1 713 left = rng.integers(0, width - image_width + 1) 714 right = left + image_width - 1 715 layout_images.append(LayoutImage(box=Box(up=up, down=down, left=left, right=right))) 716 717 return layout_images 718 719 @classmethod 720 def boxes_are_overlapped(cls, box0: Box, box1: Box): 721 vert_overlapped = (box0.down >= box1.up and box1.down >= box0.up) 722 hori_overlapped = (box0.right >= box1.left and box1.right >= box0.left) 723 return vert_overlapped and hori_overlapped 724 725 def sample_layout_barcode_qrs( 726 self, 727 height: int, 728 width: int, 729 layout_text_lines: Sequence[LayoutTextLine], 730 rng: RandomGenerator, 731 ): 732 reference_height = self.get_reference_height(height=height, width=width) 733 734 layout_barcode_qrs: List[LayoutBarcodeQr] = [] 735 736 num_layout_barcode_qrs = rng.integers( 737 self.config.num_barcode_qrs_min, 738 self.config.num_barcode_qrs_max + 1, 739 ) 740 num_retries = 3 741 while num_layout_barcode_qrs > 0 and num_retries > 0: 742 barcode_qr_length_ratio = rng.uniform( 743 self.config.barcode_qr_length_ratio_min, 744 self.config.barcode_qr_length_ratio_max, 745 ) 746 barcode_qr_length = round(barcode_qr_length_ratio * reference_height) 747 barcode_qr_length = min(height, width, barcode_qr_length) 748 749 # Place QR code next to text line. 750 anchor_layout_text_line_box = rng_choice(rng, layout_text_lines).box 751 anchor_layout_text_line_box_center = anchor_layout_text_line_box.get_center_point() 752 placement = rng_choice(rng, tuple(LayoutXcodePlacement)) 753 754 if placement in (LayoutXcodePlacement.NEXT_TO_DOWN, LayoutXcodePlacement.NEXT_TO_UP): 755 if placement == LayoutXcodePlacement.NEXT_TO_DOWN: 756 up = anchor_layout_text_line_box.down + 1 757 down = up + barcode_qr_length - 1 758 if down >= height: 759 num_retries -= 1 760 continue 761 else: 762 assert placement == LayoutXcodePlacement.NEXT_TO_UP 763 down = anchor_layout_text_line_box.up - 1 764 up = down + 1 - barcode_qr_length 765 if up < 0: 766 num_retries -= 1 767 continue 768 769 left_min = max( 770 0, 771 anchor_layout_text_line_box_center.x - barcode_qr_length, 772 ) 773 left_max = min( 774 width - barcode_qr_length, 775 anchor_layout_text_line_box_center.x, 776 ) 777 if left_min > left_max: 778 num_retries -= 1 779 continue 780 left = int(rng.integers(left_min, left_max + 1)) 781 right = left + barcode_qr_length - 1 782 783 else: 784 assert placement in ( 785 LayoutXcodePlacement.NEXT_TO_RIGHT, 786 LayoutXcodePlacement.NEXT_TO_LEFT, 787 ) 788 789 if placement == LayoutXcodePlacement.NEXT_TO_RIGHT: 790 left = anchor_layout_text_line_box.right + 1 791 right = left + barcode_qr_length - 1 792 if right >= width: 793 num_retries -= 1 794 continue 795 else: 796 assert placement == LayoutXcodePlacement.NEXT_TO_LEFT 797 right = anchor_layout_text_line_box.left - 1 798 left = right + 1 - barcode_qr_length 799 if left < 0: 800 num_retries -= 1 801 continue 802 803 up_min = max( 804 0, 805 anchor_layout_text_line_box_center.y - barcode_qr_length, 806 ) 807 up_max = min( 808 height - barcode_qr_length, 809 anchor_layout_text_line_box_center.y, 810 ) 811 if up_min > up_max: 812 num_retries -= 1 813 continue 814 815 up = int(rng.integers(up_min, up_max + 1)) 816 down = up + barcode_qr_length - 1 817 818 num_layout_barcode_qrs -= 1 819 layout_barcode_qrs.append( 820 LayoutBarcodeQr(box=Box( 821 up=up, 822 down=down, 823 left=left, 824 right=right, 825 )) 826 ) 827 828 return layout_barcode_qrs 829 830 def sample_layout_barcode_code39s( 831 self, 832 height: int, 833 width: int, 834 layout_text_lines: Sequence[LayoutTextLine], 835 rng: RandomGenerator, 836 ): 837 reference_height = self.get_reference_height(height=height, width=width) 838 839 layout_barcode_code39s: List[LayoutBarcodeCode39] = [] 840 841 num_layout_barcode_code39s = rng.integers( 842 self.config.num_barcode_code39s_min, 843 self.config.num_barcode_code39s_max + 1, 844 ) 845 num_retries = 3 846 while num_layout_barcode_code39s > 0 and num_retries > 0: 847 barcode_code39_height_ratio = rng.uniform( 848 self.config.barcode_code39_height_ratio_min, 849 self.config.barcode_code39_height_ratio_max, 850 ) 851 barcode_code39_height = round(barcode_code39_height_ratio * reference_height) 852 barcode_code39_height = min(height, width, barcode_code39_height) 853 854 barcode_code39_num_chars = int( 855 rng.integers( 856 self.config.barcode_code39_num_chars_min, 857 self.config.barcode_code39_num_chars_max + 1, 858 ) 859 ) 860 barcode_code39_width = round( 861 barcode_code39_height * self.config.barcode_code39_aspect_ratio 862 * barcode_code39_num_chars 863 ) 864 865 # Place Bar code next to text line. 866 anchor_layout_text_line_box = rng_choice(rng, layout_text_lines).box 867 anchor_layout_text_line_box_center = anchor_layout_text_line_box.get_center_point() 868 placement = rng_choice(rng, tuple(LayoutXcodePlacement)) 869 870 if placement in (LayoutXcodePlacement.NEXT_TO_DOWN, LayoutXcodePlacement.NEXT_TO_UP): 871 if placement == LayoutXcodePlacement.NEXT_TO_DOWN: 872 up = anchor_layout_text_line_box.down + 1 873 down = up + barcode_code39_height - 1 874 if down >= height: 875 num_retries -= 1 876 continue 877 else: 878 assert placement == LayoutXcodePlacement.NEXT_TO_UP 879 down = anchor_layout_text_line_box.up - 1 880 up = down + 1 - barcode_code39_height 881 if up < 0: 882 num_retries -= 1 883 continue 884 885 left_min = max( 886 0, 887 anchor_layout_text_line_box_center.x - barcode_code39_width, 888 ) 889 left_max = min( 890 width - barcode_code39_width, 891 anchor_layout_text_line_box_center.x, 892 ) 893 if left_min > left_max: 894 num_retries -= 1 895 continue 896 left = int(rng.integers(left_min, left_max + 1)) 897 right = left + barcode_code39_width - 1 898 899 else: 900 assert placement in ( 901 LayoutXcodePlacement.NEXT_TO_RIGHT, 902 LayoutXcodePlacement.NEXT_TO_LEFT, 903 ) 904 905 if placement == LayoutXcodePlacement.NEXT_TO_RIGHT: 906 left = anchor_layout_text_line_box.right + 1 907 right = left + barcode_code39_width - 1 908 if right >= width: 909 num_retries -= 1 910 continue 911 else: 912 assert placement == LayoutXcodePlacement.NEXT_TO_LEFT 913 right = anchor_layout_text_line_box.left - 1 914 left = right + 1 - barcode_code39_width 915 if left < 0: 916 num_retries -= 1 917 continue 918 919 up_min = max( 920 0, 921 anchor_layout_text_line_box_center.y - barcode_code39_height, 922 ) 923 up_max = min( 924 height - barcode_code39_height, 925 anchor_layout_text_line_box_center.y, 926 ) 927 if up_min > up_max: 928 num_retries -= 1 929 continue 930 931 up = int(rng.integers(up_min, up_max + 1)) 932 down = up + barcode_code39_height - 1 933 934 num_layout_barcode_code39s -= 1 935 layout_barcode_code39s.append( 936 LayoutBarcodeCode39(box=Box( 937 up=up, 938 down=down, 939 left=left, 940 right=right, 941 )) 942 ) 943 944 return layout_barcode_code39s 945 946 def sample_layout_barcode_qrs_and_layout_barcode_code39s( 947 self, 948 height: int, 949 width: int, 950 layout_text_lines: Sequence[LayoutTextLine], 951 rng: RandomGenerator, 952 ): 953 layout_barcode_qrs = self.sample_layout_barcode_qrs( 954 height=height, 955 width=width, 956 layout_text_lines=layout_text_lines, 957 rng=rng, 958 ) 959 960 layout_barcode_code39s = self.sample_layout_barcode_code39s( 961 height=height, 962 width=width, 963 layout_text_lines=layout_text_lines, 964 rng=rng, 965 ) 966 967 if layout_barcode_qrs or layout_barcode_code39s: 968 # Barcode could not be overlapped with text lines. 969 # Hence need to remove the overlapped text lines. 970 box_overlapping_validator = BoxOverlappingValidator( 971 itertools.chain( 972 (layout_barcode_qr.box for layout_barcode_qr in layout_barcode_qrs), 973 (layout_barcode_code39.box for layout_barcode_code39 in layout_barcode_code39s), 974 ) 975 ) 976 977 keep_layout_text_lines: List[LayoutTextLine] = [] 978 for layout_text_line in layout_text_lines: 979 if not box_overlapping_validator.is_overlapped(layout_text_line.box): 980 keep_layout_text_lines.append(layout_text_line) 981 layout_text_lines = keep_layout_text_lines 982 983 return layout_barcode_qrs, layout_barcode_code39s, layout_text_lines 984 985 @classmethod 986 def get_text_line_area(cls, layout_text_lines: Sequence[LayoutTextLine]): 987 # Sample within the text line area. 988 text_line_up = min(layout_text_line.box.up for layout_text_line in layout_text_lines) 989 text_line_down = max(layout_text_line.box.down for layout_text_line in layout_text_lines) 990 text_line_left = min(layout_text_line.box.left for layout_text_line in layout_text_lines) 991 text_line_right = max(layout_text_line.box.right for layout_text_line in layout_text_lines) 992 return ( 993 text_line_up, 994 text_line_down, 995 text_line_left, 996 text_line_right, 997 ) 998 999 def sample_layout_non_text_symbols( 1000 self, 1001 height: int, 1002 width: int, 1003 layout_text_lines: Sequence[LayoutTextLine], 1004 rng: RandomGenerator, 1005 ): 1006 reference_height = self.get_reference_height(height=height, width=width) 1007 1008 text_line_up = 0 1009 text_line_down = height - 1 1010 text_line_left = 0 1011 text_line_right = width - 1 1012 1013 layout_non_text_symbols: List[LayoutNonTextSymbol] = [] 1014 1015 num_non_text_symbols = int( 1016 rng.integers( 1017 self.config.num_non_text_symbols_min, 1018 self.config.num_non_text_symbols_max + 1, 1019 ) 1020 ) 1021 for _ in range(num_non_text_symbols): 1022 non_text_symbol_height_ratio = rng.uniform( 1023 self.config.non_text_symbol_height_ratio_min, 1024 self.config.non_text_symbol_height_ratio_max, 1025 ) 1026 non_text_symbol_height = round(non_text_symbol_height_ratio * reference_height) 1027 1028 non_text_symbol_aspect_ratio = rng.uniform( 1029 self.config.non_text_symbol_aspect_ratio_min, 1030 self.config.non_text_symbol_aspect_ratio_max, 1031 ) 1032 non_text_symbol_width = round(non_text_symbol_aspect_ratio * non_text_symbol_height) 1033 1034 box = None 1035 overlapped = True 1036 for _ in range(self.config.num_retries_to_get_non_overlapped_non_text_symbol): 1037 up_max = text_line_down + 1 - non_text_symbol_height 1038 up = int(rng.integers(text_line_up, up_max + 1)) 1039 down = up + non_text_symbol_height - 1 1040 assert up < down 1041 1042 left_max = text_line_right + 1 - non_text_symbol_width 1043 left = int(rng.integers(text_line_left, left_max + 1)) 1044 right = left + non_text_symbol_width - 1 1045 assert left < right 1046 1047 box = Box(up=up, down=down, left=left, right=right) 1048 1049 cur_overlapped = False 1050 for layout_text_line in layout_text_lines: 1051 if self.boxes_are_overlapped(box, layout_text_line.box): 1052 cur_overlapped = True 1053 break 1054 1055 if not cur_overlapped: 1056 overlapped = False 1057 break 1058 1059 assert box 1060 1061 if not overlapped: 1062 alpha = float( 1063 rng.uniform( 1064 self.config.non_text_symbol_non_overlapped_alpha_min, 1065 self.config.non_text_symbol_non_overlapped_alpha_max, 1066 ) 1067 ) 1068 else: 1069 alpha = float( 1070 rng.uniform( 1071 self.config.non_text_symbol_overlapped_alpha_min, 1072 self.config.non_text_symbol_overlapped_alpha_max, 1073 ) 1074 ) 1075 1076 layout_non_text_symbols.append(LayoutNonTextSymbol( 1077 box=box, 1078 alpha=alpha, 1079 )) 1080 1081 return layout_non_text_symbols 1082 1083 def sample_layout_seal_impressions( 1084 self, 1085 height: int, 1086 width: int, 1087 layout_text_lines: Sequence[LayoutTextLine], 1088 rng: RandomGenerator, 1089 ): 1090 reference_height = self.get_reference_height(height=height, width=width) 1091 1092 ( 1093 text_line_up, 1094 text_line_down, 1095 text_line_left, 1096 text_line_right, 1097 ) = self.get_text_line_area(layout_text_lines) 1098 1099 # Place seal impressions. 1100 layout_seal_impressions: List[LayoutSealImpression] = [] 1101 1102 num_seal_impressions = int( 1103 rng.integers( 1104 self.config.num_seal_impressions_min, 1105 self.config.num_seal_impressions_max + 1, 1106 ) 1107 ) 1108 for _ in range(num_seal_impressions): 1109 # Sample height. 1110 seal_impression_height_ratio = float( 1111 rng.uniform( 1112 self.config.seal_impression_height_ratio_min, 1113 self.config.seal_impression_height_ratio_max, 1114 ) 1115 ) 1116 seal_impression_height = round(seal_impression_height_ratio * reference_height) 1117 seal_impression_height = min(text_line_down + 1 - text_line_up, seal_impression_height) 1118 1119 # Make sure even. 1120 if seal_impression_height % 2 != 0: 1121 seal_impression_height -= 1 1122 1123 # Sample width. 1124 shape_mode = rng_choice( 1125 rng, 1126 self.seal_impression_ellipse_shape_modes, 1127 probs=self.seal_impression_ellipse_shape_modes_probs, 1128 ) 1129 if shape_mode == SealImpressionEllipseShapeMode.CIRCLE: 1130 seal_impression_width = seal_impression_height 1131 1132 elif shape_mode == SealImpressionEllipseShapeMode.GENERAL_ELLIPSE: 1133 aspect_ratio = float( 1134 rng.uniform( 1135 self.config.seal_impression_general_ellipse_aspect_ratio_min, 1136 self.config.seal_impression_general_ellipse_aspect_ratio_max, 1137 ) 1138 ) 1139 seal_impression_width = round(aspect_ratio * seal_impression_height) 1140 1141 else: 1142 raise NotImplementedError() 1143 1144 seal_impression_width = min(text_line_right + 1 - text_line_left, seal_impression_width) 1145 1146 # Make sure even. 1147 if seal_impression_width % 2 != 0: 1148 seal_impression_width -= 1 1149 1150 seal_impression_up_max = text_line_down + 1 - seal_impression_height 1151 seal_impression_up = int(rng.integers( 1152 text_line_up, 1153 seal_impression_up_max + 1, 1154 )) 1155 seal_impression_down = seal_impression_up + seal_impression_height - 1 1156 1157 seal_impression_left_max = text_line_right + 1 - seal_impression_width 1158 seal_impression_left = int(rng.integers( 1159 text_line_left, 1160 seal_impression_left_max + 1, 1161 )) 1162 seal_impression_right = seal_impression_left + seal_impression_width - 1 1163 1164 angle = int( 1165 rng.integers( 1166 self.config.seal_impression_angle_min, 1167 self.config.seal_impression_angle_max + 1, 1168 ) 1169 ) 1170 angle = angle % 360 1171 1172 layout_seal_impressions.append( 1173 LayoutSealImpression( 1174 box=Box( 1175 up=seal_impression_up, 1176 down=seal_impression_down, 1177 left=seal_impression_left, 1178 right=seal_impression_right, 1179 ), 1180 angle=angle, 1181 ) 1182 ) 1183 1184 return layout_seal_impressions 1185 1186 def generate_disconnected_text_regions( 1187 self, 1188 layout_text_lines: Sequence[LayoutTextLine], 1189 ): 1190 grid_idx_to_layout_text_lines: DefaultDict[int, List[LayoutTextLine]] = defaultdict(list) 1191 for layout_text_line in layout_text_lines: 1192 grid_idx_to_layout_text_lines[layout_text_line.grid_idx].append(layout_text_line) 1193 1194 disconnected_text_regions: List[DisconnectedTextRegion] = [] 1195 1196 for _, layout_text_lines in sorted( 1197 grid_idx_to_layout_text_lines.items(), 1198 key=lambda p: p[0], 1199 ): 1200 layout_text_lines = sorted(layout_text_lines, key=lambda ltl: ltl.text_line_idx) 1201 1202 begin = 0 1203 while begin < len(layout_text_lines): 1204 text_line_height_min = layout_text_lines[begin].text_line_height 1205 text_line_height_max = text_line_height_min 1206 1207 # Find [begin, end) interval satisfying the condition. 1208 end = begin + 1 1209 while end < len(layout_text_lines): 1210 text_line_height = layout_text_lines[end].text_line_height 1211 text_line_height_min = min(text_line_height_min, text_line_height) 1212 text_line_height_max = max(text_line_height_max, text_line_height) 1213 if text_line_height_max / text_line_height_min \ 1214 > self.config.disconnected_text_region_polygons_height_ratio_max: 1215 break 1216 else: 1217 end += 1 1218 1219 # To polygon. 1220 # NOTE: Simply using a bounding box is enough. 1221 # This method is common to all glyph sequences. 1222 cur_layout_text_lines = layout_text_lines[begin:end] 1223 bounding_box = Box( 1224 up=min(ltl.box.up for ltl in cur_layout_text_lines), 1225 down=max(ltl.box.down for ltl in cur_layout_text_lines), 1226 left=min(ltl.box.left for ltl in cur_layout_text_lines), 1227 right=max(ltl.box.right for ltl in cur_layout_text_lines), 1228 ) 1229 step = min( 1230 itertools.chain.from_iterable(ltl.box.shape for ltl in cur_layout_text_lines) 1231 ) 1232 disconnected_text_regions.append( 1233 DisconnectedTextRegion(polygon=bounding_box.to_polygon(step=step)) 1234 ) 1235 1236 # Move to next. 1237 begin = end 1238 1239 return disconnected_text_regions 1240 1241 def generate_non_text_regions( 1242 self, 1243 height: int, 1244 width: int, 1245 layout_text_lines: Sequence[LayoutTextLine], 1246 rng: RandomGenerator, 1247 ): 1248 box_overlapping_validator = BoxOverlappingValidator( 1249 layout_text_line.box for layout_text_line in layout_text_lines 1250 ) 1251 directions = [ 1252 LayoutNonTextLineDirection.UP, 1253 LayoutNonTextLineDirection.DOWN, 1254 LayoutNonTextLineDirection.LEFT, 1255 LayoutNonTextLineDirection.RIGHT, 1256 ] 1257 1258 lntl_boxes: List[Box] = [] 1259 for layout_text_line in layout_text_lines: 1260 ltl_box = layout_text_line.box 1261 1262 for direction_idx in rng.permutation(len(directions)): 1263 direction = directions[direction_idx] 1264 1265 if direction == LayoutNonTextLineDirection.UP: 1266 lntl_box = Box( 1267 up=ltl_box.up - ltl_box.height, 1268 down=ltl_box.up - 1, 1269 left=ltl_box.left, 1270 right=ltl_box.right, 1271 ) 1272 1273 elif direction == LayoutNonTextLineDirection.DOWN: 1274 lntl_box = Box( 1275 up=ltl_box.down + 1, 1276 down=ltl_box.down + ltl_box.height, 1277 left=ltl_box.left, 1278 right=ltl_box.right, 1279 ) 1280 1281 elif direction == LayoutNonTextLineDirection.LEFT: 1282 lntl_box = Box( 1283 up=ltl_box.up, 1284 down=ltl_box.down, 1285 left=ltl_box.left - ltl_box.width, 1286 right=ltl_box.left - 1, 1287 ) 1288 1289 elif direction == LayoutNonTextLineDirection.RIGHT: 1290 lntl_box = Box( 1291 up=ltl_box.up, 1292 down=ltl_box.down, 1293 left=ltl_box.right + 1, 1294 right=ltl_box.right + ltl_box.width, 1295 ) 1296 1297 else: 1298 raise NotImplementedError() 1299 1300 # Ignore invalid box. 1301 if not lntl_box.valid: 1302 continue 1303 if lntl_box.down >= height or lntl_box.right >= width: 1304 continue 1305 1306 assert ltl_box.shape == lntl_box.shape 1307 1308 # Ignore box that is overlapped with any text lines. 1309 if box_overlapping_validator.is_overlapped(lntl_box): 1310 continue 1311 1312 # Keep only the first valid direction. 1313 lntl_boxes.append(lntl_box) 1314 break 1315 1316 step = max( 1317 1, 1318 min(itertools.chain.from_iterable(lntl_box.shape for lntl_box in lntl_boxes)), 1319 ) 1320 non_text_regions = [ 1321 NonTextRegion(polygon=lntl_box.to_polygon(step=step)) for lntl_box in lntl_boxes 1322 ] 1323 return non_text_regions 1324 1325 def run(self, input: PageLayoutStepInput, rng: RandomGenerator): 1326 page_shape_step_output = input.page_shape_step_output 1327 height = page_shape_step_output.height 1328 width = page_shape_step_output.width 1329 1330 # Text lines. 1331 ( 1332 layout_text_lines, 1333 large_text_line_gird, 1334 grids, 1335 ) = self.sample_layout_text_lines(height=height, width=width, rng=rng) 1336 1337 # Images. 1338 layout_images = self.sample_layout_images(height=height, width=width, rng=rng) 1339 1340 # QR codes & Bar codes. 1341 # NOTE: Some layout_text_lines could be dropped. 1342 ( 1343 layout_barcode_qrs, 1344 layout_barcode_code39s, 1345 layout_text_lines, 1346 ) = self.sample_layout_barcode_qrs_and_layout_barcode_code39s( 1347 height=height, 1348 width=width, 1349 layout_text_lines=layout_text_lines, 1350 rng=rng, 1351 ) 1352 1353 # Non-text symbols. 1354 layout_non_text_symbols = self.sample_layout_non_text_symbols( 1355 height=height, 1356 width=width, 1357 layout_text_lines=layout_text_lines, 1358 rng=rng, 1359 ) 1360 1361 # Seal impressions. 1362 layout_seal_impressions = self.sample_layout_seal_impressions( 1363 height=height, 1364 width=width, 1365 layout_text_lines=layout_text_lines, 1366 rng=rng, 1367 ) 1368 1369 # For char-level polygon regression. 1370 disconnected_text_regions = self.generate_disconnected_text_regions( 1371 layout_text_lines=layout_text_lines, 1372 ) 1373 1374 # For sampling negative text region area. 1375 non_text_regions = self.generate_non_text_regions( 1376 height=height, 1377 width=width, 1378 layout_text_lines=layout_text_lines, 1379 rng=rng, 1380 ) 1381 1382 return PageLayoutStepOutput( 1383 page_layout=PageLayout( 1384 height=height, 1385 width=width, 1386 layout_text_lines=layout_text_lines, 1387 layout_non_text_symbols=layout_non_text_symbols, 1388 layout_seal_impressions=layout_seal_impressions, 1389 layout_images=layout_images, 1390 layout_barcode_qrs=layout_barcode_qrs, 1391 layout_barcode_code39s=layout_barcode_code39s, 1392 disconnected_text_regions=disconnected_text_regions, 1393 non_text_regions=non_text_regions, 1394 ), 1395 debug_large_text_line_gird=large_text_line_gird, 1396 debug_grids=grids, 1397 )
Abstract base class for generic types.
A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::
class Mapping(Generic[KT, VT]): def __getitem__(self, key: KT) -> VT: ... # Etc.
This class can then be used as follows::
def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT: try: return mapping[key] except KeyError: return default
233 def __init__(self, config: PageLayoutStepConfig): 234 super().__init__(config) 235 236 ( 237 self.seal_impression_ellipse_shape_modes, 238 self.seal_impression_ellipse_shape_modes_probs, 239 ) = normalize_to_keys_and_probs([ 240 ( 241 SealImpressionEllipseShapeMode.CIRCLE, 242 self.config.seal_impression_weight_circle, 243 ), 244 ( 245 SealImpressionEllipseShapeMode.GENERAL_ELLIPSE, 246 self.config.seal_impression_weight_general_ellipse, 247 ), 248 ])
250 def sample_large_text_line_height(self, reference_height: int, rng: RandomGenerator): 251 if rng.random() < self.config.prob_add_large_text_line: 252 large_text_line_height_ratio = rng.uniform( 253 self.config.large_text_line_height_ratio_min, 254 self.config.large_text_line_height_ratio_max, 255 ) 256 return round(large_text_line_height_ratio * reference_height) 257 258 else: 259 return None
261 def sample_normal_text_line_heights(self, reference_height: int, rng: RandomGenerator): 262 normal_text_line_heights: List[int] = [] 263 264 if self.config.force_add_normal_text_line_height_ratio_min: 265 normal_text_line_heights.append( 266 round(self.config.normal_text_line_height_ratio_min * reference_height) 267 ) 268 269 num_normal_text_line_heights = rng.integers( 270 self.config.num_normal_text_line_heights_min, 271 self.config.num_normal_text_line_heights_max + 1, 272 ) 273 ratio_step = ( 274 self.config.normal_text_line_height_ratio_max 275 - self.config.normal_text_line_height_ratio_min 276 ) / num_normal_text_line_heights 277 for step_idx in range(num_normal_text_line_heights): 278 ratio_min = self.config.normal_text_line_height_ratio_min + step_idx * ratio_step 279 ratio_max = ratio_min + ratio_step 280 ratio = rng.uniform(ratio_min, ratio_max) 281 normal_text_line_heights.append(round(ratio * reference_height)) 282 283 assert normal_text_line_heights 284 return sorted(normal_text_line_heights)
286 @classmethod 287 def generate_grid_points( 288 cls, 289 grid_pad_ratio: float, 290 grid_step: int, 291 grid_gap: int, 292 grid_gap_min: Optional[int], 293 length: int, 294 rng: RandomGenerator, 295 ): 296 grid_pad = min(length - grid_step, length * grid_pad_ratio) 297 assert grid_pad > 0 298 299 num_steps = (length - grid_pad + grid_gap) / (grid_step + grid_gap) 300 if not num_steps.is_integer(): 301 num_steps = math.floor(num_steps) 302 num_steps = int(num_steps) 303 304 grid_pad = length - grid_step * num_steps - grid_gap * (num_steps - 1) 305 assert grid_pad > 0 306 grid_pad = grid_pad // 2 307 308 begin = grid_pad 309 end = grid_pad + grid_step - 1 310 assert end < length - grid_pad 311 312 begins: List[int] = [] 313 ends: List[int] = [] 314 315 while end < length - grid_pad: 316 begins.append(begin) 317 ends.append(end) 318 319 cur_gap = grid_gap 320 if grid_gap_min is not None: 321 cur_gap = rng.integers(grid_gap_min, grid_gap + 1) 322 323 begin = end + cur_gap 324 end = begin + grid_step - 1 325 326 return begins, ends
328 def sample_grid_points( 329 self, 330 height: int, 331 width: int, 332 normal_text_line_heights_max: int, 333 rng: RandomGenerator, 334 ): 335 grid_pad_ratio = rng.uniform( 336 self.config.grid_pad_ratio_min, 337 self.config.grid_pad_ratio_max, 338 ) 339 340 grid_step_ratio = rng.uniform( 341 self.config.grid_step_ratio_min, 342 self.config.grid_step_ratio_max, 343 ) 344 grid_step = round(normal_text_line_heights_max * grid_step_ratio) 345 346 grid_vert_gap_min = round( 347 normal_text_line_heights_max * self.config.grid_vert_gap_ratio_min 348 ) 349 grid_vert_gap_max = round( 350 normal_text_line_heights_max * self.config.grid_vert_gap_ratio_max 351 ) 352 vert_begins, vert_ends = self.generate_grid_points( 353 grid_pad_ratio=grid_pad_ratio, 354 grid_step=grid_step, 355 grid_gap=grid_vert_gap_max, 356 grid_gap_min=grid_vert_gap_min, 357 length=height, 358 rng=rng, 359 ) 360 361 grid_hori_gap_ratio = rng.uniform( 362 self.config.grid_hori_gap_ratio_min, 363 self.config.grid_hori_gap_ratio_max, 364 ) 365 grid_hori_gap = round(normal_text_line_heights_max * grid_hori_gap_ratio) 366 grid_hori_gap = max(normal_text_line_heights_max, grid_hori_gap) 367 hori_begins, hori_ends = self.generate_grid_points( 368 grid_pad_ratio=grid_pad_ratio, 369 grid_step=grid_step, 370 grid_gap=grid_hori_gap, 371 grid_gap_min=None, 372 length=width, 373 rng=rng, 374 ) 375 return (vert_begins, vert_ends), (hori_begins, hori_ends)
377 def trim_grid_points_for_large_text_line( 378 self, 379 large_text_line_height: int, 380 vert_begins: Sequence[int], 381 vert_ends: Sequence[int], 382 hori_begins_min: int, 383 hori_ends_max: int, 384 ): 385 idx = 0 386 while idx < len(vert_begins) \ 387 and vert_ends[idx] + 1 - vert_begins[0] < large_text_line_height: 388 idx += 1 389 390 if idx >= len(vert_begins) - 1: 391 return None, 0 392 393 large_text_line_gird = Box( 394 up=vert_ends[idx] - large_text_line_height + 1, 395 down=vert_ends[idx], 396 left=hori_begins_min, 397 right=hori_ends_max, 398 ) 399 return large_text_line_gird, idx + 1
401 def sample_grids( 402 self, 403 vert_begins: Sequence[int], 404 vert_ends: Sequence[int], 405 hori_begins: Sequence[int], 406 hori_ends: Sequence[int], 407 rng: RandomGenerator, 408 ): 409 num_vert_ends = len(vert_ends) 410 assert num_vert_ends == len(vert_begins) 411 412 num_hori_ends = len(hori_ends) 413 assert num_hori_ends == len(hori_begins) 414 415 priority_queue = [ 416 PrioritizedSegment( 417 vert_begin_idx=0, 418 hori_begin_idx=0, 419 hori_end_idx=num_hori_ends - 1, 420 ) 421 ] 422 grids: List[Box] = [] 423 while priority_queue: 424 cur_segment = heapq.heappop(priority_queue) 425 426 # Deal with segments in the same level. 427 same_vert_segments: List[PrioritizedSegment] = [] 428 while priority_queue \ 429 and priority_queue[0].vert_begin_idx == cur_segment.vert_begin_idx: 430 same_vert_segments.append(heapq.heappop(priority_queue)) 431 432 if same_vert_segments: 433 # Rebuid segments. 434 same_vert_segments.append(cur_segment) 435 same_vert_segments = sorted( 436 same_vert_segments, 437 key=lambda segment: segment.hori_begin_idx, 438 ) 439 440 rebuilt_segments: List[PrioritizedSegment] = [] 441 rebuilt_begin = 0 442 while rebuilt_begin < len(same_vert_segments): 443 rebuilt_end = rebuilt_begin 444 while rebuilt_end + 1 < len(same_vert_segments) \ 445 and (same_vert_segments[rebuilt_end + 1].hori_begin_idx 446 == same_vert_segments[rebuilt_end].hori_end_idx + 1): 447 rebuilt_end += 1 448 rebuilt_segments.append( 449 PrioritizedSegment( 450 vert_begin_idx=cur_segment.vert_begin_idx, 451 hori_begin_idx=same_vert_segments[rebuilt_begin].hori_begin_idx, 452 hori_end_idx=same_vert_segments[rebuilt_end].hori_end_idx, 453 ) 454 ) 455 rebuilt_begin = rebuilt_end + 1 456 457 # Re-pick the first segment. 458 cur_segment = rebuilt_segments[0] 459 for other_segment in rebuilt_segments[1:]: 460 heapq.heappush(priority_queue, other_segment) 461 462 # Generate grids for the current segment. 463 vert_begin_idx = cur_segment.vert_begin_idx 464 465 hori_begin_idx = cur_segment.hori_begin_idx 466 hori_end_idx = cur_segment.hori_end_idx 467 while hori_begin_idx <= hori_end_idx: 468 # Randomly generate grid. 469 cur_vert_end_idx = rng.integers(vert_begin_idx, num_vert_ends) 470 471 # Try to sample segment with length >= 2. 472 if hori_end_idx + 1 - hori_begin_idx <= 3: 473 cur_hori_end_idx = hori_end_idx 474 else: 475 cur_hori_end_idx = rng.integers(hori_begin_idx + 1, hori_end_idx + 1) 476 477 grids.append( 478 Box( 479 up=vert_begins[vert_begin_idx], 480 down=vert_ends[cur_vert_end_idx], 481 left=hori_begins[hori_begin_idx], 482 right=hori_ends[cur_hori_end_idx], 483 ) 484 ) 485 next_vert_begin_idx = cur_vert_end_idx + 1 486 if next_vert_begin_idx < num_vert_ends: 487 heapq.heappush( 488 priority_queue, 489 PrioritizedSegment( 490 vert_begin_idx=next_vert_begin_idx, 491 hori_begin_idx=hori_begin_idx, 492 hori_end_idx=cur_hori_end_idx, 493 ), 494 ) 495 496 hori_begin_idx = cur_hori_end_idx + 1 497 498 return grids
500 @classmethod 501 def calculate_normal_text_line_heights_probs( 502 cls, 503 normal_text_line_heights_expected_probs: Sequence[float], 504 normal_text_line_heights_acc_areas: List[int], 505 ): 506 if sum(normal_text_line_heights_acc_areas) == 0: 507 normal_text_line_heights_cur_probs = [0.0] * len(normal_text_line_heights_acc_areas) 508 else: 509 normal_text_line_heights_cur_probs = normalize_to_probs( 510 normal_text_line_heights_acc_areas 511 ) 512 513 probs = normalize_to_probs([ 514 max(0.0, expected_prob - cur_prob) for cur_prob, expected_prob in zip( 515 normal_text_line_heights_cur_probs, 516 normal_text_line_heights_expected_probs, 517 ) 518 ]) 519 return probs
521 def fill_normal_text_lines_to_grid( 522 self, 523 normal_text_line_heights: Sequence[int], 524 normal_text_line_heights_expected_probs: Sequence[float], 525 normal_text_line_heights_acc_areas: List[int], 526 grid_idx: int, 527 grid: Box, 528 rng: RandomGenerator, 529 ): 530 normal_text_line_heights_indices = list(range(len(normal_text_line_heights))) 531 normal_text_line_heights_max = normal_text_line_heights[-1] 532 533 layout_text_lines: List[LayoutTextLine] = [] 534 up = grid.up 535 prev_text_line_height: Optional[int] = None 536 537 while up + normal_text_line_heights_max - 1 <= grid.down: 538 normal_text_line_heights_probs = self.calculate_normal_text_line_heights_probs( 539 normal_text_line_heights_expected_probs=normal_text_line_heights_expected_probs, 540 normal_text_line_heights_acc_areas=normal_text_line_heights_acc_areas, 541 ) 542 normal_text_line_height_idx = rng_choice( 543 rng=rng, 544 items=normal_text_line_heights_indices, 545 probs=normal_text_line_heights_probs, 546 ) 547 normal_text_line_height = normal_text_line_heights[normal_text_line_height_idx] 548 549 add_gap = False 550 if prev_text_line_height: 551 if prev_text_line_height != normal_text_line_height: 552 add_gap = (rng.random() < self.config.prob_normal_text_line_diff_heights_gap) 553 else: 554 add_gap = (rng.random() < self.config.prob_normal_text_line_gap) 555 if add_gap: 556 gap_ratio = rng.uniform( 557 self.config.normal_text_line_gap_ratio_min, 558 self.config.normal_text_line_gap_ratio_max, 559 ) 560 gap = round(gap_ratio * normal_text_line_height) 561 gap = min(grid.down - (up + normal_text_line_height - 1), gap) 562 up += gap 563 down = up + normal_text_line_height - 1 564 assert down <= grid.down 565 566 length_ratio = rng.uniform( 567 self.config.normal_text_line_length_ratio_min, 568 self.config.normal_text_line_length_ratio_max, 569 ) 570 normal_text_line_length = round(grid.width * length_ratio) 571 normal_text_line_length = max(normal_text_line_height, normal_text_line_length) 572 573 pad_max = grid.width - normal_text_line_length 574 pad = rng.integers(0, pad_max + 1) 575 left = grid.left + pad 576 right = left + normal_text_line_length - 1 577 assert right <= grid.right 578 579 text_line_idx = len(layout_text_lines) 580 layout_text_lines.append( 581 LayoutTextLine( 582 grid_idx=grid_idx, 583 text_line_idx=text_line_idx, 584 text_line_height=normal_text_line_height, 585 box=Box(up=up, down=down, left=left, right=right), 586 glyph_sequence=FontEngineRunConfigGlyphSequence.HORI_DEFAULT, 587 ) 588 ) 589 590 prev_text_line_height = normal_text_line_height 591 normal_text_line_heights_acc_areas[normal_text_line_height_idx] \ 592 += normal_text_line_length * normal_text_line_height 593 up = down + 1 594 595 return layout_text_lines
597 def fill_large_text_line_to_grid( 598 self, 599 large_text_line_gird: Box, 600 rng: RandomGenerator, 601 ): 602 length_ratio = rng.uniform( 603 self.config.large_text_line_length_ratio_min, 604 self.config.large_text_line_length_ratio_max, 605 ) 606 large_text_line_length = round(large_text_line_gird.width * length_ratio) 607 large_text_line_length = max(large_text_line_gird.height, large_text_line_length) 608 609 pad_max = large_text_line_gird.width - large_text_line_length 610 pad = rng.integers(0, pad_max + 1) 611 left = large_text_line_gird.left + pad 612 right = left + large_text_line_length - 1 613 assert right <= large_text_line_gird.right 614 615 return LayoutTextLine( 616 grid_idx=-1, 617 text_line_idx=0, 618 text_line_height=large_text_line_gird.height, 619 box=attrs.evolve(large_text_line_gird, left=left, right=right), 620 glyph_sequence=FontEngineRunConfigGlyphSequence.HORI_DEFAULT, 621 )
628 def sample_layout_text_lines(self, height: int, width: int, rng: RandomGenerator): 629 reference_height = self.get_reference_height(height=height, width=width) 630 631 normal_text_line_heights = self.sample_normal_text_line_heights(reference_height, rng) 632 (vert_begins, vert_ends), (hori_begins, hori_ends) = self.sample_grid_points( 633 height=height, 634 width=width, 635 normal_text_line_heights_max=normal_text_line_heights[-1], 636 rng=rng, 637 ) 638 639 large_text_line_height = self.sample_large_text_line_height(reference_height, rng) 640 large_text_line_gird: Optional[Box] = None 641 if large_text_line_height is not None: 642 large_text_line_gird, vert_trim_idx = self.trim_grid_points_for_large_text_line( 643 large_text_line_height=large_text_line_height, 644 vert_begins=vert_begins, 645 vert_ends=vert_ends, 646 hori_begins_min=hori_begins[0], 647 hori_ends_max=hori_ends[-1], 648 ) 649 if large_text_line_gird is not None: 650 vert_begins = vert_begins[vert_trim_idx:] 651 vert_ends = vert_ends[vert_trim_idx:] 652 653 grids = self.sample_grids( 654 vert_begins=vert_begins, 655 vert_ends=vert_ends, 656 hori_begins=hori_begins, 657 hori_ends=hori_ends, 658 rng=rng, 659 ) 660 normal_text_line_heights_expected_probs = normalize_to_probs([ 661 1 / normal_text_line_height for normal_text_line_height in normal_text_line_heights 662 ]) 663 normal_text_line_heights_acc_areas = [0] * len(normal_text_line_heights) 664 layout_text_lines: List[LayoutTextLine] = [] 665 for grid_idx, grid in enumerate(grids): 666 layout_text_lines.extend( 667 self.fill_normal_text_lines_to_grid( 668 normal_text_line_heights=normal_text_line_heights, 669 normal_text_line_heights_expected_probs=normal_text_line_heights_expected_probs, 670 normal_text_line_heights_acc_areas=normal_text_line_heights_acc_areas, 671 grid_idx=grid_idx, 672 grid=grid, 673 rng=rng, 674 ) 675 ) 676 677 if large_text_line_gird: 678 layout_text_lines.append(self.fill_large_text_line_to_grid(large_text_line_gird, rng)) 679 680 # Must place text line. 681 assert layout_text_lines 682 683 return ( 684 layout_text_lines, 685 large_text_line_gird, 686 grids, 687 )
689 def sample_layout_images(self, height: int, width: int, rng: RandomGenerator): 690 # Image could be overlapped with text lines. 691 layout_images: List[LayoutImage] = [] 692 693 num_layout_images = rng.integers( 694 self.config.num_images_min, 695 self.config.num_images_max + 1, 696 ) 697 for _ in range(num_layout_images): 698 # NOTE: It's ok to have overlapping images. 699 image_height_ratio = rng.uniform( 700 self.config.image_height_ratio_min, 701 self.config.image_height_ratio_max, 702 ) 703 image_height = round(height * image_height_ratio) 704 705 image_width_ratio = rng.uniform( 706 self.config.image_width_ratio_min, 707 self.config.image_width_ratio_max, 708 ) 709 image_width = round(width * image_width_ratio) 710 711 up = rng.integers(0, height - image_height + 1) 712 down = up + image_height - 1 713 left = rng.integers(0, width - image_width + 1) 714 right = left + image_width - 1 715 layout_images.append(LayoutImage(box=Box(up=up, down=down, left=left, right=right))) 716 717 return layout_images
725 def sample_layout_barcode_qrs( 726 self, 727 height: int, 728 width: int, 729 layout_text_lines: Sequence[LayoutTextLine], 730 rng: RandomGenerator, 731 ): 732 reference_height = self.get_reference_height(height=height, width=width) 733 734 layout_barcode_qrs: List[LayoutBarcodeQr] = [] 735 736 num_layout_barcode_qrs = rng.integers( 737 self.config.num_barcode_qrs_min, 738 self.config.num_barcode_qrs_max + 1, 739 ) 740 num_retries = 3 741 while num_layout_barcode_qrs > 0 and num_retries > 0: 742 barcode_qr_length_ratio = rng.uniform( 743 self.config.barcode_qr_length_ratio_min, 744 self.config.barcode_qr_length_ratio_max, 745 ) 746 barcode_qr_length = round(barcode_qr_length_ratio * reference_height) 747 barcode_qr_length = min(height, width, barcode_qr_length) 748 749 # Place QR code next to text line. 750 anchor_layout_text_line_box = rng_choice(rng, layout_text_lines).box 751 anchor_layout_text_line_box_center = anchor_layout_text_line_box.get_center_point() 752 placement = rng_choice(rng, tuple(LayoutXcodePlacement)) 753 754 if placement in (LayoutXcodePlacement.NEXT_TO_DOWN, LayoutXcodePlacement.NEXT_TO_UP): 755 if placement == LayoutXcodePlacement.NEXT_TO_DOWN: 756 up = anchor_layout_text_line_box.down + 1 757 down = up + barcode_qr_length - 1 758 if down >= height: 759 num_retries -= 1 760 continue 761 else: 762 assert placement == LayoutXcodePlacement.NEXT_TO_UP 763 down = anchor_layout_text_line_box.up - 1 764 up = down + 1 - barcode_qr_length 765 if up < 0: 766 num_retries -= 1 767 continue 768 769 left_min = max( 770 0, 771 anchor_layout_text_line_box_center.x - barcode_qr_length, 772 ) 773 left_max = min( 774 width - barcode_qr_length, 775 anchor_layout_text_line_box_center.x, 776 ) 777 if left_min > left_max: 778 num_retries -= 1 779 continue 780 left = int(rng.integers(left_min, left_max + 1)) 781 right = left + barcode_qr_length - 1 782 783 else: 784 assert placement in ( 785 LayoutXcodePlacement.NEXT_TO_RIGHT, 786 LayoutXcodePlacement.NEXT_TO_LEFT, 787 ) 788 789 if placement == LayoutXcodePlacement.NEXT_TO_RIGHT: 790 left = anchor_layout_text_line_box.right + 1 791 right = left + barcode_qr_length - 1 792 if right >= width: 793 num_retries -= 1 794 continue 795 else: 796 assert placement == LayoutXcodePlacement.NEXT_TO_LEFT 797 right = anchor_layout_text_line_box.left - 1 798 left = right + 1 - barcode_qr_length 799 if left < 0: 800 num_retries -= 1 801 continue 802 803 up_min = max( 804 0, 805 anchor_layout_text_line_box_center.y - barcode_qr_length, 806 ) 807 up_max = min( 808 height - barcode_qr_length, 809 anchor_layout_text_line_box_center.y, 810 ) 811 if up_min > up_max: 812 num_retries -= 1 813 continue 814 815 up = int(rng.integers(up_min, up_max + 1)) 816 down = up + barcode_qr_length - 1 817 818 num_layout_barcode_qrs -= 1 819 layout_barcode_qrs.append( 820 LayoutBarcodeQr(box=Box( 821 up=up, 822 down=down, 823 left=left, 824 right=right, 825 )) 826 ) 827 828 return layout_barcode_qrs
830 def sample_layout_barcode_code39s( 831 self, 832 height: int, 833 width: int, 834 layout_text_lines: Sequence[LayoutTextLine], 835 rng: RandomGenerator, 836 ): 837 reference_height = self.get_reference_height(height=height, width=width) 838 839 layout_barcode_code39s: List[LayoutBarcodeCode39] = [] 840 841 num_layout_barcode_code39s = rng.integers( 842 self.config.num_barcode_code39s_min, 843 self.config.num_barcode_code39s_max + 1, 844 ) 845 num_retries = 3 846 while num_layout_barcode_code39s > 0 and num_retries > 0: 847 barcode_code39_height_ratio = rng.uniform( 848 self.config.barcode_code39_height_ratio_min, 849 self.config.barcode_code39_height_ratio_max, 850 ) 851 barcode_code39_height = round(barcode_code39_height_ratio * reference_height) 852 barcode_code39_height = min(height, width, barcode_code39_height) 853 854 barcode_code39_num_chars = int( 855 rng.integers( 856 self.config.barcode_code39_num_chars_min, 857 self.config.barcode_code39_num_chars_max + 1, 858 ) 859 ) 860 barcode_code39_width = round( 861 barcode_code39_height * self.config.barcode_code39_aspect_ratio 862 * barcode_code39_num_chars 863 ) 864 865 # Place Bar code next to text line. 866 anchor_layout_text_line_box = rng_choice(rng, layout_text_lines).box 867 anchor_layout_text_line_box_center = anchor_layout_text_line_box.get_center_point() 868 placement = rng_choice(rng, tuple(LayoutXcodePlacement)) 869 870 if placement in (LayoutXcodePlacement.NEXT_TO_DOWN, LayoutXcodePlacement.NEXT_TO_UP): 871 if placement == LayoutXcodePlacement.NEXT_TO_DOWN: 872 up = anchor_layout_text_line_box.down + 1 873 down = up + barcode_code39_height - 1 874 if down >= height: 875 num_retries -= 1 876 continue 877 else: 878 assert placement == LayoutXcodePlacement.NEXT_TO_UP 879 down = anchor_layout_text_line_box.up - 1 880 up = down + 1 - barcode_code39_height 881 if up < 0: 882 num_retries -= 1 883 continue 884 885 left_min = max( 886 0, 887 anchor_layout_text_line_box_center.x - barcode_code39_width, 888 ) 889 left_max = min( 890 width - barcode_code39_width, 891 anchor_layout_text_line_box_center.x, 892 ) 893 if left_min > left_max: 894 num_retries -= 1 895 continue 896 left = int(rng.integers(left_min, left_max + 1)) 897 right = left + barcode_code39_width - 1 898 899 else: 900 assert placement in ( 901 LayoutXcodePlacement.NEXT_TO_RIGHT, 902 LayoutXcodePlacement.NEXT_TO_LEFT, 903 ) 904 905 if placement == LayoutXcodePlacement.NEXT_TO_RIGHT: 906 left = anchor_layout_text_line_box.right + 1 907 right = left + barcode_code39_width - 1 908 if right >= width: 909 num_retries -= 1 910 continue 911 else: 912 assert placement == LayoutXcodePlacement.NEXT_TO_LEFT 913 right = anchor_layout_text_line_box.left - 1 914 left = right + 1 - barcode_code39_width 915 if left < 0: 916 num_retries -= 1 917 continue 918 919 up_min = max( 920 0, 921 anchor_layout_text_line_box_center.y - barcode_code39_height, 922 ) 923 up_max = min( 924 height - barcode_code39_height, 925 anchor_layout_text_line_box_center.y, 926 ) 927 if up_min > up_max: 928 num_retries -= 1 929 continue 930 931 up = int(rng.integers(up_min, up_max + 1)) 932 down = up + barcode_code39_height - 1 933 934 num_layout_barcode_code39s -= 1 935 layout_barcode_code39s.append( 936 LayoutBarcodeCode39(box=Box( 937 up=up, 938 down=down, 939 left=left, 940 right=right, 941 )) 942 ) 943 944 return layout_barcode_code39s
946 def sample_layout_barcode_qrs_and_layout_barcode_code39s( 947 self, 948 height: int, 949 width: int, 950 layout_text_lines: Sequence[LayoutTextLine], 951 rng: RandomGenerator, 952 ): 953 layout_barcode_qrs = self.sample_layout_barcode_qrs( 954 height=height, 955 width=width, 956 layout_text_lines=layout_text_lines, 957 rng=rng, 958 ) 959 960 layout_barcode_code39s = self.sample_layout_barcode_code39s( 961 height=height, 962 width=width, 963 layout_text_lines=layout_text_lines, 964 rng=rng, 965 ) 966 967 if layout_barcode_qrs or layout_barcode_code39s: 968 # Barcode could not be overlapped with text lines. 969 # Hence need to remove the overlapped text lines. 970 box_overlapping_validator = BoxOverlappingValidator( 971 itertools.chain( 972 (layout_barcode_qr.box for layout_barcode_qr in layout_barcode_qrs), 973 (layout_barcode_code39.box for layout_barcode_code39 in layout_barcode_code39s), 974 ) 975 ) 976 977 keep_layout_text_lines: List[LayoutTextLine] = [] 978 for layout_text_line in layout_text_lines: 979 if not box_overlapping_validator.is_overlapped(layout_text_line.box): 980 keep_layout_text_lines.append(layout_text_line) 981 layout_text_lines = keep_layout_text_lines 982 983 return layout_barcode_qrs, layout_barcode_code39s, layout_text_lines
985 @classmethod 986 def get_text_line_area(cls, layout_text_lines: Sequence[LayoutTextLine]): 987 # Sample within the text line area. 988 text_line_up = min(layout_text_line.box.up for layout_text_line in layout_text_lines) 989 text_line_down = max(layout_text_line.box.down for layout_text_line in layout_text_lines) 990 text_line_left = min(layout_text_line.box.left for layout_text_line in layout_text_lines) 991 text_line_right = max(layout_text_line.box.right for layout_text_line in layout_text_lines) 992 return ( 993 text_line_up, 994 text_line_down, 995 text_line_left, 996 text_line_right, 997 )
999 def sample_layout_non_text_symbols( 1000 self, 1001 height: int, 1002 width: int, 1003 layout_text_lines: Sequence[LayoutTextLine], 1004 rng: RandomGenerator, 1005 ): 1006 reference_height = self.get_reference_height(height=height, width=width) 1007 1008 text_line_up = 0 1009 text_line_down = height - 1 1010 text_line_left = 0 1011 text_line_right = width - 1 1012 1013 layout_non_text_symbols: List[LayoutNonTextSymbol] = [] 1014 1015 num_non_text_symbols = int( 1016 rng.integers( 1017 self.config.num_non_text_symbols_min, 1018 self.config.num_non_text_symbols_max + 1, 1019 ) 1020 ) 1021 for _ in range(num_non_text_symbols): 1022 non_text_symbol_height_ratio = rng.uniform( 1023 self.config.non_text_symbol_height_ratio_min, 1024 self.config.non_text_symbol_height_ratio_max, 1025 ) 1026 non_text_symbol_height = round(non_text_symbol_height_ratio * reference_height) 1027 1028 non_text_symbol_aspect_ratio = rng.uniform( 1029 self.config.non_text_symbol_aspect_ratio_min, 1030 self.config.non_text_symbol_aspect_ratio_max, 1031 ) 1032 non_text_symbol_width = round(non_text_symbol_aspect_ratio * non_text_symbol_height) 1033 1034 box = None 1035 overlapped = True 1036 for _ in range(self.config.num_retries_to_get_non_overlapped_non_text_symbol): 1037 up_max = text_line_down + 1 - non_text_symbol_height 1038 up = int(rng.integers(text_line_up, up_max + 1)) 1039 down = up + non_text_symbol_height - 1 1040 assert up < down 1041 1042 left_max = text_line_right + 1 - non_text_symbol_width 1043 left = int(rng.integers(text_line_left, left_max + 1)) 1044 right = left + non_text_symbol_width - 1 1045 assert left < right 1046 1047 box = Box(up=up, down=down, left=left, right=right) 1048 1049 cur_overlapped = False 1050 for layout_text_line in layout_text_lines: 1051 if self.boxes_are_overlapped(box, layout_text_line.box): 1052 cur_overlapped = True 1053 break 1054 1055 if not cur_overlapped: 1056 overlapped = False 1057 break 1058 1059 assert box 1060 1061 if not overlapped: 1062 alpha = float( 1063 rng.uniform( 1064 self.config.non_text_symbol_non_overlapped_alpha_min, 1065 self.config.non_text_symbol_non_overlapped_alpha_max, 1066 ) 1067 ) 1068 else: 1069 alpha = float( 1070 rng.uniform( 1071 self.config.non_text_symbol_overlapped_alpha_min, 1072 self.config.non_text_symbol_overlapped_alpha_max, 1073 ) 1074 ) 1075 1076 layout_non_text_symbols.append(LayoutNonTextSymbol( 1077 box=box, 1078 alpha=alpha, 1079 )) 1080 1081 return layout_non_text_symbols
1083 def sample_layout_seal_impressions( 1084 self, 1085 height: int, 1086 width: int, 1087 layout_text_lines: Sequence[LayoutTextLine], 1088 rng: RandomGenerator, 1089 ): 1090 reference_height = self.get_reference_height(height=height, width=width) 1091 1092 ( 1093 text_line_up, 1094 text_line_down, 1095 text_line_left, 1096 text_line_right, 1097 ) = self.get_text_line_area(layout_text_lines) 1098 1099 # Place seal impressions. 1100 layout_seal_impressions: List[LayoutSealImpression] = [] 1101 1102 num_seal_impressions = int( 1103 rng.integers( 1104 self.config.num_seal_impressions_min, 1105 self.config.num_seal_impressions_max + 1, 1106 ) 1107 ) 1108 for _ in range(num_seal_impressions): 1109 # Sample height. 1110 seal_impression_height_ratio = float( 1111 rng.uniform( 1112 self.config.seal_impression_height_ratio_min, 1113 self.config.seal_impression_height_ratio_max, 1114 ) 1115 ) 1116 seal_impression_height = round(seal_impression_height_ratio * reference_height) 1117 seal_impression_height = min(text_line_down + 1 - text_line_up, seal_impression_height) 1118 1119 # Make sure even. 1120 if seal_impression_height % 2 != 0: 1121 seal_impression_height -= 1 1122 1123 # Sample width. 1124 shape_mode = rng_choice( 1125 rng, 1126 self.seal_impression_ellipse_shape_modes, 1127 probs=self.seal_impression_ellipse_shape_modes_probs, 1128 ) 1129 if shape_mode == SealImpressionEllipseShapeMode.CIRCLE: 1130 seal_impression_width = seal_impression_height 1131 1132 elif shape_mode == SealImpressionEllipseShapeMode.GENERAL_ELLIPSE: 1133 aspect_ratio = float( 1134 rng.uniform( 1135 self.config.seal_impression_general_ellipse_aspect_ratio_min, 1136 self.config.seal_impression_general_ellipse_aspect_ratio_max, 1137 ) 1138 ) 1139 seal_impression_width = round(aspect_ratio * seal_impression_height) 1140 1141 else: 1142 raise NotImplementedError() 1143 1144 seal_impression_width = min(text_line_right + 1 - text_line_left, seal_impression_width) 1145 1146 # Make sure even. 1147 if seal_impression_width % 2 != 0: 1148 seal_impression_width -= 1 1149 1150 seal_impression_up_max = text_line_down + 1 - seal_impression_height 1151 seal_impression_up = int(rng.integers( 1152 text_line_up, 1153 seal_impression_up_max + 1, 1154 )) 1155 seal_impression_down = seal_impression_up + seal_impression_height - 1 1156 1157 seal_impression_left_max = text_line_right + 1 - seal_impression_width 1158 seal_impression_left = int(rng.integers( 1159 text_line_left, 1160 seal_impression_left_max + 1, 1161 )) 1162 seal_impression_right = seal_impression_left + seal_impression_width - 1 1163 1164 angle = int( 1165 rng.integers( 1166 self.config.seal_impression_angle_min, 1167 self.config.seal_impression_angle_max + 1, 1168 ) 1169 ) 1170 angle = angle % 360 1171 1172 layout_seal_impressions.append( 1173 LayoutSealImpression( 1174 box=Box( 1175 up=seal_impression_up, 1176 down=seal_impression_down, 1177 left=seal_impression_left, 1178 right=seal_impression_right, 1179 ), 1180 angle=angle, 1181 ) 1182 ) 1183 1184 return layout_seal_impressions
1186 def generate_disconnected_text_regions( 1187 self, 1188 layout_text_lines: Sequence[LayoutTextLine], 1189 ): 1190 grid_idx_to_layout_text_lines: DefaultDict[int, List[LayoutTextLine]] = defaultdict(list) 1191 for layout_text_line in layout_text_lines: 1192 grid_idx_to_layout_text_lines[layout_text_line.grid_idx].append(layout_text_line) 1193 1194 disconnected_text_regions: List[DisconnectedTextRegion] = [] 1195 1196 for _, layout_text_lines in sorted( 1197 grid_idx_to_layout_text_lines.items(), 1198 key=lambda p: p[0], 1199 ): 1200 layout_text_lines = sorted(layout_text_lines, key=lambda ltl: ltl.text_line_idx) 1201 1202 begin = 0 1203 while begin < len(layout_text_lines): 1204 text_line_height_min = layout_text_lines[begin].text_line_height 1205 text_line_height_max = text_line_height_min 1206 1207 # Find [begin, end) interval satisfying the condition. 1208 end = begin + 1 1209 while end < len(layout_text_lines): 1210 text_line_height = layout_text_lines[end].text_line_height 1211 text_line_height_min = min(text_line_height_min, text_line_height) 1212 text_line_height_max = max(text_line_height_max, text_line_height) 1213 if text_line_height_max / text_line_height_min \ 1214 > self.config.disconnected_text_region_polygons_height_ratio_max: 1215 break 1216 else: 1217 end += 1 1218 1219 # To polygon. 1220 # NOTE: Simply using a bounding box is enough. 1221 # This method is common to all glyph sequences. 1222 cur_layout_text_lines = layout_text_lines[begin:end] 1223 bounding_box = Box( 1224 up=min(ltl.box.up for ltl in cur_layout_text_lines), 1225 down=max(ltl.box.down for ltl in cur_layout_text_lines), 1226 left=min(ltl.box.left for ltl in cur_layout_text_lines), 1227 right=max(ltl.box.right for ltl in cur_layout_text_lines), 1228 ) 1229 step = min( 1230 itertools.chain.from_iterable(ltl.box.shape for ltl in cur_layout_text_lines) 1231 ) 1232 disconnected_text_regions.append( 1233 DisconnectedTextRegion(polygon=bounding_box.to_polygon(step=step)) 1234 ) 1235 1236 # Move to next. 1237 begin = end 1238 1239 return disconnected_text_regions
1241 def generate_non_text_regions( 1242 self, 1243 height: int, 1244 width: int, 1245 layout_text_lines: Sequence[LayoutTextLine], 1246 rng: RandomGenerator, 1247 ): 1248 box_overlapping_validator = BoxOverlappingValidator( 1249 layout_text_line.box for layout_text_line in layout_text_lines 1250 ) 1251 directions = [ 1252 LayoutNonTextLineDirection.UP, 1253 LayoutNonTextLineDirection.DOWN, 1254 LayoutNonTextLineDirection.LEFT, 1255 LayoutNonTextLineDirection.RIGHT, 1256 ] 1257 1258 lntl_boxes: List[Box] = [] 1259 for layout_text_line in layout_text_lines: 1260 ltl_box = layout_text_line.box 1261 1262 for direction_idx in rng.permutation(len(directions)): 1263 direction = directions[direction_idx] 1264 1265 if direction == LayoutNonTextLineDirection.UP: 1266 lntl_box = Box( 1267 up=ltl_box.up - ltl_box.height, 1268 down=ltl_box.up - 1, 1269 left=ltl_box.left, 1270 right=ltl_box.right, 1271 ) 1272 1273 elif direction == LayoutNonTextLineDirection.DOWN: 1274 lntl_box = Box( 1275 up=ltl_box.down + 1, 1276 down=ltl_box.down + ltl_box.height, 1277 left=ltl_box.left, 1278 right=ltl_box.right, 1279 ) 1280 1281 elif direction == LayoutNonTextLineDirection.LEFT: 1282 lntl_box = Box( 1283 up=ltl_box.up, 1284 down=ltl_box.down, 1285 left=ltl_box.left - ltl_box.width, 1286 right=ltl_box.left - 1, 1287 ) 1288 1289 elif direction == LayoutNonTextLineDirection.RIGHT: 1290 lntl_box = Box( 1291 up=ltl_box.up, 1292 down=ltl_box.down, 1293 left=ltl_box.right + 1, 1294 right=ltl_box.right + ltl_box.width, 1295 ) 1296 1297 else: 1298 raise NotImplementedError() 1299 1300 # Ignore invalid box. 1301 if not lntl_box.valid: 1302 continue 1303 if lntl_box.down >= height or lntl_box.right >= width: 1304 continue 1305 1306 assert ltl_box.shape == lntl_box.shape 1307 1308 # Ignore box that is overlapped with any text lines. 1309 if box_overlapping_validator.is_overlapped(lntl_box): 1310 continue 1311 1312 # Keep only the first valid direction. 1313 lntl_boxes.append(lntl_box) 1314 break 1315 1316 step = max( 1317 1, 1318 min(itertools.chain.from_iterable(lntl_box.shape for lntl_box in lntl_boxes)), 1319 ) 1320 non_text_regions = [ 1321 NonTextRegion(polygon=lntl_box.to_polygon(step=step)) for lntl_box in lntl_boxes 1322 ] 1323 return non_text_regions
1325 def run(self, input: PageLayoutStepInput, rng: RandomGenerator): 1326 page_shape_step_output = input.page_shape_step_output 1327 height = page_shape_step_output.height 1328 width = page_shape_step_output.width 1329 1330 # Text lines. 1331 ( 1332 layout_text_lines, 1333 large_text_line_gird, 1334 grids, 1335 ) = self.sample_layout_text_lines(height=height, width=width, rng=rng) 1336 1337 # Images. 1338 layout_images = self.sample_layout_images(height=height, width=width, rng=rng) 1339 1340 # QR codes & Bar codes. 1341 # NOTE: Some layout_text_lines could be dropped. 1342 ( 1343 layout_barcode_qrs, 1344 layout_barcode_code39s, 1345 layout_text_lines, 1346 ) = self.sample_layout_barcode_qrs_and_layout_barcode_code39s( 1347 height=height, 1348 width=width, 1349 layout_text_lines=layout_text_lines, 1350 rng=rng, 1351 ) 1352 1353 # Non-text symbols. 1354 layout_non_text_symbols = self.sample_layout_non_text_symbols( 1355 height=height, 1356 width=width, 1357 layout_text_lines=layout_text_lines, 1358 rng=rng, 1359 ) 1360 1361 # Seal impressions. 1362 layout_seal_impressions = self.sample_layout_seal_impressions( 1363 height=height, 1364 width=width, 1365 layout_text_lines=layout_text_lines, 1366 rng=rng, 1367 ) 1368 1369 # For char-level polygon regression. 1370 disconnected_text_regions = self.generate_disconnected_text_regions( 1371 layout_text_lines=layout_text_lines, 1372 ) 1373 1374 # For sampling negative text region area. 1375 non_text_regions = self.generate_non_text_regions( 1376 height=height, 1377 width=width, 1378 layout_text_lines=layout_text_lines, 1379 rng=rng, 1380 ) 1381 1382 return PageLayoutStepOutput( 1383 page_layout=PageLayout( 1384 height=height, 1385 width=width, 1386 layout_text_lines=layout_text_lines, 1387 layout_non_text_symbols=layout_non_text_symbols, 1388 layout_seal_impressions=layout_seal_impressions, 1389 layout_images=layout_images, 1390 layout_barcode_qrs=layout_barcode_qrs, 1391 layout_barcode_code39s=layout_barcode_code39s, 1392 disconnected_text_regions=disconnected_text_regions, 1393 non_text_regions=non_text_regions, 1394 ), 1395 debug_large_text_line_gird=large_text_line_gird, 1396 debug_grids=grids, 1397 )