vkit.pipeline.text_detection.page_text_region_label
# Copyright 2022 vkit-x Administrator. All Rights Reserved.
#
# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
#
# The commercial license gives you the full rights to create and distribute software
# on your own terms without any SSPL license obligations. For more information,
# please see the "LICENSE_COMMERCIAL.txt" file.
#
# This project is also available under Server Side Public License (SSPL).
# The SSPL licensing is ideal for use cases such as open source projects with
# SSPL distribution, student/academic purposes, hobby projects, internal research
# projects without external distribution, or other projects where all SSPL
# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
from typing import Tuple, Sequence, List, Optional, Mapping, Any
from enum import Enum, unique
import math
import logging

import attrs
from numpy.random import Generator as RandomGenerator
import numpy as np
import cv2 as cv
from sklearn.neighbors import KDTree

from vkit.utility import attrs_lazy_field, unwrap_optional_field, normalize_to_probs
from vkit.element import Point, PointList, Box, Polygon, Mask, ScoreMap
from vkit.mechanism.distortion.geometric.affine import affine_points
from vkit.engine.char_heatmap import (
    char_heatmap_default_engine_executor_factory,
    CharHeatmapDefaultEngineInitConfig,
)
from vkit.engine.char_mask import (
    char_mask_engine_executor_aggregator_factory,
    CharMaskEngineRunConfig,
)
from ..interface import PipelineStep, PipelineStepFactory
from .page_text_region import PageTextRegionStepOutput

logger = logging.getLogger(__name__)


@attrs.define
class PageTextRegionLabelStepConfig:
    """Configuration of `PageTextRegionLabelStep`."""

    # Passed to `char_heatmap_default_engine_executor_factory.create`.
    char_heatmap_default_engine_init_config: CharHeatmapDefaultEngineInitConfig = \
        attrs.field(factory=CharHeatmapDefaultEngineInitConfig)
    # Passed to `char_mask_engine_executor_aggregator_factory.create_engine_executor`.
    char_mask_engine_config: Mapping[str, Any] = attrs.field(factory=lambda: {'type': 'default'})

    # 1 centroid + n deviate points.
    num_deviate_char_regression_labels: int = 1
    # Number of candidate deviate points sampled per char is
    # (this factor) * num_deviate_char_regression_labels, since some candidates get rejected.
    num_deviate_char_regression_labels_candiates_factor: int = 3


@attrs.define
class PageTextRegionLabelStepInput:
    """Input of `PageTextRegionLabelStep`: the output of the page text region step."""

    page_text_region_step_output: PageTextRegionStepOutput


@unique
class PageCharRegressionLabelTag(Enum):
    """Kind of label point: the char polygon's center point, or a sampled deviate point."""

    CENTROID = 'centroid'
    DEVIATE = 'deviate'


PI = float(np.pi)
TWO_PI = float(2 * np.pi)


@attrs.define
class Vector:
    """2D vector with lazily computed length (`distance`) and angle (`theta`)."""

    y: float
    x: float

    # Lazy caches, filled by `lazy_post_init`.
    # NOTE(review): `attrs_lazy_field()` presumably defaults to None and is excluded
    # from `__init__`; confirm in vkit.utility.
    _distance: Optional[float] = attrs_lazy_field()
    _theta: Optional[float] = attrs_lazy_field()

    def lazy_post_init(self):
        """Compute `_distance` and `_theta` once; later calls are no-ops."""
        initialized = (self._distance is not None)
        if initialized:
            return

        self._distance = math.hypot(self.x, self.y)
        # arctan2 yields (-pi, pi]; the modulo maps the angle into [0, 2 * pi).
        self._theta = float(np.arctan2(self.y, self.x)) % TWO_PI

    @property
    def distance(self):
        """Euclidean length of the vector."""
        self.lazy_post_init()
        return unwrap_optional_field(self._distance)

    @property
    def theta(self):
        """Angle of the vector in radians, in [0, 2 * pi)."""
        self.lazy_post_init()
        return unwrap_optional_field(self._theta)

    @classmethod
    def calculate_theta_delta(
        cls,
        vector0: 'Vector',
        vector1: 'Vector',
        clockwise: bool = False,
    ):
        """
        Return the angle from `vector0` to `vector1` in radians.

        The result lies in [-pi, pi) by default. With `clockwise=True`, a negative
        result is shifted by 2 * pi so that the result lies in [0, 2 * pi).
        """
        theta_delta = (vector1.theta - vector0.theta + PI) % TWO_PI - PI
        if clockwise and theta_delta < 0:
            theta_delta += TWO_PI
        return theta_delta

    def dot(self, other: 'Vector'):
        """Return the dot product of this vector and `other`."""
        return self.x * other.x + self.y * other.y


@attrs.define
class PageCharRegressionLabel:
    """
    Regression label that ties one label point to the four corners of a char polygon.

    The lazily derived fields fall into two groups: the axis-aligned bounding of the
    corners (plus the orientation index), and the label-point-to-corner vectors with
    their clockwise angles and the validity flag.
    """

    char_idx: int
    tag: PageCharRegressionLabelTag
    label_point_smooth_y: float
    label_point_smooth_x: float
    downsampled_label_point_y: int
    downsampled_label_point_x: int
    up_left: Point
    up_right: Point
    down_right: Point
    down_left: Point

    is_downsampled: bool = False
    downsample_labeling_factor: int = 1

    # Bounding related lazy fields.
    _bounding_smooth_up: Optional[float] = attrs_lazy_field()
    _bounding_smooth_down: Optional[float] = attrs_lazy_field()
    _bounding_smooth_left: Optional[float] = attrs_lazy_field()
    _bounding_smooth_right: Optional[float] = attrs_lazy_field()
    _bounding_orientation_idx: Optional[int] = attrs_lazy_field()

    # Vectors from the label point to each corner.
    _up_left_vector: Optional[Vector] = attrs_lazy_field()
    _up_right_vector: Optional[Vector] = attrs_lazy_field()
    _down_right_vector: Optional[Vector] = attrs_lazy_field()
    _down_left_vector: Optional[Vector] = attrs_lazy_field()

    # Clockwise angles between consecutive corner vectors, and values derived from them.
    _up_left_to_up_right_angle: Optional[float] = attrs_lazy_field()
    _up_right_to_down_right_angle: Optional[float] = attrs_lazy_field()
    _down_right_to_down_left_angle: Optional[float] = attrs_lazy_field()
    _down_left_to_up_left_angle: Optional[float] = attrs_lazy_field()
    _valid: Optional[bool] = attrs_lazy_field()
    _clockwise_angle_distribution: Optional[Sequence[float]] = attrs_lazy_field()

    @property
    def corner_points(self):
        """Yield the corners in the order up-left, up-right, down-right, down-left."""
        yield from (self.up_left, self.up_right, self.down_right, self.down_left)

    @classmethod
    def get_bounding_orientation_idx(cls, down_left: Point, down_right: Point):
        """
        Map the direction of the down_left -> down_right edge to an index in 0..3.

        The direction angle (in units of pi) is bucketed into four quarter-turn wide
        ranges centered at 0, 0.5, 1 and 1.5, which map to 1, 2, 0 and 3 respectively.
        """
        vector = Vector(
            y=down_right.smooth_y - down_left.smooth_y,
            x=down_right.smooth_x - down_left.smooth_x,
        )
        #        0
        #  ┌───────────┐
        #  │           │
        # 2│           │3
        #  │           │
        #  └───────────┘
        #        1
        factor = vector.theta / PI
        if 1.75 <= factor or factor < 0.25:
            return 1
        elif 0.25 <= factor < 0.75:
            return 2
        elif 0.75 <= factor < 1.25:
            return 0
        elif 1.25 <= factor:
            return 3
        else:
            # Only reachable if no comparison holds (e.g. factor is NaN).
            raise RuntimeError()

    def lazy_post_init(self):
        """Fill whichever group of lazy fields (bounding / vector) is still unset."""
        if self._bounding_smooth_up is None:
            self._bounding_smooth_up = min(point.smooth_y for point in self.corner_points)
            self._bounding_smooth_down = max(point.smooth_y for point in self.corner_points)
            self._bounding_smooth_left = min(point.smooth_x for point in self.corner_points)
            self._bounding_smooth_right = max(point.smooth_x for point in self.corner_points)
            self._bounding_orientation_idx = self.get_bounding_orientation_idx(
                down_left=self.down_left,
                down_right=self.down_right,
            )

        # The vector group is tracked separately since `copy` can carry it over
        # without the bounding group.
        initialized = (self._up_left_vector is not None)
        if initialized:
            return

        self._up_left_vector = Vector(
            y=self.up_left.smooth_y - self.label_point_smooth_y,
            x=self.up_left.smooth_x - self.label_point_smooth_x,
        )
        self._up_right_vector = Vector(
            y=self.up_right.smooth_y - self.label_point_smooth_y,
            x=self.up_right.smooth_x - self.label_point_smooth_x,
        )
        self._down_right_vector = Vector(
            y=self.down_right.smooth_y - self.label_point_smooth_y,
            x=self.down_right.smooth_x - self.label_point_smooth_x,
        )
        self._down_left_vector = Vector(
            y=self.down_left.smooth_y - self.label_point_smooth_y,
            x=self.down_left.smooth_x - self.label_point_smooth_x,
        )

        self._up_left_to_up_right_angle = Vector.calculate_theta_delta(
            self._up_left_vector,
            self._up_right_vector,
            clockwise=True,
        )
        self._up_right_to_down_right_angle = Vector.calculate_theta_delta(
            self._up_right_vector,
            self._down_right_vector,
            clockwise=True,
        )
        self._down_right_to_down_left_angle = Vector.calculate_theta_delta(
            self._down_right_vector,
            self._down_left_vector,
            clockwise=True,
        )
        self._down_left_to_up_left_angle = Vector.calculate_theta_delta(
            self._down_left_vector,
            self._up_left_vector,
            clockwise=True,
        )

        sum_of_angles = sum([
            self._up_left_to_up_right_angle,
            self._up_right_to_down_right_angle,
            self._down_right_to_down_left_angle,
            self._down_left_to_up_left_angle,
        ])
        # Consider valid if deviate within 4 degrees.
        self._valid = math.isclose(sum_of_angles, TWO_PI, rel_tol=0.012)

        self._clockwise_angle_distribution = normalize_to_probs([
            self._up_left_to_up_right_angle,
            self._up_right_to_down_right_angle,
            self._down_right_to_down_left_angle,
            self._down_left_to_up_left_angle,
        ])

    def copy(self, with_non_bounding_related_lazy_fields: bool = False):
        """
        Return a copy created by `attrs.evolve`.

        If `with_non_bounding_related_lazy_fields` is set, the cached vectors, angles,
        validity flag and angle distribution are carried over to the copy as well.
        """
        copied = attrs.evolve(self)

        if with_non_bounding_related_lazy_fields:
            # NOTE: Bounding box related properties are not copied.
            copied._up_left_vector = self._up_left_vector
            copied._up_right_vector = self._up_right_vector
            copied._down_right_vector = self._down_right_vector
            copied._down_left_vector = self._down_left_vector
            copied._up_left_to_up_right_angle = self._up_left_to_up_right_angle
            copied._up_right_to_down_right_angle = self._up_right_to_down_right_angle
            copied._down_right_to_down_left_angle = self._down_right_to_down_left_angle
            copied._down_left_to_up_left_angle = self._down_left_to_up_left_angle
            copied._valid = self._valid
            copied._clockwise_angle_distribution = self._clockwise_angle_distribution

        return copied

    def to_shifted_page_char_regression_label(self, offset_y: int, offset_x: int):
        """
        Return a copy with the label point and all corners translated by the offsets.

        Requires a valid label that has not been downsampled.
        """
        assert self.valid and not self.is_downsampled

        # Shift operation doesn't change the lazy fields.
        shifted = self.copy(with_non_bounding_related_lazy_fields=True)

        shifted.label_point_smooth_y = self.label_point_smooth_y + offset_y
        shifted.label_point_smooth_x = self.label_point_smooth_x + offset_x
        shifted.downsampled_label_point_y = int(shifted.label_point_smooth_y)
        shifted.downsampled_label_point_x = int(shifted.label_point_smooth_x)
        shifted.up_left = self.up_left.to_shifted_point(offset_y=offset_y, offset_x=offset_x)
        shifted.up_right = self.up_right.to_shifted_point(offset_y=offset_y, offset_x=offset_x)
        shifted.down_right = self.down_right.to_shifted_point(offset_y=offset_y, offset_x=offset_x)
        shifted.down_left = self.down_left.to_shifted_point(offset_y=offset_y, offset_x=offset_x)

        return shifted

    def to_downsampled_page_char_regression_label(self, downsample_labeling_factor: int):
        """
        Return a copy whose integer label point is floor-divided by the factor.

        The smooth label point and the corners are left untouched. Requires a valid
        label that has not been downsampled.
        """
        assert self.valid and not self.is_downsampled

        # Downsample operation doesn't change the lazy fields.
        downsampled = self.copy(with_non_bounding_related_lazy_fields=True)
        # Mark as downsampled hence disables shift & downsample opts.
        downsampled.is_downsampled = True
        # Should be helpful in training.
        downsampled.downsample_labeling_factor = downsample_labeling_factor

        downsampled.downsampled_label_point_y = \
            int(self.label_point_smooth_y // downsample_labeling_factor)
        downsampled.downsampled_label_point_x = \
            int(self.label_point_smooth_x // downsample_labeling_factor)

        return downsampled

    @property
    def bounding_smooth_up(self):
        """Smallest `smooth_y` among the corners."""
        self.lazy_post_init()
        return unwrap_optional_field(self._bounding_smooth_up)

    @property
    def bounding_smooth_down(self):
        """Largest `smooth_y` among the corners."""
        self.lazy_post_init()
        return unwrap_optional_field(self._bounding_smooth_down)

    @property
    def bounding_smooth_left(self):
        """Smallest `smooth_x` among the corners."""
        self.lazy_post_init()
        return unwrap_optional_field(self._bounding_smooth_left)

    @property
    def bounding_smooth_right(self):
        """Largest `smooth_x` among the corners."""
        self.lazy_post_init()
        return unwrap_optional_field(self._bounding_smooth_right)

    @property
    def bounding_center_point(self):
        """Center of the axis-aligned bounding of the corners."""
        return Point.create(
            y=(self.bounding_smooth_up + self.bounding_smooth_down) / 2,
            x=(self.bounding_smooth_left + self.bounding_smooth_right) / 2,
        )

    @property
    def bounding_smooth_shape(self):
        """`(height, width)` of the axis-aligned bounding of the corners."""
        height = self.bounding_smooth_down - self.bounding_smooth_up
        width = self.bounding_smooth_right - self.bounding_smooth_left
        return height, width

    @property
    def bounding_orientation_idx(self):
        """Orientation index computed by `get_bounding_orientation_idx`."""
        self.lazy_post_init()
        return unwrap_optional_field(self._bounding_orientation_idx)

    @property
    def valid(self):
        """Whether the four clockwise angles sum to 2 * pi within the tolerance."""
        self.lazy_post_init()
        return unwrap_optional_field(self._valid)

    def generate_up_left_offsets(self):
        """Return the `(y, x)` offsets from the label point to the up-left corner."""
        self.lazy_post_init()
        up_left_vector = unwrap_optional_field(self._up_left_vector)
        return up_left_vector.y, up_left_vector.x

    def generate_clockwise_angle_distribution(self):
        """Return the four clockwise angles normalized by `normalize_to_probs`."""
        self.lazy_post_init()
        return unwrap_optional_field(self._clockwise_angle_distribution)

    def generate_clockwise_distances(self):
        """Return the label-point-to-corner distances, in `corner_points` order."""
        self.lazy_post_init()
        return (
            unwrap_optional_field(self._up_left_vector).distance,
            unwrap_optional_field(self._up_right_vector).distance,
            unwrap_optional_field(self._down_right_vector).distance,
            unwrap_optional_field(self._down_left_vector).distance,
        )


@attrs.define
class PageTextRegionLabelStepOutput:
    """Labels produced by `PageTextRegionLabelStep.run`."""

    page_char_mask: Mask
    page_char_height_score_map: ScoreMap
    page_char_gaussian_score_map: ScoreMap
    page_char_regression_labels: Sequence[PageCharRegressionLabel]
    page_char_bounding_box_mask: Mask


class PageTextRegionLabelStep(
    PipelineStep[
        PageTextRegionLabelStepConfig,
        PageTextRegionLabelStepInput,
        PageTextRegionLabelStepOutput,
    ]
):  # yapf: disable
    """Pipeline step that derives char-level labels from the page text region output."""

    def __init__(self, config: PageTextRegionLabelStepConfig):
        """Create the char heatmap and char mask engine executors from `config`."""
        super().__init__(config)

        self.char_heatmap_default_engine_executor = \
            char_heatmap_default_engine_executor_factory.create(
                self.config.char_heatmap_default_engine_init_config
            )
        self.char_mask_engine_executor = \
            char_mask_engine_executor_aggregator_factory.create_engine_executor(
                self.config.char_mask_engine_config
            )

    def generate_page_char_mask(
        self,
        shape: Tuple[int, int],
        page_inactive_mask: Mask,
        page_char_polygons: Sequence[Polygon],
        page_text_region_polygons: Sequence[Polygon],
        page_char_polygon_text_region_polygon_indices: Sequence[int],
    ):
        """
        Run the char mask engine over all char polygons.

        Each char polygon is paired with the text region polygon it is indexed to,
        which is passed to the engine as the char's bounding polygon.

        Returns:
            `(combined_chars_mask, char_masks)` as produced by the engine.
        """
        height, width = shape
        result = self.char_mask_engine_executor.run(
            CharMaskEngineRunConfig(
                height=height,
                width=width,
                char_polygons=page_char_polygons,
                char_bounding_polygons=[
                    page_text_region_polygons[idx]
                    for idx in page_char_polygon_text_region_polygon_indices
                ],
            ),
        )

        # NOTE(review): presumably sets the combined mask to 0 wherever the page is
        # inactive; confirm against Mask.fill_mask.
        page_inactive_mask.fill_mask(result.combined_chars_mask, 0)

        return result.combined_chars_mask, result.char_masks

    @classmethod
    def generate_page_char_height_score_map(
        cls,
        shape: Tuple[int, int],
        page_inactive_mask: Mask,
        page_char_polygons: Sequence[Polygon],
        fill_score_map_char_masks: Optional[Sequence[Mask]],
    ):
        """
        Build a score map filled with each char's rectangular height.

        Chars are filled from the tallest to the shortest. If
        `fill_score_map_char_masks` is given, the per-char mask is used for filling
        instead of the char polygon.
        """
        rectangular_heights = [
            char_polygon.get_rectangular_height() for char_polygon in page_char_polygons
        ]
        # Descending height order.
        # NOTE(review): presumably so that shorter chars, filled later, take
        # precedence where chars overlap; confirm the fill semantics.
        sorted_indices: Tuple[int, ...] = tuple(reversed(np.asarray(rectangular_heights).argsort()))

        page_char_height_score_map = ScoreMap.from_shape(shape, is_prob=False)
        for idx in sorted_indices:
            char_polygon = page_char_polygons[idx]
            rectangular_height = rectangular_heights[idx]
            if fill_score_map_char_masks is None:
                char_polygon.fill_score_map(
                    page_char_height_score_map,
                    value=rectangular_height,
                )
            else:
                char_mask = fill_score_map_char_masks[idx]
                char_mask.fill_score_map(
                    page_char_height_score_map,
                    value=rectangular_height,
                )

        page_inactive_mask.fill_score_map(page_char_height_score_map, 0.0)

        return page_char_height_score_map

    def generate_page_char_gaussian_score_map(
        self,
        shape: Tuple[int, int],
        page_char_polygons: Sequence[Polygon],
    ):
        """Run the char heatmap engine and return the score map of its result."""
        height, width = shape
        char_heatmap = self.char_heatmap_default_engine_executor.run({
            'height': height,
            'width': width,
            'char_polygons': page_char_polygons,
        })
        return char_heatmap.score_map

    def generate_page_char_regression_labels(
        self,
        shape: Tuple[int, int],
        page_char_polygons: Sequence[Polygon],
        rng: RandomGenerator,
    ):
        """
        Generate regression labels for every char polygon.

        For each char, one CENTROID label is placed at the polygon's center point.
        If `num_deviate_char_regression_labels` is positive, candidate points are
        sampled inside the polygon, and up to that many valid candidates whose nearest
        center point is the char's own center are added as DEVIATE labels.

        Chars whose bounding box is too small to sample from only get the CENTROID
        label, and a page without chars yields an empty list.

        Returns:
            The list of labels, grouped by char in input order.
        """
        page_height, page_width = shape

        page_char_regression_labels: List[PageCharRegressionLabel] = []

        if not page_char_polygons:
            # A KD tree cannot be built from zero center points, and there is
            # nothing to label anyway.
            return page_char_regression_labels

        # Build a KD tree for removing deviate points that are too close to another center point.
        center_points = PointList()
        for polygon in page_char_polygons:
            center_points.append(polygon.get_center_point())
        kd_tree = KDTree(center_points.to_np_array())

        num_deviate_labels = self.config.num_deviate_char_regression_labels
        # Some points are invalid, hence multiply the number of samplings by a factor.
        num_deviate_candidates = (
            self.config.num_deviate_char_regression_labels_candiates_factor * num_deviate_labels
        )

        for char_idx, (polygon, center_point) in enumerate(zip(page_char_polygons, center_points)):
            assert polygon.num_points == 4
            up_left, up_right, down_right, down_left = polygon.points

            # 1. The centroid of char polygon.
            label = PageCharRegressionLabel(
                char_idx=char_idx,
                tag=PageCharRegressionLabelTag.CENTROID,
                label_point_smooth_y=center_point.smooth_y,
                label_point_smooth_x=center_point.smooth_x,
                downsampled_label_point_y=center_point.y,
                downsampled_label_point_x=center_point.x,
                up_left=up_left,
                up_right=up_right,
                down_right=down_right,
                down_left=down_left,
            )
            # The centroid labeling must be valid.
            assert label.valid
            page_char_regression_labels.append(label)

            # 2. The deviate points.
            if num_deviate_labels <= 0:
                # Generating deviate points are optional.
                continue

            bounding_box = polygon.bounding_box

            if bounding_box.height < 3 or bounding_box.width < 3:
                # `rng.integers(1, n - 1)` needs at least one non-border position,
                # i.e. n >= 3; otherwise it raises ValueError (low >= high).
                logger.warning(f'Cannot sample enough deviate labels for char_polygon={polygon}')
                continue

            # Sample points in shifted bounding box space.
            # Also not to sample the points lying on the border to increase the chance of valid.
            deviate_points_in_bounding_box = PointList()
            for _ in range(num_deviate_candidates):
                y = int(rng.integers(1, bounding_box.height - 1))
                x = int(rng.integers(1, bounding_box.width - 1))
                deviate_points_in_bounding_box.append(Point.create(y=y, x=x))

            # Then transform to the polygon space.
            np_src_points = np.asarray(
                [
                    (0, 0),
                    (bounding_box.width - 1, 0),
                    (bounding_box.width - 1, bounding_box.height - 1),
                    (0, bounding_box.height - 1),
                ],
                dtype=np.float32,
            )
            np_dst_points = polygon.internals.np_self_relative_points
            trans_mat = cv.getPerspectiveTransform(
                np_src_points,
                np_dst_points,
                cv.DECOMP_SVD,
            )

            deviate_points = PointList()
            for shifted_deviate_point in affine_points(
                trans_mat,
                deviate_points_in_bounding_box.to_point_tuple(),
            ):
                # Move from bounding-box-relative coordinates back to page coordinates.
                y = bounding_box.up + shifted_deviate_point.smooth_y
                x = bounding_box.left + shifted_deviate_point.smooth_x
                assert 0 <= y < page_height
                assert 0 <= x < page_width
                deviate_points.append(Point.create(y=y, x=x))

            # Remove those are too close to another center point: keep a candidate
            # only if its nearest center point belongs to this char.
            _, np_kd_nbr_indices = kd_tree.query(deviate_points.to_np_array())
            preserve_flags: List[bool] = [
                idx == char_idx for idx in np_kd_nbr_indices[:, 0].tolist()
            ]

            # Build labels.
            num_valid_deviate_char_regression_labels = 0
            for deviate_point, preserve_flag in zip(deviate_points, preserve_flags):
                if num_valid_deviate_char_regression_labels >= num_deviate_labels:
                    break

                if not preserve_flag:
                    continue

                label = PageCharRegressionLabel(
                    char_idx=char_idx,
                    tag=PageCharRegressionLabelTag.DEVIATE,
                    label_point_smooth_y=deviate_point.smooth_y,
                    label_point_smooth_x=deviate_point.smooth_x,
                    downsampled_label_point_y=deviate_point.y,
                    downsampled_label_point_x=deviate_point.x,
                    up_left=up_left,
                    up_right=up_right,
                    down_right=down_right,
                    down_left=down_left,
                )
                if label.valid:
                    page_char_regression_labels.append(label)
                    num_valid_deviate_char_regression_labels += 1

            if num_valid_deviate_char_regression_labels < num_deviate_labels:
                logger.warning(f'Cannot sample enough deviate labels for char_polygon={polygon}')

        return page_char_regression_labels

    def generate_page_char_bounding_box_mask(
        self,
        shape: Tuple[int, int],
        page_char_regression_labels: Sequence[PageCharRegressionLabel],
    ):
        """
        Build a mask covering the bounding box of every label's corners.

        Box edges are rounded outwards (floor for up/left, ceil for down/right).
        """
        page_char_bounding_box_mask = Mask.from_shape(shape)
        for page_char_regression_label in page_char_regression_labels:
            box = Box(
                up=math.floor(page_char_regression_label.bounding_smooth_up),
                down=math.ceil(page_char_regression_label.bounding_smooth_down),
                left=math.floor(page_char_regression_label.bounding_smooth_left),
                right=math.ceil(page_char_regression_label.bounding_smooth_right),
            )
            box.fill_mask(page_char_bounding_box_mask)
        return page_char_bounding_box_mask

    def run(self, input: PageTextRegionLabelStepInput, rng: RandomGenerator):
        """Generate all labels for the page and bundle them into the step output."""
        page_text_region_step_output = input.page_text_region_step_output
        page_image = page_text_region_step_output.page_image
        page_active_mask = page_text_region_step_output.page_active_mask
        page_char_polygons = page_text_region_step_output.page_char_polygons
        page_text_region_polygons = page_text_region_step_output.page_text_region_polygons
        page_char_polygon_text_region_polygon_indices = \
            page_text_region_step_output.page_char_polygon_text_region_polygon_indices

        page_inactive_mask = page_active_mask.to_inverted_mask()
        page_char_mask, fill_score_map_char_masks = self.generate_page_char_mask(
            shape=page_image.shape,
            page_inactive_mask=page_inactive_mask,
            page_char_polygons=page_char_polygons,
            page_text_region_polygons=page_text_region_polygons,
            page_char_polygon_text_region_polygon_indices=(
                page_char_polygon_text_region_polygon_indices
            ),
        )

        # NOTE: page_char_height_score_map is different from the one defined in page distortion.
        # TODO: Resolve the inconsistency.
        page_char_height_score_map = self.generate_page_char_height_score_map(
            shape=page_image.shape,
            page_inactive_mask=page_inactive_mask,
            page_char_polygons=page_char_polygons,
            fill_score_map_char_masks=fill_score_map_char_masks,
        )

        page_char_gaussian_score_map = self.generate_page_char_gaussian_score_map(
            page_image.shape,
            page_char_polygons,
        )

        page_char_regression_labels = self.generate_page_char_regression_labels(
            page_image.shape,
            page_char_polygons,
            rng,
        )

        page_char_bounding_box_mask = self.generate_page_char_bounding_box_mask(
            page_image.shape,
            page_char_regression_labels,
        )

        return PageTextRegionLabelStepOutput(
            page_char_mask=page_char_mask,
            page_char_height_score_map=page_char_height_score_map,
            page_char_gaussian_score_map=page_char_gaussian_score_map,
            page_char_regression_labels=page_char_regression_labels,
            page_char_bounding_box_mask=page_char_bounding_box_mask,
        )


page_text_region_label_step_factory = PipelineStepFactory(PageTextRegionLabelStep)
class PageTextRegionLabelStepConfig:
    # Configuration of the page text region label step.

    # Init config for the char heatmap engine; defaults to a freshly constructed
    # CharHeatmapDefaultEngineInitConfig.
    char_heatmap_default_engine_init_config: CharHeatmapDefaultEngineInitConfig = \
        attrs.field(factory=CharHeatmapDefaultEngineInitConfig)
    # Config mapping for the char mask engine; defaults to {'type': 'default'}.
    char_mask_engine_config: Mapping[str, Any] = attrs.field(factory=lambda: {'type': 'default'})

    # 1 centroid + n deviate points.
    num_deviate_char_regression_labels: int = 1
    # Multiplier applied to num_deviate_char_regression_labels to get the number of
    # candidate deviate points sampled per char.
    num_deviate_char_regression_labels_candiates_factor: int = 3
# Initializer generated by attrs for PageTextRegionLabelStepConfig.
# `NOTHING` marks "argument not supplied": in that case the field's factory is
# called to build the default value.
def __init__(
    self,
    char_heatmap_default_engine_init_config=NOTHING,
    char_mask_engine_config=NOTHING,
    num_deviate_char_regression_labels=attr_dict['num_deviate_char_regression_labels'].default,
    num_deviate_char_regression_labels_candiates_factor=attr_dict['num_deviate_char_regression_labels_candiates_factor'].default
):
    if char_heatmap_default_engine_init_config is not NOTHING:
        self.char_heatmap_default_engine_init_config = char_heatmap_default_engine_init_config
    else:
        # Not supplied: build the default through the field's factory.
        self.char_heatmap_default_engine_init_config = __attr_factory_char_heatmap_default_engine_init_config()
    if char_mask_engine_config is not NOTHING:
        self.char_mask_engine_config = char_mask_engine_config
    else:
        # Not supplied: build the default through the field's factory.
        self.char_mask_engine_config = __attr_factory_char_mask_engine_config()
    # Plain-default fields are stored as passed.
    self.num_deviate_char_regression_labels = num_deviate_char_regression_labels
    self.num_deviate_char_regression_labels_candiates_factor = num_deviate_char_regression_labels_candiates_factor
Method generated by attrs for class PageTextRegionLabelStepConfig.
# Initializer generated by attrs for PageTextRegionLabelStepInput.
def __init__(self, page_text_region_step_output):
    # The single required field is stored as passed.
    self.page_text_region_step_output = page_text_region_step_output
Method generated by attrs for class PageTextRegionLabelStepInput.
An enumeration.
Inherited Members
- enum.Enum
- name
- value
class Vector:
    """
    2D vector whose length and angle are computed on first access and then cached.

    The angle (`theta`) is expressed in radians and normalized to [0, 2 * pi).
    """

    y: float
    x: float

    _distance: Optional[float] = attrs_lazy_field()
    _theta: Optional[float] = attrs_lazy_field()

    def lazy_post_init(self):
        """Populate the cached length and angle; does nothing once they are set."""
        if self._distance is not None:
            return

        self._distance = math.hypot(self.x, self.y)
        # Normalize the arctan2 result from (-pi, pi] into [0, 2 * pi).
        raw_theta = float(np.arctan2(self.y, self.x))
        self._theta = raw_theta % TWO_PI

    @property
    def distance(self):
        """Euclidean length of the vector."""
        self.lazy_post_init()
        return unwrap_optional_field(self._distance)

    @property
    def theta(self):
        """Angle of the vector in radians, within [0, 2 * pi)."""
        self.lazy_post_init()
        return unwrap_optional_field(self._theta)

    @classmethod
    def calculate_theta_delta(
        cls,
        vector0: 'Vector',
        vector1: 'Vector',
        clockwise: bool = False,
    ):
        """
        Angle from `vector0` to `vector1`, in radians.

        Without `clockwise` the result is wrapped into [-pi, pi). With `clockwise`,
        negative results are lifted by 2 * pi, giving a value in [0, 2 * pi).
        """
        difference = vector1.theta - vector0.theta
        wrapped = (difference + PI) % TWO_PI - PI
        if wrapped < 0 and clockwise:
            return wrapped + TWO_PI
        return wrapped

    def dot(self, other: 'Vector'):
        """Dot product with `other`."""
        x_term = self.x * other.x
        y_term = self.y * other.y
        return x_term + y_term
# Initializer generated by attrs for Vector.
def __init__(self, y, x):
    self.y = y
    self.x = x
    # The lazy caches are not constructor parameters; they start at the default
    # declared on their fields.
    self._distance = attr_dict['_distance'].default
    self._theta = attr_dict['_theta'].default
Method generated by attrs for class Vector.
class PageCharRegressionLabel:
    """
    Regression label linking a label point to the four corners of a char polygon.

    Derived values are cached lazily in two independent groups:
    the axis-aligned bounding of the corners (with the orientation index), and the
    label-point-to-corner vectors together with their clockwise angles.
    """

    char_idx: int
    tag: PageCharRegressionLabelTag
    label_point_smooth_y: float
    label_point_smooth_x: float
    downsampled_label_point_y: int
    downsampled_label_point_x: int
    up_left: Point
    up_right: Point
    down_right: Point
    down_left: Point

    is_downsampled: bool = False
    downsample_labeling_factor: int = 1

    _bounding_smooth_up: Optional[float] = attrs_lazy_field()
    _bounding_smooth_down: Optional[float] = attrs_lazy_field()
    _bounding_smooth_left: Optional[float] = attrs_lazy_field()
    _bounding_smooth_right: Optional[float] = attrs_lazy_field()
    _bounding_orientation_idx: Optional[int] = attrs_lazy_field()

    _up_left_vector: Optional[Vector] = attrs_lazy_field()
    _up_right_vector: Optional[Vector] = attrs_lazy_field()
    _down_right_vector: Optional[Vector] = attrs_lazy_field()
    _down_left_vector: Optional[Vector] = attrs_lazy_field()

    _up_left_to_up_right_angle: Optional[float] = attrs_lazy_field()
    _up_right_to_down_right_angle: Optional[float] = attrs_lazy_field()
    _down_right_to_down_left_angle: Optional[float] = attrs_lazy_field()
    _down_left_to_up_left_angle: Optional[float] = attrs_lazy_field()
    _valid: Optional[bool] = attrs_lazy_field()
    _clockwise_angle_distribution: Optional[Sequence[float]] = attrs_lazy_field()

    @property
    def corner_points(self):
        """Iterate the corners: up-left, up-right, down-right, down-left."""
        for corner in (self.up_left, self.up_right, self.down_right, self.down_left):
            yield corner

    @classmethod
    def get_bounding_orientation_idx(cls, down_left: Point, down_right: Point):
        """
        Classify the direction of the down_left -> down_right edge into an index 0..3.

        The edge angle, measured in units of pi, falls into one of four quarter-turn
        buckets centered at 0, 0.5, 1 and 1.5; these map to 1, 2, 0 and 3.
        """
        bottom_edge = Vector(
            y=down_right.smooth_y - down_left.smooth_y,
            x=down_right.smooth_x - down_left.smooth_x,
        )
        #        0
        #  ┌───────────┐
        #  │           │
        # 2│           │3
        #  │           │
        #  └───────────┘
        #        1
        turns = bottom_edge.theta / PI
        if turns >= 1.75 or turns < 0.25:
            return 1
        if 0.25 <= turns < 0.75:
            return 2
        if 0.75 <= turns < 1.25:
            return 0
        if turns >= 1.25:
            return 3
        # No bucket matched (e.g. NaN angle).
        raise RuntimeError()

    def lazy_post_init(self):
        """Compute the bounding group and/or the vector group if not cached yet."""
        if self._bounding_smooth_up is None:
            corners = tuple(self.corner_points)
            smooth_ys = [corner.smooth_y for corner in corners]
            smooth_xs = [corner.smooth_x for corner in corners]
            self._bounding_smooth_up = min(smooth_ys)
            self._bounding_smooth_down = max(smooth_ys)
            self._bounding_smooth_left = min(smooth_xs)
            self._bounding_smooth_right = max(smooth_xs)
            self._bounding_orientation_idx = self.get_bounding_orientation_idx(
                down_left=self.down_left,
                down_right=self.down_right,
            )

        # The vector group may already be present on a copy that lacks the bounding group.
        if self._up_left_vector is not None:
            return

        def vector_to(corner):
            # Vector pointing from the label point to the given corner.
            return Vector(
                y=corner.smooth_y - self.label_point_smooth_y,
                x=corner.smooth_x - self.label_point_smooth_x,
            )

        self._up_left_vector = vector_to(self.up_left)
        self._up_right_vector = vector_to(self.up_right)
        self._down_right_vector = vector_to(self.down_right)
        self._down_left_vector = vector_to(self.down_left)

        clockwise_vectors = (
            self._up_left_vector,
            self._up_right_vector,
            self._down_right_vector,
            self._down_left_vector,
        )
        # Pair every vector with its clockwise successor, wrapping around at the end.
        successors = clockwise_vectors[1:] + clockwise_vectors[:1]
        angles = [
            Vector.calculate_theta_delta(start, end, clockwise=True)
            for start, end in zip(clockwise_vectors, successors)
        ]
        (
            self._up_left_to_up_right_angle,
            self._up_right_to_down_right_angle,
            self._down_right_to_down_left_angle,
            self._down_left_to_up_left_angle,
        ) = angles

        # Consider valid if deviate within 4 degrees.
        self._valid = math.isclose(sum(angles), TWO_PI, rel_tol=0.012)

        self._clockwise_angle_distribution = normalize_to_probs(angles)

    def copy(self, with_non_bounding_related_lazy_fields: bool = False):
        """
        Duplicate the label through `attrs.evolve`.

        When `with_non_bounding_related_lazy_fields` is true, the cached vectors,
        angles, validity flag and angle distribution are transferred to the duplicate.
        """
        duplicate = attrs.evolve(self)
        if not with_non_bounding_related_lazy_fields:
            return duplicate

        # NOTE: Bounding box related properties are not copied.
        for field_name in (
            '_up_left_vector',
            '_up_right_vector',
            '_down_right_vector',
            '_down_left_vector',
            '_up_left_to_up_right_angle',
            '_up_right_to_down_right_angle',
            '_down_right_to_down_left_angle',
            '_down_left_to_up_left_angle',
            '_valid',
            '_clockwise_angle_distribution',
        ):
            setattr(duplicate, field_name, getattr(self, field_name))

        return duplicate

    def to_shifted_page_char_regression_label(self, offset_y: int, offset_x: int):
        """
        Translate the label point and every corner by `(offset_y, offset_x)`.

        Only allowed on a valid label that has not been downsampled.
        """
        assert self.valid and not self.is_downsampled

        # Shift operation doesn't change the lazy fields.
        moved = self.copy(with_non_bounding_related_lazy_fields=True)

        moved_smooth_y = self.label_point_smooth_y + offset_y
        moved_smooth_x = self.label_point_smooth_x + offset_x
        moved.label_point_smooth_y = moved_smooth_y
        moved.label_point_smooth_x = moved_smooth_x
        moved.downsampled_label_point_y = int(moved_smooth_y)
        moved.downsampled_label_point_x = int(moved_smooth_x)

        moved.up_left = self.up_left.to_shifted_point(offset_y=offset_y, offset_x=offset_x)
        moved.up_right = self.up_right.to_shifted_point(offset_y=offset_y, offset_x=offset_x)
        moved.down_right = self.down_right.to_shifted_point(offset_y=offset_y, offset_x=offset_x)
        moved.down_left = self.down_left.to_shifted_point(offset_y=offset_y, offset_x=offset_x)

        return moved

    def to_downsampled_page_char_regression_label(self, downsample_labeling_factor: int):
        """
        Produce a copy whose integer label point is floor-divided by the factor.

        The smooth label point and the corners stay as they are. Only allowed on a
        valid label that has not been downsampled.
        """
        assert self.valid and not self.is_downsampled

        # Downsample operation doesn't change the lazy fields.
        reduced = self.copy(with_non_bounding_related_lazy_fields=True)
        # Mark as downsampled hence disables shift & downsample opts.
        reduced.is_downsampled = True
        # Should be helpful in training.
        reduced.downsample_labeling_factor = downsample_labeling_factor

        cell_y = self.label_point_smooth_y // downsample_labeling_factor
        cell_x = self.label_point_smooth_x // downsample_labeling_factor
        reduced.downsampled_label_point_y = int(cell_y)
        reduced.downsampled_label_point_x = int(cell_x)

        return reduced

    @property
    def bounding_smooth_up(self):
        """Minimum `smooth_y` over the corners."""
        self.lazy_post_init()
        return unwrap_optional_field(self._bounding_smooth_up)

    @property
    def bounding_smooth_down(self):
        """Maximum `smooth_y` over the corners."""
        self.lazy_post_init()
        return unwrap_optional_field(self._bounding_smooth_down)

    @property
    def bounding_smooth_left(self):
        """Minimum `smooth_x` over the corners."""
        self.lazy_post_init()
        return unwrap_optional_field(self._bounding_smooth_left)

    @property
    def bounding_smooth_right(self):
        """Maximum `smooth_x` over the corners."""
        self.lazy_post_init()
        return unwrap_optional_field(self._bounding_smooth_right)

    @property
    def bounding_center_point(self):
        """Midpoint of the axis-aligned bounding of the corners."""
        center_y = (self.bounding_smooth_up + self.bounding_smooth_down) / 2
        center_x = (self.bounding_smooth_left + self.bounding_smooth_right) / 2
        return Point.create(y=center_y, x=center_x)

    @property
    def bounding_smooth_shape(self):
        """`(height, width)` of the axis-aligned bounding of the corners."""
        return (
            self.bounding_smooth_down - self.bounding_smooth_up,
            self.bounding_smooth_right - self.bounding_smooth_left,
        )

    @property
    def bounding_orientation_idx(self):
        """Cached result of `get_bounding_orientation_idx` for this label's corners."""
        self.lazy_post_init()
        return unwrap_optional_field(self._bounding_orientation_idx)

    @property
    def valid(self):
        """True if the four clockwise angles add up to 2 * pi within the tolerance."""
        self.lazy_post_init()
        return unwrap_optional_field(self._valid)

    def generate_up_left_offsets(self):
        """Return `(y, x)` offsets from the label point to the up-left corner."""
        self.lazy_post_init()
        offset_vector = unwrap_optional_field(self._up_left_vector)
        return offset_vector.y, offset_vector.x

    def generate_clockwise_angle_distribution(self):
        """Return the clockwise angles after `normalize_to_probs`."""
        self.lazy_post_init()
        return unwrap_optional_field(self._clockwise_angle_distribution)

    def generate_clockwise_distances(self):
        """Return the distances from the label point to the corners, in corner order."""
        self.lazy_post_init()
        return tuple(
            unwrap_optional_field(corner_vector).distance
            for corner_vector in (
                self._up_left_vector,
                self._up_right_vector,
                self._down_right_vector,
                self._down_left_vector,
            )
        )
def __init__(
    self,
    char_idx,
    tag,
    label_point_smooth_y,
    label_point_smooth_x,
    downsampled_label_point_y,
    downsampled_label_point_x,
    up_left,
    up_right,
    down_right,
    down_left,
    is_downsampled=attr_dict['is_downsampled'].default,
    downsample_labeling_factor=attr_dict['downsample_labeling_factor'].default,
):
    # Fields provided by the caller.
    self.char_idx = char_idx
    self.tag = tag
    self.label_point_smooth_y = label_point_smooth_y
    self.label_point_smooth_x = label_point_smooth_x
    self.downsampled_label_point_y = downsampled_label_point_y
    self.downsampled_label_point_x = downsampled_label_point_x
    self.up_left = up_left
    self.up_right = up_right
    self.down_right = down_right
    self.down_left = down_left
    self.is_downsampled = is_downsampled
    self.downsample_labeling_factor = downsample_labeling_factor

    # Fields that are not constructor arguments: each one starts from the default
    # declared on its attribute definition.
    for non_init_field_name in (
        '_bounding_smooth_up',
        '_bounding_smooth_down',
        '_bounding_smooth_left',
        '_bounding_smooth_right',
        '_bounding_orientation_idx',
        '_up_left_vector',
        '_up_right_vector',
        '_down_right_vector',
        '_down_left_vector',
        '_up_left_to_up_right_angle',
        '_up_right_to_down_right_angle',
        '_down_right_to_down_left_angle',
        '_down_left_to_up_left_angle',
        '_valid',
        '_clockwise_angle_distribution',
    ):
        setattr(self, non_init_field_name, attr_dict[non_init_field_name].default)
Method generated by attrs for class PageCharRegressionLabel.
@classmethod
def get_bounding_orientation_idx(cls, down_left: Point, down_right: Point):
    """Map the direction of the down_left -> down_right edge to an orientation index.

    The index refers to one side of the box sketched below:

             0
       ┌───────────┐
       │           │
      2│           │3
       │           │
       └───────────┘
             1

    Raises:
        RuntimeError: If the direction matches none of the four ranges.
    """
    delta_y = down_right.smooth_y - down_left.smooth_y
    delta_x = down_right.smooth_x - down_left.smooth_x
    # Express the direction as a multiple of PI.
    # NOTE(review): assumes Vector.theta lies in [0, 2 * PI) -- confirm against Vector.
    factor = Vector(y=delta_y, x=delta_x).theta / PI

    if factor < 0.25 or factor >= 1.75:
        return 1
    if factor < 0.75:
        return 2
    if factor < 1.25:
        return 0
    if factor >= 1.25:
        return 3
    # Only reachable when every comparison above is False.
    raise RuntimeError()
def lazy_post_init(self):
    """Fill in the lazily computed fields; a no-op for the parts already computed.

    The bounding-box fields and the vector/angle fields are tracked separately, so
    that a copy carrying only the vector/angle fields still gets its bounding box
    computed here.
    """
    if self._bounding_smooth_up is None:
        corner_ys = [point.smooth_y for point in self.corner_points]
        corner_xs = [point.smooth_x for point in self.corner_points]
        self._bounding_smooth_up = min(corner_ys)
        self._bounding_smooth_down = max(corner_ys)
        self._bounding_smooth_left = min(corner_xs)
        self._bounding_smooth_right = max(corner_xs)
        self._bounding_orientation_idx = self.get_bounding_orientation_idx(
            down_left=self.down_left,
            down_right=self.down_right,
        )

    # The up-left vector doubles as the "already initialized" marker.
    if self._up_left_vector is not None:
        return

    def vector_from_label_point(corner):
        # Offset of a corner relative to the label point.
        return Vector(
            y=corner.smooth_y - self.label_point_smooth_y,
            x=corner.smooth_x - self.label_point_smooth_x,
        )

    def clockwise_delta(begin_vector, end_vector):
        return Vector.calculate_theta_delta(begin_vector, end_vector, clockwise=True)

    self._up_left_vector = vector_from_label_point(self.up_left)
    self._up_right_vector = vector_from_label_point(self.up_right)
    self._down_right_vector = vector_from_label_point(self.down_right)
    self._down_left_vector = vector_from_label_point(self.down_left)

    self._up_left_to_up_right_angle = clockwise_delta(
        self._up_left_vector,
        self._up_right_vector,
    )
    self._up_right_to_down_right_angle = clockwise_delta(
        self._up_right_vector,
        self._down_right_vector,
    )
    self._down_right_to_down_left_angle = clockwise_delta(
        self._down_right_vector,
        self._down_left_vector,
    )
    self._down_left_to_up_left_angle = clockwise_delta(
        self._down_left_vector,
        self._up_left_vector,
    )

    clockwise_angles = [
        self._up_left_to_up_right_angle,
        self._up_right_to_down_right_angle,
        self._down_right_to_down_left_angle,
        self._down_left_to_up_left_angle,
    ]
    # Consider valid if deviate within 4 degrees.
    self._valid = math.isclose(sum(clockwise_angles), TWO_PI, rel_tol=0.012)

    self._clockwise_angle_distribution = normalize_to_probs(clockwise_angles)
def copy(self, with_non_bounding_related_lazy_fields: bool = False):
    """Return a copy of this label created through `attrs.evolve`.

    Args:
        with_non_bounding_related_lazy_fields: When True, also carry over the lazily
            computed vectors, angles, validity flag and angle distribution.

    Returns:
        The copied label.
    """
    duplicate = attrs.evolve(self)
    if not with_non_bounding_related_lazy_fields:
        return duplicate

    # NOTE: Bounding box related properties are not copied.
    for lazy_field_name in (
        '_up_left_vector',
        '_up_right_vector',
        '_down_right_vector',
        '_down_left_vector',
        '_up_left_to_up_right_angle',
        '_up_right_to_down_right_angle',
        '_down_right_to_down_left_angle',
        '_down_left_to_up_left_angle',
        '_valid',
        '_clockwise_angle_distribution',
    ):
        setattr(duplicate, lazy_field_name, getattr(self, lazy_field_name))

    return duplicate
def to_shifted_page_char_regression_label(self, offset_y: int, offset_x: int):
    """Return a copy of this label translated by (offset_y, offset_x).

    Only valid labels that have not been downsampled can be shifted.

    Args:
        offset_y: Offset added to every y coordinate.
        offset_x: Offset added to every x coordinate.

    Returns:
        The shifted copy; this label is left untouched.
    """
    assert self.valid and not self.is_downsampled

    # Shift operation doesn't change the lazy fields.
    shifted = self.copy(with_non_bounding_related_lazy_fields=True)

    shifted_smooth_y = self.label_point_smooth_y + offset_y
    shifted_smooth_x = self.label_point_smooth_x + offset_x
    shifted.label_point_smooth_y = shifted_smooth_y
    shifted.label_point_smooth_x = shifted_smooth_x
    shifted.downsampled_label_point_y = int(shifted_smooth_y)
    shifted.downsampled_label_point_x = int(shifted_smooth_x)

    # Translate the four corners in clockwise order.
    for corner_name in ('up_left', 'up_right', 'down_right', 'down_left'):
        corner = getattr(self, corner_name)
        setattr(
            shifted,
            corner_name,
            corner.to_shifted_point(offset_y=offset_y, offset_x=offset_x),
        )

    return shifted
def to_downsampled_page_char_regression_label(self, downsample_labeling_factor: int):
    """Return a copy of this label whose label point is mapped to the downsampled grid.

    Only valid labels that have not been downsampled can be downsampled.

    Args:
        downsample_labeling_factor: Divisor applied (floor division) to the smooth
            label point coordinates.

    Returns:
        The downsampled copy; this label is left untouched.
    """
    assert self.valid and not self.is_downsampled

    # Downsample operation doesn't change the lazy fields.
    downsampled = self.copy(with_non_bounding_related_lazy_fields=True)

    grid_y = int(self.label_point_smooth_y // downsample_labeling_factor)
    grid_x = int(self.label_point_smooth_x // downsample_labeling_factor)
    downsampled.downsampled_label_point_y = grid_y
    downsampled.downsampled_label_point_x = grid_x

    # Mark as downsampled hence disables shift & downsample opts.
    downsampled.is_downsampled = True
    # Should be helpful in training.
    downsampled.downsample_labeling_factor = downsample_labeling_factor

    return downsampled
def generate_clockwise_distances(self):
    """Return the `distance` of the four label-point-to-corner vectors.

    The tuple is ordered up-left, up-right, down-right, down-left.
    """
    self.lazy_post_init()
    clockwise_vectors = (
        self._up_left_vector,
        self._up_right_vector,
        self._down_right_vector,
        self._down_left_vector,
    )
    return tuple(unwrap_optional_field(vector).distance for vector in clockwise_vectors)
class PageTextRegionLabelStepOutput:
    """Container for the label artifacts of one page."""

    # Mask of the page's characters.
    page_char_mask: Mask
    # Score map of character heights (not a probability map).
    page_char_height_score_map: ScoreMap
    # Score map produced by the char heatmap engine.
    page_char_gaussian_score_map: ScoreMap
    # Regression labels of the page's characters.
    page_char_regression_labels: Sequence[PageCharRegressionLabel]
    # Mask covering the bounding boxes of the regression labels.
    page_char_bounding_box_mask: Mask
def __init__(
    self,
    page_char_mask,
    page_char_height_score_map,
    page_char_gaussian_score_map,
    page_char_regression_labels,
    page_char_bounding_box_mask,
):
    # Store every argument on the attribute of the same name.
    self.page_char_mask = page_char_mask
    self.page_char_height_score_map = page_char_height_score_map
    self.page_char_gaussian_score_map = page_char_gaussian_score_map
    self.page_char_regression_labels = page_char_regression_labels
    self.page_char_bounding_box_mask = page_char_bounding_box_mask
Method generated by attrs for class PageTextRegionLabelStepOutput.
class PageTextRegionLabelStep(
    PipelineStep[
        PageTextRegionLabelStepConfig,
        PageTextRegionLabelStepInput,
        PageTextRegionLabelStepOutput,
    ]
):  # yapf: disable
    """Pipeline step that derives character-level labels from a page text region output."""

    def __init__(self, config: PageTextRegionLabelStepConfig):
        """Create the char heatmap and char mask engine executors from the config."""
        super().__init__(config)

        self.char_heatmap_default_engine_executor = \
            char_heatmap_default_engine_executor_factory.create(
                self.config.char_heatmap_default_engine_init_config
            )
        self.char_mask_engine_executor = \
            char_mask_engine_executor_aggregator_factory.create_engine_executor(
                self.config.char_mask_engine_config
            )

    def generate_page_char_mask(
        self,
        shape: Tuple[int, int],
        page_inactive_mask: Mask,
        page_char_polygons: Sequence[Polygon],
        page_text_region_polygons: Sequence[Polygon],
        page_char_polygon_text_region_polygon_indices: Sequence[int],
    ):
        """Run the char mask engine and clear the inactive area of the combined mask.

        Args:
            shape: (height, width) of the page.
            page_inactive_mask: Mask of the area to be reset to 0 in the combined mask.
            page_char_polygons: Character polygons of the page.
            page_text_region_polygons: Text region polygons of the page.
            page_char_polygon_text_region_polygon_indices: For each char polygon, the
                index of its text region polygon.

        Returns:
            A pair of (combined chars mask, per-char masks) as reported by the engine.
        """
        height, width = shape
        # Resolve the bounding text region polygon of every char polygon.
        char_bounding_polygons = [
            page_text_region_polygons[text_region_idx]
            for text_region_idx in page_char_polygon_text_region_polygon_indices
        ]
        result = self.char_mask_engine_executor.run(
            CharMaskEngineRunConfig(
                height=height,
                width=width,
                char_polygons=page_char_polygons,
                char_bounding_polygons=char_bounding_polygons,
            ),
        )

        combined_chars_mask = result.combined_chars_mask
        page_inactive_mask.fill_mask(combined_chars_mask, 0)

        return combined_chars_mask, result.char_masks

    @classmethod
    def generate_page_char_height_score_map(
        cls,
        shape: Tuple[int, int],
        page_inactive_mask: Mask,
        page_char_polygons: Sequence[Polygon],
        fill_score_map_char_masks: Optional[Sequence[Mask]],
    ):
        """Build a score map holding the rectangular height of each character.

        Args:
            shape: (height, width) of the page.
            page_inactive_mask: Mask of the area to be reset to 0.0 in the score map.
            page_char_polygons: Character polygons of the page.
            fill_score_map_char_masks: Optional per-char masks; when given they are
                used for filling instead of the polygons.

        Returns:
            The filled score map.
        """
        rectangular_heights = [
            char_polygon.get_rectangular_height() for char_polygon in page_char_polygons
        ]
        # Visit the characters from the largest height to the smallest.
        descending_indices = np.asarray(rectangular_heights).argsort()[::-1]

        page_char_height_score_map = ScoreMap.from_shape(shape, is_prob=False)
        for idx in descending_indices:
            if fill_score_map_char_masks is None:
                filler = page_char_polygons[idx]
            else:
                filler = fill_score_map_char_masks[idx]
            filler.fill_score_map(
                page_char_height_score_map,
                value=rectangular_heights[idx],
            )

        page_inactive_mask.fill_score_map(page_char_height_score_map, 0.0)

        return page_char_height_score_map

    def generate_page_char_gaussian_score_map(
        self,
        shape: Tuple[int, int],
        page_char_polygons: Sequence[Polygon],
    ):
        """Run the char heatmap engine and return its score map.

        Args:
            shape: (height, width) of the page.
            page_char_polygons: Character polygons of the page.
        """
        height, width = shape
        run_config = {
            'height': height,
            'width': width,
            'char_polygons': page_char_polygons,
        }
        return self.char_heatmap_default_engine_executor.run(run_config).score_map

    def generate_page_char_regression_labels(
        self,
        shape: Tuple[int, int],
        page_char_polygons: Sequence[Polygon],
        rng: RandomGenerator,
    ):
        """Build the centroid and (optionally) deviate regression labels of every char.

        Each char polygon contributes one CENTROID label. If
        `config.num_deviate_char_regression_labels` is positive, up to that many
        DEVIATE labels are sampled inside the polygon as well.

        Args:
            shape: (height, width) of the page.
            page_char_polygons: Character polygons, each with exactly 4 points.
            rng: Random generator used to sample the deviate points.

        Returns:
            The list of labels; empty when the page has no char polygon.
        """
        # A page without chars has no labels. Return early since the KD tree below
        # cannot be built from an empty point set.
        if len(page_char_polygons) == 0:
            return []

        page_height, page_width = shape
        num_deviate_labels = self.config.num_deviate_char_regression_labels

        # Build a KD tree for removing deviate points that are closer to the center
        # point of another char.
        center_points = PointList()
        for polygon in page_char_polygons:
            center_points.append(polygon.get_center_point())
        kd_tree = KDTree(center_points.to_np_array())

        page_char_regression_labels: List[PageCharRegressionLabel] = []

        for char_idx, (polygon, center_point) in enumerate(zip(page_char_polygons, center_points)):
            assert polygon.num_points == 4
            up_left, up_right, down_right, down_left = polygon.points

            # 1. The centroid of char polygon.
            label = PageCharRegressionLabel(
                char_idx=char_idx,
                tag=PageCharRegressionLabelTag.CENTROID,
                label_point_smooth_y=center_point.smooth_y,
                label_point_smooth_x=center_point.smooth_x,
                downsampled_label_point_y=center_point.y,
                downsampled_label_point_x=center_point.x,
                up_left=up_left,
                up_right=up_right,
                down_right=down_right,
                down_left=down_left,
            )
            # The centroid labeling must be valid.
            assert label.valid
            page_char_regression_labels.append(label)

            # 2. The deviate points.
            if num_deviate_labels <= 0:
                # Generating deviate points is optional.
                continue

            bounding_box = polygon.bounding_box
            if bounding_box.height < 3 or bounding_box.width < 3:
                # No interior row/column to sample from: rng.integers(1, size - 1)
                # would be called with low >= high and raise.
                logger.warning(f'Cannot sample enough deviate labels for char_polygon={polygon}')
                continue

            # Sample points in shifted bounding box space.
            deviate_points_in_bounding_box = PointList()
            # Some points are invalid, hence multiply the number of samplings by a factor.
            # Also not to sample the points lying on the border to increase the chance of valid.
            num_candidates = (
                self.config.num_deviate_char_regression_labels_candiates_factor
                * num_deviate_labels
            )
            for _ in range(num_candidates):
                y = int(rng.integers(1, bounding_box.height - 1))
                x = int(rng.integers(1, bounding_box.width - 1))
                deviate_points_in_bounding_box.append(Point.create(y=y, x=x))

            # Then transform to the polygon space.
            np_src_points = np.asarray(
                [
                    (0, 0),
                    (bounding_box.width - 1, 0),
                    (bounding_box.width - 1, bounding_box.height - 1),
                    (0, bounding_box.height - 1),
                ],
                dtype=np.float32,
            )
            np_dst_points = polygon.internals.np_self_relative_points
            trans_mat = cv.getPerspectiveTransform(
                np_src_points,
                np_dst_points,
                cv.DECOMP_SVD,
            )

            deviate_points = PointList()
            for shifted_deviate_point in affine_points(
                trans_mat,
                deviate_points_in_bounding_box.to_point_tuple(),
            ):
                y = bounding_box.up + shifted_deviate_point.smooth_y
                x = bounding_box.left + shifted_deviate_point.smooth_x
                assert 0 <= y < page_height
                assert 0 <= x < page_width
                deviate_points.append(Point.create(y=y, x=x))

            # Keep only the points whose nearest center point is this char's own.
            _, np_kd_nbr_indices = kd_tree.query(deviate_points.to_np_array())
            preserve_flags: List[bool] = [
                idx == char_idx for idx in np_kd_nbr_indices[:, 0].tolist()
            ]

            # Build labels.
            num_valid_deviate_char_regression_labels = 0
            for deviate_point, preserve_flag in zip(deviate_points, preserve_flags):
                if num_valid_deviate_char_regression_labels >= num_deviate_labels:
                    break

                if not preserve_flag:
                    continue

                label = PageCharRegressionLabel(
                    char_idx=char_idx,
                    tag=PageCharRegressionLabelTag.DEVIATE,
                    label_point_smooth_y=deviate_point.smooth_y,
                    label_point_smooth_x=deviate_point.smooth_x,
                    downsampled_label_point_y=deviate_point.y,
                    downsampled_label_point_x=deviate_point.x,
                    up_left=up_left,
                    up_right=up_right,
                    down_right=down_right,
                    down_left=down_left,
                )
                if label.valid:
                    page_char_regression_labels.append(label)
                    num_valid_deviate_char_regression_labels += 1

            if num_valid_deviate_char_regression_labels < num_deviate_labels:
                logger.warning(f'Cannot sample enough deviate labels for char_polygon={polygon}')

        return page_char_regression_labels

    def generate_page_char_bounding_box_mask(
        self,
        shape: Tuple[int, int],
        page_char_regression_labels: Sequence[PageCharRegressionLabel],
    ):
        """Build a mask covering the bounding box of every regression label.

        Args:
            shape: (height, width) of the page.
            page_char_regression_labels: Labels whose bounding boxes are filled.

        Returns:
            The filled mask.
        """
        page_char_bounding_box_mask = Mask.from_shape(shape)
        for label in page_char_regression_labels:
            # Round outwards so that the box fully covers the smooth bounds.
            up = math.floor(label.bounding_smooth_up)
            down = math.ceil(label.bounding_smooth_down)
            left = math.floor(label.bounding_smooth_left)
            right = math.ceil(label.bounding_smooth_right)
            Box(up=up, down=down, left=left, right=right).fill_mask(page_char_bounding_box_mask)
        return page_char_bounding_box_mask

    def run(self, input: PageTextRegionLabelStepInput, rng: RandomGenerator):
        """Generate all label artifacts for the page described by `input`.

        Args:
            input: Step input wrapping the page text region step output.
            rng: Random generator forwarded to the regression label sampling.

        Returns:
            A PageTextRegionLabelStepOutput holding the generated masks, score maps
            and regression labels.
        """
        page_text_region_step_output = input.page_text_region_step_output
        page_shape = page_text_region_step_output.page_image.shape
        page_char_polygons = page_text_region_step_output.page_char_polygons

        page_inactive_mask = page_text_region_step_output.page_active_mask.to_inverted_mask()
        page_char_mask, fill_score_map_char_masks = self.generate_page_char_mask(
            shape=page_shape,
            page_inactive_mask=page_inactive_mask,
            page_char_polygons=page_char_polygons,
            page_text_region_polygons=page_text_region_step_output.page_text_region_polygons,
            page_char_polygon_text_region_polygon_indices=(
                page_text_region_step_output.page_char_polygon_text_region_polygon_indices
            ),
        )

        # NOTE: page_char_height_score_map is different from the one defined in page distortion.
        # TODO: Resolve the inconsistency.
        page_char_height_score_map = self.generate_page_char_height_score_map(
            shape=page_shape,
            page_inactive_mask=page_inactive_mask,
            page_char_polygons=page_char_polygons,
            fill_score_map_char_masks=fill_score_map_char_masks,
        )

        page_char_gaussian_score_map = self.generate_page_char_gaussian_score_map(
            page_shape,
            page_char_polygons,
        )

        page_char_regression_labels = self.generate_page_char_regression_labels(
            page_shape,
            page_char_polygons,
            rng,
        )

        page_char_bounding_box_mask = self.generate_page_char_bounding_box_mask(
            page_shape,
            page_char_regression_labels,
        )

        return PageTextRegionLabelStepOutput(
            page_char_mask=page_char_mask,
            page_char_height_score_map=page_char_height_score_map,
            page_char_gaussian_score_map=page_char_gaussian_score_map,
            page_char_regression_labels=page_char_regression_labels,
            page_char_bounding_box_mask=page_char_bounding_box_mask,
        )
Abstract base class for generic types.
A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::
    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
        # Etc.
This class can then be used as follows::
    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default
def __init__(self, config: PageTextRegionLabelStepConfig):
    """Create the char heatmap and char mask engine executors from the config."""
    super().__init__(config)

    heatmap_engine_init_config = self.config.char_heatmap_default_engine_init_config
    self.char_heatmap_default_engine_executor = \
        char_heatmap_default_engine_executor_factory.create(heatmap_engine_init_config)

    char_mask_engine_config = self.config.char_mask_engine_config
    self.char_mask_engine_executor = \
        char_mask_engine_executor_aggregator_factory.create_engine_executor(
            char_mask_engine_config
        )
def generate_page_char_mask(
    self,
    shape: Tuple[int, int],
    page_inactive_mask: Mask,
    page_char_polygons: Sequence[Polygon],
    page_text_region_polygons: Sequence[Polygon],
    page_char_polygon_text_region_polygon_indices: Sequence[int],
):
    """Run the char mask engine and clear the inactive area of the combined mask.

    Args:
        shape: (height, width) of the page.
        page_inactive_mask: Mask of the area to be reset to 0 in the combined mask.
        page_char_polygons: Character polygons of the page.
        page_text_region_polygons: Text region polygons of the page.
        page_char_polygon_text_region_polygon_indices: For each char polygon, the
            index of its text region polygon.

    Returns:
        A pair of (combined chars mask, per-char masks) as reported by the engine.
    """
    height, width = shape
    # Resolve the bounding text region polygon of every char polygon.
    char_bounding_polygons = [
        page_text_region_polygons[text_region_idx]
        for text_region_idx in page_char_polygon_text_region_polygon_indices
    ]
    run_config = CharMaskEngineRunConfig(
        height=height,
        width=width,
        char_polygons=page_char_polygons,
        char_bounding_polygons=char_bounding_polygons,
    )
    result = self.char_mask_engine_executor.run(run_config)

    combined_chars_mask = result.combined_chars_mask
    page_inactive_mask.fill_mask(combined_chars_mask, 0)

    return combined_chars_mask, result.char_masks
@classmethod
def generate_page_char_height_score_map(
    cls,
    shape: Tuple[int, int],
    page_inactive_mask: Mask,
    page_char_polygons: Sequence[Polygon],
    fill_score_map_char_masks: Optional[Sequence[Mask]],
):
    """Build a score map holding the rectangular height of each character.

    Args:
        shape: (height, width) of the page.
        page_inactive_mask: Mask of the area to be reset to 0.0 in the score map.
        page_char_polygons: Character polygons of the page.
        fill_score_map_char_masks: Optional per-char masks; when given they are used
            for filling instead of the polygons.

    Returns:
        The filled score map.
    """
    rectangular_heights = [
        char_polygon.get_rectangular_height() for char_polygon in page_char_polygons
    ]
    # Visit the characters from the largest height to the smallest.
    descending_indices = np.asarray(rectangular_heights).argsort()[::-1]

    page_char_height_score_map = ScoreMap.from_shape(shape, is_prob=False)
    for idx in descending_indices:
        if fill_score_map_char_masks is None:
            filler = page_char_polygons[idx]
        else:
            filler = fill_score_map_char_masks[idx]
        filler.fill_score_map(
            page_char_height_score_map,
            value=rectangular_heights[idx],
        )

    page_inactive_mask.fill_score_map(page_char_height_score_map, 0.0)

    return page_char_height_score_map
def generate_page_char_gaussian_score_map(
    self,
    shape: Tuple[int, int],
    page_char_polygons: Sequence[Polygon],
):
    """Run the char heatmap engine and return its score map.

    Args:
        shape: (height, width) of the page.
        page_char_polygons: Character polygons of the page.
    """
    height, width = shape
    run_config = {
        'height': height,
        'width': width,
        'char_polygons': page_char_polygons,
    }
    return self.char_heatmap_default_engine_executor.run(run_config).score_map
def generate_page_char_regression_labels(
    self,
    shape: Tuple[int, int],
    page_char_polygons: Sequence[Polygon],
    rng: RandomGenerator,
):
    """Build the centroid and (optionally) deviate regression labels of every char.

    Each char polygon contributes one CENTROID label. If
    `config.num_deviate_char_regression_labels` is positive, up to that many DEVIATE
    labels are sampled inside the polygon as well.

    Args:
        shape: (height, width) of the page.
        page_char_polygons: Character polygons, each with exactly 4 points.
        rng: Random generator used to sample the deviate points.

    Returns:
        The list of labels; empty when the page has no char polygon.
    """
    # A page without chars has no labels. Return early since the KD tree below cannot
    # be built from an empty point set.
    if len(page_char_polygons) == 0:
        return []

    page_height, page_width = shape
    num_deviate_labels = self.config.num_deviate_char_regression_labels

    # Build a KD tree for removing deviate points that are closer to the center point
    # of another char.
    center_points = PointList()
    for polygon in page_char_polygons:
        center_points.append(polygon.get_center_point())
    kd_tree = KDTree(center_points.to_np_array())

    page_char_regression_labels: List[PageCharRegressionLabel] = []

    for char_idx, (polygon, center_point) in enumerate(zip(page_char_polygons, center_points)):
        assert polygon.num_points == 4
        up_left, up_right, down_right, down_left = polygon.points

        # 1. The centroid of char polygon.
        label = PageCharRegressionLabel(
            char_idx=char_idx,
            tag=PageCharRegressionLabelTag.CENTROID,
            label_point_smooth_y=center_point.smooth_y,
            label_point_smooth_x=center_point.smooth_x,
            downsampled_label_point_y=center_point.y,
            downsampled_label_point_x=center_point.x,
            up_left=up_left,
            up_right=up_right,
            down_right=down_right,
            down_left=down_left,
        )
        # The centroid labeling must be valid.
        assert label.valid
        page_char_regression_labels.append(label)

        # 2. The deviate points.
        if num_deviate_labels <= 0:
            # Generating deviate points is optional.
            continue

        bounding_box = polygon.bounding_box
        if bounding_box.height < 3 or bounding_box.width < 3:
            # No interior row/column to sample from: rng.integers(1, size - 1) would
            # be called with low >= high and raise.
            logger.warning(f'Cannot sample enough deviate labels for char_polygon={polygon}')
            continue

        # Sample points in shifted bounding box space.
        deviate_points_in_bounding_box = PointList()
        # Some points are invalid, hence multiply the number of samplings by a factor.
        # Also not to sample the points lying on the border to increase the chance of valid.
        num_candidates = (
            self.config.num_deviate_char_regression_labels_candiates_factor
            * num_deviate_labels
        )
        for _ in range(num_candidates):
            y = int(rng.integers(1, bounding_box.height - 1))
            x = int(rng.integers(1, bounding_box.width - 1))
            deviate_points_in_bounding_box.append(Point.create(y=y, x=x))

        # Then transform to the polygon space.
        np_src_points = np.asarray(
            [
                (0, 0),
                (bounding_box.width - 1, 0),
                (bounding_box.width - 1, bounding_box.height - 1),
                (0, bounding_box.height - 1),
            ],
            dtype=np.float32,
        )
        np_dst_points = polygon.internals.np_self_relative_points
        trans_mat = cv.getPerspectiveTransform(
            np_src_points,
            np_dst_points,
            cv.DECOMP_SVD,
        )

        deviate_points = PointList()
        for shifted_deviate_point in affine_points(
            trans_mat,
            deviate_points_in_bounding_box.to_point_tuple(),
        ):
            y = bounding_box.up + shifted_deviate_point.smooth_y
            x = bounding_box.left + shifted_deviate_point.smooth_x
            assert 0 <= y < page_height
            assert 0 <= x < page_width
            deviate_points.append(Point.create(y=y, x=x))

        # Keep only the points whose nearest center point is this char's own.
        _, np_kd_nbr_indices = kd_tree.query(deviate_points.to_np_array())
        preserve_flags: List[bool] = [
            idx == char_idx for idx in np_kd_nbr_indices[:, 0].tolist()
        ]

        # Build labels.
        num_valid_deviate_char_regression_labels = 0
        for deviate_point, preserve_flag in zip(deviate_points, preserve_flags):
            if num_valid_deviate_char_regression_labels >= num_deviate_labels:
                break

            if not preserve_flag:
                continue

            label = PageCharRegressionLabel(
                char_idx=char_idx,
                tag=PageCharRegressionLabelTag.DEVIATE,
                label_point_smooth_y=deviate_point.smooth_y,
                label_point_smooth_x=deviate_point.smooth_x,
                downsampled_label_point_y=deviate_point.y,
                downsampled_label_point_x=deviate_point.x,
                up_left=up_left,
                up_right=up_right,
                down_right=down_right,
                down_left=down_left,
            )
            if label.valid:
                page_char_regression_labels.append(label)
                num_valid_deviate_char_regression_labels += 1

        if num_valid_deviate_char_regression_labels < num_deviate_labels:
            logger.warning(f'Cannot sample enough deviate labels for char_polygon={polygon}')

    return page_char_regression_labels
def generate_page_char_bounding_box_mask(
    self,
    shape: Tuple[int, int],
    page_char_regression_labels: Sequence[PageCharRegressionLabel],
):
    """Build a mask covering the bounding box of every regression label.

    Args:
        shape: (height, width) of the page.
        page_char_regression_labels: Labels whose bounding boxes are filled.

    Returns:
        The filled mask.
    """
    bounding_box_mask = Mask.from_shape(shape)
    for label in page_char_regression_labels:
        # Round outwards so that the box fully covers the smooth bounds.
        up = math.floor(label.bounding_smooth_up)
        down = math.ceil(label.bounding_smooth_down)
        left = math.floor(label.bounding_smooth_left)
        right = math.ceil(label.bounding_smooth_right)
        Box(up=up, down=down, left=left, right=right).fill_mask(bounding_box_mask)
    return bounding_box_mask
def run(self, input: PageTextRegionLabelStepInput, rng: RandomGenerator):
    """Generate all label artifacts for the page described by `input`.

    Args:
        input: Step input wrapping the page text region step output.
        rng: Random generator forwarded to the regression label sampling.

    Returns:
        A PageTextRegionLabelStepOutput holding the generated masks, score maps and
        regression labels.
    """
    upstream_output = input.page_text_region_step_output
    page_shape = upstream_output.page_image.shape
    page_char_polygons = upstream_output.page_char_polygons

    page_inactive_mask = upstream_output.page_active_mask.to_inverted_mask()
    page_char_mask, fill_score_map_char_masks = self.generate_page_char_mask(
        shape=page_shape,
        page_inactive_mask=page_inactive_mask,
        page_char_polygons=page_char_polygons,
        page_text_region_polygons=upstream_output.page_text_region_polygons,
        page_char_polygon_text_region_polygon_indices=(
            upstream_output.page_char_polygon_text_region_polygon_indices
        ),
    )

    # NOTE: page_char_height_score_map is different from the one defined in page distortion.
    # TODO: Resolve the inconsistency.
    page_char_height_score_map = self.generate_page_char_height_score_map(
        shape=page_shape,
        page_inactive_mask=page_inactive_mask,
        page_char_polygons=page_char_polygons,
        fill_score_map_char_masks=fill_score_map_char_masks,
    )

    page_char_gaussian_score_map = self.generate_page_char_gaussian_score_map(
        page_shape,
        page_char_polygons,
    )

    page_char_regression_labels = self.generate_page_char_regression_labels(
        page_shape,
        page_char_polygons,
        rng,
    )

    page_char_bounding_box_mask = self.generate_page_char_bounding_box_mask(
        page_shape,
        page_char_regression_labels,
    )

    return PageTextRegionLabelStepOutput(
        page_char_mask=page_char_mask,
        page_char_height_score_map=page_char_height_score_map,
        page_char_gaussian_score_map=page_char_gaussian_score_map,
        page_char_regression_labels=page_char_regression_labels,
        page_char_bounding_box_mask=page_char_bounding_box_mask,
    )