vkit.pipeline.text_detection.page_text_line
# Copyright 2022 vkit-x Administrator. All Rights Reserved.
#
# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
#
# The commercial license gives you the full rights to create and distribute software
# on your own terms without any SSPL license obligations. For more information,
# please see the "LICENSE_COMMERCIAL.txt" file.
#
# This project is also available under Server Side Public License (SSPL).
# The SSPL licensing is ideal for use cases such as open source projects with
# SSPL distribution, student/academic purposes, hobby projects, internal research
# projects without external distribution, or other projects where all SSPL
# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
from typing import Sequence, Mapping, Any, List, Union, Optional
from enum import Enum, unique
import logging

import attrs
from numpy.random import Generator as RandomGenerator

from vkit.element import Box, LexiconCollection
from vkit.utility import (
    normalize_to_keys_and_probs,
    rng_choice,
    PathType,
)
from vkit.engine.font import (
    font_engine_executor_aggregator_factory,
    FontEngineRunConfigStyle,
    FontCollection,
    TextLine,
)
from vkit.engine.char_sampler import char_sampler_engine_executor_aggregator_factory
from vkit.engine.char_and_font_sampler import char_and_font_sampler_engine_executor_factory
from vkit.engine.seal_impression import SealImpression
from ..interface import PipelineStep, PipelineStepFactory
from .page_layout import PageLayoutStepOutput
from .page_seal_impression import PageSealImpresssionStepOutput

logger = logging.getLogger(__name__)


@attrs.define
class PageTextLineStepConfig:
    """Configuration consumed by `PageTextLineStep`."""
    # Path handed to `LexiconCollection.from_file`.
    lexicon_collection_json: str
    # Folder handed to `FontCollection.from_folder`.
    font_collection_folder: str
    # Configs (or a path to them) for the regular char sampler aggregator.
    char_sampler_configs: Union[Sequence[Mapping[str, Any]], PathType]
    # Configs (or a path to them) for the font engine aggregator.
    font_configs: Union[Sequence[Mapping[str, Any]], PathType]
    # Base style; `run` overrides only `glyph_color` per layout text line.
    font_style: FontEngineRunConfigStyle = attrs.field(factory=FontEngineRunConfigStyle)
    # Relative weight of picking a grayscale glyph color.
    weight_font_style_glyph_color_grayscale: float = 0.9
    # Inclusive bounds of the gray value (`run` samples from [min, max + 1)).
    font_style_glyph_color_grayscale_min: int = 0
    font_style_glyph_color_grayscale_max: int = 75
    # Relative weights of picking a pure red / green / blue glyph color.
    weight_font_style_glyph_color_red: float = 0.04
    weight_font_style_glyph_color_green: float = 0.02
    weight_font_style_glyph_color_blue: float = 0.04
    # Inclusive bounds of the single non-zero channel of a red/green/blue color.
    font_style_glyph_color_rgb_min: int = 128
    font_style_glyph_color_rgb_max: int = 255
    # Forwarded as `return_font_variant` to the font engine for layout text lines.
    return_font_variant: bool = False
    # Optional dedicated char sampler configs for short text lines; when None
    # the regular char-and-font sampler is reused for short text lines.
    short_text_line_char_sampler_configs: Optional[
        Union[Sequence[Mapping[str, Any]], PathType]
    ] = None  # yapf: disable
    # Probability that a sampling attempt treats the layout text line as short.
    prob_short_text_line: float = 0.2
    # Inclusive upper bound on the number of non-space chars kept in a short line.
    short_text_line_num_chars_max: int = 2


@attrs.define
class PageTextLineStepInput:
    """Inputs consumed by `PageTextLineStep.run`."""
    # `run` reads `page_layout` from this output.
    page_layout_step_output: PageLayoutStepOutput
    # `run` reads `seal_impressions`, `boxes` and `angles` from this output.
    page_seal_impresssion_step_output: PageSealImpresssionStepOutput


@unique
class PageTextLineStepKey(Enum):
    """Glyph color modes that `PageTextLineStep.run` chooses between."""
    FONT_STYLE_GLYPH_COLOR_GRAYSCALE = 'font_style_glyph_color_grayscale'
    FONT_STYLE_GLYPH_COLOR_RED = 'font_style_glyph_color_red'
    FONT_STYLE_GLYPH_COLOR_GREEN = 'font_style_glyph_color_green'
    FONT_STYLE_GLYPH_COLOR_BLUE = 'font_style_glyph_color_blue'


@attrs.define
class PageTextLineCollection:
    """Text lines rendered for the page layout, with a per-line short flag."""
    # Copied from the page layout's `height` / `width`.
    height: int
    width: int
    # Rendered text lines after shifting into page coordinates and `split()`.
    text_lines: Sequence[TextLine]
    # Parallel to `text_lines`: True when the line came from a short text line.
    short_text_line_flags: Sequence[bool]

    @property
    def shape(self):
        """Return `(height, width)`."""
        return self.height, self.width


@attrs.define
class SealImpressionResource:
    """Text lines rendered for one seal impression, plus its placement."""
    # Box and angle paired with the seal impression by the seal impression step.
    box: Box
    angle: int
    # Indices into `seal_impression.text_line_slots` that got a text line;
    # parallel to `text_lines`.
    text_line_slot_indices: Sequence[int]
    text_lines: Sequence[TextLine]
    # Text line rendered for `internal_text_line_box`, or None.
    internal_text_line: Optional[TextLine]


@attrs.define
class PageSealImpressionTextLineCollection:
    """Seal impressions that received text, with their rendered resources."""
    # Copied from the page layout's `height` / `width`.
    height: int
    width: int
    # Only seal impressions with at least one rendered slot text line are kept.
    seal_impressions: Sequence[SealImpression]
    # Parallel to `seal_impressions`.
    seal_impression_resources: Sequence[SealImpressionResource]


@attrs.define
class PageTextLineStepOutput:
    """Result of `PageTextLineStep.run`."""
    page_text_line_collection: PageTextLineCollection
    page_seal_impression_text_line_collection: PageSealImpressionTextLineCollection


class PageTextLineStep(
    PipelineStep[
        PageTextLineStepConfig,
        PageTextLineStepInput,
        PageTextLineStepOutput,
    ]
):  # yapf: disable
    """Pipeline step that samples chars and fonts and renders text lines.

    Text lines are rendered for every layout text line of the page layout and
    for the text line slots (and optional internal box) of every seal impression.
    """

    def __init__(self, config: PageTextLineStepConfig):
        """Load the lexicon and fonts and build the sampler and font executors."""
        super().__init__(config)

        lexicon_collection = LexiconCollection.from_file(self.config.lexicon_collection_json)
        font_collection = FontCollection.from_folder(self.config.font_collection_folder)
        char_sampler_engine_executor_aggregator = \
            char_sampler_engine_executor_aggregator_factory.create_with_repeated_init_resource(
                self.config.char_sampler_configs,
                {
                    'lexicon_collection': lexicon_collection,
                },
            )

        # Regular sampler: used for normal layout text lines and seal impressions.
        self.char_and_font_sampler_engine_executor = \
            char_and_font_sampler_engine_executor_factory.create(
                {},
                {
                    'lexicon_collection': lexicon_collection,
                    'font_collection': font_collection,
                    'char_sampler_engine_executor_aggregator':
                        char_sampler_engine_executor_aggregator,
                },
            )

        # Short text lines fall back to the regular sampler unless dedicated
        # char sampler configs are provided.
        self.short_text_line_char_and_font_sampler_engine_executor = \
            self.char_and_font_sampler_engine_executor

        if self.config.short_text_line_char_sampler_configs is not None:
            short_text_line_char_sampler_engine_executor_aggregator = \
                char_sampler_engine_executor_aggregator_factory.create_with_repeated_init_resource(
                    self.config.short_text_line_char_sampler_configs,
                    {
                        'lexicon_collection': lexicon_collection,
                    },
                )
            self.short_text_line_char_and_font_sampler_engine_executor = \
                char_and_font_sampler_engine_executor_factory.create(
                    {},
                    {
                        'lexicon_collection': lexicon_collection,
                        'font_collection': font_collection,
                        'char_sampler_engine_executor_aggregator':
                            short_text_line_char_sampler_engine_executor_aggregator,
                    },
                )

        # Glyph color modes and their probabilities, derived from the config weights.
        self.keys, self.probs = normalize_to_keys_and_probs([
            (
                PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_GRAYSCALE,
                self.config.weight_font_style_glyph_color_grayscale,
            ),
            (
                PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_RED,
                self.config.weight_font_style_glyph_color_red,
            ),
            (
                PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_GREEN,
                self.config.weight_font_style_glyph_color_green,
            ),
            (
                PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_BLUE,
                self.config.weight_font_style_glyph_color_blue,
            ),
        ])
        self.font_engine_executor_aggregator = font_engine_executor_aggregator_factory.create(
            self.config.font_configs
        )

    def run(self, input: PageTextLineStepInput, rng: RandomGenerator):
        """Render text lines for the page layout and for the seal impressions.

        Args:
            input: Outputs of the page layout and seal impression steps.
            rng: Random generator driving every sampling decision.

        Returns:
            A `PageTextLineStepOutput` holding both text line collections.

        Raises:
            AssertionError: If no layout text line could be rendered.
            NotImplementedError: If an unknown glyph color key is drawn.
        """
        page_layout_step_output = input.page_layout_step_output
        page_layout = page_layout_step_output.page_layout

        # Text lines to be recognized.
        text_lines: List[TextLine] = []
        short_text_line_flags: List[bool] = []

        for layout_text_line in page_layout.layout_text_lines:
            char_and_font = None
            is_short_text_line = False

            # Up to three attempts; the short/regular decision is redrawn on
            # every attempt.
            num_retries = 3
            while num_retries > 0:
                is_short_text_line = (rng.random() < self.config.prob_short_text_line)

                if is_short_text_line:
                    char_and_font_sampler_engine_executor = \
                        self.short_text_line_char_and_font_sampler_engine_executor
                else:
                    char_and_font_sampler_engine_executor = \
                        self.char_and_font_sampler_engine_executor

                char_and_font = char_and_font_sampler_engine_executor.run(
                    run_config={
                        'height': layout_text_line.box.height,
                        'width': layout_text_line.box.width,
                    },
                    rng=rng,
                )
                if char_and_font:
                    break

                num_retries -= 1

            # The counter only reaches zero when every attempt failed.
            if num_retries <= 0:
                logger.warning(
                    f'Cannot sample char_and_font for layout_text_line={layout_text_line}'
                )
                continue
            assert char_and_font

            if is_short_text_line:
                # Trim to short text line.
                # The upper bound of `rng.integers` is exclusive, hence the `+ 1`.
                short_text_line_num_chars = int(
                    rng.integers(
                        1,
                        self.config.short_text_line_num_chars_max + 1,
                    )
                )
                # Whitespace is dropped before a window of chars is picked.
                chars = [char for char in char_and_font.chars if not char.isspace()]
                if len(chars) > short_text_line_num_chars:
                    begin = int(rng.integers(
                        0,
                        len(chars) - short_text_line_num_chars + 1,
                    ))
                    end = begin + short_text_line_num_chars - 1
                    chars = chars[begin:end + 1]

                logger.debug(f'short_text_line: trim chars={char_and_font.chars} to {chars}.')
                char_and_font = attrs.evolve(char_and_font, chars=chars)

            # Pick the glyph color mode, then the channel value.
            key = rng_choice(rng, self.keys, probs=self.probs)
            if key == PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_GRAYSCALE:
                grayscale_value = int(
                    rng.integers(
                        self.config.font_style_glyph_color_grayscale_min,
                        self.config.font_style_glyph_color_grayscale_max + 1,
                    )
                )
                glyph_color = (grayscale_value,) * 3

            else:
                rgb_value = int(
                    rng.integers(
                        self.config.font_style_glyph_color_rgb_min,
                        self.config.font_style_glyph_color_rgb_max + 1,
                    )
                )

                # Only one channel is non-zero for the red/green/blue modes.
                if key == PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_RED:
                    glyph_color = (rgb_value, 0, 0)
                elif key == PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_GREEN:
                    glyph_color = (0, rgb_value, 0)
                elif key == PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_BLUE:
                    glyph_color = (0, 0, rgb_value)
                else:
                    raise NotImplementedError()

            font_style = attrs.evolve(
                self.config.font_style,
                glyph_color=glyph_color,
            )
            text_line = self.font_engine_executor_aggregator.run(
                run_config={
                    'height': layout_text_line.box.height,
                    'width': layout_text_line.box.width,
                    'chars': char_and_font.chars,
                    'font_variant': char_and_font.font_variant,
                    'glyph_sequence': layout_text_line.glyph_sequence,
                    'style': font_style,
                    'return_font_variant': self.config.return_font_variant,
                },
                rng=rng,
            )
            if text_line:
                # Shift by the layout box's top-left corner.
                text_line = text_line.to_shifted_text_line(
                    offset_y=layout_text_line.box.up,
                    offset_x=layout_text_line.box.left,
                )
                split_text_lines = text_line.split()
                text_lines.extend(split_text_lines)
                # Every split piece inherits the short flag of its source line.
                short_text_line_flags.extend([is_short_text_line] * len(split_text_lines))

        assert text_lines
        assert len(text_lines) == len(short_text_line_flags)
        page_text_line_collection = PageTextLineCollection(
            height=page_layout.height,
            width=page_layout.width,
            text_lines=text_lines,
            short_text_line_flags=short_text_line_flags,
        )

        # Text lines for seal impressions.
        page_seal_impresssion_step_output = input.page_seal_impresssion_step_output

        seal_impressions: List[SealImpression] = []
        seal_impression_resources: List[SealImpressionResource] = []

        for seal_impression, box, angle in zip(
            page_seal_impresssion_step_output.seal_impressions,
            page_seal_impresssion_step_output.boxes,
            page_seal_impresssion_step_output.angles,
        ):
            text_line_slot_indices: List[int] = []
            # NOTE: rebinds `text_lines`; the page-level list above is already
            # stored in `page_text_line_collection` and is not affected.
            text_lines: List[TextLine] = []

            for text_line_slot_idx, text_line_slot in enumerate(seal_impression.text_line_slots):
                char_and_font = None

                num_retries = 3
                while num_retries > 0:
                    char_and_font = self.char_and_font_sampler_engine_executor.run(
                        run_config={
                            'height': text_line_slot.text_line_height,
                            # NOTE(review): 2**32 - 1 presumably means "no width
                            # limit", with `num_chars` bounding the text - confirm.
                            'width': 2**32 - 1,
                            'num_chars': len(text_line_slot.char_slots),
                        },
                        rng=rng,
                    )
                    if char_and_font:
                        break
                    num_retries -= 1

                if num_retries <= 0:
                    logger.warning(
                        f'Cannot sample char_and_font for seal_impression={seal_impression}'
                    )
                    continue
                assert char_and_font

                text_line = self.font_engine_executor_aggregator.run(
                    run_config={
                        'height': text_line_slot.text_line_height,
                        'width': 2**32 - 1,
                        'chars': char_and_font.chars,
                        'font_variant': char_and_font.font_variant,
                    },
                    rng=rng,
                )
                if text_line:
                    text_line_slot_indices.append(text_line_slot_idx)
                    text_lines.append(text_line)

            internal_text_line = None
            if seal_impression.internal_text_line_box:
                char_and_font = None

                num_retries = 3
                while num_retries > 0:
                    char_and_font = self.char_and_font_sampler_engine_executor.run(
                        run_config={
                            'height': seal_impression.internal_text_line_box.height,
                            'width': seal_impression.internal_text_line_box.width,
                        },
                        rng=rng,
                    )
                    if char_and_font:
                        break
                    num_retries -= 1

                if num_retries <= 0:
                    logger.warning(
                        f'Cannot sample char_and_font for seal_impression={seal_impression}'
                    )
                else:
                    assert char_and_font

                    internal_text_line = self.font_engine_executor_aggregator.run(
                        run_config={
                            'height': seal_impression.internal_text_line_box.height,
                            'width': seal_impression.internal_text_line_box.width,
                            'chars': char_and_font.chars,
                            'font_variant': char_and_font.font_variant,
                        },
                        rng=rng,
                    )

            # Seal impressions without any rendered slot text line are dropped.
            if text_lines:
                seal_impressions.append(seal_impression)
                seal_impression_resources.append(
                    SealImpressionResource(
                        box=box,
                        angle=angle,
                        text_line_slot_indices=text_line_slot_indices,
                        text_lines=text_lines,
                        internal_text_line=internal_text_line,
                    )
                )

        page_seal_impression_text_line_collection = PageSealImpressionTextLineCollection(
            height=page_layout.height,
            width=page_layout.width,
            seal_impressions=seal_impressions,
            seal_impression_resources=seal_impression_resources,
        )

        return PageTextLineStepOutput(
            page_text_line_collection=page_text_line_collection,
            page_seal_impression_text_line_collection=page_seal_impression_text_line_collection,
        )


# Factory used to build `PageTextLineStep` instances.
page_text_line_step_factory = PipelineStepFactory(PageTextLineStep)
class PageTextLineStepConfig:
    """Configuration consumed by `PageTextLineStep`."""
    # Path handed to `LexiconCollection.from_file`.
    lexicon_collection_json: str
    # Folder handed to `FontCollection.from_folder`.
    font_collection_folder: str
    # Configs (or a path to them) for the regular char sampler aggregator.
    char_sampler_configs: Union[Sequence[Mapping[str, Any]], PathType]
    # Configs (or a path to them) for the font engine aggregator.
    font_configs: Union[Sequence[Mapping[str, Any]], PathType]
    # Base style; `PageTextLineStep.run` overrides only `glyph_color` per
    # layout text line.
    font_style: FontEngineRunConfigStyle = attrs.field(factory=FontEngineRunConfigStyle)
    # Relative weight of picking a grayscale glyph color.
    weight_font_style_glyph_color_grayscale: float = 0.9
    # Inclusive bounds of the gray value (sampled from [min, max + 1)).
    font_style_glyph_color_grayscale_min: int = 0
    font_style_glyph_color_grayscale_max: int = 75
    # Relative weights of picking a pure red / green / blue glyph color.
    weight_font_style_glyph_color_red: float = 0.04
    weight_font_style_glyph_color_green: float = 0.02
    weight_font_style_glyph_color_blue: float = 0.04
    # Inclusive bounds of the single non-zero channel of a red/green/blue color.
    font_style_glyph_color_rgb_min: int = 128
    font_style_glyph_color_rgb_max: int = 255
    # Forwarded as `return_font_variant` to the font engine for layout text lines.
    return_font_variant: bool = False
    # Optional dedicated char sampler configs for short text lines; when None
    # the regular char-and-font sampler is reused for short text lines.
    short_text_line_char_sampler_configs: Optional[
        Union[Sequence[Mapping[str, Any]], PathType]
    ] = None  # yapf: disable
    # Probability that a sampling attempt treats the layout text line as short.
    prob_short_text_line: float = 0.2
    # Inclusive upper bound on the number of non-space chars kept in a short line.
    short_text_line_num_chars_max: int = 2
# Initializer listing for PageTextLineStepConfig. `NOTHING`, `attr_dict` and
# `__attr_factory_font_style` are not defined in this listing - presumably
# attrs injects them into the generated function's globals (TODO confirm).
def __init__(
    self,
    lexicon_collection_json,
    font_collection_folder,
    char_sampler_configs,
    font_configs,
    font_style=NOTHING,
    weight_font_style_glyph_color_grayscale=attr_dict['weight_font_style_glyph_color_grayscale'].default,
    font_style_glyph_color_grayscale_min=attr_dict['font_style_glyph_color_grayscale_min'].default,
    font_style_glyph_color_grayscale_max=attr_dict['font_style_glyph_color_grayscale_max'].default,
    weight_font_style_glyph_color_red=attr_dict['weight_font_style_glyph_color_red'].default,
    weight_font_style_glyph_color_green=attr_dict['weight_font_style_glyph_color_green'].default,
    weight_font_style_glyph_color_blue=attr_dict['weight_font_style_glyph_color_blue'].default,
    font_style_glyph_color_rgb_min=attr_dict['font_style_glyph_color_rgb_min'].default,
    font_style_glyph_color_rgb_max=attr_dict['font_style_glyph_color_rgb_max'].default,
    return_font_variant=attr_dict['return_font_variant'].default,
    short_text_line_char_sampler_configs=attr_dict['short_text_line_char_sampler_configs'].default,
    prob_short_text_line=attr_dict['prob_short_text_line'].default,
    short_text_line_num_chars_max=attr_dict['short_text_line_num_chars_max'].default
):
    # The four required arguments are stored as given.
    self.lexicon_collection_json = lexicon_collection_json
    self.font_collection_folder = font_collection_folder
    self.char_sampler_configs = char_sampler_configs
    self.font_configs = font_configs
    # `NOTHING` is the "argument omitted" sentinel: only then is the factory
    # called, so each instance gets its own freshly built style object.
    if font_style is not NOTHING:
        self.font_style = font_style
    else:
        self.font_style = __attr_factory_font_style()
    # The remaining arguments default to the values declared on the class fields.
    self.weight_font_style_glyph_color_grayscale = weight_font_style_glyph_color_grayscale
    self.font_style_glyph_color_grayscale_min = font_style_glyph_color_grayscale_min
    self.font_style_glyph_color_grayscale_max = font_style_glyph_color_grayscale_max
    self.weight_font_style_glyph_color_red = weight_font_style_glyph_color_red
    self.weight_font_style_glyph_color_green = weight_font_style_glyph_color_green
    self.weight_font_style_glyph_color_blue = weight_font_style_glyph_color_blue
    self.font_style_glyph_color_rgb_min = font_style_glyph_color_rgb_min
    self.font_style_glyph_color_rgb_max = font_style_glyph_color_rgb_max
    self.return_font_variant = return_font_variant
    self.short_text_line_char_sampler_configs = short_text_line_char_sampler_configs
    self.prob_short_text_line = prob_short_text_line
    self.short_text_line_num_chars_max = short_text_line_num_chars_max
Method generated by attrs for class PageTextLineStepConfig.
class PageTextLineStepInput:
    """Inputs consumed by `PageTextLineStep.run`."""
    # `run` reads `page_layout` from this output.
    page_layout_step_output: PageLayoutStepOutput
    # `run` reads `seal_impressions`, `boxes` and `angles` from this output.
    page_seal_impresssion_step_output: PageSealImpresssionStepOutput
def __init__(self, page_layout_step_output, page_seal_impresssion_step_output):
    # Store both upstream step outputs, in declaration order.
    self.page_layout_step_output, self.page_seal_impresssion_step_output = (
        page_layout_step_output,
        page_seal_impresssion_step_output,
    )
Method generated by attrs for class PageTextLineStepInput.
class PageTextLineStepKey(Enum):
    """Glyph color modes that `PageTextLineStep.run` chooses between."""
    # Same gray value on all three channels.
    FONT_STYLE_GLYPH_COLOR_GRAYSCALE = 'font_style_glyph_color_grayscale'
    # Only the red / green / blue channel is non-zero.
    FONT_STYLE_GLYPH_COLOR_RED = 'font_style_glyph_color_red'
    FONT_STYLE_GLYPH_COLOR_GREEN = 'font_style_glyph_color_green'
    FONT_STYLE_GLYPH_COLOR_BLUE = 'font_style_glyph_color_blue'
An enumeration.
Inherited Members
- enum.Enum
- name
- value
class PageTextLineCollection:
    """Text lines rendered for the page layout, with a per-line short flag."""
    # Page dimensions.
    height: int
    width: int
    # Rendered text lines and, parallel to them, whether each is a short line.
    text_lines: Sequence[TextLine]
    short_text_line_flags: Sequence[bool]

    @property
    def shape(self):
        """Return the page size as a `(height, width)` tuple."""
        page_height = self.height
        page_width = self.width
        return (page_height, page_width)
def __init__(self, height, width, text_lines, short_text_line_flags):
    # Page dimensions first, then the two parallel sequences.
    self.height, self.width = height, width
    self.text_lines, self.short_text_line_flags = text_lines, short_text_line_flags
Method generated by attrs for class PageTextLineCollection.
class SealImpressionResource:
    """Text lines rendered for one seal impression, plus its placement."""
    # Box and angle paired with the seal impression by the seal impression step.
    box: Box
    angle: int
    # Indices into `seal_impression.text_line_slots` that got a text line;
    # parallel to `text_lines`.
    text_line_slot_indices: Sequence[int]
    text_lines: Sequence[TextLine]
    # Text line rendered for `internal_text_line_box`, or None.
    internal_text_line: Optional[TextLine]
def __init__(self, box, angle, text_line_slot_indices, text_lines, internal_text_line):
    # Placement of the seal impression.
    self.box, self.angle = box, angle
    # Slot indices and their rendered text lines (parallel sequences).
    self.text_line_slot_indices, self.text_lines = text_line_slot_indices, text_lines
    # Optional text line for the internal box.
    self.internal_text_line = internal_text_line
Method generated by attrs for class SealImpressionResource.
class PageSealImpressionTextLineCollection:
    """Seal impressions that received text, with their rendered resources."""
    # Copied from the page layout's `height` / `width`.
    height: int
    width: int
    # Only seal impressions with at least one rendered slot text line are kept.
    seal_impressions: Sequence[SealImpression]
    # Parallel to `seal_impressions`.
    seal_impression_resources: Sequence[SealImpressionResource]
def __init__(self, height, width, seal_impressions, seal_impression_resources):
    # Page dimensions first, then the two parallel sequences.
    self.height, self.width = height, width
    self.seal_impressions, self.seal_impression_resources = (
        seal_impressions,
        seal_impression_resources,
    )
Method generated by attrs for class PageSealImpressionTextLineCollection.
class PageTextLineStepOutput:
    """Result of `PageTextLineStep.run`."""
    # Text lines rendered for the page layout.
    page_text_line_collection: PageTextLineCollection
    # Text lines rendered for the seal impressions.
    page_seal_impression_text_line_collection: PageSealImpressionTextLineCollection
def __init__(self, page_text_line_collection, page_seal_impression_text_line_collection):
    # Store both collections, in declaration order.
    self.page_text_line_collection, self.page_seal_impression_text_line_collection = (
        page_text_line_collection,
        page_seal_impression_text_line_collection,
    )
Method generated by attrs for class PageTextLineStepOutput.
class PageTextLineStep(
    PipelineStep[
        PageTextLineStepConfig,
        PageTextLineStepInput,
        PageTextLineStepOutput,
    ]
):  # yapf: disable
    """Pipeline step that samples chars and fonts and renders text lines.

    Text lines are rendered for every layout text line of the page layout and
    for the text line slots (and optional internal box) of every seal
    impression. All randomness comes from the `rng` passed to `run`; the order
    of draws is kept stable so results stay reproducible for a given seed.
    """

    # Number of sampling attempts before a text line / slot is given up on.
    _NUM_SAMPLING_ATTEMPTS = 3
    # Width passed for seal impression slots.
    # NOTE(review): presumably "no width limit", with `num_chars` bounding the
    # text instead - confirm against the sampler / font engine.
    _SEAL_TEXT_LINE_SLOT_WIDTH = 2**32 - 1

    def __init__(self, config: PageTextLineStepConfig):
        """Load the lexicon and fonts and build the sampler and font executors."""
        super().__init__(config)

        lexicon_collection = LexiconCollection.from_file(self.config.lexicon_collection_json)
        font_collection = FontCollection.from_folder(self.config.font_collection_folder)

        # Regular sampler: used for normal layout text lines and seal impressions.
        self.char_and_font_sampler_engine_executor = \
            self._create_char_and_font_sampler_engine_executor(
                self.config.char_sampler_configs,
                lexicon_collection,
                font_collection,
            )

        # Short text lines reuse the regular sampler unless dedicated char
        # sampler configs are provided.
        self.short_text_line_char_and_font_sampler_engine_executor = \
            self.char_and_font_sampler_engine_executor
        if self.config.short_text_line_char_sampler_configs is not None:
            self.short_text_line_char_and_font_sampler_engine_executor = \
                self._create_char_and_font_sampler_engine_executor(
                    self.config.short_text_line_char_sampler_configs,
                    lexicon_collection,
                    font_collection,
                )

        # Glyph color modes and their probabilities, derived from the config weights.
        self.keys, self.probs = normalize_to_keys_and_probs([
            (
                PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_GRAYSCALE,
                self.config.weight_font_style_glyph_color_grayscale,
            ),
            (
                PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_RED,
                self.config.weight_font_style_glyph_color_red,
            ),
            (
                PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_GREEN,
                self.config.weight_font_style_glyph_color_green,
            ),
            (
                PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_BLUE,
                self.config.weight_font_style_glyph_color_blue,
            ),
        ])
        self.font_engine_executor_aggregator = font_engine_executor_aggregator_factory.create(
            self.config.font_configs
        )

    @staticmethod
    def _create_char_and_font_sampler_engine_executor(
        char_sampler_configs,
        lexicon_collection,
        font_collection,
    ):
        """Build a char-and-font sampler on top of a fresh char sampler aggregator."""
        char_sampler_engine_executor_aggregator = \
            char_sampler_engine_executor_aggregator_factory.create_with_repeated_init_resource(
                char_sampler_configs,
                {
                    'lexicon_collection': lexicon_collection,
                },
            )
        return char_and_font_sampler_engine_executor_factory.create(
            {},
            {
                'lexicon_collection': lexicon_collection,
                'font_collection': font_collection,
                'char_sampler_engine_executor_aggregator':
                    char_sampler_engine_executor_aggregator,
            },
        )

    def _sample_char_and_font(self, run_config: Mapping[str, Any], rng: RandomGenerator):
        """Sample with the regular sampler; return None when every attempt fails."""
        for _ in range(self._NUM_SAMPLING_ATTEMPTS):
            char_and_font = self.char_and_font_sampler_engine_executor.run(
                # A fresh dict per attempt, as if it were written inline.
                run_config=dict(run_config),
                rng=rng,
            )
            if char_and_font:
                return char_and_font
        return None

    def _sample_layout_char_and_font(self, layout_text_line, rng: RandomGenerator):
        """Sample chars and font for one layout text line.

        The short/regular decision is redrawn on every attempt. Returns
        `(char_and_font, is_short_text_line)`; `char_and_font` is None when
        every attempt failed.
        """
        for _ in range(self._NUM_SAMPLING_ATTEMPTS):
            is_short_text_line = (rng.random() < self.config.prob_short_text_line)
            if is_short_text_line:
                sampler = self.short_text_line_char_and_font_sampler_engine_executor
            else:
                sampler = self.char_and_font_sampler_engine_executor

            char_and_font = sampler.run(
                run_config={
                    'height': layout_text_line.box.height,
                    'width': layout_text_line.box.width,
                },
                rng=rng,
            )
            if char_and_font:
                return char_and_font, is_short_text_line
        return None, False

    def _trim_to_short_text_line(self, char_and_font, rng: RandomGenerator):
        """Drop whitespace and keep a random window of at most N chars.

        N is drawn from `1..short_text_line_num_chars_max` (inclusive).
        """
        # The upper bound of `rng.integers` is exclusive, hence the `+ 1`.
        num_chars = int(rng.integers(1, self.config.short_text_line_num_chars_max + 1))
        chars = [char for char in char_and_font.chars if not char.isspace()]
        if len(chars) > num_chars:
            begin = int(rng.integers(0, len(chars) - num_chars + 1))
            chars = chars[begin:begin + num_chars]

        # Lazy %-formatting: the reprs are only built when DEBUG is enabled.
        logger.debug('short_text_line: trim chars=%s to %s.', char_and_font.chars, chars)
        return attrs.evolve(char_and_font, chars=chars)

    def _sample_glyph_color(self, rng: RandomGenerator):
        """Pick a glyph color mode, then the channel value; return an RGB tuple."""
        key = rng_choice(rng, self.keys, probs=self.probs)
        if key == PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_GRAYSCALE:
            grayscale_value = int(
                rng.integers(
                    self.config.font_style_glyph_color_grayscale_min,
                    self.config.font_style_glyph_color_grayscale_max + 1,
                )
            )
            return (grayscale_value,) * 3

        rgb_value = int(
            rng.integers(
                self.config.font_style_glyph_color_rgb_min,
                self.config.font_style_glyph_color_rgb_max + 1,
            )
        )
        # Only one channel is non-zero for the red/green/blue modes.
        if key == PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_RED:
            return (rgb_value, 0, 0)
        if key == PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_GREEN:
            return (0, rgb_value, 0)
        if key == PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_BLUE:
            return (0, 0, rgb_value)
        raise NotImplementedError()

    def _render_layout_text_lines(self, page_layout, rng: RandomGenerator):
        """Render every layout text line; return `(text_lines, short_text_line_flags)`."""
        text_lines: List[TextLine] = []
        short_text_line_flags: List[bool] = []

        for layout_text_line in page_layout.layout_text_lines:
            char_and_font, is_short_text_line = self._sample_layout_char_and_font(
                layout_text_line,
                rng,
            )
            if not char_and_font:
                logger.warning(
                    'Cannot sample char_and_font for layout_text_line=%s',
                    layout_text_line,
                )
                continue

            if is_short_text_line:
                char_and_font = self._trim_to_short_text_line(char_and_font, rng)

            font_style = attrs.evolve(
                self.config.font_style,
                glyph_color=self._sample_glyph_color(rng),
            )
            text_line = self.font_engine_executor_aggregator.run(
                run_config={
                    'height': layout_text_line.box.height,
                    'width': layout_text_line.box.width,
                    'chars': char_and_font.chars,
                    'font_variant': char_and_font.font_variant,
                    'glyph_sequence': layout_text_line.glyph_sequence,
                    'style': font_style,
                    'return_font_variant': self.config.return_font_variant,
                },
                rng=rng,
            )
            if not text_line:
                continue

            # Shift by the layout box's top-left corner.
            text_line = text_line.to_shifted_text_line(
                offset_y=layout_text_line.box.up,
                offset_x=layout_text_line.box.left,
            )
            split_text_lines = text_line.split()
            text_lines.extend(split_text_lines)
            # Every split piece inherits the short flag of its source line.
            short_text_line_flags.extend([is_short_text_line] * len(split_text_lines))

        return text_lines, short_text_line_flags

    def _create_seal_impression_resource(
        self,
        seal_impression: SealImpression,
        box: Box,
        angle: int,
        rng: RandomGenerator,
    ) -> Optional[SealImpressionResource]:
        """Render the text of one seal impression.

        Returns None when no text line slot could be rendered. The internal
        text line is still sampled in that case so the RNG draw order does not
        depend on slot failures.
        """
        text_line_slot_indices: List[int] = []
        slot_text_lines: List[TextLine] = []

        for text_line_slot_idx, text_line_slot in enumerate(seal_impression.text_line_slots):
            char_and_font = self._sample_char_and_font(
                {
                    'height': text_line_slot.text_line_height,
                    'width': self._SEAL_TEXT_LINE_SLOT_WIDTH,
                    'num_chars': len(text_line_slot.char_slots),
                },
                rng,
            )
            if not char_and_font:
                logger.warning(
                    'Cannot sample char_and_font for seal_impression=%s',
                    seal_impression,
                )
                continue

            text_line = self.font_engine_executor_aggregator.run(
                run_config={
                    'height': text_line_slot.text_line_height,
                    'width': self._SEAL_TEXT_LINE_SLOT_WIDTH,
                    'chars': char_and_font.chars,
                    'font_variant': char_and_font.font_variant,
                },
                rng=rng,
            )
            if text_line:
                text_line_slot_indices.append(text_line_slot_idx)
                slot_text_lines.append(text_line)

        internal_text_line = None
        if seal_impression.internal_text_line_box:
            char_and_font = self._sample_char_and_font(
                {
                    'height': seal_impression.internal_text_line_box.height,
                    'width': seal_impression.internal_text_line_box.width,
                },
                rng,
            )
            if not char_and_font:
                logger.warning(
                    'Cannot sample char_and_font for seal_impression=%s',
                    seal_impression,
                )
            else:
                internal_text_line = self.font_engine_executor_aggregator.run(
                    run_config={
                        'height': seal_impression.internal_text_line_box.height,
                        'width': seal_impression.internal_text_line_box.width,
                        'chars': char_and_font.chars,
                        'font_variant': char_and_font.font_variant,
                    },
                    rng=rng,
                )

        if not slot_text_lines:
            return None
        return SealImpressionResource(
            box=box,
            angle=angle,
            text_line_slot_indices=text_line_slot_indices,
            text_lines=slot_text_lines,
            internal_text_line=internal_text_line,
        )

    def run(self, input: PageTextLineStepInput, rng: RandomGenerator):
        """Render text lines for the page layout and for the seal impressions.

        Args:
            input: Outputs of the page layout and seal impression steps.
            rng: Random generator driving every sampling decision.

        Returns:
            A `PageTextLineStepOutput` holding both text line collections.

        Raises:
            AssertionError: If no layout text line could be rendered.
            NotImplementedError: If an unknown glyph color key is drawn.
        """
        page_layout = input.page_layout_step_output.page_layout

        # Text lines to be recognized.
        text_lines, short_text_line_flags = self._render_layout_text_lines(page_layout, rng)
        assert text_lines, 'No text line could be rendered for the page layout.'
        assert len(text_lines) == len(short_text_line_flags)
        page_text_line_collection = PageTextLineCollection(
            height=page_layout.height,
            width=page_layout.width,
            text_lines=text_lines,
            short_text_line_flags=short_text_line_flags,
        )

        # Text lines for seal impressions.
        page_seal_impresssion_step_output = input.page_seal_impresssion_step_output

        seal_impressions: List[SealImpression] = []
        seal_impression_resources: List[SealImpressionResource] = []
        for seal_impression, box, angle in zip(
            page_seal_impresssion_step_output.seal_impressions,
            page_seal_impresssion_step_output.boxes,
            page_seal_impresssion_step_output.angles,
        ):
            resource = self._create_seal_impression_resource(seal_impression, box, angle, rng)
            # Seal impressions without any rendered slot text line are dropped.
            if resource is not None:
                seal_impressions.append(seal_impression)
                seal_impression_resources.append(resource)

        page_seal_impression_text_line_collection = PageSealImpressionTextLineCollection(
            height=page_layout.height,
            width=page_layout.width,
            seal_impressions=seal_impressions,
            seal_impression_resources=seal_impression_resources,
        )

        return PageTextLineStepOutput(
            page_text_line_collection=page_text_line_collection,
            page_seal_impression_text_line_collection=page_seal_impression_text_line_collection,
        )
Abstract base class for generic types.
A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::
    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
            # Etc.
This class can then be used as follows::
    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default
def __init__(self, config: PageTextLineStepConfig):
    """Load shared resources and build the samplers and font engine.

    The lexicon collection and the font collection are read from the paths
    given in ``config``. A default char-and-font sampler is always built; a
    separate sampler for short text lines is built only when
    ``short_text_line_char_sampler_configs`` is set, otherwise the default
    sampler is reused for short text lines. The glyph color weights are
    normalized into ``self.keys`` / ``self.probs``.
    """
    super().__init__(config)

    lexicon_collection = LexiconCollection.from_file(self.config.lexicon_collection_json)
    font_collection = FontCollection.from_folder(self.config.font_collection_folder)

    def build_char_and_font_sampler(char_sampler_configs):
        # Every sampler owns its own char sampler aggregator, while the
        # lexicon and font collections are shared between samplers.
        aggregator = (
            char_sampler_engine_executor_aggregator_factory
            .create_with_repeated_init_resource(
                char_sampler_configs,
                {
                    'lexicon_collection': lexicon_collection,
                },
            )
        )
        return char_and_font_sampler_engine_executor_factory.create(
            {},
            {
                'lexicon_collection': lexicon_collection,
                'font_collection': font_collection,
                'char_sampler_engine_executor_aggregator': aggregator,
            },
        )

    self.char_and_font_sampler_engine_executor = build_char_and_font_sampler(
        self.config.char_sampler_configs
    )

    # Short text lines fall back to the default sampler unless a dedicated
    # char sampler configuration is provided.
    self.short_text_line_char_and_font_sampler_engine_executor = \
        self.char_and_font_sampler_engine_executor
    short_text_line_configs = self.config.short_text_line_char_sampler_configs
    if short_text_line_configs is not None:
        self.short_text_line_char_and_font_sampler_engine_executor = \
            build_char_and_font_sampler(short_text_line_configs)

    # Pair each glyph color choice with its configured weight.
    weighted_keys = [
        (
            PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_GRAYSCALE,
            self.config.weight_font_style_glyph_color_grayscale,
        ),
        (
            PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_RED,
            self.config.weight_font_style_glyph_color_red,
        ),
        (
            PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_GREEN,
            self.config.weight_font_style_glyph_color_green,
        ),
        (
            PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_BLUE,
            self.config.weight_font_style_glyph_color_blue,
        ),
    ]
    self.keys, self.probs = normalize_to_keys_and_probs(weighted_keys)

    self.font_engine_executor_aggregator = font_engine_executor_aggregator_factory.create(
        self.config.font_configs
    )
def run(self, input: PageTextLineStepInput, rng: RandomGenerator):
    """Render text lines for the page layout and for the seal impressions.

    For every layout text line, chars and a font variant are sampled (up to
    three attempts), optionally trimmed to a short text line, rendered with
    a randomly chosen glyph color, shifted to the position of the layout
    box and split. For every seal impression, one text line per text line
    slot plus an optional internal text line are rendered; a seal
    impression is kept only if at least one slot text line was rendered.

    Returns a ``PageTextLineStepOutput`` holding both collections.
    """
    max_attempts = 3

    page_layout_step_output = input.page_layout_step_output
    page_layout = page_layout_step_output.page_layout

    # Text lines to be recognized.
    page_text_lines: List[TextLine] = []
    short_text_line_flags: List[bool] = []

    for layout_text_line in page_layout.layout_text_lines:
        # The short/normal decision is re-drawn on every attempt.
        for _ in range(max_attempts):
            is_short_text_line = (rng.random() < self.config.prob_short_text_line)
            sampler = (
                self.short_text_line_char_and_font_sampler_engine_executor
                if is_short_text_line else self.char_and_font_sampler_engine_executor
            )
            char_and_font = sampler.run(
                run_config={
                    'height': layout_text_line.box.height,
                    'width': layout_text_line.box.width,
                },
                rng=rng,
            )
            if char_and_font:
                break
        else:
            # All attempts failed: skip this layout text line.
            logger.warning(
                f'Cannot sample char_and_font for layout_text_line={layout_text_line}'
            )
            continue
        assert char_and_font

        if is_short_text_line:
            # Drop whitespace, then keep a random window of at most
            # `num_chars_to_keep` consecutive chars.
            num_chars_to_keep = int(
                rng.integers(1, self.config.short_text_line_num_chars_max + 1)
            )
            chars = [char for char in char_and_font.chars if not char.isspace()]
            if len(chars) > num_chars_to_keep:
                begin = int(rng.integers(0, len(chars) - num_chars_to_keep + 1))
                chars = chars[begin:begin + num_chars_to_keep]

            logger.debug(f'short_text_line: trim chars={char_and_font.chars} to {chars}.')
            char_and_font = attrs.evolve(char_and_font, chars=chars)

        # Pick the glyph color: a gray level, or a single RGB channel.
        key = rng_choice(rng, self.keys, probs=self.probs)
        if key == PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_GRAYSCALE:
            gray = int(
                rng.integers(
                    self.config.font_style_glyph_color_grayscale_min,
                    self.config.font_style_glyph_color_grayscale_max + 1,
                )
            )
            glyph_color = (gray, gray, gray)
        else:
            # The channel value is drawn before the key is matched.
            channel_value = int(
                rng.integers(
                    self.config.font_style_glyph_color_rgb_min,
                    self.config.font_style_glyph_color_rgb_max + 1,
                )
            )
            candidates = (
                (PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_RED, (channel_value, 0, 0)),
                (PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_GREEN, (0, channel_value, 0)),
                (PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_BLUE, (0, 0, channel_value)),
            )
            for candidate_key, candidate_color in candidates:
                if key == candidate_key:
                    glyph_color = candidate_color
                    break
            else:
                raise NotImplementedError()

        font_style = attrs.evolve(
            self.config.font_style,
            glyph_color=glyph_color,
        )
        rendered = self.font_engine_executor_aggregator.run(
            run_config={
                'height': layout_text_line.box.height,
                'width': layout_text_line.box.width,
                'chars': char_and_font.chars,
                'font_variant': char_and_font.font_variant,
                'glyph_sequence': layout_text_line.glyph_sequence,
                'style': font_style,
                'return_font_variant': self.config.return_font_variant,
            },
            rng=rng,
        )
        if not rendered:
            continue

        # Move the rendered text line to the position of the layout box,
        # then split it; every piece inherits the short text line flag.
        shifted = rendered.to_shifted_text_line(
            offset_y=layout_text_line.box.up,
            offset_x=layout_text_line.box.left,
        )
        pieces = shifted.split()
        page_text_lines.extend(pieces)
        short_text_line_flags.extend([is_short_text_line] * len(pieces))

    assert page_text_lines
    assert len(page_text_lines) == len(short_text_line_flags)
    page_text_line_collection = PageTextLineCollection(
        height=page_layout.height,
        width=page_layout.width,
        text_lines=page_text_lines,
        short_text_line_flags=short_text_line_flags,
    )

    # Text lines for seal impressions.
    page_seal_impresssion_step_output = input.page_seal_impresssion_step_output

    seal_impressions: List[SealImpression] = []
    seal_impression_resources: List[SealImpressionResource] = []

    for seal_impression, box, angle in zip(
        page_seal_impresssion_step_output.seal_impressions,
        page_seal_impresssion_step_output.boxes,
        page_seal_impresssion_step_output.angles,
    ):
        slot_indices: List[int] = []
        slot_text_lines: List[TextLine] = []

        for slot_idx, slot in enumerate(seal_impression.text_line_slots):
            for _ in range(max_attempts):
                char_and_font = self.char_and_font_sampler_engine_executor.run(
                    run_config={
                        'height': slot.text_line_height,
                        # NOTE(review): presumably a huge width leaves the
                        # line effectively unconstrained - confirm.
                        'width': 2**32 - 1,
                        'num_chars': len(slot.char_slots),
                    },
                    rng=rng,
                )
                if char_and_font:
                    break
            else:
                # All attempts failed: skip this slot.
                logger.warning(
                    f'Cannot sample char_and_font for seal_impression={seal_impression}'
                )
                continue
            assert char_and_font

            slot_text_line = self.font_engine_executor_aggregator.run(
                run_config={
                    'height': slot.text_line_height,
                    'width': 2**32 - 1,
                    'chars': char_and_font.chars,
                    'font_variant': char_and_font.font_variant,
                },
                rng=rng,
            )
            if slot_text_line:
                slot_indices.append(slot_idx)
                slot_text_lines.append(slot_text_line)

        # Optional text line placed inside the seal impression; it stays
        # None when there is no internal box or sampling keeps failing.
        internal_text_line = None
        if seal_impression.internal_text_line_box:
            for _ in range(max_attempts):
                char_and_font = self.char_and_font_sampler_engine_executor.run(
                    run_config={
                        'height': seal_impression.internal_text_line_box.height,
                        'width': seal_impression.internal_text_line_box.width,
                    },
                    rng=rng,
                )
                if not char_and_font:
                    continue
                internal_text_line = self.font_engine_executor_aggregator.run(
                    run_config={
                        'height': seal_impression.internal_text_line_box.height,
                        'width': seal_impression.internal_text_line_box.width,
                        'chars': char_and_font.chars,
                        'font_variant': char_and_font.font_variant,
                    },
                    rng=rng,
                )
                break
            else:
                logger.warning(
                    f'Cannot sample char_and_font for seal_impression={seal_impression}'
                )

        # Seal impressions without any rendered slot text line are dropped.
        if slot_text_lines:
            seal_impressions.append(seal_impression)
            seal_impression_resources.append(
                SealImpressionResource(
                    box=box,
                    angle=angle,
                    text_line_slot_indices=slot_indices,
                    text_lines=slot_text_lines,
                    internal_text_line=internal_text_line,
                )
            )

    page_seal_impression_text_line_collection = PageSealImpressionTextLineCollection(
        height=page_layout.height,
        width=page_layout.width,
        seal_impressions=seal_impressions,
        seal_impression_resources=seal_impression_resources,
    )

    return PageTextLineStepOutput(
        page_text_line_collection=page_text_line_collection,
        page_seal_impression_text_line_collection=page_seal_impression_text_line_collection,
    )