vkit.pipeline.text_detection.page_text_line

  1# Copyright 2022 vkit-x Administrator. All Rights Reserved.
  2#
  3# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
  4#
  5# The commercial license gives you the full rights to create and distribute software
  6# on your own terms without any SSPL license obligations. For more information,
  7# please see the "LICENSE_COMMERCIAL.txt" file.
  8#
  9# This project is also available under Server Side Public License (SSPL).
 10# The SSPL licensing is ideal for use cases such as open source projects with
 11# SSPL distribution, student/academic purposes, hobby projects, internal research
 12# projects without external distribution, or other projects where all SSPL
 13# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
 14from typing import Sequence, Mapping, Any, List, Union, Optional
 15from enum import Enum, unique
 16import logging
 17
 18import attrs
 19from numpy.random import Generator as RandomGenerator
 20
 21from vkit.element import Box, LexiconCollection
 22from vkit.utility import (
 23    normalize_to_keys_and_probs,
 24    rng_choice,
 25    PathType,
 26)
 27from vkit.engine.font import (
 28    font_engine_executor_aggregator_factory,
 29    FontEngineRunConfigStyle,
 30    FontCollection,
 31    TextLine,
 32)
 33from vkit.engine.char_sampler import char_sampler_engine_executor_aggregator_factory
 34from vkit.engine.char_and_font_sampler import char_and_font_sampler_engine_executor_factory
 35from vkit.engine.seal_impression import SealImpression
 36from ..interface import PipelineStep, PipelineStepFactory
 37from .page_layout import PageLayoutStepOutput
 38from .page_seal_impression import PageSealImpresssionStepOutput
 39
 40logger = logging.getLogger(__name__)
 41
 42
@attrs.define
class PageTextLineStepConfig:
    """Configuration consumed by `PageTextLineStep`."""
    # Path handed to `LexiconCollection.from_file`.
    lexicon_collection_json: str
    # Folder handed to `FontCollection.from_folder`.
    font_collection_folder: str
    # Configs (or a path to them) used to build the char sampler aggregator.
    char_sampler_configs: Union[Sequence[Mapping[str, Any]], PathType]
    # Configs (or a path to them) used to build the font engine aggregator.
    font_configs: Union[Sequence[Mapping[str, Any]], PathType]
    # Base font style; `glyph_color` is overridden for every page text line.
    font_style: FontEngineRunConfigStyle = attrs.field(factory=FontEngineRunConfigStyle)
    # Weights of the glyph color families. They are passed together to
    # `normalize_to_keys_and_probs` to obtain the selection probabilities.
    weight_font_style_glyph_color_grayscale: float = 0.9
    # Inclusive range of the grayscale value applied to all three channels.
    font_style_glyph_color_grayscale_min: int = 0
    font_style_glyph_color_grayscale_max: int = 75
    weight_font_style_glyph_color_red: float = 0.04
    weight_font_style_glyph_color_green: float = 0.02
    weight_font_style_glyph_color_blue: float = 0.04
    # Inclusive range of the single non-zero channel for red/green/blue glyphs.
    font_style_glyph_color_rgb_min: int = 128
    font_style_glyph_color_rgb_max: int = 255
    # Forwarded to the font engine as `return_font_variant` for page text lines.
    return_font_variant: bool = False
    # Optional dedicated char sampler configs for short text lines. When None,
    # short text lines reuse the sampler built from `char_sampler_configs`.
    short_text_line_char_sampler_configs: Optional[
        Union[Sequence[Mapping[str, Any]], PathType]
    ] = None  # yapf: disable
    # Probability, evaluated on every sampling attempt, of a short text line.
    prob_short_text_line: float = 0.2
    # Inclusive upper bound of the number of non-space chars kept in a short text line.
    short_text_line_num_chars_max: int = 2
 64
 65
@attrs.define
class PageTextLineStepInput:
    """Upstream step outputs read by `PageTextLineStep.run`."""
    # Provides `page_layout` (layout text lines and page height/width).
    page_layout_step_output: PageLayoutStepOutput
    # Provides the `seal_impressions`, `boxes` and `angles` sequences.
    page_seal_impresssion_step_output: PageSealImpresssionStepOutput
 70
 71
@unique
class PageTextLineStepKey(Enum):
    """Glyph color families that `PageTextLineStep` chooses from per text line."""
    # Same value on all three channels.
    FONT_STYLE_GLYPH_COLOR_GRAYSCALE = 'font_style_glyph_color_grayscale'
    # Only the named channel is non-zero for the three keys below.
    FONT_STYLE_GLYPH_COLOR_RED = 'font_style_glyph_color_red'
    FONT_STYLE_GLYPH_COLOR_GREEN = 'font_style_glyph_color_green'
    FONT_STYLE_GLYPH_COLOR_BLUE = 'font_style_glyph_color_blue'
 78
 79
@attrs.define
class PageTextLineCollection:
    """Text lines rendered for the layout text lines of one page."""
    # Page size, copied from the page layout.
    height: int
    width: int
    # Rendered text lines, already shifted to page coordinates and split.
    text_lines: Sequence[TextLine]
    # Parallel to `text_lines`: True if the line originates from a short text line.
    short_text_line_flags: Sequence[bool]

    @property
    def shape(self):
        """Return the page size as a `(height, width)` tuple."""
        return self.height, self.width
 90
 91
@attrs.define
class SealImpressionResource:
    """Text lines rendered for a single seal impression, plus its placement."""
    # Box and angle paired with the seal impression in the upstream step output.
    box: Box
    angle: int
    # Indices into `seal_impression.text_line_slots` that received a text line;
    # parallel to `text_lines`.
    text_line_slot_indices: Sequence[int]
    text_lines: Sequence[TextLine]
    # Text line for the seal impression's internal text line box; None if the
    # seal impression has no such box or no text line could be produced.
    internal_text_line: Optional[TextLine]
 99
100
@attrs.define
class PageSealImpressionTextLineCollection:
    """Seal impressions of one page together with their rendered text lines."""
    # Page size, copied from the page layout.
    height: int
    width: int
    # Only seal impressions that received at least one slot text line are kept.
    seal_impressions: Sequence[SealImpression]
    # Parallel to `seal_impressions`.
    seal_impression_resources: Sequence[SealImpressionResource]
107
108
@attrs.define
class PageTextLineStepOutput:
    """Result of `PageTextLineStep.run`."""
    # Text lines placed on the page layout.
    page_text_line_collection: PageTextLineCollection
    # Text lines rendered for the page's seal impressions.
    page_seal_impression_text_line_collection: PageSealImpressionTextLineCollection
113
114
class PageTextLineStep(
    PipelineStep[
        PageTextLineStepConfig,
        PageTextLineStepInput,
        PageTextLineStepOutput,
    ]
):  # yapf: disable
    """Pipeline step that samples chars/fonts and renders text lines.

    Two kinds of text lines are produced by `run`:

    * page text lines, one attempt per layout text line of the page layout;
    * seal impression text lines, one attempt per text line slot (and per
      internal text line box) of every seal impression.
    """

    # Number of sampling attempts made before a text line is given up.
    _NUM_SAMPLING_ATTEMPTS = 3
    # Width passed for seal impression slot text lines, whose length is driven
    # by `num_chars` instead. Presumably acts as "no width limit" -- confirm
    # against the char/font sampler and font engine.
    _SEAL_IMPRESSION_TEXT_LINE_WIDTH = 2**32 - 1

    def __init__(self, config: PageTextLineStepConfig):
        """Load the lexicon/font collections and build the engine executors.

        Args:
            config: Step configuration.
        """
        super().__init__(config)

        lexicon_collection = LexiconCollection.from_file(self.config.lexicon_collection_json)
        font_collection = FontCollection.from_folder(self.config.font_collection_folder)

        self.char_and_font_sampler_engine_executor = \
            self._create_char_and_font_sampler_engine_executor(
                self.config.char_sampler_configs,
                lexicon_collection,
                font_collection,
            )

        # Short text lines fall back to the regular sampler unless dedicated
        # char sampler configs are provided.
        if self.config.short_text_line_char_sampler_configs is not None:
            self.short_text_line_char_and_font_sampler_engine_executor = \
                self._create_char_and_font_sampler_engine_executor(
                    self.config.short_text_line_char_sampler_configs,
                    lexicon_collection,
                    font_collection,
                )
        else:
            self.short_text_line_char_and_font_sampler_engine_executor = \
                self.char_and_font_sampler_engine_executor

        self.keys, self.probs = normalize_to_keys_and_probs([
            (
                PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_GRAYSCALE,
                self.config.weight_font_style_glyph_color_grayscale,
            ),
            (
                PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_RED,
                self.config.weight_font_style_glyph_color_red,
            ),
            (
                PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_GREEN,
                self.config.weight_font_style_glyph_color_green,
            ),
            (
                PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_BLUE,
                self.config.weight_font_style_glyph_color_blue,
            ),
        ])
        self.font_engine_executor_aggregator = font_engine_executor_aggregator_factory.create(
            self.config.font_configs
        )

    @staticmethod
    def _create_char_and_font_sampler_engine_executor(
        char_sampler_configs: Union[Sequence[Mapping[str, Any]], PathType],
        lexicon_collection: LexiconCollection,
        font_collection: FontCollection,
    ):
        """Build a char-and-font sampler on top of the given char sampler configs."""
        char_sampler_engine_executor_aggregator = \
            char_sampler_engine_executor_aggregator_factory.create_with_repeated_init_resource(
                char_sampler_configs,
                {
                    'lexicon_collection': lexicon_collection,
                },
            )
        return char_and_font_sampler_engine_executor_factory.create(
            {},
            {
                'lexicon_collection': lexicon_collection,
                'font_collection': font_collection,
                'char_sampler_engine_executor_aggregator':
                    char_sampler_engine_executor_aggregator,
            },
        )

    def _sample_char_and_font(self, run_config: Mapping[str, Any], rng: RandomGenerator):
        """Sample with the regular sampler, retrying on a falsy result.

        Returns:
            The first truthy sampling result, or None if every attempt failed.
        """
        for _ in range(self._NUM_SAMPLING_ATTEMPTS):
            char_and_font = self.char_and_font_sampler_engine_executor.run(
                # A fresh dict per attempt, so attempts never share state.
                run_config=dict(run_config),
                rng=rng,
            )
            if char_and_font:
                return char_and_font
        return None

    def _sample_layout_text_line_char_and_font(self, layout_text_line: Any, rng: RandomGenerator):
        """Sample chars and font for a layout text line, retrying on failure.

        Whether the line is a short text line is re-drawn on every attempt,
        and that decision selects the sampler used for the attempt.

        Returns:
            A `(char_and_font, is_short_text_line)` pair; `char_and_font` is
            None if every attempt failed.
        """
        for _ in range(self._NUM_SAMPLING_ATTEMPTS):
            is_short_text_line = (rng.random() < self.config.prob_short_text_line)

            if is_short_text_line:
                char_and_font_sampler_engine_executor = \
                    self.short_text_line_char_and_font_sampler_engine_executor
            else:
                char_and_font_sampler_engine_executor = \
                    self.char_and_font_sampler_engine_executor

            char_and_font = char_and_font_sampler_engine_executor.run(
                run_config={
                    'height': layout_text_line.box.height,
                    'width': layout_text_line.box.width,
                },
                rng=rng,
            )
            if char_and_font:
                return char_and_font, is_short_text_line

        return None, False

    def _trim_to_short_text_line(self, char_and_font: Any, rng: RandomGenerator):
        """Return a copy of `char_and_font` reduced to a short run of chars.

        Whitespace chars are dropped first. If more chars remain than the
        randomly drawn target length, a random contiguous window is kept.
        """
        # `rng.integers` excludes the upper bound, hence the `+ 1`.
        short_text_line_num_chars = int(
            rng.integers(
                1,
                self.config.short_text_line_num_chars_max + 1,
            )
        )
        chars = [char for char in char_and_font.chars if not char.isspace()]
        if len(chars) > short_text_line_num_chars:
            begin = int(rng.integers(
                0,
                len(chars) - short_text_line_num_chars + 1,
            ))
            chars = chars[begin:begin + short_text_line_num_chars]

        # Lazy %-formatting: nothing is formatted unless DEBUG is enabled.
        logger.debug('short_text_line: trim chars=%s to %s.', char_and_font.chars, chars)
        return attrs.evolve(char_and_font, chars=chars)

    def _sample_glyph_color(self, rng: RandomGenerator):
        """Draw a glyph color as a 3-tuple of ints.

        Raises:
            NotImplementedError: If the chosen key is not a known color family.
        """
        key = rng_choice(rng, self.keys, probs=self.probs)
        if key == PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_GRAYSCALE:
            grayscale_value = int(
                rng.integers(
                    self.config.font_style_glyph_color_grayscale_min,
                    self.config.font_style_glyph_color_grayscale_max + 1,
                )
            )
            return (grayscale_value,) * 3

        rgb_value = int(
            rng.integers(
                self.config.font_style_glyph_color_rgb_min,
                self.config.font_style_glyph_color_rgb_max + 1,
            )
        )
        if key == PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_RED:
            return (rgb_value, 0, 0)
        if key == PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_GREEN:
            return (0, rgb_value, 0)
        if key == PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_BLUE:
            return (0, 0, rgb_value)
        raise NotImplementedError()

    def _generate_page_text_line_collection(self, page_layout: Any, rng: RandomGenerator):
        """Render the text lines to be recognized for every layout text line."""
        text_lines: List[TextLine] = []
        short_text_line_flags: List[bool] = []

        for layout_text_line in page_layout.layout_text_lines:
            char_and_font, is_short_text_line = \
                self._sample_layout_text_line_char_and_font(layout_text_line, rng)
            if not char_and_font:
                logger.warning(
                    f'Cannot sample char_and_font for layout_text_line={layout_text_line}'
                )
                continue

            if is_short_text_line:
                char_and_font = self._trim_to_short_text_line(char_and_font, rng)

            font_style = attrs.evolve(
                self.config.font_style,
                glyph_color=self._sample_glyph_color(rng),
            )
            text_line = self.font_engine_executor_aggregator.run(
                run_config={
                    'height': layout_text_line.box.height,
                    'width': layout_text_line.box.width,
                    'chars': char_and_font.chars,
                    'font_variant': char_and_font.font_variant,
                    'glyph_sequence': layout_text_line.glyph_sequence,
                    'style': font_style,
                    'return_font_variant': self.config.return_font_variant,
                },
                rng=rng,
            )
            if text_line:
                # Move from box-local to page coordinates before splitting.
                text_line = text_line.to_shifted_text_line(
                    offset_y=layout_text_line.box.up,
                    offset_x=layout_text_line.box.left,
                )
                split_text_lines = text_line.split()
                text_lines.extend(split_text_lines)
                short_text_line_flags.extend([is_short_text_line] * len(split_text_lines))

        # NOTE(review): a page without any text line fails here with an
        # AssertionError, as before. Presumably the caller treats that as a
        # failed run -- confirm before replacing it with another exception.
        assert text_lines
        assert len(text_lines) == len(short_text_line_flags)
        return PageTextLineCollection(
            height=page_layout.height,
            width=page_layout.width,
            text_lines=text_lines,
            short_text_line_flags=short_text_line_flags,
        )

    def _generate_page_seal_impression_text_line_collection(
        self,
        page_layout: Any,
        page_seal_impresssion_step_output: PageSealImpresssionStepOutput,
        rng: RandomGenerator,
    ):
        """Render slot and internal text lines for every seal impression.

        Seal impressions for which no slot text line could be rendered are
        left out of the returned collection.
        """
        seal_impressions: List[SealImpression] = []
        seal_impression_resources: List[SealImpressionResource] = []

        for seal_impression, box, angle in zip(
            page_seal_impresssion_step_output.seal_impressions,
            page_seal_impresssion_step_output.boxes,
            page_seal_impresssion_step_output.angles,
        ):
            text_line_slot_indices: List[int] = []
            # Named distinctly from the page-level `text_lines`, so the two
            # lists cannot be mistaken for each other.
            slot_text_lines: List[TextLine] = []

            for text_line_slot_idx, text_line_slot in enumerate(seal_impression.text_line_slots):
                char_and_font = self._sample_char_and_font(
                    run_config={
                        'height': text_line_slot.text_line_height,
                        'width': self._SEAL_IMPRESSION_TEXT_LINE_WIDTH,
                        'num_chars': len(text_line_slot.char_slots),
                    },
                    rng=rng,
                )
                if not char_and_font:
                    logger.warning(
                        f'Cannot sample char_and_font for seal_impression={seal_impression}'
                    )
                    continue

                text_line = self.font_engine_executor_aggregator.run(
                    run_config={
                        'height': text_line_slot.text_line_height,
                        'width': self._SEAL_IMPRESSION_TEXT_LINE_WIDTH,
                        'chars': char_and_font.chars,
                        'font_variant': char_and_font.font_variant,
                    },
                    rng=rng,
                )
                if text_line:
                    text_line_slot_indices.append(text_line_slot_idx)
                    slot_text_lines.append(text_line)

            internal_text_line = None
            internal_text_line_box = seal_impression.internal_text_line_box
            if internal_text_line_box:
                char_and_font = self._sample_char_and_font(
                    run_config={
                        'height': internal_text_line_box.height,
                        'width': internal_text_line_box.width,
                    },
                    rng=rng,
                )
                if not char_and_font:
                    logger.warning(
                        f'Cannot sample char_and_font for seal_impression={seal_impression}'
                    )
                else:
                    internal_text_line = self.font_engine_executor_aggregator.run(
                        run_config={
                            'height': internal_text_line_box.height,
                            'width': internal_text_line_box.width,
                            'chars': char_and_font.chars,
                            'font_variant': char_and_font.font_variant,
                        },
                        rng=rng,
                    )

            if slot_text_lines:
                seal_impressions.append(seal_impression)
                seal_impression_resources.append(
                    SealImpressionResource(
                        box=box,
                        angle=angle,
                        text_line_slot_indices=text_line_slot_indices,
                        text_lines=slot_text_lines,
                        internal_text_line=internal_text_line,
                    )
                )

        return PageSealImpressionTextLineCollection(
            height=page_layout.height,
            width=page_layout.width,
            seal_impressions=seal_impressions,
            seal_impression_resources=seal_impression_resources,
        )

    def run(self, input: PageTextLineStepInput, rng: RandomGenerator):
        """Generate page text lines, then seal impression text lines.

        Args:
            input: Outputs of the page layout and page seal impression steps.
            rng: Random generator driving all sampling. Page text lines are
                generated before seal impression text lines, so the order in
                which `rng` is consumed is fixed.

        Returns:
            A `PageTextLineStepOutput` holding both collections.

        Raises:
            AssertionError: If no page text line could be generated.
        """
        page_layout = input.page_layout_step_output.page_layout

        page_text_line_collection = self._generate_page_text_line_collection(page_layout, rng)
        page_seal_impression_text_line_collection = \
            self._generate_page_seal_impression_text_line_collection(
                page_layout,
                input.page_seal_impresssion_step_output,
                rng,
            )

        return PageTextLineStepOutput(
            page_text_line_collection=page_text_line_collection,
            page_seal_impression_text_line_collection=page_seal_impression_text_line_collection,
        )
422
423
# Module-level factory wrapping `PageTextLineStep`.
page_text_line_step_factory = PipelineStepFactory(PageTextLineStep)
class PageTextLineStepConfig:
45class PageTextLineStepConfig:
46    lexicon_collection_json: str
47    font_collection_folder: str
48    char_sampler_configs: Union[Sequence[Mapping[str, Any]], PathType]
49    font_configs: Union[Sequence[Mapping[str, Any]], PathType]
50    font_style: FontEngineRunConfigStyle = attrs.field(factory=FontEngineRunConfigStyle)
51    weight_font_style_glyph_color_grayscale: float = 0.9
52    font_style_glyph_color_grayscale_min: int = 0
53    font_style_glyph_color_grayscale_max: int = 75
54    weight_font_style_glyph_color_red: float = 0.04
55    weight_font_style_glyph_color_green: float = 0.02
56    weight_font_style_glyph_color_blue: float = 0.04
57    font_style_glyph_color_rgb_min: int = 128
58    font_style_glyph_color_rgb_max: int = 255
59    return_font_variant: bool = False
60    short_text_line_char_sampler_configs: Optional[
61        Union[Sequence[Mapping[str, Any]], PathType]
62    ] = None  # yapf: disable
63    prob_short_text_line: float = 0.2
64    short_text_line_num_chars_max: int = 2
PageTextLineStepConfig( lexicon_collection_json: str, font_collection_folder: str, char_sampler_configs: Union[Sequence[Mapping[str, Any]], str, os.PathLike], font_configs: Union[Sequence[Mapping[str, Any]], str, os.PathLike], font_style: vkit.engine.font.type.FontEngineRunConfigStyle = NOTHING, weight_font_style_glyph_color_grayscale: float = 0.9, font_style_glyph_color_grayscale_min: int = 0, font_style_glyph_color_grayscale_max: int = 75, weight_font_style_glyph_color_red: float = 0.04, weight_font_style_glyph_color_green: float = 0.02, weight_font_style_glyph_color_blue: float = 0.04, font_style_glyph_color_rgb_min: int = 128, font_style_glyph_color_rgb_max: int = 255, return_font_variant: bool = False, short_text_line_char_sampler_configs: Union[Sequence[Mapping[str, Any]], str, os.PathLike, NoneType] = None, prob_short_text_line: float = 0.2, short_text_line_num_chars_max: int = 2)
 2def __init__(self, lexicon_collection_json, font_collection_folder, char_sampler_configs, font_configs, font_style=NOTHING, weight_font_style_glyph_color_grayscale=attr_dict['weight_font_style_glyph_color_grayscale'].default, font_style_glyph_color_grayscale_min=attr_dict['font_style_glyph_color_grayscale_min'].default, font_style_glyph_color_grayscale_max=attr_dict['font_style_glyph_color_grayscale_max'].default, weight_font_style_glyph_color_red=attr_dict['weight_font_style_glyph_color_red'].default, weight_font_style_glyph_color_green=attr_dict['weight_font_style_glyph_color_green'].default, weight_font_style_glyph_color_blue=attr_dict['weight_font_style_glyph_color_blue'].default, font_style_glyph_color_rgb_min=attr_dict['font_style_glyph_color_rgb_min'].default, font_style_glyph_color_rgb_max=attr_dict['font_style_glyph_color_rgb_max'].default, return_font_variant=attr_dict['return_font_variant'].default, short_text_line_char_sampler_configs=attr_dict['short_text_line_char_sampler_configs'].default, prob_short_text_line=attr_dict['prob_short_text_line'].default, short_text_line_num_chars_max=attr_dict['short_text_line_num_chars_max'].default):
 3    self.lexicon_collection_json = lexicon_collection_json
 4    self.font_collection_folder = font_collection_folder
 5    self.char_sampler_configs = char_sampler_configs
 6    self.font_configs = font_configs
 7    if font_style is not NOTHING:
 8        self.font_style = font_style
 9    else:
10        self.font_style = __attr_factory_font_style()
11    self.weight_font_style_glyph_color_grayscale = weight_font_style_glyph_color_grayscale
12    self.font_style_glyph_color_grayscale_min = font_style_glyph_color_grayscale_min
13    self.font_style_glyph_color_grayscale_max = font_style_glyph_color_grayscale_max
14    self.weight_font_style_glyph_color_red = weight_font_style_glyph_color_red
15    self.weight_font_style_glyph_color_green = weight_font_style_glyph_color_green
16    self.weight_font_style_glyph_color_blue = weight_font_style_glyph_color_blue
17    self.font_style_glyph_color_rgb_min = font_style_glyph_color_rgb_min
18    self.font_style_glyph_color_rgb_max = font_style_glyph_color_rgb_max
19    self.return_font_variant = return_font_variant
20    self.short_text_line_char_sampler_configs = short_text_line_char_sampler_configs
21    self.prob_short_text_line = prob_short_text_line
22    self.short_text_line_num_chars_max = short_text_line_num_chars_max

Method generated by attrs for class PageTextLineStepConfig.

class PageTextLineStepInput:
68class PageTextLineStepInput:
69    page_layout_step_output: PageLayoutStepOutput
70    page_seal_impresssion_step_output: PageSealImpresssionStepOutput
PageTextLineStepInput( page_layout_step_output: vkit.pipeline.text_detection.page_layout.PageLayoutStepOutput, page_seal_impresssion_step_output: vkit.pipeline.text_detection.page_seal_impression.PageSealImpresssionStepOutput)
2def __init__(self, page_layout_step_output, page_seal_impresssion_step_output):
3    self.page_layout_step_output = page_layout_step_output
4    self.page_seal_impresssion_step_output = page_seal_impresssion_step_output

Method generated by attrs for class PageTextLineStepInput.

class PageTextLineStepKey(enum.Enum):
74class PageTextLineStepKey(Enum):
75    FONT_STYLE_GLYPH_COLOR_GRAYSCALE = 'font_style_glyph_color_grayscale'
76    FONT_STYLE_GLYPH_COLOR_RED = 'font_style_glyph_color_red'
77    FONT_STYLE_GLYPH_COLOR_GREEN = 'font_style_glyph_color_green'
78    FONT_STYLE_GLYPH_COLOR_BLUE = 'font_style_glyph_color_blue'

An enumeration.

Inherited Members
enum.Enum
name
value
class PageTextLineCollection:
82class PageTextLineCollection:
83    height: int
84    width: int
85    text_lines: Sequence[TextLine]
86    short_text_line_flags: Sequence[bool]
87
88    @property
89    def shape(self):
90        return self.height, self.width
PageTextLineCollection( height: int, width: int, text_lines: Sequence[vkit.engine.font.type.TextLine], short_text_line_flags: Sequence[bool])
2def __init__(self, height, width, text_lines, short_text_line_flags):
3    self.height = height
4    self.width = width
5    self.text_lines = text_lines
6    self.short_text_line_flags = short_text_line_flags

Method generated by attrs for class PageTextLineCollection.

class SealImpressionResource:
94class SealImpressionResource:
95    box: Box
96    angle: int
97    text_line_slot_indices: Sequence[int]
98    text_lines: Sequence[TextLine]
99    internal_text_line: Optional[TextLine]
SealImpressionResource( box: vkit.element.box.Box, angle: int, text_line_slot_indices: Sequence[int], text_lines: Sequence[vkit.engine.font.type.TextLine], internal_text_line: Union[vkit.engine.font.type.TextLine, NoneType])
2def __init__(self, box, angle, text_line_slot_indices, text_lines, internal_text_line):
3    self.box = box
4    self.angle = angle
5    self.text_line_slot_indices = text_line_slot_indices
6    self.text_lines = text_lines
7    self.internal_text_line = internal_text_line

Method generated by attrs for class SealImpressionResource.

class PageSealImpressionTextLineCollection:
103class PageSealImpressionTextLineCollection:
104    height: int
105    width: int
106    seal_impressions: Sequence[SealImpression]
107    seal_impression_resources: Sequence[SealImpressionResource]
PageSealImpressionTextLineCollection( height: int, width: int, seal_impressions: Sequence[vkit.engine.seal_impression.type.SealImpression], seal_impression_resources: Sequence[vkit.pipeline.text_detection.page_text_line.SealImpressionResource])
2def __init__(self, height, width, seal_impressions, seal_impression_resources):
3    self.height = height
4    self.width = width
5    self.seal_impressions = seal_impressions
6    self.seal_impression_resources = seal_impression_resources

Method generated by attrs for class PageSealImpressionTextLineCollection.

class PageTextLineStepOutput:
class PageTextLineStepOutput:
    """Result of ``PageTextLineStep.run``."""
    # Text lines rendered for the layout text lines of the page.
    page_text_line_collection: PageTextLineCollection
    # Text lines rendered for the seal impressions of the page.
    page_seal_impression_text_line_collection: PageSealImpressionTextLineCollection
PageTextLineStepOutput( page_text_line_collection: vkit.pipeline.text_detection.page_text_line.PageTextLineCollection, page_seal_impression_text_line_collection: vkit.pipeline.text_detection.page_text_line.PageSealImpressionTextLineCollection)
def __init__(self, page_text_line_collection, page_seal_impression_text_line_collection):
    # attrs-generated initializer: each argument is stored, unchanged, on the
    # attribute of the same name. No validation or conversion happens here.
    self.page_text_line_collection = page_text_line_collection
    self.page_seal_impression_text_line_collection = page_seal_impression_text_line_collection

Method generated by attrs for class PageTextLineStepOutput.

class PageTextLineStep(
    PipelineStep[
        PageTextLineStepConfig,
        PageTextLineStepInput,
        PageTextLineStepOutput,
    ]
):  # yapf: disable
    """Pipeline step that renders text lines for a page layout and its seal impressions.

    For every layout text line the step samples characters and a font, picks a
    glyph color, and renders the text with the font engine. It then does the same
    (without color styling) for the text line slots and the optional internal text
    line box of every seal impression.

    All randomness is drawn from the ``rng`` passed to ``run``. The helpers below
    keep the draw order of the original single-method implementation.
    """

    def __init__(self, config: PageTextLineStepConfig):
        """Load lexicon and font resources and build the sampler and font engines.

        Args:
            config: Step configuration. The lexicon JSON and the font folder it
                points to are loaded here.
        """
        super().__init__(config)

        lexicon_collection = LexiconCollection.from_file(self.config.lexicon_collection_json)
        font_collection = FontCollection.from_folder(self.config.font_collection_folder)
        char_sampler_engine_executor_aggregator = \
            char_sampler_engine_executor_aggregator_factory.create_with_repeated_init_resource(
                self.config.char_sampler_configs,
                {
                    'lexicon_collection': lexicon_collection,
                },
            )

        self.char_and_font_sampler_engine_executor = \
            char_and_font_sampler_engine_executor_factory.create(
                {},
                {
                    'lexicon_collection': lexicon_collection,
                    'font_collection': font_collection,
                    'char_sampler_engine_executor_aggregator':
                        char_sampler_engine_executor_aggregator,
                },
            )

        # Short text lines reuse the default sampler unless dedicated char sampler
        # configs are provided.
        self.short_text_line_char_and_font_sampler_engine_executor = \
            self.char_and_font_sampler_engine_executor

        if self.config.short_text_line_char_sampler_configs is not None:
            short_text_line_char_sampler_engine_executor_aggregator = \
                char_sampler_engine_executor_aggregator_factory.create_with_repeated_init_resource(
                    self.config.short_text_line_char_sampler_configs,
                    {
                        'lexicon_collection': lexicon_collection,
                    },
                )
            self.short_text_line_char_and_font_sampler_engine_executor = \
                char_and_font_sampler_engine_executor_factory.create(
                    {},
                    {
                        'lexicon_collection': lexicon_collection,
                        'font_collection': font_collection,
                        'char_sampler_engine_executor_aggregator':
                            short_text_line_char_sampler_engine_executor_aggregator,
                    },
                )

        # Glyph color families and their normalized sampling probabilities.
        self.keys, self.probs = normalize_to_keys_and_probs([
            (
                PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_GRAYSCALE,
                self.config.weight_font_style_glyph_color_grayscale,
            ),
            (
                PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_RED,
                self.config.weight_font_style_glyph_color_red,
            ),
            (
                PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_GREEN,
                self.config.weight_font_style_glyph_color_green,
            ),
            (
                PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_BLUE,
                self.config.weight_font_style_glyph_color_blue,
            ),
        ])
        self.font_engine_executor_aggregator = font_engine_executor_aggregator_factory.create(
            self.config.font_configs
        )

    def run(self, input: PageTextLineStepInput, rng: RandomGenerator):
        """Render the text lines of the page and of its seal impressions.

        Args:
            input: Step input carrying the page layout step output and the seal
                impression step output.
            rng: Random generator; every random decision of the step draws from it.

        Returns:
            A ``PageTextLineStepOutput`` holding both text line collections.

        Raises:
            AssertionError: If no text line could be rendered for the page layout.
            NotImplementedError: If an unknown glyph color key is sampled.
        """
        page_layout = input.page_layout_step_output.page_layout

        # Page text lines must be generated first: both stages share ``rng``.
        page_text_line_collection = self._build_page_text_line_collection(page_layout, rng)
        page_seal_impression_text_line_collection = \
            self._build_page_seal_impression_text_line_collection(
                page_layout,
                input.page_seal_impresssion_step_output,
                rng,
            )

        return PageTextLineStepOutput(
            page_text_line_collection=page_text_line_collection,
            page_seal_impression_text_line_collection=page_seal_impression_text_line_collection,
        )

    @staticmethod
    def _sample_char_and_font(
        executor: Any,
        run_config: Mapping[str, Any],
        rng: RandomGenerator,
        num_attempts: int = 3,
    ):
        """Run ``executor`` up to ``num_attempts`` times; return the first truthy result or None."""
        for _ in range(num_attempts):
            # Hand the executor a fresh dict on every attempt.
            char_and_font = executor.run(run_config=dict(run_config), rng=rng)
            if char_and_font:
                return char_and_font
        return None

    def _sample_layout_char_and_font(
        self,
        layout_text_line: Any,
        rng: RandomGenerator,
        num_attempts: int = 3,
    ):
        """Sample chars and font for a layout text line.

        The short/normal decision is re-drawn on every attempt, so the returned
        flag always describes the attempt that produced the returned sample.

        Returns:
            ``(char_and_font, is_short_text_line)``. ``char_and_font`` is falsy when
            every attempt failed.
        """
        is_short_text_line = False
        for _ in range(num_attempts):
            is_short_text_line = (rng.random() < self.config.prob_short_text_line)

            if is_short_text_line:
                executor = self.short_text_line_char_and_font_sampler_engine_executor
            else:
                executor = self.char_and_font_sampler_engine_executor

            char_and_font = executor.run(
                run_config={
                    'height': layout_text_line.box.height,
                    'width': layout_text_line.box.width,
                },
                rng=rng,
            )
            if char_and_font:
                return char_and_font, is_short_text_line

        return None, is_short_text_line

    def _trim_to_short_text_line(self, char_and_font: Any, rng: RandomGenerator):
        """Drop whitespace chars and keep a random window of at most the configured length."""
        short_text_line_num_chars = int(
            rng.integers(
                1,
                self.config.short_text_line_num_chars_max + 1,
            )
        )
        chars = [char for char in char_and_font.chars if not char.isspace()]
        if len(chars) > short_text_line_num_chars:
            begin = int(rng.integers(
                0,
                len(chars) - short_text_line_num_chars + 1,
            ))
            chars = chars[begin:begin + short_text_line_num_chars]

        # Lazy %-args: the lists are only formatted when debug logging is enabled.
        logger.debug('short_text_line: trim chars=%s to %s.', char_and_font.chars, chars)
        return attrs.evolve(char_and_font, chars=chars)

    def _sample_glyph_color(self, rng: RandomGenerator):
        """Pick a color family by weight, then an intensity; return an RGB 3-tuple."""
        key = rng_choice(rng, self.keys, probs=self.probs)
        if key == PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_GRAYSCALE:
            grayscale_value = int(
                rng.integers(
                    self.config.font_style_glyph_color_grayscale_min,
                    self.config.font_style_glyph_color_grayscale_max + 1,
                )
            )
            return (grayscale_value,) * 3

        rgb_value = int(
            rng.integers(
                self.config.font_style_glyph_color_rgb_min,
                self.config.font_style_glyph_color_rgb_max + 1,
            )
        )
        if key == PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_RED:
            return (rgb_value, 0, 0)
        if key == PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_GREEN:
            return (0, rgb_value, 0)
        if key == PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_BLUE:
            return (0, 0, rgb_value)
        raise NotImplementedError()

    def _build_page_text_line_collection(self, page_layout: Any, rng: RandomGenerator):
        """Render the text lines to be recognized, one layout text line at a time."""
        text_lines: List[TextLine] = []
        short_text_line_flags: List[bool] = []

        for layout_text_line in page_layout.layout_text_lines:
            char_and_font, is_short_text_line = self._sample_layout_char_and_font(
                layout_text_line,
                rng,
            )
            if not char_and_font:
                logger.warning(
                    'Cannot sample char_and_font for layout_text_line=%s',
                    layout_text_line,
                )
                continue

            if is_short_text_line:
                char_and_font = self._trim_to_short_text_line(char_and_font, rng)

            font_style = attrs.evolve(
                self.config.font_style,
                glyph_color=self._sample_glyph_color(rng),
            )
            text_line = self.font_engine_executor_aggregator.run(
                run_config={
                    'height': layout_text_line.box.height,
                    'width': layout_text_line.box.width,
                    'chars': char_and_font.chars,
                    'font_variant': char_and_font.font_variant,
                    'glyph_sequence': layout_text_line.glyph_sequence,
                    'style': font_style,
                    'return_font_variant': self.config.return_font_variant,
                },
                rng=rng,
            )
            if text_line:
                # Move the rendered line to the position of its layout box.
                text_line = text_line.to_shifted_text_line(
                    offset_y=layout_text_line.box.up,
                    offset_x=layout_text_line.box.left,
                )
                split_text_lines = text_line.split()
                text_lines.extend(split_text_lines)
                # Every split piece inherits the flag of the line it came from.
                short_text_line_flags.extend([is_short_text_line] * len(split_text_lines))

        assert text_lines
        assert len(text_lines) == len(short_text_line_flags)
        return PageTextLineCollection(
            height=page_layout.height,
            width=page_layout.width,
            text_lines=text_lines,
            short_text_line_flags=short_text_line_flags,
        )

    def _build_page_seal_impression_text_line_collection(
        self,
        page_layout: Any,
        page_seal_impresssion_step_output: Any,
        rng: RandomGenerator,
    ):
        """Render text lines for the slots and internal box of each seal impression."""
        seal_impressions: List[SealImpression] = []
        seal_impression_resources: List[SealImpressionResource] = []

        for seal_impression, box, angle in zip(
            page_seal_impresssion_step_output.seal_impressions,
            page_seal_impresssion_step_output.boxes,
            page_seal_impresssion_step_output.angles,
        ):
            text_line_slot_indices: List[int] = []
            # Named distinctly from the page-level ``text_lines`` list.
            seal_text_lines: List[TextLine] = []

            for text_line_slot_idx, text_line_slot in enumerate(seal_impression.text_line_slots):
                char_and_font = self._sample_char_and_font(
                    self.char_and_font_sampler_engine_executor,
                    {
                        'height': text_line_slot.text_line_height,
                        'width': 2**32 - 1,
                        'num_chars': len(text_line_slot.char_slots),
                    },
                    rng,
                )
                if not char_and_font:
                    logger.warning(
                        'Cannot sample char_and_font for seal_impression=%s',
                        seal_impression,
                    )
                    continue

                text_line = self.font_engine_executor_aggregator.run(
                    run_config={
                        'height': text_line_slot.text_line_height,
                        'width': 2**32 - 1,
                        'chars': char_and_font.chars,
                        'font_variant': char_and_font.font_variant,
                    },
                    rng=rng,
                )
                if text_line:
                    text_line_slot_indices.append(text_line_slot_idx)
                    seal_text_lines.append(text_line)

            # The internal text line is sampled even when no slot was rendered, so
            # the number of RNG draws does not depend on the slot results.
            internal_text_line = None
            internal_text_line_box = seal_impression.internal_text_line_box
            if internal_text_line_box:
                char_and_font = self._sample_char_and_font(
                    self.char_and_font_sampler_engine_executor,
                    {
                        'height': internal_text_line_box.height,
                        'width': internal_text_line_box.width,
                    },
                    rng,
                )
                if not char_and_font:
                    logger.warning(
                        'Cannot sample char_and_font for seal_impression=%s',
                        seal_impression,
                    )
                else:
                    internal_text_line = self.font_engine_executor_aggregator.run(
                        run_config={
                            'height': internal_text_line_box.height,
                            'width': internal_text_line_box.width,
                            'chars': char_and_font.chars,
                            'font_variant': char_and_font.font_variant,
                        },
                        rng=rng,
                    )

            # Seal impressions without any rendered slot text line are dropped.
            if seal_text_lines:
                seal_impressions.append(seal_impression)
                seal_impression_resources.append(
                    SealImpressionResource(
                        box=box,
                        angle=angle,
                        text_line_slot_indices=text_line_slot_indices,
                        text_lines=seal_text_lines,
                        internal_text_line=internal_text_line,
                    )
                )

        return PageSealImpressionTextLineCollection(
            height=page_layout.height,
            width=page_layout.width,
            seal_impressions=seal_impressions,
            seal_impression_resources=seal_impression_resources,
        )

Abstract base class for generic types.

A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::

    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
        # Etc.

This class can then be used as follows::

    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default

124    def __init__(self, config: PageTextLineStepConfig):
125        super().__init__(config)
126
127        lexicon_collection = LexiconCollection.from_file(self.config.lexicon_collection_json)
128        font_collection = FontCollection.from_folder(self.config.font_collection_folder)
129        char_sampler_engine_executor_aggregator = \
130            char_sampler_engine_executor_aggregator_factory.create_with_repeated_init_resource(
131                self.config.char_sampler_configs,
132                {
133                    'lexicon_collection': lexicon_collection,
134                },
135            )
136
137        self.char_and_font_sampler_engine_executor = \
138            char_and_font_sampler_engine_executor_factory.create(
139                {},
140                {
141                    'lexicon_collection': lexicon_collection,
142                    'font_collection': font_collection,
143                    'char_sampler_engine_executor_aggregator':
144                        char_sampler_engine_executor_aggregator,
145                },
146            )
147
148        self.short_text_line_char_and_font_sampler_engine_executor = \
149            self.char_and_font_sampler_engine_executor
150
151        if self.config.short_text_line_char_sampler_configs is not None:
152            short_text_line_char_sampler_engine_executor_aggregator = \
153                char_sampler_engine_executor_aggregator_factory.create_with_repeated_init_resource(
154                    self.config.short_text_line_char_sampler_configs,
155                    {
156                        'lexicon_collection': lexicon_collection,
157                    },
158                )
159            self.short_text_line_char_and_font_sampler_engine_executor = \
160                char_and_font_sampler_engine_executor_factory.create(
161                    {},
162                    {
163                        'lexicon_collection': lexicon_collection,
164                        'font_collection': font_collection,
165                        'char_sampler_engine_executor_aggregator':
166                            short_text_line_char_sampler_engine_executor_aggregator,
167                    },
168                )
169
170        self.keys, self.probs = normalize_to_keys_and_probs([
171            (
172                PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_GRAYSCALE,
173                self.config.weight_font_style_glyph_color_grayscale,
174            ),
175            (
176                PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_RED,
177                self.config.weight_font_style_glyph_color_red,
178            ),
179            (
180                PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_GREEN,
181                self.config.weight_font_style_glyph_color_green,
182            ),
183            (
184                PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_BLUE,
185                self.config.weight_font_style_glyph_color_blue,
186            ),
187        ])
188        self.font_engine_executor_aggregator = font_engine_executor_aggregator_factory.create(
189            self.config.font_configs
190        )
def run( self, input: vkit.pipeline.text_detection.page_text_line.PageTextLineStepInput, rng: numpy.random._generator.Generator):
192    def run(self, input: PageTextLineStepInput, rng: RandomGenerator):
193        page_layout_step_output = input.page_layout_step_output
194        page_layout = page_layout_step_output.page_layout
195
196        # Text lines to be recognized.
197        text_lines: List[TextLine] = []
198        short_text_line_flags: List[bool] = []
199
200        for layout_text_line in page_layout.layout_text_lines:
201            char_and_font = None
202            is_short_text_line = False
203
204            num_retries = 3
205            while num_retries > 0:
206                is_short_text_line = (rng.random() < self.config.prob_short_text_line)
207
208                if is_short_text_line:
209                    char_and_font_sampler_engine_executor = \
210                        self.short_text_line_char_and_font_sampler_engine_executor
211                else:
212                    char_and_font_sampler_engine_executor = \
213                        self.char_and_font_sampler_engine_executor
214
215                char_and_font = char_and_font_sampler_engine_executor.run(
216                    run_config={
217                        'height': layout_text_line.box.height,
218                        'width': layout_text_line.box.width,
219                    },
220                    rng=rng,
221                )
222                if char_and_font:
223                    break
224
225                num_retries -= 1
226
227            if num_retries <= 0:
228                logger.warning(
229                    f'Cannot sample char_and_font for layout_text_line={layout_text_line}'
230                )
231                continue
232            assert char_and_font
233
234            if is_short_text_line:
235                # Trim to short text line.
236                short_text_line_num_chars = int(
237                    rng.integers(
238                        1,
239                        self.config.short_text_line_num_chars_max + 1,
240                    )
241                )
242                chars = [char for char in char_and_font.chars if not char.isspace()]
243                if len(chars) > short_text_line_num_chars:
244                    begin = int(rng.integers(
245                        0,
246                        len(chars) - short_text_line_num_chars + 1,
247                    ))
248                    end = begin + short_text_line_num_chars - 1
249                    chars = chars[begin:end + 1]
250
251                logger.debug(f'short_text_line: trim chars={char_and_font.chars} to {chars}.')
252                char_and_font = attrs.evolve(char_and_font, chars=chars)
253
254            key = rng_choice(rng, self.keys, probs=self.probs)
255            if key == PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_GRAYSCALE:
256                grayscale_value = int(
257                    rng.integers(
258                        self.config.font_style_glyph_color_grayscale_min,
259                        self.config.font_style_glyph_color_grayscale_max + 1,
260                    )
261                )
262                glyph_color = (grayscale_value,) * 3
263
264            else:
265                rgb_value = int(
266                    rng.integers(
267                        self.config.font_style_glyph_color_rgb_min,
268                        self.config.font_style_glyph_color_rgb_max + 1,
269                    )
270                )
271
272                if key == PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_RED:
273                    glyph_color = (rgb_value, 0, 0)
274                elif key == PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_GREEN:
275                    glyph_color = (0, rgb_value, 0)
276                elif key == PageTextLineStepKey.FONT_STYLE_GLYPH_COLOR_BLUE:
277                    glyph_color = (0, 0, rgb_value)
278                else:
279                    raise NotImplementedError()
280
281            font_style = attrs.evolve(
282                self.config.font_style,
283                glyph_color=glyph_color,
284            )
285            text_line = self.font_engine_executor_aggregator.run(
286                run_config={
287                    'height': layout_text_line.box.height,
288                    'width': layout_text_line.box.width,
289                    'chars': char_and_font.chars,
290                    'font_variant': char_and_font.font_variant,
291                    'glyph_sequence': layout_text_line.glyph_sequence,
292                    'style': font_style,
293                    'return_font_variant': self.config.return_font_variant,
294                },
295                rng=rng,
296            )
297            if text_line:
298                text_line = text_line.to_shifted_text_line(
299                    offset_y=layout_text_line.box.up,
300                    offset_x=layout_text_line.box.left,
301                )
302                split_text_lines = text_line.split()
303                text_lines.extend(split_text_lines)
304                short_text_line_flags.extend([is_short_text_line] * len(split_text_lines))
305
306        assert text_lines
307        assert len(text_lines) == len(short_text_line_flags)
308        page_text_line_collection = PageTextLineCollection(
309            height=page_layout.height,
310            width=page_layout.width,
311            text_lines=text_lines,
312            short_text_line_flags=short_text_line_flags,
313        )
314
315        # Text lines for seal impressions.
316        page_seal_impresssion_step_output = input.page_seal_impresssion_step_output
317
318        seal_impressions: List[SealImpression] = []
319        seal_impression_resources: List[SealImpressionResource] = []
320
321        for seal_impression, box, angle in zip(
322            page_seal_impresssion_step_output.seal_impressions,
323            page_seal_impresssion_step_output.boxes,
324            page_seal_impresssion_step_output.angles,
325        ):
326            text_line_slot_indices: List[int] = []
327            text_lines: List[TextLine] = []
328
329            for text_line_slot_idx, text_line_slot in enumerate(seal_impression.text_line_slots):
330                char_and_font = None
331
332                num_retries = 3
333                while num_retries > 0:
334                    char_and_font = self.char_and_font_sampler_engine_executor.run(
335                        run_config={
336                            'height': text_line_slot.text_line_height,
337                            'width': 2**32 - 1,
338                            'num_chars': len(text_line_slot.char_slots),
339                        },
340                        rng=rng,
341                    )
342                    if char_and_font:
343                        break
344                    num_retries -= 1
345
346                if num_retries <= 0:
347                    logger.warning(
348                        f'Cannot sample char_and_font for seal_impression={seal_impression}'
349                    )
350                    continue
351                assert char_and_font
352
353                text_line = self.font_engine_executor_aggregator.run(
354                    run_config={
355                        'height': text_line_slot.text_line_height,
356                        'width': 2**32 - 1,
357                        'chars': char_and_font.chars,
358                        'font_variant': char_and_font.font_variant,
359                    },
360                    rng=rng,
361                )
362                if text_line:
363                    text_line_slot_indices.append(text_line_slot_idx)
364                    text_lines.append(text_line)
365
366            internal_text_line = None
367            if seal_impression.internal_text_line_box:
368                char_and_font = None
369
370                num_retries = 3
371                while num_retries > 0:
372                    char_and_font = self.char_and_font_sampler_engine_executor.run(
373                        run_config={
374                            'height': seal_impression.internal_text_line_box.height,
375                            'width': seal_impression.internal_text_line_box.width,
376                        },
377                        rng=rng,
378                    )
379                    if char_and_font:
380                        break
381                    num_retries -= 1
382
383                if num_retries <= 0:
384                    logger.warning(
385                        f'Cannot sample char_and_font for seal_impression={seal_impression}'
386                    )
387                else:
388                    assert char_and_font
389
390                    internal_text_line = self.font_engine_executor_aggregator.run(
391                        run_config={
392                            'height': seal_impression.internal_text_line_box.height,
393                            'width': seal_impression.internal_text_line_box.width,
394                            'chars': char_and_font.chars,
395                            'font_variant': char_and_font.font_variant,
396                        },
397                        rng=rng,
398                    )
399
400            if text_lines:
401                seal_impressions.append(seal_impression)
402                seal_impression_resources.append(
403                    SealImpressionResource(
404                        box=box,
405                        angle=angle,
406                        text_line_slot_indices=text_line_slot_indices,
407                        text_lines=text_lines,
408                        internal_text_line=internal_text_line,
409                    )
410                )
411
412        page_seal_impression_text_line_collection = PageSealImpressionTextLineCollection(
413            height=page_layout.height,
414            width=page_layout.width,
415            seal_impressions=seal_impressions,
416            seal_impression_resources=seal_impression_resources,
417        )
418
419        return PageTextLineStepOutput(
420            page_text_line_collection=page_text_line_collection,
421            page_seal_impression_text_line_collection=page_seal_impression_text_line_collection,
422        )