vkit.pipeline.text_detection.page_non_text_symbol

  1# Copyright 2022 vkit-x Administrator. All Rights Reserved.
  2#
  3# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
  4#
  5# The commercial license gives you the full rights to create and distribute software
  6# on your own terms without any SSPL license obligations. For more information,
  7# please see the "LICENSE_COMMERCIAL.txt" file.
  8#
  9# This project is also available under Server Side Public License (SSPL).
 10# The SSPL licensing is ideal for use cases such as open source projects with
 11# SSPL distribution, student/academic purposes, hobby projects, internal research
 12# projects without external distribution, or other projects where all SSPL
 13# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
 14from typing import Sequence, List, Union
 15from enum import Enum, unique
 16
 17import attrs
 18from numpy.random import Generator as RandomGenerator
 19import numpy as np
 20
 21from vkit.utility import normalize_to_keys_and_probs, rng_choice
 22from vkit.element import Box, Image, ImageMode
 23from vkit.engine.image import image_selector_engine_executor_factory
 24from ..interface import PipelineStep, PipelineStepFactory
 25from .page_layout import PageLayoutStepOutput
 26
 27
@attrs.define
class PageNonTextSymbolStepConfig:
    """Configuration consumed by ``PageNonTextSymbolStep``."""
    # Forwarded to the image selector engine as its 'image_folders' option.
    symbol_image_folders: Sequence[str]

    # Weight of the grayscale color mode; the four weights are passed together
    # to normalize_to_keys_and_probs, so only their relative sizes should matter
    # (NOTE(review): confirm against that helper).
    weight_color_grayscale: float = 0.9
    # Inclusive bounds of the gray level sampled in grayscale mode
    # (the step calls rng.integers(min, max + 1)).
    color_grayscale_min: int = 0
    color_grayscale_max: int = 75
    # Weights of the single-channel color modes.
    weight_color_red: float = 0.04
    weight_color_green: float = 0.02
    weight_color_blue: float = 0.04
    # Inclusive bounds of the channel intensity sampled for red/green/blue modes.
    color_rgb_min: int = 128
    color_rgb_max: int = 255
 40
 41
@attrs.define
class PageNonTextSymbolStepInput:
    """Input of ``PageNonTextSymbolStep``."""
    # Output of the page layout step; run() reads its page_layout.layout_non_text_symbols.
    page_layout_step_output: PageLayoutStepOutput
 45
 46
@attrs.define
class PageNonTextSymbolStepOutput:
    """Output of ``PageNonTextSymbolStep``; the three sequences are index-aligned."""
    # One symbol image per layout non-text symbol.
    images: Sequence[Image]
    # The layout box each image was generated for.
    boxes: Sequence[Box]
    # Alpha for each image; run() fills this with float32 arrays.
    alphas: Sequence[Union[np.ndarray, float]]
 52
 53
@unique
class NonTextSymbolColorMode(Enum):
    """Color modes that can be sampled when coloring a grayscale symbol image."""
    # Same sampled value on all three channels.
    GRAYSCALE = 'grayscale'
    # Sampled value on a single channel, zero on the other two.
    RED = 'red'
    GREEN = 'green'
    BLUE = 'blue'
 60
 61
 62class PageNonTextSymbolStep(
 63    PipelineStep[
 64        PageNonTextSymbolStepConfig,
 65        PageNonTextSymbolStepInput,
 66        PageNonTextSymbolStepOutput,
 67    ]
 68):  # yapf: disable
 69
 70    def __init__(self, config: PageNonTextSymbolStepConfig):
 71        super().__init__(config)
 72
 73        self.symbol_image_selector_engine_executor = \
 74            image_selector_engine_executor_factory.create({
 75                'image_folders': self.config.symbol_image_folders,
 76                'target_image_mode': None,
 77                'force_resize': True,
 78            })
 79
 80        self.color_modes, self.color_modes_probs = normalize_to_keys_and_probs([
 81            (
 82                NonTextSymbolColorMode.GRAYSCALE,
 83                self.config.weight_color_grayscale,
 84            ),
 85            (
 86                NonTextSymbolColorMode.RED,
 87                self.config.weight_color_red,
 88            ),
 89            (
 90                NonTextSymbolColorMode.GREEN,
 91                self.config.weight_color_green,
 92            ),
 93            (
 94                NonTextSymbolColorMode.BLUE,
 95                self.config.weight_color_blue,
 96            ),
 97        ])
 98
 99    def run(self, input: PageNonTextSymbolStepInput, rng: RandomGenerator):
100        page_layout_step_output = input.page_layout_step_output
101        page_layout = page_layout_step_output.page_layout
102
103        images: List[Image] = []
104        boxes: Sequence[Box] = []
105        alphas: List[Union[np.ndarray, float]] = []
106
107        for layout_non_text_symbol in page_layout.layout_non_text_symbols:
108            box = layout_non_text_symbol.box
109
110            image = self.symbol_image_selector_engine_executor.run(
111                {
112                    'height': box.height,
113                    'width': box.width
114                },
115                rng,
116            )
117            alpha: Union[np.ndarray, float] = layout_non_text_symbol.alpha
118
119            if image.mode == ImageMode.RGBA:
120                # Extract and rescale alpha.
121                np_alpha = (image.mat[:, :, 3]).astype(np.float32) / 255
122                np_alpha_max = np_alpha.max()
123                np_alpha *= layout_non_text_symbol.alpha
124                np_alpha /= np_alpha_max
125                alpha = np_alpha
126
127                # Force to rgb (ignoring alpha channel).
128                image = Image(mat=image.mat[:, :, :3])
129
130            elif image.mode == ImageMode.GRAYSCALE:
131                # As mask.
132                alpha = (image.mat > 0).astype(np.float32)
133                alpha *= layout_non_text_symbol.alpha
134
135                # Generate image with color.
136                color_mode = rng_choice(rng, self.color_modes, probs=self.color_modes_probs)
137                if color_mode == NonTextSymbolColorMode.GRAYSCALE:
138                    grayscale_value = int(
139                        rng.integers(
140                            self.config.color_grayscale_min,
141                            self.config.color_grayscale_max + 1,
142                        )
143                    )
144                    symbol_color = (grayscale_value,) * 3
145
146                else:
147                    rgb_value = int(
148                        rng.integers(
149                            self.config.color_rgb_min,
150                            self.config.color_rgb_max + 1,
151                        )
152                    )
153                    if color_mode == NonTextSymbolColorMode.RED:
154                        symbol_color = (rgb_value, 0, 0)
155                    elif color_mode == NonTextSymbolColorMode.GREEN:
156                        symbol_color = (0, rgb_value, 0)
157                    elif color_mode == NonTextSymbolColorMode.BLUE:
158                        symbol_color = (0, 0, rgb_value)
159                    else:
160                        raise NotImplementedError()
161
162                image = Image.from_shapable(image, value=symbol_color)
163
164            else:
165                raise NotImplementedError()
166
167            images.append(image)
168            boxes.append(layout_non_text_symbol.box)
169            alphas.append(alpha)
170
171        return PageNonTextSymbolStepOutput(
172            images=images,
173            boxes=boxes,
174            alphas=alphas,
175        )
176
177
# Module-level factory object wrapping PageNonTextSymbolStep.
page_non_text_symbol_step_factory = PipelineStepFactory(PageNonTextSymbolStep)
class PageNonTextSymbolStepConfig:
class PageNonTextSymbolStepConfig:
    """Configuration consumed by ``PageNonTextSymbolStep``."""
    # Forwarded to the image selector engine as its 'image_folders' option.
    symbol_image_folders: Sequence[str]

    # Weight of the grayscale color mode relative to the red/green/blue weights.
    weight_color_grayscale: float = 0.9
    # Inclusive bounds of the gray level sampled in grayscale mode.
    color_grayscale_min: int = 0
    color_grayscale_max: int = 75
    # Weights of the single-channel color modes.
    weight_color_red: float = 0.04
    weight_color_green: float = 0.02
    weight_color_blue: float = 0.04
    # Inclusive bounds of the channel intensity sampled for red/green/blue modes.
    color_rgb_min: int = 128
    color_rgb_max: int = 255
PageNonTextSymbolStepConfig( symbol_image_folders: Sequence[str], weight_color_grayscale: float = 0.9, color_grayscale_min: int = 0, color_grayscale_max: int = 75, weight_color_red: float = 0.04, weight_color_green: float = 0.02, weight_color_blue: float = 0.04, color_rgb_min: int = 128, color_rgb_max: int = 255)
# Constructor generated by attrs: stores each argument on the attribute of the
# same name; defaults are looked up from the attrs field definitions (attr_dict).
def __init__(self, symbol_image_folders, weight_color_grayscale=attr_dict['weight_color_grayscale'].default, color_grayscale_min=attr_dict['color_grayscale_min'].default, color_grayscale_max=attr_dict['color_grayscale_max'].default, weight_color_red=attr_dict['weight_color_red'].default, weight_color_green=attr_dict['weight_color_green'].default, weight_color_blue=attr_dict['weight_color_blue'].default, color_rgb_min=attr_dict['color_rgb_min'].default, color_rgb_max=attr_dict['color_rgb_max'].default):
    self.symbol_image_folders = symbol_image_folders
    self.weight_color_grayscale = weight_color_grayscale
    self.color_grayscale_min = color_grayscale_min
    self.color_grayscale_max = color_grayscale_max
    self.weight_color_red = weight_color_red
    self.weight_color_green = weight_color_green
    self.weight_color_blue = weight_color_blue
    self.color_rgb_min = color_rgb_min
    self.color_rgb_max = color_rgb_max

Method generated by attrs for class PageNonTextSymbolStepConfig.

class PageNonTextSymbolStepInput:
class PageNonTextSymbolStepInput:
    """Input of ``PageNonTextSymbolStep``."""
    # Output of the page layout step; run() reads its page_layout.layout_non_text_symbols.
    page_layout_step_output: PageLayoutStepOutput
PageNonTextSymbolStepInput( page_layout_step_output: vkit.pipeline.text_detection.page_layout.PageLayoutStepOutput)
# Constructor generated by attrs: stores the single argument unchanged.
def __init__(self, page_layout_step_output):
    self.page_layout_step_output = page_layout_step_output

Method generated by attrs for class PageNonTextSymbolStepInput.

class PageNonTextSymbolStepOutput:
class PageNonTextSymbolStepOutput:
    """Output of ``PageNonTextSymbolStep``; the three sequences are index-aligned."""
    # One symbol image per layout non-text symbol.
    images: Sequence[Image]
    # The layout box each image was generated for.
    boxes: Sequence[Box]
    # Alpha for each image; run() fills this with float32 arrays.
    alphas: Sequence[Union[np.ndarray, float]]
PageNonTextSymbolStepOutput( images: Sequence[vkit.element.image.Image], boxes: Sequence[vkit.element.box.Box], alphas: Sequence[Union[numpy.ndarray, float]])
# Constructor generated by attrs: stores the three sequences unchanged.
def __init__(self, images, boxes, alphas):
    self.images = images
    self.boxes = boxes
    self.alphas = alphas

Method generated by attrs for class PageNonTextSymbolStepOutput.

class NonTextSymbolColorMode(enum.Enum):
class NonTextSymbolColorMode(Enum):
    """Color modes that can be sampled when coloring a grayscale symbol image."""
    # Same sampled value on all three channels.
    GRAYSCALE = 'grayscale'
    # Sampled value on a single channel, zero on the other two.
    RED = 'red'
    GREEN = 'green'
    BLUE = 'blue'

An enumeration.

GRAYSCALE = <NonTextSymbolColorMode.GRAYSCALE: 'grayscale'>
RED = <NonTextSymbolColorMode.RED: 'red'>
GREEN = <NonTextSymbolColorMode.GREEN: 'green'>
BLUE = <NonTextSymbolColorMode.BLUE: 'blue'>
Inherited Members
enum.Enum
name
value
 63class PageNonTextSymbolStep(
 64    PipelineStep[
 65        PageNonTextSymbolStepConfig,
 66        PageNonTextSymbolStepInput,
 67        PageNonTextSymbolStepOutput,
 68    ]
 69):  # yapf: disable
 70
 71    def __init__(self, config: PageNonTextSymbolStepConfig):
 72        super().__init__(config)
 73
 74        self.symbol_image_selector_engine_executor = \
 75            image_selector_engine_executor_factory.create({
 76                'image_folders': self.config.symbol_image_folders,
 77                'target_image_mode': None,
 78                'force_resize': True,
 79            })
 80
 81        self.color_modes, self.color_modes_probs = normalize_to_keys_and_probs([
 82            (
 83                NonTextSymbolColorMode.GRAYSCALE,
 84                self.config.weight_color_grayscale,
 85            ),
 86            (
 87                NonTextSymbolColorMode.RED,
 88                self.config.weight_color_red,
 89            ),
 90            (
 91                NonTextSymbolColorMode.GREEN,
 92                self.config.weight_color_green,
 93            ),
 94            (
 95                NonTextSymbolColorMode.BLUE,
 96                self.config.weight_color_blue,
 97            ),
 98        ])
 99
100    def run(self, input: PageNonTextSymbolStepInput, rng: RandomGenerator):
101        page_layout_step_output = input.page_layout_step_output
102        page_layout = page_layout_step_output.page_layout
103
104        images: List[Image] = []
105        boxes: Sequence[Box] = []
106        alphas: List[Union[np.ndarray, float]] = []
107
108        for layout_non_text_symbol in page_layout.layout_non_text_symbols:
109            box = layout_non_text_symbol.box
110
111            image = self.symbol_image_selector_engine_executor.run(
112                {
113                    'height': box.height,
114                    'width': box.width
115                },
116                rng,
117            )
118            alpha: Union[np.ndarray, float] = layout_non_text_symbol.alpha
119
120            if image.mode == ImageMode.RGBA:
121                # Extract and rescale alpha.
122                np_alpha = (image.mat[:, :, 3]).astype(np.float32) / 255
123                np_alpha_max = np_alpha.max()
124                np_alpha *= layout_non_text_symbol.alpha
125                np_alpha /= np_alpha_max
126                alpha = np_alpha
127
128                # Force to rgb (ignoring alpha channel).
129                image = Image(mat=image.mat[:, :, :3])
130
131            elif image.mode == ImageMode.GRAYSCALE:
132                # As mask.
133                alpha = (image.mat > 0).astype(np.float32)
134                alpha *= layout_non_text_symbol.alpha
135
136                # Generate image with color.
137                color_mode = rng_choice(rng, self.color_modes, probs=self.color_modes_probs)
138                if color_mode == NonTextSymbolColorMode.GRAYSCALE:
139                    grayscale_value = int(
140                        rng.integers(
141                            self.config.color_grayscale_min,
142                            self.config.color_grayscale_max + 1,
143                        )
144                    )
145                    symbol_color = (grayscale_value,) * 3
146
147                else:
148                    rgb_value = int(
149                        rng.integers(
150                            self.config.color_rgb_min,
151                            self.config.color_rgb_max + 1,
152                        )
153                    )
154                    if color_mode == NonTextSymbolColorMode.RED:
155                        symbol_color = (rgb_value, 0, 0)
156                    elif color_mode == NonTextSymbolColorMode.GREEN:
157                        symbol_color = (0, rgb_value, 0)
158                    elif color_mode == NonTextSymbolColorMode.BLUE:
159                        symbol_color = (0, 0, rgb_value)
160                    else:
161                        raise NotImplementedError()
162
163                image = Image.from_shapable(image, value=symbol_color)
164
165            else:
166                raise NotImplementedError()
167
168            images.append(image)
169            boxes.append(layout_non_text_symbol.box)
170            alphas.append(alpha)
171
172        return PageNonTextSymbolStepOutput(
173            images=images,
174            boxes=boxes,
175            alphas=alphas,
176        )

Abstract base class for generic types.

A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::

    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
        # Etc.

This class can then be used as follows::

    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default

71    def __init__(self, config: PageNonTextSymbolStepConfig):
72        super().__init__(config)
73
74        self.symbol_image_selector_engine_executor = \
75            image_selector_engine_executor_factory.create({
76                'image_folders': self.config.symbol_image_folders,
77                'target_image_mode': None,
78                'force_resize': True,
79            })
80
81        self.color_modes, self.color_modes_probs = normalize_to_keys_and_probs([
82            (
83                NonTextSymbolColorMode.GRAYSCALE,
84                self.config.weight_color_grayscale,
85            ),
86            (
87                NonTextSymbolColorMode.RED,
88                self.config.weight_color_red,
89            ),
90            (
91                NonTextSymbolColorMode.GREEN,
92                self.config.weight_color_green,
93            ),
94            (
95                NonTextSymbolColorMode.BLUE,
96                self.config.weight_color_blue,
97            ),
98        ])
def run( self, input: vkit.pipeline.text_detection.page_non_text_symbol.PageNonTextSymbolStepInput, rng: numpy.random._generator.Generator):
100    def run(self, input: PageNonTextSymbolStepInput, rng: RandomGenerator):
101        page_layout_step_output = input.page_layout_step_output
102        page_layout = page_layout_step_output.page_layout
103
104        images: List[Image] = []
105        boxes: Sequence[Box] = []
106        alphas: List[Union[np.ndarray, float]] = []
107
108        for layout_non_text_symbol in page_layout.layout_non_text_symbols:
109            box = layout_non_text_symbol.box
110
111            image = self.symbol_image_selector_engine_executor.run(
112                {
113                    'height': box.height,
114                    'width': box.width
115                },
116                rng,
117            )
118            alpha: Union[np.ndarray, float] = layout_non_text_symbol.alpha
119
120            if image.mode == ImageMode.RGBA:
121                # Extract and rescale alpha.
122                np_alpha = (image.mat[:, :, 3]).astype(np.float32) / 255
123                np_alpha_max = np_alpha.max()
124                np_alpha *= layout_non_text_symbol.alpha
125                np_alpha /= np_alpha_max
126                alpha = np_alpha
127
128                # Force to rgb (ignoring alpha channel).
129                image = Image(mat=image.mat[:, :, :3])
130
131            elif image.mode == ImageMode.GRAYSCALE:
132                # As mask.
133                alpha = (image.mat > 0).astype(np.float32)
134                alpha *= layout_non_text_symbol.alpha
135
136                # Generate image with color.
137                color_mode = rng_choice(rng, self.color_modes, probs=self.color_modes_probs)
138                if color_mode == NonTextSymbolColorMode.GRAYSCALE:
139                    grayscale_value = int(
140                        rng.integers(
141                            self.config.color_grayscale_min,
142                            self.config.color_grayscale_max + 1,
143                        )
144                    )
145                    symbol_color = (grayscale_value,) * 3
146
147                else:
148                    rgb_value = int(
149                        rng.integers(
150                            self.config.color_rgb_min,
151                            self.config.color_rgb_max + 1,
152                        )
153                    )
154                    if color_mode == NonTextSymbolColorMode.RED:
155                        symbol_color = (rgb_value, 0, 0)
156                    elif color_mode == NonTextSymbolColorMode.GREEN:
157                        symbol_color = (0, rgb_value, 0)
158                    elif color_mode == NonTextSymbolColorMode.BLUE:
159                        symbol_color = (0, 0, rgb_value)
160                    else:
161                        raise NotImplementedError()
162
163                image = Image.from_shapable(image, value=symbol_color)
164
165            else:
166                raise NotImplementedError()
167
168            images.append(image)
169            boxes.append(layout_non_text_symbol.box)
170            alphas.append(alpha)
171
172        return PageNonTextSymbolStepOutput(
173            images=images,
174            boxes=boxes,
175            alphas=alphas,
176        )