vkit.pipeline.text_detection.page_non_text_symbol
# Copyright 2022 vkit-x Administrator. All Rights Reserved.
#
# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
#
# The commercial license gives you the full rights to create and distribute software
# on your own terms without any SSPL license obligations. For more information,
# please see the "LICENSE_COMMERCIAL.txt" file.
#
# This project is also available under Server Side Public License (SSPL).
# The SSPL licensing is ideal for use cases such as open source projects with
# SSPL distribution, student/academic purposes, hobby projects, internal research
# projects without external distribution, or other projects where all SSPL
# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
from typing import Sequence, List, Union
from enum import Enum, unique

import attrs
from numpy.random import Generator as RandomGenerator
import numpy as np

from vkit.utility import normalize_to_keys_and_probs, rng_choice
from vkit.element import Box, Image, ImageMode
from vkit.engine.image import image_selector_engine_executor_factory
from ..interface import PipelineStep, PipelineStepFactory
from .page_layout import PageLayoutStepOutput


@attrs.define
class PageNonTextSymbolStepConfig:
    """Configuration of the step that renders non-text symbols onto a page."""

    # Folders handed to the image selector engine as 'image_folders'.
    symbol_image_folders: Sequence[str]

    # Relative weight of drawing a grayscale-masked symbol in a gray tone, and
    # the inclusive range the gray level is sampled from.
    weight_color_grayscale: float = 0.9
    color_grayscale_min: int = 0
    color_grayscale_max: int = 75
    # Relative weights of drawing a grayscale-masked symbol in pure red, green
    # or blue.
    weight_color_red: float = 0.04
    weight_color_green: float = 0.02
    weight_color_blue: float = 0.04
    # Inclusive range the single non-zero channel is sampled from when a
    # red/green/blue mode is chosen.
    color_rgb_min: int = 128
    color_rgb_max: int = 255


@attrs.define
class PageNonTextSymbolStepInput:
    """Input of the step: the output of the page layout step."""

    page_layout_step_output: PageLayoutStepOutput


@attrs.define
class PageNonTextSymbolStepOutput:
    """Output of the step: three parallel sequences, one entry per symbol."""

    # Symbol image to paste.
    images: Sequence[Image]
    # Box of the layout symbol the image was generated for.
    boxes: Sequence[Box]
    # Blending alpha for the image (a per-pixel array for every image mode the
    # step currently supports).
    alphas: Sequence[Union[np.ndarray, float]]


@unique
class NonTextSymbolColorMode(Enum):
    """Color families a grayscale-masked symbol can be painted with."""

    GRAYSCALE = 'grayscale'
    RED = 'red'
    GREEN = 'green'
    BLUE = 'blue'


class PageNonTextSymbolStep(
    PipelineStep[
        PageNonTextSymbolStepConfig,
        PageNonTextSymbolStepInput,
        PageNonTextSymbolStepOutput,
    ]
):  # yapf: disable
    """Pipeline step that picks and colors an image for each non-text symbol."""

    def __init__(self, config: PageNonTextSymbolStepConfig):
        """Create the symbol image selector and the color mode distribution."""
        super().__init__(config)

        self.symbol_image_selector_engine_executor = \
            image_selector_engine_executor_factory.create({
                'image_folders': self.config.symbol_image_folders,
                'target_image_mode': None,
                'force_resize': True,
            })

        # Turn the configured weights into parallel (modes, probabilities).
        self.color_modes, self.color_modes_probs = normalize_to_keys_and_probs([
            (
                NonTextSymbolColorMode.GRAYSCALE,
                self.config.weight_color_grayscale,
            ),
            (
                NonTextSymbolColorMode.RED,
                self.config.weight_color_red,
            ),
            (
                NonTextSymbolColorMode.GREEN,
                self.config.weight_color_green,
            ),
            (
                NonTextSymbolColorMode.BLUE,
                self.config.weight_color_blue,
            ),
        ])

    def run(
        self,
        input: PageNonTextSymbolStepInput,
        rng: RandomGenerator,
    ) -> PageNonTextSymbolStepOutput:
        """Generate one image, box and alpha per non-text symbol of the layout.

        Args:
            input: Wraps the page layout step output whose
                ``page_layout.layout_non_text_symbols`` are iterated.
            rng: Random generator used for image selection and color sampling.

        Returns:
            The images, boxes and alphas, index-aligned with the layout symbols.

        Raises:
            NotImplementedError: If the selected image is neither RGBA nor
                grayscale, or an unknown color mode is drawn.
        """
        page_layout_step_output = input.page_layout_step_output
        page_layout = page_layout_step_output.page_layout

        images: List[Image] = []
        boxes: List[Box] = []
        alphas: List[Union[np.ndarray, float]] = []

        for layout_non_text_symbol in page_layout.layout_non_text_symbols:
            box = layout_non_text_symbol.box

            image = self.symbol_image_selector_engine_executor.run(
                {
                    'height': box.height,
                    'width': box.width
                },
                rng,
            )
            alpha: Union[np.ndarray, float] = layout_non_text_symbol.alpha

            if image.mode == ImageMode.RGBA:
                # Extract the alpha channel and rescale it so that its maximum
                # equals the alpha requested by the layout.
                np_alpha = (image.mat[:, :, 3]).astype(np.float32) / 255
                np_alpha_max = np_alpha.max()
                if np_alpha_max > 0:
                    np_alpha *= layout_non_text_symbol.alpha
                    np_alpha /= np_alpha_max
                # Otherwise the symbol is fully transparent: keep the all-zero
                # alpha instead of dividing 0 by 0, which would yield NaN.
                alpha = np_alpha

                # Force to rgb (ignoring alpha channel).
                image = Image(mat=image.mat[:, :, :3])

            elif image.mode == ImageMode.GRAYSCALE:
                # Non-zero pixels act as the mask, scaled by the layout alpha.
                alpha = (image.mat > 0).astype(np.float32)
                alpha *= layout_non_text_symbol.alpha

                # Generate image with color.
                color_mode = rng_choice(rng, self.color_modes, probs=self.color_modes_probs)
                if color_mode == NonTextSymbolColorMode.GRAYSCALE:
                    # `integers` excludes the upper bound, hence the + 1.
                    grayscale_value = int(
                        rng.integers(
                            self.config.color_grayscale_min,
                            self.config.color_grayscale_max + 1,
                        )
                    )
                    symbol_color = (grayscale_value,) * 3

                else:
                    rgb_value = int(
                        rng.integers(
                            self.config.color_rgb_min,
                            self.config.color_rgb_max + 1,
                        )
                    )
                    if color_mode == NonTextSymbolColorMode.RED:
                        symbol_color = (rgb_value, 0, 0)
                    elif color_mode == NonTextSymbolColorMode.GREEN:
                        symbol_color = (0, rgb_value, 0)
                    elif color_mode == NonTextSymbolColorMode.BLUE:
                        symbol_color = (0, 0, rgb_value)
                    else:
                        raise NotImplementedError(f'Unsupported color mode: {color_mode}')

                # NOTE(review): presumably builds an image of the same shape
                # filled with symbol_color - confirm against Image.from_shapable.
                image = Image.from_shapable(image, value=symbol_color)

            else:
                raise NotImplementedError(f'Unsupported symbol image mode: {image.mode}')

            images.append(image)
            boxes.append(box)
            alphas.append(alpha)

        return PageNonTextSymbolStepOutput(
            images=images,
            boxes=boxes,
            alphas=alphas,
        )


page_non_text_symbol_step_factory = PipelineStepFactory(PageNonTextSymbolStep)
class
PageNonTextSymbolStepConfig:
class PageNonTextSymbolStepConfig:
    # Folders the symbol images are selected from.
    symbol_image_folders: Sequence[str]

    # Weight of the grayscale color mode and the range its gray level is
    # drawn from.
    weight_color_grayscale: float = 0.9
    color_grayscale_min: int = 0
    color_grayscale_max: int = 75
    # Weights of the red, green and blue color modes.
    weight_color_red: float = 0.04
    weight_color_green: float = 0.02
    weight_color_blue: float = 0.04
    # Range the channel value is drawn from for the red/green/blue modes.
    color_rgb_min: int = 128
    color_rgb_max: int = 255
PageNonTextSymbolStepConfig( symbol_image_folders: Sequence[str], weight_color_grayscale: float = 0.9, color_grayscale_min: int = 0, color_grayscale_max: int = 75, weight_color_red: float = 0.04, weight_color_green: float = 0.02, weight_color_blue: float = 0.04, color_rgb_min: int = 128, color_rgb_max: int = 255)
# Constructor generated by attrs: stores every argument on the instance
# unchanged. Defaults are read from `attr_dict`, which is not defined in this
# snippet (presumably the attrs attribute mapping of the class - confirm).
def __init__(
    self,
    symbol_image_folders,
    weight_color_grayscale=attr_dict['weight_color_grayscale'].default,
    color_grayscale_min=attr_dict['color_grayscale_min'].default,
    color_grayscale_max=attr_dict['color_grayscale_max'].default,
    weight_color_red=attr_dict['weight_color_red'].default,
    weight_color_green=attr_dict['weight_color_green'].default,
    weight_color_blue=attr_dict['weight_color_blue'].default,
    color_rgb_min=attr_dict['color_rgb_min'].default,
    color_rgb_max=attr_dict['color_rgb_max'].default
):
    self.symbol_image_folders = symbol_image_folders
    self.weight_color_grayscale = weight_color_grayscale
    self.color_grayscale_min = color_grayscale_min
    self.color_grayscale_max = color_grayscale_max
    self.weight_color_red = weight_color_red
    self.weight_color_green = weight_color_green
    self.weight_color_blue = weight_color_blue
    self.color_rgb_min = color_rgb_min
    self.color_rgb_max = color_rgb_max
Method generated by attrs for class PageNonTextSymbolStepConfig.
class
PageNonTextSymbolStepInput:
PageNonTextSymbolStepInput( page_layout_step_output: vkit.pipeline.text_detection.page_layout.PageLayoutStepOutput)
# Constructor generated by attrs: stores the page layout step output on the
# instance unchanged.
def __init__(self, page_layout_step_output):
    self.page_layout_step_output = page_layout_step_output
Method generated by attrs for class PageNonTextSymbolStepInput.
class
PageNonTextSymbolStepOutput:
class PageNonTextSymbolStepOutput:
    # Three sequences describing the generated symbols: the image, its box and
    # its alpha (either an array or a single float).
    images: Sequence[Image]
    boxes: Sequence[Box]
    alphas: Sequence[Union[np.ndarray, float]]
PageNonTextSymbolStepOutput( images: Sequence[vkit.element.image.Image], boxes: Sequence[vkit.element.box.Box], alphas: Sequence[Union[numpy.ndarray, float]])
# Constructor generated by attrs: stores the three sequences on the instance
# unchanged.
def __init__(self, images, boxes, alphas):
    self.images = images
    self.boxes = boxes
    self.alphas = alphas
Method generated by attrs for class PageNonTextSymbolStepOutput.
class
NonTextSymbolColorMode(enum.Enum):
class NonTextSymbolColorMode(Enum):
    # Color modes a symbol can be drawn with; each value is the lowercase
    # name of the mode.
    GRAYSCALE = 'grayscale'
    RED = 'red'
    GREEN = 'green'
    BLUE = 'blue'
An enumeration.
GRAYSCALE =
<NonTextSymbolColorMode.GRAYSCALE: 'grayscale'>
RED =
<NonTextSymbolColorMode.RED: 'red'>
GREEN =
<NonTextSymbolColorMode.GREEN: 'green'>
BLUE =
<NonTextSymbolColorMode.BLUE: 'blue'>
Inherited Members
- enum.Enum
- name
- value
class PageNonTextSymbolStep(
    PipelineStep[
        PageNonTextSymbolStepConfig,
        PageNonTextSymbolStepInput,
        PageNonTextSymbolStepOutput,
    ]
):  # yapf: disable
    """Pipeline step that picks and colors an image for each non-text symbol."""

    def __init__(self, config: PageNonTextSymbolStepConfig):
        """Create the symbol image selector and the color mode distribution."""
        super().__init__(config)

        self.symbol_image_selector_engine_executor = \
            image_selector_engine_executor_factory.create({
                'image_folders': self.config.symbol_image_folders,
                'target_image_mode': None,
                'force_resize': True,
            })

        # Turn the configured weights into parallel (modes, probabilities).
        self.color_modes, self.color_modes_probs = normalize_to_keys_and_probs([
            (
                NonTextSymbolColorMode.GRAYSCALE,
                self.config.weight_color_grayscale,
            ),
            (
                NonTextSymbolColorMode.RED,
                self.config.weight_color_red,
            ),
            (
                NonTextSymbolColorMode.GREEN,
                self.config.weight_color_green,
            ),
            (
                NonTextSymbolColorMode.BLUE,
                self.config.weight_color_blue,
            ),
        ])

    def run(
        self,
        input: PageNonTextSymbolStepInput,
        rng: RandomGenerator,
    ) -> PageNonTextSymbolStepOutput:
        """Generate one image, box and alpha per non-text symbol of the layout.

        Args:
            input: Wraps the page layout step output whose
                ``page_layout.layout_non_text_symbols`` are iterated.
            rng: Random generator used for image selection and color sampling.

        Returns:
            The images, boxes and alphas, index-aligned with the layout symbols.

        Raises:
            NotImplementedError: If the selected image is neither RGBA nor
                grayscale, or an unknown color mode is drawn.
        """
        page_layout_step_output = input.page_layout_step_output
        page_layout = page_layout_step_output.page_layout

        images: List[Image] = []
        boxes: List[Box] = []
        alphas: List[Union[np.ndarray, float]] = []

        for layout_non_text_symbol in page_layout.layout_non_text_symbols:
            box = layout_non_text_symbol.box

            image = self.symbol_image_selector_engine_executor.run(
                {
                    'height': box.height,
                    'width': box.width
                },
                rng,
            )
            alpha: Union[np.ndarray, float] = layout_non_text_symbol.alpha

            if image.mode == ImageMode.RGBA:
                # Extract the alpha channel and rescale it so that its maximum
                # equals the alpha requested by the layout.
                np_alpha = (image.mat[:, :, 3]).astype(np.float32) / 255
                np_alpha_max = np_alpha.max()
                if np_alpha_max > 0:
                    np_alpha *= layout_non_text_symbol.alpha
                    np_alpha /= np_alpha_max
                # Otherwise the symbol is fully transparent: keep the all-zero
                # alpha instead of dividing 0 by 0, which would yield NaN.
                alpha = np_alpha

                # Force to rgb (ignoring alpha channel).
                image = Image(mat=image.mat[:, :, :3])

            elif image.mode == ImageMode.GRAYSCALE:
                # Non-zero pixels act as the mask, scaled by the layout alpha.
                alpha = (image.mat > 0).astype(np.float32)
                alpha *= layout_non_text_symbol.alpha

                # Generate image with color.
                color_mode = rng_choice(rng, self.color_modes, probs=self.color_modes_probs)
                if color_mode == NonTextSymbolColorMode.GRAYSCALE:
                    # `integers` excludes the upper bound, hence the + 1.
                    grayscale_value = int(
                        rng.integers(
                            self.config.color_grayscale_min,
                            self.config.color_grayscale_max + 1,
                        )
                    )
                    symbol_color = (grayscale_value,) * 3

                else:
                    rgb_value = int(
                        rng.integers(
                            self.config.color_rgb_min,
                            self.config.color_rgb_max + 1,
                        )
                    )
                    if color_mode == NonTextSymbolColorMode.RED:
                        symbol_color = (rgb_value, 0, 0)
                    elif color_mode == NonTextSymbolColorMode.GREEN:
                        symbol_color = (0, rgb_value, 0)
                    elif color_mode == NonTextSymbolColorMode.BLUE:
                        symbol_color = (0, 0, rgb_value)
                    else:
                        raise NotImplementedError(f'Unsupported color mode: {color_mode}')

                # NOTE(review): presumably builds an image of the same shape
                # filled with symbol_color - confirm against Image.from_shapable.
                image = Image.from_shapable(image, value=symbol_color)

            else:
                raise NotImplementedError(f'Unsupported symbol image mode: {image.mode}')

            images.append(image)
            boxes.append(box)
            alphas.append(alpha)

        return PageNonTextSymbolStepOutput(
            images=images,
            boxes=boxes,
            alphas=alphas,
        )
Abstract base class for generic types.
A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::
    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
        # Etc.
This class can then be used as follows::
    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default
PageNonTextSymbolStep( config: vkit.pipeline.text_detection.page_non_text_symbol.PageNonTextSymbolStepConfig)
def __init__(self, config: PageNonTextSymbolStepConfig):
    """Set up the symbol image selector and the color mode distribution."""
    super().__init__(config)

    # Executor that picks a symbol image from the configured folders.
    selector_config = {
        'image_folders': self.config.symbol_image_folders,
        'target_image_mode': None,
        'force_resize': True,
    }
    self.symbol_image_selector_engine_executor = \
        image_selector_engine_executor_factory.create(selector_config)

    # (color mode, weight) pairs, normalized into parallel keys and probs.
    weighted_color_modes = [
        (NonTextSymbolColorMode.GRAYSCALE, self.config.weight_color_grayscale),
        (NonTextSymbolColorMode.RED, self.config.weight_color_red),
        (NonTextSymbolColorMode.GREEN, self.config.weight_color_green),
        (NonTextSymbolColorMode.BLUE, self.config.weight_color_blue),
    ]
    color_modes, color_modes_probs = normalize_to_keys_and_probs(weighted_color_modes)
    self.color_modes = color_modes
    self.color_modes_probs = color_modes_probs
def
run( self, input: vkit.pipeline.text_detection.page_non_text_symbol.PageNonTextSymbolStepInput, rng: numpy.random._generator.Generator):
def run(
    self,
    input: PageNonTextSymbolStepInput,
    rng: RandomGenerator,
) -> PageNonTextSymbolStepOutput:
    """Generate one image, box and alpha per non-text symbol of the layout.

    Args:
        input: Wraps the page layout step output whose
            ``page_layout.layout_non_text_symbols`` are iterated.
        rng: Random generator used for image selection and color sampling.

    Returns:
        The images, boxes and alphas, index-aligned with the layout symbols.

    Raises:
        NotImplementedError: If the selected image is neither RGBA nor
            grayscale, or an unknown color mode is drawn.
    """
    page_layout_step_output = input.page_layout_step_output
    page_layout = page_layout_step_output.page_layout

    images: List[Image] = []
    boxes: List[Box] = []
    alphas: List[Union[np.ndarray, float]] = []

    for layout_non_text_symbol in page_layout.layout_non_text_symbols:
        box = layout_non_text_symbol.box

        image = self.symbol_image_selector_engine_executor.run(
            {
                'height': box.height,
                'width': box.width
            },
            rng,
        )
        alpha: Union[np.ndarray, float] = layout_non_text_symbol.alpha

        if image.mode == ImageMode.RGBA:
            # Extract the alpha channel and rescale it so that its maximum
            # equals the alpha requested by the layout.
            np_alpha = (image.mat[:, :, 3]).astype(np.float32) / 255
            np_alpha_max = np_alpha.max()
            if np_alpha_max > 0:
                np_alpha *= layout_non_text_symbol.alpha
                np_alpha /= np_alpha_max
            # Otherwise the symbol is fully transparent: keep the all-zero
            # alpha instead of dividing 0 by 0, which would yield NaN.
            alpha = np_alpha

            # Force to rgb (ignoring alpha channel).
            image = Image(mat=image.mat[:, :, :3])

        elif image.mode == ImageMode.GRAYSCALE:
            # Non-zero pixels act as the mask, scaled by the layout alpha.
            alpha = (image.mat > 0).astype(np.float32)
            alpha *= layout_non_text_symbol.alpha

            # Generate image with color.
            color_mode = rng_choice(rng, self.color_modes, probs=self.color_modes_probs)
            if color_mode == NonTextSymbolColorMode.GRAYSCALE:
                # `integers` excludes the upper bound, hence the + 1.
                grayscale_value = int(
                    rng.integers(
                        self.config.color_grayscale_min,
                        self.config.color_grayscale_max + 1,
                    )
                )
                symbol_color = (grayscale_value,) * 3

            else:
                rgb_value = int(
                    rng.integers(
                        self.config.color_rgb_min,
                        self.config.color_rgb_max + 1,
                    )
                )
                if color_mode == NonTextSymbolColorMode.RED:
                    symbol_color = (rgb_value, 0, 0)
                elif color_mode == NonTextSymbolColorMode.GREEN:
                    symbol_color = (0, rgb_value, 0)
                elif color_mode == NonTextSymbolColorMode.BLUE:
                    symbol_color = (0, 0, rgb_value)
                else:
                    raise NotImplementedError(f'Unsupported color mode: {color_mode}')

            # NOTE(review): presumably builds an image of the same shape
            # filled with symbol_color - confirm against Image.from_shapable.
            image = Image.from_shapable(image, value=symbol_color)

        else:
            raise NotImplementedError(f'Unsupported symbol image mode: {image.mode}')

        images.append(image)
        boxes.append(box)
        alphas.append(alpha)

    return PageNonTextSymbolStepOutput(
        images=images,
        boxes=boxes,
        alphas=alphas,
    )