vkit.pipeline.text_detection.page_text_line_bounding_box
# Copyright 2022 vkit-x Administrator. All Rights Reserved.
#
# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
#
# The commercial license gives you the full rights to create and distribute software
# on your own terms without any SSPL license obligations. For more information,
# please see the "LICENSE_COMMERCIAL.txt" file.
#
# This project is also available under Server Side Public License (SSPL).
# The SSPL licensing is ideal for use cases such as open source projects with
# SSPL distribution, student/academic purposes, hobby projects, internal research
# projects without external distribution, or other projects where all SSPL
# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
from typing import Sequence, List, Tuple

import attrs
from numpy.random import Generator as RandomGenerator

from vkit.element import Box, ScoreMap
from vkit.engine.font import TextLine
from ..interface import PipelineStep, PipelineStepFactory
from .page_text_line import PageTextLineStepOutput


@attrs.define
class PageTextLineBoundingBoxStepConfig:
    # Probability that a text line NOT flagged as short receives a bounding box.
    prob_non_short_text_line: float = 0.05
    # Probability that a text line flagged as short receives a bounding box.
    prob_short_text_line: float = 0.3
    # Each of the four offsets between the text line and its bounding box is
    # round(ratio * ref_char_height), with ratio drawn from this range.
    offset_ratio_min: float = 0.1
    offset_ratio_max: float = 2.0
    # Border thickness is round(ratio * ref_char_height) with ratio drawn from
    # this range, but never less than border_thickness_min.
    border_thickness_ratio_min: float = 0.0
    border_thickness_ratio_max: float = 0.125
    border_thickness_min: int = 1
    # Range of the score written to the border of the bounding box.
    alpha_min: float = 0.9
    alpha_max: float = 1.0


@attrs.define
class PageTextLineBoundingBoxStepInput:
    # Output of the upstream step; only its page_text_line_collection is read here.
    page_text_line_step_output: PageTextLineStepOutput


@attrs.define
class PageTextLineBoundingBoxStepOutput:
    # One border score map per text line that was given a bounding box.
    score_maps: Sequence[ScoreMap]
    # glyph_color of the same text lines, parallel to score_maps.
    colors: Sequence[Tuple[int, int, int]]


class PageTextLineBoundingBoxStep(
    PipelineStep[
        PageTextLineBoundingBoxStepConfig,
        PageTextLineBoundingBoxStepInput,
        PageTextLineBoundingBoxStepOutput,
    ]
):  # yapf: disable
    """Randomly surround text lines with a rectangular border.

    For every selected text line a score map is produced whose border carries
    the sampled alpha and whose interior is 0.0.
    """

    def sample_offset(self, ref_char_height: int, rng: RandomGenerator) -> int:
        """Return ``round(ratio * ref_char_height)`` for a ratio drawn from
        ``[offset_ratio_min, offset_ratio_max)``."""
        offset_ratio = rng.uniform(
            self.config.offset_ratio_min,
            self.config.offset_ratio_max,
        )
        return round(offset_ratio * ref_char_height)

    def sample_border_thickness(self, ref_char_height: int, rng: RandomGenerator) -> int:
        """Return the border thickness for a text line.

        The thickness is ``round(ratio * ref_char_height)`` for a ratio drawn
        from the configured border thickness range, floored at
        ``border_thickness_min``.
        """
        thickness_ratio = rng.uniform(
            self.config.border_thickness_ratio_min,
            self.config.border_thickness_ratio_max,
        )
        return max(round(thickness_ratio * ref_char_height), self.config.border_thickness_min)

    def sample_text_line_bounding_box(
        self,
        height: int,
        width: int,
        text_line: TextLine,
        rng: RandomGenerator,
    ):
        """Sample a border around ``text_line`` and clip it to the page.

        Args:
            height: Page height; rows at or beyond it are trimmed away.
            width: Page width; columns at or beyond it are trimmed away.
            text_line: Text line to surround. Its ``box``, ``char_glyphs`` and
                ``glyph_color`` are read.
            rng: Random generator used for all sampling.

        Returns:
            A ``(score_map, color)`` pair: the border score map attached to its
            page box, and ``text_line.glyph_color``.
        """
        ref_char_height_max = max(
            char_glyph.ref_char_height for char_glyph in text_line.char_glyphs
        )

        # Sample shape. The order of the rng calls is kept stable so that a
        # given seed keeps producing the same geometry.
        offset_up = self.sample_offset(ref_char_height_max, rng)
        offset_down = self.sample_offset(ref_char_height_max, rng)
        offset_left = self.sample_offset(ref_char_height_max, rng)
        offset_right = self.sample_offset(ref_char_height_max, rng)

        box_height = text_line.box.height + offset_up + offset_down
        box_width = text_line.box.width + offset_left + offset_right

        border_thickness = self.sample_border_thickness(ref_char_height_max, rng)
        # Draw alpha from [alpha_min, alpha_max). Both bounds used to be
        # alpha_max, which made alpha_min dead and alpha constant.
        alpha = float(rng.uniform(self.config.alpha_min, self.config.alpha_max))

        # Fill empty area: start fully covered with alpha, then clear the
        # interior so that only the border remains.
        score_map = ScoreMap.from_shape((box_height, box_width), value=alpha)

        empty_box = Box(
            up=border_thickness,
            down=box_height - border_thickness - 1,
            left=border_thickness,
            right=box_width - border_thickness - 1,
        )
        # The border must leave some interior on both axes.
        assert empty_box.up < empty_box.down
        assert empty_box.left < empty_box.right
        empty_box.fill_score_map(score_map, 0.0)

        # Trim if out-of-boundary.
        page_box_up = text_line.box.up - offset_up
        page_box_down = text_line.box.down + offset_down
        page_box_left = text_line.box.left - offset_left
        page_box_right = text_line.box.right + offset_right

        # Number of rows / columns sticking out of the page on each side.
        trim_up_size = max(0, -page_box_up)
        trim_down_size = max(0, page_box_down - height + 1)
        trim_left_size = max(0, -page_box_left)
        trim_right_size = max(0, page_box_right - width + 1)

        if trim_up_size > 0 \
                or trim_down_size > 0 \
                or trim_left_size > 0 \
                or trim_right_size > 0:
            trim_box = Box(
                up=trim_up_size,
                down=box_height - 1 - trim_down_size,
                left=trim_left_size,
                right=box_width - 1 - trim_right_size,
            )
            score_map = trim_box.extract_score_map(score_map)

        page_box = Box(
            up=max(0, page_box_up),
            down=min(height - 1, page_box_down),
            left=max(0, page_box_left),
            right=min(width - 1, page_box_right),
        )
        score_map = score_map.to_box_attached(page_box)

        return score_map, text_line.glyph_color

    def run(
        self,
        input: PageTextLineBoundingBoxStepInput,
        rng: RandomGenerator,
    ) -> PageTextLineBoundingBoxStepOutput:
        """Sample bounding boxes for a random subset of the page's text lines.

        Each text line is selected independently, with probability
        ``prob_short_text_line`` when its entry in ``short_text_line_flags`` is
        truthy and ``prob_non_short_text_line`` otherwise.
        """
        page_text_line_step_output = input.page_text_line_step_output
        page_text_line_collection = page_text_line_step_output.page_text_line_collection

        score_maps: List[ScoreMap] = []
        colors: List[Tuple[int, int, int]] = []

        for text_line, is_short_text_line in zip(
            page_text_line_collection.text_lines,
            page_text_line_collection.short_text_line_flags,
        ):
            if is_short_text_line:
                prob = self.config.prob_short_text_line
            else:
                prob = self.config.prob_non_short_text_line
            # Exactly one rng.random() draw per text line, selected or not.
            if not rng.random() < prob:
                continue

            # Assign a bounding box.
            score_map, color = self.sample_text_line_bounding_box(
                height=page_text_line_collection.height,
                width=page_text_line_collection.width,
                text_line=text_line,
                rng=rng,
            )
            score_maps.append(score_map)
            colors.append(color)

        return PageTextLineBoundingBoxStepOutput(
            score_maps=score_maps,
            colors=colors,
        )


page_text_line_bounding_box_step_factory = PipelineStepFactory(PageTextLineBoundingBoxStep)
class
PageTextLineBoundingBoxStepConfig:
class PageTextLineBoundingBoxStepConfig:
    # Probability that a text line NOT flagged as short receives a bounding box.
    prob_non_short_text_line: float = 0.05
    # Probability that a text line flagged as short receives a bounding box.
    prob_short_text_line: float = 0.3
    # Range of the ratio multiplied by the reference char height to obtain each
    # of the four offsets between a text line and its bounding box.
    offset_ratio_min: float = 0.1
    offset_ratio_max: float = 2.0
    # Range of the ratio multiplied by the reference char height to obtain the
    # border thickness; the result is floored at border_thickness_min.
    border_thickness_ratio_min: float = 0.0
    border_thickness_ratio_max: float = 0.125
    border_thickness_min: int = 1
    # Bounds for the score written to the border.
    # NOTE(review): confirm that sample_text_line_bounding_box passes both
    # bounds to rng.uniform.
    alpha_min: float = 0.9
    alpha_max: float = 1.0
PageTextLineBoundingBoxStepConfig( prob_non_short_text_line: float = 0.05, prob_short_text_line: float = 0.3, offset_ratio_min: float = 0.1, offset_ratio_max: float = 2.0, border_thickness_ratio_min: float = 0.0, border_thickness_ratio_max: float = 0.125, border_thickness_min: int = 1, alpha_min: float = 0.9, alpha_max: float = 1.0)
def __init__(self, prob_non_short_text_line=attr_dict['prob_non_short_text_line'].default, prob_short_text_line=attr_dict['prob_short_text_line'].default, offset_ratio_min=attr_dict['offset_ratio_min'].default, offset_ratio_max=attr_dict['offset_ratio_max'].default, border_thickness_ratio_min=attr_dict['border_thickness_ratio_min'].default, border_thickness_ratio_max=attr_dict['border_thickness_ratio_max'].default, border_thickness_min=attr_dict['border_thickness_min'].default, alpha_min=attr_dict['alpha_min'].default, alpha_max=attr_dict['alpha_max'].default):
    # Generated initializer: every parameter defaults to the `.default` of the
    # matching attr_dict entry and is stored on the attribute of the same name.
    self.prob_non_short_text_line = prob_non_short_text_line
    self.prob_short_text_line = prob_short_text_line
    self.offset_ratio_min = offset_ratio_min
    self.offset_ratio_max = offset_ratio_max
    self.border_thickness_ratio_min = border_thickness_ratio_min
    self.border_thickness_ratio_max = border_thickness_ratio_max
    self.border_thickness_min = border_thickness_min
    self.alpha_min = alpha_min
    self.alpha_max = alpha_max
Method generated by attrs for class PageTextLineBoundingBoxStepConfig.
class
PageTextLineBoundingBoxStepInput:
PageTextLineBoundingBoxStepInput( page_text_line_step_output: vkit.pipeline.text_detection.page_text_line.PageTextLineStepOutput)
def __init__(self, page_text_line_step_output):
    # Generated initializer: store the required argument unchanged (no default).
    self.page_text_line_step_output = page_text_line_step_output
Method generated by attrs for class PageTextLineBoundingBoxStepInput.
class
PageTextLineBoundingBoxStepOutput:
class PageTextLineBoundingBoxStepOutput:
    # One border score map per text line that was given a bounding box.
    score_maps: Sequence[ScoreMap]
    # glyph_color of the same text lines, parallel to score_maps.
    colors: Sequence[Tuple[int, int, int]]
PageTextLineBoundingBoxStepOutput( score_maps: Sequence[vkit.element.score_map.ScoreMap], colors: Sequence[Tuple[int, int, int]])
Method generated by attrs for class PageTextLineBoundingBoxStepOutput.
class
PageTextLineBoundingBoxStep(vkit.pipeline.interface.PipelineStep[vkit.pipeline.text_detection.page_text_line_bounding_box.PageTextLineBoundingBoxStepConfig, vkit.pipeline.text_detection.page_text_line_bounding_box.PageTextLineBoundingBoxStepInput, vkit.pipeline.text_detection.page_text_line_bounding_box.PageTextLineBoundingBoxStepOutput]):
class PageTextLineBoundingBoxStep(
    PipelineStep[
        PageTextLineBoundingBoxStepConfig,
        PageTextLineBoundingBoxStepInput,
        PageTextLineBoundingBoxStepOutput,
    ]
):  # yapf: disable
    """Randomly surround text lines with a rectangular border.

    For every selected text line a score map is produced whose border carries
    the sampled alpha and whose interior is 0.0.
    """

    def sample_offset(self, ref_char_height: int, rng: RandomGenerator) -> int:
        """Return ``round(ratio * ref_char_height)`` for a ratio drawn from
        ``[offset_ratio_min, offset_ratio_max)``."""
        offset_ratio = rng.uniform(
            self.config.offset_ratio_min,
            self.config.offset_ratio_max,
        )
        return round(offset_ratio * ref_char_height)

    def sample_border_thickness(self, ref_char_height: int, rng: RandomGenerator) -> int:
        """Return the border thickness for a text line.

        The thickness is ``round(ratio * ref_char_height)`` for a ratio drawn
        from the configured border thickness range, floored at
        ``border_thickness_min``.
        """
        thickness_ratio = rng.uniform(
            self.config.border_thickness_ratio_min,
            self.config.border_thickness_ratio_max,
        )
        return max(round(thickness_ratio * ref_char_height), self.config.border_thickness_min)

    def sample_text_line_bounding_box(
        self,
        height: int,
        width: int,
        text_line: TextLine,
        rng: RandomGenerator,
    ):
        """Sample a border around ``text_line`` and clip it to the page.

        Args:
            height: Page height; rows at or beyond it are trimmed away.
            width: Page width; columns at or beyond it are trimmed away.
            text_line: Text line to surround. Its ``box``, ``char_glyphs`` and
                ``glyph_color`` are read.
            rng: Random generator used for all sampling.

        Returns:
            A ``(score_map, color)`` pair: the border score map attached to its
            page box, and ``text_line.glyph_color``.
        """
        ref_char_height_max = max(
            char_glyph.ref_char_height for char_glyph in text_line.char_glyphs
        )

        # Sample shape. The order of the rng calls is kept stable so that a
        # given seed keeps producing the same geometry.
        offset_up = self.sample_offset(ref_char_height_max, rng)
        offset_down = self.sample_offset(ref_char_height_max, rng)
        offset_left = self.sample_offset(ref_char_height_max, rng)
        offset_right = self.sample_offset(ref_char_height_max, rng)

        box_height = text_line.box.height + offset_up + offset_down
        box_width = text_line.box.width + offset_left + offset_right

        border_thickness = self.sample_border_thickness(ref_char_height_max, rng)
        # Draw alpha from [alpha_min, alpha_max). Both bounds used to be
        # alpha_max, which made alpha_min dead and alpha constant.
        alpha = float(rng.uniform(self.config.alpha_min, self.config.alpha_max))

        # Fill empty area: start fully covered with alpha, then clear the
        # interior so that only the border remains.
        score_map = ScoreMap.from_shape((box_height, box_width), value=alpha)

        empty_box = Box(
            up=border_thickness,
            down=box_height - border_thickness - 1,
            left=border_thickness,
            right=box_width - border_thickness - 1,
        )
        # The border must leave some interior on both axes.
        assert empty_box.up < empty_box.down
        assert empty_box.left < empty_box.right
        empty_box.fill_score_map(score_map, 0.0)

        # Trim if out-of-boundary.
        page_box_up = text_line.box.up - offset_up
        page_box_down = text_line.box.down + offset_down
        page_box_left = text_line.box.left - offset_left
        page_box_right = text_line.box.right + offset_right

        # Number of rows / columns sticking out of the page on each side.
        trim_up_size = max(0, -page_box_up)
        trim_down_size = max(0, page_box_down - height + 1)
        trim_left_size = max(0, -page_box_left)
        trim_right_size = max(0, page_box_right - width + 1)

        if trim_up_size > 0 \
                or trim_down_size > 0 \
                or trim_left_size > 0 \
                or trim_right_size > 0:
            trim_box = Box(
                up=trim_up_size,
                down=box_height - 1 - trim_down_size,
                left=trim_left_size,
                right=box_width - 1 - trim_right_size,
            )
            score_map = trim_box.extract_score_map(score_map)

        page_box = Box(
            up=max(0, page_box_up),
            down=min(height - 1, page_box_down),
            left=max(0, page_box_left),
            right=min(width - 1, page_box_right),
        )
        score_map = score_map.to_box_attached(page_box)

        return score_map, text_line.glyph_color

    def run(
        self,
        input: PageTextLineBoundingBoxStepInput,
        rng: RandomGenerator,
    ) -> PageTextLineBoundingBoxStepOutput:
        """Sample bounding boxes for a random subset of the page's text lines.

        Each text line is selected independently, with probability
        ``prob_short_text_line`` when its entry in ``short_text_line_flags`` is
        truthy and ``prob_non_short_text_line`` otherwise.
        """
        page_text_line_step_output = input.page_text_line_step_output
        page_text_line_collection = page_text_line_step_output.page_text_line_collection

        score_maps: List[ScoreMap] = []
        colors: List[Tuple[int, int, int]] = []

        for text_line, is_short_text_line in zip(
            page_text_line_collection.text_lines,
            page_text_line_collection.short_text_line_flags,
        ):
            if is_short_text_line:
                prob = self.config.prob_short_text_line
            else:
                prob = self.config.prob_non_short_text_line
            # Exactly one rng.random() draw per text line, selected or not.
            if not rng.random() < prob:
                continue

            # Assign a bounding box.
            score_map, color = self.sample_text_line_bounding_box(
                height=page_text_line_collection.height,
                width=page_text_line_collection.width,
                text_line=text_line,
                rng=rng,
            )
            score_maps.append(score_map)
            colors.append(color)

        return PageTextLineBoundingBoxStepOutput(
            score_maps=score_maps,
            colors=colors,
        )
Abstract base class for generic types.
A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::
class Mapping(Generic[KT, VT]):
    def __getitem__(self, key: KT) -> VT:
        ...
        # Etc.
This class can then be used as follows::
def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
    try:
        return mapping[key]
    except KeyError:
        return default
def
sample_text_line_bounding_box( self, height: int, width: int, text_line: vkit.engine.font.type.TextLine, rng: numpy.random._generator.Generator):
def sample_text_line_bounding_box(
    self,
    height: int,
    width: int,
    text_line: TextLine,
    rng: RandomGenerator,
):
    """Sample a border around ``text_line`` and clip it to the page.

    Args:
        height: Page height; rows at or beyond it are trimmed away.
        width: Page width; columns at or beyond it are trimmed away.
        text_line: Text line to surround. Its ``box``, ``char_glyphs`` and
            ``glyph_color`` are read.
        rng: Random generator used for all sampling.

    Returns:
        A ``(score_map, color)`` pair: the border score map attached to its
        page box, and ``text_line.glyph_color``.
    """
    ref_char_height_max = max(
        char_glyph.ref_char_height for char_glyph in text_line.char_glyphs
    )

    # Sample shape. The order of the rng calls is kept stable so that a given
    # seed keeps producing the same geometry.
    offset_up = self.sample_offset(ref_char_height_max, rng)
    offset_down = self.sample_offset(ref_char_height_max, rng)
    offset_left = self.sample_offset(ref_char_height_max, rng)
    offset_right = self.sample_offset(ref_char_height_max, rng)

    box_height = text_line.box.height + offset_up + offset_down
    box_width = text_line.box.width + offset_left + offset_right

    border_thickness = self.sample_border_thickness(ref_char_height_max, rng)
    # Draw alpha from [alpha_min, alpha_max). Both bounds used to be
    # alpha_max, which made alpha_min dead and alpha constant.
    alpha = float(rng.uniform(self.config.alpha_min, self.config.alpha_max))

    # Fill empty area: start fully covered with alpha, then clear the interior
    # so that only the border remains.
    score_map = ScoreMap.from_shape((box_height, box_width), value=alpha)

    empty_box = Box(
        up=border_thickness,
        down=box_height - border_thickness - 1,
        left=border_thickness,
        right=box_width - border_thickness - 1,
    )
    # The border must leave some interior on both axes.
    assert empty_box.up < empty_box.down
    assert empty_box.left < empty_box.right
    empty_box.fill_score_map(score_map, 0.0)

    # Trim if out-of-boundary.
    page_box_up = text_line.box.up - offset_up
    page_box_down = text_line.box.down + offset_down
    page_box_left = text_line.box.left - offset_left
    page_box_right = text_line.box.right + offset_right

    # Number of rows / columns sticking out of the page on each side.
    trim_up_size = max(0, -page_box_up)
    trim_down_size = max(0, page_box_down - height + 1)
    trim_left_size = max(0, -page_box_left)
    trim_right_size = max(0, page_box_right - width + 1)

    if trim_up_size > 0 \
            or trim_down_size > 0 \
            or trim_left_size > 0 \
            or trim_right_size > 0:
        trim_box = Box(
            up=trim_up_size,
            down=box_height - 1 - trim_down_size,
            left=trim_left_size,
            right=box_width - 1 - trim_right_size,
        )
        score_map = trim_box.extract_score_map(score_map)

    page_box = Box(
        up=max(0, page_box_up),
        down=min(height - 1, page_box_down),
        left=max(0, page_box_left),
        right=min(width - 1, page_box_right),
    )
    score_map = score_map.to_box_attached(page_box)

    return score_map, text_line.glyph_color
def
run( self, input: vkit.pipeline.text_detection.page_text_line_bounding_box.PageTextLineBoundingBoxStepInput, rng: numpy.random._generator.Generator):
def run(self, input: PageTextLineBoundingBoxStepInput, rng: RandomGenerator):
    """Sample bounding boxes for a random subset of the page's text lines.

    Each text line is selected independently, with probability
    ``prob_short_text_line`` when its entry in ``short_text_line_flags`` is
    truthy and ``prob_non_short_text_line`` otherwise. Returns the score maps
    and colors of the selected lines as parallel sequences.
    """
    collection = input.page_text_line_step_output.page_text_line_collection
    config = self.config

    score_maps: List[ScoreMap] = []
    colors: List[Tuple[int, int, int]] = []

    flagged_text_lines = zip(collection.text_lines, collection.short_text_line_flags)
    for text_line, is_short_text_line in flagged_text_lines:
        if is_short_text_line:
            prob = config.prob_short_text_line
        else:
            prob = config.prob_non_short_text_line

        # Exactly one rng.random() draw per text line, selected or not.
        if not rng.random() < prob:
            continue

        # Assign a bounding box.
        score_map, color = self.sample_text_line_bounding_box(
            height=collection.height,
            width=collection.width,
            text_line=text_line,
            rng=rng,
        )
        score_maps.append(score_map)
        colors.append(color)

    return PageTextLineBoundingBoxStepOutput(score_maps=score_maps, colors=colors)