vkit.pipeline.text_detection.page_text_line_bounding_box

  1# Copyright 2022 vkit-x Administrator. All Rights Reserved.
  2#
  3# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
  4#
  5# The commercial license gives you the full rights to create and distribute software
  6# on your own terms without any SSPL license obligations. For more information,
  7# please see the "LICENSE_COMMERCIAL.txt" file.
  8#
  9# This project is also available under Server Side Public License (SSPL).
 10# The SSPL licensing is ideal for use cases such as open source projects with
 11# SSPL distribution, student/academic purposes, hobby projects, internal research
 12# projects without external distribution, or other projects where all SSPL
 13# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
 14from typing import Sequence, List, Tuple
 15
 16import attrs
 17from numpy.random import Generator as RandomGenerator
 18
 19from vkit.element import Box, ScoreMap
 20from vkit.engine.font import TextLine
 21from ..interface import PipelineStep, PipelineStepFactory
 22from .page_text_line import PageTextLineStepOutput
 23
 24
 25@attrs.define
 26class PageTextLineBoundingBoxStepConfig:
 27    prob_non_short_text_line: float = 0.05
 28    prob_short_text_line: float = 0.3
 29    offset_ratio_min: float = 0.1
 30    offset_ratio_max: float = 2.0
 31    border_thickness_ratio_min: float = 0.0
 32    border_thickness_ratio_max: float = 0.125
 33    border_thickness_min: int = 1
 34    alpha_min: float = 0.9
 35    alpha_max: float = 1.0
 36
 37
 38@attrs.define
 39class PageTextLineBoundingBoxStepInput:
 40    page_text_line_step_output: PageTextLineStepOutput
 41
 42
 43@attrs.define
 44class PageTextLineBoundingBoxStepOutput:
 45    score_maps: Sequence[ScoreMap]
 46    colors: Sequence[Tuple[int, int, int]]
 47
 48
 49class PageTextLineBoundingBoxStep(
 50    PipelineStep[
 51        PageTextLineBoundingBoxStepConfig,
 52        PageTextLineBoundingBoxStepInput,
 53        PageTextLineBoundingBoxStepOutput,
 54    ]
 55):  # yapf: disable
 56
 57    def sample_offset(self, ref_char_height: int, rng: RandomGenerator):
 58        offset_ratio = rng.uniform(
 59            self.config.offset_ratio_min,
 60            self.config.offset_ratio_max,
 61        )
 62        return round(offset_ratio * ref_char_height)
 63
 64    def sample_border_thickness(self, ref_char_height: int, rng: RandomGenerator):
 65        offset_ratio = rng.uniform(
 66            self.config.border_thickness_ratio_min,
 67            self.config.border_thickness_ratio_max,
 68        )
 69        return max(round(offset_ratio * ref_char_height), self.config.border_thickness_min)
 70
 71    def sample_text_line_bounding_box(
 72        self,
 73        height: int,
 74        width: int,
 75        text_line: TextLine,
 76        rng: RandomGenerator,
 77    ):
 78        ref_char_height_max = max(
 79            char_glyph.ref_char_height for char_glyph in text_line.char_glyphs
 80        )
 81
 82        # Sample shape.
 83        offset_up = self.sample_offset(ref_char_height_max, rng)
 84        offset_down = self.sample_offset(ref_char_height_max, rng)
 85        offset_left = self.sample_offset(ref_char_height_max, rng)
 86        offset_right = self.sample_offset(ref_char_height_max, rng)
 87
 88        box_height = text_line.box.height + offset_up + offset_down
 89        box_width = text_line.box.width + offset_left + offset_right
 90
 91        border_thickness = self.sample_border_thickness(ref_char_height_max, rng)
 92        alpha = float(rng.uniform(self.config.alpha_max, self.config.alpha_max))
 93
 94        # Fill empty area.
 95        score_map = ScoreMap.from_shape((box_height, box_width), value=alpha)
 96
 97        empty_box = Box(
 98            up=border_thickness,
 99            down=box_height - border_thickness - 1,
100            left=border_thickness,
101            right=box_width - border_thickness - 1,
102        )
103        assert empty_box.up < empty_box.down
104        assert empty_box.left < empty_box.right
105        empty_box.fill_score_map(score_map, 0.0)
106
107        # Trim if out-of-boundary.
108        page_box_up = text_line.box.up - offset_up
109        page_box_down = text_line.box.down + offset_down
110        page_box_left = text_line.box.left - offset_left
111        page_box_right = text_line.box.right + offset_right
112
113        trim_up_size = 0
114        if page_box_up < 0:
115            trim_up_size = abs(page_box_up)
116
117        trim_down_size = 0
118        if page_box_down >= height:
119            trim_down_size = page_box_down - height + 1
120
121        trim_left_size = 0
122        if page_box_left < 0:
123            trim_left_size = abs(page_box_left)
124
125        trim_right_size = 0
126        if page_box_right >= width:
127            trim_right_size = page_box_right - width + 1
128
129        if trim_up_size > 0 \
130                or trim_down_size > 0 \
131                or trim_left_size > 0 \
132                or trim_right_size > 0:
133            trim_box = Box(
134                up=trim_up_size,
135                down=box_height - 1 - trim_down_size,
136                left=trim_left_size,
137                right=box_width - 1 - trim_right_size,
138            )
139            score_map = trim_box.extract_score_map(score_map)
140
141        page_box = Box(
142            up=max(0, page_box_up),
143            down=min(height - 1, page_box_down),
144            left=max(0, page_box_left),
145            right=min(width - 1, page_box_right),
146        )
147        score_map = score_map.to_box_attached(page_box)
148
149        return score_map, text_line.glyph_color
150
151    def run(self, input: PageTextLineBoundingBoxStepInput, rng: RandomGenerator):
152        page_text_line_step_output = input.page_text_line_step_output
153        page_text_line_collection = page_text_line_step_output.page_text_line_collection
154
155        score_maps: List[ScoreMap] = []
156        colors: List[Tuple[int, int, int]] = []
157
158        for text_line, is_short_text_line in zip(
159            page_text_line_collection.text_lines,
160            page_text_line_collection.short_text_line_flags,
161        ):
162            add_text_line_bounding_box = False
163            if is_short_text_line:
164                if rng.random() < self.config.prob_short_text_line:
165                    add_text_line_bounding_box = True
166            else:
167                if rng.random() < self.config.prob_non_short_text_line:
168                    add_text_line_bounding_box = True
169            if not add_text_line_bounding_box:
170                continue
171
172            # Assign a bounding box.
173            score_map, color = self.sample_text_line_bounding_box(
174                height=page_text_line_collection.height,
175                width=page_text_line_collection.width,
176                text_line=text_line,
177                rng=rng,
178            )
179            score_maps.append(score_map)
180            colors.append(color)
181
182        return PageTextLineBoundingBoxStepOutput(
183            score_maps=score_maps,
184            colors=colors,
185        )
186
187
# Module-level factory wrapping PageTextLineBoundingBoxStep.
page_text_line_bounding_box_step_factory = PipelineStepFactory(
    PageTextLineBoundingBoxStep,
)
class PageTextLineBoundingBoxStepConfig:
class PageTextLineBoundingBoxStepConfig:
    """Tunable parameters of the text line bounding box step.

    The ratio fields are multiplied by the largest reference character height of
    a text line to obtain pixel sizes.
    """

    # Probability of drawing a bounding box around a text line that is NOT flagged as short.
    prob_non_short_text_line: float = 0.05
    # Probability of drawing a bounding box around a text line flagged as short.
    prob_short_text_line: float = 0.3
    # Range of the ratio used to sample the gap between the text line and each box side.
    offset_ratio_min: float = 0.1
    offset_ratio_max: float = 2.0
    # Range of the ratio used to sample the border thickness.
    border_thickness_ratio_min: float = 0.0
    border_thickness_ratio_max: float = 0.125
    # Lower bound applied to the border thickness after rounding.
    border_thickness_min: int = 1
    # Intended bounds of the alpha value written into the border pixels.
    alpha_min: float = 0.9
    alpha_max: float = 1.0
PageTextLineBoundingBoxStepConfig( prob_non_short_text_line: float = 0.05, prob_short_text_line: float = 0.3, offset_ratio_min: float = 0.1, offset_ratio_max: float = 2.0, border_thickness_ratio_min: float = 0.0, border_thickness_ratio_max: float = 0.125, border_thickness_min: int = 1, alpha_min: float = 0.9, alpha_max: float = 1.0)
 2def __init__(self, prob_non_short_text_line=attr_dict['prob_non_short_text_line'].default, prob_short_text_line=attr_dict['prob_short_text_line'].default, offset_ratio_min=attr_dict['offset_ratio_min'].default, offset_ratio_max=attr_dict['offset_ratio_max'].default, border_thickness_ratio_min=attr_dict['border_thickness_ratio_min'].default, border_thickness_ratio_max=attr_dict['border_thickness_ratio_max'].default, border_thickness_min=attr_dict['border_thickness_min'].default, alpha_min=attr_dict['alpha_min'].default, alpha_max=attr_dict['alpha_max'].default):
 3    self.prob_non_short_text_line = prob_non_short_text_line
 4    self.prob_short_text_line = prob_short_text_line
 5    self.offset_ratio_min = offset_ratio_min
 6    self.offset_ratio_max = offset_ratio_max
 7    self.border_thickness_ratio_min = border_thickness_ratio_min
 8    self.border_thickness_ratio_max = border_thickness_ratio_max
 9    self.border_thickness_min = border_thickness_min
10    self.alpha_min = alpha_min
11    self.alpha_max = alpha_max

Method generated by attrs for class PageTextLineBoundingBoxStepConfig.

class PageTextLineBoundingBoxStepInput:
class PageTextLineBoundingBoxStepInput:
    """Input of the text line bounding box step."""

    # Output of the page text line step; its `page_text_line_collection` is consumed here.
    page_text_line_step_output: PageTextLineStepOutput
PageTextLineBoundingBoxStepInput( page_text_line_step_output: vkit.pipeline.text_detection.page_text_line.PageTextLineStepOutput)
2def __init__(self, page_text_line_step_output):
3    self.page_text_line_step_output = page_text_line_step_output

Method generated by attrs for class PageTextLineBoundingBoxStepInput.

class PageTextLineBoundingBoxStepOutput:
class PageTextLineBoundingBoxStepOutput:
    """Output of the text line bounding box step.

    `score_maps` and `colors` are parallel sequences: the i-th color belongs to
    the i-th score map.
    """

    # One score map per generated bounding box.
    score_maps: Sequence[ScoreMap]
    # Color associated with each bounding box.
    colors: Sequence[Tuple[int, int, int]]
PageTextLineBoundingBoxStepOutput( score_maps: Sequence[vkit.element.score_map.ScoreMap], colors: Sequence[Tuple[int, int, int]])
2def __init__(self, score_maps, colors):
3    self.score_maps = score_maps
4    self.colors = colors

Method generated by attrs for class PageTextLineBoundingBoxStepOutput.

 50class PageTextLineBoundingBoxStep(
 51    PipelineStep[
 52        PageTextLineBoundingBoxStepConfig,
 53        PageTextLineBoundingBoxStepInput,
 54        PageTextLineBoundingBoxStepOutput,
 55    ]
 56):  # yapf: disable
 57
 58    def sample_offset(self, ref_char_height: int, rng: RandomGenerator):
 59        offset_ratio = rng.uniform(
 60            self.config.offset_ratio_min,
 61            self.config.offset_ratio_max,
 62        )
 63        return round(offset_ratio * ref_char_height)
 64
 65    def sample_border_thickness(self, ref_char_height: int, rng: RandomGenerator):
 66        offset_ratio = rng.uniform(
 67            self.config.border_thickness_ratio_min,
 68            self.config.border_thickness_ratio_max,
 69        )
 70        return max(round(offset_ratio * ref_char_height), self.config.border_thickness_min)
 71
 72    def sample_text_line_bounding_box(
 73        self,
 74        height: int,
 75        width: int,
 76        text_line: TextLine,
 77        rng: RandomGenerator,
 78    ):
 79        ref_char_height_max = max(
 80            char_glyph.ref_char_height for char_glyph in text_line.char_glyphs
 81        )
 82
 83        # Sample shape.
 84        offset_up = self.sample_offset(ref_char_height_max, rng)
 85        offset_down = self.sample_offset(ref_char_height_max, rng)
 86        offset_left = self.sample_offset(ref_char_height_max, rng)
 87        offset_right = self.sample_offset(ref_char_height_max, rng)
 88
 89        box_height = text_line.box.height + offset_up + offset_down
 90        box_width = text_line.box.width + offset_left + offset_right
 91
 92        border_thickness = self.sample_border_thickness(ref_char_height_max, rng)
 93        alpha = float(rng.uniform(self.config.alpha_max, self.config.alpha_max))
 94
 95        # Fill empty area.
 96        score_map = ScoreMap.from_shape((box_height, box_width), value=alpha)
 97
 98        empty_box = Box(
 99            up=border_thickness,
100            down=box_height - border_thickness - 1,
101            left=border_thickness,
102            right=box_width - border_thickness - 1,
103        )
104        assert empty_box.up < empty_box.down
105        assert empty_box.left < empty_box.right
106        empty_box.fill_score_map(score_map, 0.0)
107
108        # Trim if out-of-boundary.
109        page_box_up = text_line.box.up - offset_up
110        page_box_down = text_line.box.down + offset_down
111        page_box_left = text_line.box.left - offset_left
112        page_box_right = text_line.box.right + offset_right
113
114        trim_up_size = 0
115        if page_box_up < 0:
116            trim_up_size = abs(page_box_up)
117
118        trim_down_size = 0
119        if page_box_down >= height:
120            trim_down_size = page_box_down - height + 1
121
122        trim_left_size = 0
123        if page_box_left < 0:
124            trim_left_size = abs(page_box_left)
125
126        trim_right_size = 0
127        if page_box_right >= width:
128            trim_right_size = page_box_right - width + 1
129
130        if trim_up_size > 0 \
131                or trim_down_size > 0 \
132                or trim_left_size > 0 \
133                or trim_right_size > 0:
134            trim_box = Box(
135                up=trim_up_size,
136                down=box_height - 1 - trim_down_size,
137                left=trim_left_size,
138                right=box_width - 1 - trim_right_size,
139            )
140            score_map = trim_box.extract_score_map(score_map)
141
142        page_box = Box(
143            up=max(0, page_box_up),
144            down=min(height - 1, page_box_down),
145            left=max(0, page_box_left),
146            right=min(width - 1, page_box_right),
147        )
148        score_map = score_map.to_box_attached(page_box)
149
150        return score_map, text_line.glyph_color
151
152    def run(self, input: PageTextLineBoundingBoxStepInput, rng: RandomGenerator):
153        page_text_line_step_output = input.page_text_line_step_output
154        page_text_line_collection = page_text_line_step_output.page_text_line_collection
155
156        score_maps: List[ScoreMap] = []
157        colors: List[Tuple[int, int, int]] = []
158
159        for text_line, is_short_text_line in zip(
160            page_text_line_collection.text_lines,
161            page_text_line_collection.short_text_line_flags,
162        ):
163            add_text_line_bounding_box = False
164            if is_short_text_line:
165                if rng.random() < self.config.prob_short_text_line:
166                    add_text_line_bounding_box = True
167            else:
168                if rng.random() < self.config.prob_non_short_text_line:
169                    add_text_line_bounding_box = True
170            if not add_text_line_bounding_box:
171                continue
172
173            # Assign a bounding box.
174            score_map, color = self.sample_text_line_bounding_box(
175                height=page_text_line_collection.height,
176                width=page_text_line_collection.width,
177                text_line=text_line,
178                rng=rng,
179            )
180            score_maps.append(score_map)
181            colors.append(color)
182
183        return PageTextLineBoundingBoxStepOutput(
184            score_maps=score_maps,
185            colors=colors,
186        )

Abstract base class for generic types.

A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::

    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
        # Etc.

This class can then be used as follows::

    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default

def sample_offset(self, ref_char_height: int, rng: numpy.random._generator.Generator):
58    def sample_offset(self, ref_char_height: int, rng: RandomGenerator):
59        offset_ratio = rng.uniform(
60            self.config.offset_ratio_min,
61            self.config.offset_ratio_max,
62        )
63        return round(offset_ratio * ref_char_height)
def sample_border_thickness(self, ref_char_height: int, rng: numpy.random._generator.Generator):
65    def sample_border_thickness(self, ref_char_height: int, rng: RandomGenerator):
66        offset_ratio = rng.uniform(
67            self.config.border_thickness_ratio_min,
68            self.config.border_thickness_ratio_max,
69        )
70        return max(round(offset_ratio * ref_char_height), self.config.border_thickness_min)
def sample_text_line_bounding_box( self, height: int, width: int, text_line: vkit.engine.font.type.TextLine, rng: numpy.random._generator.Generator):
 72    def sample_text_line_bounding_box(
 73        self,
 74        height: int,
 75        width: int,
 76        text_line: TextLine,
 77        rng: RandomGenerator,
 78    ):
 79        ref_char_height_max = max(
 80            char_glyph.ref_char_height for char_glyph in text_line.char_glyphs
 81        )
 82
 83        # Sample shape.
 84        offset_up = self.sample_offset(ref_char_height_max, rng)
 85        offset_down = self.sample_offset(ref_char_height_max, rng)
 86        offset_left = self.sample_offset(ref_char_height_max, rng)
 87        offset_right = self.sample_offset(ref_char_height_max, rng)
 88
 89        box_height = text_line.box.height + offset_up + offset_down
 90        box_width = text_line.box.width + offset_left + offset_right
 91
 92        border_thickness = self.sample_border_thickness(ref_char_height_max, rng)
 93        alpha = float(rng.uniform(self.config.alpha_max, self.config.alpha_max))
 94
 95        # Fill empty area.
 96        score_map = ScoreMap.from_shape((box_height, box_width), value=alpha)
 97
 98        empty_box = Box(
 99            up=border_thickness,
100            down=box_height - border_thickness - 1,
101            left=border_thickness,
102            right=box_width - border_thickness - 1,
103        )
104        assert empty_box.up < empty_box.down
105        assert empty_box.left < empty_box.right
106        empty_box.fill_score_map(score_map, 0.0)
107
108        # Trim if out-of-boundary.
109        page_box_up = text_line.box.up - offset_up
110        page_box_down = text_line.box.down + offset_down
111        page_box_left = text_line.box.left - offset_left
112        page_box_right = text_line.box.right + offset_right
113
114        trim_up_size = 0
115        if page_box_up < 0:
116            trim_up_size = abs(page_box_up)
117
118        trim_down_size = 0
119        if page_box_down >= height:
120            trim_down_size = page_box_down - height + 1
121
122        trim_left_size = 0
123        if page_box_left < 0:
124            trim_left_size = abs(page_box_left)
125
126        trim_right_size = 0
127        if page_box_right >= width:
128            trim_right_size = page_box_right - width + 1
129
130        if trim_up_size > 0 \
131                or trim_down_size > 0 \
132                or trim_left_size > 0 \
133                or trim_right_size > 0:
134            trim_box = Box(
135                up=trim_up_size,
136                down=box_height - 1 - trim_down_size,
137                left=trim_left_size,
138                right=box_width - 1 - trim_right_size,
139            )
140            score_map = trim_box.extract_score_map(score_map)
141
142        page_box = Box(
143            up=max(0, page_box_up),
144            down=min(height - 1, page_box_down),
145            left=max(0, page_box_left),
146            right=min(width - 1, page_box_right),
147        )
148        score_map = score_map.to_box_attached(page_box)
149
150        return score_map, text_line.glyph_color
def run( self, input: vkit.pipeline.text_detection.page_text_line_bounding_box.PageTextLineBoundingBoxStepInput, rng: numpy.random._generator.Generator):
152    def run(self, input: PageTextLineBoundingBoxStepInput, rng: RandomGenerator):
153        page_text_line_step_output = input.page_text_line_step_output
154        page_text_line_collection = page_text_line_step_output.page_text_line_collection
155
156        score_maps: List[ScoreMap] = []
157        colors: List[Tuple[int, int, int]] = []
158
159        for text_line, is_short_text_line in zip(
160            page_text_line_collection.text_lines,
161            page_text_line_collection.short_text_line_flags,
162        ):
163            add_text_line_bounding_box = False
164            if is_short_text_line:
165                if rng.random() < self.config.prob_short_text_line:
166                    add_text_line_bounding_box = True
167            else:
168                if rng.random() < self.config.prob_non_short_text_line:
169                    add_text_line_bounding_box = True
170            if not add_text_line_bounding_box:
171                continue
172
173            # Assign a bounding box.
174            score_map, color = self.sample_text_line_bounding_box(
175                height=page_text_line_collection.height,
176                width=page_text_line_collection.width,
177                text_line=text_line,
178                rng=rng,
179            )
180            score_maps.append(score_map)
181            colors.append(color)
182
183        return PageTextLineBoundingBoxStepOutput(
184            score_maps=score_maps,
185            colors=colors,
186        )