vkit.pipeline.text_detection.page_text_line_label

  1# Copyright 2022 vkit-x Administrator. All Rights Reserved.
  2#
  3# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
  4#
  5# The commercial license gives you the full rights to create and distribute software
  6# on your own terms without any SSPL license obligations. For more information,
  7# please see the "LICENSE_COMMERCIAL.txt" file.
  8#
  9# This project is also available under Server Side Public License (SSPL).
 10# The SSPL licensing is ideal for use cases such as open source projects with
 11# SSPL distribution, student/academic purposes, hobby projects, internal research
 12# projects without external distribution, or other projects where all SSPL
 13# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
 14from typing import List, Sequence, Optional
 15
 16import attrs
 17from numpy.random import Generator as RandomGenerator
 18
 19from vkit.element import Point, PointList, Box, Mask, ScoreMap, Polygon
 20from ..interface import PipelineStep, PipelineStepFactory
 21from .page_text_line import PageTextLineStepOutput, PageTextLineCollection
 22
 23
@attrs.define
class PageTextLineLabelStepConfig:
    """Options read by PageTextLineLabelStep when building its labels."""

    # Passed as `num_points` when sampling height points on each text line.
    num_sample_height_points: int = 3
    # When True, the step builds the page-level text line mask. The boundary
    # options below are only consulted when this flag is enabled.
    enable_text_line_mask: bool = False
    # When True (and the text line mask is enabled), the step also builds the
    # boundary mask and the combined text-line-and-boundary mask.
    enable_boundary_mask: bool = False
    # Ratio passed to `Box.to_dilated_box` when growing each text line box.
    boundary_dilate_ratio: float = 0.5
    # When True (and the boundary mask is enabled), the step also builds the
    # boundary score map.
    enable_boundary_score_map: bool = False
 31
 32
@attrs.define
class PageTextLineLabelStepInput:
    """Input of PageTextLineLabelStep."""

    # Output of the page text line step; its `page_text_line_collection` is the
    # only attribute read by PageTextLineLabelStep.run.
    page_text_line_step_output: PageTextLineStepOutput
 36
 37
@attrs.define
class PageTextLinePolygonCollection:
    """Text-line-level polygons and sampled height points for one page."""

    # Page height and width, copied from the page text line collection.
    height: int
    width: int
    # One polygon per text line, in text line order.
    polygons: Sequence[Polygon]
    # Number of sampled height points contributed by each text line; the up
    # and down points of a text line share the same count.
    height_points_group_sizes: Sequence[int]
    # Up/down height points of all text lines, concatenated in text line order.
    height_points_up: PointList
    height_points_down: PointList
 46
 47
@attrs.define
class PageCharPolygonCollection:
    """Char-level polygons and height points for one page."""

    # Page height and width, copied from the page text line collection.
    height: int
    width: int
    # Char polygons of all text lines, concatenated in text line order.
    polygons: Sequence[Polygon]
    # Char-level up/down height points; the producing step asserts that both
    # have the same length as `polygons`.
    height_points_up: PointList
    height_points_down: PointList
 55
 56
@attrs.define
class PageTextLineLabelStepOutput:
    """Labels produced by PageTextLineLabelStep for one page."""

    # Always populated.
    page_char_polygon_collection: PageCharPolygonCollection
    page_text_line_polygon_collection: PageTextLinePolygonCollection
    # None unless `enable_text_line_mask` is set in the step config.
    page_text_line_mask: Optional[Mask]
    # None unless both `enable_text_line_mask` and `enable_boundary_mask` are set.
    page_text_line_boundary_mask: Optional[Mask]
    page_text_line_and_boundary_mask: Optional[Mask]
    # None unless `enable_boundary_score_map` is set in addition to the two
    # flags above.
    page_text_line_boundary_score_map: Optional[ScoreMap]
 65
 66
 67class PageTextLineLabelStep(
 68    PipelineStep[
 69        PageTextLineLabelStepConfig,
 70        PageTextLineLabelStepInput,
 71        PageTextLineLabelStepOutput,
 72    ]
 73):  # yapf: disable
 74
 75    def generate_page_char_polygon_collection(
 76        self,
 77        page_text_line_collection: PageTextLineCollection,
 78    ):
 79        char_polygons: List[Polygon] = []
 80        height_points_up = PointList()
 81        height_points_down = PointList()
 82
 83        for text_line in page_text_line_collection.text_lines:
 84            char_polygons.extend(
 85                text_line.to_char_polygons(
 86                    page_height=page_text_line_collection.height,
 87                    page_width=page_text_line_collection.width,
 88                )
 89            )
 90            height_points_up.extend(text_line.get_char_level_height_points(is_up=True))
 91            height_points_down.extend(text_line.get_char_level_height_points(is_up=False))
 92
 93        assert len(char_polygons) == len(height_points_up) == len(height_points_down)
 94        return PageCharPolygonCollection(
 95            height=page_text_line_collection.height,
 96            width=page_text_line_collection.width,
 97            polygons=char_polygons,
 98            height_points_up=height_points_up,
 99            height_points_down=height_points_down,
100        )
101
102    def generate_page_text_line_polygon_collection(
103        self,
104        page_text_line_collection: PageTextLineCollection,
105    ):
106        text_line_polygons: List[Polygon] = []
107
108        height_points_group_sizes: List[int] = []
109        height_points_up = PointList()
110        height_points_down = PointList()
111
112        for text_line in page_text_line_collection.text_lines:
113            text_line_polygons.append(text_line.to_polygon())
114
115            cur_height_points_up = text_line.get_height_points(
116                num_points=self.config.num_sample_height_points,
117                is_up=True,
118            )
119            cur_height_points_down = text_line.get_height_points(
120                num_points=self.config.num_sample_height_points,
121                is_up=False,
122            )
123            height_points_group_size = len(cur_height_points_up)
124            assert height_points_group_size == len(cur_height_points_down)
125            assert height_points_group_size > 0
126            height_points_group_sizes.append(height_points_group_size)
127            height_points_up.extend(cur_height_points_up)
128            height_points_down.extend(cur_height_points_down)
129
130        return PageTextLinePolygonCollection(
131            height=page_text_line_collection.height,
132            width=page_text_line_collection.width,
133            polygons=text_line_polygons,
134            height_points_group_sizes=height_points_group_sizes,
135            height_points_up=height_points_up,
136            height_points_down=height_points_down,
137        )
138
139    def generate_page_text_line_mask(self, page_text_line_collection: PageTextLineCollection):
140        page_text_line_mask = Mask.from_shape(page_text_line_collection.shape)
141
142        text_lines = page_text_line_collection.text_lines
143        for text_line in text_lines:
144            text_line.box.fill_mask(page_text_line_mask)
145        return page_text_line_mask
146
147    def generate_text_line_boxes_and_dilated_boxes(
148        self, page_text_line_collection: PageTextLineCollection
149    ):
150        text_lines = page_text_line_collection.text_lines
151        text_lines = sorted(text_lines, key=lambda tl: tl.font_size, reverse=True)
152
153        boxes: List[Box] = []
154        dilated_boxes: List[Box] = []
155
156        for text_line in text_lines:
157            box = text_line.box
158            boxes.append(box)
159
160            dilated_box = box.to_dilated_box(self.config.boundary_dilate_ratio, clip_long_side=True)
161            dilated_box = dilated_box.to_clipped_box(page_text_line_collection.shape)
162            dilated_boxes.append(dilated_box)
163
164        return boxes, dilated_boxes
165
166    @classmethod
167    def generate_dilated_only_boxes(
168        cls,
169        box: Box,
170        dilated_box: Box,
171    ):
172        dilated_up_box = attrs.evolve(
173            dilated_box,
174            down=box.up - 1,
175        )
176        if dilated_up_box.up > dilated_box.down:
177            dilated_up_box = None
178
179        dilated_down_box = attrs.evolve(
180            dilated_box,
181            up=box.down + 1,
182        )
183        if dilated_down_box.up > dilated_down_box.down:
184            dilated_down_box = None
185
186        dilated_left_box = attrs.evolve(
187            box,
188            left=dilated_box.left,
189            right=box.left - 1,
190        )
191        if dilated_left_box.left > dilated_left_box.right:
192            dilated_left_box = None
193
194        dilated_right_box = attrs.evolve(
195            box,
196            left=box.right + 1,
197            right=dilated_box.right,
198        )
199        if dilated_right_box.left > dilated_right_box.right:
200            dilated_right_box = None
201
202        return (
203            dilated_up_box,
204            dilated_down_box,
205            dilated_left_box,
206            dilated_right_box,
207        )
208
209    def generate_page_text_line_boundary_masks(
210        self,
211        page_text_line_collection: PageTextLineCollection,
212        boxes: Sequence[Box],
213        dilated_boxes: Sequence[Box],
214        page_text_line_mask: Mask,
215    ):
216        boundary_mask = Mask.from_shape(page_text_line_collection.shape)
217
218        for box, dilated_box in zip(boxes, dilated_boxes):
219            dilated_only_boxes = self.generate_dilated_only_boxes(box, dilated_box)
220            for dilated_only_box in dilated_only_boxes:
221                if dilated_only_box:
222                    dilated_only_box.fill_mask(boundary_mask)
223
224        page_text_line_mask.fill_mask(boundary_mask, 0)
225
226        text_line_and_boundary_mask = boundary_mask.copy()
227        page_text_line_mask.fill_mask(text_line_and_boundary_mask)
228
229        return boundary_mask, text_line_and_boundary_mask
230
231    def generate_page_text_line_boundary_score_map(
232        self,
233        page_text_line_collection: PageTextLineCollection,
234        boxes: Sequence[Box],
235        dilated_boxes: Sequence[Box],
236        page_text_line_boundary_mask: Mask,
237    ):
238        boundary_score_map = ScoreMap.from_shape(page_text_line_collection.shape, value=1.0)
239
240        for box, dilated_box in zip(boxes, dilated_boxes):
241            (
242                dilated_up_box,
243                dilated_down_box,
244                dilated_left_box,
245                dilated_right_box,
246            ) = self.generate_dilated_only_boxes(box, dilated_box)
247
248            if dilated_up_box:
249                boundary_score_map.fill_by_quad_interpolation(
250                    point0=Point.create(y=box.up, x=box.right),
251                    point1=Point.create(y=box.up, x=box.left),
252                    point2=Point.create(y=dilated_box.up, x=dilated_box.left),
253                    point3=Point.create(y=dilated_box.up, x=dilated_box.right),
254                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
255                    keep_min_value=True,
256                )
257
258            if dilated_down_box:
259                boundary_score_map.fill_by_quad_interpolation(
260                    point0=Point.create(y=box.down, x=box.left),
261                    point1=Point.create(y=box.down, x=box.right),
262                    point2=Point.create(y=dilated_box.down, x=dilated_box.right),
263                    point3=Point.create(y=dilated_box.down, x=dilated_box.left),
264                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
265                    keep_min_value=True,
266                )
267
268            if dilated_left_box:
269                boundary_score_map.fill_by_quad_interpolation(
270                    point0=Point.create(y=box.up, x=box.left),
271                    point1=Point.create(y=box.down, x=box.left),
272                    point2=Point.create(y=dilated_box.down, x=dilated_box.left),
273                    point3=Point.create(y=dilated_box.up, x=dilated_box.left),
274                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
275                    keep_min_value=True,
276                )
277
278            if dilated_right_box:
279                boundary_score_map.fill_by_quad_interpolation(
280                    point0=Point.create(y=box.down, x=box.right),
281                    point1=Point.create(y=box.up, x=box.right),
282                    point2=Point.create(y=dilated_box.up, x=dilated_box.right),
283                    point3=Point.create(y=dilated_box.down, x=dilated_box.right),
284                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
285                    keep_min_value=True,
286                )
287
288        page_text_line_boundary_mask.to_inverted_mask().fill_score_map(boundary_score_map, 0.0)
289        return boundary_score_map
290
291    def run(self, input: PageTextLineLabelStepInput, rng: RandomGenerator):
292        page_text_line_step_output = input.page_text_line_step_output
293        page_text_line_collection = page_text_line_step_output.page_text_line_collection
294
295        page_char_polygon_collection = self.generate_page_char_polygon_collection(
296            page_text_line_collection,
297        )
298        page_text_line_polygon_collection = self.generate_page_text_line_polygon_collection(
299            page_text_line_collection,
300        )
301
302        page_text_line_mask: Optional[Mask] = None
303        page_text_line_boundary_mask: Optional[Mask] = None
304        page_text_line_and_boundary_mask: Optional[Mask] = None
305        page_text_line_boundary_score_map: Optional[ScoreMap] = None
306
307        if self.config.enable_text_line_mask:
308            page_text_line_mask = self.generate_page_text_line_mask(page_text_line_collection)
309
310            boxes, dilated_boxes = self.generate_text_line_boxes_and_dilated_boxes(
311                page_text_line_collection
312            )
313            if self.config.enable_boundary_mask:
314                (
315                    page_text_line_boundary_mask,
316                    page_text_line_and_boundary_mask,
317                ) = self.generate_page_text_line_boundary_masks(
318                    page_text_line_collection,
319                    boxes,
320                    dilated_boxes,
321                    page_text_line_mask,
322                )
323
324                if self.config.enable_boundary_score_map:
325                    page_text_line_boundary_score_map = \
326                        self.generate_page_text_line_boundary_score_map(
327                            page_text_line_collection,
328                            boxes,
329                            dilated_boxes,
330                            page_text_line_boundary_mask,
331                        )
332
333        return PageTextLineLabelStepOutput(
334            page_char_polygon_collection=page_char_polygon_collection,
335            page_text_line_polygon_collection=page_text_line_polygon_collection,
336            page_text_line_mask=page_text_line_mask,
337            page_text_line_boundary_mask=page_text_line_boundary_mask,
338            page_text_line_and_boundary_mask=page_text_line_and_boundary_mask,
339            page_text_line_boundary_score_map=page_text_line_boundary_score_map,
340        )
341
342
# Module-level PipelineStepFactory wrapping PageTextLineLabelStep.
page_text_line_label_step_factory = PipelineStepFactory(PageTextLineLabelStep)
class PageTextLineLabelStepConfig:
26class PageTextLineLabelStepConfig:
27    num_sample_height_points: int = 3
28    enable_text_line_mask: bool = False
29    enable_boundary_mask: bool = False
30    boundary_dilate_ratio: float = 0.5
31    enable_boundary_score_map: bool = False
PageTextLineLabelStepConfig( num_sample_height_points: int = 3, enable_text_line_mask: bool = False, enable_boundary_mask: bool = False, boundary_dilate_ratio: float = 0.5, enable_boundary_score_map: bool = False)
2def __init__(self, num_sample_height_points=attr_dict['num_sample_height_points'].default, enable_text_line_mask=attr_dict['enable_text_line_mask'].default, enable_boundary_mask=attr_dict['enable_boundary_mask'].default, boundary_dilate_ratio=attr_dict['boundary_dilate_ratio'].default, enable_boundary_score_map=attr_dict['enable_boundary_score_map'].default):
3    self.num_sample_height_points = num_sample_height_points
4    self.enable_text_line_mask = enable_text_line_mask
5    self.enable_boundary_mask = enable_boundary_mask
6    self.boundary_dilate_ratio = boundary_dilate_ratio
7    self.enable_boundary_score_map = enable_boundary_score_map

Method generated by attrs for class PageTextLineLabelStepConfig.

class PageTextLineLabelStepInput:
35class PageTextLineLabelStepInput:
36    page_text_line_step_output: PageTextLineStepOutput
PageTextLineLabelStepInput( page_text_line_step_output: vkit.pipeline.text_detection.page_text_line.PageTextLineStepOutput)
2def __init__(self, page_text_line_step_output):
3    self.page_text_line_step_output = page_text_line_step_output

Method generated by attrs for class PageTextLineLabelStepInput.

class PageTextLinePolygonCollection:
40class PageTextLinePolygonCollection:
41    height: int
42    width: int
43    polygons: Sequence[Polygon]
44    height_points_group_sizes: Sequence[int]
45    height_points_up: PointList
46    height_points_down: PointList
PageTextLinePolygonCollection( height: int, width: int, polygons: Sequence[vkit.element.polygon.Polygon], height_points_group_sizes: Sequence[int], height_points_up: vkit.element.point.PointList, height_points_down: vkit.element.point.PointList)
2def __init__(self, height, width, polygons, height_points_group_sizes, height_points_up, height_points_down):
3    self.height = height
4    self.width = width
5    self.polygons = polygons
6    self.height_points_group_sizes = height_points_group_sizes
7    self.height_points_up = height_points_up
8    self.height_points_down = height_points_down

Method generated by attrs for class PageTextLinePolygonCollection.

class PageCharPolygonCollection:
50class PageCharPolygonCollection:
51    height: int
52    width: int
53    polygons: Sequence[Polygon]
54    height_points_up: PointList
55    height_points_down: PointList
PageCharPolygonCollection( height: int, width: int, polygons: Sequence[vkit.element.polygon.Polygon], height_points_up: vkit.element.point.PointList, height_points_down: vkit.element.point.PointList)
2def __init__(self, height, width, polygons, height_points_up, height_points_down):
3    self.height = height
4    self.width = width
5    self.polygons = polygons
6    self.height_points_up = height_points_up
7    self.height_points_down = height_points_down

Method generated by attrs for class PageCharPolygonCollection.

class PageTextLineLabelStepOutput:
59class PageTextLineLabelStepOutput:
60    page_char_polygon_collection: PageCharPolygonCollection
61    page_text_line_polygon_collection: PageTextLinePolygonCollection
62    page_text_line_mask: Optional[Mask]
63    page_text_line_boundary_mask: Optional[Mask]
64    page_text_line_and_boundary_mask: Optional[Mask]
65    page_text_line_boundary_score_map: Optional[ScoreMap]
PageTextLineLabelStepOutput( page_char_polygon_collection: vkit.pipeline.text_detection.page_text_line_label.PageCharPolygonCollection, page_text_line_polygon_collection: vkit.pipeline.text_detection.page_text_line_label.PageTextLinePolygonCollection, page_text_line_mask: Union[vkit.element.mask.Mask, NoneType], page_text_line_boundary_mask: Union[vkit.element.mask.Mask, NoneType], page_text_line_and_boundary_mask: Union[vkit.element.mask.Mask, NoneType], page_text_line_boundary_score_map: Union[vkit.element.score_map.ScoreMap, NoneType])
2def __init__(self, page_char_polygon_collection, page_text_line_polygon_collection, page_text_line_mask, page_text_line_boundary_mask, page_text_line_and_boundary_mask, page_text_line_boundary_score_map):
3    self.page_char_polygon_collection = page_char_polygon_collection
4    self.page_text_line_polygon_collection = page_text_line_polygon_collection
5    self.page_text_line_mask = page_text_line_mask
6    self.page_text_line_boundary_mask = page_text_line_boundary_mask
7    self.page_text_line_and_boundary_mask = page_text_line_and_boundary_mask
8    self.page_text_line_boundary_score_map = page_text_line_boundary_score_map

Method generated by attrs for class PageTextLineLabelStepOutput.

 68class PageTextLineLabelStep(
 69    PipelineStep[
 70        PageTextLineLabelStepConfig,
 71        PageTextLineLabelStepInput,
 72        PageTextLineLabelStepOutput,
 73    ]
 74):  # yapf: disable
 75
 76    def generate_page_char_polygon_collection(
 77        self,
 78        page_text_line_collection: PageTextLineCollection,
 79    ):
 80        char_polygons: List[Polygon] = []
 81        height_points_up = PointList()
 82        height_points_down = PointList()
 83
 84        for text_line in page_text_line_collection.text_lines:
 85            char_polygons.extend(
 86                text_line.to_char_polygons(
 87                    page_height=page_text_line_collection.height,
 88                    page_width=page_text_line_collection.width,
 89                )
 90            )
 91            height_points_up.extend(text_line.get_char_level_height_points(is_up=True))
 92            height_points_down.extend(text_line.get_char_level_height_points(is_up=False))
 93
 94        assert len(char_polygons) == len(height_points_up) == len(height_points_down)
 95        return PageCharPolygonCollection(
 96            height=page_text_line_collection.height,
 97            width=page_text_line_collection.width,
 98            polygons=char_polygons,
 99            height_points_up=height_points_up,
100            height_points_down=height_points_down,
101        )
102
103    def generate_page_text_line_polygon_collection(
104        self,
105        page_text_line_collection: PageTextLineCollection,
106    ):
107        text_line_polygons: List[Polygon] = []
108
109        height_points_group_sizes: List[int] = []
110        height_points_up = PointList()
111        height_points_down = PointList()
112
113        for text_line in page_text_line_collection.text_lines:
114            text_line_polygons.append(text_line.to_polygon())
115
116            cur_height_points_up = text_line.get_height_points(
117                num_points=self.config.num_sample_height_points,
118                is_up=True,
119            )
120            cur_height_points_down = text_line.get_height_points(
121                num_points=self.config.num_sample_height_points,
122                is_up=False,
123            )
124            height_points_group_size = len(cur_height_points_up)
125            assert height_points_group_size == len(cur_height_points_down)
126            assert height_points_group_size > 0
127            height_points_group_sizes.append(height_points_group_size)
128            height_points_up.extend(cur_height_points_up)
129            height_points_down.extend(cur_height_points_down)
130
131        return PageTextLinePolygonCollection(
132            height=page_text_line_collection.height,
133            width=page_text_line_collection.width,
134            polygons=text_line_polygons,
135            height_points_group_sizes=height_points_group_sizes,
136            height_points_up=height_points_up,
137            height_points_down=height_points_down,
138        )
139
140    def generate_page_text_line_mask(self, page_text_line_collection: PageTextLineCollection):
141        page_text_line_mask = Mask.from_shape(page_text_line_collection.shape)
142
143        text_lines = page_text_line_collection.text_lines
144        for text_line in text_lines:
145            text_line.box.fill_mask(page_text_line_mask)
146        return page_text_line_mask
147
148    def generate_text_line_boxes_and_dilated_boxes(
149        self, page_text_line_collection: PageTextLineCollection
150    ):
151        text_lines = page_text_line_collection.text_lines
152        text_lines = sorted(text_lines, key=lambda tl: tl.font_size, reverse=True)
153
154        boxes: List[Box] = []
155        dilated_boxes: List[Box] = []
156
157        for text_line in text_lines:
158            box = text_line.box
159            boxes.append(box)
160
161            dilated_box = box.to_dilated_box(self.config.boundary_dilate_ratio, clip_long_side=True)
162            dilated_box = dilated_box.to_clipped_box(page_text_line_collection.shape)
163            dilated_boxes.append(dilated_box)
164
165        return boxes, dilated_boxes
166
167    @classmethod
168    def generate_dilated_only_boxes(
169        cls,
170        box: Box,
171        dilated_box: Box,
172    ):
173        dilated_up_box = attrs.evolve(
174            dilated_box,
175            down=box.up - 1,
176        )
177        if dilated_up_box.up > dilated_box.down:
178            dilated_up_box = None
179
180        dilated_down_box = attrs.evolve(
181            dilated_box,
182            up=box.down + 1,
183        )
184        if dilated_down_box.up > dilated_down_box.down:
185            dilated_down_box = None
186
187        dilated_left_box = attrs.evolve(
188            box,
189            left=dilated_box.left,
190            right=box.left - 1,
191        )
192        if dilated_left_box.left > dilated_left_box.right:
193            dilated_left_box = None
194
195        dilated_right_box = attrs.evolve(
196            box,
197            left=box.right + 1,
198            right=dilated_box.right,
199        )
200        if dilated_right_box.left > dilated_right_box.right:
201            dilated_right_box = None
202
203        return (
204            dilated_up_box,
205            dilated_down_box,
206            dilated_left_box,
207            dilated_right_box,
208        )
209
210    def generate_page_text_line_boundary_masks(
211        self,
212        page_text_line_collection: PageTextLineCollection,
213        boxes: Sequence[Box],
214        dilated_boxes: Sequence[Box],
215        page_text_line_mask: Mask,
216    ):
217        boundary_mask = Mask.from_shape(page_text_line_collection.shape)
218
219        for box, dilated_box in zip(boxes, dilated_boxes):
220            dilated_only_boxes = self.generate_dilated_only_boxes(box, dilated_box)
221            for dilated_only_box in dilated_only_boxes:
222                if dilated_only_box:
223                    dilated_only_box.fill_mask(boundary_mask)
224
225        page_text_line_mask.fill_mask(boundary_mask, 0)
226
227        text_line_and_boundary_mask = boundary_mask.copy()
228        page_text_line_mask.fill_mask(text_line_and_boundary_mask)
229
230        return boundary_mask, text_line_and_boundary_mask
231
232    def generate_page_text_line_boundary_score_map(
233        self,
234        page_text_line_collection: PageTextLineCollection,
235        boxes: Sequence[Box],
236        dilated_boxes: Sequence[Box],
237        page_text_line_boundary_mask: Mask,
238    ):
239        boundary_score_map = ScoreMap.from_shape(page_text_line_collection.shape, value=1.0)
240
241        for box, dilated_box in zip(boxes, dilated_boxes):
242            (
243                dilated_up_box,
244                dilated_down_box,
245                dilated_left_box,
246                dilated_right_box,
247            ) = self.generate_dilated_only_boxes(box, dilated_box)
248
249            if dilated_up_box:
250                boundary_score_map.fill_by_quad_interpolation(
251                    point0=Point.create(y=box.up, x=box.right),
252                    point1=Point.create(y=box.up, x=box.left),
253                    point2=Point.create(y=dilated_box.up, x=dilated_box.left),
254                    point3=Point.create(y=dilated_box.up, x=dilated_box.right),
255                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
256                    keep_min_value=True,
257                )
258
259            if dilated_down_box:
260                boundary_score_map.fill_by_quad_interpolation(
261                    point0=Point.create(y=box.down, x=box.left),
262                    point1=Point.create(y=box.down, x=box.right),
263                    point2=Point.create(y=dilated_box.down, x=dilated_box.right),
264                    point3=Point.create(y=dilated_box.down, x=dilated_box.left),
265                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
266                    keep_min_value=True,
267                )
268
269            if dilated_left_box:
270                boundary_score_map.fill_by_quad_interpolation(
271                    point0=Point.create(y=box.up, x=box.left),
272                    point1=Point.create(y=box.down, x=box.left),
273                    point2=Point.create(y=dilated_box.down, x=dilated_box.left),
274                    point3=Point.create(y=dilated_box.up, x=dilated_box.left),
275                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
276                    keep_min_value=True,
277                )
278
279            if dilated_right_box:
280                boundary_score_map.fill_by_quad_interpolation(
281                    point0=Point.create(y=box.down, x=box.right),
282                    point1=Point.create(y=box.up, x=box.right),
283                    point2=Point.create(y=dilated_box.up, x=dilated_box.right),
284                    point3=Point.create(y=dilated_box.down, x=dilated_box.right),
285                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
286                    keep_min_value=True,
287                )
288
289        page_text_line_boundary_mask.to_inverted_mask().fill_score_map(boundary_score_map, 0.0)
290        return boundary_score_map
291
292    def run(self, input: PageTextLineLabelStepInput, rng: RandomGenerator):
293        page_text_line_step_output = input.page_text_line_step_output
294        page_text_line_collection = page_text_line_step_output.page_text_line_collection
295
296        page_char_polygon_collection = self.generate_page_char_polygon_collection(
297            page_text_line_collection,
298        )
299        page_text_line_polygon_collection = self.generate_page_text_line_polygon_collection(
300            page_text_line_collection,
301        )
302
303        page_text_line_mask: Optional[Mask] = None
304        page_text_line_boundary_mask: Optional[Mask] = None
305        page_text_line_and_boundary_mask: Optional[Mask] = None
306        page_text_line_boundary_score_map: Optional[ScoreMap] = None
307
308        if self.config.enable_text_line_mask:
309            page_text_line_mask = self.generate_page_text_line_mask(page_text_line_collection)
310
311            boxes, dilated_boxes = self.generate_text_line_boxes_and_dilated_boxes(
312                page_text_line_collection
313            )
314            if self.config.enable_boundary_mask:
315                (
316                    page_text_line_boundary_mask,
317                    page_text_line_and_boundary_mask,
318                ) = self.generate_page_text_line_boundary_masks(
319                    page_text_line_collection,
320                    boxes,
321                    dilated_boxes,
322                    page_text_line_mask,
323                )
324
325                if self.config.enable_boundary_score_map:
326                    page_text_line_boundary_score_map = \
327                        self.generate_page_text_line_boundary_score_map(
328                            page_text_line_collection,
329                            boxes,
330                            dilated_boxes,
331                            page_text_line_boundary_mask,
332                        )
333
334        return PageTextLineLabelStepOutput(
335            page_char_polygon_collection=page_char_polygon_collection,
336            page_text_line_polygon_collection=page_text_line_polygon_collection,
337            page_text_line_mask=page_text_line_mask,
338            page_text_line_boundary_mask=page_text_line_boundary_mask,
339            page_text_line_and_boundary_mask=page_text_line_and_boundary_mask,
340            page_text_line_boundary_score_map=page_text_line_boundary_score_map,
341        )

Abstract base class for generic types.

A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::

    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
        # Etc.

This class can then be used as follows::

    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default

def generate_page_char_polygon_collection( self, page_text_line_collection: vkit.pipeline.text_detection.page_text_line.PageTextLineCollection):
 76    def generate_page_char_polygon_collection(
 77        self,
 78        page_text_line_collection: PageTextLineCollection,
 79    ):
 80        char_polygons: List[Polygon] = []
 81        height_points_up = PointList()
 82        height_points_down = PointList()
 83
 84        for text_line in page_text_line_collection.text_lines:
 85            char_polygons.extend(
 86                text_line.to_char_polygons(
 87                    page_height=page_text_line_collection.height,
 88                    page_width=page_text_line_collection.width,
 89                )
 90            )
 91            height_points_up.extend(text_line.get_char_level_height_points(is_up=True))
 92            height_points_down.extend(text_line.get_char_level_height_points(is_up=False))
 93
 94        assert len(char_polygons) == len(height_points_up) == len(height_points_down)
 95        return PageCharPolygonCollection(
 96            height=page_text_line_collection.height,
 97            width=page_text_line_collection.width,
 98            polygons=char_polygons,
 99            height_points_up=height_points_up,
100            height_points_down=height_points_down,
101        )
def generate_page_text_line_polygon_collection( self, page_text_line_collection: vkit.pipeline.text_detection.page_text_line.PageTextLineCollection):
103    def generate_page_text_line_polygon_collection(
104        self,
105        page_text_line_collection: PageTextLineCollection,
106    ):
107        text_line_polygons: List[Polygon] = []
108
109        height_points_group_sizes: List[int] = []
110        height_points_up = PointList()
111        height_points_down = PointList()
112
113        for text_line in page_text_line_collection.text_lines:
114            text_line_polygons.append(text_line.to_polygon())
115
116            cur_height_points_up = text_line.get_height_points(
117                num_points=self.config.num_sample_height_points,
118                is_up=True,
119            )
120            cur_height_points_down = text_line.get_height_points(
121                num_points=self.config.num_sample_height_points,
122                is_up=False,
123            )
124            height_points_group_size = len(cur_height_points_up)
125            assert height_points_group_size == len(cur_height_points_down)
126            assert height_points_group_size > 0
127            height_points_group_sizes.append(height_points_group_size)
128            height_points_up.extend(cur_height_points_up)
129            height_points_down.extend(cur_height_points_down)
130
131        return PageTextLinePolygonCollection(
132            height=page_text_line_collection.height,
133            width=page_text_line_collection.width,
134            polygons=text_line_polygons,
135            height_points_group_sizes=height_points_group_sizes,
136            height_points_up=height_points_up,
137            height_points_down=height_points_down,
138        )
def generate_page_text_line_mask( self, page_text_line_collection: vkit.pipeline.text_detection.page_text_line.PageTextLineCollection):
140    def generate_page_text_line_mask(self, page_text_line_collection: PageTextLineCollection):
141        page_text_line_mask = Mask.from_shape(page_text_line_collection.shape)
142
143        text_lines = page_text_line_collection.text_lines
144        for text_line in text_lines:
145            text_line.box.fill_mask(page_text_line_mask)
146        return page_text_line_mask
def generate_text_line_boxes_and_dilated_boxes( self, page_text_line_collection: vkit.pipeline.text_detection.page_text_line.PageTextLineCollection):
148    def generate_text_line_boxes_and_dilated_boxes(
149        self, page_text_line_collection: PageTextLineCollection
150    ):
151        text_lines = page_text_line_collection.text_lines
152        text_lines = sorted(text_lines, key=lambda tl: tl.font_size, reverse=True)
153
154        boxes: List[Box] = []
155        dilated_boxes: List[Box] = []
156
157        for text_line in text_lines:
158            box = text_line.box
159            boxes.append(box)
160
161            dilated_box = box.to_dilated_box(self.config.boundary_dilate_ratio, clip_long_side=True)
162            dilated_box = dilated_box.to_clipped_box(page_text_line_collection.shape)
163            dilated_boxes.append(dilated_box)
164
165        return boxes, dilated_boxes
@classmethod
def generate_dilated_only_boxes(cls, box: vkit.element.box.Box, dilated_box: vkit.element.box.Box):
167    @classmethod
168    def generate_dilated_only_boxes(
169        cls,
170        box: Box,
171        dilated_box: Box,
172    ):
173        dilated_up_box = attrs.evolve(
174            dilated_box,
175            down=box.up - 1,
176        )
177        if dilated_up_box.up > dilated_box.down:
178            dilated_up_box = None
179
180        dilated_down_box = attrs.evolve(
181            dilated_box,
182            up=box.down + 1,
183        )
184        if dilated_down_box.up > dilated_down_box.down:
185            dilated_down_box = None
186
187        dilated_left_box = attrs.evolve(
188            box,
189            left=dilated_box.left,
190            right=box.left - 1,
191        )
192        if dilated_left_box.left > dilated_left_box.right:
193            dilated_left_box = None
194
195        dilated_right_box = attrs.evolve(
196            box,
197            left=box.right + 1,
198            right=dilated_box.right,
199        )
200        if dilated_right_box.left > dilated_right_box.right:
201            dilated_right_box = None
202
203        return (
204            dilated_up_box,
205            dilated_down_box,
206            dilated_left_box,
207            dilated_right_box,
208        )
def generate_page_text_line_boundary_masks( self, page_text_line_collection: vkit.pipeline.text_detection.page_text_line.PageTextLineCollection, boxes: Sequence[vkit.element.box.Box], dilated_boxes: Sequence[vkit.element.box.Box], page_text_line_mask: vkit.element.mask.Mask):
210    def generate_page_text_line_boundary_masks(
211        self,
212        page_text_line_collection: PageTextLineCollection,
213        boxes: Sequence[Box],
214        dilated_boxes: Sequence[Box],
215        page_text_line_mask: Mask,
216    ):
217        boundary_mask = Mask.from_shape(page_text_line_collection.shape)
218
219        for box, dilated_box in zip(boxes, dilated_boxes):
220            dilated_only_boxes = self.generate_dilated_only_boxes(box, dilated_box)
221            for dilated_only_box in dilated_only_boxes:
222                if dilated_only_box:
223                    dilated_only_box.fill_mask(boundary_mask)
224
225        page_text_line_mask.fill_mask(boundary_mask, 0)
226
227        text_line_and_boundary_mask = boundary_mask.copy()
228        page_text_line_mask.fill_mask(text_line_and_boundary_mask)
229
230        return boundary_mask, text_line_and_boundary_mask
def generate_page_text_line_boundary_score_map( self, page_text_line_collection: vkit.pipeline.text_detection.page_text_line.PageTextLineCollection, boxes: Sequence[vkit.element.box.Box], dilated_boxes: Sequence[vkit.element.box.Box], page_text_line_boundary_mask: vkit.element.mask.Mask):
232    def generate_page_text_line_boundary_score_map(
233        self,
234        page_text_line_collection: PageTextLineCollection,
235        boxes: Sequence[Box],
236        dilated_boxes: Sequence[Box],
237        page_text_line_boundary_mask: Mask,
238    ):
239        boundary_score_map = ScoreMap.from_shape(page_text_line_collection.shape, value=1.0)
240
241        for box, dilated_box in zip(boxes, dilated_boxes):
242            (
243                dilated_up_box,
244                dilated_down_box,
245                dilated_left_box,
246                dilated_right_box,
247            ) = self.generate_dilated_only_boxes(box, dilated_box)
248
249            if dilated_up_box:
250                boundary_score_map.fill_by_quad_interpolation(
251                    point0=Point.create(y=box.up, x=box.right),
252                    point1=Point.create(y=box.up, x=box.left),
253                    point2=Point.create(y=dilated_box.up, x=dilated_box.left),
254                    point3=Point.create(y=dilated_box.up, x=dilated_box.right),
255                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
256                    keep_min_value=True,
257                )
258
259            if dilated_down_box:
260                boundary_score_map.fill_by_quad_interpolation(
261                    point0=Point.create(y=box.down, x=box.left),
262                    point1=Point.create(y=box.down, x=box.right),
263                    point2=Point.create(y=dilated_box.down, x=dilated_box.right),
264                    point3=Point.create(y=dilated_box.down, x=dilated_box.left),
265                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
266                    keep_min_value=True,
267                )
268
269            if dilated_left_box:
270                boundary_score_map.fill_by_quad_interpolation(
271                    point0=Point.create(y=box.up, x=box.left),
272                    point1=Point.create(y=box.down, x=box.left),
273                    point2=Point.create(y=dilated_box.down, x=dilated_box.left),
274                    point3=Point.create(y=dilated_box.up, x=dilated_box.left),
275                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
276                    keep_min_value=True,
277                )
278
279            if dilated_right_box:
280                boundary_score_map.fill_by_quad_interpolation(
281                    point0=Point.create(y=box.down, x=box.right),
282                    point1=Point.create(y=box.up, x=box.right),
283                    point2=Point.create(y=dilated_box.up, x=dilated_box.right),
284                    point3=Point.create(y=dilated_box.down, x=dilated_box.right),
285                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
286                    keep_min_value=True,
287                )
288
289        page_text_line_boundary_mask.to_inverted_mask().fill_score_map(boundary_score_map, 0.0)
290        return boundary_score_map
def run( self, input: vkit.pipeline.text_detection.page_text_line_label.PageTextLineLabelStepInput, rng: numpy.random._generator.Generator):
292    def run(self, input: PageTextLineLabelStepInput, rng: RandomGenerator):
293        page_text_line_step_output = input.page_text_line_step_output
294        page_text_line_collection = page_text_line_step_output.page_text_line_collection
295
296        page_char_polygon_collection = self.generate_page_char_polygon_collection(
297            page_text_line_collection,
298        )
299        page_text_line_polygon_collection = self.generate_page_text_line_polygon_collection(
300            page_text_line_collection,
301        )
302
303        page_text_line_mask: Optional[Mask] = None
304        page_text_line_boundary_mask: Optional[Mask] = None
305        page_text_line_and_boundary_mask: Optional[Mask] = None
306        page_text_line_boundary_score_map: Optional[ScoreMap] = None
307
308        if self.config.enable_text_line_mask:
309            page_text_line_mask = self.generate_page_text_line_mask(page_text_line_collection)
310
311            boxes, dilated_boxes = self.generate_text_line_boxes_and_dilated_boxes(
312                page_text_line_collection
313            )
314            if self.config.enable_boundary_mask:
315                (
316                    page_text_line_boundary_mask,
317                    page_text_line_and_boundary_mask,
318                ) = self.generate_page_text_line_boundary_masks(
319                    page_text_line_collection,
320                    boxes,
321                    dilated_boxes,
322                    page_text_line_mask,
323                )
324
325                if self.config.enable_boundary_score_map:
326                    page_text_line_boundary_score_map = \
327                        self.generate_page_text_line_boundary_score_map(
328                            page_text_line_collection,
329                            boxes,
330                            dilated_boxes,
331                            page_text_line_boundary_mask,
332                        )
333
334        return PageTextLineLabelStepOutput(
335            page_char_polygon_collection=page_char_polygon_collection,
336            page_text_line_polygon_collection=page_text_line_polygon_collection,
337            page_text_line_mask=page_text_line_mask,
338            page_text_line_boundary_mask=page_text_line_boundary_mask,
339            page_text_line_and_boundary_mask=page_text_line_and_boundary_mask,
340            page_text_line_boundary_score_map=page_text_line_boundary_score_map,
341        )