vkit.pipeline.text_detection.page_text_line_label

  1# Copyright 2022 vkit-x Administrator. All Rights Reserved.
  2#
  3# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
  4#
  5# The commercial license gives you the full rights to create and distribute software
  6# on your own terms without any SSPL license obligations. For more information,
  7# please see the "LICENSE_COMMERCIAL.txt" file.
  8#
  9# This project is also available under Server Side Public License (SSPL).
 10# The SSPL licensing is ideal for use cases such as open source projects with
 11# SSPL distribution, student/academic purposes, hobby projects, internal research
 12# projects without external distribution, or other projects where all SSPL
 13# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
 14from typing import List, Sequence, Optional
 15
 16import attrs
 17from numpy.random import Generator as RandomGenerator
 18
 19from vkit.element import Point, PointList, Box, Mask, ScoreMap, Polygon
 20from ..interface import PipelineStep, PipelineStepFactory
 21from .page_text_line import PageTextLineStepOutput, PageTextLineCollection
 22
 23
@attrs.define
class PageTextLineLabelStepConfig:
    """Options selecting which text-line labels the step generates."""
    # Passed as ``num_points`` when sampling height points along each text line.
    num_sample_height_points: int = 3
    # Generate the page-level text-line mask. The boundary outputs below are
    # only generated when this flag is enabled as well.
    enable_text_line_mask: bool = False
    # Generate the boundary mask and the combined text-line-and-boundary mask.
    enable_boundary_mask: bool = False
    # Ratio handed to ``Box.to_dilated_box`` when growing each text-line box.
    boundary_dilate_ratio: float = 0.5
    # Generate the boundary score map; only honoured when the boundary mask
    # is generated.
    enable_boundary_score_map: bool = False
    # Forwarded as ``ref_char_height_ratio`` / ``ref_char_width_ratio`` when
    # building the adjusted char polygons.
    adjusted_ref_char_height_ratio: float = 0.6
    adjusted_ref_char_width_ratio: float = 0.6
 33
 34
@attrs.define
class PageTextLineLabelStepInput:
    """Input of the label step: the output of the page text line step."""
    # Only its ``page_text_line_collection`` attribute is read by the step.
    page_text_line_step_output: PageTextLineStepOutput
 38
 39
@attrs.define
class PageTextLinePolygonCollection:
    """Text-line level polygons and sampled height points of one page."""
    # Page size, copied from the page text line collection.
    height: int
    width: int
    # One polygon per text line (``text_line.to_polygon()``), in text line order.
    polygons: Sequence[Polygon]
    # Number of height points sampled for each text line, in the same order as
    # ``polygons``; needed to split the flat point lists below per text line.
    height_points_group_sizes: Sequence[int]
    # Height points of all text lines concatenated in text line order. The
    # up/down lists are built in lockstep and have the same length.
    height_points_up: PointList
    height_points_down: PointList
 48
 49
@attrs.define
class PageCharPolygonCollection:
    """Char level polygons and height points of one page."""
    # Page size, copied from the page text line collection.
    height: int
    width: int
    # Char polygons of all text lines, concatenated in text line order.
    char_polygons: Sequence[Polygon]
    # Char polygons built with the configured adjusted reference char
    # height/width ratios.
    adjusted_char_polygons: Sequence[Polygon]
    # Char level height points. The producing step asserts that the two polygon
    # sequences and the two point lists all have the same length.
    height_points_up: PointList
    height_points_down: PointList
 58
 59
@attrs.define
class PageTextLineLabelStepOutput:
    """Labels produced by ``PageTextLineLabelStep.run``."""
    # Always generated.
    page_char_polygon_collection: PageCharPolygonCollection
    page_text_line_polygon_collection: PageTextLinePolygonCollection
    # ``None`` unless ``enable_text_line_mask`` is set.
    page_text_line_mask: Optional[Mask]
    # ``None`` unless both ``enable_text_line_mask`` and ``enable_boundary_mask``
    # are set.
    page_text_line_boundary_mask: Optional[Mask]
    page_text_line_and_boundary_mask: Optional[Mask]
    # ``None`` unless the boundary mask is generated and
    # ``enable_boundary_score_map`` is set.
    page_text_line_boundary_score_map: Optional[ScoreMap]
 68
 69
 70class PageTextLineLabelStep(
 71    PipelineStep[
 72        PageTextLineLabelStepConfig,
 73        PageTextLineLabelStepInput,
 74        PageTextLineLabelStepOutput,
 75    ]
 76):  # yapf: disable
 77
 78    def generate_page_char_polygon_collection(
 79        self,
 80        page_text_line_collection: PageTextLineCollection,
 81    ):
 82        char_polygons: List[Polygon] = []
 83        adjusted_char_polygons: List[Polygon] = []
 84        height_points_up = PointList()
 85        height_points_down = PointList()
 86
 87        for text_line in page_text_line_collection.text_lines:
 88            char_polygons.extend(
 89                text_line.to_char_polygons(
 90                    page_height=page_text_line_collection.height,
 91                    page_width=page_text_line_collection.width,
 92                )
 93            )
 94            adjusted_char_polygons.extend(
 95                text_line.to_char_polygons(
 96                    page_height=page_text_line_collection.height,
 97                    page_width=page_text_line_collection.width,
 98                    ref_char_height_ratio=self.config.adjusted_ref_char_height_ratio,
 99                    ref_char_width_ratio=self.config.adjusted_ref_char_width_ratio,
100                )
101            )
102            height_points_up.extend(text_line.get_char_level_height_points(is_up=True))
103            height_points_down.extend(text_line.get_char_level_height_points(is_up=False))
104
105        assert len(char_polygons) \
106            == len(adjusted_char_polygons) \
107            == len(height_points_up) \
108            == len(height_points_down)
109
110        return PageCharPolygonCollection(
111            height=page_text_line_collection.height,
112            width=page_text_line_collection.width,
113            char_polygons=char_polygons,
114            adjusted_char_polygons=adjusted_char_polygons,
115            height_points_up=height_points_up,
116            height_points_down=height_points_down,
117        )
118
119    def generate_page_text_line_polygon_collection(
120        self,
121        page_text_line_collection: PageTextLineCollection,
122    ):
123        text_line_polygons: List[Polygon] = []
124
125        height_points_group_sizes: List[int] = []
126        height_points_up = PointList()
127        height_points_down = PointList()
128
129        for text_line in page_text_line_collection.text_lines:
130            text_line_polygons.append(text_line.to_polygon())
131
132            cur_height_points_up = text_line.get_height_points(
133                num_points=self.config.num_sample_height_points,
134                is_up=True,
135            )
136            cur_height_points_down = text_line.get_height_points(
137                num_points=self.config.num_sample_height_points,
138                is_up=False,
139            )
140            height_points_group_size = len(cur_height_points_up)
141            assert height_points_group_size == len(cur_height_points_down)
142            assert height_points_group_size > 0
143            height_points_group_sizes.append(height_points_group_size)
144            height_points_up.extend(cur_height_points_up)
145            height_points_down.extend(cur_height_points_down)
146
147        return PageTextLinePolygonCollection(
148            height=page_text_line_collection.height,
149            width=page_text_line_collection.width,
150            polygons=text_line_polygons,
151            height_points_group_sizes=height_points_group_sizes,
152            height_points_up=height_points_up,
153            height_points_down=height_points_down,
154        )
155
156    def generate_page_text_line_mask(self, page_text_line_collection: PageTextLineCollection):
157        page_text_line_mask = Mask.from_shape(page_text_line_collection.shape)
158
159        text_lines = page_text_line_collection.text_lines
160        for text_line in text_lines:
161            text_line.box.fill_mask(page_text_line_mask)
162        return page_text_line_mask
163
164    def generate_text_line_boxes_and_dilated_boxes(
165        self, page_text_line_collection: PageTextLineCollection
166    ):
167        text_lines = page_text_line_collection.text_lines
168        text_lines = sorted(text_lines, key=lambda tl: tl.font_size, reverse=True)
169
170        boxes: List[Box] = []
171        dilated_boxes: List[Box] = []
172
173        for text_line in text_lines:
174            box = text_line.box
175            boxes.append(box)
176
177            dilated_box = box.to_dilated_box(self.config.boundary_dilate_ratio, clip_long_side=True)
178            dilated_box = dilated_box.to_clipped_box(page_text_line_collection.shape)
179            dilated_boxes.append(dilated_box)
180
181        return boxes, dilated_boxes
182
183    @classmethod
184    def generate_dilated_only_boxes(
185        cls,
186        box: Box,
187        dilated_box: Box,
188    ):
189        dilated_up_box = attrs.evolve(
190            dilated_box,
191            down=box.up - 1,
192        )
193        if dilated_up_box.up > dilated_box.down:
194            dilated_up_box = None
195
196        dilated_down_box = attrs.evolve(
197            dilated_box,
198            up=box.down + 1,
199        )
200        if dilated_down_box.up > dilated_down_box.down:
201            dilated_down_box = None
202
203        dilated_left_box = attrs.evolve(
204            box,
205            left=dilated_box.left,
206            right=box.left - 1,
207        )
208        if dilated_left_box.left > dilated_left_box.right:
209            dilated_left_box = None
210
211        dilated_right_box = attrs.evolve(
212            box,
213            left=box.right + 1,
214            right=dilated_box.right,
215        )
216        if dilated_right_box.left > dilated_right_box.right:
217            dilated_right_box = None
218
219        return (
220            dilated_up_box,
221            dilated_down_box,
222            dilated_left_box,
223            dilated_right_box,
224        )
225
226    def generate_page_text_line_boundary_masks(
227        self,
228        page_text_line_collection: PageTextLineCollection,
229        boxes: Sequence[Box],
230        dilated_boxes: Sequence[Box],
231        page_text_line_mask: Mask,
232    ):
233        boundary_mask = Mask.from_shape(page_text_line_collection.shape)
234
235        for box, dilated_box in zip(boxes, dilated_boxes):
236            dilated_only_boxes = self.generate_dilated_only_boxes(box, dilated_box)
237            for dilated_only_box in dilated_only_boxes:
238                if dilated_only_box:
239                    dilated_only_box.fill_mask(boundary_mask)
240
241        page_text_line_mask.fill_mask(boundary_mask, 0)
242
243        text_line_and_boundary_mask = boundary_mask.copy()
244        page_text_line_mask.fill_mask(text_line_and_boundary_mask)
245
246        return boundary_mask, text_line_and_boundary_mask
247
248    def generate_page_text_line_boundary_score_map(
249        self,
250        page_text_line_collection: PageTextLineCollection,
251        boxes: Sequence[Box],
252        dilated_boxes: Sequence[Box],
253        page_text_line_boundary_mask: Mask,
254    ):
255        boundary_score_map = ScoreMap.from_shape(page_text_line_collection.shape, value=1.0)
256
257        for box, dilated_box in zip(boxes, dilated_boxes):
258            (
259                dilated_up_box,
260                dilated_down_box,
261                dilated_left_box,
262                dilated_right_box,
263            ) = self.generate_dilated_only_boxes(box, dilated_box)
264
265            if dilated_up_box:
266                boundary_score_map.fill_by_quad_interpolation(
267                    point0=Point.create(y=box.up, x=box.right),
268                    point1=Point.create(y=box.up, x=box.left),
269                    point2=Point.create(y=dilated_box.up, x=dilated_box.left),
270                    point3=Point.create(y=dilated_box.up, x=dilated_box.right),
271                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
272                    keep_min_value=True,
273                )
274
275            if dilated_down_box:
276                boundary_score_map.fill_by_quad_interpolation(
277                    point0=Point.create(y=box.down, x=box.left),
278                    point1=Point.create(y=box.down, x=box.right),
279                    point2=Point.create(y=dilated_box.down, x=dilated_box.right),
280                    point3=Point.create(y=dilated_box.down, x=dilated_box.left),
281                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
282                    keep_min_value=True,
283                )
284
285            if dilated_left_box:
286                boundary_score_map.fill_by_quad_interpolation(
287                    point0=Point.create(y=box.up, x=box.left),
288                    point1=Point.create(y=box.down, x=box.left),
289                    point2=Point.create(y=dilated_box.down, x=dilated_box.left),
290                    point3=Point.create(y=dilated_box.up, x=dilated_box.left),
291                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
292                    keep_min_value=True,
293                )
294
295            if dilated_right_box:
296                boundary_score_map.fill_by_quad_interpolation(
297                    point0=Point.create(y=box.down, x=box.right),
298                    point1=Point.create(y=box.up, x=box.right),
299                    point2=Point.create(y=dilated_box.up, x=dilated_box.right),
300                    point3=Point.create(y=dilated_box.down, x=dilated_box.right),
301                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
302                    keep_min_value=True,
303                )
304
305        page_text_line_boundary_mask.to_inverted_mask().fill_score_map(boundary_score_map, 0.0)
306        return boundary_score_map
307
308    def run(self, input: PageTextLineLabelStepInput, rng: RandomGenerator):
309        page_text_line_step_output = input.page_text_line_step_output
310        page_text_line_collection = page_text_line_step_output.page_text_line_collection
311
312        page_char_polygon_collection = self.generate_page_char_polygon_collection(
313            page_text_line_collection,
314        )
315        page_text_line_polygon_collection = self.generate_page_text_line_polygon_collection(
316            page_text_line_collection,
317        )
318
319        page_text_line_mask: Optional[Mask] = None
320        page_text_line_boundary_mask: Optional[Mask] = None
321        page_text_line_and_boundary_mask: Optional[Mask] = None
322        page_text_line_boundary_score_map: Optional[ScoreMap] = None
323
324        if self.config.enable_text_line_mask:
325            page_text_line_mask = self.generate_page_text_line_mask(page_text_line_collection)
326
327            boxes, dilated_boxes = self.generate_text_line_boxes_and_dilated_boxes(
328                page_text_line_collection
329            )
330            if self.config.enable_boundary_mask:
331                (
332                    page_text_line_boundary_mask,
333                    page_text_line_and_boundary_mask,
334                ) = self.generate_page_text_line_boundary_masks(
335                    page_text_line_collection,
336                    boxes,
337                    dilated_boxes,
338                    page_text_line_mask,
339                )
340
341                if self.config.enable_boundary_score_map:
342                    page_text_line_boundary_score_map = \
343                        self.generate_page_text_line_boundary_score_map(
344                            page_text_line_collection,
345                            boxes,
346                            dilated_boxes,
347                            page_text_line_boundary_mask,
348                        )
349
350        return PageTextLineLabelStepOutput(
351            page_char_polygon_collection=page_char_polygon_collection,
352            page_text_line_polygon_collection=page_text_line_polygon_collection,
353            page_text_line_mask=page_text_line_mask,
354            page_text_line_boundary_mask=page_text_line_boundary_mask,
355            page_text_line_and_boundary_mask=page_text_line_and_boundary_mask,
356            page_text_line_boundary_score_map=page_text_line_boundary_score_map,
357        )
358
359
# Module-level factory wrapping PageTextLineLabelStep.
page_text_line_label_step_factory = PipelineStepFactory(PageTextLineLabelStep)
class PageTextLineLabelStepConfig:
26class PageTextLineLabelStepConfig:
27    num_sample_height_points: int = 3
28    enable_text_line_mask: bool = False
29    enable_boundary_mask: bool = False
30    boundary_dilate_ratio: float = 0.5
31    enable_boundary_score_map: bool = False
32    adjusted_ref_char_height_ratio: float = 0.6
33    adjusted_ref_char_width_ratio: float = 0.6
PageTextLineLabelStepConfig( num_sample_height_points: int = 3, enable_text_line_mask: bool = False, enable_boundary_mask: bool = False, boundary_dilate_ratio: float = 0.5, enable_boundary_score_map: bool = False, adjusted_ref_char_height_ratio: float = 0.6, adjusted_ref_char_width_ratio: float = 0.6)
2def __init__(self, num_sample_height_points=attr_dict['num_sample_height_points'].default, enable_text_line_mask=attr_dict['enable_text_line_mask'].default, enable_boundary_mask=attr_dict['enable_boundary_mask'].default, boundary_dilate_ratio=attr_dict['boundary_dilate_ratio'].default, enable_boundary_score_map=attr_dict['enable_boundary_score_map'].default, adjusted_ref_char_height_ratio=attr_dict['adjusted_ref_char_height_ratio'].default, adjusted_ref_char_width_ratio=attr_dict['adjusted_ref_char_width_ratio'].default):
3    self.num_sample_height_points = num_sample_height_points
4    self.enable_text_line_mask = enable_text_line_mask
5    self.enable_boundary_mask = enable_boundary_mask
6    self.boundary_dilate_ratio = boundary_dilate_ratio
7    self.enable_boundary_score_map = enable_boundary_score_map
8    self.adjusted_ref_char_height_ratio = adjusted_ref_char_height_ratio
9    self.adjusted_ref_char_width_ratio = adjusted_ref_char_width_ratio

Method generated by attrs for class PageTextLineLabelStepConfig.

class PageTextLineLabelStepInput:
37class PageTextLineLabelStepInput:
38    page_text_line_step_output: PageTextLineStepOutput
PageTextLineLabelStepInput( page_text_line_step_output: vkit.pipeline.text_detection.page_text_line.PageTextLineStepOutput)
2def __init__(self, page_text_line_step_output):
3    self.page_text_line_step_output = page_text_line_step_output

Method generated by attrs for class PageTextLineLabelStepInput.

class PageTextLinePolygonCollection:
42class PageTextLinePolygonCollection:
43    height: int
44    width: int
45    polygons: Sequence[Polygon]
46    height_points_group_sizes: Sequence[int]
47    height_points_up: PointList
48    height_points_down: PointList
PageTextLinePolygonCollection( height: int, width: int, polygons: Sequence[vkit.element.polygon.Polygon], height_points_group_sizes: Sequence[int], height_points_up: vkit.element.point.PointList, height_points_down: vkit.element.point.PointList)
2def __init__(self, height, width, polygons, height_points_group_sizes, height_points_up, height_points_down):
3    self.height = height
4    self.width = width
5    self.polygons = polygons
6    self.height_points_group_sizes = height_points_group_sizes
7    self.height_points_up = height_points_up
8    self.height_points_down = height_points_down

Method generated by attrs for class PageTextLinePolygonCollection.

class PageCharPolygonCollection:
52class PageCharPolygonCollection:
53    height: int
54    width: int
55    char_polygons: Sequence[Polygon]
56    adjusted_char_polygons: Sequence[Polygon]
57    height_points_up: PointList
58    height_points_down: PointList
PageCharPolygonCollection( height: int, width: int, char_polygons: Sequence[vkit.element.polygon.Polygon], adjusted_char_polygons: Sequence[vkit.element.polygon.Polygon], height_points_up: vkit.element.point.PointList, height_points_down: vkit.element.point.PointList)
2def __init__(self, height, width, char_polygons, adjusted_char_polygons, height_points_up, height_points_down):
3    self.height = height
4    self.width = width
5    self.char_polygons = char_polygons
6    self.adjusted_char_polygons = adjusted_char_polygons
7    self.height_points_up = height_points_up
8    self.height_points_down = height_points_down

Method generated by attrs for class PageCharPolygonCollection.

class PageTextLineLabelStepOutput:
62class PageTextLineLabelStepOutput:
63    page_char_polygon_collection: PageCharPolygonCollection
64    page_text_line_polygon_collection: PageTextLinePolygonCollection
65    page_text_line_mask: Optional[Mask]
66    page_text_line_boundary_mask: Optional[Mask]
67    page_text_line_and_boundary_mask: Optional[Mask]
68    page_text_line_boundary_score_map: Optional[ScoreMap]
PageTextLineLabelStepOutput( page_char_polygon_collection: vkit.pipeline.text_detection.page_text_line_label.PageCharPolygonCollection, page_text_line_polygon_collection: vkit.pipeline.text_detection.page_text_line_label.PageTextLinePolygonCollection, page_text_line_mask: Union[vkit.element.mask.Mask, NoneType], page_text_line_boundary_mask: Union[vkit.element.mask.Mask, NoneType], page_text_line_and_boundary_mask: Union[vkit.element.mask.Mask, NoneType], page_text_line_boundary_score_map: Union[vkit.element.score_map.ScoreMap, NoneType])
2def __init__(self, page_char_polygon_collection, page_text_line_polygon_collection, page_text_line_mask, page_text_line_boundary_mask, page_text_line_and_boundary_mask, page_text_line_boundary_score_map):
3    self.page_char_polygon_collection = page_char_polygon_collection
4    self.page_text_line_polygon_collection = page_text_line_polygon_collection
5    self.page_text_line_mask = page_text_line_mask
6    self.page_text_line_boundary_mask = page_text_line_boundary_mask
7    self.page_text_line_and_boundary_mask = page_text_line_and_boundary_mask
8    self.page_text_line_boundary_score_map = page_text_line_boundary_score_map

Method generated by attrs for class PageTextLineLabelStepOutput.

 71class PageTextLineLabelStep(
 72    PipelineStep[
 73        PageTextLineLabelStepConfig,
 74        PageTextLineLabelStepInput,
 75        PageTextLineLabelStepOutput,
 76    ]
 77):  # yapf: disable
 78
 79    def generate_page_char_polygon_collection(
 80        self,
 81        page_text_line_collection: PageTextLineCollection,
 82    ):
 83        char_polygons: List[Polygon] = []
 84        adjusted_char_polygons: List[Polygon] = []
 85        height_points_up = PointList()
 86        height_points_down = PointList()
 87
 88        for text_line in page_text_line_collection.text_lines:
 89            char_polygons.extend(
 90                text_line.to_char_polygons(
 91                    page_height=page_text_line_collection.height,
 92                    page_width=page_text_line_collection.width,
 93                )
 94            )
 95            adjusted_char_polygons.extend(
 96                text_line.to_char_polygons(
 97                    page_height=page_text_line_collection.height,
 98                    page_width=page_text_line_collection.width,
 99                    ref_char_height_ratio=self.config.adjusted_ref_char_height_ratio,
100                    ref_char_width_ratio=self.config.adjusted_ref_char_width_ratio,
101                )
102            )
103            height_points_up.extend(text_line.get_char_level_height_points(is_up=True))
104            height_points_down.extend(text_line.get_char_level_height_points(is_up=False))
105
106        assert len(char_polygons) \
107            == len(adjusted_char_polygons) \
108            == len(height_points_up) \
109            == len(height_points_down)
110
111        return PageCharPolygonCollection(
112            height=page_text_line_collection.height,
113            width=page_text_line_collection.width,
114            char_polygons=char_polygons,
115            adjusted_char_polygons=adjusted_char_polygons,
116            height_points_up=height_points_up,
117            height_points_down=height_points_down,
118        )
119
120    def generate_page_text_line_polygon_collection(
121        self,
122        page_text_line_collection: PageTextLineCollection,
123    ):
124        text_line_polygons: List[Polygon] = []
125
126        height_points_group_sizes: List[int] = []
127        height_points_up = PointList()
128        height_points_down = PointList()
129
130        for text_line in page_text_line_collection.text_lines:
131            text_line_polygons.append(text_line.to_polygon())
132
133            cur_height_points_up = text_line.get_height_points(
134                num_points=self.config.num_sample_height_points,
135                is_up=True,
136            )
137            cur_height_points_down = text_line.get_height_points(
138                num_points=self.config.num_sample_height_points,
139                is_up=False,
140            )
141            height_points_group_size = len(cur_height_points_up)
142            assert height_points_group_size == len(cur_height_points_down)
143            assert height_points_group_size > 0
144            height_points_group_sizes.append(height_points_group_size)
145            height_points_up.extend(cur_height_points_up)
146            height_points_down.extend(cur_height_points_down)
147
148        return PageTextLinePolygonCollection(
149            height=page_text_line_collection.height,
150            width=page_text_line_collection.width,
151            polygons=text_line_polygons,
152            height_points_group_sizes=height_points_group_sizes,
153            height_points_up=height_points_up,
154            height_points_down=height_points_down,
155        )
156
157    def generate_page_text_line_mask(self, page_text_line_collection: PageTextLineCollection):
158        page_text_line_mask = Mask.from_shape(page_text_line_collection.shape)
159
160        text_lines = page_text_line_collection.text_lines
161        for text_line in text_lines:
162            text_line.box.fill_mask(page_text_line_mask)
163        return page_text_line_mask
164
165    def generate_text_line_boxes_and_dilated_boxes(
166        self, page_text_line_collection: PageTextLineCollection
167    ):
168        text_lines = page_text_line_collection.text_lines
169        text_lines = sorted(text_lines, key=lambda tl: tl.font_size, reverse=True)
170
171        boxes: List[Box] = []
172        dilated_boxes: List[Box] = []
173
174        for text_line in text_lines:
175            box = text_line.box
176            boxes.append(box)
177
178            dilated_box = box.to_dilated_box(self.config.boundary_dilate_ratio, clip_long_side=True)
179            dilated_box = dilated_box.to_clipped_box(page_text_line_collection.shape)
180            dilated_boxes.append(dilated_box)
181
182        return boxes, dilated_boxes
183
184    @classmethod
185    def generate_dilated_only_boxes(
186        cls,
187        box: Box,
188        dilated_box: Box,
189    ):
190        dilated_up_box = attrs.evolve(
191            dilated_box,
192            down=box.up - 1,
193        )
194        if dilated_up_box.up > dilated_box.down:
195            dilated_up_box = None
196
197        dilated_down_box = attrs.evolve(
198            dilated_box,
199            up=box.down + 1,
200        )
201        if dilated_down_box.up > dilated_down_box.down:
202            dilated_down_box = None
203
204        dilated_left_box = attrs.evolve(
205            box,
206            left=dilated_box.left,
207            right=box.left - 1,
208        )
209        if dilated_left_box.left > dilated_left_box.right:
210            dilated_left_box = None
211
212        dilated_right_box = attrs.evolve(
213            box,
214            left=box.right + 1,
215            right=dilated_box.right,
216        )
217        if dilated_right_box.left > dilated_right_box.right:
218            dilated_right_box = None
219
220        return (
221            dilated_up_box,
222            dilated_down_box,
223            dilated_left_box,
224            dilated_right_box,
225        )
226
227    def generate_page_text_line_boundary_masks(
228        self,
229        page_text_line_collection: PageTextLineCollection,
230        boxes: Sequence[Box],
231        dilated_boxes: Sequence[Box],
232        page_text_line_mask: Mask,
233    ):
234        boundary_mask = Mask.from_shape(page_text_line_collection.shape)
235
236        for box, dilated_box in zip(boxes, dilated_boxes):
237            dilated_only_boxes = self.generate_dilated_only_boxes(box, dilated_box)
238            for dilated_only_box in dilated_only_boxes:
239                if dilated_only_box:
240                    dilated_only_box.fill_mask(boundary_mask)
241
242        page_text_line_mask.fill_mask(boundary_mask, 0)
243
244        text_line_and_boundary_mask = boundary_mask.copy()
245        page_text_line_mask.fill_mask(text_line_and_boundary_mask)
246
247        return boundary_mask, text_line_and_boundary_mask
248
249    def generate_page_text_line_boundary_score_map(
250        self,
251        page_text_line_collection: PageTextLineCollection,
252        boxes: Sequence[Box],
253        dilated_boxes: Sequence[Box],
254        page_text_line_boundary_mask: Mask,
255    ):
256        boundary_score_map = ScoreMap.from_shape(page_text_line_collection.shape, value=1.0)
257
258        for box, dilated_box in zip(boxes, dilated_boxes):
259            (
260                dilated_up_box,
261                dilated_down_box,
262                dilated_left_box,
263                dilated_right_box,
264            ) = self.generate_dilated_only_boxes(box, dilated_box)
265
266            if dilated_up_box:
267                boundary_score_map.fill_by_quad_interpolation(
268                    point0=Point.create(y=box.up, x=box.right),
269                    point1=Point.create(y=box.up, x=box.left),
270                    point2=Point.create(y=dilated_box.up, x=dilated_box.left),
271                    point3=Point.create(y=dilated_box.up, x=dilated_box.right),
272                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
273                    keep_min_value=True,
274                )
275
276            if dilated_down_box:
277                boundary_score_map.fill_by_quad_interpolation(
278                    point0=Point.create(y=box.down, x=box.left),
279                    point1=Point.create(y=box.down, x=box.right),
280                    point2=Point.create(y=dilated_box.down, x=dilated_box.right),
281                    point3=Point.create(y=dilated_box.down, x=dilated_box.left),
282                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
283                    keep_min_value=True,
284                )
285
286            if dilated_left_box:
287                boundary_score_map.fill_by_quad_interpolation(
288                    point0=Point.create(y=box.up, x=box.left),
289                    point1=Point.create(y=box.down, x=box.left),
290                    point2=Point.create(y=dilated_box.down, x=dilated_box.left),
291                    point3=Point.create(y=dilated_box.up, x=dilated_box.left),
292                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
293                    keep_min_value=True,
294                )
295
296            if dilated_right_box:
297                boundary_score_map.fill_by_quad_interpolation(
298                    point0=Point.create(y=box.down, x=box.right),
299                    point1=Point.create(y=box.up, x=box.right),
300                    point2=Point.create(y=dilated_box.up, x=dilated_box.right),
301                    point3=Point.create(y=dilated_box.down, x=dilated_box.right),
302                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
303                    keep_min_value=True,
304                )
305
306        page_text_line_boundary_mask.to_inverted_mask().fill_score_map(boundary_score_map, 0.0)
307        return boundary_score_map
308
309    def run(self, input: PageTextLineLabelStepInput, rng: RandomGenerator):
310        page_text_line_step_output = input.page_text_line_step_output
311        page_text_line_collection = page_text_line_step_output.page_text_line_collection
312
313        page_char_polygon_collection = self.generate_page_char_polygon_collection(
314            page_text_line_collection,
315        )
316        page_text_line_polygon_collection = self.generate_page_text_line_polygon_collection(
317            page_text_line_collection,
318        )
319
320        page_text_line_mask: Optional[Mask] = None
321        page_text_line_boundary_mask: Optional[Mask] = None
322        page_text_line_and_boundary_mask: Optional[Mask] = None
323        page_text_line_boundary_score_map: Optional[ScoreMap] = None
324
325        if self.config.enable_text_line_mask:
326            page_text_line_mask = self.generate_page_text_line_mask(page_text_line_collection)
327
328            boxes, dilated_boxes = self.generate_text_line_boxes_and_dilated_boxes(
329                page_text_line_collection
330            )
331            if self.config.enable_boundary_mask:
332                (
333                    page_text_line_boundary_mask,
334                    page_text_line_and_boundary_mask,
335                ) = self.generate_page_text_line_boundary_masks(
336                    page_text_line_collection,
337                    boxes,
338                    dilated_boxes,
339                    page_text_line_mask,
340                )
341
342                if self.config.enable_boundary_score_map:
343                    page_text_line_boundary_score_map = \
344                        self.generate_page_text_line_boundary_score_map(
345                            page_text_line_collection,
346                            boxes,
347                            dilated_boxes,
348                            page_text_line_boundary_mask,
349                        )
350
351        return PageTextLineLabelStepOutput(
352            page_char_polygon_collection=page_char_polygon_collection,
353            page_text_line_polygon_collection=page_text_line_polygon_collection,
354            page_text_line_mask=page_text_line_mask,
355            page_text_line_boundary_mask=page_text_line_boundary_mask,
356            page_text_line_and_boundary_mask=page_text_line_and_boundary_mask,
357            page_text_line_boundary_score_map=page_text_line_boundary_score_map,
358        )

Abstract base class for generic types.

A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::

    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
        # Etc.

This class can then be used as follows::

    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default

def generate_page_char_polygon_collection( self, page_text_line_collection: vkit.pipeline.text_detection.page_text_line.PageTextLineCollection):
 79    def generate_page_char_polygon_collection(
 80        self,
 81        page_text_line_collection: PageTextLineCollection,
 82    ):
 83        char_polygons: List[Polygon] = []
 84        adjusted_char_polygons: List[Polygon] = []
 85        height_points_up = PointList()
 86        height_points_down = PointList()
 87
 88        for text_line in page_text_line_collection.text_lines:
 89            char_polygons.extend(
 90                text_line.to_char_polygons(
 91                    page_height=page_text_line_collection.height,
 92                    page_width=page_text_line_collection.width,
 93                )
 94            )
 95            adjusted_char_polygons.extend(
 96                text_line.to_char_polygons(
 97                    page_height=page_text_line_collection.height,
 98                    page_width=page_text_line_collection.width,
 99                    ref_char_height_ratio=self.config.adjusted_ref_char_height_ratio,
100                    ref_char_width_ratio=self.config.adjusted_ref_char_width_ratio,
101                )
102            )
103            height_points_up.extend(text_line.get_char_level_height_points(is_up=True))
104            height_points_down.extend(text_line.get_char_level_height_points(is_up=False))
105
106        assert len(char_polygons) \
107            == len(adjusted_char_polygons) \
108            == len(height_points_up) \
109            == len(height_points_down)
110
111        return PageCharPolygonCollection(
112            height=page_text_line_collection.height,
113            width=page_text_line_collection.width,
114            char_polygons=char_polygons,
115            adjusted_char_polygons=adjusted_char_polygons,
116            height_points_up=height_points_up,
117            height_points_down=height_points_down,
118        )
def generate_page_text_line_polygon_collection( self, page_text_line_collection: vkit.pipeline.text_detection.page_text_line.PageTextLineCollection):
120    def generate_page_text_line_polygon_collection(
121        self,
122        page_text_line_collection: PageTextLineCollection,
123    ):
124        text_line_polygons: List[Polygon] = []
125
126        height_points_group_sizes: List[int] = []
127        height_points_up = PointList()
128        height_points_down = PointList()
129
130        for text_line in page_text_line_collection.text_lines:
131            text_line_polygons.append(text_line.to_polygon())
132
133            cur_height_points_up = text_line.get_height_points(
134                num_points=self.config.num_sample_height_points,
135                is_up=True,
136            )
137            cur_height_points_down = text_line.get_height_points(
138                num_points=self.config.num_sample_height_points,
139                is_up=False,
140            )
141            height_points_group_size = len(cur_height_points_up)
142            assert height_points_group_size == len(cur_height_points_down)
143            assert height_points_group_size > 0
144            height_points_group_sizes.append(height_points_group_size)
145            height_points_up.extend(cur_height_points_up)
146            height_points_down.extend(cur_height_points_down)
147
148        return PageTextLinePolygonCollection(
149            height=page_text_line_collection.height,
150            width=page_text_line_collection.width,
151            polygons=text_line_polygons,
152            height_points_group_sizes=height_points_group_sizes,
153            height_points_up=height_points_up,
154            height_points_down=height_points_down,
155        )
def generate_page_text_line_mask( self, page_text_line_collection: vkit.pipeline.text_detection.page_text_line.PageTextLineCollection):
157    def generate_page_text_line_mask(self, page_text_line_collection: PageTextLineCollection):
158        page_text_line_mask = Mask.from_shape(page_text_line_collection.shape)
159
160        text_lines = page_text_line_collection.text_lines
161        for text_line in text_lines:
162            text_line.box.fill_mask(page_text_line_mask)
163        return page_text_line_mask
def generate_text_line_boxes_and_dilated_boxes( self, page_text_line_collection: vkit.pipeline.text_detection.page_text_line.PageTextLineCollection):
165    def generate_text_line_boxes_and_dilated_boxes(
166        self, page_text_line_collection: PageTextLineCollection
167    ):
168        text_lines = page_text_line_collection.text_lines
169        text_lines = sorted(text_lines, key=lambda tl: tl.font_size, reverse=True)
170
171        boxes: List[Box] = []
172        dilated_boxes: List[Box] = []
173
174        for text_line in text_lines:
175            box = text_line.box
176            boxes.append(box)
177
178            dilated_box = box.to_dilated_box(self.config.boundary_dilate_ratio, clip_long_side=True)
179            dilated_box = dilated_box.to_clipped_box(page_text_line_collection.shape)
180            dilated_boxes.append(dilated_box)
181
182        return boxes, dilated_boxes
@classmethod
def generate_dilated_only_boxes(cls, box: vkit.element.box.Box, dilated_box: vkit.element.box.Box):
184    @classmethod
185    def generate_dilated_only_boxes(
186        cls,
187        box: Box,
188        dilated_box: Box,
189    ):
190        dilated_up_box = attrs.evolve(
191            dilated_box,
192            down=box.up - 1,
193        )
194        if dilated_up_box.up > dilated_box.down:
195            dilated_up_box = None
196
197        dilated_down_box = attrs.evolve(
198            dilated_box,
199            up=box.down + 1,
200        )
201        if dilated_down_box.up > dilated_down_box.down:
202            dilated_down_box = None
203
204        dilated_left_box = attrs.evolve(
205            box,
206            left=dilated_box.left,
207            right=box.left - 1,
208        )
209        if dilated_left_box.left > dilated_left_box.right:
210            dilated_left_box = None
211
212        dilated_right_box = attrs.evolve(
213            box,
214            left=box.right + 1,
215            right=dilated_box.right,
216        )
217        if dilated_right_box.left > dilated_right_box.right:
218            dilated_right_box = None
219
220        return (
221            dilated_up_box,
222            dilated_down_box,
223            dilated_left_box,
224            dilated_right_box,
225        )
def generate_page_text_line_boundary_masks( self, page_text_line_collection: vkit.pipeline.text_detection.page_text_line.PageTextLineCollection, boxes: Sequence[vkit.element.box.Box], dilated_boxes: Sequence[vkit.element.box.Box], page_text_line_mask: vkit.element.mask.Mask):
227    def generate_page_text_line_boundary_masks(
228        self,
229        page_text_line_collection: PageTextLineCollection,
230        boxes: Sequence[Box],
231        dilated_boxes: Sequence[Box],
232        page_text_line_mask: Mask,
233    ):
234        boundary_mask = Mask.from_shape(page_text_line_collection.shape)
235
236        for box, dilated_box in zip(boxes, dilated_boxes):
237            dilated_only_boxes = self.generate_dilated_only_boxes(box, dilated_box)
238            for dilated_only_box in dilated_only_boxes:
239                if dilated_only_box:
240                    dilated_only_box.fill_mask(boundary_mask)
241
242        page_text_line_mask.fill_mask(boundary_mask, 0)
243
244        text_line_and_boundary_mask = boundary_mask.copy()
245        page_text_line_mask.fill_mask(text_line_and_boundary_mask)
246
247        return boundary_mask, text_line_and_boundary_mask
def generate_page_text_line_boundary_score_map( self, page_text_line_collection: vkit.pipeline.text_detection.page_text_line.PageTextLineCollection, boxes: Sequence[vkit.element.box.Box], dilated_boxes: Sequence[vkit.element.box.Box], page_text_line_boundary_mask: vkit.element.mask.Mask):
249    def generate_page_text_line_boundary_score_map(
250        self,
251        page_text_line_collection: PageTextLineCollection,
252        boxes: Sequence[Box],
253        dilated_boxes: Sequence[Box],
254        page_text_line_boundary_mask: Mask,
255    ):
256        boundary_score_map = ScoreMap.from_shape(page_text_line_collection.shape, value=1.0)
257
258        for box, dilated_box in zip(boxes, dilated_boxes):
259            (
260                dilated_up_box,
261                dilated_down_box,
262                dilated_left_box,
263                dilated_right_box,
264            ) = self.generate_dilated_only_boxes(box, dilated_box)
265
266            if dilated_up_box:
267                boundary_score_map.fill_by_quad_interpolation(
268                    point0=Point.create(y=box.up, x=box.right),
269                    point1=Point.create(y=box.up, x=box.left),
270                    point2=Point.create(y=dilated_box.up, x=dilated_box.left),
271                    point3=Point.create(y=dilated_box.up, x=dilated_box.right),
272                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
273                    keep_min_value=True,
274                )
275
276            if dilated_down_box:
277                boundary_score_map.fill_by_quad_interpolation(
278                    point0=Point.create(y=box.down, x=box.left),
279                    point1=Point.create(y=box.down, x=box.right),
280                    point2=Point.create(y=dilated_box.down, x=dilated_box.right),
281                    point3=Point.create(y=dilated_box.down, x=dilated_box.left),
282                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
283                    keep_min_value=True,
284                )
285
286            if dilated_left_box:
287                boundary_score_map.fill_by_quad_interpolation(
288                    point0=Point.create(y=box.up, x=box.left),
289                    point1=Point.create(y=box.down, x=box.left),
290                    point2=Point.create(y=dilated_box.down, x=dilated_box.left),
291                    point3=Point.create(y=dilated_box.up, x=dilated_box.left),
292                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
293                    keep_min_value=True,
294                )
295
296            if dilated_right_box:
297                boundary_score_map.fill_by_quad_interpolation(
298                    point0=Point.create(y=box.down, x=box.right),
299                    point1=Point.create(y=box.up, x=box.right),
300                    point2=Point.create(y=dilated_box.up, x=dilated_box.right),
301                    point3=Point.create(y=dilated_box.down, x=dilated_box.right),
302                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
303                    keep_min_value=True,
304                )
305
306        page_text_line_boundary_mask.to_inverted_mask().fill_score_map(boundary_score_map, 0.0)
307        return boundary_score_map
def run( self, input: vkit.pipeline.text_detection.page_text_line_label.PageTextLineLabelStepInput, rng: numpy.random._generator.Generator):
309    def run(self, input: PageTextLineLabelStepInput, rng: RandomGenerator):
310        page_text_line_step_output = input.page_text_line_step_output
311        page_text_line_collection = page_text_line_step_output.page_text_line_collection
312
313        page_char_polygon_collection = self.generate_page_char_polygon_collection(
314            page_text_line_collection,
315        )
316        page_text_line_polygon_collection = self.generate_page_text_line_polygon_collection(
317            page_text_line_collection,
318        )
319
320        page_text_line_mask: Optional[Mask] = None
321        page_text_line_boundary_mask: Optional[Mask] = None
322        page_text_line_and_boundary_mask: Optional[Mask] = None
323        page_text_line_boundary_score_map: Optional[ScoreMap] = None
324
325        if self.config.enable_text_line_mask:
326            page_text_line_mask = self.generate_page_text_line_mask(page_text_line_collection)
327
328            boxes, dilated_boxes = self.generate_text_line_boxes_and_dilated_boxes(
329                page_text_line_collection
330            )
331            if self.config.enable_boundary_mask:
332                (
333                    page_text_line_boundary_mask,
334                    page_text_line_and_boundary_mask,
335                ) = self.generate_page_text_line_boundary_masks(
336                    page_text_line_collection,
337                    boxes,
338                    dilated_boxes,
339                    page_text_line_mask,
340                )
341
342                if self.config.enable_boundary_score_map:
343                    page_text_line_boundary_score_map = \
344                        self.generate_page_text_line_boundary_score_map(
345                            page_text_line_collection,
346                            boxes,
347                            dilated_boxes,
348                            page_text_line_boundary_mask,
349                        )
350
351        return PageTextLineLabelStepOutput(
352            page_char_polygon_collection=page_char_polygon_collection,
353            page_text_line_polygon_collection=page_text_line_polygon_collection,
354            page_text_line_mask=page_text_line_mask,
355            page_text_line_boundary_mask=page_text_line_boundary_mask,
356            page_text_line_and_boundary_mask=page_text_line_and_boundary_mask,
357            page_text_line_boundary_score_map=page_text_line_boundary_score_map,
358        )