vkit.pipeline.text_detection.page_text_line_label
# Copyright 2022 vkit-x Administrator. All Rights Reserved.
#
# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
#
# The commercial license gives you the full rights to create and distribute software
# on your own terms without any SSPL license obligations. For more information,
# please see the "LICENSE_COMMERCIAL.txt" file.
#
# This project is also available under Server Side Public License (SSPL).
# The SSPL licensing is ideal for use cases such as open source projects with
# SSPL distribution, student/academic purposes, hobby projects, internal research
# projects without external distribution, or other projects where all SSPL
# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
from typing import List, Sequence, Optional

import attrs
from numpy.random import Generator as RandomGenerator

from vkit.element import Point, PointList, Box, Mask, ScoreMap, Polygon
from ..interface import PipelineStep, PipelineStepFactory
from .page_text_line import PageTextLineStepOutput, PageTextLineCollection


@attrs.define
class PageTextLineLabelStepConfig:
    """Options of ``PageTextLineLabelStep``."""
    # Passed as ``num_points`` to ``text_line.get_height_points``.
    num_sample_height_points: int = 3
    # When true, the step output carries ``page_text_line_mask``.
    enable_text_line_mask: bool = False
    # When true, the step output carries the two boundary masks.
    enable_boundary_mask: bool = False
    # Passed to ``box.to_dilated_box`` when building the dilated text line boxes.
    boundary_dilate_ratio: float = 0.5
    # When true, the step output carries ``page_text_line_boundary_score_map``.
    enable_boundary_score_map: bool = False
    # Passed as ``ref_char_height_ratio`` / ``ref_char_width_ratio`` to
    # ``text_line.to_char_polygons`` when building the adjusted char polygons.
    adjusted_ref_char_height_ratio: float = 0.6
    adjusted_ref_char_width_ratio: float = 0.6


@attrs.define
class PageTextLineLabelStepInput:
    """Input of ``PageTextLineLabelStep``: the output of the page text line step."""
    page_text_line_step_output: PageTextLineStepOutput


@attrs.define
class PageTextLinePolygonCollection:
    """Text-line-level polygons and sampled height points of one page."""
    # Page size, copied from the text line collection.
    height: int
    width: int
    # One polygon per text line (``text_line.to_polygon()``).
    polygons: Sequence[Polygon]
    # Number of height points contributed by each text line, in text line order.
    height_points_group_sizes: Sequence[int]
    # Concatenated height points of all text lines (``is_up=True`` / ``is_up=False``).
    height_points_up: PointList
    height_points_down: PointList


@attrs.define
class PageCharPolygonCollection:
    """Char-level polygons and height points of one page."""
    # Page size, copied from the text line collection.
    height: int
    width: int
    # Char polygons built with the default reference ratios.
    char_polygons: Sequence[Polygon]
    # Char polygons built with the ``adjusted_ref_char_*_ratio`` config values.
    adjusted_char_polygons: Sequence[Polygon]
    # Char-level height points (``is_up=True`` / ``is_up=False``).
    height_points_up: PointList
    height_points_down: PointList


@attrs.define
class PageTextLineLabelStepOutput:
    """Output of ``PageTextLineLabelStep``; optional fields are ``None`` when disabled."""
    page_char_polygon_collection: PageCharPolygonCollection
    page_text_line_polygon_collection: PageTextLinePolygonCollection
    page_text_line_mask: Optional[Mask]
    page_text_line_boundary_mask: Optional[Mask]
    page_text_line_and_boundary_mask: Optional[Mask]
    page_text_line_boundary_score_map: Optional[ScoreMap]


class PageTextLineLabelStep(
    PipelineStep[
        PageTextLineLabelStepConfig,
        PageTextLineLabelStepInput,
        PageTextLineLabelStepOutput,
    ]
):  # yapf: disable
    """Pipeline step that derives labels from the text lines of a page.

    It always builds the char-level and text-line-level polygon collections and,
    depending on the config flags, a text line mask, boundary masks and a boundary
    score map.
    """

    def generate_page_char_polygon_collection(
        self,
        page_text_line_collection: PageTextLineCollection,
    ):
        """Gather char polygons and char-level height points of every text line.

        Returns:
            A ``PageCharPolygonCollection`` whose four sequences have equal length.
        """
        page_height = page_text_line_collection.height
        page_width = page_text_line_collection.width

        char_polygons: List[Polygon] = []
        adjusted_char_polygons: List[Polygon] = []
        height_points_up = PointList()
        height_points_down = PointList()

        for text_line in page_text_line_collection.text_lines:
            # Polygons with the default reference ratios.
            default_polygons = text_line.to_char_polygons(
                page_height=page_height,
                page_width=page_width,
            )
            char_polygons.extend(default_polygons)

            # Polygons with the configured (adjusted) reference ratios.
            adjusted_polygons = text_line.to_char_polygons(
                page_height=page_height,
                page_width=page_width,
                ref_char_height_ratio=self.config.adjusted_ref_char_height_ratio,
                ref_char_width_ratio=self.config.adjusted_ref_char_width_ratio,
            )
            adjusted_char_polygons.extend(adjusted_polygons)

            height_points_up.extend(text_line.get_char_level_height_points(is_up=True))
            height_points_down.extend(text_line.get_char_level_height_points(is_up=False))

        # All four sequences must stay aligned index by index.
        lengths = {
            len(char_polygons),
            len(adjusted_char_polygons),
            len(height_points_up),
            len(height_points_down),
        }
        assert len(lengths) == 1

        return PageCharPolygonCollection(
            height=page_height,
            width=page_width,
            char_polygons=char_polygons,
            adjusted_char_polygons=adjusted_char_polygons,
            height_points_up=height_points_up,
            height_points_down=height_points_down,
        )

    def generate_page_text_line_polygon_collection(
        self,
        page_text_line_collection: PageTextLineCollection,
    ):
        """Gather one polygon and a group of sampled height points per text line.

        Returns:
            A ``PageTextLinePolygonCollection``. ``height_points_group_sizes[i]`` is the
            number of up (and down) points contributed by the i-th text line.
        """
        polygons: List[Polygon] = []
        group_sizes: List[int] = []
        points_up = PointList()
        points_down = PointList()

        for text_line in page_text_line_collection.text_lines:
            polygons.append(text_line.to_polygon())

            num_points = self.config.num_sample_height_points
            line_points_up = text_line.get_height_points(num_points=num_points, is_up=True)
            line_points_down = text_line.get_height_points(num_points=num_points, is_up=False)

            # Every text line must yield a non-empty, symmetric group of points.
            group_size = len(line_points_up)
            assert 0 < group_size == len(line_points_down)

            group_sizes.append(group_size)
            points_up.extend(line_points_up)
            points_down.extend(line_points_down)

        return PageTextLinePolygonCollection(
            height=page_text_line_collection.height,
            width=page_text_line_collection.width,
            polygons=polygons,
            height_points_group_sizes=group_sizes,
            height_points_up=points_up,
            height_points_down=points_down,
        )

    def generate_page_text_line_mask(self, page_text_line_collection: PageTextLineCollection):
        """Return a page-shaped mask with the box of every text line filled in."""
        mask = Mask.from_shape(page_text_line_collection.shape)
        for text_line in page_text_line_collection.text_lines:
            text_line.box.fill_mask(mask)
        return mask

    def generate_text_line_boxes_and_dilated_boxes(
        self, page_text_line_collection: PageTextLineCollection
    ):
        """Return text line boxes and their dilated, page-clipped counterparts.

        Both lists are index-aligned and ordered by descending ``font_size``.
        """
        ordered_text_lines = list(page_text_line_collection.text_lines)
        ordered_text_lines.sort(key=lambda text_line: text_line.font_size, reverse=True)

        boxes: List[Box] = [text_line.box for text_line in ordered_text_lines]
        dilated_boxes: List[Box] = [
            box.to_dilated_box(
                self.config.boundary_dilate_ratio,
                clip_long_side=True,
            ).to_clipped_box(page_text_line_collection.shape)
            for box in boxes
        ]
        return boxes, dilated_boxes

    @classmethod
    def generate_dilated_only_boxes(
        cls,
        box: Box,
        dilated_box: Box,
    ):
        """Split the area of ``dilated_box`` outside ``box`` into four strips.

        The up/down strips span the full width of ``dilated_box``; the left/right
        strips span only the rows of ``box``.

        Returns:
            ``(up, down, left, right)``; a strip is ``None`` when it would be empty,
            i.e. when its first row/column lies past its last one.
        """
        dilated_up_box = attrs.evolve(
            dilated_box,
            down=box.up - 1,
        )
        # Fixed: compare against the strip's own ``down`` (it used to compare against
        # ``dilated_box.down``, so an empty upper strip was never discarded).
        if dilated_up_box.up > dilated_up_box.down:
            dilated_up_box = None

        dilated_down_box = attrs.evolve(
            dilated_box,
            up=box.down + 1,
        )
        if dilated_down_box.up > dilated_down_box.down:
            dilated_down_box = None

        dilated_left_box = attrs.evolve(
            box,
            left=dilated_box.left,
            right=box.left - 1,
        )
        if dilated_left_box.left > dilated_left_box.right:
            dilated_left_box = None

        dilated_right_box = attrs.evolve(
            box,
            left=box.right + 1,
            right=dilated_box.right,
        )
        if dilated_right_box.left > dilated_right_box.right:
            dilated_right_box = None

        return (
            dilated_up_box,
            dilated_down_box,
            dilated_left_box,
            dilated_right_box,
        )

    def generate_page_text_line_boundary_masks(
        self,
        page_text_line_collection: PageTextLineCollection,
        boxes: Sequence[Box],
        dilated_boxes: Sequence[Box],
        page_text_line_mask: Mask,
    ):
        """Build the boundary mask and the combined text-line-plus-boundary mask.

        Returns:
            ``(boundary_mask, text_line_and_boundary_mask)``.
        """
        boundary_mask = Mask.from_shape(page_text_line_collection.shape)

        for box, dilated_box in zip(boxes, dilated_boxes):
            for side_box in self.generate_dilated_only_boxes(box, dilated_box):
                if not side_box:
                    continue
                side_box.fill_mask(boundary_mask)

        # Write 0 into the boundary mask through the text line mask
        # (presumably clearing the text line area -- confirm against Mask.fill_mask).
        page_text_line_mask.fill_mask(boundary_mask, 0)

        combined_mask = boundary_mask.copy()
        page_text_line_mask.fill_mask(combined_mask)

        return boundary_mask, combined_mask

    def generate_page_text_line_boundary_score_map(
        self,
        page_text_line_collection: PageTextLineCollection,
        boxes: Sequence[Box],
        dilated_boxes: Sequence[Box],
        page_text_line_boundary_mask: Mask,
    ):
        """Build a page-shaped score map over the boundary strips of every text line.

        The map starts at 1.0, each non-empty strip is filled by quad interpolation
        (keeping the minimum value), and finally 0.0 is written through the inverted
        boundary mask.
        """

        def take_channel_one(np_uv):
            # Same selector for every strip: channel 1 of the interpolation output.
            return np_uv[:, :, 1]

        boundary_score_map = ScoreMap.from_shape(page_text_line_collection.shape, value=1.0)

        for box, dilated_box in zip(boxes, dilated_boxes):
            (
                dilated_up_box,
                dilated_down_box,
                dilated_left_box,
                dilated_right_box,
            ) = self.generate_dilated_only_boxes(box, dilated_box)

            # (strip, quad corners as (y, x)) in the order up, down, left, right.
            # The first two corners lie on the text line box, the last two on the dilated box.
            sides = (
                (
                    dilated_up_box,
                    (
                        (box.up, box.right),
                        (box.up, box.left),
                        (dilated_box.up, dilated_box.left),
                        (dilated_box.up, dilated_box.right),
                    ),
                ),
                (
                    dilated_down_box,
                    (
                        (box.down, box.left),
                        (box.down, box.right),
                        (dilated_box.down, dilated_box.right),
                        (dilated_box.down, dilated_box.left),
                    ),
                ),
                (
                    dilated_left_box,
                    (
                        (box.up, box.left),
                        (box.down, box.left),
                        (dilated_box.down, dilated_box.left),
                        (dilated_box.up, dilated_box.left),
                    ),
                ),
                (
                    dilated_right_box,
                    (
                        (box.down, box.right),
                        (box.up, box.right),
                        (dilated_box.up, dilated_box.right),
                        (dilated_box.down, dilated_box.right),
                    ),
                ),
            )

            for side_box, corners in sides:
                if not side_box:
                    continue
                point0, point1, point2, point3 = [Point.create(y=y, x=x) for y, x in corners]
                boundary_score_map.fill_by_quad_interpolation(
                    point0=point0,
                    point1=point1,
                    point2=point2,
                    point3=point3,
                    func_np_uv_to_mat=take_channel_one,
                    keep_min_value=True,
                )

        page_text_line_boundary_mask.to_inverted_mask().fill_score_map(boundary_score_map, 0.0)
        return boundary_score_map

    def run(self, input: PageTextLineLabelStepInput, rng: RandomGenerator):
        """Run the step and return a ``PageTextLineLabelStepOutput``.

        The boundary masks need the text line mask, and the score map needs the
        boundary mask. Such prerequisites are computed internally whenever a dependent
        output is enabled, but each output field is only populated when its own config
        flag is set. ``rng`` is not used.
        """
        page_text_line_step_output = input.page_text_line_step_output
        page_text_line_collection = page_text_line_step_output.page_text_line_collection

        page_char_polygon_collection = self.generate_page_char_polygon_collection(
            page_text_line_collection,
        )
        page_text_line_polygon_collection = self.generate_page_text_line_polygon_collection(
            page_text_line_collection,
        )

        config = self.config
        # Fixed: enabling a dependent output without its prerequisite used to pass
        # ``None`` into the generators below and fail with AttributeError.
        need_boundary_mask = config.enable_boundary_mask or config.enable_boundary_score_map
        need_text_line_mask = config.enable_text_line_mask or need_boundary_mask

        text_line_mask: Optional[Mask] = None
        boundary_mask: Optional[Mask] = None
        text_line_and_boundary_mask: Optional[Mask] = None
        boundary_score_map: Optional[ScoreMap] = None

        if need_text_line_mask:
            text_line_mask = self.generate_page_text_line_mask(page_text_line_collection)

        if need_boundary_mask:
            boxes, dilated_boxes = self.generate_text_line_boxes_and_dilated_boxes(
                page_text_line_collection
            )
            (
                boundary_mask,
                text_line_and_boundary_mask,
            ) = self.generate_page_text_line_boundary_masks(
                page_text_line_collection,
                boxes,
                dilated_boxes,
                text_line_mask,
            )

            if config.enable_boundary_score_map:
                boundary_score_map = self.generate_page_text_line_boundary_score_map(
                    page_text_line_collection,
                    boxes,
                    dilated_boxes,
                    boundary_mask,
                )

        # Expose only what the config asked for.
        if not config.enable_text_line_mask:
            text_line_mask = None
        if not config.enable_boundary_mask:
            boundary_mask = None
            text_line_and_boundary_mask = None

        return PageTextLineLabelStepOutput(
            page_char_polygon_collection=page_char_polygon_collection,
            page_text_line_polygon_collection=page_text_line_polygon_collection,
            page_text_line_mask=text_line_mask,
            page_text_line_boundary_mask=boundary_mask,
            page_text_line_and_boundary_mask=text_line_and_boundary_mask,
            page_text_line_boundary_score_map=boundary_score_map,
        )


page_text_line_label_step_factory = PipelineStepFactory(PageTextLineLabelStep)
class PageTextLineLabelStepConfig:
    """Options of ``PageTextLineLabelStep``."""
    # Passed as ``num_points`` to ``text_line.get_height_points``.
    num_sample_height_points: int = 3
    # When true, the step output carries ``page_text_line_mask``.
    enable_text_line_mask: bool = False
    # When true, the step output carries the two boundary masks.
    enable_boundary_mask: bool = False
    # Passed to ``box.to_dilated_box`` when building the dilated text line boxes.
    boundary_dilate_ratio: float = 0.5
    # When true, the step output carries ``page_text_line_boundary_score_map``.
    enable_boundary_score_map: bool = False
    # Passed as ``ref_char_height_ratio`` / ``ref_char_width_ratio`` to
    # ``text_line.to_char_polygons`` when building the adjusted char polygons.
    adjusted_ref_char_height_ratio: float = 0.6
    adjusted_ref_char_width_ratio: float = 0.6
# Constructor generated by attrs for PageTextLineLabelStepConfig (not hand-written).
# Every parameter defaults to the default recorded on the matching attrs attribute.
def __init__(self, num_sample_height_points=attr_dict['num_sample_height_points'].default, enable_text_line_mask=attr_dict['enable_text_line_mask'].default, enable_boundary_mask=attr_dict['enable_boundary_mask'].default, boundary_dilate_ratio=attr_dict['boundary_dilate_ratio'].default, enable_boundary_score_map=attr_dict['enable_boundary_score_map'].default, adjusted_ref_char_height_ratio=attr_dict['adjusted_ref_char_height_ratio'].default, adjusted_ref_char_width_ratio=attr_dict['adjusted_ref_char_width_ratio'].default):
    # Plain one-to-one assignment of arguments to attributes.
    self.num_sample_height_points = num_sample_height_points
    self.enable_text_line_mask = enable_text_line_mask
    self.enable_boundary_mask = enable_boundary_mask
    self.boundary_dilate_ratio = boundary_dilate_ratio
    self.enable_boundary_score_map = enable_boundary_score_map
    self.adjusted_ref_char_height_ratio = adjusted_ref_char_height_ratio
    self.adjusted_ref_char_width_ratio = adjusted_ref_char_width_ratio
Method generated by attrs for class PageTextLineLabelStepConfig.
# Constructor generated by attrs for PageTextLineLabelStepInput (not hand-written).
def __init__(self, page_text_line_step_output):
    # The single required field is stored as-is.
    self.page_text_line_step_output = page_text_line_step_output
Method generated by attrs for class PageTextLineLabelStepInput.
class PageTextLinePolygonCollection:
    """Text-line-level polygons and sampled height points of one page."""
    # Page size, copied from the text line collection.
    height: int
    width: int
    # One polygon per text line (``text_line.to_polygon()``).
    polygons: Sequence[Polygon]
    # Number of height points contributed by each text line, in text line order.
    height_points_group_sizes: Sequence[int]
    # Concatenated height points of all text lines (``is_up=True`` / ``is_up=False``).
    height_points_up: PointList
    height_points_down: PointList
# Constructor generated by attrs for PageTextLinePolygonCollection (not hand-written).
# All six fields are required; none has a default.
def __init__(self, height, width, polygons, height_points_group_sizes, height_points_up, height_points_down):
    # Plain one-to-one assignment of arguments to attributes.
    self.height = height
    self.width = width
    self.polygons = polygons
    self.height_points_group_sizes = height_points_group_sizes
    self.height_points_up = height_points_up
    self.height_points_down = height_points_down
Method generated by attrs for class PageTextLinePolygonCollection.
class PageCharPolygonCollection:
    """Char-level polygons and height points of one page."""
    # Page size, copied from the text line collection.
    height: int
    width: int
    # Char polygons built with the default reference ratios.
    char_polygons: Sequence[Polygon]
    # Char polygons built with the ``adjusted_ref_char_*_ratio`` config values.
    adjusted_char_polygons: Sequence[Polygon]
    # Char-level height points (``is_up=True`` / ``is_up=False``).
    height_points_up: PointList
    height_points_down: PointList
# Constructor generated by attrs for PageCharPolygonCollection (not hand-written).
# All six fields are required; none has a default.
def __init__(self, height, width, char_polygons, adjusted_char_polygons, height_points_up, height_points_down):
    # Plain one-to-one assignment of arguments to attributes.
    self.height = height
    self.width = width
    self.char_polygons = char_polygons
    self.adjusted_char_polygons = adjusted_char_polygons
    self.height_points_up = height_points_up
    self.height_points_down = height_points_down
Method generated by attrs for class PageCharPolygonCollection.
class PageTextLineLabelStepOutput:
    """Output of ``PageTextLineLabelStep``."""
    # Always populated by ``run``.
    page_char_polygon_collection: PageCharPolygonCollection
    page_text_line_polygon_collection: PageTextLinePolygonCollection
    # Optional outputs; ``run`` initialises them to None and fills them
    # depending on the config flags.
    page_text_line_mask: Optional[Mask]
    page_text_line_boundary_mask: Optional[Mask]
    page_text_line_and_boundary_mask: Optional[Mask]
    page_text_line_boundary_score_map: Optional[ScoreMap]
# Constructor generated by attrs for PageTextLineLabelStepOutput (not hand-written).
# All six fields are required arguments; the Optional ones must be passed explicitly.
def __init__(self, page_char_polygon_collection, page_text_line_polygon_collection, page_text_line_mask, page_text_line_boundary_mask, page_text_line_and_boundary_mask, page_text_line_boundary_score_map):
    # Plain one-to-one assignment of arguments to attributes.
    self.page_char_polygon_collection = page_char_polygon_collection
    self.page_text_line_polygon_collection = page_text_line_polygon_collection
    self.page_text_line_mask = page_text_line_mask
    self.page_text_line_boundary_mask = page_text_line_boundary_mask
    self.page_text_line_and_boundary_mask = page_text_line_and_boundary_mask
    self.page_text_line_boundary_score_map = page_text_line_boundary_score_map
Method generated by attrs for class PageTextLineLabelStepOutput.
class PageTextLineLabelStep(
    PipelineStep[
        PageTextLineLabelStepConfig,
        PageTextLineLabelStepInput,
        PageTextLineLabelStepOutput,
    ]
):  # yapf: disable
    """Pipeline step that derives labels from the text lines of a page.

    It always builds the char-level and text-line-level polygon collections and,
    depending on the config flags, a text line mask, boundary masks and a boundary
    score map.
    """

    def generate_page_char_polygon_collection(
        self,
        page_text_line_collection: PageTextLineCollection,
    ):
        """Gather char polygons and char-level height points of every text line.

        Returns:
            A ``PageCharPolygonCollection`` whose four sequences have equal length.
        """
        page_height = page_text_line_collection.height
        page_width = page_text_line_collection.width

        char_polygons: List[Polygon] = []
        adjusted_char_polygons: List[Polygon] = []
        height_points_up = PointList()
        height_points_down = PointList()

        for text_line in page_text_line_collection.text_lines:
            # Polygons with the default reference ratios.
            default_polygons = text_line.to_char_polygons(
                page_height=page_height,
                page_width=page_width,
            )
            char_polygons.extend(default_polygons)

            # Polygons with the configured (adjusted) reference ratios.
            adjusted_polygons = text_line.to_char_polygons(
                page_height=page_height,
                page_width=page_width,
                ref_char_height_ratio=self.config.adjusted_ref_char_height_ratio,
                ref_char_width_ratio=self.config.adjusted_ref_char_width_ratio,
            )
            adjusted_char_polygons.extend(adjusted_polygons)

            height_points_up.extend(text_line.get_char_level_height_points(is_up=True))
            height_points_down.extend(text_line.get_char_level_height_points(is_up=False))

        # All four sequences must stay aligned index by index.
        lengths = {
            len(char_polygons),
            len(adjusted_char_polygons),
            len(height_points_up),
            len(height_points_down),
        }
        assert len(lengths) == 1

        return PageCharPolygonCollection(
            height=page_height,
            width=page_width,
            char_polygons=char_polygons,
            adjusted_char_polygons=adjusted_char_polygons,
            height_points_up=height_points_up,
            height_points_down=height_points_down,
        )

    def generate_page_text_line_polygon_collection(
        self,
        page_text_line_collection: PageTextLineCollection,
    ):
        """Gather one polygon and a group of sampled height points per text line.

        Returns:
            A ``PageTextLinePolygonCollection``. ``height_points_group_sizes[i]`` is the
            number of up (and down) points contributed by the i-th text line.
        """
        polygons: List[Polygon] = []
        group_sizes: List[int] = []
        points_up = PointList()
        points_down = PointList()

        for text_line in page_text_line_collection.text_lines:
            polygons.append(text_line.to_polygon())

            num_points = self.config.num_sample_height_points
            line_points_up = text_line.get_height_points(num_points=num_points, is_up=True)
            line_points_down = text_line.get_height_points(num_points=num_points, is_up=False)

            # Every text line must yield a non-empty, symmetric group of points.
            group_size = len(line_points_up)
            assert 0 < group_size == len(line_points_down)

            group_sizes.append(group_size)
            points_up.extend(line_points_up)
            points_down.extend(line_points_down)

        return PageTextLinePolygonCollection(
            height=page_text_line_collection.height,
            width=page_text_line_collection.width,
            polygons=polygons,
            height_points_group_sizes=group_sizes,
            height_points_up=points_up,
            height_points_down=points_down,
        )

    def generate_page_text_line_mask(self, page_text_line_collection: PageTextLineCollection):
        """Return a page-shaped mask with the box of every text line filled in."""
        mask = Mask.from_shape(page_text_line_collection.shape)
        for text_line in page_text_line_collection.text_lines:
            text_line.box.fill_mask(mask)
        return mask

    def generate_text_line_boxes_and_dilated_boxes(
        self, page_text_line_collection: PageTextLineCollection
    ):
        """Return text line boxes and their dilated, page-clipped counterparts.

        Both lists are index-aligned and ordered by descending ``font_size``.
        """
        ordered_text_lines = list(page_text_line_collection.text_lines)
        ordered_text_lines.sort(key=lambda text_line: text_line.font_size, reverse=True)

        boxes: List[Box] = [text_line.box for text_line in ordered_text_lines]
        dilated_boxes: List[Box] = [
            box.to_dilated_box(
                self.config.boundary_dilate_ratio,
                clip_long_side=True,
            ).to_clipped_box(page_text_line_collection.shape)
            for box in boxes
        ]
        return boxes, dilated_boxes

    @classmethod
    def generate_dilated_only_boxes(
        cls,
        box: Box,
        dilated_box: Box,
    ):
        """Split the area of ``dilated_box`` outside ``box`` into four strips.

        The up/down strips span the full width of ``dilated_box``; the left/right
        strips span only the rows of ``box``.

        Returns:
            ``(up, down, left, right)``; a strip is ``None`` when it would be empty,
            i.e. when its first row/column lies past its last one.
        """
        dilated_up_box = attrs.evolve(
            dilated_box,
            down=box.up - 1,
        )
        # Fixed: compare against the strip's own ``down`` (it used to compare against
        # ``dilated_box.down``, so an empty upper strip was never discarded).
        if dilated_up_box.up > dilated_up_box.down:
            dilated_up_box = None

        dilated_down_box = attrs.evolve(
            dilated_box,
            up=box.down + 1,
        )
        if dilated_down_box.up > dilated_down_box.down:
            dilated_down_box = None

        dilated_left_box = attrs.evolve(
            box,
            left=dilated_box.left,
            right=box.left - 1,
        )
        if dilated_left_box.left > dilated_left_box.right:
            dilated_left_box = None

        dilated_right_box = attrs.evolve(
            box,
            left=box.right + 1,
            right=dilated_box.right,
        )
        if dilated_right_box.left > dilated_right_box.right:
            dilated_right_box = None

        return (
            dilated_up_box,
            dilated_down_box,
            dilated_left_box,
            dilated_right_box,
        )

    def generate_page_text_line_boundary_masks(
        self,
        page_text_line_collection: PageTextLineCollection,
        boxes: Sequence[Box],
        dilated_boxes: Sequence[Box],
        page_text_line_mask: Mask,
    ):
        """Build the boundary mask and the combined text-line-plus-boundary mask.

        Returns:
            ``(boundary_mask, text_line_and_boundary_mask)``.
        """
        boundary_mask = Mask.from_shape(page_text_line_collection.shape)

        for box, dilated_box in zip(boxes, dilated_boxes):
            for side_box in self.generate_dilated_only_boxes(box, dilated_box):
                if not side_box:
                    continue
                side_box.fill_mask(boundary_mask)

        # Write 0 into the boundary mask through the text line mask
        # (presumably clearing the text line area -- confirm against Mask.fill_mask).
        page_text_line_mask.fill_mask(boundary_mask, 0)

        combined_mask = boundary_mask.copy()
        page_text_line_mask.fill_mask(combined_mask)

        return boundary_mask, combined_mask

    def generate_page_text_line_boundary_score_map(
        self,
        page_text_line_collection: PageTextLineCollection,
        boxes: Sequence[Box],
        dilated_boxes: Sequence[Box],
        page_text_line_boundary_mask: Mask,
    ):
        """Build a page-shaped score map over the boundary strips of every text line.

        The map starts at 1.0, each non-empty strip is filled by quad interpolation
        (keeping the minimum value), and finally 0.0 is written through the inverted
        boundary mask.
        """

        def take_channel_one(np_uv):
            # Same selector for every strip: channel 1 of the interpolation output.
            return np_uv[:, :, 1]

        boundary_score_map = ScoreMap.from_shape(page_text_line_collection.shape, value=1.0)

        for box, dilated_box in zip(boxes, dilated_boxes):
            (
                dilated_up_box,
                dilated_down_box,
                dilated_left_box,
                dilated_right_box,
            ) = self.generate_dilated_only_boxes(box, dilated_box)

            # (strip, quad corners as (y, x)) in the order up, down, left, right.
            # The first two corners lie on the text line box, the last two on the dilated box.
            sides = (
                (
                    dilated_up_box,
                    (
                        (box.up, box.right),
                        (box.up, box.left),
                        (dilated_box.up, dilated_box.left),
                        (dilated_box.up, dilated_box.right),
                    ),
                ),
                (
                    dilated_down_box,
                    (
                        (box.down, box.left),
                        (box.down, box.right),
                        (dilated_box.down, dilated_box.right),
                        (dilated_box.down, dilated_box.left),
                    ),
                ),
                (
                    dilated_left_box,
                    (
                        (box.up, box.left),
                        (box.down, box.left),
                        (dilated_box.down, dilated_box.left),
                        (dilated_box.up, dilated_box.left),
                    ),
                ),
                (
                    dilated_right_box,
                    (
                        (box.down, box.right),
                        (box.up, box.right),
                        (dilated_box.up, dilated_box.right),
                        (dilated_box.down, dilated_box.right),
                    ),
                ),
            )

            for side_box, corners in sides:
                if not side_box:
                    continue
                point0, point1, point2, point3 = [Point.create(y=y, x=x) for y, x in corners]
                boundary_score_map.fill_by_quad_interpolation(
                    point0=point0,
                    point1=point1,
                    point2=point2,
                    point3=point3,
                    func_np_uv_to_mat=take_channel_one,
                    keep_min_value=True,
                )

        page_text_line_boundary_mask.to_inverted_mask().fill_score_map(boundary_score_map, 0.0)
        return boundary_score_map

    def run(self, input: PageTextLineLabelStepInput, rng: RandomGenerator):
        """Run the step and return a ``PageTextLineLabelStepOutput``.

        The boundary masks need the text line mask, and the score map needs the
        boundary mask. Such prerequisites are computed internally whenever a dependent
        output is enabled, but each output field is only populated when its own config
        flag is set. ``rng`` is not used.
        """
        page_text_line_step_output = input.page_text_line_step_output
        page_text_line_collection = page_text_line_step_output.page_text_line_collection

        page_char_polygon_collection = self.generate_page_char_polygon_collection(
            page_text_line_collection,
        )
        page_text_line_polygon_collection = self.generate_page_text_line_polygon_collection(
            page_text_line_collection,
        )

        config = self.config
        # Fixed: enabling a dependent output without its prerequisite used to pass
        # ``None`` into the generators below and fail with AttributeError.
        need_boundary_mask = config.enable_boundary_mask or config.enable_boundary_score_map
        need_text_line_mask = config.enable_text_line_mask or need_boundary_mask

        text_line_mask: Optional[Mask] = None
        boundary_mask: Optional[Mask] = None
        text_line_and_boundary_mask: Optional[Mask] = None
        boundary_score_map: Optional[ScoreMap] = None

        if need_text_line_mask:
            text_line_mask = self.generate_page_text_line_mask(page_text_line_collection)

        if need_boundary_mask:
            boxes, dilated_boxes = self.generate_text_line_boxes_and_dilated_boxes(
                page_text_line_collection
            )
            (
                boundary_mask,
                text_line_and_boundary_mask,
            ) = self.generate_page_text_line_boundary_masks(
                page_text_line_collection,
                boxes,
                dilated_boxes,
                text_line_mask,
            )

            if config.enable_boundary_score_map:
                boundary_score_map = self.generate_page_text_line_boundary_score_map(
                    page_text_line_collection,
                    boxes,
                    dilated_boxes,
                    boundary_mask,
                )

        # Expose only what the config asked for.
        if not config.enable_text_line_mask:
            text_line_mask = None
        if not config.enable_boundary_mask:
            boundary_mask = None
            text_line_and_boundary_mask = None

        return PageTextLineLabelStepOutput(
            page_char_polygon_collection=page_char_polygon_collection,
            page_text_line_polygon_collection=page_text_line_polygon_collection,
            page_text_line_mask=text_line_mask,
            page_text_line_boundary_mask=boundary_mask,
            page_text_line_and_boundary_mask=text_line_and_boundary_mask,
            page_text_line_boundary_score_map=boundary_score_map,
        )
Abstract base class for generic types.
A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::
    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
        # Etc.
This class can then be used as follows::
    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default
def generate_page_char_polygon_collection(
    self,
    page_text_line_collection: PageTextLineCollection,
):
    """Gather char polygons and char-level height points of every text line.

    Returns:
        A ``PageCharPolygonCollection`` whose four sequences have equal length.
    """
    page_height = page_text_line_collection.height
    page_width = page_text_line_collection.width

    char_polygons: List[Polygon] = []
    adjusted_char_polygons: List[Polygon] = []
    height_points_up = PointList()
    height_points_down = PointList()

    for text_line in page_text_line_collection.text_lines:
        # Polygons with the default reference ratios.
        default_polygons = text_line.to_char_polygons(
            page_height=page_height,
            page_width=page_width,
        )
        char_polygons.extend(default_polygons)

        # Polygons with the configured (adjusted) reference ratios.
        adjusted_polygons = text_line.to_char_polygons(
            page_height=page_height,
            page_width=page_width,
            ref_char_height_ratio=self.config.adjusted_ref_char_height_ratio,
            ref_char_width_ratio=self.config.adjusted_ref_char_width_ratio,
        )
        adjusted_char_polygons.extend(adjusted_polygons)

        height_points_up.extend(text_line.get_char_level_height_points(is_up=True))
        height_points_down.extend(text_line.get_char_level_height_points(is_up=False))

    # All four sequences must stay aligned index by index.
    lengths = {
        len(char_polygons),
        len(adjusted_char_polygons),
        len(height_points_up),
        len(height_points_down),
    }
    assert len(lengths) == 1

    return PageCharPolygonCollection(
        height=page_height,
        width=page_width,
        char_polygons=char_polygons,
        adjusted_char_polygons=adjusted_char_polygons,
        height_points_up=height_points_up,
        height_points_down=height_points_down,
    )
def generate_page_text_line_polygon_collection(
    self,
    page_text_line_collection: PageTextLineCollection,
):
    """Gather one polygon and a group of sampled height points per text line.

    Returns:
        A ``PageTextLinePolygonCollection``. ``height_points_group_sizes[i]`` is the
        number of up (and down) points contributed by the i-th text line.
    """
    polygons: List[Polygon] = []
    group_sizes: List[int] = []
    points_up = PointList()
    points_down = PointList()

    for text_line in page_text_line_collection.text_lines:
        polygons.append(text_line.to_polygon())

        num_points = self.config.num_sample_height_points
        line_points_up = text_line.get_height_points(num_points=num_points, is_up=True)
        line_points_down = text_line.get_height_points(num_points=num_points, is_up=False)

        # Every text line must yield a non-empty, symmetric group of points.
        group_size = len(line_points_up)
        assert 0 < group_size == len(line_points_down)

        group_sizes.append(group_size)
        points_up.extend(line_points_up)
        points_down.extend(line_points_down)

    return PageTextLinePolygonCollection(
        height=page_text_line_collection.height,
        width=page_text_line_collection.width,
        polygons=polygons,
        height_points_group_sizes=group_sizes,
        height_points_up=points_up,
        height_points_down=points_down,
    )
def generate_page_text_line_mask(self, page_text_line_collection: PageTextLineCollection):
    """Return a page-shaped mask with the box of every text line filled in."""
    mask = Mask.from_shape(page_text_line_collection.shape)
    for text_line in page_text_line_collection.text_lines:
        text_line.box.fill_mask(mask)
    return mask
def generate_text_line_boxes_and_dilated_boxes(
    self, page_text_line_collection: PageTextLineCollection
):
    """Return text line boxes and their dilated, page-clipped counterparts.

    Both lists are index-aligned and ordered by descending ``font_size``.
    """
    ordered_text_lines = list(page_text_line_collection.text_lines)
    ordered_text_lines.sort(key=lambda text_line: text_line.font_size, reverse=True)

    boxes: List[Box] = [text_line.box for text_line in ordered_text_lines]
    dilated_boxes: List[Box] = [
        box.to_dilated_box(
            self.config.boundary_dilate_ratio,
            clip_long_side=True,
        ).to_clipped_box(page_text_line_collection.shape)
        for box in boxes
    ]
    return boxes, dilated_boxes
@classmethod
def generate_dilated_only_boxes(
    cls,
    box: Box,
    dilated_box: Box,
):
    """Split the area of ``dilated_box`` outside ``box`` into four strips.

    The up/down strips span the full width of ``dilated_box``; the left/right
    strips span only the rows of ``box``.

    Returns:
        ``(up, down, left, right)``; a strip is ``None`` when it would be empty,
        i.e. when its first row/column lies past its last one.
    """
    dilated_up_box = attrs.evolve(
        dilated_box,
        down=box.up - 1,
    )
    # Fixed: compare against the strip's own ``down`` (it used to compare against
    # ``dilated_box.down``, so an empty upper strip was never discarded, unlike the
    # other three sides).
    if dilated_up_box.up > dilated_up_box.down:
        dilated_up_box = None

    dilated_down_box = attrs.evolve(
        dilated_box,
        up=box.down + 1,
    )
    if dilated_down_box.up > dilated_down_box.down:
        dilated_down_box = None

    dilated_left_box = attrs.evolve(
        box,
        left=dilated_box.left,
        right=box.left - 1,
    )
    if dilated_left_box.left > dilated_left_box.right:
        dilated_left_box = None

    dilated_right_box = attrs.evolve(
        box,
        left=box.right + 1,
        right=dilated_box.right,
    )
    if dilated_right_box.left > dilated_right_box.right:
        dilated_right_box = None

    return (
        dilated_up_box,
        dilated_down_box,
        dilated_left_box,
        dilated_right_box,
    )
def generate_page_text_line_boundary_masks(
    self,
    page_text_line_collection: PageTextLineCollection,
    boxes: Sequence[Box],
    dilated_boxes: Sequence[Box],
    page_text_line_mask: Mask,
):
    """Build the boundary mask and the combined text-line-plus-boundary mask.

    Returns:
        ``(boundary_mask, text_line_and_boundary_mask)``.
    """
    boundary_mask = Mask.from_shape(page_text_line_collection.shape)

    for box, dilated_box in zip(boxes, dilated_boxes):
        for side_box in self.generate_dilated_only_boxes(box, dilated_box):
            if not side_box:
                continue
            side_box.fill_mask(boundary_mask)

    # Write 0 into the boundary mask through the text line mask
    # (presumably clearing the text line area -- confirm against Mask.fill_mask).
    page_text_line_mask.fill_mask(boundary_mask, 0)

    combined_mask = boundary_mask.copy()
    page_text_line_mask.fill_mask(combined_mask)

    return boundary_mask, combined_mask
def generate_page_text_line_boundary_score_map(
    self,
    page_text_line_collection: PageTextLineCollection,
    boxes: Sequence[Box],
    dilated_boxes: Sequence[Box],
    page_text_line_boundary_mask: Mask,
):
    """Build a page-shaped score map over the boundary strips of every text line.

    The map starts at 1.0, each non-empty strip is filled by quad interpolation
    (keeping the minimum value), and finally 0.0 is written through the inverted
    boundary mask.
    """

    def take_channel_one(np_uv):
        # Same selector for every strip: channel 1 of the interpolation output.
        return np_uv[:, :, 1]

    boundary_score_map = ScoreMap.from_shape(page_text_line_collection.shape, value=1.0)

    for box, dilated_box in zip(boxes, dilated_boxes):
        (
            dilated_up_box,
            dilated_down_box,
            dilated_left_box,
            dilated_right_box,
        ) = self.generate_dilated_only_boxes(box, dilated_box)

        # (strip, quad corners as (y, x)) in the order up, down, left, right.
        # The first two corners lie on the text line box, the last two on the dilated box.
        sides = (
            (
                dilated_up_box,
                (
                    (box.up, box.right),
                    (box.up, box.left),
                    (dilated_box.up, dilated_box.left),
                    (dilated_box.up, dilated_box.right),
                ),
            ),
            (
                dilated_down_box,
                (
                    (box.down, box.left),
                    (box.down, box.right),
                    (dilated_box.down, dilated_box.right),
                    (dilated_box.down, dilated_box.left),
                ),
            ),
            (
                dilated_left_box,
                (
                    (box.up, box.left),
                    (box.down, box.left),
                    (dilated_box.down, dilated_box.left),
                    (dilated_box.up, dilated_box.left),
                ),
            ),
            (
                dilated_right_box,
                (
                    (box.down, box.right),
                    (box.up, box.right),
                    (dilated_box.up, dilated_box.right),
                    (dilated_box.down, dilated_box.right),
                ),
            ),
        )

        for side_box, corners in sides:
            if not side_box:
                continue
            point0, point1, point2, point3 = [Point.create(y=y, x=x) for y, x in corners]
            boundary_score_map.fill_by_quad_interpolation(
                point0=point0,
                point1=point1,
                point2=point2,
                point3=point3,
                func_np_uv_to_mat=take_channel_one,
                keep_min_value=True,
            )

    page_text_line_boundary_mask.to_inverted_mask().fill_score_map(boundary_score_map, 0.0)
    return boundary_score_map
def run(self, input: PageTextLineLabelStepInput, rng: RandomGenerator):
    """Run the step and return a ``PageTextLineLabelStepOutput``.

    The boundary masks need the text line mask, and the score map needs the
    boundary mask. Such prerequisites are computed internally whenever a dependent
    output is enabled, but each output field is only populated when its own config
    flag is set. ``rng`` is not used.
    """
    page_text_line_step_output = input.page_text_line_step_output
    page_text_line_collection = page_text_line_step_output.page_text_line_collection

    page_char_polygon_collection = self.generate_page_char_polygon_collection(
        page_text_line_collection,
    )
    page_text_line_polygon_collection = self.generate_page_text_line_polygon_collection(
        page_text_line_collection,
    )

    config = self.config
    # Fixed: enabling a dependent output without its prerequisite used to pass
    # ``None`` into the generators below and fail with AttributeError.
    need_boundary_mask = config.enable_boundary_mask or config.enable_boundary_score_map
    need_text_line_mask = config.enable_text_line_mask or need_boundary_mask

    text_line_mask: Optional[Mask] = None
    boundary_mask: Optional[Mask] = None
    text_line_and_boundary_mask: Optional[Mask] = None
    boundary_score_map: Optional[ScoreMap] = None

    if need_text_line_mask:
        text_line_mask = self.generate_page_text_line_mask(page_text_line_collection)

    if need_boundary_mask:
        # The boxes are only needed for the boundary outputs, so build them lazily.
        boxes, dilated_boxes = self.generate_text_line_boxes_and_dilated_boxes(
            page_text_line_collection
        )
        (
            boundary_mask,
            text_line_and_boundary_mask,
        ) = self.generate_page_text_line_boundary_masks(
            page_text_line_collection,
            boxes,
            dilated_boxes,
            text_line_mask,
        )

        if config.enable_boundary_score_map:
            boundary_score_map = self.generate_page_text_line_boundary_score_map(
                page_text_line_collection,
                boxes,
                dilated_boxes,
                boundary_mask,
            )

    # Expose only what the config asked for.
    if not config.enable_text_line_mask:
        text_line_mask = None
    if not config.enable_boundary_mask:
        boundary_mask = None
        text_line_and_boundary_mask = None

    return PageTextLineLabelStepOutput(
        page_char_polygon_collection=page_char_polygon_collection,
        page_text_line_polygon_collection=page_text_line_polygon_collection,
        page_text_line_mask=text_line_mask,
        page_text_line_boundary_mask=boundary_mask,
        page_text_line_and_boundary_mask=text_line_and_boundary_mask,
        page_text_line_boundary_score_map=boundary_score_map,
    )