vkit.pipeline.text_detection.page_text_line_label
# Copyright 2022 vkit-x Administrator. All Rights Reserved.
#
# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
#
# The commercial license gives you the full rights to create and distribute software
# on your own terms without any SSPL license obligations. For more information,
# please see the "LICENSE_COMMERCIAL.txt" file.
#
# This project is also available under Server Side Public License (SSPL).
# The SSPL licensing is ideal for use cases such as open source projects with
# SSPL distribution, student/academic purposes, hobby projects, internal research
# projects without external distribution, or other projects where all SSPL
# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
from typing import List, Sequence, Optional

import attrs
from numpy.random import Generator as RandomGenerator

from vkit.element import Point, PointList, Box, Mask, ScoreMap, Polygon
from ..interface import PipelineStep, PipelineStepFactory
from .page_text_line import PageTextLineStepOutput, PageTextLineCollection


@attrs.define
class PageTextLineLabelStepConfig:
    """Options controlling which labels `PageTextLineLabelStep` generates."""

    # Passed as `num_points` when sampling height points of each text line.
    num_sample_height_points: int = 3
    # Generate the page-sized mask covering the text line boxes.
    enable_text_line_mask: bool = False
    # Generate the boundary mask and the combined text-line-and-boundary mask.
    enable_boundary_mask: bool = False
    # Passed to `Box.to_dilated_box` to grow each text line box.
    boundary_dilate_ratio: float = 0.5
    # Generate the boundary score map.
    enable_boundary_score_map: bool = False


@attrs.define
class PageTextLineLabelStepInput:
    """Input of `PageTextLineLabelStep`: the output of the page text line step."""

    page_text_line_step_output: PageTextLineStepOutput


@attrs.define
class PageTextLinePolygonCollection:
    """Text-line polygons of a page together with their sampled height points."""

    # Page size, copied from the text line collection.
    height: int
    width: int
    # One polygon per text line.
    polygons: Sequence[Polygon]
    # Number of height points contributed by each text line, in text line order;
    # needed to split the flat point lists below back into per-line groups.
    height_points_group_sizes: Sequence[int]
    # Flat, index-aligned lists of the "up" and "down" height points.
    height_points_up: PointList
    height_points_down: PointList


@attrs.define
class PageCharPolygonCollection:
    """Char polygons of a page with one up/down height point per polygon."""

    # Page size, copied from the text line collection.
    height: int
    width: int
    polygons: Sequence[Polygon]
    # Index-aligned with `polygons`.
    height_points_up: PointList
    height_points_down: PointList


@attrs.define
class PageTextLineLabelStepOutput:
    """Labels produced by `PageTextLineLabelStep`; optional ones are None when not generated."""

    page_char_polygon_collection: PageCharPolygonCollection
    page_text_line_polygon_collection: PageTextLinePolygonCollection
    page_text_line_mask: Optional[Mask]
    page_text_line_boundary_mask: Optional[Mask]
    page_text_line_and_boundary_mask: Optional[Mask]
    page_text_line_boundary_score_map: Optional[ScoreMap]


class PageTextLineLabelStep(
    PipelineStep[
        PageTextLineLabelStepConfig,
        PageTextLineLabelStepInput,
        PageTextLineLabelStepOutput,
    ]
):  # yapf: disable
    """Pipeline step deriving text detection labels from the text lines of a page.

    Polygon collections are always generated; masks and the boundary score map are
    generated only when the corresponding config flags are set.
    """

    def generate_page_char_polygon_collection(
        self,
        page_text_line_collection: PageTextLineCollection,
    ):
        """Gather char polygons and char-level height points of all text lines.

        Returns:
            A `PageCharPolygonCollection` sized like the page.
        """
        char_polygons: List[Polygon] = []
        height_points_up = PointList()
        height_points_down = PointList()

        for text_line in page_text_line_collection.text_lines:
            char_polygons.extend(
                text_line.to_char_polygons(
                    page_height=page_text_line_collection.height,
                    page_width=page_text_line_collection.width,
                )
            )
            height_points_up.extend(text_line.get_char_level_height_points(is_up=True))
            height_points_down.extend(text_line.get_char_level_height_points(is_up=False))

        # The three sequences must stay index-aligned: one up/down point per polygon.
        assert len(char_polygons) == len(height_points_up) == len(height_points_down)
        return PageCharPolygonCollection(
            height=page_text_line_collection.height,
            width=page_text_line_collection.width,
            polygons=char_polygons,
            height_points_up=height_points_up,
            height_points_down=height_points_down,
        )

    def generate_page_text_line_polygon_collection(
        self,
        page_text_line_collection: PageTextLineCollection,
    ):
        """Gather one polygon per text line plus its sampled up/down height points.

        Height points are requested with `num_points=config.num_sample_height_points`;
        the number actually returned for each line is recorded in
        `height_points_group_sizes`.

        Returns:
            A `PageTextLinePolygonCollection` sized like the page.
        """
        text_line_polygons: List[Polygon] = []

        height_points_group_sizes: List[int] = []
        height_points_up = PointList()
        height_points_down = PointList()

        for text_line in page_text_line_collection.text_lines:
            text_line_polygons.append(text_line.to_polygon())

            cur_height_points_up = text_line.get_height_points(
                num_points=self.config.num_sample_height_points,
                is_up=True,
            )
            cur_height_points_down = text_line.get_height_points(
                num_points=self.config.num_sample_height_points,
                is_up=False,
            )
            # Up and down points come in pairs, and every line must contribute some.
            height_points_group_size = len(cur_height_points_up)
            assert height_points_group_size == len(cur_height_points_down)
            assert height_points_group_size > 0
            height_points_group_sizes.append(height_points_group_size)
            height_points_up.extend(cur_height_points_up)
            height_points_down.extend(cur_height_points_down)

        return PageTextLinePolygonCollection(
            height=page_text_line_collection.height,
            width=page_text_line_collection.width,
            polygons=text_line_polygons,
            height_points_group_sizes=height_points_group_sizes,
            height_points_up=height_points_up,
            height_points_down=height_points_down,
        )

    def generate_page_text_line_mask(self, page_text_line_collection: PageTextLineCollection):
        """Return a page-shaped mask with the box of every text line filled in."""
        page_text_line_mask = Mask.from_shape(page_text_line_collection.shape)

        text_lines = page_text_line_collection.text_lines
        for text_line in text_lines:
            text_line.box.fill_mask(page_text_line_mask)
        return page_text_line_mask

    def generate_text_line_boxes_and_dilated_boxes(
        self, page_text_line_collection: PageTextLineCollection
    ):
        """Return the text line boxes and their dilated counterparts.

        Text lines are processed in descending `font_size` order. Each box is dilated
        by `config.boundary_dilate_ratio` and then clipped to the page shape.

        Returns:
            A pair `(boxes, dilated_boxes)` of index-aligned lists.
        """
        text_lines = page_text_line_collection.text_lines
        text_lines = sorted(text_lines, key=lambda tl: tl.font_size, reverse=True)

        boxes: List[Box] = []
        dilated_boxes: List[Box] = []

        for text_line in text_lines:
            box = text_line.box
            boxes.append(box)

            dilated_box = box.to_dilated_box(self.config.boundary_dilate_ratio, clip_long_side=True)
            dilated_box = dilated_box.to_clipped_box(page_text_line_collection.shape)
            dilated_boxes.append(dilated_box)

        return boxes, dilated_boxes

    @classmethod
    def generate_dilated_only_boxes(
        cls,
        box: Box,
        dilated_box: Box,
    ):
        """Split the part of `dilated_box` lying outside `box` into four strips.

        The up/down strips keep the horizontal span of `dilated_box`; the left/right
        strips keep the vertical span of `box`.

        Returns:
            A tuple `(up, down, left, right)`. An entry is None when the strip is
            empty, i.e. its start coordinate is greater than its end coordinate.
        """
        dilated_up_box = attrs.evolve(
            dilated_box,
            down=box.up - 1,
        )
        # Emptiness must be judged on the strip's own `down` (box.up - 1), not on
        # `dilated_box.down`; otherwise an empty strip (e.g. dilated_box.up == box.up)
        # would be kept.
        if dilated_up_box.up > dilated_up_box.down:
            dilated_up_box = None

        dilated_down_box = attrs.evolve(
            dilated_box,
            up=box.down + 1,
        )
        if dilated_down_box.up > dilated_down_box.down:
            dilated_down_box = None

        dilated_left_box = attrs.evolve(
            box,
            left=dilated_box.left,
            right=box.left - 1,
        )
        if dilated_left_box.left > dilated_left_box.right:
            dilated_left_box = None

        dilated_right_box = attrs.evolve(
            box,
            left=box.right + 1,
            right=dilated_box.right,
        )
        if dilated_right_box.left > dilated_right_box.right:
            dilated_right_box = None

        return (
            dilated_up_box,
            dilated_down_box,
            dilated_left_box,
            dilated_right_box,
        )

    def generate_page_text_line_boundary_masks(
        self,
        page_text_line_collection: PageTextLineCollection,
        boxes: Sequence[Box],
        dilated_boxes: Sequence[Box],
        page_text_line_mask: Mask,
    ):
        """Build the boundary mask and the combined text-line-and-boundary mask.

        Returns:
            A pair `(boundary_mask, text_line_and_boundary_mask)`.
        """
        boundary_mask = Mask.from_shape(page_text_line_collection.shape)

        for box, dilated_box in zip(boxes, dilated_boxes):
            dilated_only_boxes = self.generate_dilated_only_boxes(box, dilated_box)
            for dilated_only_box in dilated_only_boxes:
                if dilated_only_box:
                    dilated_only_box.fill_mask(boundary_mask)

        # A strip of one line may cover another text line; fill the text line
        # area of the boundary mask with 0.
        page_text_line_mask.fill_mask(boundary_mask, 0)

        text_line_and_boundary_mask = boundary_mask.copy()
        page_text_line_mask.fill_mask(text_line_and_boundary_mask)

        return boundary_mask, text_line_and_boundary_mask

    def generate_page_text_line_boundary_score_map(
        self,
        page_text_line_collection: PageTextLineCollection,
        boxes: Sequence[Box],
        dilated_boxes: Sequence[Box],
        page_text_line_boundary_mask: Mask,
    ):
        """Build the boundary score map.

        The map starts at 1.0. For every non-empty strip, the quad spanned by the
        box edge (point0, point1) and the matching dilated box edge (point2, point3)
        is filled from the second `uv` channel with `keep_min_value=True`.
        NOTE(review): presumably this is a ramp from the box edge to the dilated
        edge with the minimum kept on overlaps -- confirm against `ScoreMap`.
        Finally the inverted boundary mask is used to fill 0.0 into the map.
        """
        boundary_score_map = ScoreMap.from_shape(page_text_line_collection.shape, value=1.0)

        for box, dilated_box in zip(boxes, dilated_boxes):
            (
                dilated_up_box,
                dilated_down_box,
                dilated_left_box,
                dilated_right_box,
            ) = self.generate_dilated_only_boxes(box, dilated_box)

            if dilated_up_box:
                boundary_score_map.fill_by_quad_interpolation(
                    point0=Point.create(y=box.up, x=box.right),
                    point1=Point.create(y=box.up, x=box.left),
                    point2=Point.create(y=dilated_box.up, x=dilated_box.left),
                    point3=Point.create(y=dilated_box.up, x=dilated_box.right),
                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
                    keep_min_value=True,
                )

            if dilated_down_box:
                boundary_score_map.fill_by_quad_interpolation(
                    point0=Point.create(y=box.down, x=box.left),
                    point1=Point.create(y=box.down, x=box.right),
                    point2=Point.create(y=dilated_box.down, x=dilated_box.right),
                    point3=Point.create(y=dilated_box.down, x=dilated_box.left),
                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
                    keep_min_value=True,
                )

            if dilated_left_box:
                boundary_score_map.fill_by_quad_interpolation(
                    point0=Point.create(y=box.up, x=box.left),
                    point1=Point.create(y=box.down, x=box.left),
                    point2=Point.create(y=dilated_box.down, x=dilated_box.left),
                    point3=Point.create(y=dilated_box.up, x=dilated_box.left),
                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
                    keep_min_value=True,
                )

            if dilated_right_box:
                boundary_score_map.fill_by_quad_interpolation(
                    point0=Point.create(y=box.down, x=box.right),
                    point1=Point.create(y=box.up, x=box.right),
                    point2=Point.create(y=dilated_box.up, x=dilated_box.right),
                    point3=Point.create(y=dilated_box.down, x=dilated_box.right),
                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
                    keep_min_value=True,
                )

        page_text_line_boundary_mask.to_inverted_mask().fill_score_map(boundary_score_map, 0.0)
        return boundary_score_map

    def run(self, input: PageTextLineLabelStepInput, rng: RandomGenerator):
        """Generate all labels enabled in the config.

        `rng` is not used by this step.

        NOTE(review): the nesting of the `if` blocks is reconstructed from a
        whitespace-collapsed listing. It is written so that the boundary masks
        need the text line mask and the score map needs the boundary mask, which
        keeps every `Mask` argument non-None -- confirm against the repository.
        """
        page_text_line_step_output = input.page_text_line_step_output
        page_text_line_collection = page_text_line_step_output.page_text_line_collection

        page_char_polygon_collection = self.generate_page_char_polygon_collection(
            page_text_line_collection,
        )
        page_text_line_polygon_collection = self.generate_page_text_line_polygon_collection(
            page_text_line_collection,
        )

        page_text_line_mask: Optional[Mask] = None
        page_text_line_boundary_mask: Optional[Mask] = None
        page_text_line_and_boundary_mask: Optional[Mask] = None
        page_text_line_boundary_score_map: Optional[ScoreMap] = None

        if self.config.enable_text_line_mask:
            page_text_line_mask = self.generate_page_text_line_mask(page_text_line_collection)

            boxes, dilated_boxes = self.generate_text_line_boxes_and_dilated_boxes(
                page_text_line_collection
            )
            if self.config.enable_boundary_mask:
                (
                    page_text_line_boundary_mask,
                    page_text_line_and_boundary_mask,
                ) = self.generate_page_text_line_boundary_masks(
                    page_text_line_collection,
                    boxes,
                    dilated_boxes,
                    page_text_line_mask,
                )

                if self.config.enable_boundary_score_map:
                    page_text_line_boundary_score_map = \
                        self.generate_page_text_line_boundary_score_map(
                            page_text_line_collection,
                            boxes,
                            dilated_boxes,
                            page_text_line_boundary_mask,
                        )

        return PageTextLineLabelStepOutput(
            page_char_polygon_collection=page_char_polygon_collection,
            page_text_line_polygon_collection=page_text_line_polygon_collection,
            page_text_line_mask=page_text_line_mask,
            page_text_line_boundary_mask=page_text_line_boundary_mask,
            page_text_line_and_boundary_mask=page_text_line_and_boundary_mask,
            page_text_line_boundary_score_map=page_text_line_boundary_score_map,
        )


page_text_line_label_step_factory = PipelineStepFactory(PageTextLineLabelStep)
class PageTextLineLabelStepConfig:
    """Options controlling which labels the page text line label step generates."""

    # Passed as `num_points` when sampling height points of each text line.
    num_sample_height_points: int = 3
    # Generate the page-sized mask covering the text line boxes.
    enable_text_line_mask: bool = False
    # Generate the boundary mask and the combined text-line-and-boundary mask.
    enable_boundary_mask: bool = False
    # Passed to `Box.to_dilated_box` to grow each text line box.
    boundary_dilate_ratio: float = 0.5
    # Generate the boundary score map.
    enable_boundary_score_map: bool = False
def __init__(
    self,
    num_sample_height_points=attr_dict['num_sample_height_points'].default,
    enable_text_line_mask=attr_dict['enable_text_line_mask'].default,
    enable_boundary_mask=attr_dict['enable_boundary_mask'].default,
    boundary_dilate_ratio=attr_dict['boundary_dilate_ratio'].default,
    enable_boundary_score_map=attr_dict['enable_boundary_score_map'].default,
):
    # Generated initializer: stores every argument on the attribute of the same name.
    # Defaults are read from `attr_dict`, which is not defined in this snippet --
    # presumably it is supplied by the attrs code generator.
    self.num_sample_height_points = num_sample_height_points
    self.enable_text_line_mask = enable_text_line_mask
    self.enable_boundary_mask = enable_boundary_mask
    self.boundary_dilate_ratio = boundary_dilate_ratio
    self.enable_boundary_score_map = enable_boundary_score_map
Method generated by attrs for class PageTextLineLabelStepConfig.
def __init__(self, page_text_line_step_output):
    # Generated initializer: stores the single required argument as-is.
    self.page_text_line_step_output = page_text_line_step_output
Method generated by attrs for class PageTextLineLabelStepInput.
class PageTextLinePolygonCollection:
    """Text-line polygons of a page together with their sampled height points."""

    # Page size.
    height: int
    width: int
    # One polygon per text line.
    polygons: Sequence[Polygon]
    # Number of height points belonging to each text line, in text line order;
    # needed to split the flat point lists below back into per-line groups.
    height_points_group_sizes: Sequence[int]
    # Flat, index-aligned lists of the "up" and "down" height points.
    height_points_up: PointList
    height_points_down: PointList
def __init__(
    self,
    height,
    width,
    polygons,
    height_points_group_sizes,
    height_points_up,
    height_points_down,
):
    # Generated initializer: all arguments are required and stored unchanged.
    self.height = height
    self.width = width
    self.polygons = polygons
    self.height_points_group_sizes = height_points_group_sizes
    self.height_points_up = height_points_up
    self.height_points_down = height_points_down
Method generated by attrs for class PageTextLinePolygonCollection.
class PageCharPolygonCollection:
    """Char polygons of a page with one up/down height point per polygon."""

    # Page size.
    height: int
    width: int
    polygons: Sequence[Polygon]
    # Index-aligned with `polygons`.
    height_points_up: PointList
    height_points_down: PointList
def __init__(self, height, width, polygons, height_points_up, height_points_down):
    # Generated initializer: all arguments are required and stored unchanged.
    self.height = height
    self.width = width
    self.polygons = polygons
    self.height_points_up = height_points_up
    self.height_points_down = height_points_down
Method generated by attrs for class PageCharPolygonCollection.
class PageTextLineLabelStepOutput:
    """Labels produced by the page text line label step."""

    page_char_polygon_collection: PageCharPolygonCollection
    page_text_line_polygon_collection: PageTextLinePolygonCollection
    # The optional labels below are None when they were not generated.
    page_text_line_mask: Optional[Mask]
    page_text_line_boundary_mask: Optional[Mask]
    page_text_line_and_boundary_mask: Optional[Mask]
    page_text_line_boundary_score_map: Optional[ScoreMap]
def __init__(
    self,
    page_char_polygon_collection,
    page_text_line_polygon_collection,
    page_text_line_mask,
    page_text_line_boundary_mask,
    page_text_line_and_boundary_mask,
    page_text_line_boundary_score_map,
):
    # Generated initializer: all arguments are required and stored unchanged.
    self.page_char_polygon_collection = page_char_polygon_collection
    self.page_text_line_polygon_collection = page_text_line_polygon_collection
    self.page_text_line_mask = page_text_line_mask
    self.page_text_line_boundary_mask = page_text_line_boundary_mask
    self.page_text_line_and_boundary_mask = page_text_line_and_boundary_mask
    self.page_text_line_boundary_score_map = page_text_line_boundary_score_map
Method generated by attrs for class PageTextLineLabelStepOutput.
class PageTextLineLabelStep(
    PipelineStep[
        PageTextLineLabelStepConfig,
        PageTextLineLabelStepInput,
        PageTextLineLabelStepOutput,
    ]
):  # yapf: disable
    """Pipeline step deriving text detection labels from the text lines of a page.

    Polygon collections are always generated; masks and the boundary score map are
    generated only when the corresponding config flags are set.
    """

    def generate_page_char_polygon_collection(
        self,
        page_text_line_collection: PageTextLineCollection,
    ):
        """Gather char polygons and char-level height points of all text lines.

        Returns:
            A `PageCharPolygonCollection` sized like the page.
        """
        char_polygons: List[Polygon] = []
        height_points_up = PointList()
        height_points_down = PointList()

        for text_line in page_text_line_collection.text_lines:
            char_polygons.extend(
                text_line.to_char_polygons(
                    page_height=page_text_line_collection.height,
                    page_width=page_text_line_collection.width,
                )
            )
            height_points_up.extend(text_line.get_char_level_height_points(is_up=True))
            height_points_down.extend(text_line.get_char_level_height_points(is_up=False))

        # The three sequences must stay index-aligned: one up/down point per polygon.
        assert len(char_polygons) == len(height_points_up) == len(height_points_down)
        return PageCharPolygonCollection(
            height=page_text_line_collection.height,
            width=page_text_line_collection.width,
            polygons=char_polygons,
            height_points_up=height_points_up,
            height_points_down=height_points_down,
        )

    def generate_page_text_line_polygon_collection(
        self,
        page_text_line_collection: PageTextLineCollection,
    ):
        """Gather one polygon per text line plus its sampled up/down height points.

        Height points are requested with `num_points=config.num_sample_height_points`;
        the number actually returned for each line is recorded in
        `height_points_group_sizes`.

        Returns:
            A `PageTextLinePolygonCollection` sized like the page.
        """
        text_line_polygons: List[Polygon] = []

        height_points_group_sizes: List[int] = []
        height_points_up = PointList()
        height_points_down = PointList()

        for text_line in page_text_line_collection.text_lines:
            text_line_polygons.append(text_line.to_polygon())

            cur_height_points_up = text_line.get_height_points(
                num_points=self.config.num_sample_height_points,
                is_up=True,
            )
            cur_height_points_down = text_line.get_height_points(
                num_points=self.config.num_sample_height_points,
                is_up=False,
            )
            # Up and down points come in pairs, and every line must contribute some.
            height_points_group_size = len(cur_height_points_up)
            assert height_points_group_size == len(cur_height_points_down)
            assert height_points_group_size > 0
            height_points_group_sizes.append(height_points_group_size)
            height_points_up.extend(cur_height_points_up)
            height_points_down.extend(cur_height_points_down)

        return PageTextLinePolygonCollection(
            height=page_text_line_collection.height,
            width=page_text_line_collection.width,
            polygons=text_line_polygons,
            height_points_group_sizes=height_points_group_sizes,
            height_points_up=height_points_up,
            height_points_down=height_points_down,
        )

    def generate_page_text_line_mask(self, page_text_line_collection: PageTextLineCollection):
        """Return a page-shaped mask with the box of every text line filled in."""
        page_text_line_mask = Mask.from_shape(page_text_line_collection.shape)

        text_lines = page_text_line_collection.text_lines
        for text_line in text_lines:
            text_line.box.fill_mask(page_text_line_mask)
        return page_text_line_mask

    def generate_text_line_boxes_and_dilated_boxes(
        self, page_text_line_collection: PageTextLineCollection
    ):
        """Return the text line boxes and their dilated counterparts.

        Text lines are processed in descending `font_size` order. Each box is dilated
        by `config.boundary_dilate_ratio` and then clipped to the page shape.

        Returns:
            A pair `(boxes, dilated_boxes)` of index-aligned lists.
        """
        text_lines = page_text_line_collection.text_lines
        text_lines = sorted(text_lines, key=lambda tl: tl.font_size, reverse=True)

        boxes: List[Box] = []
        dilated_boxes: List[Box] = []

        for text_line in text_lines:
            box = text_line.box
            boxes.append(box)

            dilated_box = box.to_dilated_box(self.config.boundary_dilate_ratio, clip_long_side=True)
            dilated_box = dilated_box.to_clipped_box(page_text_line_collection.shape)
            dilated_boxes.append(dilated_box)

        return boxes, dilated_boxes

    @classmethod
    def generate_dilated_only_boxes(
        cls,
        box: Box,
        dilated_box: Box,
    ):
        """Split the part of `dilated_box` lying outside `box` into four strips.

        The up/down strips keep the horizontal span of `dilated_box`; the left/right
        strips keep the vertical span of `box`.

        Returns:
            A tuple `(up, down, left, right)`. An entry is None when the strip is
            empty, i.e. its start coordinate is greater than its end coordinate.
        """
        dilated_up_box = attrs.evolve(
            dilated_box,
            down=box.up - 1,
        )
        # Emptiness must be judged on the strip's own `down` (box.up - 1), not on
        # `dilated_box.down`; otherwise an empty strip (e.g. dilated_box.up == box.up)
        # would be kept.
        if dilated_up_box.up > dilated_up_box.down:
            dilated_up_box = None

        dilated_down_box = attrs.evolve(
            dilated_box,
            up=box.down + 1,
        )
        if dilated_down_box.up > dilated_down_box.down:
            dilated_down_box = None

        dilated_left_box = attrs.evolve(
            box,
            left=dilated_box.left,
            right=box.left - 1,
        )
        if dilated_left_box.left > dilated_left_box.right:
            dilated_left_box = None

        dilated_right_box = attrs.evolve(
            box,
            left=box.right + 1,
            right=dilated_box.right,
        )
        if dilated_right_box.left > dilated_right_box.right:
            dilated_right_box = None

        return (
            dilated_up_box,
            dilated_down_box,
            dilated_left_box,
            dilated_right_box,
        )

    def generate_page_text_line_boundary_masks(
        self,
        page_text_line_collection: PageTextLineCollection,
        boxes: Sequence[Box],
        dilated_boxes: Sequence[Box],
        page_text_line_mask: Mask,
    ):
        """Build the boundary mask and the combined text-line-and-boundary mask.

        Returns:
            A pair `(boundary_mask, text_line_and_boundary_mask)`.
        """
        boundary_mask = Mask.from_shape(page_text_line_collection.shape)

        for box, dilated_box in zip(boxes, dilated_boxes):
            dilated_only_boxes = self.generate_dilated_only_boxes(box, dilated_box)
            for dilated_only_box in dilated_only_boxes:
                if dilated_only_box:
                    dilated_only_box.fill_mask(boundary_mask)

        # A strip of one line may cover another text line; fill the text line
        # area of the boundary mask with 0.
        page_text_line_mask.fill_mask(boundary_mask, 0)

        text_line_and_boundary_mask = boundary_mask.copy()
        page_text_line_mask.fill_mask(text_line_and_boundary_mask)

        return boundary_mask, text_line_and_boundary_mask

    def generate_page_text_line_boundary_score_map(
        self,
        page_text_line_collection: PageTextLineCollection,
        boxes: Sequence[Box],
        dilated_boxes: Sequence[Box],
        page_text_line_boundary_mask: Mask,
    ):
        """Build the boundary score map.

        The map starts at 1.0. For every non-empty strip, the quad spanned by the
        box edge (point0, point1) and the matching dilated box edge (point2, point3)
        is filled from the second `uv` channel with `keep_min_value=True`.
        NOTE(review): presumably this is a ramp from the box edge to the dilated
        edge with the minimum kept on overlaps -- confirm against `ScoreMap`.
        Finally the inverted boundary mask is used to fill 0.0 into the map.
        """
        boundary_score_map = ScoreMap.from_shape(page_text_line_collection.shape, value=1.0)

        for box, dilated_box in zip(boxes, dilated_boxes):
            (
                dilated_up_box,
                dilated_down_box,
                dilated_left_box,
                dilated_right_box,
            ) = self.generate_dilated_only_boxes(box, dilated_box)

            if dilated_up_box:
                boundary_score_map.fill_by_quad_interpolation(
                    point0=Point.create(y=box.up, x=box.right),
                    point1=Point.create(y=box.up, x=box.left),
                    point2=Point.create(y=dilated_box.up, x=dilated_box.left),
                    point3=Point.create(y=dilated_box.up, x=dilated_box.right),
                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
                    keep_min_value=True,
                )

            if dilated_down_box:
                boundary_score_map.fill_by_quad_interpolation(
                    point0=Point.create(y=box.down, x=box.left),
                    point1=Point.create(y=box.down, x=box.right),
                    point2=Point.create(y=dilated_box.down, x=dilated_box.right),
                    point3=Point.create(y=dilated_box.down, x=dilated_box.left),
                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
                    keep_min_value=True,
                )

            if dilated_left_box:
                boundary_score_map.fill_by_quad_interpolation(
                    point0=Point.create(y=box.up, x=box.left),
                    point1=Point.create(y=box.down, x=box.left),
                    point2=Point.create(y=dilated_box.down, x=dilated_box.left),
                    point3=Point.create(y=dilated_box.up, x=dilated_box.left),
                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
                    keep_min_value=True,
                )

            if dilated_right_box:
                boundary_score_map.fill_by_quad_interpolation(
                    point0=Point.create(y=box.down, x=box.right),
                    point1=Point.create(y=box.up, x=box.right),
                    point2=Point.create(y=dilated_box.up, x=dilated_box.right),
                    point3=Point.create(y=dilated_box.down, x=dilated_box.right),
                    func_np_uv_to_mat=lambda np_uv: np_uv[:, :, 1],
                    keep_min_value=True,
                )

        page_text_line_boundary_mask.to_inverted_mask().fill_score_map(boundary_score_map, 0.0)
        return boundary_score_map

    def run(self, input: PageTextLineLabelStepInput, rng: RandomGenerator):
        """Generate all labels enabled in the config.

        `rng` is not used by this step.

        NOTE(review): the nesting of the `if` blocks is reconstructed from a
        whitespace-collapsed listing. It is written so that the boundary masks
        need the text line mask and the score map needs the boundary mask, which
        keeps every `Mask` argument non-None -- confirm against the repository.
        """
        page_text_line_step_output = input.page_text_line_step_output
        page_text_line_collection = page_text_line_step_output.page_text_line_collection

        page_char_polygon_collection = self.generate_page_char_polygon_collection(
            page_text_line_collection,
        )
        page_text_line_polygon_collection = self.generate_page_text_line_polygon_collection(
            page_text_line_collection,
        )

        page_text_line_mask: Optional[Mask] = None
        page_text_line_boundary_mask: Optional[Mask] = None
        page_text_line_and_boundary_mask: Optional[Mask] = None
        page_text_line_boundary_score_map: Optional[ScoreMap] = None

        if self.config.enable_text_line_mask:
            page_text_line_mask = self.generate_page_text_line_mask(page_text_line_collection)

            boxes, dilated_boxes = self.generate_text_line_boxes_and_dilated_boxes(
                page_text_line_collection
            )
            if self.config.enable_boundary_mask:
                (
                    page_text_line_boundary_mask,
                    page_text_line_and_boundary_mask,
                ) = self.generate_page_text_line_boundary_masks(
                    page_text_line_collection,
                    boxes,
                    dilated_boxes,
                    page_text_line_mask,
                )

                if self.config.enable_boundary_score_map:
                    page_text_line_boundary_score_map = \
                        self.generate_page_text_line_boundary_score_map(
                            page_text_line_collection,
                            boxes,
                            dilated_boxes,
                            page_text_line_boundary_mask,
                        )

        return PageTextLineLabelStepOutput(
            page_char_polygon_collection=page_char_polygon_collection,
            page_text_line_polygon_collection=page_text_line_polygon_collection,
            page_text_line_mask=page_text_line_mask,
            page_text_line_boundary_mask=page_text_line_boundary_mask,
            page_text_line_and_boundary_mask=page_text_line_and_boundary_mask,
            page_text_line_boundary_score_map=page_text_line_boundary_score_map,
        )
Abstract base class for generic types.
A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::
class Mapping(Generic[KT, VT]): def __getitem__(self, key: KT) -> VT: ... # Etc.
This class can then be used as follows::
def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT: try: return mapping[key] except KeyError: return default
def generate_page_char_polygon_collection(
    self,
    page_text_line_collection: PageTextLineCollection,
):
    """Collect every char polygon of the page with its up/down height points.

    Returns:
        A `PageCharPolygonCollection` carrying the page size, the char polygons of
        all text lines and one up and one down height point per polygon.
    """
    page_height = page_text_line_collection.height
    page_width = page_text_line_collection.width

    polygons: List[Polygon] = []
    up_points = PointList()
    down_points = PointList()

    for line in page_text_line_collection.text_lines:
        polygons += line.to_char_polygons(page_height=page_height, page_width=page_width)
        up_points.extend(line.get_char_level_height_points(is_up=True))
        down_points.extend(line.get_char_level_height_points(is_up=False))

    # Polygons and both point lists are consumed index by index downstream.
    assert len(polygons) == len(up_points) == len(down_points)

    return PageCharPolygonCollection(
        height=page_height,
        width=page_width,
        polygons=polygons,
        height_points_up=up_points,
        height_points_down=down_points,
    )
def generate_page_text_line_polygon_collection(
    self,
    page_text_line_collection: PageTextLineCollection,
):
    """Collect one polygon per text line together with its sampled height points.

    Height points are requested with `num_points=config.num_sample_height_points`.
    The count returned for each line is stored in `height_points_group_sizes` so
    that the flat up/down point lists can be regrouped per text line.
    """
    num_points = self.config.num_sample_height_points

    polygons: List[Polygon] = []
    group_sizes: List[int] = []
    up_points = PointList()
    down_points = PointList()

    for line in page_text_line_collection.text_lines:
        polygons.append(line.to_polygon())

        line_up_points = line.get_height_points(num_points=num_points, is_up=True)
        line_down_points = line.get_height_points(num_points=num_points, is_up=False)

        # Up/down points are paired, and an empty group is not allowed.
        group_size = len(line_up_points)
        assert group_size == len(line_down_points)
        assert group_size > 0

        group_sizes.append(group_size)
        up_points.extend(line_up_points)
        down_points.extend(line_down_points)

    return PageTextLinePolygonCollection(
        height=page_text_line_collection.height,
        width=page_text_line_collection.width,
        polygons=polygons,
        height_points_group_sizes=group_sizes,
        height_points_up=up_points,
        height_points_down=down_points,
    )
def generate_page_text_line_mask(self, page_text_line_collection: PageTextLineCollection):
    """Create a page-shaped mask and fill in the box of each text line."""
    mask = Mask.from_shape(page_text_line_collection.shape)
    for box in (text_line.box for text_line in page_text_line_collection.text_lines):
        box.fill_mask(mask)
    return mask
def generate_text_line_boxes_and_dilated_boxes(
    self, page_text_line_collection: PageTextLineCollection
):
    """Return `(boxes, dilated_boxes)` for the text lines, largest font size first.

    Every dilated box is the text line box grown by `config.boundary_dilate_ratio`
    (with `clip_long_side=True`) and then clipped to the page shape. Both lists
    are index-aligned.
    """
    page_shape = page_text_line_collection.shape
    dilate_ratio = self.config.boundary_dilate_ratio

    ordered_text_lines = sorted(
        page_text_line_collection.text_lines,
        key=lambda tl: tl.font_size,
        reverse=True,
    )

    boxes: List[Box] = [text_line.box for text_line in ordered_text_lines]
    dilated_boxes: List[Box] = [
        box.to_dilated_box(dilate_ratio, clip_long_side=True).to_clipped_box(page_shape)
        for box in boxes
    ]
    return boxes, dilated_boxes
@classmethod
def generate_dilated_only_boxes(
    cls,
    box: Box,
    dilated_box: Box,
):
    """Split the part of `dilated_box` lying outside `box` into four strips.

    The up/down strips keep the horizontal span of `dilated_box`; the left/right
    strips keep the vertical span of `box`.

    Returns:
        A tuple `(up, down, left, right)`. An entry is None when the strip is
        empty, i.e. its start coordinate is greater than its end coordinate.
    """
    dilated_up_box = attrs.evolve(
        dilated_box,
        down=box.up - 1,
    )
    # Emptiness must be judged on the strip's own `down` (box.up - 1), not on
    # `dilated_box.down`; otherwise an empty strip (e.g. dilated_box.up == box.up)
    # would be kept.
    if dilated_up_box.up > dilated_up_box.down:
        dilated_up_box = None

    dilated_down_box = attrs.evolve(
        dilated_box,
        up=box.down + 1,
    )
    if dilated_down_box.up > dilated_down_box.down:
        dilated_down_box = None

    dilated_left_box = attrs.evolve(
        box,
        left=dilated_box.left,
        right=box.left - 1,
    )
    if dilated_left_box.left > dilated_left_box.right:
        dilated_left_box = None

    dilated_right_box = attrs.evolve(
        box,
        left=box.right + 1,
        right=dilated_box.right,
    )
    if dilated_right_box.left > dilated_right_box.right:
        dilated_right_box = None

    return (
        dilated_up_box,
        dilated_down_box,
        dilated_left_box,
        dilated_right_box,
    )
def generate_page_text_line_boundary_masks(
    self,
    page_text_line_collection: PageTextLineCollection,
    boxes: Sequence[Box],
    dilated_boxes: Sequence[Box],
    page_text_line_mask: Mask,
):
    """Derive the boundary mask and the text-line-plus-boundary mask.

    Returns:
        `(boundary_mask, text_line_and_boundary_mask)`, both page-shaped.
    """
    boundary_mask = Mask.from_shape(page_text_line_collection.shape)

    # Fill in every non-empty strip surrounding a text line box.
    for box, dilated_box in zip(boxes, dilated_boxes):
        for strip in self.generate_dilated_only_boxes(box, dilated_box):
            if not strip:
                continue
            strip.fill_mask(boundary_mask)

    # Fill the text line area of the boundary mask with 0.
    page_text_line_mask.fill_mask(boundary_mask, 0)

    # The combined mask is a copy of the boundary mask with the text line area
    # filled in as well.
    combined_mask = boundary_mask.copy()
    page_text_line_mask.fill_mask(combined_mask)

    return boundary_mask, combined_mask
def generate_page_text_line_boundary_score_map(
    self,
    page_text_line_collection: PageTextLineCollection,
    boxes: Sequence[Box],
    dilated_boxes: Sequence[Box],
    page_text_line_boundary_mask: Mask,
):
    """Build the page-shaped boundary score map.

    The map is initialised to 1.0. For each non-empty strip around a text line box,
    the quad between the box edge (point0, point1) and the corresponding dilated
    box edge (point2, point3) is filled from the second `uv` channel, keeping the
    minimum value. Finally the inverted boundary mask is used to fill 0.0.
    NOTE(review): the exact interpolation semantics live in
    `ScoreMap.fill_by_quad_interpolation` -- confirm there.
    """
    score_map = ScoreMap.from_shape(page_text_line_collection.shape, value=1.0)

    def select_v(np_uv):
        return np_uv[:, :, 1]

    for box, dilated_box in zip(boxes, dilated_boxes):
        up_strip, down_strip, left_strip, right_strip = \
            self.generate_dilated_only_boxes(box, dilated_box)

        # Each entry: (strip, (y, x) coordinates of point0..point3).
        quads = (
            (
                up_strip,
                (
                    (box.up, box.right),
                    (box.up, box.left),
                    (dilated_box.up, dilated_box.left),
                    (dilated_box.up, dilated_box.right),
                ),
            ),
            (
                down_strip,
                (
                    (box.down, box.left),
                    (box.down, box.right),
                    (dilated_box.down, dilated_box.right),
                    (dilated_box.down, dilated_box.left),
                ),
            ),
            (
                left_strip,
                (
                    (box.up, box.left),
                    (box.down, box.left),
                    (dilated_box.down, dilated_box.left),
                    (dilated_box.up, dilated_box.left),
                ),
            ),
            (
                right_strip,
                (
                    (box.down, box.right),
                    (box.up, box.right),
                    (dilated_box.up, dilated_box.right),
                    (dilated_box.down, dilated_box.right),
                ),
            ),
        )

        for strip, corners in quads:
            if not strip:
                continue
            point0, point1, point2, point3 = (Point.create(y=y, x=x) for y, x in corners)
            score_map.fill_by_quad_interpolation(
                point0=point0,
                point1=point1,
                point2=point2,
                point3=point3,
                func_np_uv_to_mat=select_v,
                keep_min_value=True,
            )

    inverted_boundary_mask = page_text_line_boundary_mask.to_inverted_mask()
    inverted_boundary_mask.fill_score_map(score_map, 0.0)
    return score_map
def run(self, input: PageTextLineLabelStepInput, rng: RandomGenerator):
    """Generate all labels enabled in the config.

    The char-level and text-line-level polygon collections are always generated.
    `rng` is not used by this step.

    NOTE(review): the nesting of the `if` blocks is reconstructed from a
    whitespace-collapsed listing. It is written so that the boundary masks need
    the text line mask and the score map needs the boundary mask, which keeps
    every `Mask` argument non-None -- confirm against the repository.
    """
    page_text_line_step_output = input.page_text_line_step_output
    page_text_line_collection = page_text_line_step_output.page_text_line_collection

    page_char_polygon_collection = self.generate_page_char_polygon_collection(
        page_text_line_collection,
    )
    page_text_line_polygon_collection = self.generate_page_text_line_polygon_collection(
        page_text_line_collection,
    )

    # Optional labels stay None unless generated below.
    page_text_line_mask: Optional[Mask] = None
    page_text_line_boundary_mask: Optional[Mask] = None
    page_text_line_and_boundary_mask: Optional[Mask] = None
    page_text_line_boundary_score_map: Optional[ScoreMap] = None

    if self.config.enable_text_line_mask:
        page_text_line_mask = self.generate_page_text_line_mask(page_text_line_collection)

        boxes, dilated_boxes = self.generate_text_line_boxes_and_dilated_boxes(
            page_text_line_collection
        )
        if self.config.enable_boundary_mask:
            (
                page_text_line_boundary_mask,
                page_text_line_and_boundary_mask,
            ) = self.generate_page_text_line_boundary_masks(
                page_text_line_collection,
                boxes,
                dilated_boxes,
                page_text_line_mask,
            )

            if self.config.enable_boundary_score_map:
                page_text_line_boundary_score_map = \
                    self.generate_page_text_line_boundary_score_map(
                        page_text_line_collection,
                        boxes,
                        dilated_boxes,
                        page_text_line_boundary_mask,
                    )

    return PageTextLineLabelStepOutput(
        page_char_polygon_collection=page_char_polygon_collection,
        page_text_line_polygon_collection=page_text_line_polygon_collection,
        page_text_line_mask=page_text_line_mask,
        page_text_line_boundary_mask=page_text_line_boundary_mask,
        page_text_line_and_boundary_mask=page_text_line_and_boundary_mask,
        page_text_line_boundary_score_map=page_text_line_boundary_score_map,
    )