vkit.pipeline.text_detection.page_resizing
# Copyright 2022 vkit-x Administrator. All Rights Reserved.
#
# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
#
# The commercial license gives you the full rights to create and distribute software
# on your own terms without any SSPL license obligations. For more information,
# please see the "LICENSE_COMMERCIAL.txt" file.
#
# This project is also available under Server Side Public License (SSPL).
# The SSPL licensing is ideal for use cases such as open source projects with
# SSPL distribution, student/academic purposes, hobby projects, internal research
# projects without external distribution, or other projects where all SSPL
# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
from typing import Sequence
import logging

import attrs
from numpy.random import Generator as RandomGenerator
import numpy as np

from vkit.utility import sample_cv_resize_interpolation
from vkit.element import Mask, ScoreMap, Image
from .page_distortion import PageDistortionStepOutput
from ..interface import PipelineStep, PipelineStepFactory

logger = logging.getLogger(__name__)


@attrs.define
class PageResizingStepConfig:
    """Tunable parameters of the page resizing step."""

    # Bounds passed to ``rng.uniform`` when the target height of the reference
    # (smallest non-outlier) text line is sampled.
    resized_text_line_height_min: float = 3.0
    resized_text_line_height_max: float = 10.0
    # Text-line heights that are not strictly greater than this value are ignored
    # when the reference height is estimated.
    text_line_heights_filtering_thr: float = 1.0


@attrs.define
class PageResizingStepInput:
    """Input of the page resizing step: the output of the page distortion step."""

    page_distortion_step_output: PageDistortionStepOutput


# TODO: Some fields could be optional.
@attrs.define
class PageResizingStepOutput:
    """Resized page image together with its resized annotation layers.

    Every field is produced by ``PageResizingStep.run`` from the field of the
    same name on the page distortion step output.
    """

    page_image: Image
    page_active_mask: Mask
    page_char_mask: Mask
    page_seal_impression_char_mask: Mask
    # Values are multiplied by the resize ratio in addition to the grid being resized.
    page_char_height_score_map: ScoreMap
    page_text_line_mask: Mask
    # Values are multiplied by the resize ratio in addition to the grid being resized.
    page_text_line_height_score_map: ScoreMap


class PageResizingStep(
    PipelineStep[
        PageResizingStepConfig,
        PageResizingStepInput,
        PageResizingStepOutput,
    ]
):  # yapf: disable
    """Pipeline step that rescales a distorted page and its annotation layers.

    The scale is chosen so that the smallest non-outlier text-line height becomes a
    height sampled uniformly from the range given in the config.
    """

    def __init__(self, config: PageResizingStepConfig):
        super().__init__(config)

    def get_text_line_heights_min(
        self,
        page_distorted_text_line_heights: Sequence[float],
    ) -> float:
        """Return the smallest text-line height that is not an outlier.

        Heights that are not strictly greater than
        ``self.config.text_line_heights_filtering_thr`` are dropped first. Each
        remaining height is scored by its absolute deviation from the median divided
        by the median of those deviations; heights scoring 3.5 or more are ignored.

        Args:
            page_distorted_text_line_heights: Candidate text-line heights.

        Returns:
            The minimum of the surviving heights as a built-in ``float``.

        Raises:
            AssertionError: If no height is greater than the filtering threshold.
        """
        threshold = self.config.text_line_heights_filtering_thr

        # 1. Filtering.
        kept_heights = [
            height for height in page_distorted_text_line_heights if height > threshold
        ]
        if not kept_heights:
            # Raised explicitly (instead of a bare ``assert``) so that the check also
            # runs under ``python -O``; the exception type is unchanged.
            raise AssertionError(
                'No text line height is greater than '
                f'text_line_heights_filtering_thr={threshold}.'
            )

        # 2. Remove outliers.
        # https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
        # NOTE: The 0.6745 factor of the modified Z-score described there is not applied.
        heights = np.asarray(kept_heights)
        deltas = np.abs(heights - np.median(heights))
        # A zero median deviation would turn every ratio into NaN or inf; fall back to
        # 1.0 so that the raw deviations are compared against the cutoff instead.
        deltas_median = np.median(deltas) or 1.0
        inlier_mask = (deltas / deltas_median) < 3.5
        return float(heights[inlier_mask].min())

    def run(
        self,
        input: PageResizingStepInput,
        rng: RandomGenerator,
    ) -> PageResizingStepOutput:
        """Resize the distorted page and every annotation layer by one common ratio.

        The ratio is ``sampled_height / text_line_heights_min``: the sampled height is
        drawn with ``rng.uniform`` from the range given in the config and
        ``text_line_heights_min`` comes from ``get_text_line_heights_min``.

        Args:
            input: Wraps the output of the page distortion step.
            rng: Random generator used to sample the target text-line height and the
                interpolation method.

        Returns:
            The resized image, masks and score maps. The values of both score maps are
            additionally multiplied by the resize ratio.

        Raises:
            AssertionError: If a required field of the distortion output is falsy, or
                if a mask / score map does not have the shape of the page image.
        """
        distortion_output = input.page_distortion_step_output

        page_image = distortion_output.page_image
        page_active_mask = distortion_output.page_active_mask

        # Each of the following fields must be truthy on the upstream output.
        page_char_mask = distortion_output.page_char_mask
        assert page_char_mask

        page_seal_impression_char_mask = distortion_output.page_seal_impression_char_mask
        assert page_seal_impression_char_mask

        page_char_height_score_map = distortion_output.page_char_height_score_map
        assert page_char_height_score_map

        page_text_line_mask = distortion_output.page_text_line_mask
        assert page_text_line_mask

        page_text_line_height_score_map = distortion_output.page_text_line_height_score_map
        assert page_text_line_height_score_map

        page_distorted_text_line_heights = distortion_output.page_text_line_heights
        assert page_distorted_text_line_heights

        # Resizing.
        height, width = page_image.shape
        original_shape = (height, width)

        text_line_heights_min = self.get_text_line_heights_min(page_distorted_text_line_heights)
        logger.debug(f'text_line_heights_min={text_line_heights_min}')

        config = self.config
        resized_text_line_height = rng.uniform(
            config.resized_text_line_height_min,
            config.resized_text_line_height_max,
        )
        resize_ratio = resized_text_line_height / text_line_heights_min

        resized_height = round(resize_ratio * height)
        resized_width = round(resize_ratio * width)

        # The ``include_cv_inter_area`` flag is set only when the page shrinks.
        cv_resize_interpolation = sample_cv_resize_interpolation(
            rng,
            include_cv_inter_area=(resize_ratio < 1.0),
        )
        logger.debug(f'cv_resize_interpolation={cv_resize_interpolation}')

        # Every layer is resized with exactly the same target size and interpolation.
        resize_kwargs = {
            'resized_height': resized_height,
            'resized_width': resized_width,
            'cv_resize_interpolation': cv_resize_interpolation,
        }

        page_image = page_image.to_resized_image(**resize_kwargs)

        assert page_active_mask.shape == original_shape
        page_active_mask = page_active_mask.to_resized_mask(**resize_kwargs)

        assert page_char_mask.shape == original_shape
        page_char_mask = page_char_mask.to_resized_mask(**resize_kwargs)

        assert page_seal_impression_char_mask.shape == original_shape
        page_seal_impression_char_mask = page_seal_impression_char_mask.to_resized_mask(
            **resize_kwargs
        )

        assert page_char_height_score_map.shape == original_shape
        page_char_height_score_map = page_char_height_score_map.to_resized_score_map(
            **resize_kwargs
        )
        # Scores are resized as well.
        page_char_height_score_map.assign_mat(page_char_height_score_map.mat * resize_ratio)

        assert page_text_line_mask.shape == original_shape
        page_text_line_mask = page_text_line_mask.to_resized_mask(**resize_kwargs)

        assert page_text_line_height_score_map.shape == original_shape
        page_text_line_height_score_map = page_text_line_height_score_map.to_resized_score_map(
            **resize_kwargs
        )
        # Scores are resized as well.
        page_text_line_height_score_map.assign_mat(
            page_text_line_height_score_map.mat * resize_ratio
        )

        return PageResizingStepOutput(
            page_image=page_image,
            page_active_mask=page_active_mask,
            page_char_mask=page_char_mask,
            page_seal_impression_char_mask=page_seal_impression_char_mask,
            page_char_height_score_map=page_char_height_score_map,
            page_text_line_mask=page_text_line_mask,
            page_text_line_height_score_map=page_text_line_height_score_map,
        )


page_resizing_step_factory = PipelineStepFactory(PageResizingStep)
class
PageResizingStepConfig:
class PageResizingStepConfig:
    # Tunable parameters of the page resizing step.

    # Bounds of the range from which the target text-line height is sampled.
    resized_text_line_height_min: float = 3.0
    resized_text_line_height_max: float = 10.0
    # Text-line heights that are not strictly greater than this value are ignored
    # when the reference height is estimated.
    text_line_heights_filtering_thr: float = 1.0
PageResizingStepConfig( resized_text_line_height_min: float = 3.0, resized_text_line_height_max: float = 10.0, text_line_heights_filtering_thr: float = 1.0)
def __init__(
    self,
    resized_text_line_height_min=attr_dict['resized_text_line_height_min'].default,
    resized_text_line_height_max=attr_dict['resized_text_line_height_max'].default,
    text_line_heights_filtering_thr=attr_dict['text_line_heights_filtering_thr'].default,
):
    # Generated initializer: defaults are looked up in ``attr_dict`` and every
    # argument is stored on the attribute of the same name.
    self.resized_text_line_height_min = resized_text_line_height_min
    self.resized_text_line_height_max = resized_text_line_height_max
    self.text_line_heights_filtering_thr = text_line_heights_filtering_thr
Method generated by attrs for class PageResizingStepConfig.
class
PageResizingStepInput:
PageResizingStepInput( page_distortion_step_output: vkit.pipeline.text_detection.page_distortion.PageDistortionStepOutput)
def __init__(
    self,
    page_distortion_step_output,
):
    # Generated initializer: stores the single argument on the attribute of the same name.
    self.page_distortion_step_output = page_distortion_step_output
Method generated by attrs for class PageResizingStepInput.
class
PageResizingStepOutput:
class PageResizingStepOutput:
    # Resized page image together with its resized annotation layers.

    page_image: Image
    page_active_mask: Mask
    page_char_mask: Mask
    page_seal_impression_char_mask: Mask
    # Values are multiplied by the resize ratio in addition to the grid being resized.
    page_char_height_score_map: ScoreMap
    page_text_line_mask: Mask
    # Values are multiplied by the resize ratio in addition to the grid being resized.
    page_text_line_height_score_map: ScoreMap
PageResizingStepOutput( page_image: vkit.element.image.Image, page_active_mask: vkit.element.mask.Mask, page_char_mask: vkit.element.mask.Mask, page_seal_impression_char_mask: vkit.element.mask.Mask, page_char_height_score_map: vkit.element.score_map.ScoreMap, page_text_line_mask: vkit.element.mask.Mask, page_text_line_height_score_map: vkit.element.score_map.ScoreMap)
def __init__(
    self,
    page_image,
    page_active_mask,
    page_char_mask,
    page_seal_impression_char_mask,
    page_char_height_score_map,
    page_text_line_mask,
    page_text_line_height_score_map,
):
    # Generated initializer: every argument is stored on the attribute of the same name.
    self.page_image = page_image
    self.page_active_mask = page_active_mask
    self.page_char_mask = page_char_mask
    self.page_seal_impression_char_mask = page_seal_impression_char_mask
    self.page_char_height_score_map = page_char_height_score_map
    self.page_text_line_mask = page_text_line_mask
    self.page_text_line_height_score_map = page_text_line_height_score_map
Method generated by attrs for class PageResizingStepOutput.
class PageResizingStep(
    PipelineStep[
        PageResizingStepConfig,
        PageResizingStepInput,
        PageResizingStepOutput,
    ]
):  # yapf: disable
    """Pipeline step that rescales a distorted page and its annotation layers.

    The scale is chosen so that the smallest non-outlier text-line height becomes a
    height sampled uniformly from the range given in the config.
    """

    def __init__(self, config: PageResizingStepConfig):
        super().__init__(config)

    def get_text_line_heights_min(
        self,
        page_distorted_text_line_heights: Sequence[float],
    ) -> float:
        """Return the smallest text-line height that is not an outlier.

        Heights that are not strictly greater than
        ``self.config.text_line_heights_filtering_thr`` are dropped first. Each
        remaining height is scored by its absolute deviation from the median divided
        by the median of those deviations; heights scoring 3.5 or more are ignored.

        Args:
            page_distorted_text_line_heights: Candidate text-line heights.

        Returns:
            The minimum of the surviving heights as a built-in ``float``.

        Raises:
            AssertionError: If no height is greater than the filtering threshold.
        """
        threshold = self.config.text_line_heights_filtering_thr

        # 1. Filtering.
        kept_heights = [
            height for height in page_distorted_text_line_heights if height > threshold
        ]
        if not kept_heights:
            # Raised explicitly (instead of a bare ``assert``) so that the check also
            # runs under ``python -O``; the exception type is unchanged.
            raise AssertionError(
                'No text line height is greater than '
                f'text_line_heights_filtering_thr={threshold}.'
            )

        # 2. Remove outliers.
        # https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
        # NOTE: The 0.6745 factor of the modified Z-score described there is not applied.
        heights = np.asarray(kept_heights)
        deltas = np.abs(heights - np.median(heights))
        # A zero median deviation would turn every ratio into NaN or inf; fall back to
        # 1.0 so that the raw deviations are compared against the cutoff instead.
        deltas_median = np.median(deltas) or 1.0
        inlier_mask = (deltas / deltas_median) < 3.5
        return float(heights[inlier_mask].min())

    def run(
        self,
        input: PageResizingStepInput,
        rng: RandomGenerator,
    ) -> PageResizingStepOutput:
        """Resize the distorted page and every annotation layer by one common ratio.

        The ratio is ``sampled_height / text_line_heights_min``: the sampled height is
        drawn with ``rng.uniform`` from the range given in the config and
        ``text_line_heights_min`` comes from ``get_text_line_heights_min``.

        Args:
            input: Wraps the output of the page distortion step.
            rng: Random generator used to sample the target text-line height and the
                interpolation method.

        Returns:
            The resized image, masks and score maps. The values of both score maps are
            additionally multiplied by the resize ratio.

        Raises:
            AssertionError: If a required field of the distortion output is falsy, or
                if a mask / score map does not have the shape of the page image.
        """
        distortion_output = input.page_distortion_step_output

        page_image = distortion_output.page_image
        page_active_mask = distortion_output.page_active_mask

        # Each of the following fields must be truthy on the upstream output.
        page_char_mask = distortion_output.page_char_mask
        assert page_char_mask

        page_seal_impression_char_mask = distortion_output.page_seal_impression_char_mask
        assert page_seal_impression_char_mask

        page_char_height_score_map = distortion_output.page_char_height_score_map
        assert page_char_height_score_map

        page_text_line_mask = distortion_output.page_text_line_mask
        assert page_text_line_mask

        page_text_line_height_score_map = distortion_output.page_text_line_height_score_map
        assert page_text_line_height_score_map

        page_distorted_text_line_heights = distortion_output.page_text_line_heights
        assert page_distorted_text_line_heights

        # Resizing.
        height, width = page_image.shape
        original_shape = (height, width)

        text_line_heights_min = self.get_text_line_heights_min(page_distorted_text_line_heights)
        logger.debug(f'text_line_heights_min={text_line_heights_min}')

        config = self.config
        resized_text_line_height = rng.uniform(
            config.resized_text_line_height_min,
            config.resized_text_line_height_max,
        )
        resize_ratio = resized_text_line_height / text_line_heights_min

        resized_height = round(resize_ratio * height)
        resized_width = round(resize_ratio * width)

        # The ``include_cv_inter_area`` flag is set only when the page shrinks.
        cv_resize_interpolation = sample_cv_resize_interpolation(
            rng,
            include_cv_inter_area=(resize_ratio < 1.0),
        )
        logger.debug(f'cv_resize_interpolation={cv_resize_interpolation}')

        # Every layer is resized with exactly the same target size and interpolation.
        resize_kwargs = {
            'resized_height': resized_height,
            'resized_width': resized_width,
            'cv_resize_interpolation': cv_resize_interpolation,
        }

        page_image = page_image.to_resized_image(**resize_kwargs)

        assert page_active_mask.shape == original_shape
        page_active_mask = page_active_mask.to_resized_mask(**resize_kwargs)

        assert page_char_mask.shape == original_shape
        page_char_mask = page_char_mask.to_resized_mask(**resize_kwargs)

        assert page_seal_impression_char_mask.shape == original_shape
        page_seal_impression_char_mask = page_seal_impression_char_mask.to_resized_mask(
            **resize_kwargs
        )

        assert page_char_height_score_map.shape == original_shape
        page_char_height_score_map = page_char_height_score_map.to_resized_score_map(
            **resize_kwargs
        )
        # Scores are resized as well.
        page_char_height_score_map.assign_mat(page_char_height_score_map.mat * resize_ratio)

        assert page_text_line_mask.shape == original_shape
        page_text_line_mask = page_text_line_mask.to_resized_mask(**resize_kwargs)

        assert page_text_line_height_score_map.shape == original_shape
        page_text_line_height_score_map = page_text_line_height_score_map.to_resized_score_map(
            **resize_kwargs
        )
        # Scores are resized as well.
        page_text_line_height_score_map.assign_mat(
            page_text_line_height_score_map.mat * resize_ratio
        )

        return PageResizingStepOutput(
            page_image=page_image,
            page_active_mask=page_active_mask,
            page_char_mask=page_char_mask,
            page_seal_impression_char_mask=page_seal_impression_char_mask,
            page_char_height_score_map=page_char_height_score_map,
            page_text_line_mask=page_text_line_mask,
            page_text_line_height_score_map=page_text_line_height_score_map,
        )
Abstract base class for generic types.
A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::
    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
        # Etc.
This class can then be used as follows::
    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default
PageResizingStep( config: vkit.pipeline.text_detection.page_resizing.PageResizingStepConfig)
def
get_text_line_heights_min(self, page_distorted_text_line_heights: Sequence[float]):
def get_text_line_heights_min(
    self,
    page_distorted_text_line_heights: Sequence[float],
) -> float:
    """Return the smallest text-line height that is not an outlier.

    Heights that are not strictly greater than
    ``self.config.text_line_heights_filtering_thr`` are dropped first. Each
    remaining height is scored by its absolute deviation from the median divided
    by the median of those deviations; heights scoring 3.5 or more are ignored.

    Args:
        page_distorted_text_line_heights: Candidate text-line heights.

    Returns:
        The minimum of the surviving heights as a built-in ``float``.

    Raises:
        AssertionError: If no height is greater than the filtering threshold.
    """
    threshold = self.config.text_line_heights_filtering_thr

    # 1. Filtering.
    kept_heights = [
        height for height in page_distorted_text_line_heights if height > threshold
    ]
    if not kept_heights:
        # Raised explicitly (instead of a bare ``assert``) so that the check also
        # runs under ``python -O``; the exception type is unchanged.
        raise AssertionError(
            'No text line height is greater than '
            f'text_line_heights_filtering_thr={threshold}.'
        )

    # 2. Remove outliers.
    # https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
    # NOTE: The 0.6745 factor of the modified Z-score described there is not applied.
    heights = np.asarray(kept_heights)
    deltas = np.abs(heights - np.median(heights))
    # A zero median deviation would turn every ratio into NaN or inf; fall back to
    # 1.0 so that the raw deviations are compared against the cutoff instead.
    deltas_median = np.median(deltas) or 1.0
    inlier_mask = (deltas / deltas_median) < 3.5
    return float(heights[inlier_mask].min())
def
run( self, input: vkit.pipeline.text_detection.page_resizing.PageResizingStepInput, rng: numpy.random._generator.Generator):
def run(self, input: PageResizingStepInput, rng: RandomGenerator):
    """Resize the distorted page and every annotation layer by one common ratio.

    The ratio is ``sampled_height / text_line_heights_min``: the sampled height is
    drawn with ``rng.uniform`` from the range given in the config and
    ``text_line_heights_min`` comes from ``get_text_line_heights_min``.

    Args:
        input: Wraps the output of the page distortion step.
        rng: Random generator used to sample the target text-line height and the
            interpolation method.

    Returns:
        A ``PageResizingStepOutput`` with the resized image, masks and score maps.
        The values of both score maps are additionally multiplied by the resize ratio.

    Raises:
        AssertionError: If a required field of the distortion output is falsy, or
            if a mask / score map does not have the shape of the page image.
    """
    distortion_output = input.page_distortion_step_output

    page_image = distortion_output.page_image
    page_active_mask = distortion_output.page_active_mask

    # Each of the following fields must be truthy on the upstream output.
    page_char_mask = distortion_output.page_char_mask
    assert page_char_mask

    page_seal_impression_char_mask = distortion_output.page_seal_impression_char_mask
    assert page_seal_impression_char_mask

    page_char_height_score_map = distortion_output.page_char_height_score_map
    assert page_char_height_score_map

    page_text_line_mask = distortion_output.page_text_line_mask
    assert page_text_line_mask

    page_text_line_height_score_map = distortion_output.page_text_line_height_score_map
    assert page_text_line_height_score_map

    page_distorted_text_line_heights = distortion_output.page_text_line_heights
    assert page_distorted_text_line_heights

    # Resizing.
    height, width = page_image.shape
    original_shape = (height, width)

    text_line_heights_min = self.get_text_line_heights_min(page_distorted_text_line_heights)
    logger.debug(f'text_line_heights_min={text_line_heights_min}')

    config = self.config
    resized_text_line_height = rng.uniform(
        config.resized_text_line_height_min,
        config.resized_text_line_height_max,
    )
    resize_ratio = resized_text_line_height / text_line_heights_min

    resized_height = round(resize_ratio * height)
    resized_width = round(resize_ratio * width)

    # The ``include_cv_inter_area`` flag is set only when the page shrinks.
    cv_resize_interpolation = sample_cv_resize_interpolation(
        rng,
        include_cv_inter_area=(resize_ratio < 1.0),
    )
    logger.debug(f'cv_resize_interpolation={cv_resize_interpolation}')

    # Every layer is resized with exactly the same target size and interpolation.
    resize_kwargs = {
        'resized_height': resized_height,
        'resized_width': resized_width,
        'cv_resize_interpolation': cv_resize_interpolation,
    }

    page_image = page_image.to_resized_image(**resize_kwargs)

    assert page_active_mask.shape == original_shape
    page_active_mask = page_active_mask.to_resized_mask(**resize_kwargs)

    assert page_char_mask.shape == original_shape
    page_char_mask = page_char_mask.to_resized_mask(**resize_kwargs)

    assert page_seal_impression_char_mask.shape == original_shape
    page_seal_impression_char_mask = page_seal_impression_char_mask.to_resized_mask(
        **resize_kwargs
    )

    assert page_char_height_score_map.shape == original_shape
    page_char_height_score_map = page_char_height_score_map.to_resized_score_map(
        **resize_kwargs
    )
    # Scores are resized as well.
    page_char_height_score_map.assign_mat(page_char_height_score_map.mat * resize_ratio)

    assert page_text_line_mask.shape == original_shape
    page_text_line_mask = page_text_line_mask.to_resized_mask(**resize_kwargs)

    assert page_text_line_height_score_map.shape == original_shape
    page_text_line_height_score_map = page_text_line_height_score_map.to_resized_score_map(
        **resize_kwargs
    )
    # Scores are resized as well.
    page_text_line_height_score_map.assign_mat(
        page_text_line_height_score_map.mat * resize_ratio
    )

    return PageResizingStepOutput(
        page_image=page_image,
        page_active_mask=page_active_mask,
        page_char_mask=page_char_mask,
        page_seal_impression_char_mask=page_seal_impression_char_mask,
        page_char_height_score_map=page_char_height_score_map,
        page_text_line_mask=page_text_line_mask,
        page_text_line_height_score_map=page_text_line_height_score_map,
    )