vkit.pipeline.text_detection.page_resizing

  1# Copyright 2022 vkit-x Administrator. All Rights Reserved.
  2#
  3# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
  4#
  5# The commercial license gives you the full rights to create and distribute software
  6# on your own terms without any SSPL license obligations. For more information,
  7# please see the "LICENSE_COMMERCIAL.txt" file.
  8#
  9# This project is also available under Server Side Public License (SSPL).
 10# The SSPL licensing is ideal for use cases such as open source projects with
 11# SSPL distribution, student/academic purposes, hobby projects, internal research
 12# projects without external distribution, or other projects where all SSPL
 13# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
 14from typing import Sequence
 15import logging
 16
 17import attrs
 18from numpy.random import Generator as RandomGenerator
 19import numpy as np
 20
 21from vkit.utility import sample_cv_resize_interpolation
 22from vkit.element import Mask, ScoreMap, Image
 23from .page_distortion import PageDistortionStepOutput
 24from ..interface import PipelineStep, PipelineStepFactory
 25
# Module-level logger named after this module; `PageResizingStep.run` writes its
# debug messages (chosen reference height and interpolation) through it.
logger = logging.getLogger(__name__)
 27
 28
 29@attrs.define
 30class PageResizingStepConfig:
 31    resized_text_line_height_min: float = 3.0
 32    resized_text_line_height_max: float = 10.0
 33    text_line_heights_filtering_thr: float = 1.0
 34
 35
 36@attrs.define
 37class PageResizingStepInput:
 38    page_distortion_step_output: PageDistortionStepOutput
 39
 40
 41# TODO: Some fields could be optional.
 42@attrs.define
 43class PageResizingStepOutput:
 44    page_image: Image
 45    page_active_mask: Mask
 46    page_char_mask: Mask
 47    page_seal_impression_char_mask: Mask
 48    page_char_height_score_map: ScoreMap
 49    page_text_line_mask: Mask
 50    page_text_line_height_score_map: ScoreMap
 51
 52
 53class PageResizingStep(
 54    PipelineStep[
 55        PageResizingStepConfig,
 56        PageResizingStepInput,
 57        PageResizingStepOutput,
 58    ]
 59):  # yapf: disable
 60
 61    def __init__(self, config: PageResizingStepConfig):
 62        super().__init__(config)
 63
 64    def get_text_line_heights_min(self, page_distorted_text_line_heights: Sequence[float]):
 65        # 1. Filtering.
 66        text_line_heights = [
 67            text_line_height for text_line_height in page_distorted_text_line_heights
 68            if text_line_height > self.config.text_line_heights_filtering_thr
 69        ]
 70        assert text_line_heights
 71        # 2. Remove outliers.
 72        # https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
 73        text_line_heights = np.asarray(text_line_heights)
 74        deltas = np.abs(text_line_heights - np.median(text_line_heights))
 75        deltas_median = np.median(deltas)
 76        delta_ratios = deltas / (deltas_median or 1.0)
 77        text_line_heights_min = float(
 78            min(
 79                text_line_height
 80                for text_line_height, delta_ratio in zip(text_line_heights, delta_ratios)
 81                if delta_ratio < 3.5
 82            )
 83        )
 84        return text_line_heights_min
 85
 86    def run(self, input: PageResizingStepInput, rng: RandomGenerator):
 87        page_distortion_step_output = input.page_distortion_step_output
 88        page_image = page_distortion_step_output.page_image
 89        page_active_mask = page_distortion_step_output.page_active_mask
 90
 91        page_char_mask = page_distortion_step_output.page_char_mask
 92        assert page_char_mask
 93
 94        page_seal_impression_char_mask = page_distortion_step_output.page_seal_impression_char_mask
 95        assert page_seal_impression_char_mask
 96
 97        page_char_height_score_map = page_distortion_step_output.page_char_height_score_map
 98        assert page_char_height_score_map
 99
100        page_text_line_mask = page_distortion_step_output.page_text_line_mask
101        assert page_text_line_mask
102
103        page_text_line_height_score_map = \
104            page_distortion_step_output.page_text_line_height_score_map
105        assert page_text_line_height_score_map
106
107        page_distorted_text_line_heights = page_distortion_step_output.page_text_line_heights
108        assert page_distorted_text_line_heights
109
110        # Resizing.
111        height, width = page_image.shape
112        text_line_heights_min = self.get_text_line_heights_min(page_distorted_text_line_heights)
113        logger.debug(f'text_line_heights_min={text_line_heights_min}')
114        resized_text_line_height = rng.uniform(
115            self.config.resized_text_line_height_min,
116            self.config.resized_text_line_height_max,
117        )
118        resize_ratio = resized_text_line_height / text_line_heights_min
119
120        resized_height = round(resize_ratio * height)
121        resized_width = round(resize_ratio * width)
122
123        cv_resize_interpolation = sample_cv_resize_interpolation(
124            rng,
125            include_cv_inter_area=(resize_ratio < 1.0),
126        )
127        logger.debug(f'cv_resize_interpolation={cv_resize_interpolation}')
128
129        page_image = page_image.to_resized_image(
130            resized_height=resized_height,
131            resized_width=resized_width,
132            cv_resize_interpolation=cv_resize_interpolation,
133        )
134
135        assert page_active_mask.shape == (height, width)
136        page_active_mask = page_active_mask.to_resized_mask(
137            resized_height=resized_height,
138            resized_width=resized_width,
139            cv_resize_interpolation=cv_resize_interpolation,
140        )
141
142        assert page_char_mask.shape == (height, width)
143        page_char_mask = page_char_mask.to_resized_mask(
144            resized_height=resized_height,
145            resized_width=resized_width,
146            cv_resize_interpolation=cv_resize_interpolation,
147        )
148
149        assert page_seal_impression_char_mask.shape == (height, width)
150        page_seal_impression_char_mask = page_seal_impression_char_mask.to_resized_mask(
151            resized_height=resized_height,
152            resized_width=resized_width,
153            cv_resize_interpolation=cv_resize_interpolation,
154        )
155
156        assert page_char_height_score_map.shape == (height, width)
157        page_char_height_score_map = page_char_height_score_map.to_resized_score_map(
158            resized_height=resized_height,
159            resized_width=resized_width,
160            cv_resize_interpolation=cv_resize_interpolation,
161        )
162        # Scores are resized as well.
163        page_char_height_score_map.assign_mat(page_char_height_score_map.mat * resize_ratio)
164
165        assert page_text_line_mask.shape == (height, width)
166        page_text_line_mask = page_text_line_mask.to_resized_mask(
167            resized_height=resized_height,
168            resized_width=resized_width,
169            cv_resize_interpolation=cv_resize_interpolation,
170        )
171
172        assert page_text_line_height_score_map.shape == (height, width)
173        page_text_line_height_score_map = page_text_line_height_score_map.to_resized_score_map(
174            resized_height=resized_height,
175            resized_width=resized_width,
176            cv_resize_interpolation=cv_resize_interpolation,
177        )
178        # Scores are resized as well.
179        page_text_line_height_score_map.assign_mat(
180            page_text_line_height_score_map.mat * resize_ratio
181        )
182
183        return PageResizingStepOutput(
184            page_image=page_image,
185            page_active_mask=page_active_mask,
186            page_char_mask=page_char_mask,
187            page_seal_impression_char_mask=page_seal_impression_char_mask,
188            page_char_height_score_map=page_char_height_score_map,
189            page_text_line_mask=page_text_line_mask,
190            page_text_line_height_score_map=page_text_line_height_score_map,
191        )
192
193
# Module-level factory object wrapping `PageResizingStep`.
# NOTE(review): `PipelineStepFactory` comes from `..interface`; presumably it
# builds the step from a config -- confirm against that module.
page_resizing_step_factory = PipelineStepFactory(PageResizingStep)
class PageResizingStepConfig:
31class PageResizingStepConfig:
32    resized_text_line_height_min: float = 3.0
33    resized_text_line_height_max: float = 10.0
34    text_line_heights_filtering_thr: float = 1.0
PageResizingStepConfig( resized_text_line_height_min: float = 3.0, resized_text_line_height_max: float = 10.0, text_line_heights_filtering_thr: float = 1.0)
2def __init__(self, resized_text_line_height_min=attr_dict['resized_text_line_height_min'].default, resized_text_line_height_max=attr_dict['resized_text_line_height_max'].default, text_line_heights_filtering_thr=attr_dict['text_line_heights_filtering_thr'].default):
3    self.resized_text_line_height_min = resized_text_line_height_min
4    self.resized_text_line_height_max = resized_text_line_height_max
5    self.text_line_heights_filtering_thr = text_line_heights_filtering_thr

Method generated by attrs for class PageResizingStepConfig.

class PageResizingStepInput:
38class PageResizingStepInput:
39    page_distortion_step_output: PageDistortionStepOutput
PageResizingStepInput( page_distortion_step_output: vkit.pipeline.text_detection.page_distortion.PageDistortionStepOutput)
2def __init__(self, page_distortion_step_output):
3    self.page_distortion_step_output = page_distortion_step_output

Method generated by attrs for class PageResizingStepInput.

class PageResizingStepOutput:
44class PageResizingStepOutput:
45    page_image: Image
46    page_active_mask: Mask
47    page_char_mask: Mask
48    page_seal_impression_char_mask: Mask
49    page_char_height_score_map: ScoreMap
50    page_text_line_mask: Mask
51    page_text_line_height_score_map: ScoreMap
PageResizingStepOutput( page_image: vkit.element.image.Image, page_active_mask: vkit.element.mask.Mask, page_char_mask: vkit.element.mask.Mask, page_seal_impression_char_mask: vkit.element.mask.Mask, page_char_height_score_map: vkit.element.score_map.ScoreMap, page_text_line_mask: vkit.element.mask.Mask, page_text_line_height_score_map: vkit.element.score_map.ScoreMap)
2def __init__(self, page_image, page_active_mask, page_char_mask, page_seal_impression_char_mask, page_char_height_score_map, page_text_line_mask, page_text_line_height_score_map):
3    self.page_image = page_image
4    self.page_active_mask = page_active_mask
5    self.page_char_mask = page_char_mask
6    self.page_seal_impression_char_mask = page_seal_impression_char_mask
7    self.page_char_height_score_map = page_char_height_score_map
8    self.page_text_line_mask = page_text_line_mask
9    self.page_text_line_height_score_map = page_text_line_height_score_map

Method generated by attrs for class PageResizingStepOutput.

 54class PageResizingStep(
 55    PipelineStep[
 56        PageResizingStepConfig,
 57        PageResizingStepInput,
 58        PageResizingStepOutput,
 59    ]
 60):  # yapf: disable
 61
 62    def __init__(self, config: PageResizingStepConfig):
 63        super().__init__(config)
 64
 65    def get_text_line_heights_min(self, page_distorted_text_line_heights: Sequence[float]):
 66        # 1. Filtering.
 67        text_line_heights = [
 68            text_line_height for text_line_height in page_distorted_text_line_heights
 69            if text_line_height > self.config.text_line_heights_filtering_thr
 70        ]
 71        assert text_line_heights
 72        # 2. Remove outliers.
 73        # https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
 74        text_line_heights = np.asarray(text_line_heights)
 75        deltas = np.abs(text_line_heights - np.median(text_line_heights))
 76        deltas_median = np.median(deltas)
 77        delta_ratios = deltas / (deltas_median or 1.0)
 78        text_line_heights_min = float(
 79            min(
 80                text_line_height
 81                for text_line_height, delta_ratio in zip(text_line_heights, delta_ratios)
 82                if delta_ratio < 3.5
 83            )
 84        )
 85        return text_line_heights_min
 86
 87    def run(self, input: PageResizingStepInput, rng: RandomGenerator):
 88        page_distortion_step_output = input.page_distortion_step_output
 89        page_image = page_distortion_step_output.page_image
 90        page_active_mask = page_distortion_step_output.page_active_mask
 91
 92        page_char_mask = page_distortion_step_output.page_char_mask
 93        assert page_char_mask
 94
 95        page_seal_impression_char_mask = page_distortion_step_output.page_seal_impression_char_mask
 96        assert page_seal_impression_char_mask
 97
 98        page_char_height_score_map = page_distortion_step_output.page_char_height_score_map
 99        assert page_char_height_score_map
100
101        page_text_line_mask = page_distortion_step_output.page_text_line_mask
102        assert page_text_line_mask
103
104        page_text_line_height_score_map = \
105            page_distortion_step_output.page_text_line_height_score_map
106        assert page_text_line_height_score_map
107
108        page_distorted_text_line_heights = page_distortion_step_output.page_text_line_heights
109        assert page_distorted_text_line_heights
110
111        # Resizing.
112        height, width = page_image.shape
113        text_line_heights_min = self.get_text_line_heights_min(page_distorted_text_line_heights)
114        logger.debug(f'text_line_heights_min={text_line_heights_min}')
115        resized_text_line_height = rng.uniform(
116            self.config.resized_text_line_height_min,
117            self.config.resized_text_line_height_max,
118        )
119        resize_ratio = resized_text_line_height / text_line_heights_min
120
121        resized_height = round(resize_ratio * height)
122        resized_width = round(resize_ratio * width)
123
124        cv_resize_interpolation = sample_cv_resize_interpolation(
125            rng,
126            include_cv_inter_area=(resize_ratio < 1.0),
127        )
128        logger.debug(f'cv_resize_interpolation={cv_resize_interpolation}')
129
130        page_image = page_image.to_resized_image(
131            resized_height=resized_height,
132            resized_width=resized_width,
133            cv_resize_interpolation=cv_resize_interpolation,
134        )
135
136        assert page_active_mask.shape == (height, width)
137        page_active_mask = page_active_mask.to_resized_mask(
138            resized_height=resized_height,
139            resized_width=resized_width,
140            cv_resize_interpolation=cv_resize_interpolation,
141        )
142
143        assert page_char_mask.shape == (height, width)
144        page_char_mask = page_char_mask.to_resized_mask(
145            resized_height=resized_height,
146            resized_width=resized_width,
147            cv_resize_interpolation=cv_resize_interpolation,
148        )
149
150        assert page_seal_impression_char_mask.shape == (height, width)
151        page_seal_impression_char_mask = page_seal_impression_char_mask.to_resized_mask(
152            resized_height=resized_height,
153            resized_width=resized_width,
154            cv_resize_interpolation=cv_resize_interpolation,
155        )
156
157        assert page_char_height_score_map.shape == (height, width)
158        page_char_height_score_map = page_char_height_score_map.to_resized_score_map(
159            resized_height=resized_height,
160            resized_width=resized_width,
161            cv_resize_interpolation=cv_resize_interpolation,
162        )
163        # Scores are resized as well.
164        page_char_height_score_map.assign_mat(page_char_height_score_map.mat * resize_ratio)
165
166        assert page_text_line_mask.shape == (height, width)
167        page_text_line_mask = page_text_line_mask.to_resized_mask(
168            resized_height=resized_height,
169            resized_width=resized_width,
170            cv_resize_interpolation=cv_resize_interpolation,
171        )
172
173        assert page_text_line_height_score_map.shape == (height, width)
174        page_text_line_height_score_map = page_text_line_height_score_map.to_resized_score_map(
175            resized_height=resized_height,
176            resized_width=resized_width,
177            cv_resize_interpolation=cv_resize_interpolation,
178        )
179        # Scores are resized as well.
180        page_text_line_height_score_map.assign_mat(
181            page_text_line_height_score_map.mat * resize_ratio
182        )
183
184        return PageResizingStepOutput(
185            page_image=page_image,
186            page_active_mask=page_active_mask,
187            page_char_mask=page_char_mask,
188            page_seal_impression_char_mask=page_seal_impression_char_mask,
189            page_char_height_score_map=page_char_height_score_map,
190            page_text_line_mask=page_text_line_mask,
191            page_text_line_height_score_map=page_text_line_height_score_map,
192        )

Abstract base class for generic types.

A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::

    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
            # Etc.

This class can then be used as follows::

    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default

62    def __init__(self, config: PageResizingStepConfig):
63        super().__init__(config)
def get_text_line_heights_min(self, page_distorted_text_line_heights: Sequence[float]):
65    def get_text_line_heights_min(self, page_distorted_text_line_heights: Sequence[float]):
66        # 1. Filtering.
67        text_line_heights = [
68            text_line_height for text_line_height in page_distorted_text_line_heights
69            if text_line_height > self.config.text_line_heights_filtering_thr
70        ]
71        assert text_line_heights
72        # 2. Remove outliers.
73        # https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
74        text_line_heights = np.asarray(text_line_heights)
75        deltas = np.abs(text_line_heights - np.median(text_line_heights))
76        deltas_median = np.median(deltas)
77        delta_ratios = deltas / (deltas_median or 1.0)
78        text_line_heights_min = float(
79            min(
80                text_line_height
81                for text_line_height, delta_ratio in zip(text_line_heights, delta_ratios)
82                if delta_ratio < 3.5
83            )
84        )
85        return text_line_heights_min
def run( self, input: vkit.pipeline.text_detection.page_resizing.PageResizingStepInput, rng: numpy.random._generator.Generator):
 87    def run(self, input: PageResizingStepInput, rng: RandomGenerator):
 88        page_distortion_step_output = input.page_distortion_step_output
 89        page_image = page_distortion_step_output.page_image
 90        page_active_mask = page_distortion_step_output.page_active_mask
 91
 92        page_char_mask = page_distortion_step_output.page_char_mask
 93        assert page_char_mask
 94
 95        page_seal_impression_char_mask = page_distortion_step_output.page_seal_impression_char_mask
 96        assert page_seal_impression_char_mask
 97
 98        page_char_height_score_map = page_distortion_step_output.page_char_height_score_map
 99        assert page_char_height_score_map
100
101        page_text_line_mask = page_distortion_step_output.page_text_line_mask
102        assert page_text_line_mask
103
104        page_text_line_height_score_map = \
105            page_distortion_step_output.page_text_line_height_score_map
106        assert page_text_line_height_score_map
107
108        page_distorted_text_line_heights = page_distortion_step_output.page_text_line_heights
109        assert page_distorted_text_line_heights
110
111        # Resizing.
112        height, width = page_image.shape
113        text_line_heights_min = self.get_text_line_heights_min(page_distorted_text_line_heights)
114        logger.debug(f'text_line_heights_min={text_line_heights_min}')
115        resized_text_line_height = rng.uniform(
116            self.config.resized_text_line_height_min,
117            self.config.resized_text_line_height_max,
118        )
119        resize_ratio = resized_text_line_height / text_line_heights_min
120
121        resized_height = round(resize_ratio * height)
122        resized_width = round(resize_ratio * width)
123
124        cv_resize_interpolation = sample_cv_resize_interpolation(
125            rng,
126            include_cv_inter_area=(resize_ratio < 1.0),
127        )
128        logger.debug(f'cv_resize_interpolation={cv_resize_interpolation}')
129
130        page_image = page_image.to_resized_image(
131            resized_height=resized_height,
132            resized_width=resized_width,
133            cv_resize_interpolation=cv_resize_interpolation,
134        )
135
136        assert page_active_mask.shape == (height, width)
137        page_active_mask = page_active_mask.to_resized_mask(
138            resized_height=resized_height,
139            resized_width=resized_width,
140            cv_resize_interpolation=cv_resize_interpolation,
141        )
142
143        assert page_char_mask.shape == (height, width)
144        page_char_mask = page_char_mask.to_resized_mask(
145            resized_height=resized_height,
146            resized_width=resized_width,
147            cv_resize_interpolation=cv_resize_interpolation,
148        )
149
150        assert page_seal_impression_char_mask.shape == (height, width)
151        page_seal_impression_char_mask = page_seal_impression_char_mask.to_resized_mask(
152            resized_height=resized_height,
153            resized_width=resized_width,
154            cv_resize_interpolation=cv_resize_interpolation,
155        )
156
157        assert page_char_height_score_map.shape == (height, width)
158        page_char_height_score_map = page_char_height_score_map.to_resized_score_map(
159            resized_height=resized_height,
160            resized_width=resized_width,
161            cv_resize_interpolation=cv_resize_interpolation,
162        )
163        # Scores are resized as well.
164        page_char_height_score_map.assign_mat(page_char_height_score_map.mat * resize_ratio)
165
166        assert page_text_line_mask.shape == (height, width)
167        page_text_line_mask = page_text_line_mask.to_resized_mask(
168            resized_height=resized_height,
169            resized_width=resized_width,
170            cv_resize_interpolation=cv_resize_interpolation,
171        )
172
173        assert page_text_line_height_score_map.shape == (height, width)
174        page_text_line_height_score_map = page_text_line_height_score_map.to_resized_score_map(
175            resized_height=resized_height,
176            resized_width=resized_width,
177            cv_resize_interpolation=cv_resize_interpolation,
178        )
179        # Scores are resized as well.
180        page_text_line_height_score_map.assign_mat(
181            page_text_line_height_score_map.mat * resize_ratio
182        )
183
184        return PageResizingStepOutput(
185            page_image=page_image,
186            page_active_mask=page_active_mask,
187            page_char_mask=page_char_mask,
188            page_seal_impression_char_mask=page_seal_impression_char_mask,
189            page_char_height_score_map=page_char_height_score_map,
190            page_text_line_mask=page_text_line_mask,
191            page_text_line_height_score_map=page_text_line_height_score_map,
192        )