vkit.pipeline.text_detection.page_resizing

  1# Copyright 2022 vkit-x Administrator. All Rights Reserved.
  2#
  3# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
  4#
  5# The commercial license gives you the full rights to create and distribute software
  6# on your own terms without any SSPL license obligations. For more information,
  7# please see the "LICENSE_COMMERCIAL.txt" file.
  8#
  9# This project is also available under Server Side Public License (SSPL).
 10# The SSPL licensing is ideal for use cases such as open source projects with
 11# SSPL distribution, student/academic purposes, hobby projects, internal research
 12# projects without external distribution, or other projects where all SSPL
 13# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
 14from typing import Sequence
 15import logging
 16
 17import attrs
 18from numpy.random import Generator as RandomGenerator
 19import numpy as np
 20
 21from vkit.utility import sample_cv_resize_interpolation
 22from vkit.element import Mask, ScoreMap, Image
 23from .page_distortion import PageDistortionStepOutput
 24from ..interface import PipelineStep, PipelineStepFactory
 25
 26logger = logging.getLogger(__name__)
 27
 28
 29@attrs.define
 30class PageResizingStepConfig:
 31    resized_text_line_height_min: float = 3.0
 32    resized_text_line_height_max: float = 10.0
 33    text_line_heights_filtering_thr: float = 1.0
 34
 35
 36@attrs.define
 37class PageResizingStepInput:
 38    page_distortion_step_output: PageDistortionStepOutput
 39
 40
 41@attrs.define
 42class PageResizingStepOutput:
 43    page_image: Image
 44    page_active_mask: Mask
 45    page_char_mask: Mask
 46    page_char_height_score_map: ScoreMap
 47    page_text_line_mask: Mask
 48    page_text_line_height_score_map: ScoreMap
 49
 50
 51class PageResizingStep(
 52    PipelineStep[
 53        PageResizingStepConfig,
 54        PageResizingStepInput,
 55        PageResizingStepOutput,
 56    ]
 57):  # yapf: disable
 58
 59    def __init__(self, config: PageResizingStepConfig):
 60        super().__init__(config)
 61
 62    def get_text_line_heights_min(self, page_distorted_text_line_heights: Sequence[float]):
 63        # 1. Filtering.
 64        text_line_heights = [
 65            text_line_height for text_line_height in page_distorted_text_line_heights
 66            if text_line_height > self.config.text_line_heights_filtering_thr
 67        ]
 68        assert text_line_heights
 69        # 2. Remove outliers.
 70        # https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
 71        text_line_heights = np.asarray(text_line_heights)
 72        deltas = np.abs(text_line_heights - np.median(text_line_heights))
 73        deltas_median = np.median(deltas)
 74        delta_ratios = deltas / (deltas_median or 1.0)
 75        text_line_heights_min = float(
 76            min(
 77                text_line_height
 78                for text_line_height, delta_ratio in zip(text_line_heights, delta_ratios)
 79                if delta_ratio < 3.5
 80            )
 81        )
 82        return text_line_heights_min
 83
 84    def run(self, input: PageResizingStepInput, rng: RandomGenerator):
 85        page_distortion_step_output = input.page_distortion_step_output
 86        page_image = page_distortion_step_output.page_image
 87        page_active_mask = page_distortion_step_output.page_active_mask
 88
 89        page_char_mask = page_distortion_step_output.page_char_mask
 90        assert page_char_mask
 91
 92        page_char_height_score_map = page_distortion_step_output.page_char_height_score_map
 93        assert page_char_height_score_map
 94
 95        page_text_line_mask = page_distortion_step_output.page_text_line_mask
 96        assert page_text_line_mask
 97
 98        page_text_line_height_score_map = \
 99            page_distortion_step_output.page_text_line_height_score_map
100        assert page_text_line_height_score_map
101
102        page_distorted_text_line_heights = page_distortion_step_output.page_text_line_heights
103        assert page_distorted_text_line_heights
104
105        # Resizing.
106        height, width = page_image.shape
107        text_line_heights_min = self.get_text_line_heights_min(page_distorted_text_line_heights)
108        logger.debug(f'text_line_heights_min={text_line_heights_min}')
109        resized_text_line_height = rng.uniform(
110            self.config.resized_text_line_height_min,
111            self.config.resized_text_line_height_max,
112        )
113        resize_ratio = resized_text_line_height / text_line_heights_min
114
115        resized_height = round(resize_ratio * height)
116        resized_width = round(resize_ratio * width)
117
118        cv_resize_interpolation = sample_cv_resize_interpolation(
119            rng,
120            include_cv_inter_area=(resize_ratio < 1.0),
121        )
122        logger.debug(f'cv_resize_interpolation={cv_resize_interpolation}')
123
124        page_image = page_image.to_resized_image(
125            resized_height=resized_height,
126            resized_width=resized_width,
127            cv_resize_interpolation=cv_resize_interpolation,
128        )
129
130        assert page_active_mask.shape == (height, width)
131        page_active_mask = page_active_mask.to_resized_mask(
132            resized_height=resized_height,
133            resized_width=resized_width,
134            cv_resize_interpolation=cv_resize_interpolation,
135        )
136
137        assert page_char_mask.shape == (height, width)
138        page_char_mask = page_char_mask.to_resized_mask(
139            resized_height=resized_height,
140            resized_width=resized_width,
141            cv_resize_interpolation=cv_resize_interpolation,
142        )
143
144        assert page_char_height_score_map.shape == (height, width)
145        page_char_height_score_map = page_char_height_score_map.to_resized_score_map(
146            resized_height=resized_height,
147            resized_width=resized_width,
148            cv_resize_interpolation=cv_resize_interpolation,
149        )
150        # Scores are resized as well.
151        page_char_height_score_map.assign_mat(page_char_height_score_map.mat * resize_ratio)
152
153        assert page_text_line_mask.shape == (height, width)
154        page_text_line_mask = page_text_line_mask.to_resized_mask(
155            resized_height=resized_height,
156            resized_width=resized_width,
157            cv_resize_interpolation=cv_resize_interpolation,
158        )
159
160        assert page_text_line_height_score_map.shape == (height, width)
161        page_text_line_height_score_map = page_text_line_height_score_map.to_resized_score_map(
162            resized_height=resized_height,
163            resized_width=resized_width,
164            cv_resize_interpolation=cv_resize_interpolation,
165        )
166        # Scores are resized as well.
167        page_text_line_height_score_map.assign_mat(
168            page_text_line_height_score_map.mat * resize_ratio
169        )
170
171        return PageResizingStepOutput(
172            page_image=page_image,
173            page_active_mask=page_active_mask,
174            page_char_mask=page_char_mask,
175            page_char_height_score_map=page_char_height_score_map,
176            page_text_line_mask=page_text_line_mask,
177            page_text_line_height_score_map=page_text_line_height_score_map,
178        )
179
180
# Module-level factory for PageResizingStep, built with the PipelineStepFactory
# imported from ..interface.
page_resizing_step_factory = PipelineStepFactory(PageResizingStep)
class PageResizingStepConfig:
class PageResizingStepConfig:
    """Tunable parameters of the page resizing step."""

    # Lower bound passed to ``rng.uniform`` when sampling the target text line height.
    resized_text_line_height_min: float = 3.0
    # Upper bound passed to ``rng.uniform`` when sampling the target text line height.
    resized_text_line_height_max: float = 10.0
    # Text line heights must be strictly greater than this value to be taken
    # into account when the reference (minimum) height is computed.
    text_line_heights_filtering_thr: float = 1.0
PageResizingStepConfig( resized_text_line_height_min: float = 3.0, resized_text_line_height_max: float = 10.0, text_line_heights_filtering_thr: float = 1.0)
2def __init__(self, resized_text_line_height_min=attr_dict['resized_text_line_height_min'].default, resized_text_line_height_max=attr_dict['resized_text_line_height_max'].default, text_line_heights_filtering_thr=attr_dict['text_line_heights_filtering_thr'].default):
3    self.resized_text_line_height_min = resized_text_line_height_min
4    self.resized_text_line_height_max = resized_text_line_height_max
5    self.text_line_heights_filtering_thr = text_line_heights_filtering_thr

Method generated by attrs for class PageResizingStepConfig.

class PageResizingStepInput:
class PageResizingStepInput:
    """Input of the page resizing step."""

    # Result of the preceding page distortion step; the image, masks, score
    # maps and text line heights to resize are read from it.
    page_distortion_step_output: PageDistortionStepOutput
PageResizingStepInput( page_distortion_step_output: vkit.pipeline.text_detection.page_distortion.PageDistortionStepOutput)
2def __init__(self, page_distortion_step_output):
3    self.page_distortion_step_output = page_distortion_step_output

Method generated by attrs for class PageResizingStepInput.

class PageResizingStepOutput:
class PageResizingStepOutput:
    """Resized page image together with its resized masks and score maps."""

    # Page image after resizing.
    page_image: Image
    # Masks resized to the same target height/width as the image.
    page_active_mask: Mask
    page_char_mask: Mask
    # Resized score map whose values were additionally multiplied by the resize ratio.
    page_char_height_score_map: ScoreMap
    page_text_line_mask: Mask
    # Resized score map whose values were additionally multiplied by the resize ratio.
    page_text_line_height_score_map: ScoreMap
PageResizingStepOutput( page_image: vkit.element.image.Image, page_active_mask: vkit.element.mask.Mask, page_char_mask: vkit.element.mask.Mask, page_char_height_score_map: vkit.element.score_map.ScoreMap, page_text_line_mask: vkit.element.mask.Mask, page_text_line_height_score_map: vkit.element.score_map.ScoreMap)
2def __init__(self, page_image, page_active_mask, page_char_mask, page_char_height_score_map, page_text_line_mask, page_text_line_height_score_map):
3    self.page_image = page_image
4    self.page_active_mask = page_active_mask
5    self.page_char_mask = page_char_mask
6    self.page_char_height_score_map = page_char_height_score_map
7    self.page_text_line_mask = page_text_line_mask
8    self.page_text_line_height_score_map = page_text_line_height_score_map

Method generated by attrs for class PageResizingStepOutput.

 52class PageResizingStep(
 53    PipelineStep[
 54        PageResizingStepConfig,
 55        PageResizingStepInput,
 56        PageResizingStepOutput,
 57    ]
 58):  # yapf: disable
 59
 60    def __init__(self, config: PageResizingStepConfig):
 61        super().__init__(config)
 62
 63    def get_text_line_heights_min(self, page_distorted_text_line_heights: Sequence[float]):
 64        # 1. Filtering.
 65        text_line_heights = [
 66            text_line_height for text_line_height in page_distorted_text_line_heights
 67            if text_line_height > self.config.text_line_heights_filtering_thr
 68        ]
 69        assert text_line_heights
 70        # 2. Remove outliers.
 71        # https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
 72        text_line_heights = np.asarray(text_line_heights)
 73        deltas = np.abs(text_line_heights - np.median(text_line_heights))
 74        deltas_median = np.median(deltas)
 75        delta_ratios = deltas / (deltas_median or 1.0)
 76        text_line_heights_min = float(
 77            min(
 78                text_line_height
 79                for text_line_height, delta_ratio in zip(text_line_heights, delta_ratios)
 80                if delta_ratio < 3.5
 81            )
 82        )
 83        return text_line_heights_min
 84
 85    def run(self, input: PageResizingStepInput, rng: RandomGenerator):
 86        page_distortion_step_output = input.page_distortion_step_output
 87        page_image = page_distortion_step_output.page_image
 88        page_active_mask = page_distortion_step_output.page_active_mask
 89
 90        page_char_mask = page_distortion_step_output.page_char_mask
 91        assert page_char_mask
 92
 93        page_char_height_score_map = page_distortion_step_output.page_char_height_score_map
 94        assert page_char_height_score_map
 95
 96        page_text_line_mask = page_distortion_step_output.page_text_line_mask
 97        assert page_text_line_mask
 98
 99        page_text_line_height_score_map = \
100            page_distortion_step_output.page_text_line_height_score_map
101        assert page_text_line_height_score_map
102
103        page_distorted_text_line_heights = page_distortion_step_output.page_text_line_heights
104        assert page_distorted_text_line_heights
105
106        # Resizing.
107        height, width = page_image.shape
108        text_line_heights_min = self.get_text_line_heights_min(page_distorted_text_line_heights)
109        logger.debug(f'text_line_heights_min={text_line_heights_min}')
110        resized_text_line_height = rng.uniform(
111            self.config.resized_text_line_height_min,
112            self.config.resized_text_line_height_max,
113        )
114        resize_ratio = resized_text_line_height / text_line_heights_min
115
116        resized_height = round(resize_ratio * height)
117        resized_width = round(resize_ratio * width)
118
119        cv_resize_interpolation = sample_cv_resize_interpolation(
120            rng,
121            include_cv_inter_area=(resize_ratio < 1.0),
122        )
123        logger.debug(f'cv_resize_interpolation={cv_resize_interpolation}')
124
125        page_image = page_image.to_resized_image(
126            resized_height=resized_height,
127            resized_width=resized_width,
128            cv_resize_interpolation=cv_resize_interpolation,
129        )
130
131        assert page_active_mask.shape == (height, width)
132        page_active_mask = page_active_mask.to_resized_mask(
133            resized_height=resized_height,
134            resized_width=resized_width,
135            cv_resize_interpolation=cv_resize_interpolation,
136        )
137
138        assert page_char_mask.shape == (height, width)
139        page_char_mask = page_char_mask.to_resized_mask(
140            resized_height=resized_height,
141            resized_width=resized_width,
142            cv_resize_interpolation=cv_resize_interpolation,
143        )
144
145        assert page_char_height_score_map.shape == (height, width)
146        page_char_height_score_map = page_char_height_score_map.to_resized_score_map(
147            resized_height=resized_height,
148            resized_width=resized_width,
149            cv_resize_interpolation=cv_resize_interpolation,
150        )
151        # Scores are resized as well.
152        page_char_height_score_map.assign_mat(page_char_height_score_map.mat * resize_ratio)
153
154        assert page_text_line_mask.shape == (height, width)
155        page_text_line_mask = page_text_line_mask.to_resized_mask(
156            resized_height=resized_height,
157            resized_width=resized_width,
158            cv_resize_interpolation=cv_resize_interpolation,
159        )
160
161        assert page_text_line_height_score_map.shape == (height, width)
162        page_text_line_height_score_map = page_text_line_height_score_map.to_resized_score_map(
163            resized_height=resized_height,
164            resized_width=resized_width,
165            cv_resize_interpolation=cv_resize_interpolation,
166        )
167        # Scores are resized as well.
168        page_text_line_height_score_map.assign_mat(
169            page_text_line_height_score_map.mat * resize_ratio
170        )
171
172        return PageResizingStepOutput(
173            page_image=page_image,
174            page_active_mask=page_active_mask,
175            page_char_mask=page_char_mask,
176            page_char_height_score_map=page_char_height_score_map,
177            page_text_line_mask=page_text_line_mask,
178            page_text_line_height_score_map=page_text_line_height_score_map,
179        )

Abstract base class for generic types.

A generic type is typically declared by inheriting from this class parameterized with one or more type variables. For example, a generic mapping type might be defined as::

    class Mapping(Generic[KT, VT]):
        def __getitem__(self, key: KT) -> VT:
            ...
            # Etc.

This class can then be used as follows::

    def lookup_name(mapping: Mapping[KT, VT], key: KT, default: VT) -> VT:
        try:
            return mapping[key]
        except KeyError:
            return default

    def __init__(self, config: PageResizingStepConfig):
        """Forward ``config`` unchanged to the base class initializer."""
        super().__init__(config)
def get_text_line_heights_min(self, page_distorted_text_line_heights: Sequence[float]):
63    def get_text_line_heights_min(self, page_distorted_text_line_heights: Sequence[float]):
64        # 1. Filtering.
65        text_line_heights = [
66            text_line_height for text_line_height in page_distorted_text_line_heights
67            if text_line_height > self.config.text_line_heights_filtering_thr
68        ]
69        assert text_line_heights
70        # 2. Remove outliers.
71        # https://www.itl.nist.gov/div898/handbook/eda/section3/eda35h.htm
72        text_line_heights = np.asarray(text_line_heights)
73        deltas = np.abs(text_line_heights - np.median(text_line_heights))
74        deltas_median = np.median(deltas)
75        delta_ratios = deltas / (deltas_median or 1.0)
76        text_line_heights_min = float(
77            min(
78                text_line_height
79                for text_line_height, delta_ratio in zip(text_line_heights, delta_ratios)
80                if delta_ratio < 3.5
81            )
82        )
83        return text_line_heights_min
def run( self, input: vkit.pipeline.text_detection.page_resizing.PageResizingStepInput, rng: numpy.random._generator.Generator):
 85    def run(self, input: PageResizingStepInput, rng: RandomGenerator):
 86        page_distortion_step_output = input.page_distortion_step_output
 87        page_image = page_distortion_step_output.page_image
 88        page_active_mask = page_distortion_step_output.page_active_mask
 89
 90        page_char_mask = page_distortion_step_output.page_char_mask
 91        assert page_char_mask
 92
 93        page_char_height_score_map = page_distortion_step_output.page_char_height_score_map
 94        assert page_char_height_score_map
 95
 96        page_text_line_mask = page_distortion_step_output.page_text_line_mask
 97        assert page_text_line_mask
 98
 99        page_text_line_height_score_map = \
100            page_distortion_step_output.page_text_line_height_score_map
101        assert page_text_line_height_score_map
102
103        page_distorted_text_line_heights = page_distortion_step_output.page_text_line_heights
104        assert page_distorted_text_line_heights
105
106        # Resizing.
107        height, width = page_image.shape
108        text_line_heights_min = self.get_text_line_heights_min(page_distorted_text_line_heights)
109        logger.debug(f'text_line_heights_min={text_line_heights_min}')
110        resized_text_line_height = rng.uniform(
111            self.config.resized_text_line_height_min,
112            self.config.resized_text_line_height_max,
113        )
114        resize_ratio = resized_text_line_height / text_line_heights_min
115
116        resized_height = round(resize_ratio * height)
117        resized_width = round(resize_ratio * width)
118
119        cv_resize_interpolation = sample_cv_resize_interpolation(
120            rng,
121            include_cv_inter_area=(resize_ratio < 1.0),
122        )
123        logger.debug(f'cv_resize_interpolation={cv_resize_interpolation}')
124
125        page_image = page_image.to_resized_image(
126            resized_height=resized_height,
127            resized_width=resized_width,
128            cv_resize_interpolation=cv_resize_interpolation,
129        )
130
131        assert page_active_mask.shape == (height, width)
132        page_active_mask = page_active_mask.to_resized_mask(
133            resized_height=resized_height,
134            resized_width=resized_width,
135            cv_resize_interpolation=cv_resize_interpolation,
136        )
137
138        assert page_char_mask.shape == (height, width)
139        page_char_mask = page_char_mask.to_resized_mask(
140            resized_height=resized_height,
141            resized_width=resized_width,
142            cv_resize_interpolation=cv_resize_interpolation,
143        )
144
145        assert page_char_height_score_map.shape == (height, width)
146        page_char_height_score_map = page_char_height_score_map.to_resized_score_map(
147            resized_height=resized_height,
148            resized_width=resized_width,
149            cv_resize_interpolation=cv_resize_interpolation,
150        )
151        # Scores are resized as well.
152        page_char_height_score_map.assign_mat(page_char_height_score_map.mat * resize_ratio)
153
154        assert page_text_line_mask.shape == (height, width)
155        page_text_line_mask = page_text_line_mask.to_resized_mask(
156            resized_height=resized_height,
157            resized_width=resized_width,
158            cv_resize_interpolation=cv_resize_interpolation,
159        )
160
161        assert page_text_line_height_score_map.shape == (height, width)
162        page_text_line_height_score_map = page_text_line_height_score_map.to_resized_score_map(
163            resized_height=resized_height,
164            resized_width=resized_width,
165            cv_resize_interpolation=cv_resize_interpolation,
166        )
167        # Scores are resized as well.
168        page_text_line_height_score_map.assign_mat(
169            page_text_line_height_score_map.mat * resize_ratio
170        )
171
172        return PageResizingStepOutput(
173            page_image=page_image,
174            page_active_mask=page_active_mask,
175            page_char_mask=page_char_mask,
176            page_char_height_score_map=page_char_height_score_map,
177            page_text_line_mask=page_text_line_mask,
178            page_text_line_height_score_map=page_text_line_height_score_map,
179        )