vkit.utility.text.const.whitespace
Consts for detecting whitespace chars.
# Copyright 2022 vkit-x Administrator. All Rights Reserved.
#
# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
#
# The commercial license gives you the full rights to create and distribute software
# on your own terms without any SSPL license obligations. For more information,
# please see the "LICENSE_COMMERCIAL.txt" file.
#
# This project is also available under Server Side Public License (SSPL).
# The SSPL licensing is ideal for use cases such as open source projects with
# SSPL distribution, student/academic purposes, hobby projects, internal research
# projects without external distribution, or other projects where all SSPL
# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
'''
Constants for detecting whitespace characters.
'''
from typing import Sequence, Tuple

#: Whitespace code point intervals.
#:
#: The value is a single-element outer list wrapping a list of ``(start, end)``
#: code point pairs. Both ends are inclusive, so a lone code point is written
#: as ``(x, x)``.
#:
#: Code points pulled from https://en.wikipedia.org/wiki/Whitespace_character
#:
#: Table 1.
#:
#: 0x9
#: 0xA
#: 0xB
#: 0xC
#: 0xD
#: 0x20
#: 0x85
#: 0xA0
#: 0x1680
#: 0x2000
#: 0x2001
#: 0x2002
#: 0x2003
#: 0x2004
#: 0x2005
#: 0x2006
#: 0x2007
#: 0x2008
#: 0x2009
#: 0x200A
#: 0x2028
#: 0x2029
#: 0x202F
#: 0x205F
#: 0x3000
#:
#: Table 2.
#:
#: 0x180E
#: 0x200B
#: 0x200C
#: 0x200D
#: 0x2060
#: 0xFEFF
#:
#: Table 3.
#:
#: 0xB7
#: 0x237D
#: 0x2420
#: 0x2422
#: 0x2423
#:
#: Deliberate differences from the tables above:
#:
#: * 0xB7 (middle dot) is NOT included; it is treated as a delimiter instead.
#: * 0x2000-0x200D is widened to 0x2000-0x200F.
#: * 0x2028-0x2029 is widened to 0x2028-0x202C.
#: * 0x205F-0x2060 is widened to 0x205F-0x206F.
ITV_WHITESPACE: Sequence[Sequence[Tuple[int, int]]] = [[
    (0x9, 0xD),
    (0x20, 0x20),
    (0x85, 0x85),
    (0xA0, 0xA0),

    # The "middle dot" (0xB7) is intentionally left out here and moved to the
    # delimiter category, since it is commonly used in Chinese news material.
    (0x1680, 0x1680),
    (0x180E, 0x180E),

    # Widened from the table range (0x2000, 0x200D) to also cover 0x200E-0x200F.
    (0x2000, 0x200F),

    # Widened from the table range (0x2028, 0x2029) to also cover 0x202A-0x202C.
    (0x2028, 0x202C),
    (0x202F, 0x202F),

    # Widened from the table range (0x205F, 0x2060) to also cover 0x2061-0x206F.
    (0x205F, 0x206F),
    (0x237D, 0x237D),
    (0x2420, 0x2420),
    (0x2422, 0x2423),
    (0x3000, 0x3000),
    (0xFEFF, 0xFEFF),
]]