vkit.utility.text.const.delimiter
Consts for detecting delimiter chars.
# Copyright 2022 vkit-x Administrator. All Rights Reserved.
#
# This project (vkit-x/vkit) is dual-licensed under commercial and SSPL licenses.
#
# The commercial license gives you the full rights to create and distribute software
# on your own terms without any SSPL license obligations. For more information,
# please see the "LICENSE_COMMERCIAL.txt" file.
#
# This project is also available under Server Side Public License (SSPL).
# The SSPL licensing is ideal for use cases such as open source projects with
# SSPL distribution, student/academic purposes, hobby projects, internal research
# projects without external distribution, or other projects where all SSPL
# obligations can be met. For more information, please see the "LICENSE_SSPL.txt" file.
'''
Constants for detecting delimiter characters.

``ITV_DELIMITER`` groups Unicode code point intervals by script/block, and
``DELIMITER_BLACKLIST`` lists individual characters singled out from those
intervals.
'''
from typing import Sequence, Set, Tuple

#: Delimiters.
#: Each inner sequence is one group of ``(first, last)`` code point pairs.
#: NOTE(review): both ends look inclusive, since ``(0xB7, 0xB7)`` is used to
#: denote a single code point -- confirm against the consumer of this table.
ITV_DELIMITER: Sequence[Sequence[Tuple[int, int]]] = [
    # ASCII_DELIMITERS_RANGES
    # ASCII punctuation, plus the Latin-1 currency signs listed at the end.
    [
        (0x0021, 0x002F),
        (0x003A, 0x0040),
        (0x005B, 0x0060),
        (0x007B, 0x007E),
        # ¢, £, ¤, ¥
        (0x00A2, 0x00A5),
    ],
    [
        # Picked from the whitespace category: U+00B7 MIDDLE DOT (·).
        (0xB7, 0xB7),
    ],
    # GENERAL_DELIMITERS_RANGES
    # http://www.unicode.org/charts/PDF/U2000.pdf
    [
        # The whole block (0x2000, 0x206F) is too broad; it is narrowed to the
        # sub-ranges below, leaving out 0x2000-0x200F, 0x2028-0x202C, 0x202F
        # and 0x205F-0x206F.
        (0x2010, 0x2027),
        (0x202D, 0x202E),
        (0x2030, 0x205E),
    ],
    # CJK_DELIMITERS_RANGES
    # http://www.unicode.org/charts/PDF/U3000.pdf
    # http://www.unicode.org/charts/PDF/UFE30.pdf
    [
        # The whole block (0x3000, 0x303F) is too broad; it is narrowed to the
        # sub-ranges below, leaving out 0x3000 (ideographic space) and
        # 0x3007 (ideographic number zero).
        (0x3001, 0x3006),
        (0x3008, 0x303F),
        (0xFE30, 0xFE4F),
    ],
    # DELIMITERS_EXTENSION_RANGES
    # http://www.unicode.org/charts/PDF/UFF00.pdf
    [
        (0xFF01, 0xFF0F),
        (0xFF1A, 0xFF20),
        (0xFF3B, 0xFF40),
        (0xFF5B, 0xFF64),
        (0xFFE0, 0xFFEE),
    ],
]

#: Characters that fall inside the CJK intervals of ``ITV_DELIMITER``
#: (U+3005, U+3013, U+3012, U+3006) but are listed here separately.
#: NOTE(review): presumably consumers exclude these from the delimiter set --
#: confirm against the caller.
DELIMITER_BLACKLIST: Set[str] = {
    '々',
    '〓',
    '〒',
    '〆',
}