From feafcba406005a903c2ebdf6f7f00f8370bbd64b Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Mon, 26 Aug 2024 17:28:32 +0400 Subject: [PATCH 1/3] Migrate to karacteristics library --- build.gradle.kts | 3 +- gradle/libs.versions.toml | 1 + json-schema-validator/build.gradle.kts | 1 + .../formats/IdnHostnameFormatValidator.kt | 83 ++++++++----------- 4 files changed, 40 insertions(+), 48 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index 7e9a35c3..a782680f 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -8,11 +8,12 @@ plugins { allprojects { repositories { mavenCentral() + mavenLocal() } } apiValidation { - ignoredProjects += listOf("benchmark", "test-suites", "generator") + ignoredProjects += listOf("benchmark", "test-suites") } val ossrhUsername: String by project.ext diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 5400f8d1..f7b03c19 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -50,6 +50,7 @@ graphql-ktor = { group = "com.expediagroup", name = "graphql-kotlin-ktor-client" clikt = { group = "com.github.ajalt.clikt", name = "clikt", version = "4.4.0" } kotlin-codepoints = { group = "de.cketti.unicode", name = "kotlin-codepoints", version = "0.9.0" } normalize = { group = "com.doist.x", name = "normalize", version = "1.1.1" } +karacteristics = { group = "io.github.optimumcode", name = "karacteristics", version = "0.0.2-SNAPSHOT" } [bundles] openapi = ["openapi-validator", "openapi-interfaces", "openapi-jackson"] diff --git a/json-schema-validator/build.gradle.kts b/json-schema-validator/build.gradle.kts index 2fc643b9..2e270188 100644 --- a/json-schema-validator/build.gradle.kts +++ b/json-schema-validator/build.gradle.kts @@ -192,6 +192,7 @@ kotlin { ) { because("simplifies work with unicode codepoints") } + implementation(libs.karacteristics) } } diff --git a/json-schema-validator/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/IdnHostnameFormatValidator.kt 
b/json-schema-validator/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/IdnHostnameFormatValidator.kt index 9a881b65..6e52596f 100644 --- a/json-schema-validator/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/IdnHostnameFormatValidator.kt +++ b/json-schema-validator/src/commonMain/kotlin/io/github/optimumcode/json/schema/internal/formats/IdnHostnameFormatValidator.kt @@ -9,26 +9,27 @@ import io.github.optimumcode.json.schema.internal.formats.IdnHostnameFormatValid import io.github.optimumcode.json.schema.internal.formats.IdnHostnameFormatValidator.BidiLabelType.RTL import io.github.optimumcode.json.schema.internal.hostname.Punycode import io.github.optimumcode.json.schema.internal.hostname.isNormalized -import io.github.optimumcode.json.schema.internal.unicode.CharacterCategory -import io.github.optimumcode.json.schema.internal.unicode.CharacterCategory.ENCLOSING_MARK -import io.github.optimumcode.json.schema.internal.unicode.CharacterCategory.NONSPACING_MARK -import io.github.optimumcode.json.schema.internal.unicode.CharacterCategory.SPACING_MARK -import io.github.optimumcode.json.schema.internal.unicode.CharacterDirectionality -import io.github.optimumcode.json.schema.internal.unicode.CharacterDirectionality.ARABIC_LETTER -import io.github.optimumcode.json.schema.internal.unicode.CharacterDirectionality.ARABIC_NUMBER -import io.github.optimumcode.json.schema.internal.unicode.CharacterDirectionality.BOUNDARY_NEUTRAL -import io.github.optimumcode.json.schema.internal.unicode.CharacterDirectionality.COMMON_SEPARATOR -import io.github.optimumcode.json.schema.internal.unicode.CharacterDirectionality.EUROPEAN_NUMBER -import io.github.optimumcode.json.schema.internal.unicode.CharacterDirectionality.EUROPEAN_SEPARATOR -import io.github.optimumcode.json.schema.internal.unicode.CharacterDirectionality.EUROPEAN_TERMINATOR -import io.github.optimumcode.json.schema.internal.unicode.CharacterDirectionality.LEFT_TO_RIGHT 
-import io.github.optimumcode.json.schema.internal.unicode.CharacterDirectionality.OTHER_NEUTRAL -import io.github.optimumcode.json.schema.internal.unicode.CharacterDirectionality.RIGHT_TO_LEFT -import io.github.optimumcode.json.schema.internal.unicode.DerivedProperties -import io.github.optimumcode.json.schema.internal.unicode.JoiningType import io.github.optimumcode.json.schema.internal.util.forEachCodePointIndexed +import io.github.optimumcode.karacteristics.CodepointBidirectionalClass.ARABIC_LETTER +import io.github.optimumcode.karacteristics.CodepointBidirectionalClass.ARABIC_NUMBER +import io.github.optimumcode.karacteristics.CodepointBidirectionalClass.BOUNDARY_NEUTRAL +import io.github.optimumcode.karacteristics.CodepointBidirectionalClass.COMMON_SEPARATOR +import io.github.optimumcode.karacteristics.CodepointBidirectionalClass.EUROPEAN_NUMBER +import io.github.optimumcode.karacteristics.CodepointBidirectionalClass.EUROPEAN_SEPARATOR +import io.github.optimumcode.karacteristics.CodepointBidirectionalClass.EUROPEAN_TERMINATOR +import io.github.optimumcode.karacteristics.CodepointBidirectionalClass.LEFT_TO_RIGHT +import io.github.optimumcode.karacteristics.CodepointBidirectionalClass.OTHER_NEUTRAL +import io.github.optimumcode.karacteristics.CodepointBidirectionalClass.RIGHT_TO_LEFT +import io.github.optimumcode.karacteristics.CodepointCategory.ENCLOSING_MARK +import io.github.optimumcode.karacteristics.CodepointCategory.NONSPACING_MARK +import io.github.optimumcode.karacteristics.CodepointCategory.SPACING_MARK +import io.github.optimumcode.karacteristics.CodepointDerivedProperty +import io.github.optimumcode.karacteristics.CodepointJoiningType +import io.github.optimumcode.karacteristics.bidirectionalClass +import io.github.optimumcode.karacteristics.category +import io.github.optimumcode.karacteristics.contains import kotlin.math.abs -import io.github.optimumcode.json.schema.internal.unicode.CharacterDirectionality.NONSPACING_MARK as 
NONSPACING_MARK_DIRECTIONALITY +import io.github.optimumcode.karacteristics.CodepointBidirectionalClass.NONSPACING_MARK as NONSPACING_MARK_DIRECTIONALITY private const val GREEK_LOWER_NUMERAL_SIGN: Int = 0x0375 private const val HEBREW_GERESH: Int = 0x05F3 @@ -57,7 +58,7 @@ internal object IdnHostnameFormatValidator : AbstractStringFormatValidator() { value.forEachLabel { it.forEachCodePointIndexed { _, codePoint -> isBidiDomainName = isBidiDomainName || - when (getDirectionality(codePoint)) { + when (codePoint.bidirectionalClass) { RIGHT_TO_LEFT, ARABIC_LETTER, ARABIC_NUMBER, @@ -131,7 +132,7 @@ internal object IdnHostnameFormatValidator : AbstractStringFormatValidator() { val bidiLabelType: BidiLabelType = if (isBidiDomainName) { - when (getDirectionality(firstCodePoint)) { + when (firstCodePoint.bidirectionalClass) { LEFT_TO_RIGHT, -> LTR @@ -171,7 +172,7 @@ internal object IdnHostnameFormatValidator : AbstractStringFormatValidator() { // // Check absents of opposite directionality // Point 4 https://datatracker.ietf.org/doc/html/rfc5893#section-2 - isExtendedArabicIndicDigit(codePoint) || EUROPEAN_NUMBER.characterData.contains(codePoint) -> -1 + isExtendedArabicIndicDigit(codePoint) || codePoint in EUROPEAN_NUMBER -> -1 else -> 0 } if (abs(currentArabicDigitStatus - arabicDigitStatus) > 1) { @@ -239,10 +240,10 @@ internal object IdnHostnameFormatValidator : AbstractStringFormatValidator() { -> false else -> - DerivedProperties.DISALLOWED.contains(codePoint) || - DerivedProperties.UNASSIGNED.contains(codePoint) || - DerivedProperties.CONTEXTJ.contains(codePoint) || - DerivedProperties.CONTEXTO.contains(codePoint) + codePoint in CodepointDerivedProperty.DISALLOWED || + codePoint in CodepointDerivedProperty.UNASSIGNED || + codePoint in CodepointDerivedProperty.CONTEXTJ || + codePoint in CodepointDerivedProperty.CONTEXTO } } @@ -253,7 +254,7 @@ internal object IdnHostnameFormatValidator : AbstractStringFormatValidator() { if (bidiLabelType == NONE) { return false } 
- return when (val directionality = getDirectionality(codePoint)) { + return when (val directionality = codePoint.bidirectionalClass) { EUROPEAN_NUMBER, EUROPEAN_SEPARATOR, COMMON_SEPARATOR, @@ -297,10 +298,10 @@ internal object IdnHostnameFormatValidator : AbstractStringFormatValidator() { } var index = unicode.length // Zero or more characters with Bidi property NSM are allowed in the end - while (index > 0 && getDirectionality(unicode.codePointBefore(index)) == NONSPACING_MARK_DIRECTIONALITY) { + while (index > 0 && unicode.codePointBefore(index).bidirectionalClass == NONSPACING_MARK_DIRECTIONALITY) { index-- } - val lastCodepointDirectionality = getDirectionality(unicode.codePointBefore(index)) + val lastCodepointDirectionality = unicode.codePointBefore(index).bidirectionalClass return when (bidiLabelType) { NONE -> false RTL -> @@ -456,7 +457,7 @@ internal object IdnHostnameFormatValidator : AbstractStringFormatValidator() { return false } var j = index - while (0 < j && JoiningType.TRANSPARENT.contains(unicode.codePointBefore(j))) { + while (0 < j && unicode.codePointBefore(j) in CodepointJoiningType.TRANSPARENT) { j -= 1 } if (j == 0) { @@ -465,8 +466,8 @@ internal object IdnHostnameFormatValidator : AbstractStringFormatValidator() { } val beforeFirstTransparent = unicode.codePointBefore(j) if ( - !JoiningType.LEFT_JOINING.contains(beforeFirstTransparent) && - !JoiningType.DUAL_JOINING.contains(beforeFirstTransparent) + beforeFirstTransparent !in CodepointJoiningType.LEFT_JOINING && + beforeFirstTransparent !in CodepointJoiningType.DUAL_JOINING ) { return true } @@ -476,7 +477,7 @@ internal object IdnHostnameFormatValidator : AbstractStringFormatValidator() { // Must have joining type T after return true } - while (j < len && JoiningType.TRANSPARENT.contains(unicode.codePointAt(j))) { + while (j < len && unicode.codePointAt(j) in CodepointJoiningType.TRANSPARENT) { j += 1 } if (j == len) { @@ -484,12 +485,12 @@ internal object IdnHostnameFormatValidator : 
AbstractStringFormatValidator() { return true } val afterLastTransparent = unicode.codePointAt(j) - return !JoiningType.RIGHT_JOINING.contains(afterLastTransparent) && - !JoiningType.DUAL_JOINING.contains(afterLastTransparent) + return afterLastTransparent !in CodepointJoiningType.RIGHT_JOINING && + afterLastTransparent !in CodepointJoiningType.DUAL_JOINING } private fun isLeadingCombiningMark(codePoint: Int): Boolean = - when (getCategory(codePoint)) { + when (codePoint.category) { NONSPACING_MARK, SPACING_MARK, ENCLOSING_MARK, @@ -519,18 +520,6 @@ internal object IdnHostnameFormatValidator : AbstractStringFormatValidator() { return value.length } - private fun getCategory(codePoint: Int): CharacterCategory { - return CharacterCategory.entries.first { - it.characterData.contains(codePoint) - } - } - - private fun getDirectionality(codePoint: Int): CharacterDirectionality { - return CharacterDirectionality.entries.first { - it.characterData.contains(codePoint) - } - } - @Suppress("detekt:MagicNumber") private fun isArabicIndicDigit(code: Int): Boolean = code in 0x0660..0x0669 From fd0284df54723c602b6fe84eadd6edc60e0b3b09 Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Mon, 26 Aug 2024 17:33:54 +0400 Subject: [PATCH 2/3] Remove generation from project --- generator/build.gradle.kts | 51 - generator/data/DerivedJoiningType.txt | 573 ---- generator/data/README.md | 8 - generator/data/rfc5895_appendix_b_1.txt | 2321 ----------------- .../optimumcode/unocode/generator/Main.kt | 125 - .../generator/internal/dump/DataDamper.kt | 153 -- .../internal/dump/DerivedPropertiesLoader.kt | 34 - .../internal/dump/JoiningTypesLoader.kt | 39 - .../unocode/generator/internal/dump/Util.kt | 13 - .../internal/generator/CategoryGenerator.kt | 184 -- .../generator/DerivedJoinigTypeGenerator.kt | 58 - .../generator/DerivedPropertiesGenerator.kt | 58 - .../internal/generator/DirectionGenerator.kt | 185 -- .../generator/internal/generator/Util.kt | 51 - 
.../internal/graphql/GraphqlClient.kt | 112 - .../unocode/generator/internal/model/Model.kt | 32 - .../BidirectionalCharacterForClass.graphql | 8 - .../resources/BidirectionalClasses.graphql | 6 - .../resources/CharacterCategories.graphql | 6 - .../resources/CharactersForCategory.graphql | 8 - json-schema-validator/build.gradle.kts | 180 -- settings.gradle.kts | 1 - 22 files changed, 4206 deletions(-) delete mode 100644 generator/build.gradle.kts delete mode 100644 generator/data/DerivedJoiningType.txt delete mode 100644 generator/data/README.md delete mode 100644 generator/data/rfc5895_appendix_b_1.txt delete mode 100644 generator/src/main/kotlin/io/github/optimumcode/unocode/generator/Main.kt delete mode 100644 generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/dump/DataDamper.kt delete mode 100644 generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/dump/DerivedPropertiesLoader.kt delete mode 100644 generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/dump/JoiningTypesLoader.kt delete mode 100644 generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/dump/Util.kt delete mode 100644 generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/generator/CategoryGenerator.kt delete mode 100644 generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/generator/DerivedJoinigTypeGenerator.kt delete mode 100644 generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/generator/DerivedPropertiesGenerator.kt delete mode 100644 generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/generator/DirectionGenerator.kt delete mode 100644 generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/generator/Util.kt delete mode 100644 generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/graphql/GraphqlClient.kt delete mode 100644 
generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/model/Model.kt delete mode 100644 generator/src/main/resources/BidirectionalCharacterForClass.graphql delete mode 100644 generator/src/main/resources/BidirectionalClasses.graphql delete mode 100644 generator/src/main/resources/CharacterCategories.graphql delete mode 100644 generator/src/main/resources/CharactersForCategory.graphql diff --git a/generator/build.gradle.kts b/generator/build.gradle.kts deleted file mode 100644 index e9a64a93..00000000 --- a/generator/build.gradle.kts +++ /dev/null @@ -1,51 +0,0 @@ -import com.expediagroup.graphql.plugin.gradle.config.GraphQLSerializer -import com.expediagroup.graphql.plugin.gradle.graphql -import org.jlleitschuh.gradle.ktlint.reporter.ReporterType - -plugins { - // otherwise there is Gradle exception - // https://github.com/gradle/gradle/issues/20084 - id( - libs.plugins.kotlin.jvm - .get() - .pluginId, - ) - alias(libs.plugins.kotlin.serialization) - alias(libs.plugins.expediagroup.graphql) - - alias(libs.plugins.detekt) - alias(libs.plugins.ktlint) -} - -kotlin { - jvmToolchain(11) -} - -dependencies { - implementation(libs.kotlinpoet) - implementation(libs.graphql.ktor) - implementation(libs.clikt) { - because("cli for executing generation") - } -} - -graphql { - client { - endpoint = "https://www.compart.com/en/unicode/graphql" - packageName = "io.github.optimumcode.unicode.generator.internal.graphql" - serializer = GraphQLSerializer.KOTLINX - } -} - -ktlint { - version.set(libs.versions.ktlint) - reporters { - reporter(ReporterType.HTML) - } - filter { - exclude { el -> - val absolutePath = el.file.absolutePath - absolutePath.contains("generated").and(!el.isDirectory) - } - } -} \ No newline at end of file diff --git a/generator/data/DerivedJoiningType.txt b/generator/data/DerivedJoiningType.txt deleted file mode 100644 index d0d994ca..00000000 --- a/generator/data/DerivedJoiningType.txt +++ /dev/null @@ -1,573 +0,0 @@ -# 
DerivedJoiningType-15.1.0.txt -# Date: 2023-01-05, 20:34:38 GMT -# © 2023 Unicode®, Inc. -# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. -# For terms of use, see https://www.unicode.org/terms_of_use.html -# -# Unicode Character Database -# For documentation, see https://www.unicode.org/reports/tr44/ - -# ================================================ - -# Type T is derived, as described in ArabicShaping.txt - -# All code points not explicitly listed for Joining_Type -# have the value Non_Joining (U). - -# @missing: 0000..10FFFF; Non_Joining - -# ================================================ - -# Joining_Type=Join_Causing - -0640 ; C # Lm ARABIC TATWEEL -07FA ; C # Lm NKO LAJANYALAN -0883..0885 ; C # Lo [3] ARABIC TATWEEL WITH OVERSTRUCK HAMZA..ARABIC TATWEEL WITH TWO DOTS BELOW -180A ; C # Po MONGOLIAN NIRUGU -200D ; C # Cf ZERO WIDTH JOINER - -# Total code points: 7 - -# ================================================ - -# Joining_Type=Dual_Joining - -0620 ; D # Lo ARABIC LETTER KASHMIRI YEH -0626 ; D # Lo ARABIC LETTER YEH WITH HAMZA ABOVE -0628 ; D # Lo ARABIC LETTER BEH -062A..062E ; D # Lo [5] ARABIC LETTER TEH..ARABIC LETTER KHAH -0633..063F ; D # Lo [13] ARABIC LETTER SEEN..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE -0641..0647 ; D # Lo [7] ARABIC LETTER FEH..ARABIC LETTER HEH -0649..064A ; D # Lo [2] ARABIC LETTER ALEF MAKSURA..ARABIC LETTER YEH -066E..066F ; D # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF -0678..0687 ; D # Lo [16] ARABIC LETTER HIGH HAMZA YEH..ARABIC LETTER TCHEHEH -069A..06BF ; D # Lo [38] ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE..ARABIC LETTER TCHEH WITH DOT ABOVE -06C1..06C2 ; D # Lo [2] ARABIC LETTER HEH GOAL..ARABIC LETTER HEH GOAL WITH HAMZA ABOVE -06CC ; D # Lo ARABIC LETTER FARSI YEH -06CE ; D # Lo ARABIC LETTER YEH WITH SMALL V -06D0..06D1 ; D # Lo [2] ARABIC LETTER E..ARABIC LETTER YEH WITH THREE DOTS BELOW -06FA..06FC ; D # Lo [3] ARABIC 
LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW -06FF ; D # Lo ARABIC LETTER HEH WITH INVERTED V -0712..0714 ; D # Lo [3] SYRIAC LETTER BETH..SYRIAC LETTER GAMAL GARSHUNI -071A..071D ; D # Lo [4] SYRIAC LETTER HETH..SYRIAC LETTER YUDH -071F..0727 ; D # Lo [9] SYRIAC LETTER KAPH..SYRIAC LETTER REVERSED PE -0729 ; D # Lo SYRIAC LETTER QAPH -072B ; D # Lo SYRIAC LETTER SHIN -072D..072E ; D # Lo [2] SYRIAC LETTER PERSIAN BHETH..SYRIAC LETTER PERSIAN GHAMAL -074E..0758 ; D # Lo [11] SYRIAC LETTER SOGDIAN KHAPH..ARABIC LETTER HAH WITH THREE DOTS POINTING UPWARDS BELOW -075C..076A ; D # Lo [15] ARABIC LETTER SEEN WITH FOUR DOTS ABOVE..ARABIC LETTER LAM WITH BAR -076D..0770 ; D # Lo [4] ARABIC LETTER SEEN WITH TWO DOTS VERTICALLY ABOVE..ARABIC LETTER SEEN WITH SMALL ARABIC LETTER TAH AND TWO DOTS -0772 ; D # Lo ARABIC LETTER HAH WITH SMALL ARABIC LETTER TAH ABOVE -0775..0777 ; D # Lo [3] ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT FOUR BELOW -077A..077F ; D # Lo [6] ARABIC LETTER YEH BARREE WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER KAF WITH TWO DOTS ABOVE -07CA..07EA ; D # Lo [33] NKO LETTER A..NKO LETTER JONA RA -0841..0845 ; D # Lo [5] MANDAIC LETTER AB..MANDAIC LETTER USHENNA -0848 ; D # Lo MANDAIC LETTER ATT -084A..0853 ; D # Lo [10] MANDAIC LETTER AK..MANDAIC LETTER AR -0855 ; D # Lo MANDAIC LETTER AT -0860 ; D # Lo SYRIAC LETTER MALAYALAM NGA -0862..0865 ; D # Lo [4] SYRIAC LETTER MALAYALAM NYA..SYRIAC LETTER MALAYALAM NNNA -0868 ; D # Lo SYRIAC LETTER MALAYALAM LLA -0886 ; D # Lo ARABIC LETTER THIN YEH -0889..088D ; D # Lo [5] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC LETTER KEHEH WITH TWO DOTS VERTICALLY BELOW -08A0..08A9 ; D # Lo [10] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER YEH WITH TWO DOTS BELOW AND DOT ABOVE -08AF..08B0 ; D # Lo [2] ARABIC LETTER SAD WITH THREE DOTS BELOW..ARABIC LETTER GAF WITH INVERTED STROKE -08B3..08B8 ; D # Lo 
[6] ARABIC LETTER AIN WITH THREE DOTS BELOW..ARABIC LETTER TEH WITH SMALL TEH ABOVE -08BA..08C8 ; D # Lo [15] ARABIC LETTER YEH WITH TWO DOTS BELOW AND SMALL NOON ABOVE..ARABIC LETTER GRAF -1807 ; D # Po MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER -1820..1842 ; D # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI -1843 ; D # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN -1844..1878 ; D # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS -1887..18A8 ; D # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA -18AA ; D # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA -A840..A871 ; D # Lo [50] PHAGS-PA LETTER KA..PHAGS-PA SUBJOINED LETTER RA -10AC0..10AC4 ; D # Lo [5] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER GHIMEL -10AD3..10AD6 ; D # Lo [4] MANICHAEAN LETTER LAMEDH..MANICHAEAN LETTER MEM -10AD8..10ADC ; D # Lo [5] MANICHAEAN LETTER SAMEKH..MANICHAEAN LETTER FE -10ADE..10AE0 ; D # Lo [3] MANICHAEAN LETTER QOPH..MANICHAEAN LETTER QHOPH -10AEB..10AEE ; D # No [4] MANICHAEAN NUMBER ONE..MANICHAEAN NUMBER TWENTY -10B80 ; D # Lo PSALTER PAHLAVI LETTER ALEPH -10B82 ; D # Lo PSALTER PAHLAVI LETTER GIMEL -10B86..10B88 ; D # Lo [3] PSALTER PAHLAVI LETTER ZAYIN..PSALTER PAHLAVI LETTER YODH -10B8A..10B8B ; D # Lo [2] PSALTER PAHLAVI LETTER LAMEDH..PSALTER PAHLAVI LETTER MEM-QOPH -10B8D ; D # Lo PSALTER PAHLAVI LETTER SAMEKH -10B90 ; D # Lo PSALTER PAHLAVI LETTER SHIN -10BAD..10BAE ; D # No [2] PSALTER PAHLAVI NUMBER TEN..PSALTER PAHLAVI NUMBER TWENTY -10D01..10D21 ; D # Lo [33] HANIFI ROHINGYA LETTER BA..HANIFI ROHINGYA VOWEL O -10D23 ; D # Lo HANIFI ROHINGYA MARK NA KHONNA -10F30..10F32 ; D # Lo [3] SOGDIAN LETTER ALEPH..SOGDIAN LETTER GIMEL -10F34..10F44 ; D # Lo [17] SOGDIAN LETTER WAW..SOGDIAN LETTER LESH -10F51..10F53 ; D # No [3] SOGDIAN NUMBER ONE..SOGDIAN NUMBER TWENTY -10F70..10F73 ; D # Lo [4] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER WAW -10F76..10F81 ; D # Lo [12] OLD UYGHUR LETTER YODH..OLD UYGHUR LETTER LESH -10FB0 ; D # Lo CHORASMIAN 
LETTER ALEPH -10FB2..10FB3 ; D # Lo [2] CHORASMIAN LETTER BETH..CHORASMIAN LETTER GIMEL -10FB8 ; D # Lo CHORASMIAN LETTER ZAYIN -10FBB..10FBC ; D # Lo [2] CHORASMIAN LETTER KAPH..CHORASMIAN LETTER LAMEDH -10FBE..10FBF ; D # Lo [2] CHORASMIAN LETTER NUN..CHORASMIAN LETTER SAMEKH -10FC1 ; D # Lo CHORASMIAN LETTER PE -10FC4 ; D # Lo CHORASMIAN LETTER TAW -10FCA ; D # No CHORASMIAN NUMBER TWENTY -1E900..1E943 ; D # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA - -# Total code points: 610 - -# ================================================ - -# Joining_Type=Right_Joining - -0622..0625 ; R # Lo [4] ARABIC LETTER ALEF WITH MADDA ABOVE..ARABIC LETTER ALEF WITH HAMZA BELOW -0627 ; R # Lo ARABIC LETTER ALEF -0629 ; R # Lo ARABIC LETTER TEH MARBUTA -062F..0632 ; R # Lo [4] ARABIC LETTER DAL..ARABIC LETTER ZAIN -0648 ; R # Lo ARABIC LETTER WAW -0671..0673 ; R # Lo [3] ARABIC LETTER ALEF WASLA..ARABIC LETTER ALEF WITH WAVY HAMZA BELOW -0675..0677 ; R # Lo [3] ARABIC LETTER HIGH HAMZA ALEF..ARABIC LETTER U WITH HAMZA ABOVE -0688..0699 ; R # Lo [18] ARABIC LETTER DDAL..ARABIC LETTER REH WITH FOUR DOTS ABOVE -06C0 ; R # Lo ARABIC LETTER HEH WITH YEH ABOVE -06C3..06CB ; R # Lo [9] ARABIC LETTER TEH MARBUTA GOAL..ARABIC LETTER VE -06CD ; R # Lo ARABIC LETTER YEH WITH TAIL -06CF ; R # Lo ARABIC LETTER WAW WITH DOT ABOVE -06D2..06D3 ; R # Lo [2] ARABIC LETTER YEH BARREE..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE -06D5 ; R # Lo ARABIC LETTER AE -06EE..06EF ; R # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V -0710 ; R # Lo SYRIAC LETTER ALAPH -0715..0719 ; R # Lo [5] SYRIAC LETTER DALATH..SYRIAC LETTER ZAIN -071E ; R # Lo SYRIAC LETTER YUDH HE -0728 ; R # Lo SYRIAC LETTER SADHE -072A ; R # Lo SYRIAC LETTER RISH -072C ; R # Lo SYRIAC LETTER TAW -072F ; R # Lo SYRIAC LETTER PERSIAN DHALATH -074D ; R # Lo SYRIAC LETTER SOGDIAN ZHAIN -0759..075B ; R # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW AND SMALL TAH..ARABIC LETTER REH WITH 
STROKE -076B..076C ; R # Lo [2] ARABIC LETTER REH WITH TWO DOTS VERTICALLY ABOVE..ARABIC LETTER REH WITH HAMZA ABOVE -0771 ; R # Lo ARABIC LETTER REH WITH SMALL ARABIC LETTER TAH AND TWO DOTS -0773..0774 ; R # Lo [2] ARABIC LETTER ALEF WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER ALEF WITH EXTENDED ARABIC-INDIC DIGIT THREE ABOVE -0778..0779 ; R # Lo [2] ARABIC LETTER WAW WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER WAW WITH EXTENDED ARABIC-INDIC DIGIT THREE ABOVE -0840 ; R # Lo MANDAIC LETTER HALQA -0846..0847 ; R # Lo [2] MANDAIC LETTER AZ..MANDAIC LETTER IT -0849 ; R # Lo MANDAIC LETTER AKSA -0854 ; R # Lo MANDAIC LETTER ASH -0856..0858 ; R # Lo [3] MANDAIC LETTER DUSHENNA..MANDAIC LETTER AIN -0867 ; R # Lo SYRIAC LETTER MALAYALAM RA -0869..086A ; R # Lo [2] SYRIAC LETTER MALAYALAM LLLA..SYRIAC LETTER MALAYALAM SSA -0870..0882 ; R # Lo [19] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC LETTER ALEF WITH ATTACHED LEFT HAMZA -088E ; R # Lo ARABIC VERTICAL TAIL -08AA..08AC ; R # Lo [3] ARABIC LETTER REH WITH LOOP..ARABIC LETTER ROHINGYA YEH -08AE ; R # Lo ARABIC LETTER DAL WITH THREE DOTS BELOW -08B1..08B2 ; R # Lo [2] ARABIC LETTER STRAIGHT WAW..ARABIC LETTER ZAIN WITH INVERTED V ABOVE -08B9 ; R # Lo ARABIC LETTER REH WITH SMALL NOON ABOVE -10AC5 ; R # Lo MANICHAEAN LETTER DALETH -10AC7 ; R # Lo MANICHAEAN LETTER WAW -10AC9..10ACA ; R # Lo [2] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER ZHAYIN -10ACE..10AD2 ; R # Lo [5] MANICHAEAN LETTER TETH..MANICHAEAN LETTER KHAPH -10ADD ; R # Lo MANICHAEAN LETTER SADHE -10AE1 ; R # Lo MANICHAEAN LETTER RESH -10AE4 ; R # Lo MANICHAEAN LETTER TAW -10AEF ; R # No MANICHAEAN NUMBER ONE HUNDRED -10B81 ; R # Lo PSALTER PAHLAVI LETTER BETH -10B83..10B85 ; R # Lo [3] PSALTER PAHLAVI LETTER DALETH..PSALTER PAHLAVI LETTER WAW-AYIN-RESH -10B89 ; R # Lo PSALTER PAHLAVI LETTER KAPH -10B8C ; R # Lo PSALTER PAHLAVI LETTER NUN -10B8E..10B8F ; R # Lo [2] PSALTER PAHLAVI LETTER PE..PSALTER PAHLAVI LETTER SADHE -10B91 ; R 
# Lo PSALTER PAHLAVI LETTER TAW -10BA9..10BAC ; R # No [4] PSALTER PAHLAVI NUMBER ONE..PSALTER PAHLAVI NUMBER FOUR -10D22 ; R # Lo HANIFI ROHINGYA MARK SAKIN -10F33 ; R # Lo SOGDIAN LETTER HE -10F54 ; R # No SOGDIAN NUMBER ONE HUNDRED -10F74..10F75 ; R # Lo [2] OLD UYGHUR LETTER ZAYIN..OLD UYGHUR LETTER FINAL HETH -10FB4..10FB6 ; R # Lo [3] CHORASMIAN LETTER DALETH..CHORASMIAN LETTER WAW -10FB9..10FBA ; R # Lo [2] CHORASMIAN LETTER HETH..CHORASMIAN LETTER YODH -10FBD ; R # Lo CHORASMIAN LETTER MEM -10FC2..10FC3 ; R # Lo [2] CHORASMIAN LETTER RESH..CHORASMIAN LETTER SHIN -10FC9 ; R # No CHORASMIAN NUMBER TEN - -# Total code points: 152 - -# ================================================ - -# Joining_Type=Left_Joining - -A872 ; L # Lo PHAGS-PA SUPERFIXED LETTER RA -10ACD ; L # Lo MANICHAEAN LETTER HETH -10AD7 ; L # Lo MANICHAEAN LETTER NUN -10D00 ; L # Lo HANIFI ROHINGYA LETTER A -10FCB ; L # No CHORASMIAN NUMBER ONE HUNDRED - -# Total code points: 5 - -# ================================================ - -# Joining_Type=Transparent - -00AD ; T # Cf SOFT HYPHEN -0300..036F ; T # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X -0483..0487 ; T # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE -0488..0489 ; T # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN -0591..05BD ; T # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG -05BF ; T # Mn HEBREW POINT RAFE -05C1..05C2 ; T # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT -05C4..05C5 ; T # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT -05C7 ; T # Mn HEBREW POINT QAMATS QATAN -0610..061A ; T # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA -061C ; T # Cf ARABIC LETTER MARK -064B..065F ; T # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW -0670 ; T # Mn ARABIC LETTER SUPERSCRIPT ALEF -06D6..06DC ; T # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN -06DF..06E4 ; T # Mn [6] ARABIC SMALL 
HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA -06E7..06E8 ; T # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON -06EA..06ED ; T # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM -070F ; T # Cf SYRIAC ABBREVIATION MARK -0711 ; T # Mn SYRIAC LETTER SUPERSCRIPT ALAPH -0730..074A ; T # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH -07A6..07B0 ; T # Mn [11] THAANA ABAFILI..THAANA SUKUN -07EB..07F3 ; T # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE -07FD ; T # Mn NKO DANTAYALAN -0816..0819 ; T # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH -081B..0823 ; T # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A -0825..0827 ; T # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U -0829..082D ; T # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA -0859..085B ; T # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK -0898..089F ; T # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA -08CA..08E1 ; T # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA -08E3..0902 ; T # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA -093A ; T # Mn DEVANAGARI VOWEL SIGN OE -093C ; T # Mn DEVANAGARI SIGN NUKTA -0941..0948 ; T # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI -094D ; T # Mn DEVANAGARI SIGN VIRAMA -0951..0957 ; T # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE -0962..0963 ; T # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL -0981 ; T # Mn BENGALI SIGN CANDRABINDU -09BC ; T # Mn BENGALI SIGN NUKTA -09C1..09C4 ; T # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR -09CD ; T # Mn BENGALI SIGN VIRAMA -09E2..09E3 ; T # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL -09FE ; T # Mn BENGALI SANDHI MARK -0A01..0A02 ; T # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI -0A3C ; T # Mn GURMUKHI SIGN NUKTA -0A41..0A42 ; T # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU -0A47..0A48 ; T # 
Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI -0A4B..0A4D ; T # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA -0A51 ; T # Mn GURMUKHI SIGN UDAAT -0A70..0A71 ; T # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK -0A75 ; T # Mn GURMUKHI SIGN YAKASH -0A81..0A82 ; T # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA -0ABC ; T # Mn GUJARATI SIGN NUKTA -0AC1..0AC5 ; T # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E -0AC7..0AC8 ; T # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI -0ACD ; T # Mn GUJARATI SIGN VIRAMA -0AE2..0AE3 ; T # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL -0AFA..0AFF ; T # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE -0B01 ; T # Mn ORIYA SIGN CANDRABINDU -0B3C ; T # Mn ORIYA SIGN NUKTA -0B3F ; T # Mn ORIYA VOWEL SIGN I -0B41..0B44 ; T # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR -0B4D ; T # Mn ORIYA SIGN VIRAMA -0B55..0B56 ; T # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK -0B62..0B63 ; T # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL -0B82 ; T # Mn TAMIL SIGN ANUSVARA -0BC0 ; T # Mn TAMIL VOWEL SIGN II -0BCD ; T # Mn TAMIL SIGN VIRAMA -0C00 ; T # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE -0C04 ; T # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE -0C3C ; T # Mn TELUGU SIGN NUKTA -0C3E..0C40 ; T # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II -0C46..0C48 ; T # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI -0C4A..0C4D ; T # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA -0C55..0C56 ; T # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK -0C62..0C63 ; T # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL -0C81 ; T # Mn KANNADA SIGN CANDRABINDU -0CBC ; T # Mn KANNADA SIGN NUKTA -0CBF ; T # Mn KANNADA VOWEL SIGN I -0CC6 ; T # Mn KANNADA VOWEL SIGN E -0CCC..0CCD ; T # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA -0CE2..0CE3 ; T # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL -0D00..0D01 ; T # Mn [2] MALAYALAM 
SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU -0D3B..0D3C ; T # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA -0D41..0D44 ; T # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR -0D4D ; T # Mn MALAYALAM SIGN VIRAMA -0D62..0D63 ; T # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL -0D81 ; T # Mn SINHALA SIGN CANDRABINDU -0DCA ; T # Mn SINHALA SIGN AL-LAKUNA -0DD2..0DD4 ; T # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA -0DD6 ; T # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA -0E31 ; T # Mn THAI CHARACTER MAI HAN-AKAT -0E34..0E3A ; T # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU -0E47..0E4E ; T # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN -0EB1 ; T # Mn LAO VOWEL SIGN MAI KAN -0EB4..0EBC ; T # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO -0EC8..0ECE ; T # Mn [7] LAO TONE MAI EK..LAO YAMAKKAN -0F18..0F19 ; T # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS -0F35 ; T # Mn TIBETAN MARK NGAS BZUNG NYI ZLA -0F37 ; T # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS -0F39 ; T # Mn TIBETAN MARK TSA -PHRU -0F71..0F7E ; T # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO -0F80..0F84 ; T # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA -0F86..0F87 ; T # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS -0F8D..0F97 ; T # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA -0F99..0FBC ; T # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA -0FC6 ; T # Mn TIBETAN SYMBOL PADMA GDAN -102D..1030 ; T # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU -1032..1037 ; T # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW -1039..103A ; T # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT -103D..103E ; T # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA -1058..1059 ; T # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL 
-105E..1060 ; T # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA -1071..1074 ; T # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE -1082 ; T # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA -1085..1086 ; T # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y -108D ; T # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE -109D ; T # Mn MYANMAR VOWEL SIGN AITON AI -135D..135F ; T # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK -1712..1714 ; T # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA -1732..1733 ; T # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U -1752..1753 ; T # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U -1772..1773 ; T # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U -17B4..17B5 ; T # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA -17B7..17BD ; T # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA -17C6 ; T # Mn KHMER SIGN NIKAHIT -17C9..17D3 ; T # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT -17DD ; T # Mn KHMER SIGN ATTHACAN -180B..180D ; T # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE -180F ; T # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR -1885..1886 ; T # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA -18A9 ; T # Mn MONGOLIAN LETTER ALI GALI DAGALGA -1920..1922 ; T # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U -1927..1928 ; T # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O -1932 ; T # Mn LIMBU SMALL LETTER ANUSVARA -1939..193B ; T # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I -1A17..1A18 ; T # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U -1A1B ; T # Mn BUGINESE VOWEL SIGN AE -1A56 ; T # Mn TAI THAM CONSONANT SIGN MEDIAL LA -1A58..1A5E ; T # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA -1A60 ; T # Mn TAI THAM SIGN SAKOT -1A62 ; T # Mn TAI THAM VOWEL SIGN MAI SAT -1A65..1A6C ; T # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM 
VOWEL SIGN OA BELOW -1A73..1A7C ; T # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN -1A7F ; T # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT -1AB0..1ABD ; T # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW -1ABE ; T # Me COMBINING PARENTHESES OVERLAY -1ABF..1ACE ; T # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T -1B00..1B03 ; T # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG -1B34 ; T # Mn BALINESE SIGN REREKAN -1B36..1B3A ; T # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA -1B3C ; T # Mn BALINESE VOWEL SIGN LA LENGA -1B42 ; T # Mn BALINESE VOWEL SIGN PEPET -1B6B..1B73 ; T # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG -1B80..1B81 ; T # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR -1BA2..1BA5 ; T # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU -1BA8..1BA9 ; T # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG -1BAB..1BAD ; T # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA -1BE6 ; T # Mn BATAK SIGN TOMPI -1BE8..1BE9 ; T # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE -1BED ; T # Mn BATAK VOWEL SIGN KARO O -1BEF..1BF1 ; T # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H -1C2C..1C33 ; T # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T -1C36..1C37 ; T # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA -1CD0..1CD2 ; T # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA -1CD4..1CE0 ; T # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA -1CE2..1CE8 ; T # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL -1CED ; T # Mn VEDIC SIGN TIRYAK -1CF4 ; T # Mn VEDIC TONE CANDRA ABOVE -1CF8..1CF9 ; T # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE -1DC0..1DFF ; T # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW 
-200B ; T # Cf ZERO WIDTH SPACE -200E..200F ; T # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK -202A..202E ; T # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE -2060..2064 ; T # Cf [5] WORD JOINER..INVISIBLE PLUS -206A..206F ; T # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES -20D0..20DC ; T # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE -20DD..20E0 ; T # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH -20E1 ; T # Mn COMBINING LEFT RIGHT ARROW ABOVE -20E2..20E4 ; T # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE -20E5..20F0 ; T # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE -2CEF..2CF1 ; T # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS -2D7F ; T # Mn TIFINAGH CONSONANT JOINER -2DE0..2DFF ; T # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS -302A..302D ; T # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK -3099..309A ; T # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK -A66F ; T # Mn COMBINING CYRILLIC VZMET -A670..A672 ; T # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN -A674..A67D ; T # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK -A69E..A69F ; T # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E -A6F0..A6F1 ; T # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS -A802 ; T # Mn SYLOTI NAGRI SIGN DVISVARA -A806 ; T # Mn SYLOTI NAGRI SIGN HASANTA -A80B ; T # Mn SYLOTI NAGRI SIGN ANUSVARA -A825..A826 ; T # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E -A82C ; T # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA -A8C4..A8C5 ; T # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU -A8E0..A8F1 ; T # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA -A8FF ; T # Mn DEVANAGARI 
VOWEL SIGN AY -A926..A92D ; T # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU -A947..A951 ; T # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R -A980..A982 ; T # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR -A9B3 ; T # Mn JAVANESE SIGN CECAK TELU -A9B6..A9B9 ; T # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT -A9BC..A9BD ; T # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET -A9E5 ; T # Mn MYANMAR SIGN SHAN SAW -AA29..AA2E ; T # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE -AA31..AA32 ; T # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE -AA35..AA36 ; T # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA -AA43 ; T # Mn CHAM CONSONANT SIGN FINAL NG -AA4C ; T # Mn CHAM CONSONANT SIGN FINAL M -AA7C ; T # Mn MYANMAR SIGN TAI LAING TONE-2 -AAB0 ; T # Mn TAI VIET MAI KANG -AAB2..AAB4 ; T # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U -AAB7..AAB8 ; T # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA -AABE..AABF ; T # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK -AAC1 ; T # Mn TAI VIET TONE MAI THO -AAEC..AAED ; T # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI -AAF6 ; T # Mn MEETEI MAYEK VIRAMA -ABE5 ; T # Mn MEETEI MAYEK VOWEL SIGN ANAP -ABE8 ; T # Mn MEETEI MAYEK VOWEL SIGN UNAP -ABED ; T # Mn MEETEI MAYEK APUN IYEK -FB1E ; T # Mn HEBREW POINT JUDEO-SPANISH VARIKA -FE00..FE0F ; T # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 -FE20..FE2F ; T # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF -FEFF ; T # Cf ZERO WIDTH NO-BREAK SPACE -FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR -101FD ; T # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE -102E0 ; T # Mn COPTIC EPACT THOUSANDS MARK -10376..1037A ; T # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII -10A01..10A03 ; T # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R -10A05..10A06 ; T # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O 
-10A0C..10A0F ; T # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA -10A38..10A3A ; T # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW -10A3F ; T # Mn KHAROSHTHI VIRAMA -10AE5..10AE6 ; T # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW -10D24..10D27 ; T # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI -10EAB..10EAC ; T # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK -10EFD..10EFF ; T # Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA -10F46..10F50 ; T # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW -10F82..10F85 ; T # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW -11001 ; T # Mn BRAHMI SIGN ANUSVARA -11038..11046 ; T # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA -11070 ; T # Mn BRAHMI SIGN OLD TAMIL VIRAMA -11073..11074 ; T # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O -1107F..11081 ; T # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA -110B3..110B6 ; T # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI -110B9..110BA ; T # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA -110C2 ; T # Mn KAITHI VOWEL SIGN VOCALIC R -11100..11102 ; T # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA -11127..1112B ; T # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU -1112D..11134 ; T # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA -11173 ; T # Mn MAHAJANI SIGN NUKTA -11180..11181 ; T # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA -111B6..111BE ; T # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O -111C9..111CC ; T # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK -111CF ; T # Mn SHARADA SIGN INVERTED CANDRABINDU -1122F..11231 ; T # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI -11234 ; T # Mn KHOJKI SIGN ANUSVARA -11236..11237 ; T # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA -1123E ; T # Mn KHOJKI SIGN SUKUN -11241 ; T # Mn KHOJKI VOWEL SIGN VOCALIC R -112DF ; T # 
Mn KHUDAWADI SIGN ANUSVARA -112E3..112EA ; T # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA -11300..11301 ; T # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU -1133B..1133C ; T # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA -11340 ; T # Mn GRANTHA VOWEL SIGN II -11366..1136C ; T # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX -11370..11374 ; T # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA -11438..1143F ; T # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI -11442..11444 ; T # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA -11446 ; T # Mn NEWA SIGN NUKTA -1145E ; T # Mn NEWA SANDHI MARK -114B3..114B8 ; T # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL -114BA ; T # Mn TIRHUTA VOWEL SIGN SHORT E -114BF..114C0 ; T # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA -114C2..114C3 ; T # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA -115B2..115B5 ; T # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR -115BC..115BD ; T # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA -115BF..115C0 ; T # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA -115DC..115DD ; T # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU -11633..1163A ; T # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI -1163D ; T # Mn MODI SIGN ANUSVARA -1163F..11640 ; T # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA -116AB ; T # Mn TAKRI SIGN ANUSVARA -116AD ; T # Mn TAKRI VOWEL SIGN AA -116B0..116B5 ; T # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU -116B7 ; T # Mn TAKRI SIGN NUKTA -1171D..1171F ; T # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA -11722..11725 ; T # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU -11727..1172B ; T # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER -1182F..11837 ; T # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA -11839..1183A ; T # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA -1193B..1193C ; T # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU 
SIGN CANDRABINDU -1193E ; T # Mn DIVES AKURU VIRAMA -11943 ; T # Mn DIVES AKURU SIGN NUKTA -119D4..119D7 ; T # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR -119DA..119DB ; T # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI -119E0 ; T # Mn NANDINAGARI SIGN VIRAMA -11A01..11A0A ; T # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK -11A33..11A38 ; T # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA -11A3B..11A3E ; T # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA -11A47 ; T # Mn ZANABAZAR SQUARE SUBJOINER -11A51..11A56 ; T # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE -11A59..11A5B ; T # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK -11A8A..11A96 ; T # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA -11A98..11A99 ; T # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER -11C30..11C36 ; T # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L -11C38..11C3D ; T # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA -11C3F ; T # Mn BHAIKSUKI SIGN VIRAMA -11C92..11CA7 ; T # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA -11CAA..11CB0 ; T # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA -11CB2..11CB3 ; T # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E -11CB5..11CB6 ; T # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU -11D31..11D36 ; T # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R -11D3A ; T # Mn MASARAM GONDI VOWEL SIGN E -11D3C..11D3D ; T # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O -11D3F..11D45 ; T # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA -11D47 ; T # Mn MASARAM GONDI RA-KARA -11D90..11D91 ; T # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI -11D95 ; T # Mn GUNJALA GONDI SIGN ANUSVARA -11D97 ; T # Mn GUNJALA GONDI VIRAMA -11EF3..11EF4 ; T # Mn [2] MAKASAR VOWEL SIGN 
I..MAKASAR VOWEL SIGN U -11F00..11F01 ; T # Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA -11F36..11F3A ; T # Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R -11F40 ; T # Mn KAWI VOWEL SIGN EU -11F42 ; T # Mn KAWI CONJOINER -13430..1343F ; T # Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE -13440 ; T # Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY -13447..13455 ; T # Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED -16AF0..16AF4 ; T # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE -16B30..16B36 ; T # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM -16F4F ; T # Mn MIAO SIGN CONSONANT MODIFIER BAR -16F8F..16F92 ; T # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW -16FE4 ; T # Mn KHITAN SMALL SCRIPT FILLER -1BC9D..1BC9E ; T # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK -1BCA0..1BCA3 ; T # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP -1CF00..1CF2D ; T # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT -1CF30..1CF46 ; T # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG -1D167..1D169 ; T # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 -1D173..1D17A ; T # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE -1D17B..1D182 ; T # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE -1D185..1D18B ; T # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE -1D1AA..1D1AD ; T # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO -1D242..1D244 ; T # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME -1DA00..1DA36 ; T # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN -1DA3B..1DA6C ; T # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT -1DA75 ; T # Mn SIGNWRITING UPPER BODY TILTING 
FROM HIP JOINTS -1DA84 ; T # Mn SIGNWRITING LOCATION HEAD NECK -1DA9B..1DA9F ; T # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 -1DAA1..1DAAF ; T # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 -1E000..1E006 ; T # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE -1E008..1E018 ; T # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU -1E01B..1E021 ; T # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI -1E023..1E024 ; T # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS -1E026..1E02A ; T # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA -1E08F ; T # Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I -1E130..1E136 ; T # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D -1E2AE ; T # Mn TOTO SIGN RISING TONE -1E2EC..1E2EF ; T # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI -1E4EC..1E4EF ; T # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH -1E8D0..1E8D6 ; T # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS -1E944..1E94A ; T # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA -1E94B ; T # Lm ADLAM NASALIZATION MARK -E0001 ; T # Cf LANGUAGE TAG -E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG -E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 - -# Total code points: 2150 - -# EOF \ No newline at end of file diff --git a/generator/data/README.md b/generator/data/README.md deleted file mode 100644 index 89b29448..00000000 --- a/generator/data/README.md +++ /dev/null @@ -1,8 +0,0 @@ -# Unicode data - -This directory contains files with information about unicode characters that is required to perform IDN validation. 
- -Files and sources: - -+ [rfc5895_appendix_b_1.txt](https://datatracker.ietf.org/doc/html/rfc5892#appendix-B.1) -+ [DerivedJoiningType.txt](https://unicode.org/Public/UNIDATA/extracted/DerivedJoiningType.txt) \ No newline at end of file diff --git a/generator/data/rfc5895_appendix_b_1.txt b/generator/data/rfc5895_appendix_b_1.txt deleted file mode 100644 index a3f0002d..00000000 --- a/generator/data/rfc5895_appendix_b_1.txt +++ /dev/null @@ -1,2321 +0,0 @@ -0000..002C ; DISALLOWED # ..COMMA -002D ; PVALID # HYPHEN-MINUS -002E..002F ; DISALLOWED # FULL STOP..SOLIDUS -0030..0039 ; PVALID # DIGIT ZERO..DIGIT NINE -003A..0060 ; DISALLOWED # COLON..GRAVE ACCENT -0061..007A ; PVALID # LATIN SMALL LETTER A..LATIN SMALL LETTER Z -007B..00B6 ; DISALLOWED # LEFT CURLY BRACKET..PILCROW SIGN -00B7 ; CONTEXTO # MIDDLE DOT -00B8..00DE ; DISALLOWED # CEDILLA..LATIN CAPITAL LETTER THORN -00DF..00F6 ; PVALID # LATIN SMALL LETTER SHARP S..LATIN SMALL LETT -00F7 ; DISALLOWED # DIVISION SIGN -00F8..00FF ; PVALID # LATIN SMALL LETTER O WITH STROKE..LATIN SMAL -0100 ; DISALLOWED # LATIN CAPITAL LETTER A WITH MACRON -0101 ; PVALID # LATIN SMALL LETTER A WITH MACRON -0102 ; DISALLOWED # LATIN CAPITAL LETTER A WITH BREVE -0103 ; PVALID # LATIN SMALL LETTER A WITH BREVE -0104 ; DISALLOWED # LATIN CAPITAL LETTER A WITH OGONEK -0105 ; PVALID # LATIN SMALL LETTER A WITH OGONEK -0106 ; DISALLOWED # LATIN CAPITAL LETTER C WITH ACUTE -0107 ; PVALID # LATIN SMALL LETTER C WITH ACUTE -0108 ; DISALLOWED # LATIN CAPITAL LETTER C WITH CIRCUMFLEX -0109 ; PVALID # LATIN SMALL LETTER C WITH CIRCUMFLEX -010A ; DISALLOWED # LATIN CAPITAL LETTER C WITH DOT ABOVE -010B ; PVALID # LATIN SMALL LETTER C WITH DOT ABOVE -010C ; DISALLOWED # LATIN CAPITAL LETTER C WITH CARON -010D ; PVALID # LATIN SMALL LETTER C WITH CARON -010E ; DISALLOWED # LATIN CAPITAL LETTER D WITH CARON -010F ; PVALID # LATIN SMALL LETTER D WITH CARON -0110 ; DISALLOWED # LATIN CAPITAL LETTER D WITH STROKE -0111 ; PVALID # LATIN SMALL 
LETTER D WITH STROKE -0112 ; DISALLOWED # LATIN CAPITAL LETTER E WITH MACRON -0113 ; PVALID # LATIN SMALL LETTER E WITH MACRON -0114 ; DISALLOWED # LATIN CAPITAL LETTER E WITH BREVE -0115 ; PVALID # LATIN SMALL LETTER E WITH BREVE -0116 ; DISALLOWED # LATIN CAPITAL LETTER E WITH DOT ABOVE -0117 ; PVALID # LATIN SMALL LETTER E WITH DOT ABOVE -0118 ; DISALLOWED # LATIN CAPITAL LETTER E WITH OGONEK -0119 ; PVALID # LATIN SMALL LETTER E WITH OGONEK -011A ; DISALLOWED # LATIN CAPITAL LETTER E WITH CARON -011B ; PVALID # LATIN SMALL LETTER E WITH CARON -011C ; DISALLOWED # LATIN CAPITAL LETTER G WITH CIRCUMFLEX -011D ; PVALID # LATIN SMALL LETTER G WITH CIRCUMFLEX -011E ; DISALLOWED # LATIN CAPITAL LETTER G WITH BREVE -011F ; PVALID # LATIN SMALL LETTER G WITH BREVE -0120 ; DISALLOWED # LATIN CAPITAL LETTER G WITH DOT ABOVE -0121 ; PVALID # LATIN SMALL LETTER G WITH DOT ABOVE -0122 ; DISALLOWED # LATIN CAPITAL LETTER G WITH CEDILLA -0123 ; PVALID # LATIN SMALL LETTER G WITH CEDILLA -0124 ; DISALLOWED # LATIN CAPITAL LETTER H WITH CIRCUMFLEX -0125 ; PVALID # LATIN SMALL LETTER H WITH CIRCUMFLEX -0126 ; DISALLOWED # LATIN CAPITAL LETTER H WITH STROKE -0127 ; PVALID # LATIN SMALL LETTER H WITH STROKE -0128 ; DISALLOWED # LATIN CAPITAL LETTER I WITH TILDE -0129 ; PVALID # LATIN SMALL LETTER I WITH TILDE -012A ; DISALLOWED # LATIN CAPITAL LETTER I WITH MACRON -012B ; PVALID # LATIN SMALL LETTER I WITH MACRON -012C ; DISALLOWED # LATIN CAPITAL LETTER I WITH BREVE -012D ; PVALID # LATIN SMALL LETTER I WITH BREVE -012E ; DISALLOWED # LATIN CAPITAL LETTER I WITH OGONEK -012F ; PVALID # LATIN SMALL LETTER I WITH OGONEK -0130 ; DISALLOWED # LATIN CAPITAL LETTER I WITH DOT ABOVE -0131 ; PVALID # LATIN SMALL LETTER DOTLESS I -0132..0134 ; DISALLOWED # LATIN CAPITAL LIGATURE IJ..LATIN CAPITAL LET -0135 ; PVALID # LATIN SMALL LETTER J WITH CIRCUMFLEX -0136 ; DISALLOWED # LATIN CAPITAL LETTER K WITH CEDILLA -0137..0138 ; PVALID # LATIN SMALL LETTER K WITH CEDILLA..LATIN SMA -0139 ; 
DISALLOWED # LATIN CAPITAL LETTER L WITH ACUTE -013A ; PVALID # LATIN SMALL LETTER L WITH ACUTE -013B ; DISALLOWED # LATIN CAPITAL LETTER L WITH CEDILLA -013C ; PVALID # LATIN SMALL LETTER L WITH CEDILLA -013D ; DISALLOWED # LATIN CAPITAL LETTER L WITH CARON -013E ; PVALID # LATIN SMALL LETTER L WITH CARON -013F..0141 ; DISALLOWED # LATIN CAPITAL LETTER L WITH MIDDLE DOT..LATI -0142 ; PVALID # LATIN SMALL LETTER L WITH STROKE -0143 ; DISALLOWED # LATIN CAPITAL LETTER N WITH ACUTE -0144 ; PVALID # LATIN SMALL LETTER N WITH ACUTE -0145 ; DISALLOWED # LATIN CAPITAL LETTER N WITH CEDILLA -0146 ; PVALID # LATIN SMALL LETTER N WITH CEDILLA -0147 ; DISALLOWED # LATIN CAPITAL LETTER N WITH CARON -0148 ; PVALID # LATIN SMALL LETTER N WITH CARON -0149..014A ; DISALLOWED # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE. -014B ; PVALID # LATIN SMALL LETTER ENG -014C ; DISALLOWED # LATIN CAPITAL LETTER O WITH MACRON -014D ; PVALID # LATIN SMALL LETTER O WITH MACRON -014E ; DISALLOWED # LATIN CAPITAL LETTER O WITH BREVE -014F ; PVALID # LATIN SMALL LETTER O WITH BREVE -0150 ; DISALLOWED # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE -0151 ; PVALID # LATIN SMALL LETTER O WITH DOUBLE ACUTE -0152 ; DISALLOWED # LATIN CAPITAL LIGATURE OE -0153 ; PVALID # LATIN SMALL LIGATURE OE -0154 ; DISALLOWED # LATIN CAPITAL LETTER R WITH ACUTE -0155 ; PVALID # LATIN SMALL LETTER R WITH ACUTE -0156 ; DISALLOWED # LATIN CAPITAL LETTER R WITH CEDILLA -0157 ; PVALID # LATIN SMALL LETTER R WITH CEDILLA -0158 ; DISALLOWED # LATIN CAPITAL LETTER R WITH CARON -0159 ; PVALID # LATIN SMALL LETTER R WITH CARON -015A ; DISALLOWED # LATIN CAPITAL LETTER S WITH ACUTE -015B ; PVALID # LATIN SMALL LETTER S WITH ACUTE -015C ; DISALLOWED # LATIN CAPITAL LETTER S WITH CIRCUMFLEX -015D ; PVALID # LATIN SMALL LETTER S WITH CIRCUMFLEX -015E ; DISALLOWED # LATIN CAPITAL LETTER S WITH CEDILLA -015F ; PVALID # LATIN SMALL LETTER S WITH CEDILLA -0160 ; DISALLOWED # LATIN CAPITAL LETTER S WITH CARON -0161 ; PVALID # LATIN SMALL 
LETTER S WITH CARON -0162 ; DISALLOWED # LATIN CAPITAL LETTER T WITH CEDILLA -0163 ; PVALID # LATIN SMALL LETTER T WITH CEDILLA -0164 ; DISALLOWED # LATIN CAPITAL LETTER T WITH CARON -0165 ; PVALID # LATIN SMALL LETTER T WITH CARON -0166 ; DISALLOWED # LATIN CAPITAL LETTER T WITH STROKE -0167 ; PVALID # LATIN SMALL LETTER T WITH STROKE -0168 ; DISALLOWED # LATIN CAPITAL LETTER U WITH TILDE -0169 ; PVALID # LATIN SMALL LETTER U WITH TILDE -016A ; DISALLOWED # LATIN CAPITAL LETTER U WITH MACRON -016B ; PVALID # LATIN SMALL LETTER U WITH MACRON -016C ; DISALLOWED # LATIN CAPITAL LETTER U WITH BREVE -016D ; PVALID # LATIN SMALL LETTER U WITH BREVE -016E ; DISALLOWED # LATIN CAPITAL LETTER U WITH RING ABOVE -016F ; PVALID # LATIN SMALL LETTER U WITH RING ABOVE -0170 ; DISALLOWED # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE -0171 ; PVALID # LATIN SMALL LETTER U WITH DOUBLE ACUTE -0172 ; DISALLOWED # LATIN CAPITAL LETTER U WITH OGONEK -0173 ; PVALID # LATIN SMALL LETTER U WITH OGONEK -0174 ; DISALLOWED # LATIN CAPITAL LETTER W WITH CIRCUMFLEX -0175 ; PVALID # LATIN SMALL LETTER W WITH CIRCUMFLEX -0176 ; DISALLOWED # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX -0177 ; PVALID # LATIN SMALL LETTER Y WITH CIRCUMFLEX -0178..0179 ; DISALLOWED # LATIN CAPITAL LETTER Y WITH DIAERESIS..LATIN -017A ; PVALID # LATIN SMALL LETTER Z WITH ACUTE -017B ; DISALLOWED # LATIN CAPITAL LETTER Z WITH DOT ABOVE -017C ; PVALID # LATIN SMALL LETTER Z WITH DOT ABOVE -017D ; DISALLOWED # LATIN CAPITAL LETTER Z WITH CARON -017E ; PVALID # LATIN SMALL LETTER Z WITH CARON -017F ; DISALLOWED # LATIN SMALL LETTER LONG S -0180 ; PVALID # LATIN SMALL LETTER B WITH STROKE -0181..0182 ; DISALLOWED # LATIN CAPITAL LETTER B WITH HOOK..LATIN CAPI -0183 ; PVALID # LATIN SMALL LETTER B WITH TOPBAR -0184 ; DISALLOWED # LATIN CAPITAL LETTER TONE SIX -0185 ; PVALID # LATIN SMALL LETTER TONE SIX -0186..0187 ; DISALLOWED # LATIN CAPITAL LETTER OPEN O..LATIN CAPITAL L -0188 ; PVALID # LATIN SMALL LETTER C WITH HOOK 
-0189..018B ; DISALLOWED # LATIN CAPITAL LETTER AFRICAN D..LATIN CAPITA -018C..018D ; PVALID # LATIN SMALL LETTER D WITH TOPBAR..LATIN SMAL -018E..0191 ; DISALLOWED # LATIN CAPITAL LETTER REVERSED E..LATIN CAPIT -0192 ; PVALID # LATIN SMALL LETTER F WITH HOOK -0193..0194 ; DISALLOWED # LATIN CAPITAL LETTER G WITH HOOK..LATIN CAPI -0195 ; PVALID # LATIN SMALL LETTER HV -0196..0198 ; DISALLOWED # LATIN CAPITAL LETTER IOTA..LATIN CAPITAL LET -0199..019B ; PVALID # LATIN SMALL LETTER K WITH HOOK..LATIN SMALL -019C..019D ; DISALLOWED # LATIN CAPITAL LETTER TURNED M..LATIN CAPITAL -019E ; PVALID # LATIN SMALL LETTER N WITH LONG RIGHT LEG -019F..01A0 ; DISALLOWED # LATIN CAPITAL LETTER O WITH MIDDLE TILDE..LA -01A1 ; PVALID # LATIN SMALL LETTER O WITH HORN -01A2 ; DISALLOWED # LATIN CAPITAL LETTER OI -01A3 ; PVALID # LATIN SMALL LETTER OI -01A4 ; DISALLOWED # LATIN CAPITAL LETTER P WITH HOOK -01A5 ; PVALID # LATIN SMALL LETTER P WITH HOOK -01A6..01A7 ; DISALLOWED # LATIN LETTER YR..LATIN CAPITAL LETTER TONE T -01A8 ; PVALID # LATIN SMALL LETTER TONE TWO -01A9 ; DISALLOWED # LATIN CAPITAL LETTER ESH -01AA..01AB ; PVALID # LATIN LETTER REVERSED ESH LOOP..LATIN SMALL -01AC ; DISALLOWED # LATIN CAPITAL LETTER T WITH HOOK -01AD ; PVALID # LATIN SMALL LETTER T WITH HOOK -01AE..01AF ; DISALLOWED # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK.. 
-01B0 ; PVALID # LATIN SMALL LETTER U WITH HORN -01B1..01B3 ; DISALLOWED # LATIN CAPITAL LETTER UPSILON..LATIN CAPITAL -01B4 ; PVALID # LATIN SMALL LETTER Y WITH HOOK -01B5 ; DISALLOWED # LATIN CAPITAL LETTER Z WITH STROKE -01B6 ; PVALID # LATIN SMALL LETTER Z WITH STROKE -01B7..01B8 ; DISALLOWED # LATIN CAPITAL LETTER EZH..LATIN CAPITAL LETT -01B9..01BB ; PVALID # LATIN SMALL LETTER EZH REVERSED..LATIN LETTE -01BC ; DISALLOWED # LATIN CAPITAL LETTER TONE FIVE -01BD..01C3 ; PVALID # LATIN SMALL LETTER TONE FIVE..LATIN LETTER R -01C4..01CD ; DISALLOWED # LATIN CAPITAL LETTER DZ WITH CARON..LATIN CA -01CE ; PVALID # LATIN SMALL LETTER A WITH CARON -01CF ; DISALLOWED # LATIN CAPITAL LETTER I WITH CARON -01D0 ; PVALID # LATIN SMALL LETTER I WITH CARON -01D1 ; DISALLOWED # LATIN CAPITAL LETTER O WITH CARON -01D2 ; PVALID # LATIN SMALL LETTER O WITH CARON -01D3 ; DISALLOWED # LATIN CAPITAL LETTER U WITH CARON -01D4 ; PVALID # LATIN SMALL LETTER U WITH CARON -01D5 ; DISALLOWED # LATIN CAPITAL LETTER U WITH DIAERESIS AND MA -01D6 ; PVALID # LATIN SMALL LETTER U WITH DIAERESIS AND MACR -01D7 ; DISALLOWED # LATIN CAPITAL LETTER U WITH DIAERESIS AND AC -01D8 ; PVALID # LATIN SMALL LETTER U WITH DIAERESIS AND ACUT -01D9 ; DISALLOWED # LATIN CAPITAL LETTER U WITH DIAERESIS AND CA -01DA ; PVALID # LATIN SMALL LETTER U WITH DIAERESIS AND CARO -01DB ; DISALLOWED # LATIN CAPITAL LETTER U WITH DIAERESIS AND GR -01DC..01DD ; PVALID # LATIN SMALL LETTER U WITH DIAERESIS AND GRAV -01DE ; DISALLOWED # LATIN CAPITAL LETTER A WITH DIAERESIS AND MA -01DF ; PVALID # LATIN SMALL LETTER A WITH DIAERESIS AND MACR -01E0 ; DISALLOWED # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MA -01E1 ; PVALID # LATIN SMALL LETTER A WITH DOT ABOVE AND MACR -01E2 ; DISALLOWED # LATIN CAPITAL LETTER AE WITH MACRON -01E3 ; PVALID # LATIN SMALL LETTER AE WITH MACRON -01E4 ; DISALLOWED # LATIN CAPITAL LETTER G WITH STROKE -01E5 ; PVALID # LATIN SMALL LETTER G WITH STROKE -01E6 ; DISALLOWED # LATIN CAPITAL LETTER G 
WITH CARON -01E7 ; PVALID # LATIN SMALL LETTER G WITH CARON -01E8 ; DISALLOWED # LATIN CAPITAL LETTER K WITH CARON -01E9 ; PVALID # LATIN SMALL LETTER K WITH CARON -01EA ; DISALLOWED # LATIN CAPITAL LETTER O WITH OGONEK -01EB ; PVALID # LATIN SMALL LETTER O WITH OGONEK -01EC ; DISALLOWED # LATIN CAPITAL LETTER O WITH OGONEK AND MACRO -01ED ; PVALID # LATIN SMALL LETTER O WITH OGONEK AND MACRON -01EE ; DISALLOWED # LATIN CAPITAL LETTER EZH WITH CARON -01EF..01F0 ; PVALID # LATIN SMALL LETTER EZH WITH CARON..LATIN SMA -01F1..01F4 ; DISALLOWED # LATIN CAPITAL LETTER DZ..LATIN CAPITAL LETTE -01F5 ; PVALID # LATIN SMALL LETTER G WITH ACUTE -01F6..01F8 ; DISALLOWED # LATIN CAPITAL LETTER HWAIR..LATIN CAPITAL LE -01F9 ; PVALID # LATIN SMALL LETTER N WITH GRAVE -01FA ; DISALLOWED # LATIN CAPITAL LETTER A WITH RING ABOVE AND A -01FB ; PVALID # LATIN SMALL LETTER A WITH RING ABOVE AND ACU -01FC ; DISALLOWED # LATIN CAPITAL LETTER AE WITH ACUTE -01FD ; PVALID # LATIN SMALL LETTER AE WITH ACUTE -01FE ; DISALLOWED # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE -01FF ; PVALID # LATIN SMALL LETTER O WITH STROKE AND ACUTE -0200 ; DISALLOWED # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE -0201 ; PVALID # LATIN SMALL LETTER A WITH DOUBLE GRAVE -0202 ; DISALLOWED # LATIN CAPITAL LETTER A WITH INVERTED BREVE -0203 ; PVALID # LATIN SMALL LETTER A WITH INVERTED BREVE -0204 ; DISALLOWED # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE -0205 ; PVALID # LATIN SMALL LETTER E WITH DOUBLE GRAVE -0206 ; DISALLOWED # LATIN CAPITAL LETTER E WITH INVERTED BREVE -0207 ; PVALID # LATIN SMALL LETTER E WITH INVERTED BREVE -0208 ; DISALLOWED # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE -0209 ; PVALID # LATIN SMALL LETTER I WITH DOUBLE GRAVE -020A ; DISALLOWED # LATIN CAPITAL LETTER I WITH INVERTED BREVE -020B ; PVALID # LATIN SMALL LETTER I WITH INVERTED BREVE -020C ; DISALLOWED # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE -020D ; PVALID # LATIN SMALL LETTER O WITH DOUBLE GRAVE -020E ; DISALLOWED # LATIN CAPITAL 
LETTER O WITH INVERTED BREVE -020F ; PVALID # LATIN SMALL LETTER O WITH INVERTED BREVE -0210 ; DISALLOWED # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE -0211 ; PVALID # LATIN SMALL LETTER R WITH DOUBLE GRAVE -0212 ; DISALLOWED # LATIN CAPITAL LETTER R WITH INVERTED BREVE -0213 ; PVALID # LATIN SMALL LETTER R WITH INVERTED BREVE -0214 ; DISALLOWED # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE -0215 ; PVALID # LATIN SMALL LETTER U WITH DOUBLE GRAVE -0216 ; DISALLOWED # LATIN CAPITAL LETTER U WITH INVERTED BREVE -0217 ; PVALID # LATIN SMALL LETTER U WITH INVERTED BREVE -0218 ; DISALLOWED # LATIN CAPITAL LETTER S WITH COMMA BELOW -0219 ; PVALID # LATIN SMALL LETTER S WITH COMMA BELOW -021A ; DISALLOWED # LATIN CAPITAL LETTER T WITH COMMA BELOW -021B ; PVALID # LATIN SMALL LETTER T WITH COMMA BELOW -021C ; DISALLOWED # LATIN CAPITAL LETTER YOGH -021D ; PVALID # LATIN SMALL LETTER YOGH -021E ; DISALLOWED # LATIN CAPITAL LETTER H WITH CARON -021F ; PVALID # LATIN SMALL LETTER H WITH CARON -0220 ; DISALLOWED # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG -0221 ; PVALID # LATIN SMALL LETTER D WITH CURL -0222 ; DISALLOWED # LATIN CAPITAL LETTER OU -0223 ; PVALID # LATIN SMALL LETTER OU -0224 ; DISALLOWED # LATIN CAPITAL LETTER Z WITH HOOK -0225 ; PVALID # LATIN SMALL LETTER Z WITH HOOK -0226 ; DISALLOWED # LATIN CAPITAL LETTER A WITH DOT ABOVE -0227 ; PVALID # LATIN SMALL LETTER A WITH DOT ABOVE -0228 ; DISALLOWED # LATIN CAPITAL LETTER E WITH CEDILLA -0229 ; PVALID # LATIN SMALL LETTER E WITH CEDILLA -022A ; DISALLOWED # LATIN CAPITAL LETTER O WITH DIAERESIS AND MA -022B ; PVALID # LATIN SMALL LETTER O WITH DIAERESIS AND MACR -022C ; DISALLOWED # LATIN CAPITAL LETTER O WITH TILDE AND MACRON -022D ; PVALID # LATIN SMALL LETTER O WITH TILDE AND MACRON -022E ; DISALLOWED # LATIN CAPITAL LETTER O WITH DOT ABOVE -022F ; PVALID # LATIN SMALL LETTER O WITH DOT ABOVE -0230 ; DISALLOWED # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MA -0231 ; PVALID # LATIN SMALL LETTER O WITH DOT ABOVE AND 
MACR -0232 ; DISALLOWED # LATIN CAPITAL LETTER Y WITH MACRON -0233..0239 ; PVALID # LATIN SMALL LETTER Y WITH MACRON..LATIN SMAL -023A..023B ; DISALLOWED # LATIN CAPITAL LETTER A WITH STROKE..LATIN CA -023C ; PVALID # LATIN SMALL LETTER C WITH STROKE -023D..023E ; DISALLOWED # LATIN CAPITAL LETTER L WITH BAR..LATIN CAPIT -023F..0240 ; PVALID # LATIN SMALL LETTER S WITH SWASH TAIL..LATIN -0241 ; DISALLOWED # LATIN CAPITAL LETTER GLOTTAL STOP -0242 ; PVALID # LATIN SMALL LETTER GLOTTAL STOP -0243..0246 ; DISALLOWED # LATIN CAPITAL LETTER B WITH STROKE..LATIN CA -0247 ; PVALID # LATIN SMALL LETTER E WITH STROKE -0248 ; DISALLOWED # LATIN CAPITAL LETTER J WITH STROKE -0249 ; PVALID # LATIN SMALL LETTER J WITH STROKE -024A ; DISALLOWED # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL -024B ; PVALID # LATIN SMALL LETTER Q WITH HOOK TAIL -024C ; DISALLOWED # LATIN CAPITAL LETTER R WITH STROKE -024D ; PVALID # LATIN SMALL LETTER R WITH STROKE -024E ; DISALLOWED # LATIN CAPITAL LETTER Y WITH STROKE -024F..02AF ; PVALID # LATIN SMALL LETTER Y WITH STROKE..LATIN SMAL -02B0..02B8 ; DISALLOWED # MODIFIER LETTER SMALL H..MODIFIER LETTER SMA -02B9..02C1 ; PVALID # MODIFIER LETTER PRIME..MODIFIER LETTER REVER -02C2..02C5 ; DISALLOWED # MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LET -02C6..02D1 ; PVALID # MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER -02D2..02EB ; DISALLOWED # MODIFIER LETTER CENTRED RIGHT HALF RING..MOD -02EC ; PVALID # MODIFIER LETTER VOICING -02ED ; DISALLOWED # MODIFIER LETTER UNASPIRATED -02EE ; PVALID # MODIFIER LETTER DOUBLE APOSTROPHE -02EF..02FF ; DISALLOWED # MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER -0300..033F ; PVALID # COMBINING GRAVE ACCENT..COMBINING DOUBLE OVE -0340..0341 ; DISALLOWED # COMBINING GRAVE TONE MARK..COMBINING ACUTE T -0342 ; PVALID # COMBINING GREEK PERISPOMENI -0343..0345 ; DISALLOWED # COMBINING GREEK KORONIS..COMBINING GREEK YPO -0346..034E ; PVALID # COMBINING BRIDGE ABOVE..COMBINING UPWARDS AR -034F ; DISALLOWED # COMBINING GRAPHEME 
JOINER -0350..036F ; PVALID # COMBINING RIGHT ARROWHEAD ABOVE..COMBINING L -0370 ; DISALLOWED # GREEK CAPITAL LETTER HETA -0371 ; PVALID # GREEK SMALL LETTER HETA -0372 ; DISALLOWED # GREEK CAPITAL LETTER ARCHAIC SAMPI -0373 ; PVALID # GREEK SMALL LETTER ARCHAIC SAMPI -0374 ; DISALLOWED # GREEK NUMERAL SIGN -0375 ; CONTEXTO # GREEK LOWER NUMERAL SIGN -0376 ; DISALLOWED # GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA -0377 ; PVALID # GREEK SMALL LETTER PAMPHYLIAN DIGAMMA -0378..0379 ; UNASSIGNED # .. -037A ; DISALLOWED # GREEK YPOGEGRAMMENI -037B..037D ; PVALID # GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GR -037E ; DISALLOWED # GREEK QUESTION MARK -037F..0383 ; UNASSIGNED # .. -0384..038A ; DISALLOWED # GREEK TONOS..GREEK CAPITAL LETTER IOTA WITH -038B ; UNASSIGNED # -038C ; DISALLOWED # GREEK CAPITAL LETTER OMICRON WITH TONOS -038D ; UNASSIGNED # -038E..038F ; DISALLOWED # GREEK CAPITAL LETTER UPSILON WITH TONOS..GRE -0390 ; PVALID # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND T -0391..03A1 ; DISALLOWED # GREEK CAPITAL LETTER ALPHA..GREEK CAPITAL LE -03A2 ; UNASSIGNED # -03A3..03AB ; DISALLOWED # GREEK CAPITAL LETTER SIGMA..GREEK CAPITAL LE -03AC..03CE ; PVALID # GREEK SMALL LETTER ALPHA WITH TONOS..GREEK S -03CF..03D6 ; DISALLOWED # GREEK CAPITAL KAI SYMBOL..GREEK PI SYMBOL -03D7 ; PVALID # GREEK KAI SYMBOL -03D8 ; DISALLOWED # GREEK LETTER ARCHAIC KOPPA -03D9 ; PVALID # GREEK SMALL LETTER ARCHAIC KOPPA -03DA ; DISALLOWED # GREEK LETTER STIGMA -03DB ; PVALID # GREEK SMALL LETTER STIGMA -03DC ; DISALLOWED # GREEK LETTER DIGAMMA -03DD ; PVALID # GREEK SMALL LETTER DIGAMMA -03DE ; DISALLOWED # GREEK LETTER KOPPA -03DF ; PVALID # GREEK SMALL LETTER KOPPA -03E0 ; DISALLOWED # GREEK LETTER SAMPI -03E1 ; PVALID # GREEK SMALL LETTER SAMPI -03E2 ; DISALLOWED # COPTIC CAPITAL LETTER SHEI -03E3 ; PVALID # COPTIC SMALL LETTER SHEI -03E4 ; DISALLOWED # COPTIC CAPITAL LETTER FEI -03E5 ; PVALID # COPTIC SMALL LETTER FEI -03E6 ; DISALLOWED # COPTIC CAPITAL LETTER KHEI -03E7 ; PVALID # 
COPTIC SMALL LETTER KHEI -03E8 ; DISALLOWED # COPTIC CAPITAL LETTER HORI -03E9 ; PVALID # COPTIC SMALL LETTER HORI -03EA ; DISALLOWED # COPTIC CAPITAL LETTER GANGIA -03EB ; PVALID # COPTIC SMALL LETTER GANGIA -03EC ; DISALLOWED # COPTIC CAPITAL LETTER SHIMA -03ED ; PVALID # COPTIC SMALL LETTER SHIMA -03EE ; DISALLOWED # COPTIC CAPITAL LETTER DEI -03EF ; PVALID # COPTIC SMALL LETTER DEI -03F0..03F2 ; DISALLOWED # GREEK KAPPA SYMBOL..GREEK LUNATE SIGMA SYMBO -03F3 ; PVALID # GREEK LETTER YOT -03F4..03F7 ; DISALLOWED # GREEK CAPITAL THETA SYMBOL..GREEK CAPITAL LE -03F8 ; PVALID # GREEK SMALL LETTER SHO -03F9..03FA ; DISALLOWED # GREEK CAPITAL LUNATE SIGMA SYMBOL..GREEK CAP -03FB..03FC ; PVALID # GREEK SMALL LETTER SAN..GREEK RHO WITH STROK -03FD..042F ; DISALLOWED # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL.. -0430..045F ; PVALID # CYRILLIC SMALL LETTER A..CYRILLIC SMALL LETT -0460 ; DISALLOWED # CYRILLIC CAPITAL LETTER OMEGA -0461 ; PVALID # CYRILLIC SMALL LETTER OMEGA -0462 ; DISALLOWED # CYRILLIC CAPITAL LETTER YAT -0463 ; PVALID # CYRILLIC SMALL LETTER YAT -0464 ; DISALLOWED # CYRILLIC CAPITAL LETTER IOTIFIED E -0465 ; PVALID # CYRILLIC SMALL LETTER IOTIFIED E -0466 ; DISALLOWED # CYRILLIC CAPITAL LETTER LITTLE YUS -0467 ; PVALID # CYRILLIC SMALL LETTER LITTLE YUS -0468 ; DISALLOWED # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS -0469 ; PVALID # CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS -046A ; DISALLOWED # CYRILLIC CAPITAL LETTER BIG YUS -046B ; PVALID # CYRILLIC SMALL LETTER BIG YUS -046C ; DISALLOWED # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS -046D ; PVALID # CYRILLIC SMALL LETTER IOTIFIED BIG YUS -046E ; DISALLOWED # CYRILLIC CAPITAL LETTER KSI -046F ; PVALID # CYRILLIC SMALL LETTER KSI -0470 ; DISALLOWED # CYRILLIC CAPITAL LETTER PSI -0471 ; PVALID # CYRILLIC SMALL LETTER PSI -0472 ; DISALLOWED # CYRILLIC CAPITAL LETTER FITA -0473 ; PVALID # CYRILLIC SMALL LETTER FITA -0474 ; DISALLOWED # CYRILLIC CAPITAL LETTER IZHITSA -0475 ; PVALID # CYRILLIC SMALL LETTER 
IZHITSA -0476 ; DISALLOWED # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE -0477 ; PVALID # CYRILLIC SMALL LETTER IZHITSA WITH DOUBLE GR -0478 ; DISALLOWED # CYRILLIC CAPITAL LETTER UK -0479 ; PVALID # CYRILLIC SMALL LETTER UK -047A ; DISALLOWED # CYRILLIC CAPITAL LETTER ROUND OMEGA -047B ; PVALID # CYRILLIC SMALL LETTER ROUND OMEGA -047C ; DISALLOWED # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO -047D ; PVALID # CYRILLIC SMALL LETTER OMEGA WITH TITLO -047E ; DISALLOWED # CYRILLIC CAPITAL LETTER OT -047F ; PVALID # CYRILLIC SMALL LETTER OT -0480 ; DISALLOWED # CYRILLIC CAPITAL LETTER KOPPA -0481 ; PVALID # CYRILLIC SMALL LETTER KOPPA -0482 ; DISALLOWED # CYRILLIC THOUSANDS SIGN -0483..0487 ; PVALID # COMBINING CYRILLIC TITLO..COMBINING CYRILLIC -0488..048A ; DISALLOWED # COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..C -048B ; PVALID # CYRILLIC SMALL LETTER SHORT I WITH TAIL -048C ; DISALLOWED # CYRILLIC CAPITAL LETTER SEMISOFT SIGN -048D ; PVALID # CYRILLIC SMALL LETTER SEMISOFT SIGN -048E ; DISALLOWED # CYRILLIC CAPITAL LETTER ER WITH TICK -048F ; PVALID # CYRILLIC SMALL LETTER ER WITH TICK -0490 ; DISALLOWED # CYRILLIC CAPITAL LETTER GHE WITH UPTURN -0491 ; PVALID # CYRILLIC SMALL LETTER GHE WITH UPTURN -0492 ; DISALLOWED # CYRILLIC CAPITAL LETTER GHE WITH STROKE -0493 ; PVALID # CYRILLIC SMALL LETTER GHE WITH STROKE -0494 ; DISALLOWED # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK -0495 ; PVALID # CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK -0496 ; DISALLOWED # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER -0497 ; PVALID # CYRILLIC SMALL LETTER ZHE WITH DESCENDER -0498 ; DISALLOWED # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER -0499 ; PVALID # CYRILLIC SMALL LETTER ZE WITH DESCENDER -049A ; DISALLOWED # CYRILLIC CAPITAL LETTER KA WITH DESCENDER -049B ; PVALID # CYRILLIC SMALL LETTER KA WITH DESCENDER -049C ; DISALLOWED # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STR -049D ; PVALID # CYRILLIC SMALL LETTER KA WITH VERTICAL STROK -049E ; DISALLOWED # CYRILLIC CAPITAL LETTER KA 
WITH STROKE -049F ; PVALID # CYRILLIC SMALL LETTER KA WITH STROKE -04A0 ; DISALLOWED # CYRILLIC CAPITAL LETTER BASHKIR KA -04A1 ; PVALID # CYRILLIC SMALL LETTER BASHKIR KA -04A2 ; DISALLOWED # CYRILLIC CAPITAL LETTER EN WITH DESCENDER -04A3 ; PVALID # CYRILLIC SMALL LETTER EN WITH DESCENDER -04A4 ; DISALLOWED # CYRILLIC CAPITAL LIGATURE EN GHE -04A5 ; PVALID # CYRILLIC SMALL LIGATURE EN GHE -04A6 ; DISALLOWED # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK -04A7 ; PVALID # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK -04A8 ; DISALLOWED # CYRILLIC CAPITAL LETTER ABKHASIAN HA -04A9 ; PVALID # CYRILLIC SMALL LETTER ABKHASIAN HA -04AA ; DISALLOWED # CYRILLIC CAPITAL LETTER ES WITH DESCENDER -04AB ; PVALID # CYRILLIC SMALL LETTER ES WITH DESCENDER -04AC ; DISALLOWED # CYRILLIC CAPITAL LETTER TE WITH DESCENDER -04AD ; PVALID # CYRILLIC SMALL LETTER TE WITH DESCENDER -04AE ; DISALLOWED # CYRILLIC CAPITAL LETTER STRAIGHT U -04AF ; PVALID # CYRILLIC SMALL LETTER STRAIGHT U -04B0 ; DISALLOWED # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STRO -04B1 ; PVALID # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE -04B2 ; DISALLOWED # CYRILLIC CAPITAL LETTER HA WITH DESCENDER -04B3 ; PVALID # CYRILLIC SMALL LETTER HA WITH DESCENDER -04B4 ; DISALLOWED # CYRILLIC CAPITAL LIGATURE TE TSE -04B5 ; PVALID # CYRILLIC SMALL LIGATURE TE TSE -04B6 ; DISALLOWED # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER -04B7 ; PVALID # CYRILLIC SMALL LETTER CHE WITH DESCENDER -04B8 ; DISALLOWED # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL ST -04B9 ; PVALID # CYRILLIC SMALL LETTER CHE WITH VERTICAL STRO -04BA ; DISALLOWED # CYRILLIC CAPITAL LETTER SHHA -04BB ; PVALID # CYRILLIC SMALL LETTER SHHA -04BC ; DISALLOWED # CYRILLIC CAPITAL LETTER ABKHASIAN CHE -04BD ; PVALID # CYRILLIC SMALL LETTER ABKHASIAN CHE -04BE ; DISALLOWED # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH D -04BF ; PVALID # CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DES -04C0..04C1 ; DISALLOWED # CYRILLIC LETTER PALOCHKA..CYRILLIC CAPITAL L -04C2 ; PVALID # 
CYRILLIC SMALL LETTER ZHE WITH BREVE -04C3 ; DISALLOWED # CYRILLIC CAPITAL LETTER KA WITH HOOK -04C4 ; PVALID # CYRILLIC SMALL LETTER KA WITH HOOK -04C5 ; DISALLOWED # CYRILLIC CAPITAL LETTER EL WITH TAIL -04C6 ; PVALID # CYRILLIC SMALL LETTER EL WITH TAIL -04C7 ; DISALLOWED # CYRILLIC CAPITAL LETTER EN WITH HOOK -04C8 ; PVALID # CYRILLIC SMALL LETTER EN WITH HOOK -04C9 ; DISALLOWED # CYRILLIC CAPITAL LETTER EN WITH TAIL -04CA ; PVALID # CYRILLIC SMALL LETTER EN WITH TAIL -04CB ; DISALLOWED # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE -04CC ; PVALID # CYRILLIC SMALL LETTER KHAKASSIAN CHE -04CD ; DISALLOWED # CYRILLIC CAPITAL LETTER EM WITH TAIL -04CE..04CF ; PVALID # CYRILLIC SMALL LETTER EM WITH TAIL..CYRILLIC -04D0 ; DISALLOWED # CYRILLIC CAPITAL LETTER A WITH BREVE -04D1 ; PVALID # CYRILLIC SMALL LETTER A WITH BREVE -04D2 ; DISALLOWED # CYRILLIC CAPITAL LETTER A WITH DIAERESIS -04D3 ; PVALID # CYRILLIC SMALL LETTER A WITH DIAERESIS -04D4 ; DISALLOWED # CYRILLIC CAPITAL LIGATURE A IE -04D5 ; PVALID # CYRILLIC SMALL LIGATURE A IE -04D6 ; DISALLOWED # CYRILLIC CAPITAL LETTER IE WITH BREVE -04D7 ; PVALID # CYRILLIC SMALL LETTER IE WITH BREVE -04D8 ; DISALLOWED # CYRILLIC CAPITAL LETTER SCHWA -04D9 ; PVALID # CYRILLIC SMALL LETTER SCHWA -04DA ; DISALLOWED # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS -04DB ; PVALID # CYRILLIC SMALL LETTER SCHWA WITH DIAERESIS -04DC ; DISALLOWED # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS -04DD ; PVALID # CYRILLIC SMALL LETTER ZHE WITH DIAERESIS -04DE ; DISALLOWED # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS -04DF ; PVALID # CYRILLIC SMALL LETTER ZE WITH DIAERESIS -04E0 ; DISALLOWED # CYRILLIC CAPITAL LETTER ABKHASIAN DZE -04E1 ; PVALID # CYRILLIC SMALL LETTER ABKHASIAN DZE -04E2 ; DISALLOWED # CYRILLIC CAPITAL LETTER I WITH MACRON -04E3 ; PVALID # CYRILLIC SMALL LETTER I WITH MACRON -04E4 ; DISALLOWED # CYRILLIC CAPITAL LETTER I WITH DIAERESIS -04E5 ; PVALID # CYRILLIC SMALL LETTER I WITH DIAERESIS -04E6 ; DISALLOWED # CYRILLIC CAPITAL 
LETTER O WITH DIAERESIS -04E7 ; PVALID # CYRILLIC SMALL LETTER O WITH DIAERESIS -04E8 ; DISALLOWED # CYRILLIC CAPITAL LETTER BARRED O -04E9 ; PVALID # CYRILLIC SMALL LETTER BARRED O -04EA ; DISALLOWED # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERE -04EB ; PVALID # CYRILLIC SMALL LETTER BARRED O WITH DIAERESI -04EC ; DISALLOWED # CYRILLIC CAPITAL LETTER E WITH DIAERESIS -04ED ; PVALID # CYRILLIC SMALL LETTER E WITH DIAERESIS -04EE ; DISALLOWED # CYRILLIC CAPITAL LETTER U WITH MACRON -04EF ; PVALID # CYRILLIC SMALL LETTER U WITH MACRON -04F0 ; DISALLOWED # CYRILLIC CAPITAL LETTER U WITH DIAERESIS -04F1 ; PVALID # CYRILLIC SMALL LETTER U WITH DIAERESIS -04F2 ; DISALLOWED # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE -04F3 ; PVALID # CYRILLIC SMALL LETTER U WITH DOUBLE ACUTE -04F4 ; DISALLOWED # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS -04F5 ; PVALID # CYRILLIC SMALL LETTER CHE WITH DIAERESIS -04F6 ; DISALLOWED # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER -04F7 ; PVALID # CYRILLIC SMALL LETTER GHE WITH DESCENDER -04F8 ; DISALLOWED # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS -04F9 ; PVALID # CYRILLIC SMALL LETTER YERU WITH DIAERESIS -04FA ; DISALLOWED # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND -04FB ; PVALID # CYRILLIC SMALL LETTER GHE WITH STROKE AND HO -04FC ; DISALLOWED # CYRILLIC CAPITAL LETTER HA WITH HOOK -04FD ; PVALID # CYRILLIC SMALL LETTER HA WITH HOOK -04FE ; DISALLOWED # CYRILLIC CAPITAL LETTER HA WITH STROKE -04FF ; PVALID # CYRILLIC SMALL LETTER HA WITH STROKE -0500 ; DISALLOWED # CYRILLIC CAPITAL LETTER KOMI DE -0501 ; PVALID # CYRILLIC SMALL LETTER KOMI DE -0502 ; DISALLOWED # CYRILLIC CAPITAL LETTER KOMI DJE -0503 ; PVALID # CYRILLIC SMALL LETTER KOMI DJE -0504 ; DISALLOWED # CYRILLIC CAPITAL LETTER KOMI ZJE -0505 ; PVALID # CYRILLIC SMALL LETTER KOMI ZJE -0506 ; DISALLOWED # CYRILLIC CAPITAL LETTER KOMI DZJE -0507 ; PVALID # CYRILLIC SMALL LETTER KOMI DZJE -0508 ; DISALLOWED # CYRILLIC CAPITAL LETTER KOMI LJE -0509 ; PVALID # CYRILLIC SMALL 
LETTER KOMI LJE -050A ; DISALLOWED # CYRILLIC CAPITAL LETTER KOMI NJE -050B ; PVALID # CYRILLIC SMALL LETTER KOMI NJE -050C ; DISALLOWED # CYRILLIC CAPITAL LETTER KOMI SJE -050D ; PVALID # CYRILLIC SMALL LETTER KOMI SJE -050E ; DISALLOWED # CYRILLIC CAPITAL LETTER KOMI TJE -050F ; PVALID # CYRILLIC SMALL LETTER KOMI TJE -0510 ; DISALLOWED # CYRILLIC CAPITAL LETTER REVERSED ZE -0511 ; PVALID # CYRILLIC SMALL LETTER REVERSED ZE -0512 ; DISALLOWED # CYRILLIC CAPITAL LETTER EL WITH HOOK -0513 ; PVALID # CYRILLIC SMALL LETTER EL WITH HOOK -0514 ; DISALLOWED # CYRILLIC CAPITAL LETTER LHA -0515 ; PVALID # CYRILLIC SMALL LETTER LHA -0516 ; DISALLOWED # CYRILLIC CAPITAL LETTER RHA -0517 ; PVALID # CYRILLIC SMALL LETTER RHA -0518 ; DISALLOWED # CYRILLIC CAPITAL LETTER YAE -0519 ; PVALID # CYRILLIC SMALL LETTER YAE -051A ; DISALLOWED # CYRILLIC CAPITAL LETTER QA -051B ; PVALID # CYRILLIC SMALL LETTER QA -051C ; DISALLOWED # CYRILLIC CAPITAL LETTER WE -051D ; PVALID # CYRILLIC SMALL LETTER WE -051E ; DISALLOWED # CYRILLIC CAPITAL LETTER ALEUT KA -051F ; PVALID # CYRILLIC SMALL LETTER ALEUT KA -0520 ; DISALLOWED # CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK -0521 ; PVALID # CYRILLIC SMALL LETTER EL WITH MIDDLE HOOK -0522 ; DISALLOWED # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK -0523 ; PVALID # CYRILLIC SMALL LETTER EN WITH MIDDLE HOOK -0524 ; DISALLOWED # CYRILLIC CAPITAL LETTER PE WITH DESCENDER -0525 ; PVALID # CYRILLIC SMALL LETTER PE WITH DESCENDER -0526..0530 ; UNASSIGNED # .. -0531..0556 ; DISALLOWED # ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITA -0557..0558 ; UNASSIGNED # .. -0559 ; PVALID # ARMENIAN MODIFIER LETTER LEFT HALF RING -055A..055F ; DISALLOWED # ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION M -0560 ; UNASSIGNED # -0561..0586 ; PVALID # ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LE -0587 ; DISALLOWED # ARMENIAN SMALL LIGATURE ECH YIWN -0588 ; UNASSIGNED # -0589..058A ; DISALLOWED # ARMENIAN FULL STOP..ARMENIAN HYPHEN -058B..0590 ; UNASSIGNED # .. 
-0591..05BD ; PVALID # HEBREW ACCENT ETNAHTA..HEBREW POINT METEG -05BE ; DISALLOWED # HEBREW PUNCTUATION MAQAF -05BF ; PVALID # HEBREW POINT RAFE -05C0 ; DISALLOWED # HEBREW PUNCTUATION PASEQ -05C1..05C2 ; PVALID # HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT -05C3 ; DISALLOWED # HEBREW PUNCTUATION SOF PASUQ -05C4..05C5 ; PVALID # HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT -05C6 ; DISALLOWED # HEBREW PUNCTUATION NUN HAFUKHA -05C7 ; PVALID # HEBREW POINT QAMATS QATAN -05C8..05CF ; UNASSIGNED # .. -05D0..05EA ; PVALID # HEBREW LETTER ALEF..HEBREW LETTER TAV -05EB..05EF ; UNASSIGNED # .. -05F0..05F2 ; PVALID # HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW L -05F3..05F4 ; CONTEXTO # HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATIO -05F5..05FF ; UNASSIGNED # .. -0600..0603 ; DISALLOWED # ARABIC NUMBER SIGN..ARABIC SIGN SAFHA -0604..0605 ; UNASSIGNED # .. -0606..060F ; DISALLOWED # ARABIC-INDIC CUBE ROOT..ARABIC SIGN MISRA -0610..061A ; PVALID # ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..AR -061B ; DISALLOWED # ARABIC SEMICOLON -061C..061D ; UNASSIGNED # .. 
-061E..061F ; DISALLOWED # ARABIC TRIPLE DOT PUNCTUATION MARK..ARABIC Q -0620 ; UNASSIGNED # -0621..063F ; PVALID # ARABIC LETTER HAMZA..ARABIC LETTER FARSI YEH -0640 ; DISALLOWED # ARABIC TATWEEL -0641..065E ; PVALID # ARABIC LETTER FEH..ARABIC FATHA WITH TWO DOT -065F ; UNASSIGNED # -0660..0669 ; CONTEXTO # ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT -066A..066D ; DISALLOWED # ARABIC PERCENT SIGN..ARABIC FIVE POINTED STA -066E..0674 ; PVALID # ARABIC LETTER DOTLESS BEH..ARABIC LETTER HIG -0675..0678 ; DISALLOWED # ARABIC LETTER HIGH HAMZA ALEF..ARABIC LETTER -0679..06D3 ; PVALID # ARABIC LETTER TTEH..ARABIC LETTER YEH BARREE -06D4 ; DISALLOWED # ARABIC FULL STOP -06D5..06DC ; PVALID # ARABIC LETTER AE..ARABIC SMALL HIGH SEEN -06DD..06DE ; DISALLOWED # ARABIC END OF AYAH..ARABIC START OF RUB EL H -06DF..06E8 ; PVALID # ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL -06E9 ; DISALLOWED # ARABIC PLACE OF SAJDAH -06EA..06EF ; PVALID # ARABIC EMPTY CENTRE LOW STOP..ARABIC LETTER -06F0..06F9 ; CONTEXTO # EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED A -06FA..06FF ; PVALID # ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC L -0700..070D ; DISALLOWED # SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN AST -070E ; UNASSIGNED # -070F ; DISALLOWED # SYRIAC ABBREVIATION MARK -0710..074A ; PVALID # SYRIAC LETTER ALAPH..SYRIAC BARREKH -074B..074C ; UNASSIGNED # .. -074D..07B1 ; PVALID # SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER N -07B2..07BF ; UNASSIGNED # .. -07C0..07F5 ; PVALID # NKO DIGIT ZERO..NKO LOW TONE APOSTROPHE -07F6..07FA ; DISALLOWED # NKO SYMBOL OO DENNEN..NKO LAJANYALAN -07FB..07FF ; UNASSIGNED # .. -0800..082D ; PVALID # SAMARITAN LETTER ALAF..SAMARITAN MARK NEQUDA -082E..082F ; UNASSIGNED # .. -0830..083E ; DISALLOWED # SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUN -083F..08FF ; UNASSIGNED # .. -0900..0939 ; PVALID # DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANA -093A..093B ; UNASSIGNED # .. 
-093C..094E ; PVALID # DEVANAGARI SIGN NUKTA..DEVANAGARI VOWEL SIGN -094F ; UNASSIGNED # -0950..0955 ; PVALID # DEVANAGARI OM..DEVANAGARI VOWEL SIGN CANDRA -0956..0957 ; UNASSIGNED # .. -0958..095F ; DISALLOWED # DEVANAGARI LETTER QA..DEVANAGARI LETTER YYA -0960..0963 ; PVALID # DEVANAGARI LETTER VOCALIC RR..DEVANAGARI VOW -0964..0965 ; DISALLOWED # DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA -0966..096F ; PVALID # DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE -0970 ; DISALLOWED # DEVANAGARI ABBREVIATION SIGN -0971..0972 ; PVALID # DEVANAGARI SIGN HIGH SPACING DOT..DEVANAGARI -0973..0978 ; UNASSIGNED # .. -0979..097F ; PVALID # DEVANAGARI LETTER ZHA..DEVANAGARI LETTER BBA -0980 ; UNASSIGNED # -0981..0983 ; PVALID # BENGALI SIGN CANDRABINDU..BENGALI SIGN VISAR -0984 ; UNASSIGNED # -0985..098C ; PVALID # BENGALI LETTER A..BENGALI LETTER VOCALIC L -098D..098E ; UNASSIGNED # .. -098F..0990 ; PVALID # BENGALI LETTER E..BENGALI LETTER AI -0991..0992 ; UNASSIGNED # .. -0993..09A8 ; PVALID # BENGALI LETTER O..BENGALI LETTER NA -09A9 ; UNASSIGNED # -09AA..09B0 ; PVALID # BENGALI LETTER PA..BENGALI LETTER RA -09B1 ; UNASSIGNED # -09B2 ; PVALID # BENGALI LETTER LA -09B3..09B5 ; UNASSIGNED # .. -09B6..09B9 ; PVALID # BENGALI LETTER SHA..BENGALI LETTER HA -09BA..09BB ; UNASSIGNED # .. -09BC..09C4 ; PVALID # BENGALI SIGN NUKTA..BENGALI VOWEL SIGN VOCAL -09C5..09C6 ; UNASSIGNED # .. -09C7..09C8 ; PVALID # BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI -09C9..09CA ; UNASSIGNED # .. -09CB..09CE ; PVALID # BENGALI VOWEL SIGN O..BENGALI LETTER KHANDA -09CF..09D6 ; UNASSIGNED # .. -09D7 ; PVALID # BENGALI AU LENGTH MARK -09D8..09DB ; UNASSIGNED # .. -09DC..09DD ; DISALLOWED # BENGALI LETTER RRA..BENGALI LETTER RHA -09DE ; UNASSIGNED # -09DF ; DISALLOWED # BENGALI LETTER YYA -09E0..09E3 ; PVALID # BENGALI LETTER VOCALIC RR..BENGALI VOWEL SIG -09E4..09E5 ; UNASSIGNED # .. 
-09E6..09F1 ; PVALID # BENGALI DIGIT ZERO..BENGALI LETTER RA WITH L -09F2..09FB ; DISALLOWED # BENGALI RUPEE MARK..BENGALI GANDA MARK -09FC..0A00 ; UNASSIGNED # .. -0A01..0A03 ; PVALID # GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN VISA -0A04 ; UNASSIGNED # -0A05..0A0A ; PVALID # GURMUKHI LETTER A..GURMUKHI LETTER UU -0A0B..0A0E ; UNASSIGNED # .. -0A0F..0A10 ; PVALID # GURMUKHI LETTER EE..GURMUKHI LETTER AI -0A11..0A12 ; UNASSIGNED # .. -0A13..0A28 ; PVALID # GURMUKHI LETTER OO..GURMUKHI LETTER NA -0A29 ; UNASSIGNED # -0A2A..0A30 ; PVALID # GURMUKHI LETTER PA..GURMUKHI LETTER RA -0A31 ; UNASSIGNED # -0A32 ; PVALID # GURMUKHI LETTER LA -0A33 ; DISALLOWED # GURMUKHI LETTER LLA -0A34 ; UNASSIGNED # -0A35 ; PVALID # GURMUKHI LETTER VA -0A36 ; DISALLOWED # GURMUKHI LETTER SHA -0A37 ; UNASSIGNED # -0A38..0A39 ; PVALID # GURMUKHI LETTER SA..GURMUKHI LETTER HA -0A3A..0A3B ; UNASSIGNED # .. -0A3C ; PVALID # GURMUKHI SIGN NUKTA -0A3D ; UNASSIGNED # -0A3E..0A42 ; PVALID # GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN -0A43..0A46 ; UNASSIGNED # .. -0A47..0A48 ; PVALID # GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN -0A49..0A4A ; UNASSIGNED # .. -0A4B..0A4D ; PVALID # GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA -0A4E..0A50 ; UNASSIGNED # .. -0A51 ; PVALID # GURMUKHI SIGN UDAAT -0A52..0A58 ; UNASSIGNED # .. -0A59..0A5B ; DISALLOWED # GURMUKHI LETTER KHHA..GURMUKHI LETTER ZA -0A5C ; PVALID # GURMUKHI LETTER RRA -0A5D ; UNASSIGNED # -0A5E ; DISALLOWED # GURMUKHI LETTER FA -0A5F..0A65 ; UNASSIGNED # .. -0A66..0A75 ; PVALID # GURMUKHI DIGIT ZERO..GURMUKHI SIGN YAKASH -0A76..0A80 ; UNASSIGNED # .. 
-0A81..0A83 ; PVALID # GUJARATI SIGN CANDRABINDU..GUJARATI SIGN VIS -0A84 ; UNASSIGNED # -0A85..0A8D ; PVALID # GUJARATI LETTER A..GUJARATI VOWEL CANDRA E -0A8E ; UNASSIGNED # -0A8F..0A91 ; PVALID # GUJARATI LETTER E..GUJARATI VOWEL CANDRA O -0A92 ; UNASSIGNED # -0A93..0AA8 ; PVALID # GUJARATI LETTER O..GUJARATI LETTER NA -0AA9 ; UNASSIGNED # -0AAA..0AB0 ; PVALID # GUJARATI LETTER PA..GUJARATI LETTER RA -0AB1 ; UNASSIGNED # -0AB2..0AB3 ; PVALID # GUJARATI LETTER LA..GUJARATI LETTER LLA -0AB4 ; UNASSIGNED # -0AB5..0AB9 ; PVALID # GUJARATI LETTER VA..GUJARATI LETTER HA -0ABA..0ABB ; UNASSIGNED # .. -0ABC..0AC5 ; PVALID # GUJARATI SIGN NUKTA..GUJARATI VOWEL SIGN CAN -0AC6 ; UNASSIGNED # -0AC7..0AC9 ; PVALID # GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN C -0ACA ; UNASSIGNED # -0ACB..0ACD ; PVALID # GUJARATI VOWEL SIGN O..GUJARATI SIGN VIRAMA -0ACE..0ACF ; UNASSIGNED # .. -0AD0 ; PVALID # GUJARATI OM -0AD1..0ADF ; UNASSIGNED # .. -0AE0..0AE3 ; PVALID # GUJARATI LETTER VOCALIC RR..GUJARATI VOWEL S -0AE4..0AE5 ; UNASSIGNED # .. -0AE6..0AEF ; PVALID # GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE -0AF0 ; UNASSIGNED # -0AF1 ; DISALLOWED # GUJARATI RUPEE SIGN -0AF2..0B00 ; UNASSIGNED # .. -0B01..0B03 ; PVALID # ORIYA SIGN CANDRABINDU..ORIYA SIGN VISARGA -0B04 ; UNASSIGNED # -0B05..0B0C ; PVALID # ORIYA LETTER A..ORIYA LETTER VOCALIC L -0B0D..0B0E ; UNASSIGNED # .. -0B0F..0B10 ; PVALID # ORIYA LETTER E..ORIYA LETTER AI -0B11..0B12 ; UNASSIGNED # .. -0B13..0B28 ; PVALID # ORIYA LETTER O..ORIYA LETTER NA -0B29 ; UNASSIGNED # -0B2A..0B30 ; PVALID # ORIYA LETTER PA..ORIYA LETTER RA -0B31 ; UNASSIGNED # -0B32..0B33 ; PVALID # ORIYA LETTER LA..ORIYA LETTER LLA -0B34 ; UNASSIGNED # -0B35..0B39 ; PVALID # ORIYA LETTER VA..ORIYA LETTER HA -0B3A..0B3B ; UNASSIGNED # .. -0B3C..0B44 ; PVALID # ORIYA SIGN NUKTA..ORIYA VOWEL SIGN VOCALIC R -0B45..0B46 ; UNASSIGNED # .. -0B47..0B48 ; PVALID # ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI -0B49..0B4A ; UNASSIGNED # .. 
-0B4B..0B4D ; PVALID # ORIYA VOWEL SIGN O..ORIYA SIGN VIRAMA -0B4E..0B55 ; UNASSIGNED # .. -0B56..0B57 ; PVALID # ORIYA AI LENGTH MARK..ORIYA AU LENGTH MARK -0B58..0B5B ; UNASSIGNED # .. -0B5C..0B5D ; DISALLOWED # ORIYA LETTER RRA..ORIYA LETTER RHA -0B5E ; UNASSIGNED # -0B5F..0B63 ; PVALID # ORIYA LETTER YYA..ORIYA VOWEL SIGN VOCALIC L -0B64..0B65 ; UNASSIGNED # .. -0B66..0B6F ; PVALID # ORIYA DIGIT ZERO..ORIYA DIGIT NINE -0B70 ; DISALLOWED # ORIYA ISSHAR -0B71 ; PVALID # ORIYA LETTER WA -0B72..0B81 ; UNASSIGNED # .. -0B82..0B83 ; PVALID # TAMIL SIGN ANUSVARA..TAMIL SIGN VISARGA -0B84 ; UNASSIGNED # -0B85..0B8A ; PVALID # TAMIL LETTER A..TAMIL LETTER UU -0B8B..0B8D ; UNASSIGNED # .. -0B8E..0B90 ; PVALID # TAMIL LETTER E..TAMIL LETTER AI -0B91 ; UNASSIGNED # -0B92..0B95 ; PVALID # TAMIL LETTER O..TAMIL LETTER KA -0B96..0B98 ; UNASSIGNED # .. -0B99..0B9A ; PVALID # TAMIL LETTER NGA..TAMIL LETTER CA -0B9B ; UNASSIGNED # -0B9C ; PVALID # TAMIL LETTER JA -0B9D ; UNASSIGNED # -0B9E..0B9F ; PVALID # TAMIL LETTER NYA..TAMIL LETTER TTA -0BA0..0BA2 ; UNASSIGNED # .. -0BA3..0BA4 ; PVALID # TAMIL LETTER NNA..TAMIL LETTER TA -0BA5..0BA7 ; UNASSIGNED # .. -0BA8..0BAA ; PVALID # TAMIL LETTER NA..TAMIL LETTER PA -0BAB..0BAD ; UNASSIGNED # .. -0BAE..0BB9 ; PVALID # TAMIL LETTER MA..TAMIL LETTER HA -0BBA..0BBD ; UNASSIGNED # .. -0BBE..0BC2 ; PVALID # TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN UU -0BC3..0BC5 ; UNASSIGNED # .. -0BC6..0BC8 ; PVALID # TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI -0BC9 ; UNASSIGNED # -0BCA..0BCD ; PVALID # TAMIL VOWEL SIGN O..TAMIL SIGN VIRAMA -0BCE..0BCF ; UNASSIGNED # .. -0BD0 ; PVALID # TAMIL OM -0BD1..0BD6 ; UNASSIGNED # .. -0BD7 ; PVALID # TAMIL AU LENGTH MARK -0BD8..0BE5 ; UNASSIGNED # .. -0BE6..0BEF ; PVALID # TAMIL DIGIT ZERO..TAMIL DIGIT NINE -0BF0..0BFA ; DISALLOWED # TAMIL NUMBER TEN..TAMIL NUMBER SIGN -0BFB..0C00 ; UNASSIGNED # .. 
-0C01..0C03 ; PVALID # TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA -0C04 ; UNASSIGNED # -0C05..0C0C ; PVALID # TELUGU LETTER A..TELUGU LETTER VOCALIC L -0C0D ; UNASSIGNED # -0C0E..0C10 ; PVALID # TELUGU LETTER E..TELUGU LETTER AI -0C11 ; UNASSIGNED # -0C12..0C28 ; PVALID # TELUGU LETTER O..TELUGU LETTER NA -0C29 ; UNASSIGNED # -0C2A..0C33 ; PVALID # TELUGU LETTER PA..TELUGU LETTER LLA -0C34 ; UNASSIGNED # -0C35..0C39 ; PVALID # TELUGU LETTER VA..TELUGU LETTER HA -0C3A..0C3C ; UNASSIGNED # .. -0C3D..0C44 ; PVALID # TELUGU SIGN AVAGRAHA..TELUGU VOWEL SIGN VOCA -0C45 ; UNASSIGNED # -0C46..0C48 ; PVALID # TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI -0C49 ; UNASSIGNED # -0C4A..0C4D ; PVALID # TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA -0C4E..0C54 ; UNASSIGNED # .. -0C55..0C56 ; PVALID # TELUGU LENGTH MARK..TELUGU AI LENGTH MARK -0C57 ; UNASSIGNED # -0C58..0C59 ; PVALID # TELUGU LETTER TSA..TELUGU LETTER DZA -0C5A..0C5F ; UNASSIGNED # .. -0C60..0C63 ; PVALID # TELUGU LETTER VOCALIC RR..TELUGU VOWEL SIGN -0C64..0C65 ; UNASSIGNED # .. -0C66..0C6F ; PVALID # TELUGU DIGIT ZERO..TELUGU DIGIT NINE -0C70..0C77 ; UNASSIGNED # .. -0C78..0C7F ; DISALLOWED # TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF -0C80..0C81 ; UNASSIGNED # .. -0C82..0C83 ; PVALID # KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA -0C84 ; UNASSIGNED # -0C85..0C8C ; PVALID # KANNADA LETTER A..KANNADA LETTER VOCALIC L -0C8D ; UNASSIGNED # -0C8E..0C90 ; PVALID # KANNADA LETTER E..KANNADA LETTER AI -0C91 ; UNASSIGNED # -0C92..0CA8 ; PVALID # KANNADA LETTER O..KANNADA LETTER NA -0CA9 ; UNASSIGNED # -0CAA..0CB3 ; PVALID # KANNADA LETTER PA..KANNADA LETTER LLA -0CB4 ; UNASSIGNED # -0CB5..0CB9 ; PVALID # KANNADA LETTER VA..KANNADA LETTER HA -0CBA..0CBB ; UNASSIGNED # .. 
-0CBC..0CC4 ; PVALID # KANNADA SIGN NUKTA..KANNADA VOWEL SIGN VOCAL -0CC5 ; UNASSIGNED # -0CC6..0CC8 ; PVALID # KANNADA VOWEL SIGN E..KANNADA VOWEL SIGN AI -0CC9 ; UNASSIGNED # -0CCA..0CCD ; PVALID # KANNADA VOWEL SIGN O..KANNADA SIGN VIRAMA -0CCE..0CD4 ; UNASSIGNED # .. -0CD5..0CD6 ; PVALID # KANNADA LENGTH MARK..KANNADA AI LENGTH MARK -0CD7..0CDD ; UNASSIGNED # .. -0CDE ; PVALID # KANNADA LETTER FA -0CDF ; UNASSIGNED # -0CE0..0CE3 ; PVALID # KANNADA LETTER VOCALIC RR..KANNADA VOWEL SIG -0CE4..0CE5 ; UNASSIGNED # .. -0CE6..0CEF ; PVALID # KANNADA DIGIT ZERO..KANNADA DIGIT NINE -0CF0 ; UNASSIGNED # -0CF1..0CF2 ; DISALLOWED # KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADH -0CF3..0D01 ; UNASSIGNED # .. -0D02..0D03 ; PVALID # MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISA -0D04 ; UNASSIGNED # -0D05..0D0C ; PVALID # MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC -0D0D ; UNASSIGNED # -0D0E..0D10 ; PVALID # MALAYALAM LETTER E..MALAYALAM LETTER AI -0D11 ; UNASSIGNED # -0D12..0D28 ; PVALID # MALAYALAM LETTER O..MALAYALAM LETTER NA -0D29 ; UNASSIGNED # -0D2A..0D39 ; PVALID # MALAYALAM LETTER PA..MALAYALAM LETTER HA -0D3A..0D3C ; UNASSIGNED # .. -0D3D..0D44 ; PVALID # MALAYALAM SIGN AVAGRAHA..MALAYALAM VOWEL SIG -0D45 ; UNASSIGNED # -0D46..0D48 ; PVALID # MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN -0D49 ; UNASSIGNED # -0D4A..0D4D ; PVALID # MALAYALAM VOWEL SIGN O..MALAYALAM SIGN VIRAM -0D4E..0D56 ; UNASSIGNED # .. -0D57 ; PVALID # MALAYALAM AU LENGTH MARK -0D58..0D5F ; UNASSIGNED # .. -0D60..0D63 ; PVALID # MALAYALAM LETTER VOCALIC RR..MALAYALAM VOWEL -0D64..0D65 ; UNASSIGNED # .. -0D66..0D6F ; PVALID # MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE -0D70..0D75 ; DISALLOWED # MALAYALAM NUMBER TEN..MALAYALAM FRACTION THR -0D76..0D78 ; UNASSIGNED # .. -0D79 ; DISALLOWED # MALAYALAM DATE MARK -0D7A..0D7F ; PVALID # MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER -0D80..0D81 ; UNASSIGNED # .. 
-0D82..0D83 ; PVALID # SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARG -0D84 ; UNASSIGNED # -0D85..0D96 ; PVALID # SINHALA LETTER AYANNA..SINHALA LETTER AUYANN -0D97..0D99 ; UNASSIGNED # .. -0D9A..0DB1 ; PVALID # SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA L -0DB2 ; UNASSIGNED # -0DB3..0DBB ; PVALID # SINHALA LETTER SANYAKA DAYANNA..SINHALA LETT -0DBC ; UNASSIGNED # -0DBD ; PVALID # SINHALA LETTER DANTAJA LAYANNA -0DBE..0DBF ; UNASSIGNED # .. -0DC0..0DC6 ; PVALID # SINHALA LETTER VAYANNA..SINHALA LETTER FAYAN -0DC7..0DC9 ; UNASSIGNED # .. -0DCA ; PVALID # SINHALA SIGN AL-LAKUNA -0DCB..0DCE ; UNASSIGNED # .. -0DCF..0DD4 ; PVALID # SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL -0DD5 ; UNASSIGNED # -0DD6 ; PVALID # SINHALA VOWEL SIGN DIGA PAA-PILLA -0DD7 ; UNASSIGNED # -0DD8..0DDF ; PVALID # SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOW -0DE0..0DF1 ; UNASSIGNED # .. -0DF2..0DF3 ; PVALID # SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHAL -0DF4 ; DISALLOWED # SINHALA PUNCTUATION KUNDDALIYA -0DF5..0E00 ; UNASSIGNED # .. -0E01..0E32 ; PVALID # THAI CHARACTER KO KAI..THAI CHARACTER SARA A -0E33 ; DISALLOWED # THAI CHARACTER SARA AM -0E34..0E3A ; PVALID # THAI CHARACTER SARA I..THAI CHARACTER PHINTH -0E3B..0E3E ; UNASSIGNED # .. -0E3F ; DISALLOWED # THAI CURRENCY SYMBOL BAHT -0E40..0E4E ; PVALID # THAI CHARACTER SARA E..THAI CHARACTER YAMAKK -0E4F ; DISALLOWED # THAI CHARACTER FONGMAN -0E50..0E59 ; PVALID # THAI DIGIT ZERO..THAI DIGIT NINE -0E5A..0E5B ; DISALLOWED # THAI CHARACTER ANGKHANKHU..THAI CHARACTER KH -0E5C..0E80 ; UNASSIGNED # .. -0E81..0E82 ; PVALID # LAO LETTER KO..LAO LETTER KHO SUNG -0E83 ; UNASSIGNED # -0E84 ; PVALID # LAO LETTER KHO TAM -0E85..0E86 ; UNASSIGNED # .. -0E87..0E88 ; PVALID # LAO LETTER NGO..LAO LETTER CO -0E89 ; UNASSIGNED # -0E8A ; PVALID # LAO LETTER SO TAM -0E8B..0E8C ; UNASSIGNED # .. -0E8D ; PVALID # LAO LETTER NYO -0E8E..0E93 ; UNASSIGNED # .. 
-0E94..0E97 ; PVALID # LAO LETTER DO..LAO LETTER THO TAM -0E98 ; UNASSIGNED # -0E99..0E9F ; PVALID # LAO LETTER NO..LAO LETTER FO SUNG -0EA0 ; UNASSIGNED # -0EA1..0EA3 ; PVALID # LAO LETTER MO..LAO LETTER LO LING -0EA4 ; UNASSIGNED # -0EA5 ; PVALID # LAO LETTER LO LOOT -0EA6 ; UNASSIGNED # -0EA7 ; PVALID # LAO LETTER WO -0EA8..0EA9 ; UNASSIGNED # .. -0EAA..0EAB ; PVALID # LAO LETTER SO SUNG..LAO LETTER HO SUNG -0EAC ; UNASSIGNED # -0EAD..0EB2 ; PVALID # LAO LETTER O..LAO VOWEL SIGN AA -0EB3 ; DISALLOWED # LAO VOWEL SIGN AM -0EB4..0EB9 ; PVALID # LAO VOWEL SIGN I..LAO VOWEL SIGN UU -0EBA ; UNASSIGNED # -0EBB..0EBD ; PVALID # LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN N -0EBE..0EBF ; UNASSIGNED # .. -0EC0..0EC4 ; PVALID # LAO VOWEL SIGN E..LAO VOWEL SIGN AI -0EC5 ; UNASSIGNED # -0EC6 ; PVALID # LAO KO LA -0EC7 ; UNASSIGNED # -0EC8..0ECD ; PVALID # LAO TONE MAI EK..LAO NIGGAHITA -0ECE..0ECF ; UNASSIGNED # .. -0ED0..0ED9 ; PVALID # LAO DIGIT ZERO..LAO DIGIT NINE -0EDA..0EDB ; UNASSIGNED # .. -0EDC..0EDD ; DISALLOWED # LAO HO NO..LAO HO MO -0EDE..0EFF ; UNASSIGNED # .. 
-0F00 ; PVALID # TIBETAN SYLLABLE OM -0F01..0F0A ; DISALLOWED # TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBET -0F0B ; PVALID # TIBETAN MARK INTERSYLLABIC TSHEG -0F0C..0F17 ; DISALLOWED # TIBETAN MARK DELIMITER TSHEG BSTAR..TIBETAN -0F18..0F19 ; PVALID # TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN -0F1A..0F1F ; DISALLOWED # TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RD -0F20..0F29 ; PVALID # TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE -0F2A..0F34 ; DISALLOWED # TIBETAN DIGIT HALF ONE..TIBETAN MARK BSDUS R -0F35 ; PVALID # TIBETAN MARK NGAS BZUNG NYI ZLA -0F36 ; DISALLOWED # TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN -0F37 ; PVALID # TIBETAN MARK NGAS BZUNG SGOR RTAGS -0F38 ; DISALLOWED # TIBETAN MARK CHE MGO -0F39 ; PVALID # TIBETAN MARK TSA -PHRU -0F3A..0F3D ; DISALLOWED # TIBETAN MARK GUG RTAGS GYON..TIBETAN MARK AN -0F3E..0F42 ; PVALID # TIBETAN SIGN YAR TSHES..TIBETAN LETTER GA -0F43 ; DISALLOWED # TIBETAN LETTER GHA -0F44..0F47 ; PVALID # TIBETAN LETTER NGA..TIBETAN LETTER JA -0F48 ; UNASSIGNED # -0F49..0F4C ; PVALID # TIBETAN LETTER NYA..TIBETAN LETTER DDA -0F4D ; DISALLOWED # TIBETAN LETTER DDHA -0F4E..0F51 ; PVALID # TIBETAN LETTER NNA..TIBETAN LETTER DA -0F52 ; DISALLOWED # TIBETAN LETTER DHA -0F53..0F56 ; PVALID # TIBETAN LETTER NA..TIBETAN LETTER BA -0F57 ; DISALLOWED # TIBETAN LETTER BHA -0F58..0F5B ; PVALID # TIBETAN LETTER MA..TIBETAN LETTER DZA -0F5C ; DISALLOWED # TIBETAN LETTER DZHA -0F5D..0F68 ; PVALID # TIBETAN LETTER WA..TIBETAN LETTER A -0F69 ; DISALLOWED # TIBETAN LETTER KSSA -0F6A..0F6C ; PVALID # TIBETAN LETTER FIXED-FORM RA..TIBETAN LETTER -0F6D..0F70 ; UNASSIGNED # .. 
-0F71..0F72 ; PVALID # TIBETAN VOWEL SIGN AA..TIBETAN VOWEL SIGN I -0F73 ; DISALLOWED # TIBETAN VOWEL SIGN II -0F74 ; PVALID # TIBETAN VOWEL SIGN U -0F75..0F79 ; DISALLOWED # TIBETAN VOWEL SIGN UU..TIBETAN VOWEL SIGN VO -0F7A..0F80 ; PVALID # TIBETAN VOWEL SIGN E..TIBETAN VOWEL SIGN REV -0F81 ; DISALLOWED # TIBETAN VOWEL SIGN REVERSED II -0F82..0F84 ; PVALID # TIBETAN SIGN NYI ZLA NAA DA..TIBETAN MARK HA -0F85 ; DISALLOWED # TIBETAN MARK PALUTA -0F86..0F8B ; PVALID # TIBETAN SIGN LCI RTAGS..TIBETAN SIGN GRU MED -0F8C..0F8F ; UNASSIGNED # .. -0F90..0F92 ; PVALID # TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOIN -0F93 ; DISALLOWED # TIBETAN SUBJOINED LETTER GHA -0F94..0F97 ; PVALID # TIBETAN SUBJOINED LETTER NGA..TIBETAN SUBJOI -0F98 ; UNASSIGNED # -0F99..0F9C ; PVALID # TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOI -0F9D ; DISALLOWED # TIBETAN SUBJOINED LETTER DDHA -0F9E..0FA1 ; PVALID # TIBETAN SUBJOINED LETTER NNA..TIBETAN SUBJOI -0FA2 ; DISALLOWED # TIBETAN SUBJOINED LETTER DHA -0FA3..0FA6 ; PVALID # TIBETAN SUBJOINED LETTER NA..TIBETAN SUBJOIN -0FA7 ; DISALLOWED # TIBETAN SUBJOINED LETTER BHA -0FA8..0FAB ; PVALID # TIBETAN SUBJOINED LETTER MA..TIBETAN SUBJOIN -0FAC ; DISALLOWED # TIBETAN SUBJOINED LETTER DZHA -0FAD..0FB8 ; PVALID # TIBETAN SUBJOINED LETTER WA..TIBETAN SUBJOIN -0FB9 ; DISALLOWED # TIBETAN SUBJOINED LETTER KSSA -0FBA..0FBC ; PVALID # TIBETAN SUBJOINED LETTER FIXED-FORM WA..TIBE -0FBD ; UNASSIGNED # -0FBE..0FC5 ; DISALLOWED # TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE -0FC6 ; PVALID # TIBETAN SYMBOL PADMA GDAN -0FC7..0FCC ; DISALLOWED # TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SY -0FCD ; UNASSIGNED # -0FCE..0FD8 ; DISALLOWED # TIBETAN SIGN RDEL NAG RDEL DKAR..LEFT-FACING -0FD9..0FFF ; UNASSIGNED # .. 
-1000..1049 ; PVALID # MYANMAR LETTER KA..MYANMAR DIGIT NINE -104A..104F ; DISALLOWED # MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL -1050..109D ; PVALID # MYANMAR LETTER SHA..MYANMAR VOWEL SIGN AITON -109E..10C5 ; DISALLOWED # MYANMAR SYMBOL SHAN ONE..GEORGIAN CAPITAL LE -10C6..10CF ; UNASSIGNED # .. -10D0..10FA ; PVALID # GEORGIAN LETTER AN..GEORGIAN LETTER AIN -10FB..10FC ; DISALLOWED # GEORGIAN PARAGRAPH SEPARATOR..MODIFIER LETTE -10FD..10FF ; UNASSIGNED # .. -1100..11FF ; DISALLOWED # HANGUL CHOSEONG KIYEOK..HANGUL JONGSEONG SSA -1200..1248 ; PVALID # ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA -1249 ; UNASSIGNED # -124A..124D ; PVALID # ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE -124E..124F ; UNASSIGNED # .. -1250..1256 ; PVALID # ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO -1257 ; UNASSIGNED # -1258 ; PVALID # ETHIOPIC SYLLABLE QHWA -1259 ; UNASSIGNED # -125A..125D ; PVALID # ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QH -125E..125F ; UNASSIGNED # .. -1260..1288 ; PVALID # ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA -1289 ; UNASSIGNED # -128A..128D ; PVALID # ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE -128E..128F ; UNASSIGNED # .. -1290..12B0 ; PVALID # ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA -12B1 ; UNASSIGNED # -12B2..12B5 ; PVALID # ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE -12B6..12B7 ; UNASSIGNED # .. -12B8..12BE ; PVALID # ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO -12BF ; UNASSIGNED # -12C0 ; PVALID # ETHIOPIC SYLLABLE KXWA -12C1 ; UNASSIGNED # -12C2..12C5 ; PVALID # ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KX -12C6..12C7 ; UNASSIGNED # .. -12C8..12D6 ; PVALID # ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHAR -12D7 ; UNASSIGNED # -12D8..1310 ; PVALID # ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA -1311 ; UNASSIGNED # -1312..1315 ; PVALID # ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE -1316..1317 ; UNASSIGNED # .. -1318..135A ; PVALID # ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA -135B..135E ; UNASSIGNED # .. 
-135F ; PVALID # ETHIOPIC COMBINING GEMINATION MARK -1360..137C ; DISALLOWED # ETHIOPIC SECTION MARK..ETHIOPIC NUMBER TEN T -137D..137F ; UNASSIGNED # .. -1380..138F ; PVALID # ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SY -1390..1399 ; DISALLOWED # ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MA -139A..139F ; UNASSIGNED # .. -13A0..13F4 ; PVALID # CHEROKEE LETTER A..CHEROKEE LETTER YV -13F5..13FF ; UNASSIGNED # .. -1400 ; DISALLOWED # CANADIAN SYLLABICS HYPHEN -1401..166C ; PVALID # CANADIAN SYLLABICS E..CANADIAN SYLLABICS CAR -166D..166E ; DISALLOWED # CANADIAN SYLLABICS CHI SIGN..CANADIAN SYLLAB -166F..167F ; PVALID # CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS B -1680 ; DISALLOWED # OGHAM SPACE MARK -1681..169A ; PVALID # OGHAM LETTER BEITH..OGHAM LETTER PEITH -169B..169C ; DISALLOWED # OGHAM FEATHER MARK..OGHAM REVERSED FEATHER M -169D..169F ; UNASSIGNED # .. -16A0..16EA ; PVALID # RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X -16EB..16F0 ; DISALLOWED # RUNIC SINGLE PUNCTUATION..RUNIC BELGTHOR SYM -16F1..16FF ; UNASSIGNED # .. -1700..170C ; PVALID # TAGALOG LETTER A..TAGALOG LETTER YA -170D ; UNASSIGNED # -170E..1714 ; PVALID # TAGALOG LETTER LA..TAGALOG SIGN VIRAMA -1715..171F ; UNASSIGNED # .. -1720..1734 ; PVALID # HANUNOO LETTER A..HANUNOO SIGN PAMUDPOD -1735..1736 ; DISALLOWED # PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DO -1737..173F ; UNASSIGNED # .. -1740..1753 ; PVALID # BUHID LETTER A..BUHID VOWEL SIGN U -1754..175F ; UNASSIGNED # .. -1760..176C ; PVALID # TAGBANWA LETTER A..TAGBANWA LETTER YA -176D ; UNASSIGNED # -176E..1770 ; PVALID # TAGBANWA LETTER LA..TAGBANWA LETTER SA -1771 ; UNASSIGNED # -1772..1773 ; PVALID # TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U -1774..177F ; UNASSIGNED # .. 
-1780..17B3 ; PVALID # KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU -17B4..17B5 ; DISALLOWED # KHMER VOWEL INHERENT AQ..KHMER VOWEL INHEREN -17B6..17D3 ; PVALID # KHMER VOWEL SIGN AA..KHMER SIGN BATHAMASAT -17D4..17D6 ; DISALLOWED # KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH -17D7 ; PVALID # KHMER SIGN LEK TOO -17D8..17DB ; DISALLOWED # KHMER SIGN BEYYAL..KHMER CURRENCY SYMBOL RIE -17DC..17DD ; PVALID # KHMER SIGN AVAKRAHASANYA..KHMER SIGN ATTHACA -17DE..17DF ; UNASSIGNED # .. -17E0..17E9 ; PVALID # KHMER DIGIT ZERO..KHMER DIGIT NINE -17EA..17EF ; UNASSIGNED # .. -17F0..17F9 ; DISALLOWED # KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK -17FA..17FF ; UNASSIGNED # .. -1800..180E ; DISALLOWED # MONGOLIAN BIRGA..MONGOLIAN VOWEL SEPARATOR -180F ; UNASSIGNED # -1810..1819 ; PVALID # MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE -181A..181F ; UNASSIGNED # .. -1820..1877 ; PVALID # MONGOLIAN LETTER A..MONGOLIAN LETTER MANCHU -1878..187F ; UNASSIGNED # .. -1880..18AA ; PVALID # MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONG -18AB..18AF ; UNASSIGNED # .. -18B0..18F5 ; PVALID # CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CA -18F6..18FF ; UNASSIGNED # .. -1900..191C ; PVALID # LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER HA -191D..191F ; UNASSIGNED # .. -1920..192B ; PVALID # LIMBU VOWEL SIGN A..LIMBU SUBJOINED LETTER W -192C..192F ; UNASSIGNED # .. -1930..193B ; PVALID # LIMBU SMALL LETTER KA..LIMBU SIGN SA-I -193C..193F ; UNASSIGNED # .. -1940 ; DISALLOWED # LIMBU SIGN LOO -1941..1943 ; UNASSIGNED # .. -1944..1945 ; DISALLOWED # LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK -1946..196D ; PVALID # LIMBU DIGIT ZERO..TAI LE LETTER AI -196E..196F ; UNASSIGNED # .. -1970..1974 ; PVALID # TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 -1975..197F ; UNASSIGNED # .. -1980..19AB ; PVALID # NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETT -19AC..19AF ; UNASSIGNED # .. -19B0..19C9 ; PVALID # NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW -19CA..19CF ; UNASSIGNED # .. 
-19D0..19DA ; PVALID # NEW TAI LUE DIGIT ZERO..NEW TAI LUE THAM DIG -19DB..19DD ; UNASSIGNED # .. -19DE..19FF ; DISALLOWED # NEW TAI LUE SIGN LAE..KHMER SYMBOL DAP-PRAM -1A00..1A1B ; PVALID # BUGINESE LETTER KA..BUGINESE VOWEL SIGN AE -1A1C..1A1D ; UNASSIGNED # .. -1A1E..1A1F ; DISALLOWED # BUGINESE PALLAWA..BUGINESE END OF SECTION -1A20..1A5E ; PVALID # TAI THAM LETTER HIGH KA..TAI THAM CONSONANT -1A5F ; UNASSIGNED # -1A60..1A7C ; PVALID # TAI THAM SIGN SAKOT..TAI THAM SIGN KHUEN-LUE -1A7D..1A7E ; UNASSIGNED # .. -1A7F..1A89 ; PVALID # TAI THAM COMBINING CRYPTOGRAMMIC DOT..TAI TH -1A8A..1A8F ; UNASSIGNED # .. -1A90..1A99 ; PVALID # TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGI -1A9A..1A9F ; UNASSIGNED # .. -1AA0..1AA6 ; DISALLOWED # TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED -1AA7 ; PVALID # TAI THAM SIGN MAI YAMOK -1AA8..1AAD ; DISALLOWED # TAI THAM SIGN KAAN..TAI THAM SIGN CAANG -1AAE..1AFF ; UNASSIGNED # .. -1B00..1B4B ; PVALID # BALINESE SIGN ULU RICEM..BALINESE LETTER ASY -1B4C..1B4F ; UNASSIGNED # .. -1B50..1B59 ; PVALID # BALINESE DIGIT ZERO..BALINESE DIGIT NINE -1B5A..1B6A ; DISALLOWED # BALINESE PANTI..BALINESE MUSICAL SYMBOL DANG -1B6B..1B73 ; PVALID # BALINESE MUSICAL SYMBOL COMBINING TEGEH..BAL -1B74..1B7C ; DISALLOWED # BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG. -1B7D..1B7F ; UNASSIGNED # .. -1B80..1BAA ; PVALID # SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PAMA -1BAB..1BAD ; UNASSIGNED # .. -1BAE..1BB9 ; PVALID # SUNDANESE LETTER KHA..SUNDANESE DIGIT NINE -1BBA..1BFF ; UNASSIGNED # .. -1C00..1C37 ; PVALID # LEPCHA LETTER KA..LEPCHA SIGN NUKTA -1C38..1C3A ; UNASSIGNED # .. -1C3B..1C3F ; DISALLOWED # LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATIO -1C40..1C49 ; PVALID # LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE -1C4A..1C4C ; UNASSIGNED # .. -1C4D..1C7D ; PVALID # LEPCHA LETTER TTA..OL CHIKI AHAD -1C7E..1C7F ; DISALLOWED # OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTU -1C80..1CCF ; UNASSIGNED # .. 
-1CD0..1CD2 ; PVALID # VEDIC TONE KARSHANA..VEDIC TONE PRENKHA -1CD3 ; DISALLOWED # VEDIC SIGN NIHSHVASA -1CD4..1CF2 ; PVALID # VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC -1CF3..1CFF ; UNASSIGNED # .. -1D00..1D2B ; PVALID # LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTE -1D2C..1D2E ; DISALLOWED # MODIFIER LETTER CAPITAL A..MODIFIER LETTER C -1D2F ; PVALID # MODIFIER LETTER CAPITAL BARRED B -1D30..1D3A ; DISALLOWED # MODIFIER LETTER CAPITAL D..MODIFIER LETTER C -1D3B ; PVALID # MODIFIER LETTER CAPITAL REVERSED N -1D3C..1D4D ; DISALLOWED # MODIFIER LETTER CAPITAL O..MODIFIER LETTER S -1D4E ; PVALID # MODIFIER LETTER SMALL TURNED I -1D4F..1D6A ; DISALLOWED # MODIFIER LETTER SMALL K..GREEK SUBSCRIPT SMA -1D6B..1D77 ; PVALID # LATIN SMALL LETTER UE..LATIN SMALL LETTER TU -1D78 ; DISALLOWED # MODIFIER LETTER CYRILLIC EN -1D79..1D9A ; PVALID # LATIN SMALL LETTER INSULAR G..LATIN SMALL LE -1D9B..1DBF ; DISALLOWED # MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER -1DC0..1DE6 ; PVALID # COMBINING DOTTED GRAVE ACCENT..COMBINING LAT -1DE7..1DFC ; UNASSIGNED # .. 
-1DFD..1DFF ; PVALID # COMBINING ALMOST EQUAL TO BELOW..COMBINING R -1E00 ; DISALLOWED # LATIN CAPITAL LETTER A WITH RING BELOW -1E01 ; PVALID # LATIN SMALL LETTER A WITH RING BELOW -1E02 ; DISALLOWED # LATIN CAPITAL LETTER B WITH DOT ABOVE -1E03 ; PVALID # LATIN SMALL LETTER B WITH DOT ABOVE -1E04 ; DISALLOWED # LATIN CAPITAL LETTER B WITH DOT BELOW -1E05 ; PVALID # LATIN SMALL LETTER B WITH DOT BELOW -1E06 ; DISALLOWED # LATIN CAPITAL LETTER B WITH LINE BELOW -1E07 ; PVALID # LATIN SMALL LETTER B WITH LINE BELOW -1E08 ; DISALLOWED # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUT -1E09 ; PVALID # LATIN SMALL LETTER C WITH CEDILLA AND ACUTE -1E0A ; DISALLOWED # LATIN CAPITAL LETTER D WITH DOT ABOVE -1E0B ; PVALID # LATIN SMALL LETTER D WITH DOT ABOVE -1E0C ; DISALLOWED # LATIN CAPITAL LETTER D WITH DOT BELOW -1E0D ; PVALID # LATIN SMALL LETTER D WITH DOT BELOW -1E0E ; DISALLOWED # LATIN CAPITAL LETTER D WITH LINE BELOW -1E0F ; PVALID # LATIN SMALL LETTER D WITH LINE BELOW -1E10 ; DISALLOWED # LATIN CAPITAL LETTER D WITH CEDILLA -1E11 ; PVALID # LATIN SMALL LETTER D WITH CEDILLA -1E12 ; DISALLOWED # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW -1E13 ; PVALID # LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW -1E14 ; DISALLOWED # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE -1E15 ; PVALID # LATIN SMALL LETTER E WITH MACRON AND GRAVE -1E16 ; DISALLOWED # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE -1E17 ; PVALID # LATIN SMALL LETTER E WITH MACRON AND ACUTE -1E18 ; DISALLOWED # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW -1E19 ; PVALID # LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW -1E1A ; DISALLOWED # LATIN CAPITAL LETTER E WITH TILDE BELOW -1E1B ; PVALID # LATIN SMALL LETTER E WITH TILDE BELOW -1E1C ; DISALLOWED # LATIN CAPITAL LETTER E WITH CEDILLA AND BREV -1E1D ; PVALID # LATIN SMALL LETTER E WITH CEDILLA AND BREVE -1E1E ; DISALLOWED # LATIN CAPITAL LETTER F WITH DOT ABOVE -1E1F ; PVALID # LATIN SMALL LETTER F WITH DOT ABOVE -1E20 ; DISALLOWED # LATIN CAPITAL LETTER G 
WITH MACRON -1E21 ; PVALID # LATIN SMALL LETTER G WITH MACRON -1E22 ; DISALLOWED # LATIN CAPITAL LETTER H WITH DOT ABOVE -1E23 ; PVALID # LATIN SMALL LETTER H WITH DOT ABOVE -1E24 ; DISALLOWED # LATIN CAPITAL LETTER H WITH DOT BELOW -1E25 ; PVALID # LATIN SMALL LETTER H WITH DOT BELOW -1E26 ; DISALLOWED # LATIN CAPITAL LETTER H WITH DIAERESIS -1E27 ; PVALID # LATIN SMALL LETTER H WITH DIAERESIS -1E28 ; DISALLOWED # LATIN CAPITAL LETTER H WITH CEDILLA -1E29 ; PVALID # LATIN SMALL LETTER H WITH CEDILLA -1E2A ; DISALLOWED # LATIN CAPITAL LETTER H WITH BREVE BELOW -1E2B ; PVALID # LATIN SMALL LETTER H WITH BREVE BELOW -1E2C ; DISALLOWED # LATIN CAPITAL LETTER I WITH TILDE BELOW -1E2D ; PVALID # LATIN SMALL LETTER I WITH TILDE BELOW -1E2E ; DISALLOWED # LATIN CAPITAL LETTER I WITH DIAERESIS AND AC -1E2F ; PVALID # LATIN SMALL LETTER I WITH DIAERESIS AND ACUT -1E30 ; DISALLOWED # LATIN CAPITAL LETTER K WITH ACUTE -1E31 ; PVALID # LATIN SMALL LETTER K WITH ACUTE -1E32 ; DISALLOWED # LATIN CAPITAL LETTER K WITH DOT BELOW -1E33 ; PVALID # LATIN SMALL LETTER K WITH DOT BELOW -1E34 ; DISALLOWED # LATIN CAPITAL LETTER K WITH LINE BELOW -1E35 ; PVALID # LATIN SMALL LETTER K WITH LINE BELOW -1E36 ; DISALLOWED # LATIN CAPITAL LETTER L WITH DOT BELOW -1E37 ; PVALID # LATIN SMALL LETTER L WITH DOT BELOW -1E38 ; DISALLOWED # LATIN CAPITAL LETTER L WITH DOT BELOW AND MA -1E39 ; PVALID # LATIN SMALL LETTER L WITH DOT BELOW AND MACR -1E3A ; DISALLOWED # LATIN CAPITAL LETTER L WITH LINE BELOW -1E3B ; PVALID # LATIN SMALL LETTER L WITH LINE BELOW -1E3C ; DISALLOWED # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW -1E3D ; PVALID # LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW -1E3E ; DISALLOWED # LATIN CAPITAL LETTER M WITH ACUTE -1E3F ; PVALID # LATIN SMALL LETTER M WITH ACUTE -1E40 ; DISALLOWED # LATIN CAPITAL LETTER M WITH DOT ABOVE -1E41 ; PVALID # LATIN SMALL LETTER M WITH DOT ABOVE -1E42 ; DISALLOWED # LATIN CAPITAL LETTER M WITH DOT BELOW -1E43 ; PVALID # LATIN SMALL LETTER M WITH DOT 
BELOW -1E44 ; DISALLOWED # LATIN CAPITAL LETTER N WITH DOT ABOVE -1E45 ; PVALID # LATIN SMALL LETTER N WITH DOT ABOVE -1E46 ; DISALLOWED # LATIN CAPITAL LETTER N WITH DOT BELOW -1E47 ; PVALID # LATIN SMALL LETTER N WITH DOT BELOW -1E48 ; DISALLOWED # LATIN CAPITAL LETTER N WITH LINE BELOW -1E49 ; PVALID # LATIN SMALL LETTER N WITH LINE BELOW -1E4A ; DISALLOWED # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW -1E4B ; PVALID # LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW -1E4C ; DISALLOWED # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE -1E4D ; PVALID # LATIN SMALL LETTER O WITH TILDE AND ACUTE -1E4E ; DISALLOWED # LATIN CAPITAL LETTER O WITH TILDE AND DIAERE -1E4F ; PVALID # LATIN SMALL LETTER O WITH TILDE AND DIAERESI -1E50 ; DISALLOWED # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE -1E51 ; PVALID # LATIN SMALL LETTER O WITH MACRON AND GRAVE -1E52 ; DISALLOWED # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE -1E53 ; PVALID # LATIN SMALL LETTER O WITH MACRON AND ACUTE -1E54 ; DISALLOWED # LATIN CAPITAL LETTER P WITH ACUTE -1E55 ; PVALID # LATIN SMALL LETTER P WITH ACUTE -1E56 ; DISALLOWED # LATIN CAPITAL LETTER P WITH DOT ABOVE -1E57 ; PVALID # LATIN SMALL LETTER P WITH DOT ABOVE -1E58 ; DISALLOWED # LATIN CAPITAL LETTER R WITH DOT ABOVE -1E59 ; PVALID # LATIN SMALL LETTER R WITH DOT ABOVE -1E5A ; DISALLOWED # LATIN CAPITAL LETTER R WITH DOT BELOW -1E5B ; PVALID # LATIN SMALL LETTER R WITH DOT BELOW -1E5C ; DISALLOWED # LATIN CAPITAL LETTER R WITH DOT BELOW AND MA -1E5D ; PVALID # LATIN SMALL LETTER R WITH DOT BELOW AND MACR -1E5E ; DISALLOWED # LATIN CAPITAL LETTER R WITH LINE BELOW -1E5F ; PVALID # LATIN SMALL LETTER R WITH LINE BELOW -1E60 ; DISALLOWED # LATIN CAPITAL LETTER S WITH DOT ABOVE -1E61 ; PVALID # LATIN SMALL LETTER S WITH DOT ABOVE -1E62 ; DISALLOWED # LATIN CAPITAL LETTER S WITH DOT BELOW -1E63 ; PVALID # LATIN SMALL LETTER S WITH DOT BELOW -1E64 ; DISALLOWED # LATIN CAPITAL LETTER S WITH ACUTE AND DOT AB -1E65 ; PVALID # LATIN SMALL LETTER S WITH ACUTE AND 
DOT ABOV -1E66 ; DISALLOWED # LATIN CAPITAL LETTER S WITH CARON AND DOT AB -1E67 ; PVALID # LATIN SMALL LETTER S WITH CARON AND DOT ABOV -1E68 ; DISALLOWED # LATIN CAPITAL LETTER S WITH DOT BELOW AND DO -1E69 ; PVALID # LATIN SMALL LETTER S WITH DOT BELOW AND DOT -1E6A ; DISALLOWED # LATIN CAPITAL LETTER T WITH DOT ABOVE -1E6B ; PVALID # LATIN SMALL LETTER T WITH DOT ABOVE -1E6C ; DISALLOWED # LATIN CAPITAL LETTER T WITH DOT BELOW -1E6D ; PVALID # LATIN SMALL LETTER T WITH DOT BELOW -1E6E ; DISALLOWED # LATIN CAPITAL LETTER T WITH LINE BELOW -1E6F ; PVALID # LATIN SMALL LETTER T WITH LINE BELOW -1E70 ; DISALLOWED # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW -1E71 ; PVALID # LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW -1E72 ; DISALLOWED # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW -1E73 ; PVALID # LATIN SMALL LETTER U WITH DIAERESIS BELOW -1E74 ; DISALLOWED # LATIN CAPITAL LETTER U WITH TILDE BELOW -1E75 ; PVALID # LATIN SMALL LETTER U WITH TILDE BELOW -1E76 ; DISALLOWED # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW -1E77 ; PVALID # LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW -1E78 ; DISALLOWED # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE -1E79 ; PVALID # LATIN SMALL LETTER U WITH TILDE AND ACUTE -1E7A ; DISALLOWED # LATIN CAPITAL LETTER U WITH MACRON AND DIAER -1E7B ; PVALID # LATIN SMALL LETTER U WITH MACRON AND DIAERES -1E7C ; DISALLOWED # LATIN CAPITAL LETTER V WITH TILDE -1E7D ; PVALID # LATIN SMALL LETTER V WITH TILDE -1E7E ; DISALLOWED # LATIN CAPITAL LETTER V WITH DOT BELOW -1E7F ; PVALID # LATIN SMALL LETTER V WITH DOT BELOW -1E80 ; DISALLOWED # LATIN CAPITAL LETTER W WITH GRAVE -1E81 ; PVALID # LATIN SMALL LETTER W WITH GRAVE -1E82 ; DISALLOWED # LATIN CAPITAL LETTER W WITH ACUTE -1E83 ; PVALID # LATIN SMALL LETTER W WITH ACUTE -1E84 ; DISALLOWED # LATIN CAPITAL LETTER W WITH DIAERESIS -1E85 ; PVALID # LATIN SMALL LETTER W WITH DIAERESIS -1E86 ; DISALLOWED # LATIN CAPITAL LETTER W WITH DOT ABOVE -1E87 ; PVALID # LATIN SMALL LETTER W WITH DOT ABOVE -1E88 
; DISALLOWED # LATIN CAPITAL LETTER W WITH DOT BELOW -1E89 ; PVALID # LATIN SMALL LETTER W WITH DOT BELOW -1E8A ; DISALLOWED # LATIN CAPITAL LETTER X WITH DOT ABOVE -1E8B ; PVALID # LATIN SMALL LETTER X WITH DOT ABOVE -1E8C ; DISALLOWED # LATIN CAPITAL LETTER X WITH DIAERESIS -1E8D ; PVALID # LATIN SMALL LETTER X WITH DIAERESIS -1E8E ; DISALLOWED # LATIN CAPITAL LETTER Y WITH DOT ABOVE -1E8F ; PVALID # LATIN SMALL LETTER Y WITH DOT ABOVE -1E90 ; DISALLOWED # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX -1E91 ; PVALID # LATIN SMALL LETTER Z WITH CIRCUMFLEX -1E92 ; DISALLOWED # LATIN CAPITAL LETTER Z WITH DOT BELOW -1E93 ; PVALID # LATIN SMALL LETTER Z WITH DOT BELOW -1E94 ; DISALLOWED # LATIN CAPITAL LETTER Z WITH LINE BELOW -1E95..1E99 ; PVALID # LATIN SMALL LETTER Z WITH LINE BELOW..LATIN -1E9A..1E9B ; DISALLOWED # LATIN SMALL LETTER A WITH RIGHT HALF RING..L -1E9C..1E9D ; PVALID # LATIN SMALL LETTER LONG S WITH DIAGONAL STRO -1E9E ; DISALLOWED # LATIN CAPITAL LETTER SHARP S -1E9F ; PVALID # LATIN SMALL LETTER DELTA -1EA0 ; DISALLOWED # LATIN CAPITAL LETTER A WITH DOT BELOW -1EA1 ; PVALID # LATIN SMALL LETTER A WITH DOT BELOW -1EA2 ; DISALLOWED # LATIN CAPITAL LETTER A WITH HOOK ABOVE -1EA3 ; PVALID # LATIN SMALL LETTER A WITH HOOK ABOVE -1EA4 ; DISALLOWED # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND A -1EA5 ; PVALID # LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACU -1EA6 ; DISALLOWED # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND G -1EA7 ; PVALID # LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRA -1EA8 ; DISALLOWED # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND H -1EA9 ; PVALID # LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOO -1EAA ; DISALLOWED # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND T -1EAB ; PVALID # LATIN SMALL LETTER A WITH CIRCUMFLEX AND TIL -1EAC ; DISALLOWED # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND D -1EAD ; PVALID # LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT -1EAE ; DISALLOWED # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE -1EAF ; PVALID # LATIN SMALL LETTER A 
WITH BREVE AND ACUTE -1EB0 ; DISALLOWED # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE -1EB1 ; PVALID # LATIN SMALL LETTER A WITH BREVE AND GRAVE -1EB2 ; DISALLOWED # LATIN CAPITAL LETTER A WITH BREVE AND HOOK A -1EB3 ; PVALID # LATIN SMALL LETTER A WITH BREVE AND HOOK ABO -1EB4 ; DISALLOWED # LATIN CAPITAL LETTER A WITH BREVE AND TILDE -1EB5 ; PVALID # LATIN SMALL LETTER A WITH BREVE AND TILDE -1EB6 ; DISALLOWED # LATIN CAPITAL LETTER A WITH BREVE AND DOT BE -1EB7 ; PVALID # LATIN SMALL LETTER A WITH BREVE AND DOT BELO -1EB8 ; DISALLOWED # LATIN CAPITAL LETTER E WITH DOT BELOW -1EB9 ; PVALID # LATIN SMALL LETTER E WITH DOT BELOW -1EBA ; DISALLOWED # LATIN CAPITAL LETTER E WITH HOOK ABOVE -1EBB ; PVALID # LATIN SMALL LETTER E WITH HOOK ABOVE -1EBC ; DISALLOWED # LATIN CAPITAL LETTER E WITH TILDE -1EBD ; PVALID # LATIN SMALL LETTER E WITH TILDE -1EBE ; DISALLOWED # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND A -1EBF ; PVALID # LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACU -1EC0 ; DISALLOWED # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND G -1EC1 ; PVALID # LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRA -1EC2 ; DISALLOWED # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND H -1EC3 ; PVALID # LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOO -1EC4 ; DISALLOWED # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND T -1EC5 ; PVALID # LATIN SMALL LETTER E WITH CIRCUMFLEX AND TIL -1EC6 ; DISALLOWED # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND D -1EC7 ; PVALID # LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT -1EC8 ; DISALLOWED # LATIN CAPITAL LETTER I WITH HOOK ABOVE -1EC9 ; PVALID # LATIN SMALL LETTER I WITH HOOK ABOVE -1ECA ; DISALLOWED # LATIN CAPITAL LETTER I WITH DOT BELOW -1ECB ; PVALID # LATIN SMALL LETTER I WITH DOT BELOW -1ECC ; DISALLOWED # LATIN CAPITAL LETTER O WITH DOT BELOW -1ECD ; PVALID # LATIN SMALL LETTER O WITH DOT BELOW -1ECE ; DISALLOWED # LATIN CAPITAL LETTER O WITH HOOK ABOVE -1ECF ; PVALID # LATIN SMALL LETTER O WITH HOOK ABOVE -1ED0 ; DISALLOWED # LATIN CAPITAL LETTER O WITH 
CIRCUMFLEX AND A -1ED1 ; PVALID # LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACU -1ED2 ; DISALLOWED # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND G -1ED3 ; PVALID # LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRA -1ED4 ; DISALLOWED # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND H -1ED5 ; PVALID # LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOO -1ED6 ; DISALLOWED # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND T -1ED7 ; PVALID # LATIN SMALL LETTER O WITH CIRCUMFLEX AND TIL -1ED8 ; DISALLOWED # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND D -1ED9 ; PVALID # LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT -1EDA ; DISALLOWED # LATIN CAPITAL LETTER O WITH HORN AND ACUTE -1EDB ; PVALID # LATIN SMALL LETTER O WITH HORN AND ACUTE -1EDC ; DISALLOWED # LATIN CAPITAL LETTER O WITH HORN AND GRAVE -1EDD ; PVALID # LATIN SMALL LETTER O WITH HORN AND GRAVE -1EDE ; DISALLOWED # LATIN CAPITAL LETTER O WITH HORN AND HOOK AB -1EDF ; PVALID # LATIN SMALL LETTER O WITH HORN AND HOOK ABOV -1EE0 ; DISALLOWED # LATIN CAPITAL LETTER O WITH HORN AND TILDE -1EE1 ; PVALID # LATIN SMALL LETTER O WITH HORN AND TILDE -1EE2 ; DISALLOWED # LATIN CAPITAL LETTER O WITH HORN AND DOT BEL -1EE3 ; PVALID # LATIN SMALL LETTER O WITH HORN AND DOT BELOW -1EE4 ; DISALLOWED # LATIN CAPITAL LETTER U WITH DOT BELOW -1EE5 ; PVALID # LATIN SMALL LETTER U WITH DOT BELOW -1EE6 ; DISALLOWED # LATIN CAPITAL LETTER U WITH HOOK ABOVE -1EE7 ; PVALID # LATIN SMALL LETTER U WITH HOOK ABOVE -1EE8 ; DISALLOWED # LATIN CAPITAL LETTER U WITH HORN AND ACUTE -1EE9 ; PVALID # LATIN SMALL LETTER U WITH HORN AND ACUTE -1EEA ; DISALLOWED # LATIN CAPITAL LETTER U WITH HORN AND GRAVE -1EEB ; PVALID # LATIN SMALL LETTER U WITH HORN AND GRAVE -1EEC ; DISALLOWED # LATIN CAPITAL LETTER U WITH HORN AND HOOK AB -1EED ; PVALID # LATIN SMALL LETTER U WITH HORN AND HOOK ABOV -1EEE ; DISALLOWED # LATIN CAPITAL LETTER U WITH HORN AND TILDE -1EEF ; PVALID # LATIN SMALL LETTER U WITH HORN AND TILDE -1EF0 ; DISALLOWED # LATIN CAPITAL LETTER U WITH HORN AND DOT BEL 
-1EF1 ; PVALID # LATIN SMALL LETTER U WITH HORN AND DOT BELOW -1EF2 ; DISALLOWED # LATIN CAPITAL LETTER Y WITH GRAVE -1EF3 ; PVALID # LATIN SMALL LETTER Y WITH GRAVE -1EF4 ; DISALLOWED # LATIN CAPITAL LETTER Y WITH DOT BELOW -1EF5 ; PVALID # LATIN SMALL LETTER Y WITH DOT BELOW -1EF6 ; DISALLOWED # LATIN CAPITAL LETTER Y WITH HOOK ABOVE -1EF7 ; PVALID # LATIN SMALL LETTER Y WITH HOOK ABOVE -1EF8 ; DISALLOWED # LATIN CAPITAL LETTER Y WITH TILDE -1EF9 ; PVALID # LATIN SMALL LETTER Y WITH TILDE -1EFA ; DISALLOWED # LATIN CAPITAL LETTER MIDDLE-WELSH LL -1EFB ; PVALID # LATIN SMALL LETTER MIDDLE-WELSH LL -1EFC ; DISALLOWED # LATIN CAPITAL LETTER MIDDLE-WELSH V -1EFD ; PVALID # LATIN SMALL LETTER MIDDLE-WELSH V -1EFE ; DISALLOWED # LATIN CAPITAL LETTER Y WITH LOOP -1EFF..1F07 ; PVALID # LATIN SMALL LETTER Y WITH LOOP..GREEK SMALL -1F08..1F0F ; DISALLOWED # GREEK CAPITAL LETTER ALPHA WITH PSILI..GREEK -1F10..1F15 ; PVALID # GREEK SMALL LETTER EPSILON WITH PSILI..GREEK -1F16..1F17 ; UNASSIGNED # .. -1F18..1F1D ; DISALLOWED # GREEK CAPITAL LETTER EPSILON WITH PSILI..GRE -1F1E..1F1F ; UNASSIGNED # .. -1F20..1F27 ; PVALID # GREEK SMALL LETTER ETA WITH PSILI..GREEK SMA -1F28..1F2F ; DISALLOWED # GREEK CAPITAL LETTER ETA WITH PSILI..GREEK C -1F30..1F37 ; PVALID # GREEK SMALL LETTER IOTA WITH PSILI..GREEK SM -1F38..1F3F ; DISALLOWED # GREEK CAPITAL LETTER IOTA WITH PSILI..GREEK -1F40..1F45 ; PVALID # GREEK SMALL LETTER OMICRON WITH PSILI..GREEK -1F46..1F47 ; UNASSIGNED # .. -1F48..1F4D ; DISALLOWED # GREEK CAPITAL LETTER OMICRON WITH PSILI..GRE -1F4E..1F4F ; UNASSIGNED # .. 
-1F50..1F57 ; PVALID # GREEK SMALL LETTER UPSILON WITH PSILI..GREEK -1F58 ; UNASSIGNED # -1F59 ; DISALLOWED # GREEK CAPITAL LETTER UPSILON WITH DASIA -1F5A ; UNASSIGNED # -1F5B ; DISALLOWED # GREEK CAPITAL LETTER UPSILON WITH DASIA AND -1F5C ; UNASSIGNED # -1F5D ; DISALLOWED # GREEK CAPITAL LETTER UPSILON WITH DASIA AND -1F5E ; UNASSIGNED # -1F5F ; DISALLOWED # GREEK CAPITAL LETTER UPSILON WITH DASIA AND -1F60..1F67 ; PVALID # GREEK SMALL LETTER OMEGA WITH PSILI..GREEK S -1F68..1F6F ; DISALLOWED # GREEK CAPITAL LETTER OMEGA WITH PSILI..GREEK -1F70 ; PVALID # GREEK SMALL LETTER ALPHA WITH VARIA -1F71 ; DISALLOWED # GREEK SMALL LETTER ALPHA WITH OXIA -1F72 ; PVALID # GREEK SMALL LETTER EPSILON WITH VARIA -1F73 ; DISALLOWED # GREEK SMALL LETTER EPSILON WITH OXIA -1F74 ; PVALID # GREEK SMALL LETTER ETA WITH VARIA -1F75 ; DISALLOWED # GREEK SMALL LETTER ETA WITH OXIA -1F76 ; PVALID # GREEK SMALL LETTER IOTA WITH VARIA -1F77 ; DISALLOWED # GREEK SMALL LETTER IOTA WITH OXIA -1F78 ; PVALID # GREEK SMALL LETTER OMICRON WITH VARIA -1F79 ; DISALLOWED # GREEK SMALL LETTER OMICRON WITH OXIA -1F7A ; PVALID # GREEK SMALL LETTER UPSILON WITH VARIA -1F7B ; DISALLOWED # GREEK SMALL LETTER UPSILON WITH OXIA -1F7C ; PVALID # GREEK SMALL LETTER OMEGA WITH VARIA -1F7D ; DISALLOWED # GREEK SMALL LETTER OMEGA WITH OXIA -1F7E..1F7F ; UNASSIGNED # .. 
-1F80..1FAF ; DISALLOWED # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOG -1FB0..1FB1 ; PVALID # GREEK SMALL LETTER ALPHA WITH VRACHY..GREEK -1FB2..1FB4 ; DISALLOWED # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOG -1FB5 ; UNASSIGNED # -1FB6 ; PVALID # GREEK SMALL LETTER ALPHA WITH PERISPOMENI -1FB7..1FC4 ; DISALLOWED # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AN -1FC5 ; UNASSIGNED # -1FC6 ; PVALID # GREEK SMALL LETTER ETA WITH PERISPOMENI -1FC7..1FCF ; DISALLOWED # GREEK SMALL LETTER ETA WITH PERISPOMENI AND -1FD0..1FD2 ; PVALID # GREEK SMALL LETTER IOTA WITH VRACHY..GREEK S -1FD3 ; DISALLOWED # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND O -1FD4..1FD5 ; UNASSIGNED # .. -1FD6..1FD7 ; PVALID # GREEK SMALL LETTER IOTA WITH PERISPOMENI..GR -1FD8..1FDB ; DISALLOWED # GREEK CAPITAL LETTER IOTA WITH VRACHY..GREEK -1FDC ; UNASSIGNED # -1FDD..1FDF ; DISALLOWED # GREEK DASIA AND VARIA..GREEK DASIA AND PERIS -1FE0..1FE2 ; PVALID # GREEK SMALL LETTER UPSILON WITH VRACHY..GREE -1FE3 ; DISALLOWED # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AN -1FE4..1FE7 ; PVALID # GREEK SMALL LETTER RHO WITH PSILI..GREEK SMA -1FE8..1FEF ; DISALLOWED # GREEK CAPITAL LETTER UPSILON WITH VRACHY..GR -1FF0..1FF1 ; UNASSIGNED # .. -1FF2..1FF4 ; DISALLOWED # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOG -1FF5 ; UNASSIGNED # -1FF6 ; PVALID # GREEK SMALL LETTER OMEGA WITH PERISPOMENI -1FF7..1FFE ; DISALLOWED # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AN -1FFF ; UNASSIGNED # -2000..200B ; DISALLOWED # EN QUAD..ZERO WIDTH SPACE -200C..200D ; CONTEXTJ # ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER -200E..2064 ; DISALLOWED # LEFT-TO-RIGHT MARK..INVISIBLE PLUS -2065..2069 ; UNASSIGNED # .. -206A..2071 ; DISALLOWED # INHIBIT SYMMETRIC SWAPPING..SUPERSCRIPT LATI -2072..2073 ; UNASSIGNED # .. -2074..208E ; DISALLOWED # SUPERSCRIPT FOUR..SUBSCRIPT RIGHT PARENTHESI -208F ; UNASSIGNED # -2090..2094 ; DISALLOWED # LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCR -2095..209F ; UNASSIGNED # .. 
-20A0..20B8 ; DISALLOWED # EURO-CURRENCY SIGN..TENGE SIGN -20B9..20CF ; UNASSIGNED # .. -20D0..20F0 ; DISALLOWED # COMBINING LEFT HARPOON ABOVE..COMBINING ASTE -20F1..20FF ; UNASSIGNED # .. -2100..214D ; DISALLOWED # ACCOUNT OF..AKTIESELSKAB -214E ; PVALID # TURNED SMALL F -214F..2183 ; DISALLOWED # SYMBOL FOR SAMARITAN SOURCE..ROMAN NUMERAL R -2184 ; PVALID # LATIN SMALL LETTER REVERSED C -2185..2189 ; DISALLOWED # ROMAN NUMERAL SIX LATE FORM..VULGAR FRACTION -218A..218F ; UNASSIGNED # .. -2190..23E8 ; DISALLOWED # LEFTWARDS ARROW..DECIMAL EXPONENT SYMBOL -23E9..23FF ; UNASSIGNED # .. -2400..2426 ; DISALLOWED # SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM -2427..243F ; UNASSIGNED # .. -2440..244A ; DISALLOWED # OCR HOOK..OCR DOUBLE BACKSLASH -244B..245F ; UNASSIGNED # .. -2460..26CD ; DISALLOWED # CIRCLED DIGIT ONE..DISABLED CAR -26CE ; UNASSIGNED # -26CF..26E1 ; DISALLOWED # PICK..RESTRICTED LEFT ENTRY-2 -26E2 ; UNASSIGNED # -26E3 ; DISALLOWED # HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE -26E4..26E7 ; UNASSIGNED # .. -26E8..26FF ; DISALLOWED # BLACK CROSS ON SHIELD..WHITE FLAG WITH HORIZ -2700 ; UNASSIGNED # -2701..2704 ; DISALLOWED # UPPER BLADE SCISSORS..WHITE SCISSORS -2705 ; UNASSIGNED # -2706..2709 ; DISALLOWED # TELEPHONE LOCATION SIGN..ENVELOPE -270A..270B ; UNASSIGNED # .. -270C..2727 ; DISALLOWED # VICTORY HAND..WHITE FOUR POINTED STAR -2728 ; UNASSIGNED # -2729..274B ; DISALLOWED # STRESS OUTLINED WHITE STAR..HEAVY EIGHT TEAR -274C ; UNASSIGNED # -274D ; DISALLOWED # SHADOWED WHITE CIRCLE -274E ; UNASSIGNED # -274F..2752 ; DISALLOWED # LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPE -2753..2755 ; UNASSIGNED # .. -2756..275E ; DISALLOWED # BLACK DIAMOND MINUS WHITE X..HEAVY DOUBLE CO -275F..2760 ; UNASSIGNED # .. -2761..2794 ; DISALLOWED # CURVED STEM PARAGRAPH SIGN ORNAMENT..HEAVY W -2795..2797 ; UNASSIGNED # .. 
-2798..27AF ; DISALLOWED # HEAVY SOUTH EAST ARROW..NOTCHED LOWER RIGHT- -27B0 ; UNASSIGNED # -27B1..27BE ; DISALLOWED # NOTCHED UPPER RIGHT-SHADOWED WHITE RIGHTWARD -27BF ; UNASSIGNED # -27C0..27CA ; DISALLOWED # THREE DIMENSIONAL ANGLE..VERTICAL BAR WITH H -27CB ; UNASSIGNED # -27CC ; DISALLOWED # LONG DIVISION -27CD..27CF ; UNASSIGNED # .. -27D0..2B4C ; DISALLOWED # WHITE DIAMOND WITH CENTRED DOT..RIGHTWARDS A -2B4D..2B4F ; UNASSIGNED # .. -2B50..2B59 ; DISALLOWED # WHITE MEDIUM STAR..HEAVY CIRCLED SALTIRE -2B5A..2BFF ; UNASSIGNED # .. -2C00..2C2E ; DISALLOWED # GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CA -2C2F ; UNASSIGNED # -2C30..2C5E ; PVALID # GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMAL -2C5F ; UNASSIGNED # -2C60 ; DISALLOWED # LATIN CAPITAL LETTER L WITH DOUBLE BAR -2C61 ; PVALID # LATIN SMALL LETTER L WITH DOUBLE BAR -2C62..2C64 ; DISALLOWED # LATIN CAPITAL LETTER L WITH MIDDLE TILDE..LA -2C65..2C66 ; PVALID # LATIN SMALL LETTER A WITH STROKE..LATIN SMAL -2C67 ; DISALLOWED # LATIN CAPITAL LETTER H WITH DESCENDER -2C68 ; PVALID # LATIN SMALL LETTER H WITH DESCENDER -2C69 ; DISALLOWED # LATIN CAPITAL LETTER K WITH DESCENDER -2C6A ; PVALID # LATIN SMALL LETTER K WITH DESCENDER -2C6B ; DISALLOWED # LATIN CAPITAL LETTER Z WITH DESCENDER -2C6C ; PVALID # LATIN SMALL LETTER Z WITH DESCENDER -2C6D..2C70 ; DISALLOWED # LATIN CAPITAL LETTER ALPHA..LATIN CAPITAL LE -2C71 ; PVALID # LATIN SMALL LETTER V WITH RIGHT HOOK -2C72 ; DISALLOWED # LATIN CAPITAL LETTER W WITH HOOK -2C73..2C74 ; PVALID # LATIN SMALL LETTER W WITH HOOK..LATIN SMALL -2C75 ; DISALLOWED # LATIN CAPITAL LETTER HALF H -2C76..2C7B ; PVALID # LATIN SMALL LETTER HALF H..LATIN LETTER SMAL -2C7C..2C80 ; DISALLOWED # LATIN SUBSCRIPT SMALL LETTER J..COPTIC CAPIT -2C81 ; PVALID # COPTIC SMALL LETTER ALFA -2C82 ; DISALLOWED # COPTIC CAPITAL LETTER VIDA -2C83 ; PVALID # COPTIC SMALL LETTER VIDA -2C84 ; DISALLOWED # COPTIC CAPITAL LETTER GAMMA -2C85 ; PVALID # COPTIC SMALL LETTER GAMMA -2C86 ; DISALLOWED # 
COPTIC CAPITAL LETTER DALDA -2C87 ; PVALID # COPTIC SMALL LETTER DALDA -2C88 ; DISALLOWED # COPTIC CAPITAL LETTER EIE -2C89 ; PVALID # COPTIC SMALL LETTER EIE -2C8A ; DISALLOWED # COPTIC CAPITAL LETTER SOU -2C8B ; PVALID # COPTIC SMALL LETTER SOU -2C8C ; DISALLOWED # COPTIC CAPITAL LETTER ZATA -2C8D ; PVALID # COPTIC SMALL LETTER ZATA -2C8E ; DISALLOWED # COPTIC CAPITAL LETTER HATE -2C8F ; PVALID # COPTIC SMALL LETTER HATE -2C90 ; DISALLOWED # COPTIC CAPITAL LETTER THETHE -2C91 ; PVALID # COPTIC SMALL LETTER THETHE -2C92 ; DISALLOWED # COPTIC CAPITAL LETTER IAUDA -2C93 ; PVALID # COPTIC SMALL LETTER IAUDA -2C94 ; DISALLOWED # COPTIC CAPITAL LETTER KAPA -2C95 ; PVALID # COPTIC SMALL LETTER KAPA -2C96 ; DISALLOWED # COPTIC CAPITAL LETTER LAULA -2C97 ; PVALID # COPTIC SMALL LETTER LAULA -2C98 ; DISALLOWED # COPTIC CAPITAL LETTER MI -2C99 ; PVALID # COPTIC SMALL LETTER MI -2C9A ; DISALLOWED # COPTIC CAPITAL LETTER NI -2C9B ; PVALID # COPTIC SMALL LETTER NI -2C9C ; DISALLOWED # COPTIC CAPITAL LETTER KSI -2C9D ; PVALID # COPTIC SMALL LETTER KSI -2C9E ; DISALLOWED # COPTIC CAPITAL LETTER O -2C9F ; PVALID # COPTIC SMALL LETTER O -2CA0 ; DISALLOWED # COPTIC CAPITAL LETTER PI -2CA1 ; PVALID # COPTIC SMALL LETTER PI -2CA2 ; DISALLOWED # COPTIC CAPITAL LETTER RO -2CA3 ; PVALID # COPTIC SMALL LETTER RO -2CA4 ; DISALLOWED # COPTIC CAPITAL LETTER SIMA -2CA5 ; PVALID # COPTIC SMALL LETTER SIMA -2CA6 ; DISALLOWED # COPTIC CAPITAL LETTER TAU -2CA7 ; PVALID # COPTIC SMALL LETTER TAU -2CA8 ; DISALLOWED # COPTIC CAPITAL LETTER UA -2CA9 ; PVALID # COPTIC SMALL LETTER UA -2CAA ; DISALLOWED # COPTIC CAPITAL LETTER FI -2CAB ; PVALID # COPTIC SMALL LETTER FI -2CAC ; DISALLOWED # COPTIC CAPITAL LETTER KHI -2CAD ; PVALID # COPTIC SMALL LETTER KHI -2CAE ; DISALLOWED # COPTIC CAPITAL LETTER PSI -2CAF ; PVALID # COPTIC SMALL LETTER PSI -2CB0 ; DISALLOWED # COPTIC CAPITAL LETTER OOU -2CB1 ; PVALID # COPTIC SMALL LETTER OOU -2CB2 ; DISALLOWED # COPTIC CAPITAL LETTER DIALECT-P ALEF -2CB3 ; PVALID # 
COPTIC SMALL LETTER DIALECT-P ALEF -2CB4 ; DISALLOWED # COPTIC CAPITAL LETTER OLD COPTIC AIN -2CB5 ; PVALID # COPTIC SMALL LETTER OLD COPTIC AIN -2CB6 ; DISALLOWED # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE -2CB7 ; PVALID # COPTIC SMALL LETTER CRYPTOGRAMMIC EIE -2CB8 ; DISALLOWED # COPTIC CAPITAL LETTER DIALECT-P KAPA -2CB9 ; PVALID # COPTIC SMALL LETTER DIALECT-P KAPA -2CBA ; DISALLOWED # COPTIC CAPITAL LETTER DIALECT-P NI -2CBB ; PVALID # COPTIC SMALL LETTER DIALECT-P NI -2CBC ; DISALLOWED # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI -2CBD ; PVALID # COPTIC SMALL LETTER CRYPTOGRAMMIC NI -2CBE ; DISALLOWED # COPTIC CAPITAL LETTER OLD COPTIC OOU -2CBF ; PVALID # COPTIC SMALL LETTER OLD COPTIC OOU -2CC0 ; DISALLOWED # COPTIC CAPITAL LETTER SAMPI -2CC1 ; PVALID # COPTIC SMALL LETTER SAMPI -2CC2 ; DISALLOWED # COPTIC CAPITAL LETTER CROSSED SHEI -2CC3 ; PVALID # COPTIC SMALL LETTER CROSSED SHEI -2CC4 ; DISALLOWED # COPTIC CAPITAL LETTER OLD COPTIC SHEI -2CC5 ; PVALID # COPTIC SMALL LETTER OLD COPTIC SHEI -2CC6 ; DISALLOWED # COPTIC CAPITAL LETTER OLD COPTIC ESH -2CC7 ; PVALID # COPTIC SMALL LETTER OLD COPTIC ESH -2CC8 ; DISALLOWED # COPTIC CAPITAL LETTER AKHMIMIC KHEI -2CC9 ; PVALID # COPTIC SMALL LETTER AKHMIMIC KHEI -2CCA ; DISALLOWED # COPTIC CAPITAL LETTER DIALECT-P HORI -2CCB ; PVALID # COPTIC SMALL LETTER DIALECT-P HORI -2CCC ; DISALLOWED # COPTIC CAPITAL LETTER OLD COPTIC HORI -2CCD ; PVALID # COPTIC SMALL LETTER OLD COPTIC HORI -2CCE ; DISALLOWED # COPTIC CAPITAL LETTER OLD COPTIC HA -2CCF ; PVALID # COPTIC SMALL LETTER OLD COPTIC HA -2CD0 ; DISALLOWED # COPTIC CAPITAL LETTER L-SHAPED HA -2CD1 ; PVALID # COPTIC SMALL LETTER L-SHAPED HA -2CD2 ; DISALLOWED # COPTIC CAPITAL LETTER OLD COPTIC HEI -2CD3 ; PVALID # COPTIC SMALL LETTER OLD COPTIC HEI -2CD4 ; DISALLOWED # COPTIC CAPITAL LETTER OLD COPTIC HAT -2CD5 ; PVALID # COPTIC SMALL LETTER OLD COPTIC HAT -2CD6 ; DISALLOWED # COPTIC CAPITAL LETTER OLD COPTIC GANGIA -2CD7 ; PVALID # COPTIC SMALL LETTER OLD COPTIC GANGIA 
-2CD8 ; DISALLOWED # COPTIC CAPITAL LETTER OLD COPTIC DJA -2CD9 ; PVALID # COPTIC SMALL LETTER OLD COPTIC DJA -2CDA ; DISALLOWED # COPTIC CAPITAL LETTER OLD COPTIC SHIMA -2CDB ; PVALID # COPTIC SMALL LETTER OLD COPTIC SHIMA -2CDC ; DISALLOWED # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA -2CDD ; PVALID # COPTIC SMALL LETTER OLD NUBIAN SHIMA -2CDE ; DISALLOWED # COPTIC CAPITAL LETTER OLD NUBIAN NGI -2CDF ; PVALID # COPTIC SMALL LETTER OLD NUBIAN NGI -2CE0 ; DISALLOWED # COPTIC CAPITAL LETTER OLD NUBIAN NYI -2CE1 ; PVALID # COPTIC SMALL LETTER OLD NUBIAN NYI -2CE2 ; DISALLOWED # COPTIC CAPITAL LETTER OLD NUBIAN WAU -2CE3..2CE4 ; PVALID # COPTIC SMALL LETTER OLD NUBIAN WAU..COPTIC S -2CE5..2CEB ; DISALLOWED # COPTIC SYMBOL MI RO..COPTIC CAPITAL LETTER C -2CEC ; PVALID # COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI -2CED ; DISALLOWED # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA -2CEE..2CF1 ; PVALID # COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA..CO -2CF2..2CF8 ; UNASSIGNED # .. -2CF9..2CFF ; DISALLOWED # COPTIC OLD NUBIAN FULL STOP..COPTIC MORPHOLO -2D00..2D25 ; PVALID # GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LET -2D26..2D2F ; UNASSIGNED # .. -2D30..2D65 ; PVALID # TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ -2D66..2D6E ; UNASSIGNED # .. -2D6F ; DISALLOWED # TIFINAGH MODIFIER LETTER LABIALIZATION MARK -2D70..2D7F ; UNASSIGNED # .. -2D80..2D96 ; PVALID # ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGW -2D97..2D9F ; UNASSIGNED # .. 
-2DA0..2DA6 ; PVALID # ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO -2DA7 ; UNASSIGNED # -2DA8..2DAE ; PVALID # ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO -2DAF ; UNASSIGNED # -2DB0..2DB6 ; PVALID # ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO -2DB7 ; UNASSIGNED # -2DB8..2DBE ; PVALID # ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CC -2DBF ; UNASSIGNED # -2DC0..2DC6 ; PVALID # ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO -2DC7 ; UNASSIGNED # -2DC8..2DCE ; PVALID # ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO -2DCF ; UNASSIGNED # -2DD0..2DD6 ; PVALID # ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO -2DD7 ; UNASSIGNED # -2DD8..2DDE ; PVALID # ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO -2DDF ; UNASSIGNED # -2DE0..2DFF ; PVALID # COMBINING CYRILLIC LETTER BE..COMBINING CYRI -2E00..2E2E ; DISALLOWED # RIGHT ANGLE SUBSTITUTION MARKER..REVERSED QU -2E2F ; PVALID # VERTICAL TILDE -2E30..2E31 ; DISALLOWED # RING POINT..WORD SEPARATOR MIDDLE DOT -2E32..2E7F ; UNASSIGNED # .. -2E80..2E99 ; DISALLOWED # CJK RADICAL REPEAT..CJK RADICAL RAP -2E9A ; UNASSIGNED # -2E9B..2EF3 ; DISALLOWED # CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED -2EF4..2EFF ; UNASSIGNED # .. -2F00..2FD5 ; DISALLOWED # KANGXI RADICAL ONE..KANGXI RADICAL FLUTE -2FD6..2FEF ; UNASSIGNED # .. -2FF0..2FFB ; DISALLOWED # IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RI -2FFC..2FFF ; UNASSIGNED # .. -3000..3004 ; DISALLOWED # IDEOGRAPHIC SPACE..JAPANESE INDUSTRIAL STAND -3005..3007 ; PVALID # IDEOGRAPHIC ITERATION MARK..IDEOGRAPHIC NUMB -3008..3029 ; DISALLOWED # LEFT ANGLE BRACKET..HANGZHOU NUMERAL NINE -302A..302D ; PVALID # IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENT -302E..303B ; DISALLOWED # HANGUL SINGLE DOT TONE MARK..VERTICAL IDEOGR -303C ; PVALID # MASU MARK -303D..303F ; DISALLOWED # PART ALTERNATION MARK..IDEOGRAPHIC HALF FILL -3040 ; UNASSIGNED # -3041..3096 ; PVALID # HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMA -3097..3098 ; UNASSIGNED # .. 
-3099..309A ; PVALID # COMBINING KATAKANA-HIRAGANA VOICED SOUND MAR -309B..309C ; DISALLOWED # KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKAN -309D..309E ; PVALID # HIRAGANA ITERATION MARK..HIRAGANA VOICED ITE -309F..30A0 ; DISALLOWED # HIRAGANA DIGRAPH YORI..KATAKANA-HIRAGANA DOU -30A1..30FA ; PVALID # KATAKANA LETTER SMALL A..KATAKANA LETTER VO -30FB ; CONTEXTO # KATAKANA MIDDLE DOT -30FC..30FE ; PVALID # KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATA -30FF ; DISALLOWED # KATAKANA DIGRAPH KOTO -3100..3104 ; UNASSIGNED # .. -3105..312D ; PVALID # BOPOMOFO LETTER B..BOPOMOFO LETTER IH -312E..3130 ; UNASSIGNED # .. -3131..318E ; DISALLOWED # HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE -318F ; UNASSIGNED # -3190..319F ; DISALLOWED # IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRA -31A0..31B7 ; PVALID # BOPOMOFO LETTER BU..BOPOMOFO FINAL LETTER H -31B8..31BF ; UNASSIGNED # .. -31C0..31E3 ; DISALLOWED # CJK STROKE T..CJK STROKE Q -31E4..31EF ; UNASSIGNED # .. -31F0..31FF ; PVALID # KATAKANA LETTER SMALL KU..KATAKANA LETTER SM -3200..321E ; DISALLOWED # PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED K -321F ; UNASSIGNED # -3220..32FE ; DISALLOWED # PARENTHESIZED IDEOGRAPH ONE..CIRCLED KATAKAN -32FF ; UNASSIGNED # -3300..33FF ; DISALLOWED # SQUARE APAATO..SQUARE GAL -3400..4DB5 ; PVALID # .... -4DC0..4DFF ; DISALLOWED # HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM F -4E00..9FCB ; PVALID # .. -9FCC..9FFF ; UNASSIGNED # .. -A000..A48C ; PVALID # YI SYLLABLE IT..YI SYLLABLE YYR -A48D..A48F ; UNASSIGNED # .. -A490..A4C6 ; DISALLOWED # YI RADICAL QOT..YI RADICAL KE -A4C7..A4CF ; UNASSIGNED # .. -A4D0..A4FD ; PVALID # LISU LETTER BA..LISU LETTER TONE MYA JEU -A4FE..A4FF ; DISALLOWED # LISU PUNCTUATION COMMA..LISU PUNCTUATION FUL -A500..A60C ; PVALID # VAI SYLLABLE EE..VAI SYLLABLE LENGTHENER -A60D..A60F ; DISALLOWED # VAI COMMA..VAI QUESTION MARK -A610..A62B ; PVALID # VAI SYLLABLE NDOLE FA..VAI SYLLABLE NDOLE DO -A62C..A63F ; UNASSIGNED # .. 
-A640 ; DISALLOWED # CYRILLIC CAPITAL LETTER ZEMLYA -A641 ; PVALID # CYRILLIC SMALL LETTER ZEMLYA -A642 ; DISALLOWED # CYRILLIC CAPITAL LETTER DZELO -A643 ; PVALID # CYRILLIC SMALL LETTER DZELO -A644 ; DISALLOWED # CYRILLIC CAPITAL LETTER REVERSED DZE -A645 ; PVALID # CYRILLIC SMALL LETTER REVERSED DZE -A646 ; DISALLOWED # CYRILLIC CAPITAL LETTER IOTA -A647 ; PVALID # CYRILLIC SMALL LETTER IOTA -A648 ; DISALLOWED # CYRILLIC CAPITAL LETTER DJERV -A649 ; PVALID # CYRILLIC SMALL LETTER DJERV -A64A ; DISALLOWED # CYRILLIC CAPITAL LETTER MONOGRAPH UK -A64B ; PVALID # CYRILLIC SMALL LETTER MONOGRAPH UK -A64C ; DISALLOWED # CYRILLIC CAPITAL LETTER BROAD OMEGA -A64D ; PVALID # CYRILLIC SMALL LETTER BROAD OMEGA -A64E ; DISALLOWED # CYRILLIC CAPITAL LETTER NEUTRAL YER -A64F ; PVALID # CYRILLIC SMALL LETTER NEUTRAL YER -A650 ; DISALLOWED # CYRILLIC CAPITAL LETTER YERU WITH BACK YER -A651 ; PVALID # CYRILLIC SMALL LETTER YERU WITH BACK YER -A652 ; DISALLOWED # CYRILLIC CAPITAL LETTER IOTIFIED YAT -A653 ; PVALID # CYRILLIC SMALL LETTER IOTIFIED YAT -A654 ; DISALLOWED # CYRILLIC CAPITAL LETTER REVERSED YU -A655 ; PVALID # CYRILLIC SMALL LETTER REVERSED YU -A656 ; DISALLOWED # CYRILLIC CAPITAL LETTER IOTIFIED A -A657 ; PVALID # CYRILLIC SMALL LETTER IOTIFIED A -A658 ; DISALLOWED # CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS -A659 ; PVALID # CYRILLIC SMALL LETTER CLOSED LITTLE YUS -A65A ; DISALLOWED # CYRILLIC CAPITAL LETTER BLENDED YUS -A65B ; PVALID # CYRILLIC SMALL LETTER BLENDED YUS -A65C ; DISALLOWED # CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITT -A65D ; PVALID # CYRILLIC SMALL LETTER IOTIFIED CLOSED LITTLE -A65E ; DISALLOWED # CYRILLIC CAPITAL LETTER YN -A65F ; PVALID # CYRILLIC SMALL LETTER YN -A660..A661 ; UNASSIGNED # .. 
-A662 ; DISALLOWED # CYRILLIC CAPITAL LETTER SOFT DE -A663 ; PVALID # CYRILLIC SMALL LETTER SOFT DE -A664 ; DISALLOWED # CYRILLIC CAPITAL LETTER SOFT EL -A665 ; PVALID # CYRILLIC SMALL LETTER SOFT EL -A666 ; DISALLOWED # CYRILLIC CAPITAL LETTER SOFT EM -A667 ; PVALID # CYRILLIC SMALL LETTER SOFT EM -A668 ; DISALLOWED # CYRILLIC CAPITAL LETTER MONOCULAR O -A669 ; PVALID # CYRILLIC SMALL LETTER MONOCULAR O -A66A ; DISALLOWED # CYRILLIC CAPITAL LETTER BINOCULAR O -A66B ; PVALID # CYRILLIC SMALL LETTER BINOCULAR O -A66C ; DISALLOWED # CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O -A66D..A66F ; PVALID # CYRILLIC SMALL LETTER DOUBLE MONOCULAR O..CO -A670..A673 ; DISALLOWED # COMBINING CYRILLIC TEN MILLIONS SIGN..SLAVON -A674..A67B ; UNASSIGNED # .. -A67C..A67D ; PVALID # COMBINING CYRILLIC KAVYKA..COMBINING CYRILLI -A67E ; DISALLOWED # CYRILLIC KAVYKA -A67F ; PVALID # CYRILLIC PAYEROK -A680 ; DISALLOWED # CYRILLIC CAPITAL LETTER DWE -A681 ; PVALID # CYRILLIC SMALL LETTER DWE -A682 ; DISALLOWED # CYRILLIC CAPITAL LETTER DZWE -A683 ; PVALID # CYRILLIC SMALL LETTER DZWE -A684 ; DISALLOWED # CYRILLIC CAPITAL LETTER ZHWE -A685 ; PVALID # CYRILLIC SMALL LETTER ZHWE -A686 ; DISALLOWED # CYRILLIC CAPITAL LETTER CCHE -A687 ; PVALID # CYRILLIC SMALL LETTER CCHE -A688 ; DISALLOWED # CYRILLIC CAPITAL LETTER DZZE -A689 ; PVALID # CYRILLIC SMALL LETTER DZZE -A68A ; DISALLOWED # CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK -A68B ; PVALID # CYRILLIC SMALL LETTER TE WITH MIDDLE HOOK -A68C ; DISALLOWED # CYRILLIC CAPITAL LETTER TWE -A68D ; PVALID # CYRILLIC SMALL LETTER TWE -A68E ; DISALLOWED # CYRILLIC CAPITAL LETTER TSWE -A68F ; PVALID # CYRILLIC SMALL LETTER TSWE -A690 ; DISALLOWED # CYRILLIC CAPITAL LETTER TSSE -A691 ; PVALID # CYRILLIC SMALL LETTER TSSE -A692 ; DISALLOWED # CYRILLIC CAPITAL LETTER TCHE -A693 ; PVALID # CYRILLIC SMALL LETTER TCHE -A694 ; DISALLOWED # CYRILLIC CAPITAL LETTER HWE -A695 ; PVALID # CYRILLIC SMALL LETTER HWE -A696 ; DISALLOWED # CYRILLIC CAPITAL LETTER SHWE 
-A697 ; PVALID # CYRILLIC SMALL LETTER SHWE -A698..A69F ; UNASSIGNED # .. -A6A0..A6E5 ; PVALID # BAMUM LETTER A..BAMUM LETTER KI -A6E6..A6EF ; DISALLOWED # BAMUM LETTER MO..BAMUM LETTER KOGHOM -A6F0..A6F1 ; PVALID # BAMUM COMBINING MARK KOQNDON..BAMUM COMBININ -A6F2..A6F7 ; DISALLOWED # BAMUM NJAEMLI..BAMUM QUESTION MARK -A6F8..A6FF ; UNASSIGNED # .. -A700..A716 ; DISALLOWED # MODIFIER LETTER CHINESE TONE YIN PING..MODIF -A717..A71F ; PVALID # MODIFIER LETTER DOT VERTICAL BAR..MODIFIER L -A720..A722 ; DISALLOWED # MODIFIER LETTER STRESS AND HIGH TONE..LATIN -A723 ; PVALID # LATIN SMALL LETTER EGYPTOLOGICAL ALEF -A724 ; DISALLOWED # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN -A725 ; PVALID # LATIN SMALL LETTER EGYPTOLOGICAL AIN -A726 ; DISALLOWED # LATIN CAPITAL LETTER HENG -A727 ; PVALID # LATIN SMALL LETTER HENG -A728 ; DISALLOWED # LATIN CAPITAL LETTER TZ -A729 ; PVALID # LATIN SMALL LETTER TZ -A72A ; DISALLOWED # LATIN CAPITAL LETTER TRESILLO -A72B ; PVALID # LATIN SMALL LETTER TRESILLO -A72C ; DISALLOWED # LATIN CAPITAL LETTER CUATRILLO -A72D ; PVALID # LATIN SMALL LETTER CUATRILLO -A72E ; DISALLOWED # LATIN CAPITAL LETTER CUATRILLO WITH COMMA -A72F..A731 ; PVALID # LATIN SMALL LETTER CUATRILLO WITH COMMA..LAT -A732 ; DISALLOWED # LATIN CAPITAL LETTER AA -A733 ; PVALID # LATIN SMALL LETTER AA -A734 ; DISALLOWED # LATIN CAPITAL LETTER AO -A735 ; PVALID # LATIN SMALL LETTER AO -A736 ; DISALLOWED # LATIN CAPITAL LETTER AU -A737 ; PVALID # LATIN SMALL LETTER AU -A738 ; DISALLOWED # LATIN CAPITAL LETTER AV -A739 ; PVALID # LATIN SMALL LETTER AV -A73A ; DISALLOWED # LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR -A73B ; PVALID # LATIN SMALL LETTER AV WITH HORIZONTAL BAR -A73C ; DISALLOWED # LATIN CAPITAL LETTER AY -A73D ; PVALID # LATIN SMALL LETTER AY -A73E ; DISALLOWED # LATIN CAPITAL LETTER REVERSED C WITH DOT -A73F ; PVALID # LATIN SMALL LETTER REVERSED C WITH DOT -A740 ; DISALLOWED # LATIN CAPITAL LETTER K WITH STROKE -A741 ; PVALID # LATIN SMALL LETTER K WITH STROKE 
-A742 ; DISALLOWED # LATIN CAPITAL LETTER K WITH DIAGONAL STROKE -A743 ; PVALID # LATIN SMALL LETTER K WITH DIAGONAL STROKE -A744 ; DISALLOWED # LATIN CAPITAL LETTER K WITH STROKE AND DIAGO -A745 ; PVALID # LATIN SMALL LETTER K WITH STROKE AND DIAGONA -A746 ; DISALLOWED # LATIN CAPITAL LETTER BROKEN L -A747 ; PVALID # LATIN SMALL LETTER BROKEN L -A748 ; DISALLOWED # LATIN CAPITAL LETTER L WITH HIGH STROKE -A749 ; PVALID # LATIN SMALL LETTER L WITH HIGH STROKE -A74A ; DISALLOWED # LATIN CAPITAL LETTER O WITH LONG STROKE OVER -A74B ; PVALID # LATIN SMALL LETTER O WITH LONG STROKE OVERLA -A74C ; DISALLOWED # LATIN CAPITAL LETTER O WITH LOOP -A74D ; PVALID # LATIN SMALL LETTER O WITH LOOP -A74E ; DISALLOWED # LATIN CAPITAL LETTER OO -A74F ; PVALID # LATIN SMALL LETTER OO -A750 ; DISALLOWED # LATIN CAPITAL LETTER P WITH STROKE THROUGH D -A751 ; PVALID # LATIN SMALL LETTER P WITH STROKE THROUGH DES -A752 ; DISALLOWED # LATIN CAPITAL LETTER P WITH FLOURISH -A753 ; PVALID # LATIN SMALL LETTER P WITH FLOURISH -A754 ; DISALLOWED # LATIN CAPITAL LETTER P WITH SQUIRREL TAIL -A755 ; PVALID # LATIN SMALL LETTER P WITH SQUIRREL TAIL -A756 ; DISALLOWED # LATIN CAPITAL LETTER Q WITH STROKE THROUGH D -A757 ; PVALID # LATIN SMALL LETTER Q WITH STROKE THROUGH DES -A758 ; DISALLOWED # LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE -A759 ; PVALID # LATIN SMALL LETTER Q WITH DIAGONAL STROKE -A75A ; DISALLOWED # LATIN CAPITAL LETTER R ROTUNDA -A75B ; PVALID # LATIN SMALL LETTER R ROTUNDA -A75C ; DISALLOWED # LATIN CAPITAL LETTER RUM ROTUNDA -A75D ; PVALID # LATIN SMALL LETTER RUM ROTUNDA -A75E ; DISALLOWED # LATIN CAPITAL LETTER V WITH DIAGONAL STROKE -A75F ; PVALID # LATIN SMALL LETTER V WITH DIAGONAL STROKE -A760 ; DISALLOWED # LATIN CAPITAL LETTER VY -A761 ; PVALID # LATIN SMALL LETTER VY -A762 ; DISALLOWED # LATIN CAPITAL LETTER VISIGOTHIC Z -A763 ; PVALID # LATIN SMALL LETTER VISIGOTHIC Z -A764 ; DISALLOWED # LATIN CAPITAL LETTER THORN WITH STROKE -A765 ; PVALID # LATIN SMALL LETTER 
THORN WITH STROKE -A766 ; DISALLOWED # LATIN CAPITAL LETTER THORN WITH STROKE THROU -A767 ; PVALID # LATIN SMALL LETTER THORN WITH STROKE THROUGH -A768 ; DISALLOWED # LATIN CAPITAL LETTER VEND -A769 ; PVALID # LATIN SMALL LETTER VEND -A76A ; DISALLOWED # LATIN CAPITAL LETTER ET -A76B ; PVALID # LATIN SMALL LETTER ET -A76C ; DISALLOWED # LATIN CAPITAL LETTER IS -A76D ; PVALID # LATIN SMALL LETTER IS -A76E ; DISALLOWED # LATIN CAPITAL LETTER CON -A76F ; PVALID # LATIN SMALL LETTER CON -A770 ; DISALLOWED # MODIFIER LETTER US -A771..A778 ; PVALID # LATIN SMALL LETTER DUM..LATIN SMALL LETTER U -A779 ; DISALLOWED # LATIN CAPITAL LETTER INSULAR D -A77A ; PVALID # LATIN SMALL LETTER INSULAR D -A77B ; DISALLOWED # LATIN CAPITAL LETTER INSULAR F -A77C ; PVALID # LATIN SMALL LETTER INSULAR F -A77D..A77E ; DISALLOWED # LATIN CAPITAL LETTER INSULAR G..LATIN CAPITA -A77F ; PVALID # LATIN SMALL LETTER TURNED INSULAR G -A780 ; DISALLOWED # LATIN CAPITAL LETTER TURNED L -A781 ; PVALID # LATIN SMALL LETTER TURNED L -A782 ; DISALLOWED # LATIN CAPITAL LETTER INSULAR R -A783 ; PVALID # LATIN SMALL LETTER INSULAR R -A784 ; DISALLOWED # LATIN CAPITAL LETTER INSULAR S -A785 ; PVALID # LATIN SMALL LETTER INSULAR S -A786 ; DISALLOWED # LATIN CAPITAL LETTER INSULAR T -A787..A788 ; PVALID # LATIN SMALL LETTER INSULAR T..MODIFIER LETTE -A789..A78B ; DISALLOWED # MODIFIER LETTER COLON..LATIN CAPITAL LETTER -A78C ; PVALID # LATIN SMALL LETTER SALTILLO -A78D..A7FA ; UNASSIGNED # .. -A7FB..A827 ; PVALID # LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI N -A828..A82B ; DISALLOWED # SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POE -A82C..A82F ; UNASSIGNED # .. -A830..A839 ; DISALLOWED # NORTH INDIC FRACTION ONE QUARTER..NORTH INDI -A83A..A83F ; UNASSIGNED # .. -A840..A873 ; PVALID # PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABI -A874..A877 ; DISALLOWED # PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOU -A878..A87F ; UNASSIGNED # .. 
-A880..A8C4 ; PVALID # SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VI -A8C5..A8CD ; UNASSIGNED # .. -A8CE..A8CF ; DISALLOWED # SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA -A8D0..A8D9 ; PVALID # SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE -A8DA..A8DF ; UNASSIGNED # .. -A8E0..A8F7 ; PVALID # COMBINING DEVANAGARI DIGIT ZERO..DEVANAGARI -A8F8..A8FA ; DISALLOWED # DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET -A8FB ; PVALID # DEVANAGARI HEADSTROKE -A8FC..A8FF ; UNASSIGNED # .. -A900..A92D ; PVALID # KAYAH LI DIGIT ZERO..KAYAH LI TONE CALYA PLO -A92E..A92F ; DISALLOWED # KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA -A930..A953 ; PVALID # REJANG LETTER KA..REJANG VIRAMA -A954..A95E ; UNASSIGNED # .. -A95F..A97C ; DISALLOWED # REJANG SECTION MARK..HANGUL CHOSEONG SSANGYE -A97D..A97F ; UNASSIGNED # .. -A980..A9C0 ; PVALID # JAVANESE SIGN PANYANGGA..JAVANESE PANGKON -A9C1..A9CD ; DISALLOWED # JAVANESE LEFT RERENGGAN..JAVANESE TURNED PAD -A9CE ; UNASSIGNED # -A9CF..A9D9 ; PVALID # JAVANESE PANGRANGKEP..JAVANESE DIGIT NINE -A9DA..A9DD ; UNASSIGNED # .. -A9DE..A9DF ; DISALLOWED # JAVANESE PADA TIRTA TUMETES..JAVANESE PADA I -A9E0..A9FF ; UNASSIGNED # .. -AA00..AA36 ; PVALID # CHAM LETTER A..CHAM CONSONANT SIGN WA -AA37..AA3F ; UNASSIGNED # .. -AA40..AA4D ; PVALID # CHAM LETTER FINAL K..CHAM CONSONANT SIGN FIN -AA4E..AA4F ; UNASSIGNED # .. -AA50..AA59 ; PVALID # CHAM DIGIT ZERO..CHAM DIGIT NINE -AA5A..AA5B ; UNASSIGNED # .. -AA5C..AA5F ; DISALLOWED # CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TR -AA60..AA76 ; PVALID # MYANMAR LETTER KHAMTI GA..MYANMAR LOGOGRAM K -AA77..AA79 ; DISALLOWED # MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SY -AA7A..AA7B ; PVALID # MYANMAR LETTER AITON RA..MYANMAR SIGN PAO KA -AA7C..AA7F ; UNASSIGNED # .. -AA80..AAC2 ; PVALID # TAI VIET LETTER LOW KO..TAI VIET TONE MAI SO -AAC3..AADA ; UNASSIGNED # .. -AADB..AADD ; PVALID # TAI VIET SYMBOL KON..TAI VIET SYMBOL SAM -AADE..AADF ; DISALLOWED # TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI -AAE0..ABBF ; UNASSIGNED # .. 
-ABC0..ABEA ; PVALID # MEETEI MAYEK LETTER KOK..MEETEI MAYEK VOWEL -ABEB ; DISALLOWED # MEETEI MAYEK CHEIKHEI -ABEC..ABED ; PVALID # MEETEI MAYEK LUM IYEK..MEETEI MAYEK APUN IYE -ABEE..ABEF ; UNASSIGNED # .. -ABF0..ABF9 ; PVALID # MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT -ABFA..ABFF ; UNASSIGNED # .. -AC00..D7A3 ; PVALID # .. -D7A4..D7AF ; UNASSIGNED # .. -D7B0..D7C6 ; DISALLOWED # HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARA -D7C7..D7CA ; UNASSIGNED # .. -D7CB..D7FB ; DISALLOWED # HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEO -D7FC..D7FF ; UNASSIGNED # .. -D800..FA0D ; DISALLOWED # ..CJK COMPAT -FA0E..FA0F ; PVALID # CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPAT -FA10 ; DISALLOWED # CJK COMPATIBILITY IDEOGRAPH-FA10 -FA11 ; PVALID # CJK COMPATIBILITY IDEOGRAPH-FA11 -FA12 ; DISALLOWED # CJK COMPATIBILITY IDEOGRAPH-FA12 -FA13..FA14 ; PVALID # CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPAT -FA15..FA1E ; DISALLOWED # CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPAT -FA1F ; PVALID # CJK COMPATIBILITY IDEOGRAPH-FA1F -FA20 ; DISALLOWED # CJK COMPATIBILITY IDEOGRAPH-FA20 -FA21 ; PVALID # CJK COMPATIBILITY IDEOGRAPH-FA21 -FA22 ; DISALLOWED # CJK COMPATIBILITY IDEOGRAPH-FA22 -FA23..FA24 ; PVALID # CJK COMPATIBILITY IDEOGRAPH-FA23..CJK COMPAT -FA25..FA26 ; DISALLOWED # CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPAT -FA27..FA29 ; PVALID # CJK COMPATIBILITY IDEOGRAPH-FA27..CJK COMPAT -FA2A..FA2D ; DISALLOWED # CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPAT -FA2E..FA2F ; UNASSIGNED # .. -FA30..FA6D ; DISALLOWED # CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPAT -FA6E..FA6F ; UNASSIGNED # .. -FA70..FAD9 ; DISALLOWED # CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPAT -FADA..FAFF ; UNASSIGNED # .. -FB00..FB06 ; DISALLOWED # LATIN SMALL LIGATURE FF..LATIN SMALL LIGATUR -FB07..FB12 ; UNASSIGNED # .. -FB13..FB17 ; DISALLOWED # ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SM -FB18..FB1C ; UNASSIGNED # .. 
-FB1D ; DISALLOWED # HEBREW LETTER YOD WITH HIRIQ -FB1E ; PVALID # HEBREW POINT JUDEO-SPANISH VARIKA -FB1F..FB36 ; DISALLOWED # HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBRE -FB37 ; UNASSIGNED # -FB38..FB3C ; DISALLOWED # HEBREW LETTER TET WITH DAGESH..HEBREW LETTER -FB3D ; UNASSIGNED # -FB3E ; DISALLOWED # HEBREW LETTER MEM WITH DAGESH -FB3F ; UNASSIGNED # -FB40..FB41 ; DISALLOWED # HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER -FB42 ; UNASSIGNED # -FB43..FB44 ; DISALLOWED # HEBREW LETTER FINAL PE WITH DAGESH..HEBREW L -FB45 ; UNASSIGNED # -FB46..FBB1 ; DISALLOWED # HEBREW LETTER TSADI WITH DAGESH..ARABIC LETT -FBB2..FBD2 ; UNASSIGNED # .. -FBD3..FD3F ; DISALLOWED # ARABIC LETTER NG ISOLATED FORM..ORNATE RIGHT -FD40..FD4F ; UNASSIGNED # .. -FD50..FD8F ; DISALLOWED # ARABIC LIGATURE TEH WITH JEEM WITH MEEM INIT -FD90..FD91 ; UNASSIGNED # .. -FD92..FDC7 ; DISALLOWED # ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INI -FDC8..FDCF ; UNASSIGNED # .. -FDD0..FDFD ; DISALLOWED # ..ARABIC LIGATURE BISMILLAH AR -FDFE..FDFF ; UNASSIGNED # .. -FE00..FE19 ; DISALLOWED # VARIATION SELECTOR-1..PRESENTATION FORM FOR -FE1A..FE1F ; UNASSIGNED # .. -FE20..FE26 ; PVALID # COMBINING LIGATURE LEFT HALF..COMBINING CONJ -FE27..FE2F ; UNASSIGNED # .. -FE30..FE52 ; DISALLOWED # PRESENTATION FORM FOR VERTICAL TWO DOT LEADE -FE53 ; UNASSIGNED # -FE54..FE66 ; DISALLOWED # SMALL SEMICOLON..SMALL EQUALS SIGN -FE67 ; UNASSIGNED # -FE68..FE6B ; DISALLOWED # SMALL REVERSE SOLIDUS..SMALL COMMERCIAL AT -FE6C..FE6F ; UNASSIGNED # .. -FE70..FE72 ; DISALLOWED # ARABIC FATHATAN ISOLATED FORM..ARABIC DAMMAT -FE73 ; PVALID # ARABIC TAIL FRAGMENT -FE74 ; DISALLOWED # ARABIC KASRATAN ISOLATED FORM -FE75 ; UNASSIGNED # -FE76..FEFC ; DISALLOWED # ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE -FEFD..FEFE ; UNASSIGNED # .. -FEFF ; DISALLOWED # ZERO WIDTH NO-BREAK SPACE -FF00 ; UNASSIGNED # -FF01..FFBE ; DISALLOWED # FULLWIDTH EXCLAMATION MARK..HALFWIDTH HANGUL -FFBF..FFC1 ; UNASSIGNED # .. 
-FFC2..FFC7 ; DISALLOWED # HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL -FFC8..FFC9 ; UNASSIGNED # .. -FFCA..FFCF ; DISALLOWED # HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGU -FFD0..FFD1 ; UNASSIGNED # .. -FFD2..FFD7 ; DISALLOWED # HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL -FFD8..FFD9 ; UNASSIGNED # .. -FFDA..FFDC ; DISALLOWED # HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL -FFDD..FFDF ; UNASSIGNED # .. -FFE0..FFE6 ; DISALLOWED # FULLWIDTH CENT SIGN..FULLWIDTH WON SIGN -FFE7 ; UNASSIGNED # -FFE8..FFEE ; DISALLOWED # HALFWIDTH FORMS LIGHT VERTICAL..HALFWIDTH WH -FFEF..FFF8 ; UNASSIGNED # .. -FFF9..FFFF ; DISALLOWED # INTERLINEAR ANNOTATION ANCHOR.. -1000D..10026; PVALID # LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE -10027 ; UNASSIGNED # -10028..1003A; PVALID # LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE -1003B ; UNASSIGNED # -1003C..1003D; PVALID # LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE -1003E ; UNASSIGNED # -1003F..1004D; PVALID # LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE -1004E..1004F; UNASSIGNED # .. -10050..1005D; PVALID # LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 -1005E..1007F; UNASSIGNED # .. -10080..100FA; PVALID # LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRA -100FB..100FF; UNASSIGNED # .. -10100..10102; DISALLOWED # AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MAR -10103..10106; UNASSIGNED # .. -10107..10133; DISALLOWED # AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOU -10134..10136; UNASSIGNED # .. -10137..1018A; DISALLOWED # AEGEAN WEIGHT BASE UNIT..GREEK ZERO SIGN -1018B..1018F; UNASSIGNED # .. -10190..1019B; DISALLOWED # ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN -1019C..101CF; UNASSIGNED # .. -101D0..101FC; DISALLOWED # PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC -101FD ; PVALID # PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE -101FE..1027F; UNASSIGNED # .. -10280..1029C; PVALID # LYCIAN LETTER A..LYCIAN LETTER X -1029D..1029F; UNASSIGNED # .. -102A0..102D0; PVALID # CARIAN LETTER A..CARIAN LETTER UUU3 -102D1..102FF; UNASSIGNED # .. 
-10300..1031E; PVALID # OLD ITALIC LETTER A..OLD ITALIC LETTER UU -1031F ; UNASSIGNED # -10320..10323; DISALLOWED # OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL F -10324..1032F; UNASSIGNED # .. -10330..10340; PVALID # GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA -10341 ; DISALLOWED # GOTHIC LETTER NINETY -10342..10349; PVALID # GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL -1034A ; DISALLOWED # GOTHIC LETTER NINE HUNDRED -1034B..1037F; UNASSIGNED # .. -10380..1039D; PVALID # UGARITIC LETTER ALPA..UGARITIC LETTER SSU -1039E ; UNASSIGNED # -1039F ; DISALLOWED # UGARITIC WORD DIVIDER -103A0..103C3; PVALID # OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA -103C4..103C7; UNASSIGNED # .. -103C8..103CF; PVALID # OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIG -103D0..103D5; DISALLOWED # OLD PERSIAN WORD DIVIDER..OLD PERSIAN NUMBER -103D6..103FF; UNASSIGNED # .. -10400..10427; DISALLOWED # DESERET CAPITAL LETTER LONG I..DESERET CAPIT -10428..1049D; PVALID # DESERET SMALL LETTER LONG I..OSMANYA LETTER -1049E..1049F; UNASSIGNED # .. -104A0..104A9; PVALID # OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE -104AA..107FF; UNASSIGNED # .. -10800..10805; PVALID # CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA -10806..10807; UNASSIGNED # .. -10808 ; PVALID # CYPRIOT SYLLABLE JO -10809 ; UNASSIGNED # -1080A..10835; PVALID # CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO -10836 ; UNASSIGNED # -10837..10838; PVALID # CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE -10839..1083B; UNASSIGNED # .. -1083C ; PVALID # CYPRIOT SYLLABLE ZA -1083D..1083E; UNASSIGNED # .. -1083F..10855; PVALID # CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER -10856 ; UNASSIGNED # -10857..1085F; DISALLOWED # IMPERIAL ARAMAIC SECTION SIGN..IMPERIAL ARAM -10860..108FF; UNASSIGNED # .. -10900..10915; PVALID # PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU -10916..1091B; DISALLOWED # PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THR -1091C..1091E; UNASSIGNED # .. 
-1091F ; DISALLOWED # PHOENICIAN WORD SEPARATOR -10920..10939; PVALID # LYDIAN LETTER A..LYDIAN LETTER C -1093A..1093E; UNASSIGNED # .. -1093F ; DISALLOWED # LYDIAN TRIANGULAR MARK -10940..109FF; UNASSIGNED # .. -10A00..10A03; PVALID # KHAROSHTHI LETTER A..KHAROSHTHI VOWEL SIGN V -10A04 ; UNASSIGNED # -10A05..10A06; PVALID # KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SI -10A07..10A0B; UNASSIGNED # .. -10A0C..10A13; PVALID # KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI LET -10A14 ; UNASSIGNED # -10A15..10A17; PVALID # KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA -10A18 ; UNASSIGNED # -10A19..10A33; PVALID # KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER TTT -10A34..10A37; UNASSIGNED # .. -10A38..10A3A; PVALID # KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN D -10A3B..10A3E; UNASSIGNED # .. -10A3F ; PVALID # KHAROSHTHI VIRAMA -10A40..10A47; DISALLOWED # KHAROSHTHI DIGIT ONE..KHAROSHTHI NUMBER ONE -10A48..10A4F; UNASSIGNED # .. -10A50..10A58; DISALLOWED # KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCT -10A59..10A5F; UNASSIGNED # .. -10A60..10A7C; PVALID # OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABI -10A7D..10A7F; DISALLOWED # OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARAB -10A80..10AFF; UNASSIGNED # .. -10B00..10B35; PVALID # AVESTAN LETTER A..AVESTAN LETTER HE -10B36..10B38; UNASSIGNED # .. -10B39..10B3F; DISALLOWED # AVESTAN ABBREVIATION MARK..LARGE ONE RING OV -10B40..10B55; PVALID # INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIP -10B56..10B57; UNASSIGNED # .. -10B58..10B5F; DISALLOWED # INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTI -10B60..10B72; PVALID # INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPT -10B73..10B77; UNASSIGNED # .. -10B78..10B7F; DISALLOWED # INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIO -10B80..10BFF; UNASSIGNED # .. -10C00..10C48; PVALID # OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTE -10C49..10E5F; UNASSIGNED # .. -10E60..10E7E; DISALLOWED # RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS -10E7F..1107F; UNASSIGNED # .. 
-11080..110BA; PVALID # KAITHI SIGN CANDRABINDU..KAITHI SIGN NUKTA -110BB..110C1; DISALLOWED # KAITHI ABBREVIATION SIGN..KAITHI DOUBLE DAND -110C2..11FFF; UNASSIGNED # .. -12000..1236E; PVALID # CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM -1236F..123FF; UNASSIGNED # .. -12400..12462; DISALLOWED # CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NU -12463..1246F; UNASSIGNED # .. -12470..12473; DISALLOWED # CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD -12474..12FFF; UNASSIGNED # .. -13000..1342E; PVALID # EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYP -1342F..1CFFF; UNASSIGNED # .. -1D000..1D0F5; DISALLOWED # BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MU -1D0F6..1D0FF; UNASSIGNED # .. -1D100..1D126; DISALLOWED # MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBO -1D127..1D128; UNASSIGNED # .. -1D129..1D1DD; DISALLOWED # MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICA -1D1DE..1D1FF; UNASSIGNED # .. -1D200..1D245; DISALLOWED # GREEK VOCAL NOTATION SYMBOL-1..GREEK MUSICAL -1D246..1D2FF; UNASSIGNED # .. -1D300..1D356; DISALLOWED # MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING -1D357..1D35F; UNASSIGNED # .. -1D360..1D371; DISALLOWED # COUNTING ROD UNIT DIGIT ONE..COUNTING ROD TE -1D372..1D3FF; UNASSIGNED # .. -1D400..1D454; DISALLOWED # MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL IT -1D455 ; UNASSIGNED # -1D456..1D49C; DISALLOWED # MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SC -1D49D ; UNASSIGNED # -1D49E..1D49F; DISALLOWED # MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL -1D4A0..1D4A1; UNASSIGNED # .. -1D4A2 ; DISALLOWED # MATHEMATICAL SCRIPT CAPITAL G -1D4A3..1D4A4; UNASSIGNED # .. -1D4A5..1D4A6; DISALLOWED # MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL -1D4A7..1D4A8; UNASSIGNED # .. 
-1D4A9..1D4AC; DISALLOWED # MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL -1D4AD ; UNASSIGNED # -1D4AE..1D4B9; DISALLOWED # MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL -1D4BA ; UNASSIGNED # -1D4BB ; DISALLOWED # MATHEMATICAL SCRIPT SMALL F -1D4BC ; UNASSIGNED # -1D4BD..1D4C3; DISALLOWED # MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SC -1D4C4 ; UNASSIGNED # -1D4C5..1D505; DISALLOWED # MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FR -1D506 ; UNASSIGNED # -1D507..1D50A; DISALLOWED # MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL -1D50B..1D50C; UNASSIGNED # .. -1D50D..1D514; DISALLOWED # MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL -1D515 ; UNASSIGNED # -1D516..1D51C; DISALLOWED # MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL -1D51D ; UNASSIGNED # -1D51E..1D539; DISALLOWED # MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL D -1D53A ; UNASSIGNED # -1D53B..1D53E; DISALLOWED # MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEM -1D53F ; UNASSIGNED # -1D540..1D544; DISALLOWED # MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEM -1D545 ; UNASSIGNED # -1D546 ; DISALLOWED # MATHEMATICAL DOUBLE-STRUCK CAPITAL O -1D547..1D549; UNASSIGNED # .. -1D54A..1D550; DISALLOWED # MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEM -1D551 ; UNASSIGNED # -1D552..1D6A5; DISALLOWED # MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMAT -1D6A6..1D6A7; UNASSIGNED # .. -1D6A8..1D7CB; DISALLOWED # MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICA -1D7CC..1D7CD; UNASSIGNED # .. -1D7CE..1D7FF; DISALLOWED # MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL M -1D800..1EFFF; UNASSIGNED # .. -1F000..1F02B; DISALLOWED # MAHJONG TILE EAST WIND..MAHJONG TILE BACK -1F02C..1F02F; UNASSIGNED # .. -1F030..1F093; DISALLOWED # DOMINO TILE HORIZONTAL BACK..DOMINO TILE VER -1F094..1F0FF; UNASSIGNED # .. -1F100..1F10A; DISALLOWED # DIGIT ZERO FULL STOP..DIGIT NINE COMMA -1F10B..1F10F; UNASSIGNED # .. -1F110..1F12E; DISALLOWED # PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLE -1F12F..1F130; UNASSIGNED # .. 
-1F131 ; DISALLOWED # SQUARED LATIN CAPITAL LETTER B -1F132..1F13C; UNASSIGNED # .. -1F13D ; DISALLOWED # SQUARED LATIN CAPITAL LETTER N -1F13E ; UNASSIGNED # -1F13F ; DISALLOWED # SQUARED LATIN CAPITAL LETTER P -1F140..1F141; UNASSIGNED # .. -1F142 ; DISALLOWED # SQUARED LATIN CAPITAL LETTER S -1F143..1F145; UNASSIGNED # .. -1F146 ; DISALLOWED # SQUARED LATIN CAPITAL LETTER W -1F147..1F149; UNASSIGNED # .. -1F14A..1F14E; DISALLOWED # SQUARED HV..SQUARED PPV -1F14F..1F156; UNASSIGNED # .. -1F157 ; DISALLOWED # NEGATIVE CIRCLED LATIN CAPITAL LETTER H -1F158..1F15E; UNASSIGNED # .. -1F15F ; DISALLOWED # NEGATIVE CIRCLED LATIN CAPITAL LETTER P -1F160..1F178; UNASSIGNED # .. -1F179 ; DISALLOWED # NEGATIVE SQUARED LATIN CAPITAL LETTER J -1F17A ; UNASSIGNED # -1F17B..1F17C; DISALLOWED # NEGATIVE SQUARED LATIN CAPITAL LETTER L..NEG -1F17D..1F17E; UNASSIGNED # .. -1F17F ; DISALLOWED # NEGATIVE SQUARED LATIN CAPITAL LETTER P -1F180..1F189; UNASSIGNED # .. -1F18A..1F18D; DISALLOWED # CROSSED NEGATIVE SQUARED LATIN CAPITAL LETTE -1F18E..1F18F; UNASSIGNED # .. -1F190 ; DISALLOWED # SQUARE DJ -1F191..1F1FF; UNASSIGNED # .. -1F200 ; DISALLOWED # SQUARE HIRAGANA HOKA -1F201..1F20F; UNASSIGNED # .. -1F210..1F231; DISALLOWED # SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED -1F232..1F23F; UNASSIGNED # .. -1F240..1F248; DISALLOWED # TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRA -1F249..1FFFD; UNASSIGNED # .. -1FFFE..1FFFF; DISALLOWED # .. -20000..2A6D6; PVALID # .... -2A700..2B734; PVALID # .... -2F800..2FA1D; DISALLOWED # CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPA -2FA1E..2FFFD; UNASSIGNED # .. -2FFFE..2FFFF; DISALLOWED # .. -30000..3FFFD; UNASSIGNED # .. -3FFFE..3FFFF; DISALLOWED # .. -40000..4FFFD; UNASSIGNED # .. -4FFFE..4FFFF; DISALLOWED # .. -50000..5FFFD; UNASSIGNED # .. -5FFFE..5FFFF; DISALLOWED # .. -60000..6FFFD; UNASSIGNED # .. -6FFFE..6FFFF; DISALLOWED # .. -70000..7FFFD; UNASSIGNED # .. -7FFFE..7FFFF; DISALLOWED # .. -80000..8FFFD; UNASSIGNED # .. 
-8FFFE..8FFFF; DISALLOWED # .. -90000..9FFFD; UNASSIGNED # .. -9FFFE..9FFFF; DISALLOWED # .. -A0000..AFFFD; UNASSIGNED # .. -AFFFE..AFFFF; DISALLOWED # .. -B0000..BFFFD; UNASSIGNED # .. -BFFFE..BFFFF; DISALLOWED # .. -C0000..CFFFD; UNASSIGNED # .. -CFFFE..CFFFF; DISALLOWED # .. -D0000..DFFFD; UNASSIGNED # .. -DFFFE..DFFFF; DISALLOWED # .. -E0000 ; UNASSIGNED # -E0001 ; DISALLOWED # LANGUAGE TAG -E0002..E001F; UNASSIGNED # .. -E0020..E007F; DISALLOWED # TAG SPACE..CANCEL TAG -E0080..E00FF; UNASSIGNED # .. -E0100..E01EF; DISALLOWED # VARIATION SELECTOR-17..VARIATION SELECTOR-25 -E01F0..EFFFD; UNASSIGNED # .. -EFFFE..10FFFF; DISALLOWED # .. \ No newline at end of file diff --git a/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/Main.kt b/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/Main.kt deleted file mode 100644 index d3f2cc3c..00000000 --- a/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/Main.kt +++ /dev/null @@ -1,125 +0,0 @@ -@file:JvmName("Main") - -package io.github.optimumcode.unocode.generator - -import com.github.ajalt.clikt.core.CliktCommand -import com.github.ajalt.clikt.core.subcommands -import com.github.ajalt.clikt.parameters.options.check -import com.github.ajalt.clikt.parameters.options.default -import com.github.ajalt.clikt.parameters.options.option -import com.github.ajalt.clikt.parameters.options.required -import com.github.ajalt.clikt.parameters.types.path -import io.github.optimumcode.unocode.generator.internal.dump.DataDamper -import io.github.optimumcode.unocode.generator.internal.dump.DerivedPropertiesLoader -import io.github.optimumcode.unocode.generator.internal.dump.JoiningTypesLoader -import io.github.optimumcode.unocode.generator.internal.generator.generateCategoryClasses -import io.github.optimumcode.unocode.generator.internal.generator.generateDerivedJoiningTypes -import io.github.optimumcode.unocode.generator.internal.generator.generateDerivedProperties -import 
io.github.optimumcode.unocode.generator.internal.generator.generateDirectionClasses -import io.github.optimumcode.unocode.generator.internal.graphql.GraphqlClient -import kotlinx.coroutines.runBlocking -import java.net.URL -import java.nio.file.Path -import kotlin.io.path.createDirectories - -fun main(args: Array) = GeneratorCommand().main(args) - -private class GeneratorCommand : CliktCommand() { - init { - subcommands( - CharacterDirectionGenerator(), - CharacterCategoryGenerator(), - DerivedPropertiesGenerator(), - JoiningTypesGenerator(), - DumpCommand(), - ) - } - - override fun run() = Unit -} - -private class DumpCommand : CliktCommand( - name = "dump", -) { - private val outputDirectory: Path by option("--outputDir", "-o", help = "Output directory") - .path(mustExist = false, canBeFile = false, canBeDir = true) - .required() - - private val sourceUrl: String by option("--source", "-s", help = "Source URL") - .default("https://www.compart.com/en/unicode/graphql") - - override fun run() { - outputDirectory.createDirectories() - GraphqlClient(URL(sourceUrl)).use { cl -> - runBlocking { - DataDamper.dump(cl, outputDirectory) { - echo(it) - } - } - } - } -} - -private abstract class AbstractGenerator(name: String) : CliktCommand(name = name) { - protected val outputDirectory: Path by option("--outputDir", "-o", help = "Output directory") - .path(mustExist = true, canBeFile = false, canBeDir = true) - .required() - - protected val packageName: String by option("--package-name", "-p", help = "Package name") - .required() - .check("empty package name", String::isNotEmpty) -} - -private abstract class AbstractDumperGenerator(name: String) : AbstractGenerator(name) { - protected val dumpDirectory: Path by option("--dumpDir", "-d", help = "Output directory") - .path(mustExist = true, canBeFile = false, canBeDir = true) - .required() -} - -private class CharacterDirectionGenerator : AbstractDumperGenerator( - name = "character-direction", -) { - override fun run() { - 
val classes = DataDamper.loadClasses(dumpDirectory) - generateDirectionClasses(packageName, outputDirectory, classes) { - DataDamper.loadRanges(dumpDirectory, it) - } - } -} - -private class CharacterCategoryGenerator : AbstractDumperGenerator( - name = "character-category", -) { - override fun run() { - val categories = DataDamper.loadCategories(dumpDirectory) - generateCategoryClasses(packageName, outputDirectory, categories) { - DataDamper.loadRanges(dumpDirectory, it) - } - } -} - -private class DerivedPropertiesGenerator : AbstractGenerator( - name = "derived-properties", -) { - private val dataFile: Path by option("--data-file", "-d", help = "Input file") - .path(mustExist = true, canBeFile = true, canBeDir = false) - .required() - - override fun run() { - val properties = DerivedPropertiesLoader.loadProperties(dataFile) - generateDerivedProperties(packageName, outputDirectory, properties) - } -} - -private class JoiningTypesGenerator : AbstractGenerator( - name = "joining-types", -) { - private val dataFile: Path by option("--data-file", "-d", help = "Input file") - .path(mustExist = true, canBeFile = true, canBeDir = false) - .required() - - override fun run() { - val properties = JoiningTypesLoader.loadTypes(dataFile) - generateDerivedJoiningTypes(packageName, outputDirectory, properties) - } -} \ No newline at end of file diff --git a/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/dump/DataDamper.kt b/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/dump/DataDamper.kt deleted file mode 100644 index ecd20968..00000000 --- a/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/dump/DataDamper.kt +++ /dev/null @@ -1,153 +0,0 @@ -package io.github.optimumcode.unocode.generator.internal.dump - -import io.github.optimumcode.unocode.generator.internal.graphql.GraphqlClient -import io.github.optimumcode.unocode.generator.internal.model.BiDirectionalClass -import 
io.github.optimumcode.unocode.generator.internal.model.Category -import io.github.optimumcode.unocode.generator.internal.model.Range -import io.github.optimumcode.unocode.generator.internal.model.UnicodeChar -import kotlinx.coroutines.flow.Flow -import kotlinx.serialization.ExperimentalSerializationApi -import kotlinx.serialization.builtins.ListSerializer -import kotlinx.serialization.json.Json -import kotlinx.serialization.json.decodeFromStream -import kotlinx.serialization.json.encodeToStream -import java.nio.file.Path -import kotlin.io.path.inputStream -import kotlin.io.path.outputStream - -internal object DataDamper { - private const val CATEGORIES = "categories.json" - private const val CATEGORY_RANGES = "-categories-range.json" - private const val BIDIRECTIONAL_CLASSES = "bidir-classes.json" - private const val BIDIRECTIONAL_CLASSE_RANGES = "-classes-range.json" - - suspend fun dump( - client: GraphqlClient, - dumpDir: Path, - onDump: (String) -> Unit, - ) { - val classes: List = client.bidirectionalClasses() - dumpDirectionClasses(dumpDir, classes) - onDump("directional classes are dumped") - - for (clazz in classes) { - dumpCharactersForDirection(client, clazz, dumpDir) - onDump("characters for directional class ${clazz.name} are dumped") - } - - val categories = client.categories() - - dumpCategories(dumpDir, categories) - onDump("categories are dumped") - - for (category in categories) { - dumpCharactersForCategory(client, category, dumpDir) - onDump("characters for category class ${category.name} are dumped") - } - } - - private suspend fun dumpCharactersForCategory( - client: GraphqlClient, - category: Category, - dumpDir: Path, - ) { - val ranges = - client.charactersForCategory(category) - .groupCharactersToRanges() - - dumpToFile(dumpDir.resolve(fileNameForCategory(category)), ranges) - } - - private fun dumpCategories( - dumpDir: Path, - categories: List, - ) { - dumpToFile(dumpDir.resolve(CATEGORIES), categories) - } - - private suspend fun 
dumpCharactersForDirection( - client: GraphqlClient, - clazz: BiDirectionalClass, - dumpDir: Path, - ) { - val ranges: List = - client.charactersForClass(clazz.id) - .groupCharactersToRanges() - - dumpToFile(dumpDir.resolve(fileNameForDirectionClass(clazz)), ranges) - } - - private fun dumpDirectionClasses( - dumpDir: Path, - classes: List, - ) { - dumpToFile(dumpDir.resolve(BIDIRECTIONAL_CLASSES), classes) - } - - @OptIn(ExperimentalSerializationApi::class) - private inline fun dumpToFile( - path: Path, - data: T, - ) { - path.outputStream().use { - Json.encodeToStream(data, it) - } - } - - @OptIn(ExperimentalSerializationApi::class) - fun loadClasses(dumpDir: Path): List = - dumpDir.resolve(BIDIRECTIONAL_CLASSES).inputStream().use { - Json.decodeFromStream(ListSerializer(BiDirectionalClass.serializer()), it) - } - - @OptIn(ExperimentalSerializationApi::class) - fun loadCategories(dumpDir: Path): List = - dumpDir.resolve(CATEGORIES).inputStream().use { - Json.decodeFromStream(ListSerializer(Category.serializer()), it) - } - - fun loadRanges( - dumpDir: Path, - clazz: BiDirectionalClass, - ): List = loadRanges(dumpDir.resolve(fileNameForDirectionClass(clazz))) - - fun loadRanges( - dumpDir: Path, - category: Category, - ): List = loadRanges(dumpDir.resolve(fileNameForCategory(category))) - - @OptIn(ExperimentalSerializationApi::class) - private fun loadRanges(path: Path): List = - path.inputStream().use { - Json.decodeFromStream(ListSerializer(Range.serializer()), it) - } - - private fun fileNameForDirectionClass(clazz: BiDirectionalClass) = "${clazz.id}$BIDIRECTIONAL_CLASSE_RANGES" - - private fun fileNameForCategory(category: Category) = "${category.id}$CATEGORY_RANGES" - - private suspend fun Flow.groupCharactersToRanges(): List { - val resultRanges = mutableListOf() - var lastStartCodePoint: Int = -1 - var lastEndCodePoint: Int = -1 - collect { unicode -> - @Suppress("detekt:MagicNumber") - val codepoint = unicode.id.removePrefix("U+").toInt(16) - if 
(lastStartCodePoint < 0) { - lastStartCodePoint = codepoint - lastEndCodePoint = codepoint - } - if (codepoint - lastEndCodePoint > 1) { - resultRanges.add(Range(lastStartCodePoint, lastEndCodePoint)) - lastStartCodePoint = codepoint - lastEndCodePoint = codepoint - return@collect - } - lastEndCodePoint = codepoint - } - if (lastStartCodePoint >= 0) { - resultRanges.add(Range(lastStartCodePoint, lastEndCodePoint)) - } - return resultRanges - } -} \ No newline at end of file diff --git a/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/dump/DerivedPropertiesLoader.kt b/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/dump/DerivedPropertiesLoader.kt deleted file mode 100644 index bfec6210..00000000 --- a/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/dump/DerivedPropertiesLoader.kt +++ /dev/null @@ -1,34 +0,0 @@ -package io.github.optimumcode.unocode.generator.internal.dump - -import io.github.optimumcode.unocode.generator.internal.model.DerivedProperty -import io.github.optimumcode.unocode.generator.internal.model.Range -import java.nio.file.Path -import kotlin.io.path.inputStream - -object DerivedPropertiesLoader { - fun loadProperties(path: Path): Map> { - return path.inputStream() - .bufferedReader(Charsets.UTF_8) - .useLines { lines -> - lines.filter(String::isNotBlank) - .map(this::parseDerivedProperties) - .groupBy(DerivedProperty::type) - } - } - - private fun parseDerivedProperties(line: String): DerivedProperty { - val parts = line.split(';', limit = 2) - check(parts.size == 2) { - "Line '$line' does not match required pattern" - } - val (codepoints, property) = parts - val range: Range = parseCodepointsPart(codepoints) - val type: String = extractType(property) - return DerivedProperty(type, range) - } - - private fun extractType(property: String): String { - // extract and create copy - return property.substringBefore('#').trim() + "" - } -} \ No newline at end of file diff 
--git a/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/dump/JoiningTypesLoader.kt b/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/dump/JoiningTypesLoader.kt deleted file mode 100644 index 9555448e..00000000 --- a/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/dump/JoiningTypesLoader.kt +++ /dev/null @@ -1,39 +0,0 @@ -package io.github.optimumcode.unocode.generator.internal.dump - -import io.github.optimumcode.unocode.generator.internal.model.JoiningType -import java.nio.file.Path -import kotlin.io.path.inputStream - -internal object JoiningTypesLoader { - private const val TYPE_DECLARATION = "Joining_Type=" - private const val COMMENT = '#' - private const val SEPARATOR = ';' - - fun loadTypes(path: Path): Map> { - return path.inputStream() - .bufferedReader(Charsets.UTF_8) - .useLines { lines -> - var type: String? = null - val destination = hashMapOf>() - for (line in lines.filter(String::isNotBlank)) { - if (line.contains(TYPE_DECLARATION)) { - type = line.substringAfter(TYPE_DECLARATION).trim() + "" - continue - } - if (SEPARATOR !in line || line.startsWith(COMMENT)) { - continue - } - val codepoints = line.substringBefore(SEPARATOR).trim() - destination.computeIfAbsent( - requireNotNull(type) { "type" }, - ) { arrayListOf() }.add( - JoiningType( - type = type, - range = parseCodepointsPart(codepoints), - ), - ) - } - destination - } - } -} \ No newline at end of file diff --git a/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/dump/Util.kt b/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/dump/Util.kt deleted file mode 100644 index 5061250f..00000000 --- a/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/dump/Util.kt +++ /dev/null @@ -1,13 +0,0 @@ -package io.github.optimumcode.unocode.generator.internal.dump - -import io.github.optimumcode.unocode.generator.internal.model.Range - -internal fun 
parseCodepointsPart(codepoints: String): Range { - val rangeParts = codepoints.trim().split("..", limit = 2) - @Suppress("detekt:MagicNumber") - return when (rangeParts.size) { - 1 -> Range(rangeParts[0].toInt(16)) - 2 -> Range(rangeParts[0].toInt(16), rangeParts[1].toInt(16)) - else -> error("invalid code points part '$rangeParts'") - } -} \ No newline at end of file diff --git a/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/generator/CategoryGenerator.kt b/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/generator/CategoryGenerator.kt deleted file mode 100644 index a11aa8e1..00000000 --- a/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/generator/CategoryGenerator.kt +++ /dev/null @@ -1,184 +0,0 @@ -package io.github.optimumcode.unocode.generator.internal.generator - -import com.squareup.kotlinpoet.AnnotationSpec -import com.squareup.kotlinpoet.BOOLEAN -import com.squareup.kotlinpoet.ClassName -import com.squareup.kotlinpoet.FileSpec -import com.squareup.kotlinpoet.FunSpec -import com.squareup.kotlinpoet.INT -import com.squareup.kotlinpoet.KModifier.ABSTRACT -import com.squareup.kotlinpoet.KModifier.INTERNAL -import com.squareup.kotlinpoet.KModifier.OVERRIDE -import com.squareup.kotlinpoet.ParameterSpec -import com.squareup.kotlinpoet.PropertySpec -import com.squareup.kotlinpoet.TypeSpec -import io.github.optimumcode.unocode.generator.internal.model.Category -import io.github.optimumcode.unocode.generator.internal.model.Range -import java.nio.file.Path - -private const val MIN_CODEPOINT_PROPERTY = "minCodepoint" - -private const val MAX_CODEPOINT_PROPERTY = "maxCodepoint" - -private const val CONTAINS_METHOD = "contains" - -private const val CODEPOINT_PARAMETER = "codepoint" - -fun generateCategoryClasses( - packageName: String, - outputDir: Path, - classes: List, - rangeProvider: (Category) -> List, -) { - val internalPackageName = "$packageName.categories" - val unicodeObjects = - 
classes.associateBy { - it.name.replace(" ", "") - } - - val characterData = ClassName(packageName, "CharacterCategoryData") - FileSpec.builder(characterData) - .addType( - TypeSpec.interfaceBuilder(characterData) - .addModifiers(INTERNAL) - .addProperty( - PropertySpec.builder(MIN_CODEPOINT_PROPERTY, INT) - .addModifiers(ABSTRACT) - .build(), - ) - .addProperty( - PropertySpec.builder(MAX_CODEPOINT_PROPERTY, INT) - .addModifiers(ABSTRACT) - .build(), - ) - .addFunction( - FunSpec.builder(CONTAINS_METHOD) - .addModifiers(ABSTRACT) - .addParameter( - ParameterSpec.builder(CODEPOINT_PARAMETER, INT) - .build(), - ) - .returns(BOOLEAN) - .build(), - ) - .build(), - ) - .build() - .writeTo(outputDir) - - unicodeObjects.forEach { (directionClassName, unicodeObject) -> - println("Processing '${unicodeObject.name}' category") - generateObjectWithCheckLogic( - unicodeObject, - directionClassName, - internalPackageName, - characterData, - rangeProvider, - ).build() - .writeTo(outputDir) - } - generateEnum(packageName, characterData, unicodeObjects, internalPackageName, outputDir) -} - -private fun generateEnum( - packageName: String, - characterData: ClassName, - unicodeObjects: Map, - internalPackageName: String, - outputDir: Path, -) { - val characterDataProperty = "characterData" - FileSpec.builder(packageName, "CharacterCategory") - .addType( - TypeSpec.enumBuilder("CharacterCategory") - .addModifiers(INTERNAL) - .primaryConstructor( - FunSpec.constructorBuilder() - .addParameter( - ParameterSpec.builder(characterDataProperty, characterData) - .build(), - ) - .build(), - ) - .addProperty( - PropertySpec.builder(characterDataProperty, characterData) - .initializer(characterDataProperty) - .build(), - ) - .apply { - unicodeObjects.forEach { (className, unicodeObject) -> - addEnumConstant( - unicodeObject.name.replace(" ", "_").uppercase(), - TypeSpec.anonymousClassBuilder() - .apply { - kdoc.addStatement("%L category \"%L\" in unicode", unicodeObject.name, 
unicodeObject.id) - } - .addSuperclassConstructorParameter("%T", ClassName(internalPackageName, className)) - .build(), - ) - } - } - .build(), - ) - .build() - .writeTo(outputDir) -} - -private fun generateObjectWithCheckLogic( - category: Category, - directionClassName: String, - packageName: String, - interfaceImpl: ClassName, - rangeProvider: (Category) -> List, -): FileSpec.Builder { - val codepointRanges: List = rangeProvider(category) - val minCodepoint: Int = codepointRanges.minOf { it.start } - val maxCodepoint: Int = codepointRanges.maxOf { it.end } - val minCodepointProp = - PropertySpec.builder(MIN_CODEPOINT_PROPERTY, INT) - .addModifiers(OVERRIDE) - .getter(FunSpec.getterBuilder().addStatement("return %L", minCodepoint.toHexString()).build()) - .build() - val maxCodepointProp = - PropertySpec.builder(MAX_CODEPOINT_PROPERTY, INT) - .addModifiers(OVERRIDE) - .getter(FunSpec.getterBuilder().addStatement("return %L", maxCodepoint.toHexString()).build()) - .build() - return FileSpec.builder(packageName, directionClassName) - .addType( - TypeSpec.objectBuilder(directionClassName) - .addAnnotation(AnnotationSpec.builder(Suppress::class).addMember("%S", "detekt:all").build()) - .addModifiers(INTERNAL) - .addSuperinterface(interfaceImpl) - .addProperties( - listOf( - minCodepointProp, - maxCodepointProp, - ), - ) - .addFunction( - FunSpec.builder(CONTAINS_METHOD) - .addModifiers(OVERRIDE) - .returns(BOOLEAN) - .addParameter( - ParameterSpec.builder(CODEPOINT_PARAMETER, INT) - .build(), - ) - .apply { - if (codepointRanges.size > 1) { - beginControlFlow( - "if (%2N > %1L || %3N < %1L)", - CODEPOINT_PARAMETER, - minCodepointProp, - maxCodepointProp, - ) - addStatement("return false") - endControlFlow() - } - checkCodepointInRanges(codepointRanges, CODEPOINT_PARAMETER) - } - .build(), - ) - .build(), - ) -} \ No newline at end of file diff --git a/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/generator/DerivedJoinigTypeGenerator.kt 
b/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/generator/DerivedJoinigTypeGenerator.kt deleted file mode 100644 index 6206aaad..00000000 --- a/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/generator/DerivedJoinigTypeGenerator.kt +++ /dev/null @@ -1,58 +0,0 @@ -package io.github.optimumcode.unocode.generator.internal.generator - -import com.squareup.kotlinpoet.AnnotationSpec -import com.squareup.kotlinpoet.FileSpec -import com.squareup.kotlinpoet.FunSpec -import com.squareup.kotlinpoet.INT -import com.squareup.kotlinpoet.KModifier.ABSTRACT -import com.squareup.kotlinpoet.KModifier.INTERNAL -import com.squareup.kotlinpoet.KModifier.OVERRIDE -import com.squareup.kotlinpoet.ParameterSpec -import com.squareup.kotlinpoet.TypeSpec -import io.github.optimumcode.unocode.generator.internal.model.JoiningType -import java.nio.file.Path - -private const val CODE_POINT_PARAMETER = "codePoint" - -fun generateDerivedJoiningTypes( - packageName: String, - outputDir: Path, - joiningTypes: Map>, -) { - fun containsFunction(): FunSpec.Builder = - FunSpec.builder("contains") - .returns(Boolean::class) - .addParameter(ParameterSpec.builder(CODE_POINT_PARAMETER, INT).build()) - - FileSpec.builder(packageName, "JoiningType") - .addType( - TypeSpec.enumBuilder("JoiningType") - .addModifiers(INTERNAL) - .addAnnotation(AnnotationSpec.builder(Suppress::class).addMember("%S", "detekt:all").build()) - .addFunction(containsFunction().addModifiers(ABSTRACT).build()) - .apply { - for ((type, groupedTypes) in joiningTypes) { - addEnumConstant( - type.uppercase(), - TypeSpec.anonymousClassBuilder() - .addFunction( - containsFunction() - .addModifiers(OVERRIDE) - .checkCodePoint(groupedTypes) - .build(), - ) - .build(), - ) - } - } - .build(), - ) - .build() - .writeTo(outputDir) -} - -private fun FunSpec.Builder.checkCodePoint(properties: List): FunSpec.Builder = - apply { - val ranges = properties.map { it.range } - 
checkCodepointInRanges(ranges, CODE_POINT_PARAMETER) - } \ No newline at end of file diff --git a/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/generator/DerivedPropertiesGenerator.kt b/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/generator/DerivedPropertiesGenerator.kt deleted file mode 100644 index 43cc78d9..00000000 --- a/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/generator/DerivedPropertiesGenerator.kt +++ /dev/null @@ -1,58 +0,0 @@ -package io.github.optimumcode.unocode.generator.internal.generator - -import com.squareup.kotlinpoet.AnnotationSpec -import com.squareup.kotlinpoet.FileSpec -import com.squareup.kotlinpoet.FunSpec -import com.squareup.kotlinpoet.INT -import com.squareup.kotlinpoet.KModifier.ABSTRACT -import com.squareup.kotlinpoet.KModifier.INTERNAL -import com.squareup.kotlinpoet.KModifier.OVERRIDE -import com.squareup.kotlinpoet.ParameterSpec -import com.squareup.kotlinpoet.TypeSpec -import io.github.optimumcode.unocode.generator.internal.model.DerivedProperty -import java.nio.file.Path - -private const val CODE_POINT_PARAMETER = "codePoint" - -fun generateDerivedProperties( - packageName: String, - outputDir: Path, - derivedProperties: Map>, -) { - fun containsFunction(): FunSpec.Builder = - FunSpec.builder("contains") - .returns(Boolean::class) - .addParameter(ParameterSpec.builder(CODE_POINT_PARAMETER, INT).build()) - - FileSpec.builder(packageName, "DerivedProperties") - .addType( - TypeSpec.enumBuilder("DerivedProperties") - .addModifiers(INTERNAL) - .addAnnotation(AnnotationSpec.builder(Suppress::class).addMember("%S", "detekt:all").build()) - .addFunction(containsFunction().addModifiers(ABSTRACT).build()) - .apply { - for ((type, properties) in derivedProperties) { - addEnumConstant( - type.uppercase(), - TypeSpec.anonymousClassBuilder() - .addFunction( - containsFunction() - .addModifiers(OVERRIDE) - .checkCodePoint(properties) - .build(), - ) - 
.build(), - ) - } - } - .build(), - ) - .build() - .writeTo(outputDir) -} - -private fun FunSpec.Builder.checkCodePoint(properties: List): FunSpec.Builder = - apply { - val ranges = properties.map { it.range } - checkCodepointInRanges(ranges, CODE_POINT_PARAMETER) - } \ No newline at end of file diff --git a/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/generator/DirectionGenerator.kt b/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/generator/DirectionGenerator.kt deleted file mode 100644 index 2503d927..00000000 --- a/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/generator/DirectionGenerator.kt +++ /dev/null @@ -1,185 +0,0 @@ -package io.github.optimumcode.unocode.generator.internal.generator - -import com.squareup.kotlinpoet.AnnotationSpec -import com.squareup.kotlinpoet.BOOLEAN -import com.squareup.kotlinpoet.ClassName -import com.squareup.kotlinpoet.FileSpec -import com.squareup.kotlinpoet.FunSpec -import com.squareup.kotlinpoet.FunSpec.Builder -import com.squareup.kotlinpoet.INT -import com.squareup.kotlinpoet.KModifier.ABSTRACT -import com.squareup.kotlinpoet.KModifier.INTERNAL -import com.squareup.kotlinpoet.KModifier.OVERRIDE -import com.squareup.kotlinpoet.ParameterSpec -import com.squareup.kotlinpoet.PropertySpec -import com.squareup.kotlinpoet.TypeSpec -import io.github.optimumcode.unocode.generator.internal.model.BiDirectionalClass -import io.github.optimumcode.unocode.generator.internal.model.Range -import java.nio.file.Path - -private const val MIN_CODEPOINT_PROPERTY = "minCodepoint" - -private const val MAX_CODEPOINT_PROPERTY = "maxCodepoint" - -private const val CONTAINS_METHOD = "contains" - -private const val CODEPOINT_PARAMETER = "codepoint" - -fun generateDirectionClasses( - packageName: String, - outputDir: Path, - classes: List, - rangeProvider: (BiDirectionalClass) -> List, -) { - val internalPackageName = "$packageName.classes" - val unicodeObjects = - 
classes.associateBy { - it.name.replace(" ", "") - } - - val characterData = ClassName(packageName, "CharacterDirectionData") - FileSpec.builder(characterData) - .addType( - TypeSpec.interfaceBuilder(characterData) - .addModifiers(INTERNAL) - .addProperty( - PropertySpec.builder(MIN_CODEPOINT_PROPERTY, INT) - .addModifiers(ABSTRACT) - .build(), - ) - .addProperty( - PropertySpec.builder(MAX_CODEPOINT_PROPERTY, INT) - .addModifiers(ABSTRACT) - .build(), - ) - .addFunction( - FunSpec.builder(CONTAINS_METHOD) - .addModifiers(ABSTRACT) - .addParameter( - ParameterSpec.builder(CODEPOINT_PARAMETER, INT) - .build(), - ) - .returns(BOOLEAN) - .build(), - ) - .build(), - ) - .build() - .writeTo(outputDir) - - unicodeObjects.forEach { (directionClassName, unicodeObject) -> - println("Processing '${unicodeObject.name}' group") - generateObjectWithCheckLogic( - unicodeObject, - directionClassName, - internalPackageName, - characterData, - rangeProvider, - ).build() - .writeTo(outputDir) - } - generateEnum(packageName, characterData, unicodeObjects, internalPackageName, outputDir) -} - -private fun generateEnum( - packageName: String, - characterData: ClassName, - unicodeObjects: Map, - internalPackageName: String, - outputDir: Path, -) { - val characterDataProperty = "characterData" - FileSpec.builder(packageName, "CharacterDirectionality") - .addType( - TypeSpec.enumBuilder("CharacterDirectionality") - .addModifiers(INTERNAL) - .primaryConstructor( - FunSpec.constructorBuilder() - .addParameter( - ParameterSpec.builder(characterDataProperty, characterData) - .build(), - ) - .build(), - ) - .addProperty( - PropertySpec.builder(characterDataProperty, characterData) - .initializer(characterDataProperty) - .build(), - ) - .apply { - unicodeObjects.forEach { (className, unicodeObject) -> - addEnumConstant( - unicodeObject.name.replace(" ", "_").uppercase(), - TypeSpec.anonymousClassBuilder() - .apply { - kdoc.addStatement("%L type \"%L\" in unicode", unicodeObject.name, 
unicodeObject.id) - } - .addSuperclassConstructorParameter("%T", ClassName(internalPackageName, className)) - .build(), - ) - } - } - .build(), - ) - .build() - .writeTo(outputDir) -} - -private fun generateObjectWithCheckLogic( - biDirectionalClass: BiDirectionalClass, - directionClassName: String, - packageName: String, - interfaceImpl: ClassName, - rangeProvider: (BiDirectionalClass) -> List, -): FileSpec.Builder { - val codepointRanges: List = rangeProvider(biDirectionalClass) - val minCodepoint: Int = codepointRanges.minOf { it.start } - val maxCodepoint: Int = codepointRanges.maxOf { it.end } - val minCodepointProp = - PropertySpec.builder(MIN_CODEPOINT_PROPERTY, INT) - .addModifiers(OVERRIDE) - .getter(FunSpec.getterBuilder().addStatement("return %L", minCodepoint.toHexString()).build()) - .build() - val maxCodepointProp = - PropertySpec.builder(MAX_CODEPOINT_PROPERTY, INT) - .addModifiers(OVERRIDE) - .getter(FunSpec.getterBuilder().addStatement("return %L", maxCodepoint.toHexString()).build()) - .build() - return FileSpec.builder(packageName, directionClassName) - .addType( - TypeSpec.objectBuilder(directionClassName) - .addAnnotation(AnnotationSpec.builder(Suppress::class).addMember("%S", "detekt:all").build()) - .addModifiers(INTERNAL) - .addSuperinterface(interfaceImpl) - .addProperties( - listOf( - minCodepointProp, - maxCodepointProp, - ), - ) - .addFunction( - FunSpec.builder(CONTAINS_METHOD) - .addModifiers(OVERRIDE) - .returns(BOOLEAN) - .addParameter( - ParameterSpec.builder(CODEPOINT_PARAMETER, INT) - .build(), - ) - .apply { - if (codepointRanges.size > 1) { - beginControlFlow( - "if (%2N > %1L || %3N < %1L)", - CODEPOINT_PARAMETER, - minCodepointProp, - maxCodepointProp, - ) - addStatement("return false") - endControlFlow() - } - checkCodepointInRanges(codepointRanges, CODEPOINT_PARAMETER) - } - .build(), - ) - .build(), - ) -} \ No newline at end of file diff --git 
a/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/generator/Util.kt b/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/generator/Util.kt deleted file mode 100644 index c65a7781..00000000 --- a/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/generator/Util.kt +++ /dev/null @@ -1,51 +0,0 @@ -package io.github.optimumcode.unocode.generator.internal.generator - -import com.squareup.kotlinpoet.FunSpec -import io.github.optimumcode.unocode.generator.internal.model.Range - -internal fun FunSpec.Builder.checkCodepointInRanges( - codepointRanges: List, - parameterName: String, -) { - addCode("return ") - addStatements(codepointRanges, parameterName) -} - -internal fun FunSpec.Builder.addStatements( - codepointRanges: List, - codepointParameterName: String, -) { - when (codepointRanges.size) { - 0 -> addStatement("false") - 1 -> { - val range = codepointRanges[0] - if (range.start == range.end) { - addStatement( - "%L == %L", - codepointParameterName, - range.end.toHexString(), - ) - } else { - addStatement( - "%L in %L..%L", - codepointParameterName, - range.start.toHexString(), - range.end.toHexString(), - ) - } - } - - else -> { - val middleIndex = codepointRanges.size / 2 - val middle = codepointRanges[middleIndex] - beginControlFlow("if (%L < %L)", codepointParameterName, middle.start.toHexString()) - addStatements(codepointRanges.subList(0, middleIndex), codepointParameterName) - nextControlFlow("else") - addStatements(codepointRanges.subList(middleIndex, codepointRanges.size), codepointParameterName) - endControlFlow() - } - } -} - -@Suppress("detekt:MagicNumber") -internal fun Int.toHexString(): String = "0x${toString(16)}" \ No newline at end of file diff --git a/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/graphql/GraphqlClient.kt b/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/graphql/GraphqlClient.kt deleted file mode 100644 
index d48101f9..00000000 --- a/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/graphql/GraphqlClient.kt +++ /dev/null @@ -1,112 +0,0 @@ -package io.github.optimumcode.unocode.generator.internal.graphql - -import com.expediagroup.graphql.client.ktor.GraphQLKtorClient -import com.expediagroup.graphql.client.types.GraphQLClientResponse -import io.github.optimumcode.unicode.generator.internal.graphql.BidirectionalCharactersForClass -import io.github.optimumcode.unicode.generator.internal.graphql.BidirectionalClasses -import io.github.optimumcode.unicode.generator.internal.graphql.CharacterCategories -import io.github.optimumcode.unicode.generator.internal.graphql.CharactersForCategory -import io.github.optimumcode.unocode.generator.internal.model.BiDirectionalClass -import io.github.optimumcode.unocode.generator.internal.model.Category -import io.github.optimumcode.unocode.generator.internal.model.UnicodeChar -import io.ktor.client.HttpClient -import io.ktor.client.engine.cio.CIO -import io.ktor.client.plugins.HttpRequestRetry -import kotlinx.coroutines.flow.Flow -import kotlinx.coroutines.flow.flow -import java.net.URL - -internal class GraphqlClient( - url: URL, - maxRetries: Int = 3, -) : AutoCloseable { - private val client = - GraphQLKtorClient( - url = url, - httpClient = - HttpClient(CIO) { - install(HttpRequestRetry) { - retryOnServerErrors(maxRetries = maxRetries) - exponentialDelay() - } - }, - ) - - suspend fun bidirectionalClasses(): List { - val response = client.execute(BidirectionalClasses()) - checkNoErrors(response) - return response.data?.unicodeObject - ?.mapNotNull { it?.run { BiDirectionalClass(id!!, name!!) 
} } - ?: emptyList() - } - - @Suppress("DuplicatedCode") - fun charactersForClass(classId: String): Flow = - flow { - var offset = 0 - do { - val response = - client.execute( - BidirectionalCharactersForClass( - BidirectionalCharactersForClass.Variables( - id = classId, - offset = offset, - limit = 1000, - ), - ), - ) - checkNoErrors(response) - val data = response.data?.unicodeObject ?: return@flow - for (unicodeObject in data) { - unicodeObject?.chars?.forEach { - it?.run { emit(UnicodeChar(id!!, text?.takeUnless(String::isEmpty))) } - } - offset += unicodeObject?.chars?.size ?: 0 - } - } while (data.any { !it?.chars.isNullOrEmpty() }) - } - - suspend fun categories(): List { - val response = client.execute(CharacterCategories()) - checkNoErrors(response) - return response.data?.unicodeObject - ?.mapNotNull { it?.run { Category(id!!, name!!) } } - ?: emptyList() - } - - @Suppress("DuplicatedCode") - fun charactersForCategory(category: Category): Flow = - flow { - var offset = 0 - do { - val response = - client.execute( - CharactersForCategory( - CharactersForCategory.Variables( - id = category.id, - offset = offset, - limit = 1000, - ), - ), - ) - checkNoErrors(response) - val data = response.data?.unicodeObject ?: return@flow - for (unicodeObject in data) { - unicodeObject?.chars?.forEach { - it?.run { emit(UnicodeChar(id!!, text?.takeUnless(String::isEmpty))) } - } - offset += unicodeObject?.chars?.size ?: 0 - } - } while (data.any { !it?.chars.isNullOrEmpty() }) - } - - override fun close() { - client.close() - } - - private fun checkNoErrors(response: GraphQLClientResponse<*>) { - check(response.errors.isNullOrEmpty()) { - "errors during request execution: ${response.errors}" - } - } -} \ No newline at end of file diff --git a/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/model/Model.kt b/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/model/Model.kt deleted file mode 100644 index cdbc4115..00000000 --- 
a/generator/src/main/kotlin/io/github/optimumcode/unocode/generator/internal/model/Model.kt +++ /dev/null @@ -1,32 +0,0 @@ -package io.github.optimumcode.unocode.generator.internal.model - -import kotlinx.serialization.Serializable - -@Serializable -data class BiDirectionalClass( - val id: String, - val name: String, -) - -@Serializable -data class UnicodeChar( - val id: String, - val text: String?, -) - -@Serializable -data class Category( - val id: String, - val name: String, -) - -@Serializable -class Range(val start: Int, val end: Int) { - constructor(single: Int) : this(single, single) -} - -@Serializable -class DerivedProperty(val type: String, val range: Range) - -@Serializable -class JoiningType(val type: String, val range: Range) \ No newline at end of file diff --git a/generator/src/main/resources/BidirectionalCharacterForClass.graphql b/generator/src/main/resources/BidirectionalCharacterForClass.graphql deleted file mode 100644 index 9cc7db12..00000000 --- a/generator/src/main/resources/BidirectionalCharacterForClass.graphql +++ /dev/null @@ -1,8 +0,0 @@ -query BidirectionalCharactersForClass($id: String, $offset: Int = 0, $limit: Int = 100) { - unicodeObject(type: bidiclass, id: $id) { - chars(offset: $offset, limit: $limit) { - id - text - } - } -} \ No newline at end of file diff --git a/generator/src/main/resources/BidirectionalClasses.graphql b/generator/src/main/resources/BidirectionalClasses.graphql deleted file mode 100644 index 6554c730..00000000 --- a/generator/src/main/resources/BidirectionalClasses.graphql +++ /dev/null @@ -1,6 +0,0 @@ -query BidirectionalClasses { - unicodeObject(type: bidiclass) { - id - name - } -} \ No newline at end of file diff --git a/generator/src/main/resources/CharacterCategories.graphql b/generator/src/main/resources/CharacterCategories.graphql deleted file mode 100644 index 6f217853..00000000 --- a/generator/src/main/resources/CharacterCategories.graphql +++ /dev/null @@ -1,6 +0,0 @@ -query CharacterCategories { - 
unicodeObject(type: category) { - id - name - } -} \ No newline at end of file diff --git a/generator/src/main/resources/CharactersForCategory.graphql b/generator/src/main/resources/CharactersForCategory.graphql deleted file mode 100644 index 10b352e4..00000000 --- a/generator/src/main/resources/CharactersForCategory.graphql +++ /dev/null @@ -1,8 +0,0 @@ -query CharactersForCategory($id: String, $offset: Int = 0, $limit: Int = 100) { - unicodeObject(type: category, id: $id) { - chars(offset: $offset, limit: $limit) { - id - text - } - } -} \ No newline at end of file diff --git a/json-schema-validator/build.gradle.kts b/json-schema-validator/build.gradle.kts index 2e270188..ab86e820 100644 --- a/json-schema-validator/build.gradle.kts +++ b/json-schema-validator/build.gradle.kts @@ -6,7 +6,6 @@ import org.jetbrains.kotlin.gradle.plugin.KotlinTarget import org.jetbrains.kotlin.gradle.plugin.KotlinTargetWithTests import org.jetbrains.kotlin.gradle.targets.js.dsl.ExperimentalWasmDsl import org.jlleitschuh.gradle.ktlint.reporter.ReporterType -import java.util.Locale plugins { alias(libs.plugins.kotlin.mutliplatform) @@ -19,117 +18,6 @@ plugins { convention.publication } -val generatedSourceDirectory: Provider = layout.buildDirectory.dir("generated/source/unicode") - -//region Generation tasks block -val generatorConfiguration: Configuration by configurations.creating - -dependencies { - generatorConfiguration(projects.generator) -} - -val dumpDir: File = - rootProject.layout.projectDirectory - .dir("unicode_dump") - .asFile - -val dumpCharacterData by tasks.register("dumpCharacterData") { - onlyIf { - dumpDir.run { !exists() || listFiles().isNullOrEmpty() } - } - outputs.dir(dumpDir) - classpath(generatorConfiguration) - mainClass.set("io.github.optimumcode.unocode.generator.Main") - args( - "dump", - "-o", - dumpDir, - ) -} - -val generateCharacterDirectionData by tasks.register("generateCharacterDirectionData") { - inputs.dir(dumpDir) - 
outputs.dir(generatedSourceDirectory) - - dependsOn(dumpCharacterData) - - classpath(generatorConfiguration) - mainClass.set("io.github.optimumcode.unocode.generator.Main") - args( - "character-direction", - "-p", - "io.github.optimumcode.json.schema.internal.unicode", - "-o", - generatedSourceDirectory.get(), - "-d", - dumpDir, - ) -} - -val generateCharacterCategoryData by tasks.register("generateCharacterCategoryData") { - inputs.dir(dumpDir) - outputs.dir(generatedSourceDirectory) - - dependsOn(dumpCharacterData) - - classpath(generatorConfiguration) - mainClass.set("io.github.optimumcode.unocode.generator.Main") - args( - "character-category", - "-p", - "io.github.optimumcode.json.schema.internal.unicode", - "-o", - generatedSourceDirectory.get(), - "-d", - dumpDir, - ) -} - -val generateDerivedProperties by tasks.register("generateDerivedProperties") { - val dataFile = - rootProject.layout.projectDirectory - .dir("generator") - .dir("data") - .file("rfc5895_appendix_b_1.txt") - inputs.file(dataFile) - outputs.dir(generatedSourceDirectory) - - classpath(generatorConfiguration) - mainClass.set("io.github.optimumcode.unocode.generator.Main") - args( - "derived-properties", - "-p", - "io.github.optimumcode.json.schema.internal.unicode", - "-o", - generatedSourceDirectory.get(), - "-d", - dataFile, - ) -} - -val generateJoiningTypes by tasks.register("generateJoiningTypes") { - val dataFile = - rootProject.layout.projectDirectory - .dir("generator") - .dir("data") - .file("DerivedJoiningType.txt") - inputs.file(dataFile) - outputs.dir(generatedSourceDirectory) - - classpath(generatorConfiguration) - mainClass.set("io.github.optimumcode.unocode.generator.Main") - args( - "joining-types", - "-p", - "io.github.optimumcode.json.schema.internal.unicode", - "-o", - generatedSourceDirectory.get(), - "-d", - dataFile, - ) -} -//endregion - kotlin { explicitApi() @@ -179,8 +67,6 @@ kotlin { sourceSets { val commonMain by getting { - kotlin.srcDirs(generatedSourceDirectory) 
- dependencies { api(libs.kotlin.serialization.json) api(libs.uri) @@ -233,33 +119,6 @@ kotlin { } } - fun Task.addGeneratedTasks() { - dependsOn( - generateCharacterDirectionData, - generateCharacterCategoryData, - generateDerivedProperties, - generateJoiningTypes, - ) - } - - targets.configureEach { - val capitalizedTargetName = - name.replaceFirstChar { if (it.isLowerCase()) it.titlecase(Locale.getDefault()) else it.toString() } - tasks.named("compileKotlin$capitalizedTargetName") { - addGeneratedTasks() - } - } - afterEvaluate { - targets.configureEach { - tasks.named("${name}SourcesJar") { - addGeneratedTasks() - } - } - tasks.named("sourcesJar") { - addGeneratedTasks() - } - } - afterEvaluate { fun Task.dependsOnTargetTests(targets: List) { targets.forEach { @@ -289,50 +148,11 @@ kotlin { } } -afterEvaluate { - val taskNames = setOf("compile", "detekt", "runKtlint") - tasks.configureEach { - // There is something wrong with compileCommonMainKotlinMetadata task - // Gradle cannot find it, but this task uses the generated source directory - // and Gradle reports implicit dependency. - // As a workaround I do this - seems like it is working. - // However, I might be missing something. Need to revisit this later. 
- - if (taskNames.any { name.startsWith(it) }) { - mustRunAfter( - generateCharacterDirectionData, - generateCharacterCategoryData, - generateDerivedProperties, - generateJoiningTypes, - ) - } - } -} - -kover { - reports { - filters { - excludes { - packages( - "io.github.optimumcode.json.schema.internal.unicode.*", - "io.github.optimumcode.json.schema.internal.unicode", - ) - } - } - } -} - ktlint { version.set(libs.versions.ktlint) reporters { reporter(ReporterType.HTML) } - filter { - exclude { el -> - val absolutePath = el.file.absolutePath - absolutePath.contains("generated").and(!el.isDirectory) - } - } } afterEvaluate { diff --git a/settings.gradle.kts b/settings.gradle.kts index 6e417862..6925b470 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -4,5 +4,4 @@ rootProject.name = "json-schema-validator-root" include(":test-suites") include(":benchmark") -include(":generator") include(":json-schema-validator") \ No newline at end of file From e792677397786acf1f77d3428b3f0415ece31cfa Mon Sep 17 00:00:00 2001 From: Oleg Smirnov Date: Mon, 26 Aug 2024 18:12:54 +0400 Subject: [PATCH 3/3] Use release version of karacteristics --- build.gradle.kts | 1 - gradle/libs.versions.toml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/build.gradle.kts b/build.gradle.kts index a782680f..bcccbb03 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -8,7 +8,6 @@ plugins { allprojects { repositories { mavenCentral() - mavenLocal() } } diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index f7b03c19..dd795b96 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -50,7 +50,7 @@ graphql-ktor = { group = "com.expediagroup", name = "graphql-kotlin-ktor-client" clikt = { group = "com.github.ajalt.clikt", name = "clikt", version = "4.4.0" } kotlin-codepoints = { group = "de.cketti.unicode", name = "kotlin-codepoints", version = "0.9.0" } normalize = { group = "com.doist.x", name = "normalize", version = "1.1.1" } 
-karacteristics = { group = "io.github.optimumcode", name = "karacteristics", version = "0.0.2-SNAPSHOT" } +karacteristics = { group = "io.github.optimumcode", name = "karacteristics", version = "0.0.2" } [bundles] openapi = ["openapi-validator", "openapi-interfaces", "openapi-jackson"]