From 4b5a31cc75855ddccdec58f6447491ba932c1095 Mon Sep 17 00:00:00 2001 From: Sergio Alejandro Vargas Date: Mon, 19 Jun 2023 15:57:18 -0500 Subject: [PATCH] Refactor `is_wc_cat_id_start` math symbol list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Check `∂, ∃, ∄, ∅, ∆, ∇` and `∎, ∏, ∐, ∑` in two ranges, instead of comparing `wc` to each one individually. - Check ∟ with the rest of the angle symbols. - Check ∞ at the beginning with the other symbols that are not in any range. - Check integral symbols in a separate range. - Move angle symbols inside the big check (they're in the range 0x2140 : 0x2a1c) - Move some symbols to match lexicographical order. This commit does NOT add any new characters. --- src/flisp/julia_extensions.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/src/flisp/julia_extensions.c b/src/flisp/julia_extensions.c index f29e3972755c5..310f46bd97c71 100644 --- a/src/flisp/julia_extensions.c +++ b/src/flisp/julia_extensions.c @@ -79,23 +79,26 @@ static int is_wc_cat_id_start(uint32_t wc, utf8proc_category_t cat) // math symbol (category Sm) whitelist (wc >= 0x2140 && wc <= 0x2a1c && ((wc >= 0x2140 && wc <= 0x2144) || // ⅀, ⅁, ⅂, ⅃, ⅄ - wc == 0x223f || wc == 0x22be || wc == 0x22bf || // ∿, ⊾, ⊿ - wc == 0x22a4 || wc == 0x22a5 || // ⊤ ⊥ + wc == 0x221e || wc == 0x223f || // ∞, ∿, + wc == 0x22a4 || wc == 0x22a5 || // ⊤ ⊥ + wc == 0x22be || wc == 0x22bf || // ⊾, ⊿ - (wc >= 0x2200 && wc <= 0x2233 && - (wc == 0x2202 || wc == 0x2205 || wc == 0x2206 || // ∂, ∅, ∆ - wc == 0x2207 || wc == 0x220e || wc == 0x220f || // ∇, ∎, ∏ - wc == 0x2200 || wc == 0x2203 || wc == 0x2204 || // ∀, ∃, ∄ - wc == 0x2210 || wc == 0x2211 || // ∐, ∑ - wc == 0x221e || wc == 0x221f || // ∞, ∟ - wc >= 0x222b)) || // ∫, ∬, ∭, ∮, ∯, ∰, ∱, ∲, ∳ + wc == 0x2200 || // ∀ + (wc >= 0x2202 && wc <= 0x2207) || // ∂, ∃, ∄, ∅, ∆, ∇ + (wc >= 0x220e && wc <= 0x2211) || // ∎, ∏, ∐, ∑ + // angle symbols + (wc >= 0x221f && wc <= 0x2222) || // ∟, ∠, ∡, ∢ + (wc >= 0x299b && wc <= 0x29af) || // ⦛, ⦜, ⦝, ⦞, ⦟, ⦠, ⦡, ⦢, ⦣, ⦤, ⦥, ⦦, ⦧, ⦨, ⦩, ⦪, ⦫, ⦬, ⦭, ⦮, ⦯ + + (wc >= 0x222b && wc <= 0x2233) || // ∫, ∬, ∭, ∮, ∯, ∰, ∱, ∲, ∳ (wc >= 0x22c0 && wc <= 0x22c3) || // N-ary big ops: ⋀, ⋁, ⋂, ⋃ - (wc >= 0x25F8 && wc <= 0x25ff) || // ◸, ◹, ◺, ◻, ◼, ◽, ◾, ◿ + (wc >= 0x25f8 && wc <= 0x25ff) || // ◸, ◹, ◺, ◻, ◼, ◽, ◾, ◿ (wc >= 0x266f && - (wc == 0x266f || wc == 0x27d8 || wc == 0x27d9 || // ♯, ⟘, ⟙ - (wc >= 0x27c0 && wc <= 0x27c1) || // ⟀, ⟁ + (wc == 0x266f || // ♯ + wc == 0x27c0 || wc == 0x27c1 || // ⟀, ⟁ + wc == 0x27d8 || wc == 0x27d9 || // ⟘, ⟙ (wc >= 0x29b0 && wc <= 0x29b4) || // ⦰, ⦱, ⦲, ⦳, ⦴ (wc >= 0x2a00 && wc <= 0x2a06) || // ⨀, ⨁, ⨂, ⨃, ⨄, ⨅, ⨆ (wc >= 0x2a09 && wc <= 0x2a16) || // ⨉, ⨊, ⨋, ⨌, ⨍, ⨎, ⨏, ⨐, ⨑, ⨒, ⨓, ⨔, ⨕, ⨖ @@ -112,10 +115,6 @@ static int is_wc_cat_id_start(uint32_t wc, utf8proc_category_t cat) (wc >= 0x207a && wc <= 0x207e) || (wc >= 0x208a && wc <= 0x208e) || - // angle symbols - (wc >= 0x2220 && wc <= 0x2222) || // ∠, ∡, ∢ - (wc >= 0x299b && wc <= 0x29af) || // ⦛, ⦜, ⦝, ⦞, ⦟, ⦠, ⦡, ⦢, ⦣, ⦤, ⦥, ⦦, ⦧, ⦨, ⦩, ⦪, ⦫, ⦬, ⦭, ⦮, ⦯ - // Other_ID_Start wc == 0x2118 || wc == 0x212E || // ℘, ℮ (wc >= 0x309B && wc <= 0x309C) || // katakana-hiragana sound marks