Skip to content

Commit 9ebeeda

Browse files
Adding some lemmas for Str_toUTF8 in UTF8Str, cleaning up some proofs in RustString (#3)
> Adding some lemmas for Str_toUTF8 in UTF8Str, cleaning up some proofs in RustString By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 and MIT licenses. Co-authored-by: THANH NGUYEN <[email protected]>
1 parent e89e39a commit 9ebeeda

File tree

3 files changed

+70
-19
lines changed

3 files changed

+70
-19
lines changed

RustLeanModels/ProofExample.lean

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
-- SPDX-License-Identifier: Apache-2.0 OR MIT
33
import RustLeanModels.Basic
44
import RustLeanModels.RustString
5+
import RustLeanModels.UTF8Str
56
import Lean
67
open RustString
78
open List
@@ -29,7 +30,6 @@ lemma substring_charIndex_map : substring_charIndex s ss = some i →
2930
. unfold is_first_substringcharIndex at gj
3031
exact gj.right (i + v.length) gi
3132

32-
3333
lemma sub_split_map (gss: ss.length > 0):
3434
(split_substring s ss).length ≤ (split_substring (List.map f (v++s)) (List.map f ss)).length :=by
3535
generalize gl: s.length = l
@@ -70,3 +70,13 @@ lemma sub_split_map (gss: ss.length > 0):
7070
lemma split_map (gss: ss.length > 0):
7171
(split_substring s ss).length ≤ (split_substring (List.map f s) (List.map f ss)).length :=by
7272
have := @sub_split_map s ss f [] gss; simp only [nil_append] at this; exact this
73+
74+
/- An example of using UTF8Str-/
75+
lemma exists_Str_toUTF8_prefix_of_char_boundary: is_char_boundary s i → ∃ p, List.IsPrefix p s ∧ (Str_toUTF8 s).take i = Str_toUTF8 p :=by
76+
intro g
77+
generalize gx: PrefixFromPos s i = op
78+
have gx1:= gx
79+
simp only [PrefixFromPos, g, ↓reduceIte] at gx; symm at gx; rw[gx] at gx1
80+
generalize gp: PrefixFromPos_safe_r s i = p
81+
rw[gp] at gx1; have gx1:= PrefixFromPos_some_sound.mp gx1
82+
use p; simp only [gx1, true_and]; rw[← gx1.right]; exact Str_toUTF8_take_prefix gx1.left

RustLeanModels/RustString.lean

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,6 @@ def ListCharPos_aux (s: Str) (i: Nat):= match s with
153153
| _::_ => i:: ListCharPos_aux t (i+ Char.utf8Size h)
154154

155155
/- List of the byte positions of all Chars in the String s-/
156-
@[simp]
157156
def ListCharPos (s: Str) := ListCharPos_aux s 0
158157

159158
lemma ListCharPos_aux_sound : x ∈ ListCharPos_aux s k ↔ ∃ p, List.IsPrefix p s ∧ p ≠ s ∧ byteSize p + k = x :=by
@@ -188,11 +187,10 @@ lemma ListCharPos_aux_sound : x ∈ ListCharPos_aux s k ↔ ∃ p, List.IsPrefix
188187
simp only [ne_eq, not_false_eq_true, not_sym, true_implies] at ind;
189188
simp only [ind.mpr this, or_true]
190189

191-
theorem ListCharPos_sound : x ∈ ListCharPos s ↔ ∃ pf, List.IsPrefix pf s ∧ pf ≠ s ∧ byteSize pf = x :=by
190+
theorem ListCharPos_sound : x ∈ ListCharPos s ↔ ∃ p, List.IsPrefix p s ∧ p ≠ s ∧ byteSize p = x :=by
192191
simp only [ListCharPos, ListCharPos_aux_sound, ne_eq, add_zero]
193192

194193

195-
@[simp]
196194
def CharBoundPos_aux (l: Str) (s: Nat): List Nat := match l with
197195
| [] => [s]
198196
| h::t => s :: CharBoundPos_aux t (s + Char.utf8Size h)
@@ -258,7 +256,6 @@ lemma ListCharPos_prefix_CharBoundPos_aux : List.IsPrefix (ListCharPos_aux s k)
258256
rw[this] at hl; simp only [succ.injEq] at hl;
259257
exact ind hl
260258

261-
262259
lemma ListCharPos_prefix_CharBoundPos: List.IsPrefix (ListCharPos s) (CharBoundPos s) := by
263260
unfold CharBoundPos ListCharPos; apply ListCharPos_prefix_CharBoundPos_aux
264261

@@ -312,7 +309,8 @@ lemma byteSize_in_CharBoundPos : (byteSize s) ∈ (CharBoundPos s) := by
312309
unfold CharBoundPos; rw[byteSize_aux_para1_elim]
313310
apply byteSize_aux_mem_CharBoundPos_aux
314311

315-
lemma CharBoundPos_eq_ListCharPos_cc_byteSize: CharBoundPos s = (ListCharPos s)++[byteSize s] := by
312+
theorem CharBoundPos_EQ : CharBoundPos s = CharBoundPos_def s := by
313+
unfold CharBoundPos_def
316314
have p: List.IsPrefix (ListCharPos s) (CharBoundPos s) := by exact ListCharPos_prefix_CharBoundPos
317315
unfold List.IsPrefix at p; obtain ⟨t, ht⟩ := p
318316
have m: byteSize s ∈ CharBoundPos s := by exact byteSize_in_CharBoundPos
@@ -336,8 +334,13 @@ lemma CharBoundPos_eq_ListCharPos_cc_byteSize: CharBoundPos s = (ListCharPos s)+
336334
simp only [length_cons, succ.injEq, add_eq_zero, one_ne_zero, and_false, not_false_eq_true,
337335
not_sym] at tl
338336

339-
theorem CharBoundPos_EQ : CharBoundPos s = CharBoundPos_def s := by
340-
rw[CharBoundPos_eq_ListCharPos_cc_byteSize, CharBoundPos_def]
337+
lemma prefix_byteSize_in_CharBoundPos: List.IsPrefix p s → byteSize p ∈ CharBoundPos s :=by
338+
rw[CharBoundPos_EQ, CharBoundPos_def]; intro g;
339+
by_cases (p ≠ s)
340+
have: byteSize p ∈ ListCharPos s :=by apply ListCharPos_sound.mpr; use p;
341+
simp only [mem_append, this, mem_singleton, true_or]
342+
rename_i gc; simp only [ne_eq, Decidable.not_not] at gc
343+
simp only [gc, mem_append, mem_singleton, or_true]
341344

342345
lemma prefix_byteSize_le_aux (g: List.IsPrefix p s) : byteSize_aux p k ≤ byteSize_aux s k:=by
343346
generalize gl: s.length = n
@@ -374,7 +377,6 @@ lemma prefix_byteSize_lt_aux (g: List.IsPrefix p s) (gl: p.length < s.length):
374377
simp_all only [length_cons, succ.injEq, byteSize_aux, gt_iff_lt]
375378
apply ind g.right (by omega) gn
376379

377-
378380
lemma prefix_byteSize_lt (g: List.IsPrefix p s) (gp: p.length < s.length): byteSize p < byteSize s :=by
379381
rw[byteSize_aux_para1_elim]; rw[byteSize_aux_para1_elim]; apply prefix_byteSize_lt_aux g gp
380382

@@ -392,7 +394,6 @@ lemma byteSize_le_of_length_le (g1: List.IsPrefix p1 s) (g2: List.IsPrefix p2 s)
392394
have g:= List.prefix_of_prefix_length_le g1 g2 gi
393395
apply prefix_byteSize_le g
394396

395-
396397
lemma prefix_of_byteSize_le (g1: List.IsPrefix p1 s) (g2: List.IsPrefix p2 s)
397398
(gi: byteSize p1 ≤ byteSize p2) : List.IsPrefix p1 p2 :=by
398399
have g:= length_le_of_byteSize_le g1 g2 gi
@@ -426,17 +427,17 @@ theorem is_char_boundary_EQ : is_char_boundary s i = is_char_boundary_def s i :
426427
unfold is_char_boundary is_char_boundary_def
427428
have : ∀ m l, m ∈ ListCharPos l ∨ m = byteSize l ↔ m ∈ (ListCharPos l ++ [byteSize l]) :=by
428429
intro m l; simp only [ListCharPos,byteSize, mem_append, mem_singleton]
429-
rw[this, ← CharBoundPos_eq_ListCharPos_cc_byteSize]
430+
rw[this, ← CharBoundPos_def, ← CharBoundPos_EQ]
430431
unfold CharBoundPos CharBoundPos_aux
431432
split; rename_i g; simp only [g, zero_add, mem_cons, true_or]
432433
split; rename_i g _; simp only [zero_add, mem_cons, g, not_false_eq_true, not_sym, false_or, eq_iff_iff, false_iff]
433434
rw[CharBoundPos_aux_para1_elim]; by_contra; rename_i gi
434435
simp only [CharBoundPos, mem_map] at gi; obtain⟨a, ga⟩ := gi; omega
435436
rename_i gi1 gi2; simp only [zero_add, mem_cons, gi1, not_false_eq_true, not_sym, false_or, eq_iff_iff]
436437
rw[CharBoundPos_aux_para1_elim]; simp only [CharBoundPos, mem_map]
437-
constructor; intro g; rw[ind, is_char_boundary_def, this, ← CharBoundPos_eq_ListCharPos_cc_byteSize, CharBoundPos] at g;
438+
constructor; intro g; rw[ind, is_char_boundary_def, this, ← CharBoundPos_def, ← CharBoundPos_EQ, CharBoundPos] at g;
438439
use i - Char.utf8Size h; simp only [g, true_and]; omega
439-
intro g; obtain⟨a, ga⟩ := g ; rw[ind, is_char_boundary_def, this, ← CharBoundPos_eq_ListCharPos_cc_byteSize, CharBoundPos]
440+
intro g; obtain⟨a, ga⟩ := g ; rw[ind, is_char_boundary_def, this, ← CharBoundPos_def, ← CharBoundPos_EQ, CharBoundPos]
440441
have : i - Char.utf8Size h = a := by omega
441442
rw[this]; simp only [ga]
442443

@@ -673,8 +674,12 @@ def PrefixFromPos_safe_r (s: Str) (i: Nat): Str := match s with
673674
def PrefixFromPos (s: Str) (i: Nat): Option Str :=
674675
if is_char_boundary s i then some (PrefixFromPos_safe_r s i) else none
675676

677+
lemma PrefixFromPos_none_sound: PrefixFromPos s i = none ↔ ¬ is_char_boundary s i :=by
678+
unfold PrefixFromPos
679+
split; rename_i g; simp only [g, not_true_eq_false]
680+
rename_i g; simp only [g, Bool.false_eq_true, not_false_eq_true, not_sym]
676681

677-
lemma PrefixFromPos_verified: PrefixFromPos s i = some p ↔ (List.IsPrefix p s) ∧ (byteSize p = i) := by
682+
lemma PrefixFromPos_some_sound: PrefixFromPos s i = some p ↔ (List.IsPrefix p s) ∧ (byteSize p = i) := by
678683
unfold PrefixFromPos ;
679684
split; simp only [Option.some.injEq, byteSize]
680685
induction s generalizing i p
@@ -727,7 +732,7 @@ lemma PrefixFromPos_verified: PrefixFromPos s i = some p ↔ (List.IsPrefix p s)
727732

728733
lemma PrefixFromPos_byteSize : PrefixFromPos s i = some p → byteSize p = i := by
729734
intro h ;
730-
have h1 := PrefixFromPos_verified.mp h
735+
have h1 := PrefixFromPos_some_sound.mp h
731736
exact h1.right
732737

733738
lemma is_char_boundary_from_prefix (h: PrefixFromPos s i = some pre)
@@ -736,10 +741,10 @@ lemma is_char_boundary_from_prefix (h: PrefixFromPos s i = some pre)
736741
split at h; assumption; contradiction
737742

738743
lemma PrefixFromPos_self: PrefixFromPos s0 (byteSize s0) = some s0 :=by
739-
apply PrefixFromPos_verified.mpr; simp
744+
apply PrefixFromPos_some_sound.mpr; simp
740745

741746
lemma PrefixFromPos_prefix (hp: List.IsPrefix i s) : PrefixFromPos s (byteSize i) = some i := by
742-
apply PrefixFromPos_verified.mpr; simp [hp]
747+
apply PrefixFromPos_some_sound.mpr; simp [hp]
743748

744749
lemma PrefixFromPos_eq_split_at_none: PrefixFromPos s i = none ↔ split_at s i = none :=by
745750
simp only [PrefixFromPos, ite_eq_right_iff, not_false_eq_true, not_sym, imp_false,

RustLeanModels/UTF8Str.lean

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
-- Copyright Kani Contributors
22
-- SPDX-License-Identifier: Apache-2.0 OR MIT
33
import RustLeanModels.Basic
4+
import RustLeanModels.RustString
45
import Lean
56
open Char
67
open List
78
open Mathlib
9+
open RustString
810
open Nat
911
set_option maxHeartbeats 10000000
1012

@@ -38,7 +40,6 @@ axiom Char_size_eq_of_firstbyte_eq {c1 c2: Char} : toByte c1 0 (by linarith [@ut
3840
axiom exists_byte_ne_of_Chat_ne {c1 c2: Char} :
3941
c1 ≠ c2 → ∃ i g1 g2, toByte c1 i g1 ≠ toByte c2 i g2
4042

41-
4243
/-
4344
The Char_toUTF8 function converts a Char into it UTF8 encoding.
4445
Char_toUTF8 is the same as String.utf8EncodeChar, but it is defined based on the opaque function toByte,
@@ -144,7 +145,7 @@ lemma char_eq_of_toByteList_prefix : List.IsPrefix (Char_toUTF8 c1) (Char_toUTF8
144145
have ge : Char_toUTF8 c1 = Char_toUTF8 c2:= by apply prefix_eq_self g l3
145146
exact Char_toUTF8_eq_iff_eq.mp ge
146147

147-
theorem prefix_iff_listByte_prefix : List.IsPrefix (Str_toUTF8 p) (Str_toUTF8 s) ↔ List.IsPrefix p s := by
148+
lemma prefix_iff_listByte_prefix : List.IsPrefix (Str_toUTF8 p) (Str_toUTF8 s) ↔ List.IsPrefix p s := by
148149
induction p generalizing s
149150
simp only [Str_toUTF8, _root_.nil_prefix]
150151
rename_i hp tp ind
@@ -170,3 +171,38 @@ theorem prefix_iff_listByte_prefix : List.IsPrefix (Str_toUTF8 p) (Str_toUTF8 s)
170171
rw [g.left];
171172
apply (prefix_append_right_inj (Char_toUTF8 hs)).mpr
172173
simp only [ind.mpr, g.right]
174+
175+
lemma Str_toUTF8_append: Str_toUTF8 (s1 ++ s2) = Str_toUTF8 s1 ++ Str_toUTF8 s2 :=by
176+
induction s1
177+
simp only [nil_append, Str_toUTF8]
178+
rename_i h1 t1 ind
179+
simp only [Str_toUTF8, append_eq, ind, append_assoc]
180+
181+
lemma Str_toUTF8_eq_iff_eq: Str_toUTF8 s1 = Str_toUTF8 s2 ↔ s1 = s2 :=by
182+
constructor
183+
induction s1 generalizing s2
184+
cases s2; simp only [imp_self]
185+
simp only [Str_toUTF8, nil_eq_append, Char_toUTF8_ne_nil, false_and, not_false_eq_true, not_sym, imp_self]
186+
rename_i h1 t1 ind
187+
cases s2
188+
simp only [Str_toUTF8, append_eq_nil, Char_toUTF8_ne_nil, false_and, not_false_eq_true, not_sym, imp_self]
189+
rename_i h2 t2
190+
simp only [Str_toUTF8, cons.injEq]
191+
intro g
192+
have g1: List.IsPrefix (Char_toUTF8 h1) (Char_toUTF8 h1 ++ Str_toUTF8 t1):=by simp only [prefix_append]
193+
have g2: List.IsPrefix (Char_toUTF8 h2) (Char_toUTF8 h1 ++ Str_toUTF8 t1):=by simp only [g, prefix_append]
194+
have gp:= List.prefix_or_prefix_of_prefix g1 g2
195+
have ge: h1 = h2 :=by
196+
cases gp; exact char_eq_of_toByteList_prefix (by assumption); symm; exact char_eq_of_toByteList_prefix (by assumption)
197+
simp only [ge, true_and]; simp only [ge, append_cancel_left_eq] at g
198+
exact ind g
199+
intro g; simp only [g]
200+
201+
lemma Str_toUTF8_length: (Str_toUTF8 s).length = byteSize s:=by
202+
induction s
203+
simp only [Str_toUTF8, length_nil, byteSize]
204+
rename_i ind
205+
simp only [Str_toUTF8, length_append, Char_toUTF8_length, ind, byteSize]
206+
207+
lemma Str_toUTF8_take_prefix: List.IsPrefix p s → (Str_toUTF8 s).take (byteSize p) = Str_toUTF8 p :=by
208+
intro g; rw[← @Str_toUTF8_length p, prefix_eq_take]; exact prefix_iff_listByte_prefix.mpr g

0 commit comments

Comments
 (0)