Skip to content

Commit 655c95f

Browse files
committed
return borrows where possible, use real iterators
1 parent c3bcc5c commit 655c95f

File tree

6 files changed

+161
-39
lines changed

6 files changed

+161
-39
lines changed

regex_rs.pyi

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,29 @@
1+
class Captures:
2+
def get(self, i: int) -> Match | None: ...
3+
def name(self, name: str) -> Match | None: ...
4+
def expand(self, replacement: str, dst: str) -> str: ...
5+
def __getitem__(self, i: int) -> Match: ...
6+
def __len__(self) -> int: ...
7+
def __repr__(self) -> str: ...
8+
9+
10+
class CapturesIter:
11+
# The iterator protocol hands back the iterator object itself.
def __iter__(self) -> CapturesIter: ...
12+
def __next__(self) -> Captures: ...
13+
def __repr__(self) -> str: ...
14+
15+
class Match:
16+
matched_text: str
17+
start: int
18+
end: int
19+
def __str__(self) -> str: ...
20+
def __repr__(self) -> str: ...
21+
22+
class Matches:
23+
def __iter__(self) -> Matches: ...
24+
def __next__(self) -> Match: ...
25+
def __repr__(self) -> str: ...
26+
127
class Regex:
228
def __init__(
329
self,
@@ -16,26 +42,15 @@ class Regex:
1642
) -> None: ...
1743
def is_match(self, text: str, start: int | None = None) -> bool: ...
1844
def find(self, text: str, start: int | None) -> Match | None: ...
19-
def find_iter(self, text: str) -> list[Match]: ...
45+
def find_iter(self, text: str) -> Matches: ...
2046
def captures(self, text: str) -> Captures | None: ...
21-
def split(self, text: str, limit: int | None = None) -> list[str]: ...
47+
def captures_iter(self, text: str) -> CapturesIter: ...
48+
def split(self, text: str, limit: int | None = None) -> Split: ...
2249
def replace(self, text: str, rep: str, limit: int | None = None) -> str: ...
23-
def __repr__(self) -> str: ...
2450
def __str__(self) -> str: ...
25-
26-
27-
class Captures:
28-
def get(self, i: int) -> Match | None: ...
29-
def name(self, name: str) -> Match | None: ...
30-
def expand(self, replacement: str, dst: str) -> str: ...
31-
def __getitem__(self, i: int) -> Match | None: ...
32-
def __len__(self) -> int: ...
3351
def __repr__(self) -> str: ...
3452

35-
36-
class Match:
37-
matched_text: str
38-
start: int
39-
end: int
40-
def __str__(self) -> str: ...
53+
class Split:
54+
# The iterator protocol hands back the iterator object itself.
def __iter__(self) -> Split: ...
55+
def __next__(self) -> str: ...
4156
def __repr__(self) -> str: ...

src/captures.rs

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
use std::sync::Arc;
22

33
use ouroboros::self_referencing;
4-
use pyo3::prelude::*;
5-
use regex::Captures as ReCaptures;
4+
use pyo3::{exceptions::PyIndexError, prelude::*};
65

76
use crate::match_struct::Match;
87

@@ -13,7 +12,7 @@ pub struct Captures {
1312

1413
#[borrows(text)]
1514
#[covariant]
16-
pub captures: ReCaptures<'this>,
15+
pub captures: regex::Captures<'this>,
1716
}
1817

1918
#[pymethods]
@@ -31,16 +30,45 @@ impl Captures {
3130
dst
3231
}
3332

34-
pub fn __getitem__(&self, i: usize) -> Option<Match> {
35-
self.get(i)
33+
pub fn __getitem__(&self, i: usize) -> PyResult<Match> {
34+
self.get(i).ok_or(PyIndexError::new_err(i))
3635
}
3736

3837
pub fn __len__(&self) -> usize {
3938
self.borrow_captures().len()
4039
}
4140

4241
pub fn __repr__(&self) -> String {
43-
let dep = self.borrow_captures();
44-
format!("{dep:#?}")
42+
format!("{:#?}", self.borrow_captures())
43+
}
44+
}
45+
46+
#[pyclass]
47+
#[self_referencing(pub_extras)]
48+
pub struct CapturesIter {
49+
pub text: Arc<String>,
50+
pub re: Arc<regex::Regex>,
51+
52+
#[borrows(text, re)]
53+
#[not_covariant]
54+
pub capture_matches: regex::CaptureMatches<'this, 'this>,
55+
}
56+
57+
#[pymethods]
58+
impl CapturesIter {
59+
pub fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
60+
slf
61+
}
62+
63+
pub fn __next__(&mut self) -> Option<Captures> {
64+
let text = self.borrow_text().clone();
65+
self.with_capture_matches_mut(|iter| {
66+
iter.next()
67+
.map(|caps| Captures::new(text, |text| caps.adopt(text)))
68+
})
69+
}
70+
71+
pub fn __repr__(&self) -> String {
72+
self.with_capture_matches(|caps| format!("{caps:#?}"))
4573
}
4674
}

src/lib.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,18 @@ mod captures;
22
mod error;
33
mod match_struct;
44
mod regex;
5+
mod split;
56

67
use pyo3::prelude::*;
78

89
/// A Python module implemented in Rust.
910
#[pymodule]
1011
fn regex_rs(_py: Python, m: &PyModule) -> PyResult<()> {
11-
m.add_class::<regex::Regex>()?;
12-
m.add_class::<match_struct::Match>()?;
1312
m.add_class::<captures::Captures>()?;
13+
m.add_class::<captures::CapturesIter>()?;
14+
m.add_class::<match_struct::Match>()?;
15+
m.add_class::<match_struct::Matches>()?;
16+
m.add_class::<regex::Regex>()?;
17+
m.add_class::<split::Split>()?;
1418
Ok(())
1519
}

src/match_struct.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
use std::sync::Arc;
2+
3+
use ouroboros::self_referencing;
14
use pyo3::prelude::*;
25

36
#[pyclass]
@@ -31,3 +34,29 @@ impl Match {
3134
format!("{self:#?}")
3235
}
3336
}
37+
38+
#[pyclass]
39+
#[self_referencing(pub_extras)]
40+
pub struct Matches {
41+
text: String,
42+
re: Arc<regex::Regex>,
43+
44+
#[borrows(text, re)]
45+
#[not_covariant]
46+
matches: regex::Matches<'this, 'this>,
47+
}
48+
49+
#[pymethods]
50+
impl Matches {
51+
pub fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
52+
slf
53+
}
54+
55+
pub fn __next__(&mut self) -> Option<Match> {
56+
self.with_matches_mut(|iter| iter.next().map(|m| m.into()))
57+
}
58+
59+
pub fn __repr__(&self) -> String {
60+
self.with_matches(|matches| format!("{matches:#?}"))
61+
}
62+
}

src/regex.rs

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,12 @@ use std::sync::Arc;
22

33
use pyo3::prelude::*;
44

5-
use crate::{captures::Captures, error::RegexResult, match_struct::Match};
5+
use crate::{
6+
captures::{Captures, CapturesIter},
7+
error::RegexResult,
8+
match_struct::{Match, Matches},
9+
split::Split,
10+
};
611

712
#[pyclass]
813
#[derive(Debug)]
@@ -96,28 +101,22 @@ impl Regex {
96101
mat.map(|m| m.into())
97102
}
98103

99-
pub fn find_iter(&self, text: &str) -> Vec<Match> {
100-
self.0.find_iter(text).map(|m| m.into()).collect()
104+
pub fn find_iter(&self, text: String) -> Matches {
105+
Matches::new(text, self.0.clone(), |text, re| re.find_iter(text))
101106
}
102107

103108
pub fn captures(&self, text: String) -> Option<Captures> {
104109
Captures::try_new(Arc::new(text), |text| self.0.captures(text).ok_or(())).ok()
105110
}
106111

107-
pub fn captures_iter(&self, text: String) -> Vec<Captures> {
112+
pub fn captures_iter(&self, text: String) -> CapturesIter {
108113
let text = Arc::new(text);
109-
self.0.captures_iter(&text)
110-
.map(|caps| Captures::new(text.clone(), |text| caps.adopt(text)))
111-
.collect()
114+
CapturesIter::new(text, self.0.clone(), |text, re| re.captures_iter(text))
112115
}
113116

114117
#[pyo3(signature = (text, limit=None))]
115-
pub fn split(&self, text: &str, limit: Option<usize>) -> Vec<String> {
116-
if let Some(limit) = limit {
117-
self.0.splitn(text, limit).map(|v| v.to_owned()).collect()
118-
} else {
119-
self.0.split(text).map(|v| v.to_owned()).collect()
120-
}
118+
pub fn split(&self, text: String, limit: Option<usize>) -> Split {
119+
Split::new(text, self.0.clone(), limit, |text, re| re.split(text))
121120
}
122121

123122
#[pyo3(signature = (text, rep, limit=None))]

src/split.rs

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
use std::sync::Arc;
2+
3+
use ouroboros::self_referencing;
4+
use pyo3::prelude::*;
5+
6+
#[pyclass]
7+
#[self_referencing(pub_extras)]
8+
#[derive(Debug)]
9+
pub struct Split {
10+
pub text: String,
11+
pub re: Arc<regex::Regex>,
12+
pub limit: Option<usize>,
13+
14+
#[borrows(text, re)]
15+
#[not_covariant]
16+
pub split: regex::Split<'this, 'this>,
17+
}
18+
19+
#[pymethods]
20+
impl Split {
21+
pub fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> {
22+
slf
23+
}
24+
25+
pub fn __next__(&mut self) -> Option<&str> {
26+
let ret = self.with_limit_mut(|limit| {
27+
if let Some(limit) = limit {
28+
if *limit == 0 {
29+
return false;
30+
}
31+
32+
*limit -= 1;
33+
}
34+
35+
true
36+
});
37+
if !ret {
38+
return None;
39+
}
40+
41+
self.with_split_mut(|split| split.next())
42+
}
43+
44+
pub fn __repr__(&self) -> String {
45+
format!("{self:#?}")
46+
}
47+
}

0 commit comments

Comments
 (0)