diff --git a/py/string.go b/py/string.go index 9050f114..69600149 100644 --- a/py/string.go +++ b/py/string.go @@ -17,6 +17,7 @@ import ( "fmt" "strconv" "strings" + "unicode" "unicode/utf8" ) @@ -34,6 +35,73 @@ or repr(object). encoding defaults to sys.getdefaultencoding(). errors defaults to 'strict'.`, StrNew, nil) +// standard golang strings.Fields doesn't have a 'first N' argument +func fieldsN(s string, n int) []string { + out := []string{} + cur := []rune{} + r := []rune(s) + for _, c := range r { + //until we have covered the first N elements, multiple white-spaces are 'merged' + if n < 0 || len(out) < n { + if unicode.IsSpace(c) { + if len(cur) > 0 { + out = append(out, string(cur)) + cur = []rune{} + } + } else { + cur = append(cur, c) + } + //until we see the next letter, after collecting the first N fields, continue to merge whitespaces + } else if len(out) == n && len(cur) == 0 { + if !unicode.IsSpace(c) { + cur = append(cur, c) + } + //now that enough words have been collected, just copy into the last element + } else { + cur = append(cur, c) + } + } + if len(cur) > 0 { + out = append(out, string(cur)) + } + return out +} + +func init() { + StringType.Dict["split"] = MustNewMethod("split", func(self Object, args Tuple) (Object, error) { + selfStr := self.(String) + var value Object = None + zeroRemove := true + if len(args) > 0 { + if _, ok := args[0].(NoneType); !ok { + value = args[0] + zeroRemove = false + } + } + var maxSplit int = -2 + if len(args) > 1 { + if m, ok := args[1].(Int); ok { + maxSplit = int(m) + } + } + valArray := []string{} + if valStr, ok := value.(String); ok { + valArray = strings.SplitN(string(selfStr), string(valStr), maxSplit+1) + } else if _, ok := value.(NoneType); ok { + valArray = fieldsN(string(selfStr), maxSplit) + } else { + return nil, ExceptionNewf(TypeError, "Can't convert '%s' object to str implicitly", value.Type()) + } + o := List{} + for _, j := range valArray { + if len(j) > 0 || !zeroRemove { + o.Items = append(o.Items, String(j)) + } + } + return &o, nil + }, 0, "split(sub) -> split string with sub.") +} + // Type of this object func (s String) Type() *Type { return StringType diff --git a/py/tests/string.py b/py/tests/string.py index c837a441..c8bc1118 100644 --- a/py/tests/string.py +++ b/py/tests/string.py @@ -99,6 +99,14 @@ class C(): asc="hello" uni="£100世界𠜎" # 1,2,3,4 byte unicode characters +doc="split" +assert ["0","1","2","4"] == list("0,1,2,4".split(",")) +assert [""] == list("".split(",")) +assert ['a', 'd,c'] == list("a,d,c".split(",",1)) +assert ['a', 'd', 'b'] == list(" a d b ".split()) +assert ['a', 'd b '] == list(" a d b ".split(None, 1)) +assertRaisesText(TypeError, "Can't convert 'int' object to str implicitly", lambda: "0,1,2,4".split(1)) + doc="ascii len" assert len(asc) == 5