diff --git a/doc/specs/stdlib_strings.md b/doc/specs/stdlib_strings.md index 19bbf4a59..d5f22519c 100644 --- a/doc/specs/stdlib_strings.md +++ b/doc/specs/stdlib_strings.md @@ -283,7 +283,6 @@ Default value of `occurrence` is set to `1`. If `consider_overlapping` is not provided or is set to `.true.` the function counts two overlapping occurrences of substring as two different occurrences. If `occurrence`th occurrence is not found, function returns `0`. - #### Syntax `string = [[stdlib_strings(module):find(interface)]] (string, pattern [, occurrence, consider_overlapping])` @@ -318,7 +317,7 @@ program demo_find use stdlib_string_type, only: string_type, assignment(=) use stdlib_strings, only : find implicit none - string_type :: string + type(string_type) :: string string = "needle in the character-stack" @@ -328,3 +327,58 @@ program demo_find end program demo_find ``` + + + +### `replace_all` + +#### Description + +Replaces all occurrences of substring `pattern` in the input `string` with the replacement `replacement`. +Occurrences overlapping on a base occurrence will not be replaced. + +#### Syntax + +`string = [[stdlib_strings(module):replace_all(interface)]] (string, pattern, replacement)` + +#### Status + +Experimental + +#### Class + +Pure function + +#### Argument + +- `string`: Character scalar or [[stdlib_string_type(module):string_type(type)]]. + This argument is intent(in). +- `pattern`: Character scalar or [[stdlib_string_type(module):string_type(type)]]. + This argument is intent(in). +- `replacement`: Character scalar or [[stdlib_string_type(module):string_type(type)]]. + This argument is intent(in). + +#### Result value + +The result is of the same type as `string`. + +#### Example + +```fortran +program demo_replace_all + use stdlib_string_type, only: string_type, assignment(=) + use stdlib_strings, only : replace_all + implicit none + type(string_type) :: string + + string = "hurdles here, hurdles there, hurdles everywhere" + ! string <-- "hurdles here, hurdles there, hurdles everywhere" + + print'(a)', replace_all(string, "hurdles", "learn from") + ! "learn from here, learn from there, learn from everywhere" + + string = replace_all(string, "hurdles", "technology") + ! string <-- "technology here, technology there, technology everywhere" + +end program demo_replace_all +``` \ No newline at end of file diff --git a/src/stdlib_strings.f90 b/src/stdlib_strings.f90 index 89d5ba020..6bfd8a630 100644 --- a/src/stdlib_strings.f90 +++ b/src/stdlib_strings.f90 @@ -12,7 +12,7 @@ module stdlib_strings public :: strip, chomp public :: starts_with, ends_with - public :: slice, find + public :: slice, find, replace_all !> Remove leading and trailing whitespace characters. @@ -79,6 +79,20 @@ module stdlib_strings module procedure :: find_char_char end interface find + !> Replaces all the occurrences of substring 'pattern' in the input 'string' + !> with the replacement 'replacement' + !> Version: experimental + interface replace_all + module procedure :: replace_all_string_string_string + module procedure :: replace_all_string_string_char + module procedure :: replace_all_string_char_string + module procedure :: replace_all_char_string_string + module procedure :: replace_all_string_char_char + module procedure :: replace_all_char_string_char + module procedure :: replace_all_char_char_string + module procedure :: replace_all_char_char_char + end interface replace_all + contains @@ -353,7 +367,7 @@ pure function slice_char(string, first, last, stride) result(sliced_string) end if if (present(first)) then - first_index = first + first_index = first end if if (present(last)) then last_index = last @@ -499,5 +513,140 @@ pure function compute_lps(string) result(lps_array) end function compute_lps + !> Replaces all occurrences of substring 'pattern' in the input 'string' + !> with the replacement 'replacement' + !> Returns a new string + pure function replace_all_string_string_string(string, pattern, replacement) result(res) + type(string_type), intent(in) :: string + type(string_type), intent(in) :: pattern + type(string_type), intent(in) :: replacement + type(string_type) :: res + + res = string_type(replace_all(char(string), & + & char(pattern), char(replacement))) + + end function replace_all_string_string_string + + !> Replaces all occurrences of substring 'pattern' in the input 'string' + !> with the replacement 'replacement' + !> Returns a new string + pure function replace_all_string_string_char(string, pattern, replacement) result(res) + type(string_type), intent(in) :: string + type(string_type), intent(in) :: pattern + character(len=*), intent(in) :: replacement + type(string_type) :: res + + res = string_type(replace_all(char(string), char(pattern), replacement)) + + end function replace_all_string_string_char + + !> Replaces all occurrences of substring 'pattern' in the input 'string' + !> with the replacement 'replacement' + !> Returns a new string + pure function replace_all_string_char_string(string, pattern, replacement) result(res) + type(string_type), intent(in) :: string + character(len=*), intent(in) :: pattern + type(string_type), intent(in) :: replacement + type(string_type) :: res + + res = string_type(replace_all(char(string), pattern, char(replacement))) + + end function replace_all_string_char_string + + !> Replaces all occurrences of substring 'pattern' in the input 'string' + !> with the replacement 'replacement' + !> Returns a new string + pure function replace_all_char_string_string(string, pattern, replacement) result(res) + character(len=*), intent(in) :: string + type(string_type), intent(in) :: pattern + type(string_type), intent(in) :: replacement + character(len=:), allocatable :: res + + res = replace_all(string, char(pattern), char(replacement)) + + end function replace_all_char_string_string + + !> Replaces all occurrences of substring 'pattern' in the input 'string' + !> with the replacement 'replacement' + !> Returns a new string + pure function replace_all_string_char_char(string, pattern, replacement) result(res) + type(string_type), intent(in) :: string + character(len=*), intent(in) :: pattern + character(len=*), intent(in) :: replacement + type(string_type) :: res + + res = string_type(replace_all(char(string), pattern, replacement)) + + end function replace_all_string_char_char + + !> Replaces all occurrences of substring 'pattern' in the input 'string' + !> with the replacement 'replacement' + !> Returns a new string + pure function replace_all_char_string_char(string, pattern, replacement) result(res) + character(len=*), intent(in) :: string + type(string_type), intent(in) :: pattern + character(len=*), intent(in) :: replacement + character(len=:), allocatable :: res + + res = replace_all(string, char(pattern), replacement) + + end function replace_all_char_string_char + + !> Replaces all occurrences of substring 'pattern' in the input 'string' + !> with the replacement 'replacement' + !> Returns a new string + pure function replace_all_char_char_string(string, pattern, replacement) result(res) + character(len=*), intent(in) :: string + character(len=*), intent(in) :: pattern + type(string_type), intent(in) :: replacement + character(len=:), allocatable :: res + + res = replace_all(string, pattern, char(replacement)) + + end function replace_all_char_char_string + + !> Replaces all the occurrences of substring 'pattern' in the input 'string' + !> with the replacement 'replacement' + !> Returns a new string + pure function replace_all_char_char_char(string, pattern, replacement) result(res) + character(len=*), intent(in) :: string + character(len=*), intent(in) :: pattern + character(len=*), intent(in) :: replacement + character(len=:), allocatable :: res + integer :: lps_array(len(pattern)) + integer :: s_i, p_i, last, length_string, length_pattern + + res = "" + length_string = len(string) + length_pattern = len(pattern) + last = 1 + + if (length_pattern > 0 .and. length_pattern <= length_string) then + lps_array = compute_lps(pattern) + + s_i = 1 + p_i = 1 + do while (s_i <= length_string) + if (string(s_i:s_i) == pattern(p_i:p_i)) then + if (p_i == length_pattern) then + res = res // & + & string(last : s_i - length_pattern) // & + & replacement + last = s_i + 1 + p_i = 0 + end if + s_i = s_i + 1 + p_i = p_i + 1 + else if (p_i > 1) then + p_i = lps_array(p_i - 1) + 1 + else + s_i = s_i + 1 + end if + end do + end if + + res = res // string(last : length_string) + + end function replace_all_char_char_char end module stdlib_strings diff --git a/src/tests/string/test_string_functions.f90 b/src/tests/string/test_string_functions.f90 index 194c9f1bb..d0afb745a 100644 --- a/src/tests/string/test_string_functions.f90 +++ b/src/tests/string/test_string_functions.f90 @@ -4,7 +4,7 @@ module test_string_functions use stdlib_error, only : check use stdlib_string_type, only : string_type, assignment(=), operator(==), & to_lower, to_upper, to_title, to_sentence, reverse - use stdlib_strings, only: slice, find + use stdlib_strings, only: slice, find, replace_all use stdlib_optval, only: optval use stdlib_ascii, only : to_string implicit none @@ -63,102 +63,102 @@ subroutine test_slice_string ! Only one argument is given ! Valid call check(slice(test_string, first=10) == "jklmnopqrstuvwxyz", & - "Slice, Valid arguments: first=10") ! last=+inf + "slice, Valid arguments: first=10") ! last=+inf call check(slice(test_string, last=10) == "abcdefghij", & - "Slice, Valid arguments: last=10") ! first=-inf + "slice, Valid arguments: last=10") ! first=-inf call check(slice(test_string, stride=3) == "adgjmpsvy", & - "Slice, Valid arguments: stride=3") ! first=-inf, last=+inf + "slice, Valid arguments: stride=3") ! first=-inf, last=+inf call check(slice(test_string, stride=-3) == "zwtqnkheb", & - "Slice, Valid arguments: stride=-3") ! first=+inf, last=-inf + "slice, Valid arguments: stride=-3") ! first=+inf, last=-inf ! Invalid call check(slice(test_string, first=27) == "", & - "Slice, Invalid arguments: first=27") ! last=+inf + "slice, Invalid arguments: first=27") ! last=+inf call check(slice(test_string, first=-10) == "abcdefghijklmnopqrstuvwxyz", & - "Slice, Invalid arguments: first=-10") ! last=+inf + "slice, Invalid arguments: first=-10") ! last=+inf call check(slice(test_string, last=-2) == "", & - "Slice, Invalid arguments: last=-2") ! first=-inf + "slice, Invalid arguments: last=-2") ! first=-inf call check(slice(test_string, last=30) == "abcdefghijklmnopqrstuvwxyz", & - "Slice, Invalid arguments: last=30") ! first=-inf + "slice, Invalid arguments: last=30") ! first=-inf call check(slice(test_string, stride=0) == "abcdefghijklmnopqrstuvwxyz", & - "Slice, Invalid arguments: stride=0") ! stride=1 + "slice, Invalid arguments: stride=0") ! stride=1 ! Only two arguments are given ! Valid call check(slice(test_string, first=10, last=20) == "jklmnopqrst", & - "Slice, Valid arguments: first=10, last=20") + "slice, Valid arguments: first=10, last=20") call check(slice(test_string, first=7, last=2) == "gfedcb", & - "Slice, Valid arguments: first=7, last=2") ! stride=-1 + "slice, Valid arguments: first=7, last=2") ! stride=-1 call check(slice(test_string, first=10, stride=-2) == "jhfdb", & - "Slice, Valid arguments: first=10, stride=-2") ! last=-inf + "slice, Valid arguments: first=10, stride=-2") ! last=-inf call check(slice(test_string, last=21, stride=-2) == "zxv", & - "Slice, Valid arguments: last=21, stride=-2") ! first=+inf + "slice, Valid arguments: last=21, stride=-2") ! first=+inf ! Atleast one argument is invalid call check(slice(test_string, first=30, last=-3) == "zyxwvutsrqponmlkjihgfedcba", & - "Slice, Invalid arguments: first=30, last=-3") + "slice, Invalid arguments: first=30, last=-3") call check(slice(test_string, first=1, last=-20) == "a", & - "Slice, Invalid arguments: first=1, last=-20") + "slice, Invalid arguments: first=1, last=-20") call check(slice(test_string, first=7, last=-10) == "gfedcba", & - "Slice, Invalid arguments: first=7, last=-10") + "slice, Invalid arguments: first=7, last=-10") call check(slice(test_string, first=500, last=22) == "zyxwv", & - "Slice, Invalid arguments: first=500, last=22") + "slice, Invalid arguments: first=500, last=22") call check(slice(test_string, first=50, last=27) == "", & - "Slice, Invalid arguments: first=50, last=27") + "slice, Invalid arguments: first=50, last=27") call check(slice(test_string, first=-20, last=0) == "", & - "Slice, Invalid arguments: first=-20, last=0") + "slice, Invalid arguments: first=-20, last=0") call check(slice(test_string, last=-3, stride=-2) == "zxvtrpnljhfdb", & - "Slice, Invalid arguments: last=-3, stride=-2") ! first=+inf + "slice, Invalid arguments: last=-3, stride=-2") ! first=+inf call check(slice(test_string, last=10, stride=0) == "abcdefghij", & - "Slice, Invalid arguments: last=10, stride=0") ! stride=1 + "slice, Invalid arguments: last=10, stride=0") ! stride=1 call check(slice(test_string, first=-2, stride=-2) == "", & - "Slice, Invalid arguments: first=-2, stride=-2") ! last=-inf + "slice, Invalid arguments: first=-2, stride=-2") ! last=-inf call check(slice(test_string, first=27, stride=2) == "", & - "Slice, Invalid arguments: first=27, stride=2") ! last=+inf + "slice, Invalid arguments: first=27, stride=2") ! last=+inf call check(slice(test_string, last=27, stride=-1) == "", & - "Slice, Invalid arguments: last=27, stride=-1") ! first=+inf + "slice, Invalid arguments: last=27, stride=-1") ! first=+inf ! All three arguments are given ! Valid call check(slice(test_string, first=2, last=16, stride=3) == "behkn", & - "Slice, Valid arguments: first=2, last=16, stride=3") + "slice, Valid arguments: first=2, last=16, stride=3") call check(slice(test_string, first=16, last=2, stride=-3) == "pmjgd", & - "Slice, Valid arguments: first=16, last=2, stride=-3") + "slice, Valid arguments: first=16, last=2, stride=-3") call check(slice(test_string, first=7, last=7, stride=-4) == "g", & - "Slice, Valid arguments: first=7, last=7, stride=-4") + "slice, Valid arguments: first=7, last=7, stride=-4") call check(slice(test_string, first=7, last=7, stride=3) == "g", & - "Slice, Valid arguments: first=7, last=7, stride=3") + "slice, Valid arguments: first=7, last=7, stride=3") call check(slice(test_string, first=2, last=6, stride=-1) == "", & - "Slice, Valid arguments: first=2, last=6, stride=-1") + "slice, Valid arguments: first=2, last=6, stride=-1") call check(slice(test_string, first=20, last=10, stride=2) == "", & - "Slice, Valid arguments: first=20, last=10, stride=2") + "slice, Valid arguments: first=20, last=10, stride=2") ! Atleast one argument is invalid call check(slice(test_string, first=20, last=30, stride=2) == "tvxz", & - "Slice, Invalid arguments: first=20, last=30, stride=2") + "slice, Invalid arguments: first=20, last=30, stride=2") call check(slice(test_string, first=-20, last=30, stride=2) == "acegikmoqsuwy", & - "Slice, Invalid arguments: first=-20, last=30, stride=2") + "slice, Invalid arguments: first=-20, last=30, stride=2") call check(slice(test_string, first=26, last=30, stride=1) == "z", & - "Slice, Invalid arguments: first=26, last=30, stride=1") + "slice, Invalid arguments: first=26, last=30, stride=1") call check(slice(test_string, first=1, last=-20, stride=-1) == "a", & - "Slice, Invalid arguments: first=1, last=-20, stride=-1") + "slice, Invalid arguments: first=1, last=-20, stride=-1") call check(slice(test_string, first=26, last=20, stride=1) == "", & - "Slice, Invalid arguments: first=26, last=20, stride=1") + "slice, Invalid arguments: first=26, last=20, stride=1") call check(slice(test_string, first=1, last=20, stride=-1) == "", & - "Slice, Invalid arguments: first=1, last=20, stride=-1") + "slice, Invalid arguments: first=1, last=20, stride=-1") test_string = "" ! Empty string input call check(slice(test_string, first=-2, last=6) == "", & - "Slice, Empty string: first=-2, last=6") + "slice, Empty string: first=-2, last=6") call check(slice(test_string, first=6, last=-2) == "", & - "Slice, Empty string: first=6, last=-2") + "slice, Empty string: first=6, last=-2") call check(slice(test_string, first=-10) == "", & - "Slice, Empty string: first=-10") ! last=+inf + "slice, Empty string: first=-10") ! last=+inf call check(slice(test_string, last=10) == "", & - "Slice, Empty string: last=10") ! first=-inf + "slice, Empty string: last=10") ! first=-inf call check(slice(test_string) == "", & - "Slice, Empty string: no arguments provided") + "slice, Empty string: no arguments provided") end subroutine test_slice_string @@ -170,27 +170,27 @@ subroutine test_find test_pattern_2 = "abccbabc" call check(all(find([test_string_1, test_string_2], test_pattern_1, 4) == [7, 0]), & - & 'Find: [test_string_1, test_string_2], test_pattern_1, 4') + & 'find: [test_string_1, test_string_2], test_pattern_1, 4') call check(all(find(test_string_1, [test_pattern_1, test_pattern_2], 3, .false.) == [9, 0]), & - & 'Find: test_string_1, [test_pattern_1, test_pattern_2], 3, .false.') + & 'find: test_string_1, [test_pattern_1, test_pattern_2], 3, .false.') call check(find(test_string_1, test_pattern_1, 7) == 0, & - & 'Find: test_string_1, test_pattern_1, 7') + & 'find: test_string_1, test_pattern_1, 7') call check(all(find([test_string_1, test_string_2, test_string_2], [test_pattern_1, & & test_pattern_2, test_pattern_2], [7, 2, 2], [.true., .false., .true.]) == [0, 0, 6]), & - & 'Find: [test_string_1, test_string_2, test_string_2], [test_pattern_1, & + & 'find: [test_string_1, test_string_2, test_string_2], [test_pattern_1, & & test_pattern_2, test_pattern_2], [7, 2, 2], [.true., .false., .true.]') call check(find("qwqwqwqwqwqwqw", test_pattern_1) == 1, & - & 'Find: "qwqwqwqwqwqwqw", test_pattern_1') + & 'find: "qwqwqwqwqwqwqw", test_pattern_1') call check(all(find(test_string_1, ["qwq", "wqw"], 2) == [3, 4]), & - & 'Find: test_string_1, ["qwq", "wqw"], 2') + & 'find: test_string_1, ["qwq", "wqw"], 2') call check(find("qwqwqwqwqwqwqw", "qwq", 2, .false.) == 5, & - & 'Find: "qwqwqwqwqwqwqw", "qwq", 2, .false.') + & 'find: "qwqwqwqwqwqwqw", "qwq", 2, .false.') call check(find("", "") == 0, & - & 'Find: "", ""') + & 'find: "", ""') call check(find("", test_pattern_1) == 0, & - & 'Find: "", test_pattern_1') + & 'find: "", test_pattern_1') call check(find(test_string_1, "") == 0, & - & 'Find: test_string_1, ""') + & 'find: test_string_1, ""') end subroutine test_find @@ -318,6 +318,66 @@ pure function carray_to_string(carray) result(string) string = transfer(carray, string) end function carray_to_string + subroutine test_replace_all + type(string_type) :: test_string_1, test_pattern_1, test_replacement_1 + type(string_type) :: test_string_2, test_pattern_2, test_replacement_2 + test_string_1 = "mutate DNA sequence: GTTATCGTATGCCGTAATTAT" + test_pattern_1 = "TAT" + test_replacement_1 = "ATA" + test_string_2 = "mutate DNA sequence: AGAGAGCCTAGAGAGAG" + test_pattern_2 = "AGA" + test_replacement_2 = "aga" + + ! all 3 as string_type + call check(replace_all(test_string_1, test_pattern_1, test_replacement_1) == & + & "mutate DNA sequence: GTATACGATAGCCGTAATATA", & + & "replace_all: all 3 string_type, test case 1") + call check(replace_all(test_string_2, test_pattern_2, test_replacement_2) == & + & "mutate DNA sequence: agaGAGCCTagaGagaG", & + & "replace_all: all 3 string_type, test case 2") + call check(replace_all(test_string_2, test_pattern_2, test_replacement_1) == & + & "mutate DNA sequence: ATAGAGCCTATAGATAG", & + & "replace_all: all 3 string_type, test case 3") + + ! 2 as string_type and 1 as character scalar + call check(replace_all(test_string_1, "tat", test_replacement_1) == & + & "muATAe DNA sequence: GTTATCGTATGCCGTAATTAT", & + & "replace_all: 2 string_type & 1 character scalar, test case 1") + call check(replace_all(test_string_2, test_pattern_2, "GC") == & + & "mutate DNA sequence: GCGAGCCTGCGGCG", & + & "replace_all: 2 string_type & 1 character scalar, test case 2") + call check(replace_all("mutate DNA sequence: AGAGAGCCTAGAGAGAG", test_pattern_2, & + & test_replacement_2) == "mutate DNA sequence: agaGAGCCTagaGagaG", & + & "replace_all: 2 string_type & 1 character scalar, test case 3") + + + ! 1 as string_type and 2 as character scalar + call check(replace_all(test_string_1, "TAT", "ATA") == & + & "mutate DNA sequence: GTATACGATAGCCGTAATATA", & + & "replace_all: 1 string_type & 2 character scalar, test case 1") + call check(replace_all("mutate DNA sequence: AGAGAGCCTAGAGAGAG", test_pattern_2, & + & "GC") == "mutate DNA sequence: GCGAGCCTGCGGCG", & + & "replace_all: 1 string_type & 2 character scalar, test case 2") + call check(replace_all("mutate DNA sequence: GTTATCGTATGCCGTAATTAT", "TA", & + & test_replacement_2) == "mutate DNA sequence: GTagaTCGagaTGCCGagaATagaT", & + & "replace_all: 1 string_type & 2 character scalar, test case 3") + call check(replace_all("mutate DNA sequence: GTTATCGTATGCCGTAATTAT", & + & test_pattern_1, "") == "mutate DNA sequence: GTCGGCCGTAAT", & + & "replace_all: 1 string_type & 2 character scalar, test case 4") + call check(replace_all(test_string_1, "", "anything here") == test_string_1, & + & "replace_all: 1 string_type & 2 character scalar, test case 5") + call check(replace_all("", test_pattern_2, "anything here") == "", & + & "replace_all: 1 string_type & 2 character scalar, test case 6") + + ! all 3 as character scalar + call check(replace_all("mutate DNA sequence: GTTATCGTATGCCGTAATTAT", & + & "GT", "gct") == "mutate DNA sequence: gctTATCgctATGCCgctAATTAT", & + & "replace_all: all 3 character scalar, test case 1") + call check(replace_all("", "anything here", "anything here") == "", & + & "replace_all: all 3 character scalar, test case 2") + + end subroutine test_replace_all + end module test_string_functions @@ -333,5 +393,6 @@ program tester call test_slice_string call test_slice_gen call test_find + call test_replace_all end program tester