Skip to content

Vendor snowball package #1116

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Dec 31, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions hackage-server.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ extra-source-files:
tests/unpack-checks/LANGUAGE-GHC-9.2/Main.hs
tests/unpack-checks/LANGUAGE-GHC-9.2/Setup.hs
tests/unpack-checks/LANGUAGE-GHC-9.2/LANGUAGE-GHC.cabal
libstemmer_c/src_c/stem_ISO_8859_1_english.h
libstemmer_c/include/libstemmer.h
libstemmer_c/runtime/api.h
libstemmer_c/runtime/header.h
libstemmer_c/LICENSE
src/Distribution/Server/Util/NLP/LICENSE

source-repository head
type: git
Expand Down Expand Up @@ -356,6 +362,7 @@ library lib-server
Distribution.Server.Features.StaticFiles
Distribution.Server.Features.ServerIntrospect
Distribution.Server.Features.Sitemap
Distribution.Server.Util.NLP.Snowball

if flag(debug)
cpp-options: -DDEBUG
Expand Down Expand Up @@ -410,8 +417,12 @@ library lib-server
, xss-sanitize ^>= 0.3.6

if !flag(minimal)
build-depends: snowball ^>= 1.0
, tokenize ^>= 0.3
build-depends: tokenize ^>= 0.3

c-sources: libstemmer_c/src_c/stem_ISO_8859_1_english.c
libstemmer_c/runtime/api.c
libstemmer_c/runtime/utilities.c
libstemmer_c/libstemmer/libstemmer.c

if flag(cabal-parsers)
build-depends: cabal-parsers ^>= 0
Expand Down
24 changes: 24 additions & 0 deletions libstemmer_c/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
Copyright (c) 2002, Richard Boulton
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
20 changes: 20 additions & 0 deletions libstemmer_c/include/libstemmer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@

/*
 * Public interface of the vendored English (ISO-8859-1) Snowball stemmer.
 *
 * The include guard makes the header safe to include more than once in a
 * single translation unit.
 */
#ifndef LIBSTEMMER_H
#define LIBSTEMMER_H

/* Make header file work when included from C++ */
#ifdef __cplusplus
extern "C" {
#endif

/* Character unit used by the public stemming API. */
typedef unsigned char sb_symbol;

/*
 * Create a stemmer environment.
 * Returns NULL when the environment cannot be created. A non-NULL result is
 * released with english_ISO_8859_1_stemmer_delete().
 */
struct SN_env * english_ISO_8859_1_stemmer_new(void);

/* Release an environment obtained from english_ISO_8859_1_stemmer_new().
 * Passing a null pointer is allowed and does nothing. */
void english_ISO_8859_1_stemmer_delete(struct SN_env * sn_env);

/*
 * Stem the `size` symbols starting at `word`.
 * Returns a pointer to the zero-terminated result held inside `sn_env`, or
 * NULL when the word cannot be loaded or the stemmer reports an error.
 * The result lives in the environment's own buffer and must not be freed by
 * the caller.
 * NOTE(review): the buffer is presumably overwritten by the next stem call on
 * the same environment - copy the result if it must outlive that call.
 */
const sb_symbol * english_ISO_8859_1_stemmer_stem(struct SN_env * sn_env, const sb_symbol * word, int size);

/* Length, in symbols, of the string currently held in `sn_env` (the result of
 * the most recent successful stem call). */
int english_ISO_8859_1_stemmer_length(struct SN_env * sn_env);

#ifdef __cplusplus
}
#endif

#endif /* LIBSTEMMER_H */

47 changes: 47 additions & 0 deletions libstemmer_c/libstemmer/libstemmer.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@

#include <stdlib.h>
#include <string.h>
#include "../include/libstemmer.h"
#include "../runtime/api.h"
#include "../src_c/stem_ISO_8859_1_english.h"

/*
 * Create a new English (ISO-8859-1) stemmer environment.
 *
 * Returns the environment built by english_ISO_8859_1_create_env(), or NULL
 * when that call fails. A non-NULL result is released with
 * english_ISO_8859_1_stemmer_delete().
 */
extern struct SN_env *
english_ISO_8859_1_stemmer_new()
{
    /* A failed creation yields NULL and leaves nothing to clean up, so the
     * result is passed straight through; deleting a NULL environment would
     * be a no-op anyway. */
    return english_ISO_8859_1_create_env();
}

/*
 * Release a stemmer environment.
 *
 * A null `sn_env` is accepted and ignored; any other value is handed to
 * english_ISO_8859_1_close_env().
 */
void
english_ISO_8859_1_stemmer_delete(struct SN_env * sn_env)
{
    if (sn_env != NULL)
    {
        english_ISO_8859_1_close_env(sn_env);
    }
}

/*
 * Stem one word.
 *
 * sn_env  environment from english_ISO_8859_1_stemmer_new()
 * word    input symbols (need not be zero-terminated; `size` gives the count)
 * size    number of symbols in `word`
 *
 * Returns a pointer to the zero-terminated stem stored in the environment's
 * buffer, or NULL if the word could not be loaded or the stemmer returned a
 * negative status. When loading fails the environment's length is reset to 0.
 */
const sb_symbol *
english_ISO_8859_1_stemmer_stem(struct SN_env * sn_env, const sb_symbol * word, int size)
{
    const symbol * input = (const symbol *)(word);

    /* Load the word into the environment; a non-zero status means failure. */
    if (SN_set_current(sn_env, size, input) != 0)
    {
        sn_env->l = 0;
        return NULL;
    }

    if (english_ISO_8859_1_stem(sn_env) < 0)
    {
        return NULL;
    }

    /* Terminate the result so callers can treat it as a C string. */
    sn_env->p[sn_env->l] = 0;
    return (const sb_symbol *)(sn_env->p);
}

/*
 * Report the length, in symbols, of the string currently held in `sn_env`
 * (its `l` field). `sn_env` must not be null.
 */
int
english_ISO_8859_1_stemmer_length(struct SN_env * sn_env)
{
    const int current_length = sn_env->l;

    return current_length;
}
66 changes: 66 additions & 0 deletions libstemmer_c/runtime/api.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@

#include <stdlib.h> /* for calloc, free */
#include "header.h"

/*
 * Allocate and initialise a stemmer environment.
 *
 * S_size  number of auxiliary symbol strings to create in env->S
 * I_size  number of zero-initialised ints to allocate in env->I
 * B_size  number of zero-initialised bytes to allocate in env->B
 *
 * A zero count leaves the corresponding pointer NULL. Returns the new
 * environment, or NULL if any allocation fails; in that case everything
 * obtained so far is handed to SN_close_env() before returning.
 */
extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size)
{
    struct SN_env * env = calloc(1, sizeof *env);

    if (env == NULL) return NULL;

    /* Main working string. */
    env->p = create_s();
    if (env->p == NULL) goto fail;

    if (S_size != 0)
    {
        int k = 0;

        env->S = calloc(S_size, sizeof *env->S);
        if (env->S == NULL) goto fail;

        while (k < S_size)
        {
            env->S[k] = create_s();
            if (env->S[k] == NULL) goto fail;
            k++;
        }
    }

    if (I_size != 0)
    {
        env->I = calloc(I_size, sizeof *env->I);
        if (env->I == NULL) goto fail;
    }

    if (B_size != 0)
    {
        env->B = calloc(B_size, sizeof *env->B);
        if (env->B == NULL) goto fail;
    }

    return env;

fail:
    SN_close_env(env, S_size);
    return NULL;
}

/*
 * Release an environment created by SN_create_env().
 *
 * z       environment to destroy; NULL is accepted and ignored
 * S_size  number of entries in z->S (the S_size given to SN_create_env)
 *
 * This is also the cleanup path for a partially built environment, so every
 * member may still be NULL when it is called.
 */
extern void SN_close_env(struct SN_env * z, int S_size)
{
    if (z == NULL) return;

    /* Guard on the array pointer rather than on S_size: when SN_create_env
     * fails to allocate z->S it calls this function with a non-zero S_size
     * and z->S == NULL, and indexing z->S[i] would dereference NULL. */
    if (z->S != NULL)
    {
        int i;
        for (i = 0; i < S_size; i++)
        {
            /* Entries past the point of a failed create_s() are NULL.
             * NOTE(review): this relies on lose_s() accepting NULL, as the
             * original cleanup path already did - confirm in utilities.c. */
            lose_s(z->S[i]);
        }
        free(z->S);
    }
    free(z->I);
    free(z->B);
    if (z->p) lose_s(z->p);
    free(z);
}

/*
 * Replace the environment's current string with the `size` symbols at `s`
 * and reset the cursor (z->c) to 0.
 *
 * Returns the status reported by replace_s(); callers treat a non-zero
 * value as failure. The cursor is reset regardless of that status.
 */
extern int SN_set_current(struct SN_env * z, int size, const symbol * s)
{
    /* Overwrite the whole existing range [0, z->l) with the new word. */
    const int status = replace_s(z, 0, z->l, size, s, NULL);

    z->c = 0;
    return status;
}

26 changes: 26 additions & 0 deletions libstemmer_c/runtime/api.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@

/*
 * Core types of the Snowball runtime.
 *
 * The include guard prevents `struct SN_env` from being redefined when this
 * header is reached more than once in a translation unit (for example
 * directly and again through header.h).
 */
#ifndef SNOWBALL_RUNTIME_API_H
#define SNOWBALL_RUNTIME_API_H

/* Character unit of the strings handled by the runtime. */
typedef unsigned char symbol;

/* Or replace 'char' above with 'short' for 16 bit characters.

   More precisely, replace 'char' with whatever type guarantees the
   character width you need. Note however that sizeof(symbol) should divide
   HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise
   there is an alignment problem. In the unlikely event of a problem here,
   consult Martin Porter.

*/

/* State of one stemmer instance. */
struct SN_env {
    /* Current string; created with create_s() and released with lose_s(). */
    symbol * p;
    /* c:  cursor; SN_set_current() resets it to 0.
       l:  length of the string in p (libstemmer reports it as the result
           length and writes the terminator at p[l]).
       lb, bra, ket: NOTE(review): presumably the backward limit and the
           start/end marks of the current slice, as in the Snowball manual -
           confirm against utilities.c. */
    int c; int l; int lb; int bra; int ket;
    /* Array of S_size auxiliary strings, or NULL when S_size is 0. */
    symbol * * S;
    /* Array of I_size zero-initialised ints, or NULL when I_size is 0. */
    int * I;
    /* Array of B_size zero-initialised bytes, or NULL when B_size is 0. */
    unsigned char * B;
};

/* Allocate an environment with the given numbers of auxiliary strings, ints
   and bytes. Returns NULL if any allocation fails. */
extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size);

/* Release an environment; S_size must be the value given to SN_create_env().
   A NULL environment is ignored. */
extern void SN_close_env(struct SN_env * z, int S_size);

/* Make the `size` symbols at `s` the current string of `z` and reset the
   cursor. Returns non-zero on failure. */
extern int SN_set_current(struct SN_env * z, int size, const symbol * s);

#endif /* SNOWBALL_RUNTIME_API_H */

58 changes: 58 additions & 0 deletions libstemmer_c/runtime/header.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@

#include <limits.h>

#include "api.h"

/* Range limits of int, under the names used by generated stemmer code. */
#define MAXINT INT_MAX
#define MININT INT_MIN

/* Size in bytes of the two-int bookkeeping area addressed by CAPACITY() and
   SIZE() below. Parenthesised so that expressions such as `x / HEAD` or
   `x % HEAD` evaluate as intended. */
#define HEAD (2 * sizeof(int))

/* Accessors for the two ints stored immediately before a symbol buffer `p`:
   the size at index -1 and the capacity at index -2. Each expansion is fully
   parenthesised; SIZE() and CAPACITY() remain usable as lvalues. */
#define SIZE(p) (((int *)(p))[-1])
#define SET_SIZE(p, n) (((int *)(p))[-1] = (n))
#define CAPACITY(p) (((int *)(p))[-2])

/* One entry of a string table searched by find_among() / find_among_b(). */
struct among
{ int s_size; /* number of chars in string */
const symbol * s; /* search string */
int substring_i;/* index to longest matching substring */
int result; /* result of the lookup */
/* Optional callback taking the environment.
   NOTE(review): presumably an extra condition evaluated when the entry
   matches, and may be NULL - confirm against find_among in utilities.c. */
int (* function)(struct SN_env *);
};

/* Runtime support routines used by generated stemmers and by api.c.
   NOTE(review): the implementations are not part of this header (presumably
   runtime/utilities.c); the notes below are limited to what the callers in
   api.c show and should be confirmed against the implementation. */

/* Symbol-buffer allocation. SN_create_env() treats a NULL result from
   create_s() as an allocation failure; lose_s() releases a buffer. */
extern symbol * create_s(void);
extern void lose_s(symbol * p);

/* NOTE(review): presumably moves a position by n UTF-8 characters within
   the bounds lb..l - confirm. */
extern int skip_utf8(const symbol * p, int c, int lb, int l, int n);

/* Character-group tests, UTF-8 variants (_U); _b marks the backward-direction
   form. NOTE(review): `s` is presumably a bitmap for the code range
   min..max - confirm. */
extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);

/* Character-group tests, single-symbol variants. */
extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);

/* String comparison: _s variants take an explicit length and symbol array,
   _v variants take a symbol buffer; _b marks the backward-direction form. */
extern int eq_s(struct SN_env * z, int s_size, const symbol * s);
extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s);
extern int eq_v(struct SN_env * z, const symbol * p);
extern int eq_v_b(struct SN_env * z, const symbol * p);

/* Lookup in a table of v_size `struct among` entries. */
extern int find_among(struct SN_env * z, const struct among * v, int v_size);
extern int find_among_b(struct SN_env * z, const struct among * v, int v_size);

/* String editing. SN_set_current() calls replace_s(z, 0, z->l, size, s, NULL)
   to load a new word and returns its result as an error status, so a
   non-zero return signals failure and `adjustment` may be NULL. */
extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjustment);
extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s);
extern int slice_from_v(struct SN_env * z, const symbol * p);
extern int slice_del(struct SN_env * z);

extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s);
extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p);

/* NOTE(review): presumably copy the current slice / whole string into `p`
   and return the (possibly reallocated) buffer - confirm. */
extern symbol * slice_to(struct SN_env * z, symbol * p);
extern symbol * assign_to(struct SN_env * z, symbol * p);

/* Diagnostic hook. */
extern void debug(struct SN_env * z, int number, int line_count);

Loading