Skip to content

Segmentation fault using serial OpenBLAS with OpenMP on Windows #1847

Closed
@hjbreg

Description

@hjbreg

Updated test results (OpenBLAS 0.3.3)

Windows 64-bit

gcc version 4.9.3 20150626 (Fedora MinGW 4.9.3-1.el7)

  • USE_THREAD=0 NUM_THREADS=1
    segmentation fault

  • USE_THREAD=0 NUM_THREADS=16
    incorrect result

  • USE_THREAD=1 NUM_THREADS=2 USE_OPENMP=1 NUM_PARALLEL=16
    incorrect result

  • USE_THREAD=1 NUM_THREADS=2 openblas_set_num_threads(1)
    incorrect result

Linux 64-bit

gcc version 4.4.7 20120313 (Red Hat 4.4.7-4)

  • USE_THREAD=0 NUM_THREADS=1
    too many memory regions

  • USE_THREAD=0 NUM_THREADS=16
    OK


A segmentation fault occurs if OMP_NUM_THREADS > 4, but linking against netlib-lapack works fine, so I think this may be a problem on the OpenBLAS side. I also tested it on Linux and saw no segmentation fault there.

GCC: x86_64-w64-mingw32-gcc 4.9.3

A single-threaded OpenBLAS was built with the following options:

USE_THREAD=0 DYNAMIC_ARCH=1 DYNAMIC_OLDER=0 NO_CBLAS=1 NO_LAPACKE=1 NO_SHARED=1

Test code: segfault-win.cpp

#include <cstdlib>
#include <vector>
#include <iostream>

// g++ segfault-win.cpp -lopenblas -lgfortran -lquadmath -fopenmp

// Integer type used for every integer argument passed to dgels_ below.
#define bint int

// Prototype of the LAPACK routine DGELS as exported by the linked library
// (OpenBLAS or netlib-lapack). Every argument, including scalars, is passed
// by pointer. `work`/`lwork` describe a caller-provided scratch buffer and
// `info` receives the routine's status code.
extern "C"
void dgels_(char *trans, bint *m, bint *n, bint *nrhs, double *a, bint *lda, double *b, bint *ldb,
            double *work, bint *lwork, bint *info);

// OpenBLAS thread-count controls and the OpenMP runtime query, declared by
// hand here instead of including the corresponding headers.
extern "C" void openblas_set_num_threads(int);
extern "C" int openblas_get_num_threads();
extern "C" int omp_get_max_threads();

// By-value convenience wrapper around dgels_.
//
// Performs the usual two-step LAPACK call sequence: first a workspace query
// (lwork == -1, which makes dgels_ report the workspace size it wants in the
// first element of `work`), then the actual call with a buffer of that size.
//
// Parameters are forwarded unchanged to dgels_; `a` and `b` are passed as
// mutable pointers, so callers should hand in buffers they are happy to have
// modified.
//
// Returns the `info` status reported by dgels_ (0 when both calls succeed;
// the status of the workspace query if that query itself fails).
int C_dgels(char trans, bint m, bint n, bint nrhs, double *a, bint lda, double *b, bint ldb)
{
    bint info = 0;

    // Step 1: workspace query. Only wkopt is written; a/b are not processed.
    double wkopt = 0.0;
    bint lwork = -1;
    dgels_(&trans, &m, &n, &nrhs, a, &lda, b, &ldb, &wkopt, &lwork, &info);
    if (info != 0)
        return info;

    // Step 2: real call. Clamp the reported size so a zero or negative value
    // can never become an invalid allocation length.
    lwork = static_cast<bint>( wkopt );
    if (lwork < 1)
        lwork = 1;

    // std::vector owns the scratch buffer, so no manual delete[] is needed
    // and the memory is released on every exit path.
    std::vector<double> work(lwork);
    dgels_(&trans, &m, &n, &nrhs, a, &lda, b, &ldb, work.data(), &lwork, &info);
    return info;
}

// Reproducer driver: solves the same randomly generated problem many times
// from inside an OpenMP parallel loop and prints, for every repetition after
// the first, whether its result is bit-for-bit equal to repetition 0
// (1 = equal, 0 = different).
int main()
{
    // Enable the call below when testing the build configured with
    // USE_THREAD=1 NUM_THREADS=2
    // openblas_set_num_threads(1);

    const int rows = 300;      // leading dimension / number of observations
    const int cols = 5;        // number of columns in the coefficient matrix
    const int repeats = 1000;  // how many times the problem is solved

    // Fill the right-hand side (y) and the matrix data (x) with pseudo-random
    // values in [0, 1]. The order of std::rand() calls is: one value for y,
    // then `cols` values for x, per row.
    std::vector<double> x;
    std::vector<double> y;
    for (int row = 0; row < rows; ++row) {
        y.push_back( static_cast<double>(std::rand()) / RAND_MAX );
        for (int col = 0; col < cols; ++col) {
            x.push_back( static_cast<double>(std::rand()) / RAND_MAX );
        }
    }

    std::cerr << "omp_get_max_threads: " << omp_get_max_threads() << "\n";
    std::cerr << "openblas_get_num_threads: " << openblas_get_num_threads() << "\n";

    // One result slot per repetition; each loop iteration writes only its own.
    std::vector< std::vector<double> > z(repeats);

    #pragma omp parallel for
    for (int rep = 0; rep < repeats; ++rep) {
        // C_dgels takes mutable pointers, so every iteration works on private
        // copies of the inputs.
        std::vector<double> a_copy(x);
        std::vector<double> b_copy(y);
        C_dgels('N', rows, cols, 1, a_copy.data(), rows, b_copy.data(), rows);
        z[rep] = b_copy;
    }

    // Identical inputs should give identical outputs, so exact equality
    // against the first result is the intended check here.
    const std::vector<double> &reference = z[0];
    for (int rep = 1; rep < repeats; ++rep) {
        const bool same = (z[rep] == reference);
        std::cerr << same;
    }
    std::cerr << "\n";

    return 0;
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions