-
Notifications
You must be signed in to change notification settings - Fork 25
/
Copy pathbuild_native.py
339 lines (282 loc) · 12.8 KB
/
build_native.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
#! /usr/bin/env python3
# SPDX-FileCopyrightText: 2025 geisserml <[email protected]>
# SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause
# Related work: https://github.com/tiran/libpdfium/ and https://aur.archlinux.org/packages/libpdfium-nojs
# NOTE So far, we have only tested this script on Linux, but the general idea should work on any common OS, provided the necessary tools and libraries are available.
# In other words, this script should be portable as far as pdfium's build system and code is.
# On Windows, you might be better off with the toolchained build, though, due to lack of a Unix-like system library environment.
# Known issues:
# - This script does not currently handle rebuilds. You have to manually delete the pdfium/ directory if you want to rebuild with a different version.
import re
import os
import sys
import shutil
import argparse
from enum import Enum
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parents[1]))
from pypdfium2_setup.base import *
# The pdfium version this has last been tested with. Ideally, this should be close to the release version in autorelease/record.json
# To bump this version, first test locally and update any patches as needed. Then, make a branch and run "Test Sourcebuild" on CI to see if all targets continue to work. Commit the new version to the main branch only when all is green. Better stay on an older version for a while than break a target.
# Updating and testing the patch sets can be a lot of work, so we might not want to do this too frequrently.
DEFAULT_VER = 7122
# assert DEFAULT_VER >= PDFIUM_MIN_REQ
PDFIUM_URL = "https://pdfium.googlesource.com/pdfium"
_CR_PREFIX = "https://chromium.googlesource.com/"
DEPS_URLS = dict(
build = _CR_PREFIX + "chromium/src/build",
abseil = _CR_PREFIX + "chromium/src/third_party/abseil-cpp",
fast_float = _CR_PREFIX + "external/github.com/fastfloat/fast_float",
gtest = _CR_PREFIX + "external/github.com/google/googletest",
test_fonts = _CR_PREFIX + "chromium/src/third_party/test_fonts"
)
SOURCES_DIR = ProjectDir / "sbuild" / "native"
PDFIUM_DIR = SOURCES_DIR / "pdfium"
PDFIUM_3RDPARTY = PDFIUM_DIR / "third_party"
DefaultConfig = {
"use_sysroot": False,
"clang_use_chrome_plugins": False,
"treat_warnings_as_errors": False,
"use_remoteexec": False,
"is_debug": False,
"is_component_build": True,
"pdf_is_standalone": True,
"pdf_enable_v8": False,
"pdf_enable_xfa": False,
"pdf_use_skia": False,
"pdf_use_partition_alloc": False,
"use_system_freetype": True,
"pdf_bundle_freetype": False,
"use_system_lcms2": True,
"use_system_libjpeg": True,
"use_system_libopenjpeg2": True,
"use_system_libpng": True,
"use_system_libtiff": True,
"use_system_zlib": True,
"use_custom_libcxx": False,
"use_libcxx_modules": False,
}
if sys.platform.startswith("darwin"):
DefaultConfig["mac_deployment_target"] = "10.13.0"
DefaultConfig["use_system_xcode"] = True
Compiler = Enum("Compiler", "gcc clang")
def _get_repo(url, target_dir, rev, depth=1):
if target_dir.exists():
return False
# see https://stackoverflow.com/questions/31278902/how-to-shallow-clone-a-specific-commit-with-depth-1
# TODO: on git >= 2.49, we can use `git clone --revision ...`
mkdir(target_dir)
run_cmd(["git", "init"], cwd=target_dir)
run_cmd(["git", "remote", "add", "origin", url], cwd=target_dir)
run_cmd(["git", "fetch", "--depth", str(depth), "origin", rev], cwd=target_dir)
run_cmd(["git", "checkout", rev], cwd=target_dir)
return True
DEPS_RE = r"\s*'{key}': '(\w+)'"
DEPS_FIELDS = "build abseil fast_float gtest test_fonts".split(" ")
class _DeferredClass:
@cached_property # included from base.py
def deps(self):
# TODO get a proper parser for the DEPS file format?
deps_content = (PDFIUM_DIR/"DEPS").read_text()
result = {}
for field in DEPS_FIELDS:
field_re = DEPS_RE.format(key=f"{field}_revision")
match = re.search(field_re, deps_content)
assert match, f"Could not find {field!r} in DEPS file"
result[field] = match.group(1)
log(f"Found DEPS revisions:\n{result}")
return result
_Deferred = _DeferredClass()
def _fetch_dep(name, target_dir):
if target_dir.exists():
log(f"Using existing {target_dir}")
return False
# parse out DEPS revisions only if we actually need them
return _get_repo(DEPS_URLS[name], target_dir, rev=_Deferred.deps[name])
def autopatch(file, pattern, repl, is_regex):
log(f"Patch {pattern!r} -> {repl!r} (is_regex={is_regex}) on {file}")
content = file.read_text()
if is_regex:
content, n_subs = re.subn(pattern, repl, content)
assert n_subs > 0
else:
content = content.replace(pattern, repl)
file.write_text(content)
def autopatch_dir(dir, globexpr, pattern, repl, is_regex):
for file in dir.glob(globexpr):
autopatch(file, pattern, repl, is_regex)
def get_sources(short_ver, with_tests, compiler, clang_path):
if short_ver == "main":
pdfium_rev = short_ver
chromium_rev = short_ver
full_ver = PdfiumVer.get_latest_upstream()
else:
full_ver = PdfiumVer.to_full(short_ver)
full_ver_str = ".".join(str(v) for v in full_ver)
pdfium_rev = f"chromium/{short_ver}"
chromium_rev = full_ver_str
is_new = _get_repo(PDFIUM_URL, PDFIUM_DIR, rev=pdfium_rev)
if is_new:
autopatch_dir(
PDFIUM_DIR/"public"/"cpp", "*.h",
r'"public/(.+)"', r'"../\1"',
is_regex = True,
)
# don't build the test fonts (needed for embedder tests only)
autopatch(
PDFIUM_DIR/"testing"/"BUILD.gn",
r'(\s*)("//third_party/test_fonts")', r"\1# \2",
is_regex = True,
)
is_new = _fetch_dep("build", PDFIUM_DIR/"build")
if is_new:
# Work around error about path_exists() being undefined
git_apply_patch(PatchDir/"siso.patch", cwd=PDFIUM_DIR/"build")
if compiler is Compiler.gcc:
# https://crbug.com/402282789
git_apply_patch(PatchDir/"ffp_contract.patch", cwd=PDFIUM_DIR/"build")
elif compiler is Compiler.clang:
# https://crbug.com/410883044
clang_patches = ("system_libcxx_with_clang", "avoid_new_clang_flags")
for patchname in clang_patches:
git_apply_patch(PatchDir/f"{patchname}.patch", cwd=PDFIUM_DIR/"build")
lld_path = clang_path/"bin"/"ld.lld"
autopatch(
PDFIUM_DIR/"build"/"config"/"compiler"/"BUILD.gn",
'ldflags += [ "-fuse-ld=lld" ]',
f'ldflags += [ "-fuse-ld={lld_path}" ]', is_regex=False
)
get_shimheaders_tool(PDFIUM_DIR, rev=chromium_rev)
is_new = _fetch_dep("abseil", PDFIUM_3RDPARTY/"abseil-cpp")
if is_new:
autopatch(
PDFIUM_3RDPARTY/"abseil-cpp"/"BUILD.gn",
'component("absl")', 'static_library("absl")',
is_regex = False,
)
_fetch_dep("fast_float", PDFIUM_3RDPARTY/"fast_float"/"src")
if with_tests:
_fetch_dep("gtest", PDFIUM_3RDPARTY/"googletest"/"src")
_fetch_dep("test_fonts", PDFIUM_3RDPARTY/"test_fonts")
return full_ver
def prepare(config_dict, build_dir):
# Create an empty gclient config
(PDFIUM_DIR/"build"/"config"/"gclient_args.gni").touch(exist_ok=True)
# Unbundle ICU
# alternatively, we could call build/linux/unbundle/replace_gn_files.py --system-libraries icu
(PDFIUM_3RDPARTY/"icu").mkdir(exist_ok=True)
shutil.copyfile(
PDFIUM_DIR/"build"/"linux"/"unbundle"/"icu.gn",
PDFIUM_3RDPARTY/"icu"/"BUILD.gn"
)
# Create target dir and write build config
mkdir(build_dir)
config_str = serialize_gn_config(config_dict)
(build_dir/"args.gn").write_text(config_str)
def build(with_tests, build_dir, compiler, n_jobs):
ninja_args = []
if n_jobs is not None:
ninja_args.extend(["-j", str(n_jobs)])
targets = ["pdfium"]
if with_tests:
targets.append("pdfium_unittests")
build_dir_rel = build_dir.relative_to(PDFIUM_DIR)
run_cmd(["gn", "gen", str(build_dir_rel)], cwd=PDFIUM_DIR)
run_cmd(["ninja", *ninja_args, "-C", str(build_dir_rel), *targets], cwd=PDFIUM_DIR)
def test(build_dir):
# FlateModule.Encode may fail with older zlib (generates different results)
os.environ["GTEST_FILTER"] = "*-FlateModule.Encode"
run_cmd([build_dir/"pdfium_unittests"], cwd=PDFIUM_DIR, check=False)
def _get_clang_ver(clang_path):
from packaging.version import Version
try_libpaths = [
clang_path/"lib"/"clang",
clang_path/"lib64"/"clang",
]
libpath = next(filter(Path.exists, try_libpaths))
candidates = (Version(p.name) for p in libpath.iterdir() if re.fullmatch(r"[\d\.]+", p.name))
version = max(candidates)
return version.major
def setup_compiler(config, compiler, clang_path):
if compiler is Compiler.gcc:
config["is_clang"] = False
elif compiler is Compiler.clang:
assert clang_path, "Clang path must be set"
clang_version = _get_clang_ver(clang_path)
config.update({
"is_clang": True,
"clang_base_path": str(clang_path),
"clang_version": clang_version,
})
else:
assert False, f"Unhandled compiler {compiler}"
def main_api(build_ver=None, with_tests=False, n_jobs=None, compiler=None, clang_path=None):
if build_ver is None:
build_ver = DEFAULT_VER
if compiler is None:
if shutil.which("gcc"):
compiler = Compiler.gcc
elif shutil.which("clang"):
log("gcc not available, will try clang. Note, you may need to set up some symlinks to match the clang directory layout expected by pdfium. Also, make sure libclang_rt builtins are installed.")
compiler = Compiler.clang
else:
raise RuntimeError("Neither gcc nor clang installed.")
if compiler is Compiler.clang and clang_path is None and os.name != "nt":
clang_path = Path(USR_PREFIX)
mkdir(SOURCES_DIR)
full_ver = get_sources(build_ver, with_tests, compiler, clang_path)
build_dir = PDFIUM_DIR/"out"/"Default"
config = DefaultConfig.copy()
setup_compiler(config, compiler, clang_path)
prepare(config, build_dir)
build(with_tests, build_dir, compiler, n_jobs)
if with_tests:
test(build_dir)
n_commits, hash = 0, None
if build_ver == "main":
# don't know how to determine the number of commits with a --depth 1 checkout, so set a placeholder
n_commits = NaN
hash = "g" + run_cmd(["git", "rev-parse", "--short", "HEAD"], cwd=PDFIUM_DIR, capture=True)
pack_sourcebuild(PDFIUM_DIR, build_dir, full_ver, n_commits=n_commits, hash=hash)
def parse_args(argv):
parser = argparse.ArgumentParser(
description = "Build PDFium from source natively with system tools/libraries. This does not depend on Google's binary toolchain, so it should be portable across different Linux architectures.",
)
parser.add_argument(
"--version",
dest = "build_ver",
help = f"The pdfium version to use. Defaults to the version last tested by pypdfium2 maintainers (recommended, currently {DEFAULT_VER}). Otherwise, this can be a specific build number, or 'main' to try the latest upstream state.",
)
parser.add_argument(
"--test",
dest = "with_tests",
action = "store_true",
help = "Whether to build and run tests. Recommended, except on very slow hosts.",
)
parser.add_argument(
"-j", "--jobs",
dest = "n_jobs",
type = int,
metavar = "N",
help = "The number of build jobs to use. If not given, ninja will choose this value. Pass -j $(nprocs) if you wanna make sure this matches the number of processor cores.",
)
parser.add_argument(
"-c", "--compiler",
type = str.lower,
help = "The compiler to use (gcc or clang). By default, gcc is preferred.",
)
# Hint: If you have a simultaneous toolchained checkout, you could use e.g. './sbuild/toolchained/pdfium/third_party/llvm-build/Release+Asserts'
parser.add_argument(
"--clang-path",
type = lambda p: Path(p).expanduser().resolve(),
help = f"Path to clang release folder, without trailing slash. Passing `--compiler clang` is a pre-requisite. By default, we try '{USR_PREFIX}', but your system's folder structure might not match the layout expected by pdfium. Consider creating symlinks as described in pypdfium2's README.md.",
)
args = parser.parse_args(argv)
if args.compiler:
args.compiler = Compiler[args.compiler]
return args
def main_cli():
args = parse_args(sys.argv[1:])
main_api(**vars(args))
if __name__ == "__main__":
main_cli()