Skip to content

bpo-46608: exclude marshalled-frozen data if deep-freezing to save 300 KB space #31074

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Feb 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Doc/c-api/import.rst
Original file line number Diff line number Diff line change
Expand Up @@ -256,8 +256,12 @@ Importing Modules
const char *name;
const unsigned char *code;
int size;
bool is_package;
};

.. versionchanged:: 3.11
The new ``is_package`` field indicates whether the module is a package or not.
This replaces setting the ``size`` field to a negative value.

.. c:var:: const struct _frozen* PyImport_FrozenModules

Expand Down
5 changes: 5 additions & 0 deletions Doc/whatsnew/3.11.rst
Original file line number Diff line number Diff line change
Expand Up @@ -638,6 +638,11 @@ C API Changes
fields of the result from the exception instance (the ``value`` field).
(Contributed by Irit Katriel in :issue:`45711`.)

* :c:struct:`_frozen` has a new ``is_package`` field to indicate whether
or not the frozen module is a package. Previously, a negative value
in the ``size`` field was the indicator. Now only non-negative values
be used for ``size``.
(Contributed by Kumar Aditya in :issue:`46608`.)

New Features
------------
Expand Down
1 change: 1 addition & 0 deletions Include/cpython/import.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ struct _frozen {
const char *name; /* ASCII encoded string */
const unsigned char *code;
int size;
bool is_package;
PyObject *(*get_code)(void);
};

Expand Down
10 changes: 6 additions & 4 deletions Lib/ctypes/test/test_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class struct_frozen(Structure):
_fields_ = [("name", c_char_p),
("code", POINTER(c_ubyte)),
("size", c_int),
("is_package", c_bool),
("get_code", POINTER(c_ubyte)), # Function ptr
]
FrozenTable = POINTER(struct_frozen)
Expand All @@ -71,13 +72,14 @@ class struct_frozen(Structure):
modname = entry.name.decode("ascii")
modules.append(modname)
with self.subTest(modname):
# Do a sanity check on entry.size and entry.code.
self.assertGreater(abs(entry.size), 10)
self.assertTrue([entry.code[i] for i in range(abs(entry.size))])
if entry.size != 0:
# Do a sanity check on entry.size and entry.code.
self.assertGreater(abs(entry.size), 10)
self.assertTrue([entry.code[i] for i in range(abs(entry.size))])
# Check the module's package-ness.
with import_helper.frozen_modules():
spec = importlib.util.find_spec(modname)
if entry.size < 0:
if entry.is_package:
# It's a package.
self.assertIsNotNone(spec.submodule_search_locations)
else:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Exclude marshalled-frozen data if deep-freezing to save 300 KB disk space. This includes adding
a new ``is_package`` field to :c:struct:`_frozen`. Patch by Kumar Aditya.
83 changes: 29 additions & 54 deletions Python/frozen.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,29 +39,6 @@
#include "pycore_import.h"

/* Includes for frozen modules: */
#include "frozen_modules/importlib._bootstrap.h"
#include "frozen_modules/importlib._bootstrap_external.h"
#include "frozen_modules/zipimport.h"
#include "frozen_modules/abc.h"
#include "frozen_modules/codecs.h"
#include "frozen_modules/io.h"
#include "frozen_modules/_collections_abc.h"
#include "frozen_modules/_sitebuiltins.h"
#include "frozen_modules/genericpath.h"
#include "frozen_modules/ntpath.h"
#include "frozen_modules/posixpath.h"
#include "frozen_modules/os.h"
#include "frozen_modules/site.h"
#include "frozen_modules/stat.h"
#include "frozen_modules/importlib.util.h"
#include "frozen_modules/importlib.machinery.h"
#include "frozen_modules/runpy.h"
#include "frozen_modules/__hello__.h"
#include "frozen_modules/__phello__.h"
#include "frozen_modules/__phello__.ham.h"
#include "frozen_modules/__phello__.ham.eggs.h"
#include "frozen_modules/__phello__.spam.h"
#include "frozen_modules/frozen_only.h"
/* End includes */

#define GET_CODE(name) _Py_get_##name##_toplevel
Expand Down Expand Up @@ -98,49 +75,47 @@ extern PyObject *_Py_get___phello___spam_toplevel(void);
extern PyObject *_Py_get_frozen_only_toplevel(void);
/* End extern declarations */

/* Note that a negative size indicates a package. */

static const struct _frozen bootstrap_modules[] = {
{"_frozen_importlib", _Py_M__importlib__bootstrap, (int)sizeof(_Py_M__importlib__bootstrap), GET_CODE(importlib__bootstrap)},
{"_frozen_importlib_external", _Py_M__importlib__bootstrap_external, (int)sizeof(_Py_M__importlib__bootstrap_external), GET_CODE(importlib__bootstrap_external)},
{"zipimport", _Py_M__zipimport, (int)sizeof(_Py_M__zipimport), GET_CODE(zipimport)},
{"_frozen_importlib", NULL, 0, false, GET_CODE(importlib__bootstrap)},
{"_frozen_importlib_external", NULL, 0, false, GET_CODE(importlib__bootstrap_external)},
{"zipimport", NULL, 0, false, GET_CODE(zipimport)},
{0, 0, 0} /* bootstrap sentinel */
};
static const struct _frozen stdlib_modules[] = {
/* stdlib - startup, without site (python -S) */
{"abc", _Py_M__abc, (int)sizeof(_Py_M__abc), GET_CODE(abc)},
{"codecs", _Py_M__codecs, (int)sizeof(_Py_M__codecs), GET_CODE(codecs)},
{"io", _Py_M__io, (int)sizeof(_Py_M__io), GET_CODE(io)},
{"abc", NULL, 0, false, GET_CODE(abc)},
{"codecs", NULL, 0, false, GET_CODE(codecs)},
{"io", NULL, 0, false, GET_CODE(io)},

/* stdlib - startup, with site */
{"_collections_abc", _Py_M___collections_abc, (int)sizeof(_Py_M___collections_abc), GET_CODE(_collections_abc)},
{"_sitebuiltins", _Py_M___sitebuiltins, (int)sizeof(_Py_M___sitebuiltins), GET_CODE(_sitebuiltins)},
{"genericpath", _Py_M__genericpath, (int)sizeof(_Py_M__genericpath), GET_CODE(genericpath)},
{"ntpath", _Py_M__ntpath, (int)sizeof(_Py_M__ntpath), GET_CODE(ntpath)},
{"posixpath", _Py_M__posixpath, (int)sizeof(_Py_M__posixpath), GET_CODE(posixpath)},
{"os.path", _Py_M__posixpath, (int)sizeof(_Py_M__posixpath), GET_CODE(posixpath)},
{"os", _Py_M__os, (int)sizeof(_Py_M__os), GET_CODE(os)},
{"site", _Py_M__site, (int)sizeof(_Py_M__site), GET_CODE(site)},
{"stat", _Py_M__stat, (int)sizeof(_Py_M__stat), GET_CODE(stat)},
{"_collections_abc", NULL, 0, false, GET_CODE(_collections_abc)},
{"_sitebuiltins", NULL, 0, false, GET_CODE(_sitebuiltins)},
{"genericpath", NULL, 0, false, GET_CODE(genericpath)},
{"ntpath", NULL, 0, false, GET_CODE(ntpath)},
{"posixpath", NULL, 0, false, GET_CODE(posixpath)},
{"os.path", NULL, 0, false, GET_CODE(posixpath)},
{"os", NULL, 0, false, GET_CODE(os)},
{"site", NULL, 0, false, GET_CODE(site)},
{"stat", NULL, 0, false, GET_CODE(stat)},

/* runpy - run module with -m */
{"importlib.util", _Py_M__importlib_util, (int)sizeof(_Py_M__importlib_util), GET_CODE(importlib_util)},
{"importlib.machinery", _Py_M__importlib_machinery, (int)sizeof(_Py_M__importlib_machinery), GET_CODE(importlib_machinery)},
{"runpy", _Py_M__runpy, (int)sizeof(_Py_M__runpy), GET_CODE(runpy)},
{"importlib.util", NULL, 0, false, GET_CODE(importlib_util)},
{"importlib.machinery", NULL, 0, false, GET_CODE(importlib_machinery)},
{"runpy", NULL, 0, false, GET_CODE(runpy)},
{0, 0, 0} /* stdlib sentinel */
};
static const struct _frozen test_modules[] = {
{"__hello__", _Py_M____hello__, (int)sizeof(_Py_M____hello__), GET_CODE(__hello__)},
{"__hello_alias__", _Py_M____hello__, (int)sizeof(_Py_M____hello__), GET_CODE(__hello__)},
{"__phello_alias__", _Py_M____hello__, -(int)sizeof(_Py_M____hello__), GET_CODE(__hello__)},
{"__phello_alias__.spam", _Py_M____hello__, (int)sizeof(_Py_M____hello__), GET_CODE(__hello__)},
{"__phello__", _Py_M____phello__, -(int)sizeof(_Py_M____phello__), GET_CODE(__phello__)},
{"__phello__.__init__", _Py_M____phello__, (int)sizeof(_Py_M____phello__), GET_CODE(__phello__)},
{"__phello__.ham", _Py_M____phello___ham, -(int)sizeof(_Py_M____phello___ham), GET_CODE(__phello___ham)},
{"__phello__.ham.__init__", _Py_M____phello___ham, (int)sizeof(_Py_M____phello___ham), GET_CODE(__phello___ham)},
{"__phello__.ham.eggs", _Py_M____phello___ham_eggs, (int)sizeof(_Py_M____phello___ham_eggs), GET_CODE(__phello___ham_eggs)},
{"__phello__.spam", _Py_M____phello___spam, (int)sizeof(_Py_M____phello___spam), GET_CODE(__phello___spam)},
{"__hello_only__", _Py_M__frozen_only, (int)sizeof(_Py_M__frozen_only), GET_CODE(frozen_only)},
{"__hello__", NULL, 0, false, GET_CODE(__hello__)},
{"__hello_alias__", NULL, 0, false, GET_CODE(__hello__)},
{"__phello_alias__", NULL, 0, true, GET_CODE(__hello__)},
{"__phello_alias__.spam", NULL, 0, false, GET_CODE(__hello__)},
{"__phello__", NULL, 0, true, GET_CODE(__phello__)},
{"__phello__.__init__", NULL, 0, false, GET_CODE(__phello__)},
{"__phello__.ham", NULL, 0, true, GET_CODE(__phello___ham)},
{"__phello__.ham.__init__", NULL, 0, false, GET_CODE(__phello___ham)},
{"__phello__.ham.eggs", NULL, 0, false, GET_CODE(__phello___ham_eggs)},
{"__phello__.spam", NULL, 0, false, GET_CODE(__phello___spam)},
{"__hello_only__", NULL, 0, false, GET_CODE(frozen_only)},
{0, 0, 0} /* test sentinel */
};
const struct _frozen *_PyImport_FrozenBootstrap = bootstrap_modules;
Expand Down
16 changes: 12 additions & 4 deletions Python/import.c
Original file line number Diff line number Diff line change
Expand Up @@ -1297,13 +1297,21 @@ find_frozen(PyObject *nameobj, struct frozen_info *info)
info->nameobj = nameobj; // borrowed
info->data = (const char *)p->code;
info->get_code = p->get_code;
info->size = p->size < 0 ? -(p->size) : p->size;
info->is_package = p->size < 0 ? true : false;
info->size = p->size;
info->is_package = p->is_package;
if (p->size < 0) {
// backward compatibility with negative size values
info->size = -(p->size);
info->is_package = true;
}
info->origname = name;
info->is_alias = resolve_module_alias(name, _PyImport_FrozenAliases,
&info->origname);
}

if (p->code == NULL && p->size == 0 && p->get_code != NULL) {
/* It is only deepfrozen. */
return FROZEN_OKAY;
}
if (p->code == NULL) {
/* It is frozen but marked as un-importable. */
return FROZEN_EXCLUDED;
Expand Down Expand Up @@ -2224,7 +2232,7 @@ _imp_get_frozen_object_impl(PyObject *module, PyObject *name,
if (info.nameobj == NULL) {
info.nameobj = name;
}
if (info.size == 0) {
if (info.size == 0 && info.get_code == NULL) {
/* Does not contain executable code. */
set_frozen_error(FROZEN_INVALID, name);
return NULL;
Expand Down
12 changes: 6 additions & 6 deletions Tools/freeze/makefreeze.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,19 +45,19 @@ def makefreeze(base, dict, debug=0, entry_point=None, fail_import=()):
print("freezing", mod, "...")
str = marshal.dumps(m.__code__)
size = len(str)
is_package = 'false'
if m.__path__:
# Indicate package by negative size
size = -size
done.append((mod, mangled, size))
is_package = 'true'
done.append((mod, mangled, size, is_package))
writecode(outfp, mangled, str)
if debug:
print("generating table of frozen modules")
with bkfile.open(base + 'frozen.c', 'w') as outfp:
for mod, mangled, size in done:
for mod, mangled, size, _ in done:
outfp.write('extern unsigned char M_%s[];\n' % mangled)
outfp.write(header)
for mod, mangled, size in done:
outfp.write('\t{"%s", M_%s, %d},\n' % (mod, mangled, size))
for mod, mangled, size, is_package in done:
outfp.write('\t{"%s", M_%s, %d, %s},\n' % (mod, mangled, size, is_package))
outfp.write('\n')
# The following modules have a NULL code pointer, indicating
# that the frozen program should not search for them on the host
Expand Down
38 changes: 22 additions & 16 deletions Tools/scripts/freeze_modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import ntpath
import posixpath
import sys

import argparse
from update_file import updating_file_with_tmpfile


Expand Down Expand Up @@ -463,14 +463,15 @@ def replace_block(lines, start_marker, end_marker, replacements, file):
return lines[:start_pos + 1] + replacements + lines[end_pos:]


def regen_frozen(modules):
def regen_frozen(modules, deepfreeze_only: bool):
headerlines = []
parentdir = os.path.dirname(FROZEN_FILE)
for src in _iter_sources(modules):
# Adding a comment to separate sections here doesn't add much,
# so we don't.
header = relpath_for_posix_display(src.frozenfile, parentdir)
headerlines.append(f'#include "{header}"')
if not deepfreeze_only:
for src in _iter_sources(modules):
# Adding a comment to separate sections here doesn't add much,
# so we don't.
header = relpath_for_posix_display(src.frozenfile, parentdir)
headerlines.append(f'#include "{header}"')

externlines = []
bootstraplines = []
Expand Down Expand Up @@ -500,9 +501,13 @@ def regen_frozen(modules):
externlines.append("extern PyObject *%s(void);" % get_code_name)

symbol = mod.symbol
pkg = '-' if mod.ispkg else ''
line = ('{"%s", %s, %s(int)sizeof(%s), GET_CODE(%s)},'
) % (mod.name, symbol, pkg, symbol, code_name)
pkg = 'true' if mod.ispkg else 'false'
if deepfreeze_only:
line = ('{"%s", NULL, 0, %s, GET_CODE(%s)},'
) % (mod.name, pkg, code_name)
else:
line = ('{"%s", %s, (int)sizeof(%s), %s, GET_CODE(%s)},'
) % (mod.name, symbol, symbol, pkg, code_name)
lines.append(line)

if mod.isalias:
Expand Down Expand Up @@ -710,18 +715,19 @@ def regen_pcbuild(modules):
#######################################
# the script

def main():
def main(deepfreeze_only: bool):
# Expand the raw specs, preserving order.
modules = list(parse_frozen_specs())

# Regen build-related files.
regen_makefile(modules)
regen_pcbuild(modules)
regen_frozen(modules)
regen_frozen(modules, deepfreeze_only)


if __name__ == '__main__':
argv = sys.argv[1:]
if argv:
sys.exit(f'ERROR: got unexpected args {argv}')
main()
parser = argparse.ArgumentParser()
parser.add_argument("--deepfreeze-only", action="store_true",
help="Only use deepfrozen modules", default=True)
args = parser.parse_args()
main(args.deepfreeze_only)