diff --git a/src/Compilation.zig b/src/Compilation.zig index 21a0c6fe587c..8e82ac5cb105 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -35,6 +35,7 @@ const WaitGroup = @import("WaitGroup.zig"); const libtsan = @import("libtsan.zig"); const Zir = @import("Zir.zig"); const Color = @import("main.zig").Color; +const aro = @import("aro/lib.zig"); /// General-purpose allocator. Used for both temporary and long-term storage. gpa: Allocator, @@ -237,6 +238,10 @@ const Job = union(enum) { /// The value is the index into `link.File.Options.system_libs`. windows_import_lib: usize, + + /// Compile a C source file with Aro. + /// The value is the index into `c_source_files`. + arocc: usize, }; pub const CObject = struct { @@ -1677,17 +1682,20 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { try comp.astgen_wait_group.init(); errdefer comp.astgen_wait_group.deinit(); - // Add a `CObject` for each of the `c_source_files`. - try comp.c_object_table.ensureTotalCapacity(gpa, options.c_source_files.len); - for (options.c_source_files) |c_source_file| { - const c_object = try gpa.create(CObject); - errdefer gpa.destroy(c_object); - - c_object.* = .{ - .status = .{ .new = {} }, - .src = c_source_file, - }; - comp.c_object_table.putAssumeCapacityNoClobber(c_object, {}); + // When using Clang, add a `CObject` for each of the `c_source_files`. + const use_clang = build_options.have_llvm; + if (use_clang) { + try comp.c_object_table.ensureTotalCapacity(gpa, options.c_source_files.len); + for (options.c_source_files) |c_source_file| { + const c_object = try gpa.create(CObject); + errdefer gpa.destroy(c_object); + + c_object.* = .{ + .status = .{ .new = {} }, + .src = c_source_file, + }; + comp.c_object_table.putAssumeCapacityNoClobber(c_object, {}); + } } const have_bin_emit = comp.bin_file.options.emit != null or comp.whole_bin_sub_path != null; @@ -2025,9 +2033,17 @@ pub fn update(comp: *Compilation) !void { // For compiling C objects, we rely on the cache hash system to avoid duplicating work. // Add a Job for each C object. - try comp.c_object_work_queue.ensureUnusedCapacity(comp.c_object_table.count()); - for (comp.c_object_table.keys()) |key| { - comp.c_object_work_queue.writeItemAssumeCapacity(key); + // Note that when using the Aro frontend instead of Clang, `c_object_work_queue` is always empty.
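+ // Each `arocc` job carries only the index of its entry in `c_source_files`;
+ // the file is read and compiled by `compileWithAro` when `processOneJob`
+ // executes the job.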
+ const use_clang = build_options.have_llvm; + if (use_clang) { + try comp.c_object_work_queue.ensureUnusedCapacity(comp.c_object_table.count()); + for (comp.c_object_table.keys()) |key| { + comp.c_object_work_queue.writeItemAssumeCapacity(key); + } + } else { + for (comp.c_source_files) |_, i| { + try comp.work_queue.writeItem(.{ .arocc = i }); + } } const use_stage1 = build_options.is_stage1 and comp.bin_file.options.use_stage1; @@ -2811,6 +2827,15 @@ fn processOneJob(comp: *Compilation, job: Job, main_progress_node: *std.Progress return; }, }, + .arocc => |c_source_file_index| { + const c_source_file = comp.c_source_files[c_source_file_index]; + comp.compileWithAro(c_source_file) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + else => { + @panic("properly handle arocc errors"); + }, + }; + }, .emit_h_decl => |decl| switch (decl.analysis) { .unreferenced => unreachable, .in_progress => unreachable, @@ -3145,6 +3170,58 @@ fn processOneJob(comp: *Compilation, job: Job, main_progress_node: *std.Progress } } +fn compileWithAro(comp: *Compilation, c_source_file: CSourceFile) !void { + const src_path = c_source_file.src_path; + + var arena_allocator = std.heap.ArenaAllocator.init(comp.gpa); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + var aro_comp = aro.Compilation.init(comp.gpa); + defer aro_comp.deinit(); + + aro_comp.target = comp.getTarget(); + + try aro_comp.addDefaultPragmaHandlers(); + + const c_headers_dir = try std.fs.path.join(arena, &[_][]const u8{ comp.zig_lib_directory.path.?, "include" }); + try aro_comp.system_include_dirs.append(c_headers_dir); + + for (comp.libc_include_dir_list) |include_dir| { + try aro_comp.system_include_dirs.append(include_dir); + } + + var macro_buf = std.ArrayList(u8).init(comp.gpa); + defer macro_buf.deinit(); + + const builtin_macros = try aro_comp.generateBuiltinMacros(); + const user_macros = try aro_comp.addSourceFromBuffer("", macro_buf.items); + + const source = try aro_comp.addSourceFromPath(src_path); + + aro_comp.generated_buf.items.len = 0; + var pp = aro.Preprocessor.init(&aro_comp); + defer pp.deinit(); + try pp.addBuiltinMacros(); + + _ = try pp.preprocess(builtin_macros); + _ = try pp.preprocess(user_macros); + const eof = try pp.preprocess(source); + try pp.tokens.append(comp.gpa, eof); + + var tree = try aro.Parser.parse(&pp); + defer tree.deinit(); + + aro_comp.renderErrors(); // populates aro_comp.diag.errors + + if (aro_comp.diag.errors != 0) { + // errors occurred + @panic("report aro errors"); + } + + try aro.Codegen.generateTree(comp, &aro_comp, tree, arena); +} + const AstGenSrc = union(enum) { root, import: struct { diff --git a/src/Module.zig b/src/Module.zig index e2e250592751..7fba875e1212 100644 --- a/src/Module.zig +++ b/src/Module.zig @@ -4709,6 +4709,50 @@ pub fn createAnonymousDecl(mod: *Module, block: *Sema.Block, typed_value: TypedV return mod.createAnonymousDeclFromDecl(block.src_decl, block.namespace, block.wip_capture_scope, typed_value); } +/// TODO cleanup +pub fn createAnonymousDecl2(mod: *Module, typed_value: TypedValue, src_name: []const u8) !*Decl { + const name_index = mod.getNextAnonNameIndex(); + const name = try std.fmt.allocPrintZ(mod.gpa, "{s}__anon_{d}", .{ + src_name, name_index, + }); + errdefer mod.gpa.free(name); + + const new_decl = try mod.allocateNewDecl(name, undefined, 0, null); + + new_decl.src_line = 0; + new_decl.ty = typed_value.ty; + new_decl.val = typed_value.val; + new_decl.align_val = Value.@"null"; + 
new_decl.linksection_val = Value.@"null"; + new_decl.has_tv = true; + new_decl.analysis = .complete; + new_decl.generation = mod.generation; + + try mod.comp.bin_file.allocateDeclIndexes(new_decl); + try mod.comp.anon_work_queue.writeItem(.{ .codegen_decl = new_decl }); + + return new_decl; +} + +pub fn createDecl2(mod: *Module, typed_value: TypedValue, name: []const u8) !*Decl { + const duped_name = try mod.gpa.dupeZ(u8, name); + const new_decl = try mod.allocateNewDecl(duped_name, undefined, 0, null); + + new_decl.src_line = 0; + new_decl.ty = typed_value.ty; + new_decl.val = typed_value.val; + new_decl.align_val = Value.@"null"; + new_decl.linksection_val = Value.@"null"; + new_decl.has_tv = true; + new_decl.analysis = .complete; + new_decl.generation = mod.generation; + new_decl.alive = true; + + try mod.comp.bin_file.allocateDeclIndexes(new_decl); + + return new_decl; +} + pub fn createAnonymousDeclFromDecl( mod: *Module, src_decl: *Decl, diff --git a/src/Sema.zig b/src/Sema.zig index c4b3ad8c3330..b272cfd7d253 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -164,11 +164,6 @@ pub const Block = struct { br_list: std.ArrayListUnmanaged(Air.Inst.Index), }; - /// For debugging purposes. - pub fn dump(block: *Block, mod: Module) void { - Zir.dumpBlock(mod, block); - } - pub fn makeSubBlock(parent: *Block) Block { return .{ .parent = parent, diff --git a/src/aro/Attribute.zig b/src/aro/Attribute.zig new file mode 100644 index 000000000000..95a151fe9b50 --- /dev/null +++ b/src/aro/Attribute.zig @@ -0,0 +1,978 @@ +const std = @import("std"); +const mem = std.mem; +const Tree = @import("Tree.zig"); +const Diagnostics = @import("Diagnostics.zig"); +const Value = @import("Value.zig"); +const Compilation = @import("Compilation.zig"); +const Type = @import("Type.zig"); +const NodeIndex = Tree.NodeIndex; +const TokenIndex = Tree.TokenIndex; +const TypeInfo = std.builtin.TypeInfo; + +const Attribute = @This(); + +tag: Tag, +args: Arguments, + +pub const Kind = enum { + c2x, + declspec, + gnu, +}; + +pub const ArgumentType = enum { + string, + identifier, + int, + alignment, + float, + array, + expression, + + pub fn toString(self: ArgumentType) []const u8 { + return switch (self) { + .string => "a string", + .identifier => "an identifier", + .int, .alignment => "an integer constant", + .float => "a floating point number", + .array => "an array", + .expression => "an expression", + }; + } + + fn fromType(comptime T: type) ArgumentType { + return switch (T) { + []const u8 => .string, + Identifier => .identifier, + u32 => .int, + Alignment => .alignment, + else => switch (@typeInfo(T)) { + .Enum => if (T.opts.enum_kind == .string) .string else .identifier, + else => unreachable, + }, + }; + } + + fn fromVal(value: Value) ArgumentType { + return switch (value.tag) { + .int => .int, + .bytes => .string, + .unavailable => .expression, + .float => .float, + .array => .array, + }; + } +}; + +fn getArguments(comptime descriptor: type) []const TypeInfo.StructField { + return if (@hasDecl(descriptor, "Args")) std.meta.fields(descriptor.Args) else &.{}; +} + +/// number of required arguments +pub fn requiredArgCount(attr: Tag) u32 { + inline for (@typeInfo(Tag).Enum.fields) |field, i| { + if (field.value == @enumToInt(attr)) comptime { + var needed = 0; + const decl = @typeInfo(attributes).Struct.decls[i]; + const fields = getArguments(@field(attributes, decl.name)); + for (fields) |arg_field| { + if (!mem.eql(u8, arg_field.name, "__name_tok") and @typeInfo(arg_field.field_type) != .Optional) needed += 1; + } + 
return needed; + }; + } + unreachable; +} + +/// maximum number of args that can be passed +pub fn maxArgCount(attr: Tag) u32 { + inline for (@typeInfo(Tag).Enum.fields) |field, i| { + if (field.value == @enumToInt(attr)) comptime { + const decl = @typeInfo(attributes).Struct.decls[i]; + const fields = getArguments(@field(attributes, decl.name)); + var max = 0; + for (fields) |arg_field| { + if (!mem.eql(u8, arg_field.name, "__name_tok")) max += 1; + } + return max; + }; + } + unreachable; +} + +fn UnwrapOptional(comptime T: type) type { + return switch (@typeInfo(T)) { + .Optional => |optional| optional.child, + else => T, + }; +} + +pub const Formatting = struct { + /// The quote char (single or double) to use when printing identifiers/strings corresponding + /// to the enum in the first field of the Args of `attr`. Identifier enums use single quotes, string enums + /// use double quotes + fn quoteChar(attr: Tag) []const u8 { + inline for (@typeInfo(Tag).Enum.fields) |field, i| { + if (field.value == @enumToInt(attr)) { + const decl = @typeInfo(attributes).Struct.decls[i]; + const fields = getArguments(@field(attributes, decl.name)); + + if (fields.len == 0) unreachable; + const Unwrapped = UnwrapOptional(fields[0].field_type); + if (@typeInfo(Unwrapped) != .Enum) unreachable; + + return if (Unwrapped.opts.enum_kind == .identifier) "'" else "\""; + } + } + unreachable; + } + + /// returns a comma-separated string of quoted enum values, representing the valid + /// choices for the string or identifier enum of the first field of the Args of `attr`. + pub fn choices(attr: Tag) []const u8 { + inline for (@typeInfo(Tag).Enum.fields) |field, i| { + if (field.value == @enumToInt(attr)) { + const decl = @typeInfo(attributes).Struct.decls[i]; + const fields = getArguments(@field(attributes, decl.name)); + + if (fields.len == 0) unreachable; + const Unwrapped = UnwrapOptional(fields[0].field_type); + if (@typeInfo(Unwrapped) != .Enum) unreachable; + + const enum_fields = @typeInfo(Unwrapped).Enum.fields; + @setEvalBranchQuota(3000); + const quote = comptime quoteChar(@intToEnum(Tag, field.value)); + comptime var values: []const u8 = quote ++ enum_fields[0].name ++ quote; + inline for (enum_fields[1..]) |enum_field| { + values = values ++ ", "; + values = values ++ quote ++ enum_field.name ++ quote; + } + return values; + } + } + unreachable; + } +}; + +/// Checks if the first argument (if it exists) is an identifier enum +pub fn wantsIdentEnum(attr: Tag) bool { + inline for (@typeInfo(Tag).Enum.fields) |field, i| { + if (field.value == @enumToInt(attr)) { + const decl = @typeInfo(attributes).Struct.decls[i]; + const fields = getArguments(@field(attributes, decl.name)); + + if (fields.len == 0) return false; + const Unwrapped = UnwrapOptional(fields[0].field_type); + if (@typeInfo(Unwrapped) != .Enum) return false; + + return Unwrapped.opts.enum_kind == .identifier; + } + } + unreachable; +} + +pub fn diagnoseIdent(attr: Tag, arguments: *Arguments, ident: []const u8) ?Diagnostics.Message { + inline for (@typeInfo(Tag).Enum.fields) |field, i| { + if (field.value == @enumToInt(attr)) { + const decl = @typeInfo(attributes).Struct.decls[i]; + const fields = getArguments(@field(attributes, decl.name)); + if (fields.len == 0) unreachable; + const Unwrapped = UnwrapOptional(fields[0].field_type); + if (@typeInfo(Unwrapped) != .Enum) unreachable; + if (std.meta.stringToEnum(Unwrapped, normalize(ident))) |enum_val| { + @field(@field(arguments, decl.name), fields[0].name) = enum_val; + return null; + } + 
return Diagnostics.Message{ + .tag = .unknown_attr_enum, + .extra = .{ .attr_enum = .{ .tag = attr } }, + }; + } + } + unreachable; +} + +pub fn wantsAlignment(attr: Tag, idx: usize) bool { + inline for (@typeInfo(Tag).Enum.fields) |field, i| { + if (field.value == @enumToInt(attr)) { + const decl = @typeInfo(attributes).Struct.decls[i]; + const fields = getArguments(@field(attributes, decl.name)); + + if (idx >= fields.len) return false; + inline for (fields) |arg_field, field_idx| { + if (field_idx == idx) { + return UnwrapOptional(arg_field.field_type) == Alignment; + } + } + } + } + unreachable; +} + +pub fn diagnoseAlignment(attr: Tag, arguments: *Arguments, arg_idx: u32, val: Value, ty: Type, comp: *Compilation) ?Diagnostics.Message { + inline for (@typeInfo(Tag).Enum.fields) |field, i| { + if (field.value == @enumToInt(attr)) { + const decl = @typeInfo(attributes).Struct.decls[i]; + const arg_fields = getArguments(@field(attributes, decl.name)); + inline for (arg_fields) |arg_field, arg_i| { + if (arg_idx == arg_i) { + if (UnwrapOptional(arg_field.field_type) != Alignment) unreachable; + + if (val.tag == .unavailable) return Diagnostics.Message{ .tag = .alignas_unavailable }; + if (val.compare(.lt, Value.int(0), ty, comp)) { + return Diagnostics.Message{ .tag = .negative_alignment, .extra = .{ .signed = val.signExtend(ty, comp) } }; + } + const requested = std.math.cast(u29, val.data.int) catch { + return Diagnostics.Message{ .tag = .maximum_alignment, .extra = .{ .unsigned = val.data.int } }; + }; + if (!std.mem.isValidAlign(requested)) return Diagnostics.Message{ .tag = .non_pow2_align }; + + @field(@field(arguments, decl.name), arg_field.name) = Alignment{ .requested = requested }; + return null; + } + } + unreachable; + } + } + unreachable; +} + +fn diagnoseField( + comptime decl: TypeInfo.Declaration, + comptime field: TypeInfo.StructField, + comptime wanted: type, + arguments: *Arguments, + val: Value, + node: Tree.Node, +) ?Diagnostics.Message { + switch (val.tag) { + .int => { + if (@typeInfo(wanted) == .Int) { + @field(@field(arguments, decl.name), field.name) = val.getInt(wanted); + return null; + } + }, + .bytes => { + const bytes = @as([]const u8, val.data.bytes[0 .. val.data.bytes.len - 1]); + if (wanted == []const u8) { + @field(@field(arguments, decl.name), field.name) = bytes; + return null; + } else if (@typeInfo(wanted) == .Enum and wanted.opts.enum_kind == .string) { + if (std.meta.stringToEnum(wanted, bytes)) |enum_val| { + @field(@field(arguments, decl.name), field.name) = enum_val; + return null; + } else { + @setEvalBranchQuota(3000); + return Diagnostics.Message{ + .tag = .unknown_attr_enum, + .extra = .{ .attr_enum = .{ .tag = std.meta.stringToEnum(Tag, decl.name).? 
} }, + }; + } + } + }, + else => { + if (wanted == Identifier and node.tag == .decl_ref_expr) { + @field(@field(arguments, decl.name), field.name) = Identifier{ .tok = node.data.decl_ref }; + return null; + } + }, + } + return Diagnostics.Message{ + .tag = .attribute_arg_invalid, + .extra = .{ .attr_arg_type = .{ .expected = ArgumentType.fromType(wanted), .actual = ArgumentType.fromVal(val) } }, + }; +} + +pub fn diagnose(attr: Tag, arguments: *Arguments, arg_idx: u32, val: Value, node: Tree.Node) ?Diagnostics.Message { + inline for (@typeInfo(Tag).Enum.fields) |field, i| { + if (field.value == @enumToInt(attr)) { + const decl = @typeInfo(attributes).Struct.decls[i]; + const max_arg_count = maxArgCount(attr); + if (arg_idx >= max_arg_count) return Diagnostics.Message{ + .tag = .attribute_too_many_args, + .extra = .{ .attr_arg_count = .{ .attribute = attr, .expected = max_arg_count } }, + }; + const arg_fields = getArguments(@field(attributes, decl.name)); + inline for (arg_fields) |arg_field, arg_i| { + if (arg_idx == arg_i) { + return diagnoseField(decl, arg_field, UnwrapOptional(arg_field.field_type), arguments, val, node); + } + } + unreachable; + } + } + unreachable; +} + +const EnumTypes = enum { + string, + identifier, +}; +pub const Alignment = struct { + node: NodeIndex = .none, + requested: u29, + alignas: bool = false, +}; +pub const Identifier = struct { + tok: TokenIndex = 0, +}; + +const attributes = struct { + const access = struct { + const gnu = "access"; + + const Args = struct { + access_mode: enum { + read_only, + read_write, + write_only, + none, + + const opts = struct { + const enum_kind = .identifier; + }; + }, + ref_index: u32, + size_index: ?u32 = null, + }; + }; + const alias = struct { + const gnu = "alias"; + const Args = struct { + alias: []const u8, + }; + }; + const aligned = struct { + const gnu = "aligned"; + const declspec = "align"; + + const Args = struct { + alignment: ?Alignment = null, + __name_tok: TokenIndex = undefined, + }; + }; + const alloc_align = struct { + const gnu = "alloc_align"; + + const Args = struct { + position: u32, + }; + }; + const alloc_size = struct { + const gnu = "alloc_size"; + + const Args = struct { + position_1: u32, + position_2: ?u32 = null, + }; + }; + const allocate = struct { + const declspec = "allocate"; + + const Args = struct { + segname: []const u8, + }; + }; + const allocator = struct { + const declspec = "allocator"; + }; + const always_inline = struct { + const gnu = "always_inline"; + }; + const appdomain = struct { + const declspec = "appdomain"; + }; + const artificial = struct { + const gnu = "artificial"; + }; + const assume_aligned = struct { + const gnu = "assume_aligned"; + const Args = struct { + alignment: Alignment, + offset: ?u32 = null, + }; + }; + const cleanup = struct { + const gnu = "cleanup"; + const Args = struct { + function: Identifier, + }; + }; + const code_seg = struct { + const declspec = "code_seg"; + const Args = struct { + segname: []const u8, + }; + }; + const cold = struct { + const gnu = "cold"; + }; + const common = struct { + const gnu = "common"; + }; + const @"const" = struct { + const gnu = "const"; + }; + const constructor = struct { + const gnu = "constructor"; + const Args = struct { + priority: ?u32 = null, + }; + }; + const copy = struct { + const gnu = "copy"; + const Args = struct { + function: Identifier, + }; + }; + const deprecated = struct { + const gnu = "deprecated"; + const declspec = "deprecated"; + const c2x = "deprecated"; + + const Args = struct { + msg: 
?[]const u8 = null, + __name_tok: TokenIndex = undefined, + }; + }; + const designated_init = struct { + const gnu = "designated_init"; + }; + const destructor = struct { + const gnu = "destructor"; + const Args = struct { + priority: ?u32 = null, + }; + }; + const dllexport = struct { + const declspec = "dllexport"; + }; + const dllimport = struct { + const declspec = "dllimport"; + }; + const @"error" = struct { + const gnu = "error"; + const Args = struct { + message: []const u8, + }; + }; + const externally_visible = struct { + const gnu = "externally_visible"; + }; + const fallthrough = struct { + const gnu = "fallthrough"; + const c2x = "fallthrough"; + }; + const flatten = struct { + const gnu = "flatten"; + }; + const format = struct { + const gnu = "format"; + const Args = struct { + archetype: enum { + printf, + scanf, + strftime, + strfmon, + + const opts = struct { + const enum_kind = .identifier; + }; + }, + string_index: u32, + first_to_check: u32, + }; + }; + const format_arg = struct { + const gnu = "format_arg"; + const Args = struct { + string_index: u32, + }; + }; + const gnu_inline = struct { + const gnu = "gnu_inline"; + }; + const hot = struct { + const gnu = "hot"; + }; + const ifunc = struct { + const gnu = "ifunc"; + const Args = struct { + resolver: []const u8, + }; + }; + const interrupt = struct { + const gnu = "interrupt"; + }; + const interrupt_handler = struct { + const gnu = "interrupt_handler"; + }; + const jitintrinsic = struct { + const declspec = "jitintrinsic"; + }; + const leaf = struct { + const gnu = "leaf"; + }; + const malloc = struct { + const gnu = "malloc"; + }; + const may_alias = struct { + const gnu = "may_alias"; + }; + const mode = struct { + const gnu = "mode"; + const Args = struct { + mode: enum { + // zig fmt: off + byte, word, pointer, + BI, QI, HI, + PSI, SI, PDI, + DI, TI, OI, + XI, QF, HF, + TQF, SF, DF, + XF, SD, DD, + TD, TF, QQ, + HQ, SQ, DQ, + TQ, UQQ, UHQ, + USQ, UDQ, UTQ, + HA, SA, DA, + TA, UHA, USA, + UDA, UTA, CC, + BLK, VOID, QC, + HC, SC, DC, + XC, TC, CQI, + CHI, CSI, CDI, + CTI, COI, CPSI, + BND32, BND64, + // zig fmt: on + + const opts = struct { + const enum_kind = .identifier; + }; + }, + }; + }; + const naked = struct { + const declspec = "naked"; + }; + const no_address_safety_analysis = struct { + const gnu = "no_address_safety_analysis"; + }; + const no_icf = struct { + const gnu = "no_icf"; + }; + const no_instrument_function = struct { + const gnu = "no_instrument_function"; + }; + const no_profile_instrument_function = struct { + const gnu = "no_profile_instrument_function"; + }; + const no_reorder = struct { + const gnu = "no_reorder"; + }; + const no_sanitize = struct { + const gnu = "no_sanitize"; + /// TODO: represent args as union?
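+ /// (The field names below appear to follow the two-argument example
+ /// from the GCC docs: `no_sanitize ("alignment", "object-size")`.)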
+ const Args = struct { + alignment: []const u8, + object_size: ?[]const u8 = null, + }; + }; + const no_sanitize_address = struct { + const gnu = "no_sanitize_address"; + const declspec = "no_sanitize_address"; + }; + const no_sanitize_coverage = struct { + const gnu = "no_sanitize_coverage"; + }; + const no_sanitize_thread = struct { + const gnu = "no_sanitize_thread"; + }; + const no_sanitize_undefined = struct { + const gnu = "no_sanitize_undefined"; + }; + const no_split_stack = struct { + const gnu = "no_split_stack"; + }; + const no_stack_limit = struct { + const gnu = "no_stack_limit"; + }; + const no_stack_protector = struct { + const gnu = "no_stack_protector"; + }; + const @"noalias" = struct { + const declspec = "noalias"; + }; + const noclone = struct { + const gnu = "noclone"; + }; + const nocommon = struct { + const gnu = "nocommon"; + }; + const nodiscard = struct { + const c2x = "nodiscard"; + }; + const noinit = struct { + const gnu = "noinit"; + }; + const @"noinline" = struct { + const gnu = "noinline"; + const declspec = "noinline"; + }; + const noipa = struct { + const gnu = "noipa"; + }; + // TODO: arbitrary number of arguments + // const nonnull = struct { + // const gnu = "nonnull"; + // const Args = struct { + // arg_index: []const u32, + // }; + // }; + const nonstring = struct { + const gnu = "nonstring"; + }; + const noplt = struct { + const gnu = "noplt"; + }; + const @"noreturn" = struct { + const gnu = "noreturn"; + const c2x = "noreturn"; + const declspec = "noreturn"; + }; + const nothrow = struct { + const gnu = "nothrow"; + const declspec = "nothrow"; + }; + const novtable = struct { + const declspec = "novtable"; + }; + // TODO: union args ? + // const optimize = struct { + // const gnu = "optimize"; + // const Args = struct { + // optimize, // u32 | []const u8 -- optimize? 
+ // }; + // }; + const @"packed" = struct { + const gnu = "packed"; + }; + const patchable_function_entry = struct { + const gnu = "patchable_function_entry"; + }; + const persistent = struct { + const gnu = "persistent"; + }; + const process = struct { + const declspec = "process"; + }; + const pure = struct { + const gnu = "pure"; + }; + const restrict = struct { + const declspec = "restrict"; + }; + const retain = struct { + const gnu = "retain"; + }; + const returns_nonnull = struct { + const gnu = "returns_nonnull"; + }; + const returns_twice = struct { + const gnu = "returns_twice"; + }; + const safebuffers = struct { + const declspec = "safebuffers"; + }; + const scalar_storage_order = struct { + const gnu = "scalar_storage_order"; + const Args = struct { + order: enum { + @"little-endian", + @"big-endian", + + const opts = struct { + const enum_kind = .string; + }; + }, + }; + }; + const section = struct { + const gnu = "section"; + const Args = struct { + name: []const u8, + }; + }; + const selectany = struct { + const declspec = "selectany"; + }; + const sentinel = struct { + const gnu = "sentinel"; + const Args = struct { + position: ?u32 = null, + }; + }; + const simd = struct { + const gnu = "simd"; + const Args = struct { + mask: ?enum { + notinbranch, + inbranch, + + const opts = struct { + const enum_kind = .string; + }; + } = null, + }; + }; + const spectre = struct { + const declspec = "spectre"; + const Args = struct { + arg: enum { + nomitigation, + + const opts = struct { + const enum_kind = .identifier; + }; + }, + }; + }; + const stack_protect = struct { + const gnu = "stack_protect"; + }; + const symver = struct { + const gnu = "symver"; + const Args = struct { + version: []const u8, // TODO: validate format "name2@nodename" + }; + }; + const target = struct { + const gnu = "target"; + const Args = struct { + options: []const u8, // TODO: multiple arguments + }; + }; + const target_clones = struct { + const gnu = "target_clones"; + const Args = struct { + options: []const u8, // TODO: multiple arguments + }; + }; + const thread = struct { + const declspec = "thread"; + }; + const tls_model = struct { + const gnu = "tls_model"; + const Args = struct { + model: enum { + @"global-dynamic", + @"local-dynamic", + @"initial-exec", + @"local-exec", + + const opts = struct { + const enum_kind = .string; + }; + }, + }; + }; + const transparent_union = struct { + const gnu = "transparent_union"; + }; + const unavailable = struct { + const gnu = "unavailable"; + const Args = struct { + msg: ?[]const u8 = null, + __name_tok: TokenIndex = undefined, + }; + }; + const uninitialized = struct { + const gnu = "uninitialized"; + }; + const unused = struct { + const gnu = "unused"; + const c2x = "maybe_unused"; + }; + const used = struct { + const gnu = "used"; + }; + const uuid = struct { + const declspec = "uuid"; + const Args = struct { + uuid: []const u8, + }; + }; + const vector_size = struct { + const gnu = "vector_size"; + const Args = struct { + bytes: u32, // TODO: validate "The bytes argument must be a positive power-of-two multiple of the base type size" + }; + }; + const visibility = struct { + const gnu = "visibility"; + const Args = struct { + visibility_type: enum { + default, + hidden, + internal, + protected, + + const opts = struct { + const enum_kind = .string; + }; + }, + }; + }; + const warn_if_not_aligned = struct { + const gnu = "warn_if_not_aligned"; + const Args = struct { + alignment: Alignment, + }; + }; + const warn_unused_result = struct { + const gnu = 
"warn_unused_result"; + }; + const warning = struct { + const gnu = "warning"; + const Args = struct { + message: []const u8, + }; + }; + const weak = struct { + const gnu = "weak"; + }; + const weakref = struct { + const gnu = "weakref"; + const Args = struct { + target: ?[]const u8 = null, + }; + }; + const zero_call_used_regs = struct { + const gnu = "zero_call_used_regs"; + const Args = struct { + choice: enum { + skip, + used, + @"used-gpr", + @"used-arg", + @"used-gpr-arg", + all, + @"all-gpr", + @"all-arg", + @"all-gpr-arg", + + const opts = struct { + const enum_kind = .string; + }; + }, + }; + }; + const asm_label = struct { + const Args = struct { + name: []const u8, + }; + }; +}; + +pub const Tag = std.meta.DeclEnum(attributes); + +pub const Arguments = blk: { + const decls = @typeInfo(attributes).Struct.decls; + var union_fields: [decls.len]std.builtin.TypeInfo.UnionField = undefined; + inline for (decls) |decl, i| { + union_fields[i] = .{ + .name = decl.name, + .field_type = if (@hasDecl(@field(attributes, decl.name), "Args")) @field(attributes, decl.name).Args else void, + .alignment = 0, + }; + } + + break :blk @Type(.{ + .Union = .{ + .layout = .Auto, + .tag_type = null, + .fields = &union_fields, + .decls = &.{}, + }, + }); +}; + +pub fn ArgumentsForTag(comptime tag: Tag) type { + const decl = @typeInfo(attributes).Struct.decls[@enumToInt(tag)]; + return if (@hasDecl(@field(attributes, decl.name), "Args")) @field(attributes, decl.name).Args else void; +} + +pub fn initArguments(tag: Tag, name_tok: TokenIndex) Arguments { + inline for (@typeInfo(Tag).Enum.fields) |field| { + if (@enumToInt(tag) == field.value) { + var args = @unionInit(Arguments, field.name, undefined); + const decl = @typeInfo(attributes).Struct.decls[field.value]; + if (@hasDecl(@field(attributes, decl.name), "Args") and @hasField(@field(attributes, decl.name).Args, "__name_tok")) { + @field(@field(args, field.name), "__name_tok") = name_tok; + } + return args; + } + } + unreachable; +} + +pub fn fromString(kind: Kind, namespace: ?[]const u8, name: []const u8) ?Tag { + return switch (kind) { + .c2x => fromStringC2X(namespace, name), + .declspec => fromStringDeclspec(name), + .gnu => fromStringGnu(name), + }; +} + +fn fromStringGnu(name: []const u8) ?Tag { + const normalized = normalize(name); + const decls = @typeInfo(attributes).Struct.decls; + @setEvalBranchQuota(3000); + inline for (decls) |decl, i| { + if (@hasDecl(@field(attributes, decl.name), "gnu")) { + if (mem.eql(u8, @field(attributes, decl.name).gnu, normalized)) { + return @intToEnum(Tag, i); + } + } + } + return null; +} + +fn fromStringC2X(namespace: ?[]const u8, name: []const u8) ?Tag { + const normalized = normalize(name); + if (namespace) |ns| { + const normalized_ns = normalize(ns); + if (mem.eql(u8, normalized_ns, "gnu")) { + return fromStringGnu(normalized); + } + return null; + } + const decls = @typeInfo(attributes).Struct.decls; + inline for (decls) |decl, i| { + if (@hasDecl(@field(attributes, decl.name), "c2x")) { + if (mem.eql(u8, @field(attributes, decl.name).c2x, normalized)) { + return @intToEnum(Tag, i); + } + } + } + return null; +} + +fn fromStringDeclspec(name: []const u8) ?Tag { + const decls = @typeInfo(attributes).Struct.decls; + inline for (decls) |decl, i| { + if (@hasDecl(@field(attributes, decl.name), "declspec")) { + if (mem.eql(u8, @field(attributes, decl.name).declspec, name)) { + return @intToEnum(Tag, i); + } + } + } + return null; +} + +fn normalize(name: []const u8) []const u8 { + if (name.len >= 4 and 
mem.startsWith(u8, name, "__") and mem.endsWith(u8, name, "__")) { + return name[2 .. name.len - 2]; + } + return name; +} diff --git a/src/aro/Builtins.zig b/src/aro/Builtins.zig new file mode 100644 index 000000000000..e749e05a97c5 --- /dev/null +++ b/src/aro/Builtins.zig @@ -0,0 +1,90 @@ +const std = @import("std"); +const Compilation = @import("Compilation.zig"); +const Type = @import("Type.zig"); + +const Builtins = @This(); + +const Builtin = struct { + spec: Type.Specifier, + func_ty: Type.Func, + attrs: Attributes, + + const Attributes = packed struct { + printf_like: u8 = 0, + vprintf_like: u8 = 0, + noreturn: bool = false, + libm: bool = false, + libc: bool = false, + returns_twice: bool = false, + eval_args: bool = true, + }; +}; +const BuiltinMap = std.StringHashMapUnmanaged(Builtin); + +_builtins: BuiltinMap = .{}, +_params: []Type.Func.Param = &.{}, + +pub fn deinit(b: *Builtins, gpa: std.mem.Allocator) void { + b._builtins.deinit(gpa); + gpa.free(b._params); +} + +fn add( + a: std.mem.Allocator, + b: *BuiltinMap, + name: []const u8, + ret_ty: Type, + param_types: []const Type, + spec: Type.Specifier, + attrs: Builtin.Attributes, +) void { + var params = a.alloc(Type.Func.Param, param_types.len) catch unreachable; // fixed buffer allocator; cannot fail (see `create`) + for (param_types) |param_ty, i| { + params[i] = .{ .name_tok = 0, .ty = param_ty, .name = "" }; + } + b.putAssumeCapacity(name, .{ + .spec = spec, + .func_ty = .{ + .return_type = ret_ty, + .params = params, + }, + .attrs = attrs, + }); +} + +pub fn create(comp: *Compilation) !Builtins { + // These counts must stay in sync with the `add` calls below: + // 3 builtins with 2 + 1 + 2 = 5 parameters in total. + const builtin_count = 3; + const param_count = 5; + + var b = BuiltinMap{}; + try b.ensureTotalCapacity(comp.gpa, builtin_count); + errdefer b.deinit(comp.gpa); + var _params = try comp.gpa.alloc(Type.Func.Param, param_count); + errdefer comp.gpa.free(_params); + var fib_state = std.heap.FixedBufferAllocator.init(std.mem.sliceAsBytes(_params)); + const a = fib_state.allocator(); + + const void_ty = Type{ .specifier = .void }; + var va_list = comp.types.va_list; + if (va_list.isArray()) va_list.decayArray(); + + add(a, &b, "__builtin_va_start", void_ty, &.{ va_list, .{ .specifier = .special_va_start } }, .func, .{}); + add(a, &b, "__builtin_va_end", void_ty, &.{va_list}, .func, .{}); + add(a, &b, "__builtin_va_copy", void_ty, &.{ va_list, va_list }, .func, .{}); + + return Builtins{ ._builtins = b, ._params = _params }; +} + +pub fn hasBuiltin(b: Builtins, name: []const u8) bool { + if (std.mem.eql(u8, name, "__builtin_va_arg") or + std.mem.eql(u8, name, "__builtin_choose_expr")) return true; + return b._builtins.getPtr(name) != null; +} + +pub fn get(b: Builtins, name: []const u8) ?Type { + const builtin = b._builtins.getPtr(name) orelse return null; + return Type{ + .specifier = builtin.spec, + .data = .{ .func = &builtin.func_ty }, + }; +} diff --git a/src/aro/CharInfo.zig b/src/aro/CharInfo.zig new file mode 100644 index 000000000000..f5935fd56ce1 --- /dev/null +++ b/src/aro/CharInfo.zig @@ -0,0 +1,487 @@ +//! This module provides functions for classifying characters according to +//! various C standards. All classification routines *do not* consider +//! characters from the basic character set; it is assumed those will be 
checked separately + +const assert = @import("std").debug.assert; + +/// C11 Standard Annex D +pub fn isC11IdChar(codepoint: u21) bool { + assert(codepoint > 0x7F); + return switch (codepoint) { + // 1 + 0x00A8, + 0x00AA, + 0x00AD, + 0x00AF, + 0x00B2...0x00B5, + 0x00B7...0x00BA, + 0x00BC...0x00BE, + 0x00C0...0x00D6, + 0x00D8...0x00F6, + 0x00F8...0x00FF, + + // 2 + 0x0100...0x167F, + 0x1681...0x180D, + 0x180F...0x1FFF, + + // 3 + 0x200B...0x200D, + 0x202A...0x202E, + 0x203F...0x2040, + 0x2054, + 0x2060...0x206F, + + // 4 + 0x2070...0x218F, + 0x2460...0x24FF, + 0x2776...0x2793, + 0x2C00...0x2DFF, + 0x2E80...0x2FFF, + + // 5 + 0x3004...0x3007, + 0x3021...0x302F, + 0x3031...0x303F, + + // 6 + 0x3040...0xD7FF, + + // 7 + 0xF900...0xFD3D, + 0xFD40...0xFDCF, + 0xFDF0...0xFE44, + 0xFE47...0xFFFD, + + // 8 + 0x10000...0x1FFFD, + 0x20000...0x2FFFD, + 0x30000...0x3FFFD, + 0x40000...0x4FFFD, + 0x50000...0x5FFFD, + 0x60000...0x6FFFD, + 0x70000...0x7FFFD, + 0x80000...0x8FFFD, + 0x90000...0x9FFFD, + 0xA0000...0xAFFFD, + 0xB0000...0xBFFFD, + 0xC0000...0xCFFFD, + 0xD0000...0xDFFFD, + 0xE0000...0xEFFFD, + => true, + else => false, + }; +} + +/// C99 Standard Annex D +pub fn isC99IdChar(codepoint: u21) bool { + assert(codepoint > 0x7F); + return switch (codepoint) { + // Latin + 0x00AA, + 0x00BA, + 0x00C0...0x00D6, + 0x00D8...0x00F6, + 0x00F8...0x01F5, + 0x01FA...0x0217, + 0x0250...0x02A8, + 0x1E00...0x1E9B, + 0x1EA0...0x1EF9, + 0x207F, + + // Greek + 0x0386, + 0x0388...0x038A, + 0x038C, + 0x038E...0x03A1, + 0x03A3...0x03CE, + 0x03D0...0x03D6, + 0x03DA, + 0x03DC, + 0x03DE, + 0x03E0, + 0x03E2...0x03F3, + 0x1F00...0x1F15, + 0x1F18...0x1F1D, + 0x1F20...0x1F45, + 0x1F48...0x1F4D, + 0x1F50...0x1F57, + 0x1F59, + 0x1F5B, + 0x1F5D, + 0x1F5F...0x1F7D, + 0x1F80...0x1FB4, + 0x1FB6...0x1FBC, + 0x1FC2...0x1FC4, + 0x1FC6...0x1FCC, + 0x1FD0...0x1FD3, + 0x1FD6...0x1FDB, + 0x1FE0...0x1FEC, + 0x1FF2...0x1FF4, + 0x1FF6...0x1FFC, + + // Cyrillic + 0x0401...0x040C, + 0x040E...0x044F, + 0x0451...0x045C, + 0x045E...0x0481, + 0x0490...0x04C4, + 0x04C7...0x04C8, + 0x04CB...0x04CC, + 0x04D0...0x04EB, + 0x04EE...0x04F5, + 0x04F8...0x04F9, + + // Armenian + 0x0531...0x0556, + 0x0561...0x0587, + + // Hebrew + 0x05B0...0x05B9, + 0x05BB...0x05BD, + 0x05BF, + 0x05C1...0x05C2, + 0x05D0...0x05EA, + 0x05F0...0x05F2, + + // Arabic + 0x0621...0x063A, + 0x0640...0x0652, + 0x0670...0x06B7, + 0x06BA...0x06BE, + 0x06C0...0x06CE, + 0x06D0...0x06DC, + 0x06E5...0x06E8, + 0x06EA...0x06ED, + + // Devanagari + 0x0901...0x0903, + 0x0905...0x0939, + 0x093E...0x094D, + 0x0950...0x0952, + 0x0958...0x0963, + + // Bengali + 0x0981...0x0983, + 0x0985...0x098C, + 0x098F...0x0990, + 0x0993...0x09A8, + 0x09AA...0x09B0, + 0x09B2, + 0x09B6...0x09B9, + 0x09BE...0x09C4, + 0x09C7...0x09C8, + 0x09CB...0x09CD, + 0x09DC...0x09DD, + 0x09DF...0x09E3, + 0x09F0...0x09F1, + + // Gurmukhi + 0x0A02, + 0x0A05...0x0A0A, + 0x0A0F...0x0A10, + 0x0A13...0x0A28, + 0x0A2A...0x0A30, + 0x0A32...0x0A33, + 0x0A35...0x0A36, + 0x0A38...0x0A39, + 0x0A3E...0x0A42, + 0x0A47...0x0A48, + 0x0A4B...0x0A4D, + 0x0A59...0x0A5C, + 0x0A5E, + 0x0A74, + + // Gujarati + 0x0A81...0x0A83, + 0x0A85...0x0A8B, + 0x0A8D, + 0x0A8F...0x0A91, + 0x0A93...0x0AA8, + 0x0AAA...0x0AB0, + 0x0AB2...0x0AB3, + 0x0AB5...0x0AB9, + 0x0ABD...0x0AC5, + 0x0AC7...0x0AC9, + 0x0ACB...0x0ACD, + 0x0AD0, + 0x0AE0, + + // Oriya + 0x0B01...0x0B03, + 0x0B05...0x0B0C, + 0x0B0F...0x0B10, + 0x0B13...0x0B28, + 0x0B2A...0x0B30, + 0x0B32...0x0B33, + 0x0B36...0x0B39, + 0x0B3E...0x0B43, + 0x0B47...0x0B48, + 0x0B4B...0x0B4D, + 0x0B5C...0x0B5D, + 
0x0B5F...0x0B61, + + // Tamil + 0x0B82...0x0B83, + 0x0B85...0x0B8A, + 0x0B8E...0x0B90, + 0x0B92...0x0B95, + 0x0B99...0x0B9A, + 0x0B9C, + 0x0B9E...0x0B9F, + 0x0BA3...0x0BA4, + 0x0BA8...0x0BAA, + 0x0BAE...0x0BB5, + 0x0BB7...0x0BB9, + 0x0BBE...0x0BC2, + 0x0BC6...0x0BC8, + 0x0BCA...0x0BCD, + + // Telugu + 0x0C01...0x0C03, + 0x0C05...0x0C0C, + 0x0C0E...0x0C10, + 0x0C12...0x0C28, + 0x0C2A...0x0C33, + 0x0C35...0x0C39, + 0x0C3E...0x0C44, + 0x0C46...0x0C48, + 0x0C4A...0x0C4D, + 0x0C60...0x0C61, + + // Kannada + 0x0C82...0x0C83, + 0x0C85...0x0C8C, + 0x0C8E...0x0C90, + 0x0C92...0x0CA8, + 0x0CAA...0x0CB3, + 0x0CB5...0x0CB9, + 0x0CBE...0x0CC4, + 0x0CC6...0x0CC8, + 0x0CCA...0x0CCD, + 0x0CDE, + 0x0CE0...0x0CE1, + + // Malayalam + 0x0D02...0x0D03, + 0x0D05...0x0D0C, + 0x0D0E...0x0D10, + 0x0D12...0x0D28, + 0x0D2A...0x0D39, + 0x0D3E...0x0D43, + 0x0D46...0x0D48, + 0x0D4A...0x0D4D, + 0x0D60...0x0D61, + + // Thai (excluding digits 0x0E50...0x0E59; originally 0x0E01...0x0E3A and 0x0E40...0x0E5B) + 0x0E01...0x0E3A, + 0x0E40...0x0E4F, + 0x0E5A...0x0E5B, + + // Lao + 0x0E81...0x0E82, + 0x0E84, + 0x0E87...0x0E88, + 0x0E8A, + 0x0E8D, + 0x0E94...0x0E97, + 0x0E99...0x0E9F, + 0x0EA1...0x0EA3, + 0x0EA5, + 0x0EA7, + 0x0EAA...0x0EAB, + 0x0EAD...0x0EAE, + 0x0EB0...0x0EB9, + 0x0EBB...0x0EBD, + 0x0EC0...0x0EC4, + 0x0EC6, + 0x0EC8...0x0ECD, + 0x0EDC...0x0EDD, + + // Tibetan + 0x0F00, + 0x0F18...0x0F19, + 0x0F35, + 0x0F37, + 0x0F39, + 0x0F3E...0x0F47, + 0x0F49...0x0F69, + 0x0F71...0x0F84, + 0x0F86...0x0F8B, + 0x0F90...0x0F95, + 0x0F97, + 0x0F99...0x0FAD, + 0x0FB1...0x0FB7, + 0x0FB9, + + // Georgian + 0x10A0...0x10C5, + 0x10D0...0x10F6, + + // Hiragana + 0x3041...0x3093, + 0x309B...0x309C, + + // Katakana + 0x30A1...0x30F6, + 0x30FB...0x30FC, + + // Bopomofo + 0x3105...0x312C, + + // CJK Unified Ideographs + 0x4E00...0x9FA5, + + // Hangul + 0xAC00...0xD7A3, + + // Digits + 0x0660...0x0669, + 0x06F0...0x06F9, + 0x0966...0x096F, + 0x09E6...0x09EF, + 0x0A66...0x0A6F, + 0x0AE6...0x0AEF, + 0x0B66...0x0B6F, + 0x0BE7...0x0BEF, + 0x0C66...0x0C6F, + 0x0CE6...0x0CEF, + 0x0D66...0x0D6F, + 0x0E50...0x0E59, + 0x0ED0...0x0ED9, + 0x0F20...0x0F33, + + // Special characters + 0x00B5, + 0x00B7, + 0x02B0...0x02B8, + 0x02BB, + 0x02BD...0x02C1, + 0x02D0...0x02D1, + 0x02E0...0x02E4, + 0x037A, + 0x0559, + 0x093D, + 0x0B3D, + 0x1FBE, + 0x203F...0x2040, + 0x2102, + 0x2107, + 0x210A...0x2113, + 0x2115, + 0x2118...0x211D, + 0x2124, + 0x2126, + 0x2128, + 0x212A...0x2131, + 0x2133...0x2138, + 0x2160...0x2182, + 0x3005...0x3007, + 0x3021...0x3029, + => true, + else => false, + }; +} + +/// C11 standard Annex D +pub fn isC11DisallowedInitialIdChar(codepoint: u21) bool { + assert(codepoint > 0x7F); + return switch (codepoint) { + 0x0300...0x036F, + 0x1DC0...0x1DFF, + 0x20D0...0x20FF, + 0xFE20...0xFE2F, + => true, + else => false, + }; +} + +/// These are "digit" characters; C99 disallows them as the first +/// character of an identifier +pub fn isC99DisallowedInitialIDChar(codepoint: u21) bool { + assert(codepoint > 0x7F); + return switch (codepoint) { + 0x0660...0x0669, + 0x06F0...0x06F9, + 0x0966...0x096F, + 0x09E6...0x09EF, + 0x0A66...0x0A6F, + 0x0AE6...0x0AEF, + 0x0B66...0x0B6F, + 0x0BE7...0x0BEF, + 0x0C66...0x0C6F, + 0x0CE6...0x0CEF, + 0x0D66...0x0D6F, + 0x0E50...0x0E59, + 0x0ED0...0x0ED9, + 0x0F20...0x0F33, + => true, + else => false, + }; +} + +pub fn isInvisible(codepoint: u21) bool { + assert(codepoint > 0x7F); + return switch (codepoint) { + 0x00ad, // SOFT HYPHEN + 0x200b, // ZERO WIDTH SPACE + 0x200c, // ZERO WIDTH NON-JOINER + 0x200d, // ZERO WIDTH JOINER
+ 0x2060, // WORD JOINER + 0x2061, // FUNCTION APPLICATION + 0x2062, // INVISIBLE TIMES + 0x2063, // INVISIBLE SEPARATOR + 0x2064, // INVISIBLE PLUS + 0xfeff, // ZERO WIDTH NO-BREAK SPACE + => true, + else => false, + }; +} + +/// Checks for identifier characters which resemble non-identifier characters +pub fn homoglyph(codepoint: u21) ?u21 { + assert(codepoint > 0x7F); + return switch (codepoint) { + 0x01c3 => '!', // LATIN LETTER RETROFLEX CLICK + 0x037e => ';', // GREEK QUESTION MARK + 0x2212 => '-', // MINUS SIGN + 0x2215 => '/', // DIVISION SLASH + 0x2216 => '\\', // SET MINUS + 0x2217 => '*', // ASTERISK OPERATOR + 0x2223 => '|', // DIVIDES + 0x2227 => '^', // LOGICAL AND + 0x2236 => ':', // RATIO + 0x223c => '~', // TILDE OPERATOR + 0xa789 => ':', // MODIFIER LETTER COLON + 0xff01 => '!', // FULLWIDTH EXCLAMATION MARK + 0xff03 => '#', // FULLWIDTH NUMBER SIGN + 0xff04 => '$', // FULLWIDTH DOLLAR SIGN + 0xff05 => '%', // FULLWIDTH PERCENT SIGN + 0xff06 => '&', // FULLWIDTH AMPERSAND + 0xff08 => '(', // FULLWIDTH LEFT PARENTHESIS + 0xff09 => ')', // FULLWIDTH RIGHT PARENTHESIS + 0xff0a => '*', // FULLWIDTH ASTERISK + 0xff0b => '+', // FULLWIDTH PLUS SIGN + 0xff0c => ',', // FULLWIDTH COMMA + 0xff0d => '-', // FULLWIDTH HYPHEN-MINUS + 0xff0e => '.', // FULLWIDTH FULL STOP + 0xff0f => '/', // FULLWIDTH SOLIDUS + 0xff1a => ':', // FULLWIDTH COLON + 0xff1b => ';', // FULLWIDTH SEMICOLON + 0xff1c => '<', // FULLWIDTH LESS-THAN SIGN + 0xff1d => '=', // FULLWIDTH EQUALS SIGN + 0xff1e => '>', // FULLWIDTH GREATER-THAN SIGN + 0xff1f => '?', // FULLWIDTH QUESTION MARK + 0xff20 => '@', // FULLWIDTH COMMERCIAL AT + 0xff3b => '[', // FULLWIDTH LEFT SQUARE BRACKET + 0xff3c => '\\', // FULLWIDTH REVERSE SOLIDUS + 0xff3d => ']', // FULLWIDTH RIGHT SQUARE BRACKET + 0xff3e => '^', // FULLWIDTH CIRCUMFLEX ACCENT + 0xff5b => '{', // FULLWIDTH LEFT CURLY BRACKET + 0xff5c => '|', // FULLWIDTH VERTICAL LINE + 0xff5d => '}', // FULLWIDTH RIGHT CURLY BRACKET + 0xff5e => '~', // FULLWIDTH TILDE + else => null, + }; +} diff --git a/src/aro/Codegen.zig b/src/aro/Codegen.zig new file mode 100644 index 000000000000..2afac49672fa --- /dev/null +++ b/src/aro/Codegen.zig @@ -0,0 +1,847 @@ +aro_comp: *aro.Compilation, +tree: aro.Tree, +bin_file: *link.File, +arena: Allocator, +gpa: Allocator, +symbols: std.StringHashMapUnmanaged(*Module.Decl), +verbose_air: bool, + +const builtin = @import("builtin"); +const std = @import("std"); +const Allocator = std.mem.Allocator; +const log = std.log.scoped(.aro); + +const Codegen = @This(); +const aro = @import("lib.zig"); +const link = @import("../link.zig"); +const NodeIndex = aro.Tree.NodeIndex; +const Value = @import("../value.zig").Value; +const Type = @import("../type.zig").Type; +const TypedValue = @import("../TypedValue.zig"); +const Air = @import("../Air.zig"); +const Compilation = @import("../Compilation.zig"); +const Module = @import("../Module.zig"); +const Liveness = @import("../Liveness.zig"); + +pub fn generateTree(comp: *Compilation, aro_comp: *aro.Compilation, tree: aro.Tree, arena: Allocator) !void { + var c = Codegen{ + .bin_file = comp.bin_file, + .aro_comp = aro_comp, + .tree = tree, + .arena = arena, + .gpa = comp.gpa, + .verbose_air = comp.verbose_air, + .symbols = .{}, + }; + defer c.symbols.deinit(comp.gpa); + + const node_tags = tree.nodes.items(.tag); + const node_datas = tree.nodes.items(.data); + const node_tys = tree.nodes.items(.ty); + + for (tree.root_decls) |decl_node| { + switch (node_tags[@enumToInt(decl_node)]) { + // these produce 
no code + .static_assert, + .typedef, + .struct_decl_two, + .union_decl_two, + .enum_decl_two, + .struct_decl, + .union_decl, + .enum_decl, + => continue, + + // symbol definitions + .fn_proto => { + const mod = comp.bin_file.options.module.?; + const name = c.tree.tokSlice(node_datas[@enumToInt(decl_node)].decl.name); + const fn_proto_ty = node_tys[@enumToInt(decl_node)]; + const zig_fn_ty = try c.lowerType(fn_proto_ty); + const new_decl = try mod.createDecl2(.{ + .ty = zig_fn_ty, + .val = undefined, + }, name); + new_decl.val = try Value.Tag.extern_fn.create(c.arena, new_decl); + try c.symbols.put(c.gpa, name, new_decl); + }, + + .static_fn_proto, + .inline_fn_proto, + .inline_static_fn_proto, + .extern_var, + .threadlocal_extern_var, + => { + const name = c.tree.tokSlice(node_datas[@enumToInt(decl_node)].decl.name); + log.debug("ignoring the opportunity to define a symbol named {s}", .{name}); + }, + + // function definition + .fn_def, + .static_fn_def, + .inline_fn_def, + .inline_static_fn_def, + => try c.genFn(decl_node), + + .@"var", + .static_var, + .threadlocal_var, + .threadlocal_static_var, + => try c.genVar(decl_node), + + else => unreachable, + } + } +} + +const Func = struct { + codegen: *Codegen, + name: []const u8, + + air_instructions: std.MultiArrayList(Air.Inst) = .{}, + air_extra: std.ArrayListUnmanaged(u32) = .{}, + air_values: std.ArrayListUnmanaged(Value) = .{}, + + fn deinit(func: *Func) void { + const gpa = func.codegen.gpa; + func.air_instructions.deinit(gpa); + func.air_extra.deinit(gpa); + func.air_values.deinit(gpa); + func.* = undefined; + } + + /// Reminder to refactor this out with the equivalent Sema function. + fn addConstant(func: *Func, ty: Type, val: Value) !Air.Inst.Ref { + const gpa = func.codegen.gpa; + const ty_inst = try func.addType(ty); + try func.air_values.append(gpa, val); + try func.air_instructions.append(gpa, .{ + .tag = .constant, + .data = .{ .ty_pl = .{ + .ty = ty_inst, + .payload = @intCast(u32, func.air_values.items.len - 1), + } }, + }); + return Air.indexToRef(@intCast(u32, func.air_instructions.len - 1)); + } + + /// Reminder to refactor this out with the equivalent Sema function. 
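+ /// Maps well-known types straight to their interned AIR type refs; any
+ /// other type is emitted as a `const_ty` instruction.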
+ fn addType(func: *Func, ty: Type) !Air.Inst.Ref { + switch (ty.tag()) { + .u1 => return .u1_type, + .u8 => return .u8_type, + .i8 => return .i8_type, + .u16 => return .u16_type, + .i16 => return .i16_type, + .u32 => return .u32_type, + .i32 => return .i32_type, + .u64 => return .u64_type, + .i64 => return .i64_type, + .u128 => return .u128_type, + .i128 => return .i128_type, + .usize => return .usize_type, + .isize => return .isize_type, + .c_short => return .c_short_type, + .c_ushort => return .c_ushort_type, + .c_int => return .c_int_type, + .c_uint => return .c_uint_type, + .c_long => return .c_long_type, + .c_ulong => return .c_ulong_type, + .c_longlong => return .c_longlong_type, + .c_ulonglong => return .c_ulonglong_type, + .c_longdouble => return .c_longdouble_type, + .f16 => return .f16_type, + .f32 => return .f32_type, + .f64 => return .f64_type, + .f80 => return .f80_type, + .f128 => return .f128_type, + .anyopaque => return .anyopaque_type, + .bool => return .bool_type, + .void => return .void_type, + .type => return .type_type, + .anyerror => return .anyerror_type, + .comptime_int => return .comptime_int_type, + .comptime_float => return .comptime_float_type, + .noreturn => return .noreturn_type, + .@"anyframe" => return .anyframe_type, + .@"null" => return .null_type, + .@"undefined" => return .undefined_type, + .enum_literal => return .enum_literal_type, + .atomic_order => return .atomic_order_type, + .atomic_rmw_op => return .atomic_rmw_op_type, + .calling_convention => return .calling_convention_type, + .address_space => return .address_space_type, + .float_mode => return .float_mode_type, + .reduce_op => return .reduce_op_type, + .call_options => return .call_options_type, + .prefetch_options => return .prefetch_options_type, + .export_options => return .export_options_type, + .extern_options => return .extern_options_type, + .type_info => return .type_info_type, + .manyptr_u8 => return .manyptr_u8_type, + .manyptr_const_u8 => return .manyptr_const_u8_type, + .fn_noreturn_no_args => return .fn_noreturn_no_args_type, + .fn_void_no_args => return .fn_void_no_args_type, + .fn_naked_noreturn_no_args => return .fn_naked_noreturn_no_args_type, + .fn_ccc_void_no_args => return .fn_ccc_void_no_args_type, + .single_const_pointer_to_comptime_int => return .single_const_pointer_to_comptime_int_type, + .const_slice_u8 => return .const_slice_u8_type, + .anyerror_void_error_union => return .anyerror_void_error_union_type, + .generic_poison => return .generic_poison_type, + else => {}, + } + try func.air_instructions.append(func.codegen.gpa, .{ + .tag = .const_ty, + .data = .{ .ty = ty }, + }); + return Air.indexToRef(@intCast(u32, func.air_instructions.len - 1)); + } + + fn getTmpAir(func: Func) Air { + return .{ + .instructions = func.air_instructions.slice(), + .extra = func.air_extra.items, + .values = func.air_values.items, + }; + } + + fn addExtra(func: *Func, extra: anytype) Allocator.Error!u32 { + const fields = std.meta.fields(@TypeOf(extra)); + try func.air_extra.ensureUnusedCapacity(func.gpa, fields.len); + return addExtraAssumeCapacity(func, extra); + } + + fn addExtraAssumeCapacity(func: *Func, extra: anytype) u32 { + const fields = std.meta.fields(@TypeOf(extra)); + const result = @intCast(u32, func.air_extra.items.len); + inline for (fields) |field| { + func.air_extra.appendAssumeCapacity(switch (field.field_type) { + u32 => @field(extra, field.name), + Air.Inst.Ref => @enumToInt(@field(extra, field.name)), + i32 => @bitCast(u32, @field(extra, field.name)), + else => 
@compileError("bad field type"), + }); + } + return result; + } + + fn appendRefsAssumeCapacity(func: *Func, refs: []const Air.Inst.Ref) void { + const coerced = @bitCast([]const u32, refs); + func.air_extra.appendSliceAssumeCapacity(coerced); + } + + fn declRef(func: *Func, decl: *Module.Decl) !Air.Inst.Ref { + return func.addConstant( + try Type.ptr(func.codegen.arena, .{ + .pointee_type = decl.ty, + .mutable = false, + .@"addrspace" = decl.@"addrspace", + }), + try Value.Tag.decl_ref.create(func.codegen.arena, decl), + ); + } +}; + +fn genFn(c: *Codegen, decl_node: NodeIndex) !void { + const node_datas = c.tree.nodes.items(.data); + const node_data = node_datas[@enumToInt(decl_node)].decl; + const name = c.tree.tokSlice(node_data.name); + log.debug("genFn {s}", .{name}); + const body_node = node_data.node; + + var func: Func = .{ + .codegen = c, + .name = name, + }; + defer func.deinit(); + + // First few indexes of extra are reserved and set at the end. + const reserved_count = @typeInfo(Air.ExtraIndex).Enum.fields.len; + try func.air_extra.ensureTotalCapacity(c.gpa, reserved_count); + func.air_extra.items.len += reserved_count; + + var block: Block = .{ + .func = &func, + .instructions = .{}, + }; + defer block.instructions.deinit(c.gpa); + + _ = try genNode(&func, &block, body_node); + + try func.air_extra.ensureUnusedCapacity(c.gpa, @typeInfo(Air.Block).Struct.fields.len + + block.instructions.items.len); + const main_block_index = func.addExtraAssumeCapacity(Air.Block{ + .body_len = @intCast(u32, block.instructions.items.len), + }); + func.air_extra.appendSliceAssumeCapacity(block.instructions.items); + func.air_extra.items[@enumToInt(Air.ExtraIndex.main_block)] = main_block_index; + + var air = func.getTmpAir(); + + var liveness = try Liveness.analyze(c.gpa, air, undefined); + defer liveness.deinit(c.gpa); + + if (builtin.mode == .Debug and c.verbose_air) { + std.debug.print("# Begin Function AIR: {s}:\n", .{name}); + @import("../print_air.zig").dump(c.gpa, air, undefined, liveness); + std.debug.print("# End Function AIR: {s}\n\n", .{name}); + } + + const node_tys = c.tree.nodes.items(.ty); + const aro_fn_ty = node_tys[@enumToInt(decl_node)]; + const zig_fn_ty = try c.lowerType(aro_fn_ty); + + const mod = c.bin_file.options.module.?; + const module_fn = try c.gpa.create(Module.Fn); + const fn_decl = try mod.createDecl2(.{ + .ty = zig_fn_ty, + .val = try Value.Tag.function.create(c.arena, module_fn), + }, name); + //defer { + // module_fn.deinit(c.gpa); + // c.gpa.destroy(module_fn); + //} + module_fn.* = .{ + .owner_decl = fn_decl, + .zir_body_inst = undefined, + .lbrace_line = 0, + .rbrace_line = 0, + .lbrace_column = 0, + .rbrace_column = 0, + .state = .success, + }; + + c.bin_file.updateFunc(mod, module_fn, air, liveness) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.AnalysisFail => { + fn_decl.analysis = .codegen_failure; + return; + }, + else => { + try mod.failed_decls.ensureUnusedCapacity(c.gpa, 1); + mod.failed_decls.putAssumeCapacityNoClobber(fn_decl, try Module.ErrorMsg.create( + c.gpa, + fn_decl.srcLoc(), + "unable to codegen: {s}", + .{@errorName(err)}, + )); + fn_decl.analysis = .codegen_failure_retryable; + return; + }, + }; +} + +fn lowerType(c: *Codegen, aro_ty: aro.Type) Allocator.Error!Type { + switch (aro_ty.specifier) { + .void => return Type.void, + .bool => return Type.bool, + .char, .schar => return Type.initTag(.i8), + .uchar => return Type.initTag(.u8), + .short => return Type.initTag(.c_short), + .ushort => return 
Type.initTag(.c_ushort), + .int => return Type.initTag(.c_int), + .uint => return Type.initTag(.c_uint), + .long => return Type.initTag(.c_long), + .ulong => return Type.initTag(.c_ulong), + .long_long => return Type.initTag(.c_longlong), + .ulong_long => return Type.initTag(.c_ulonglong), + + .float => return Type.initTag(.f32), + .double => return Type.initTag(.f64), + .long_double => return Type.initTag(.c_longdouble), + + // int foo(int bar, char baz, ...) + .var_args_func => return c.lowerTypeFunc(aro_ty, true), + // data.func + // int foo(int bar, char baz) and int (void) + .func => return c.lowerTypeFunc(aro_ty, false), + + .pointer => { + const zig_ty = try Type.ptr(c.arena, .{ + .pointee_type = try c.lowerType(aro_ty.data.sub_type.*), + .@"addrspace" = .generic, + .mutable = !aro_ty.qual.@"const", + .@"volatile" = aro_ty.qual.@"volatile", + .@"allowzero" = true, + .size = .C, + }); + return zig_ty; + }, + + .complex_float, + .complex_double, + .complex_long_double, + + // data.sub_type + .unspecified_variable_len_array, + .decayed_unspecified_variable_len_array, + // int foo(bar, baz) and int foo() + // is also var args, but we can give warnings about incorrect amounts of parameters + .old_style_func, + + // data.array + .array, + .decayed_array, + .static_array, + .decayed_static_array, + .incomplete_array, + .decayed_incomplete_array, + // data.expr + .variable_len_array, + .decayed_variable_len_array, + + // data.record + .@"struct", + .@"union", + + // data.enum + .@"enum", + + // typeof(type-name) + .typeof_type, + // decayed array created with typeof(type-name) + .decayed_typeof_type, + + // typeof(expression) + .typeof_expr, + // decayed array created with typeof(expression) + .decayed_typeof_expr, + + // data.attributed + .attributed, + + // special type used to implement __builtin_va_start + .special_va_start, + => std.debug.panic("TODO handle {s}", .{@tagName(aro_ty.specifier)}), + } +} + +fn lowerTypeFunc(c: *Codegen, aro_ty: aro.Type, is_var_args: bool) Allocator.Error!Type { + const param_types = try c.arena.alloc(Type, aro_ty.data.func.params.len); + for (param_types) |*ty, i| { + ty.* = try c.lowerType(aro_ty.data.func.params[i].ty); + } + + const zig_ty = try Type.Tag.function.create(c.arena, .{ + .param_types = param_types, + .comptime_params = undefined, + .return_type = try c.lowerType(aro_ty.data.func.return_type), + .alignment = 0, + .cc = .C, + .is_var_args = is_var_args, + .is_generic = false, + }); + return zig_ty; +} + +fn lowerValue(c: *Codegen, aro_ty: aro.Type, aro_val: aro.Value) Allocator.Error!TypedValue { + const zig_ty = try c.lowerType(aro_ty); + switch (aro_val.tag) { + .unavailable => unreachable, + .int => { + if (zig_ty.isSignedInt()) { + const signed_int = aro_val.signExtend(aro_ty, c.aro_comp); + return TypedValue{ + .ty = zig_ty, + .val = try Value.Tag.int_i64.create(c.arena, signed_int), + }; + } else { + return TypedValue{ + .ty = zig_ty, + .val = try Value.Tag.int_u64.create(c.arena, aro_val.data.int), + }; + } + }, + .float => @panic("TODO"), + .array => @panic("TODO"), + .bytes => @panic("TODO"), + } +} + +const Error = error{OutOfMemory}; + +fn genNode(func: *Func, block: *Block, node: NodeIndex) Error!Air.Inst.Ref { + const tree = func.codegen.tree; + const node_tys = tree.nodes.items(.ty); + const node_datas = tree.nodes.items(.data); + const node_tags = tree.nodes.items(.tag); + + if (tree.value_map.get(node)) |some| { + if (some.tag == .int) { + const zig_tv = try func.codegen.lowerValue(node_tys[@enumToInt(node)], some); + return 
func.addConstant(zig_tv.ty, zig_tv.val); + } + } + + const data = node_datas[@enumToInt(node)]; + switch (node_tags[@enumToInt(node)]) { + .static_assert => return Air.Inst.Ref.void_value, + .compound_stmt_two => { + if (data.bin.lhs != .none) _ = try genNode(func, block, data.bin.lhs); + if (data.bin.rhs != .none) _ = try genNode(func, block, data.bin.rhs); + return Air.Inst.Ref.void_value; + }, + .compound_stmt => { + const body = tree.data[data.range.start..data.range.end]; + for (body) |stmt| { + _ = try genNode(func, block, stmt); + } + return Air.Inst.Ref.void_value; + }, + .call_expr_one => if (data.bin.rhs != .none) + return genCall(func, block, data.bin.lhs, &.{data.bin.rhs}) + else + return genCall(func, block, data.bin.lhs, &.{}), + .call_expr => return genCall(func, block, tree.data[data.range.start], tree.data[data.range.start + 1 .. data.range.end]), + .function_to_pointer => return genNode(func, block, data.un), // no-op + .array_to_pointer => { + const operand = try genNode(func, block, data.un); + const array_val = func.getTmpAir().value(operand).?; + const tmp_bytes = array_val.castTag(.bytes).?.data; + + var anon_decl = try block.startAnonDecl(); + defer anon_decl.deinit(); + + const bytes = try anon_decl.arena().dupeZ(u8, tmp_bytes); + + const new_decl = try anon_decl.finish( + try Type.Tag.array_u8_sentinel_0.create(anon_decl.arena(), bytes.len), + try Value.Tag.bytes.create(anon_decl.arena(), bytes[0 .. bytes.len + 1]), + ); + + return func.declRef(new_decl); + }, + .decl_ref_expr => { + // TODO locals and arguments + const name = tree.tokSlice(data.decl_ref); + const decl = func.codegen.symbols.get(name).?; + return func.declRef(decl); + }, + .return_stmt => { + const operand = try genNode(func, block, data.un); + _ = try block.addUnOp(.ret, operand); + return Air.Inst.Ref.unreachable_value; + }, + .implicit_return => { + _ = try block.addUnOp(.ret, .void_value); + return Air.Inst.Ref.void_value; + }, + .int_literal => { + const zig_ty = try func.codegen.lowerType(node_tys[@enumToInt(node)]); + if (zig_ty.isSignedInt()) { + @panic("TODO"); + } + const zig_val = try Value.Tag.int_u64.create(func.codegen.arena, data.int); + return func.addConstant(zig_ty, zig_val); + }, + .string_literal_expr => { + const ast_bytes = tree.value_map.get(node).?.data.bytes; + const array_val = try Value.Tag.bytes.create(func.codegen.arena, ast_bytes); + const array_ty = try Type.Tag.array_u8.create(func.codegen.arena, ast_bytes.len); + return func.addConstant(array_ty, array_val); + }, + else => return std.debug.panic("TODO lower Aro AST tag {}\n", .{node_tags[@enumToInt(node)]}), + } +} + +fn genCall(func: *Func, block: *Block, lhs: NodeIndex, args: []const NodeIndex) Error!Air.Inst.Ref { + const callee = try genNode(func, block, lhs); + + const air_args = try func.codegen.arena.alloc(Air.Inst.Ref, args.len); + for (args) |arg_node, i| { + air_args[i] = try genNode(func, block, arg_node); + } + + try func.air_extra.ensureUnusedCapacity(func.codegen.gpa, @typeInfo(Air.Call).Struct.fields.len + args.len); + + const func_inst = try block.addInst(.{ + .tag = .call, + .data = .{ .pl_op = .{ + .operand = callee, + .payload = func.addExtraAssumeCapacity(Air.Call{ + .args_len = @intCast(u32, args.len), + }), + } }, + }); + func.appendRefsAssumeCapacity(air_args); + + return func_inst; +} + +fn genVar(c: *Codegen, decl: NodeIndex) !void { + const node_datas = c.tree.nodes.items(.data); + const name = c.tree.tokSlice(node_datas[@enumToInt(decl)].decl.name); + log.debug("genVar {s}", .{name}); +} 
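+
+// Worked example (an illustrative sketch of the flow, not additional codegen
+// logic): lowering the C statement `puts("hello");` passes through the cases
+// above roughly as follows:
+//
+//   .call_expr_one
+//     lhs: .function_to_pointer -> .decl_ref_expr "puts"
+//          (looked up in `func.codegen.symbols`, lowered via `declRef`)
+//     rhs: .array_to_pointer -> .string_literal_expr
+//          (the bytes become an anonymous `array_u8_sentinel_0` decl,
+//          also referenced through `declRef`)
+//
+// `genCall` then emits a single `.call` AIR instruction whose `pl_op`
+// payload carries the callee operand and the argument list.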
+ +pub const Block = struct { + func: *Func, + /// The AIR instructions generated for this block. + instructions: std.ArrayListUnmanaged(Air.Inst.Index), + + pub fn addTy( + block: *Block, + tag: Air.Inst.Tag, + ty: Type, + ) error{OutOfMemory}!Air.Inst.Ref { + return block.addInst(.{ + .tag = tag, + .data = .{ .ty = ty }, + }); + } + + pub fn addTyOp( + block: *Block, + tag: Air.Inst.Tag, + ty: Type, + operand: Air.Inst.Ref, + ) error{OutOfMemory}!Air.Inst.Ref { + return block.addInst(.{ + .tag = tag, + .data = .{ .ty_op = .{ + .ty = try block.func.addType(ty), + .operand = operand, + } }, + }); + } + + pub fn addBitCast(block: *Block, ty: Type, operand: Air.Inst.Ref) Allocator.Error!Air.Inst.Ref { + return block.addInst(.{ + .tag = .bitcast, + .data = .{ .ty_op = .{ + .ty = try block.func.addType(ty), + .operand = operand, + } }, + }); + } + + pub fn addNoOp(block: *Block, tag: Air.Inst.Tag) error{OutOfMemory}!Air.Inst.Ref { + return block.addInst(.{ + .tag = tag, + .data = .{ .no_op = {} }, + }); + } + + pub fn addUnOp( + block: *Block, + tag: Air.Inst.Tag, + operand: Air.Inst.Ref, + ) error{OutOfMemory}!Air.Inst.Ref { + return block.addInst(.{ + .tag = tag, + .data = .{ .un_op = operand }, + }); + } + + pub fn addBr( + block: *Block, + target_block: Air.Inst.Index, + operand: Air.Inst.Ref, + ) error{OutOfMemory}!Air.Inst.Ref { + return block.addInst(.{ + .tag = .br, + .data = .{ .br = .{ + .block_inst = target_block, + .operand = operand, + } }, + }); + } + + fn addBinOp( + block: *Block, + tag: Air.Inst.Tag, + lhs: Air.Inst.Ref, + rhs: Air.Inst.Ref, + ) error{OutOfMemory}!Air.Inst.Ref { + return block.addInst(.{ + .tag = tag, + .data = .{ .bin_op = .{ + .lhs = lhs, + .rhs = rhs, + } }, + }); + } + + fn addArg(block: *Block, ty: Type, name: u32) error{OutOfMemory}!Air.Inst.Ref { + return block.addInst(.{ + .tag = .arg, + .data = .{ .ty_str = .{ + .ty = try block.func.addType(ty), + .str = name, + } }, + }); + } + + fn addStructFieldPtr( + block: *Block, + struct_ptr: Air.Inst.Ref, + field_index: u32, + ptr_field_ty: Type, + ) !Air.Inst.Ref { + const ty = try block.func.addType(ptr_field_ty); + const tag: Air.Inst.Tag = switch (field_index) { + 0 => .struct_field_ptr_index_0, + 1 => .struct_field_ptr_index_1, + 2 => .struct_field_ptr_index_2, + 3 => .struct_field_ptr_index_3, + else => { + return block.addInst(.{ + .tag = .struct_field_ptr, + .data = .{ .ty_pl = .{ + .ty = ty, + .payload = try block.func.addExtra(Air.StructField{ + .struct_operand = struct_ptr, + .field_index = field_index, + }), + } }, + }); + }, + }; + return block.addInst(.{ + .tag = tag, + .data = .{ .ty_op = .{ + .ty = ty, + .operand = struct_ptr, + } }, + }); + } + + pub fn addStructFieldVal( + block: *Block, + struct_val: Air.Inst.Ref, + field_index: u32, + field_ty: Type, + ) !Air.Inst.Ref { + return block.addInst(.{ + .tag = .struct_field_val, + .data = .{ .ty_pl = .{ + .ty = try block.func.addType(field_ty), + .payload = try block.func.addExtra(Air.StructField{ + .struct_operand = struct_val, + .field_index = field_index, + }), + } }, + }); + } + + pub fn addSliceElemPtr( + block: *Block, + slice: Air.Inst.Ref, + elem_index: Air.Inst.Ref, + elem_ptr_ty: Type, + ) !Air.Inst.Ref { + return block.addInst(.{ + .tag = .slice_elem_ptr, + .data = .{ .ty_pl = .{ + .ty = try block.func.addType(elem_ptr_ty), + .payload = try block.func.addExtra(Air.Bin{ + .lhs = slice, + .rhs = elem_index, + }), + } }, + }); + } + + pub fn addPtrElemPtr( + block: *Block, + array_ptr: Air.Inst.Ref, + elem_index: Air.Inst.Ref, + 
elem_ptr_ty: Type,
+    ) !Air.Inst.Ref {
+        const ty_ref = try block.func.addType(elem_ptr_ty);
+        return block.addPtrElemPtrTypeRef(array_ptr, elem_index, ty_ref);
+    }
+
+    pub fn addPtrElemPtrTypeRef(
+        block: *Block,
+        array_ptr: Air.Inst.Ref,
+        elem_index: Air.Inst.Ref,
+        elem_ptr_ty: Air.Inst.Ref,
+    ) !Air.Inst.Ref {
+        return block.addInst(.{
+            .tag = .ptr_elem_ptr,
+            .data = .{ .ty_pl = .{
+                .ty = elem_ptr_ty,
+                .payload = try block.func.addExtra(Air.Bin{
+                    .lhs = array_ptr,
+                    .rhs = elem_index,
+                }),
+            } },
+        });
+    }
+
+    pub fn addVectorInit(
+        block: *Block,
+        vector_ty: Type,
+        elements: []const Air.Inst.Ref,
+    ) !Air.Inst.Ref {
+        const func = block.func;
+        const ty_ref = try func.addType(vector_ty);
+        try func.air_extra.ensureUnusedCapacity(func.gpa, elements.len);
+        const extra_index = @intCast(u32, func.air_extra.items.len);
+        func.appendRefsAssumeCapacity(elements);
+
+        return block.addInst(.{
+            .tag = .vector_init,
+            .data = .{ .ty_pl = .{
+                .ty = ty_ref,
+                .payload = extra_index,
+            } },
+        });
+    }
+
+    pub fn addInst(block: *Block, inst: Air.Inst) error{OutOfMemory}!Air.Inst.Ref {
+        return Air.indexToRef(try block.addInstAsIndex(inst));
+    }
+
+    pub fn addInstAsIndex(block: *Block, inst: Air.Inst) error{OutOfMemory}!Air.Inst.Index {
+        const func = block.func;
+        const gpa = func.codegen.gpa;
+
+        try func.air_instructions.ensureUnusedCapacity(gpa, 1);
+        try block.instructions.ensureUnusedCapacity(gpa, 1);
+
+        const result_index = @intCast(Air.Inst.Index, func.air_instructions.len);
+        func.air_instructions.appendAssumeCapacity(inst);
+        block.instructions.appendAssumeCapacity(result_index);
+        return result_index;
+    }
+
+    pub fn startAnonDecl(block: *Block) !WipAnonDecl {
+        return WipAnonDecl{
+            .block = block,
+            .new_decl_arena = std.heap.ArenaAllocator.init(block.func.codegen.gpa),
+            .finished = false,
+        };
+    }
+
+    pub const WipAnonDecl = struct {
+        block: *Block,
+        new_decl_arena: std.heap.ArenaAllocator,
+        finished: bool,
+
+        pub fn arena(wad: *WipAnonDecl) Allocator {
+            return wad.new_decl_arena.allocator();
+        }
+
+        pub fn deinit(wad: *WipAnonDecl) void {
+            if (!wad.finished) {
+                wad.new_decl_arena.deinit();
+            }
+            wad.* = undefined;
+        }
+
+        pub fn finish(wad: *WipAnonDecl, ty: Type, val: Value) !*Module.Decl {
+            const func = wad.block.func;
+            const mod = func.codegen.bin_file.options.module.?;
+            const new_decl = try mod.createAnonymousDecl2(.{
+                .ty = ty,
+                .val = val,
+            }, func.name);
+            errdefer mod.abortAnonDecl(new_decl);
+            try new_decl.finalizeNewArena(&wad.new_decl_arena);
+            wad.finished = true;
+            return new_decl;
+        }
+    };
+};
diff --git a/src/aro/Compilation.zig b/src/aro/Compilation.zig
new file mode 100644
index 000000000000..9060a3dfe763
--- /dev/null
+++ b/src/aro/Compilation.zig
@@ -0,0 +1,834 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const mem = std.mem;
+const Allocator = mem.Allocator;
+const EpochSeconds = std.time.epoch.EpochSeconds;
+const Builtins = @import("Builtins.zig");
+const Diagnostics = @import("Diagnostics.zig");
+const LangOpts = @import("LangOpts.zig");
+const Source = @import("Source.zig");
+const Tokenizer = @import("Tokenizer.zig");
+const Token = Tokenizer.Token;
+const Type = @import("Type.zig");
+const Pragma = @import("Pragma.zig");
+
+const Compilation = @This();
+
+pub const Error = error{
+    /// A fatal error has occurred and compilation has stopped.
+    FatalError,
+} || Allocator.Error;
+
+gpa: Allocator,
+sources: std.StringArrayHashMap(Source),
+diag: Diagnostics,
+include_dirs: std.ArrayList([]const u8),
+system_include_dirs: std.ArrayList([]const u8),
+output_name: ?[]const u8 = null,
+builtin_header_path: ?[]u8 = null,
+target: std.Target = @import("builtin").target,
+pragma_handlers: std.StringArrayHashMap(*Pragma),
+only_preprocess: bool = false,
+only_compile: bool = false,
+verbose_ast: bool = false,
+langopts: LangOpts = .{},
+generated_buf: std.ArrayList(u8),
+builtins: Builtins = .{},
+types: struct {
+    wchar: Type,
+    ptrdiff: Type,
+    size: Type,
+    va_list: Type,
+} = undefined,
+
+pub fn init(gpa: Allocator) Compilation {
+    return .{
+        .gpa = gpa,
+        .sources = std.StringArrayHashMap(Source).init(gpa),
+        .diag = Diagnostics.init(gpa),
+        .include_dirs = std.ArrayList([]const u8).init(gpa),
+        .system_include_dirs = std.ArrayList([]const u8).init(gpa),
+        .pragma_handlers = std.StringArrayHashMap(*Pragma).init(gpa),
+        .generated_buf = std.ArrayList(u8).init(gpa),
+    };
+}
+
+pub fn deinit(comp: *Compilation) void {
+    for (comp.pragma_handlers.values()) |pragma| {
+        pragma.deinit(pragma, comp);
+    }
+    for (comp.sources.values()) |source| {
+        comp.gpa.free(source.path);
+        comp.gpa.free(source.buf);
+        comp.gpa.free(source.splice_locs);
+    }
+    comp.sources.deinit();
+    comp.diag.deinit();
+    comp.include_dirs.deinit();
+    comp.system_include_dirs.deinit();
+    comp.pragma_handlers.deinit();
+    if (comp.builtin_header_path) |some| comp.gpa.free(some);
+    comp.generated_buf.deinit();
+    comp.builtins.deinit(comp.gpa);
+}
+
+fn generateDateAndTime(w: anytype) !void {
+    // TODO take timezone into account here once it is supported in Zig std
+    const timestamp = std.math.clamp(std.time.timestamp(), 0, std.math.maxInt(i64));
+    const epoch_seconds = EpochSeconds{ .secs = @intCast(u64, timestamp) };
+    const epoch_day = epoch_seconds.getEpochDay();
+    const day_seconds = epoch_seconds.getDaySeconds();
+    const year_day = epoch_day.calculateYearDay();
+    const month_day = year_day.calculateMonthDay();
+
+    const month_names = [_][]const u8{ "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
+    std.debug.assert(std.time.epoch.Month.jan.numeric() == 1);
+
+    const month_name = month_names[month_day.month.numeric() - 1];
+    try w.print("#define __DATE__ \"{s} {d: >2} {d}\"\n", .{
+        month_name,
+        month_day.day_index + 1,
+        year_day.year,
+    });
+    try w.print("#define __TIME__ \"{d:0>2}:{d:0>2}:{d:0>2}\"\n", .{
+        day_seconds.getHoursIntoDay(),
+        day_seconds.getMinutesIntoHour(),
+        day_seconds.getSecondsIntoMinute(),
+    });
+
+    const day_names = [_][]const u8{ "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun" };
+    // days since Thu Jan 1 1970
+    const day_name = day_names[(epoch_day.day + 3) % 7];
+    try w.print("#define __TIMESTAMP__ \"{s} {s} {d: >2} {d:0>2}:{d:0>2}:{d:0>2} {d}\"\n", .{
+        day_name,
+        month_name,
+        month_day.day_index + 1,
+        day_seconds.getHoursIntoDay(),
+        day_seconds.getMinutesIntoHour(),
+        day_seconds.getSecondsIntoMinute(),
+        year_day.year,
+    });
+}
+
+/// Generate builtin macros that will be available to each source file.
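+/// As a rough illustration, targeting x86_64-linux the generated source
+/// contains lines such as:
+///
+///   #define __linux__ 1
+///   #define __x86_64__ 1
+///   #define __SIZEOF_POINTER__ 8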
+pub fn generateBuiltinMacros(comp: *Compilation) !Source { + try comp.generateBuiltinTypes(); + comp.builtins = try Builtins.create(comp); + + var buf = std.ArrayList(u8).init(comp.gpa); + defer buf.deinit(); + const w = buf.writer(); + + // standard macros + try w.writeAll( + \\#define __VERSION__ "Aro + ++ @import("lib.zig").version_str ++ "\"\n" ++ + \\#define __Aro__ + \\#define __STDC__ 1 + \\#define __STDC_HOSTED__ 1 + \\#define __STDC_NO_ATOMICS__ 1 + \\#define __STDC_NO_COMPLEX__ 1 + \\#define __STDC_NO_THREADS__ 1 + \\#define __STDC_NO_VLA__ 1 + \\ + ); + if (comp.langopts.standard.StdCVersionMacro()) |stdc_version| { + try w.print("#define __STDC_VERSION__ {s}\n", .{stdc_version}); + } + + // os macros + switch (comp.target.os.tag) { + .linux => try w.writeAll( + \\#define linux 1 + \\#define __linux 1 + \\#define __linux__ 1 + \\ + ), + .windows => if (comp.target.cpu.arch.ptrBitWidth() == 32) try w.writeAll( + \\#define WIN32 1 + \\#define _WIN32 1 + \\#define __WIN32 1 + \\#define __WIN32__ 1 + \\ + ) else try w.writeAll( + \\#define WIN32 1 + \\#define WIN64 1 + \\#define _WIN32 1 + \\#define _WIN64 1 + \\#define __WIN32 1 + \\#define __WIN64 1 + \\#define __WIN32__ 1 + \\#define __WIN64__ 1 + \\ + ), + .freebsd => try w.print("#define __FreeBSD__ {d}\n", .{comp.target.os.version_range.semver.min.major}), + .netbsd => try w.writeAll("#define __NetBSD__ 1\n"), + .openbsd => try w.writeAll("#define __OpenBSD__ 1\n"), + .dragonfly => try w.writeAll("#define __DragonFly__ 1\n"), + .solaris => try w.writeAll( + \\#define sun 1 + \\#define __sun 1 + \\ + ), + .macos => try w.writeAll( + \\#define __APPLE__ 1 + \\#define __MACH__ 1 + \\ + ), + else => {}, + } + + // unix and other additional os macros + switch (comp.target.os.tag) { + .freebsd, + .netbsd, + .openbsd, + .dragonfly, + .linux, + => try w.writeAll( + \\#define unix 1 + \\#define __unix 1 + \\#define __unix__ 1 + \\ + ), + else => {}, + } + if (comp.target.abi == .android) { + try w.writeAll("#define __ANDROID__ 1\n"); + } + + // architecture macros + switch (comp.target.cpu.arch) { + .x86_64 => try w.writeAll( + \\#define __amd64__ 1 + \\#define __amd64 1 + \\#define __x86_64 1 + \\#define __x86_64__ 1 + \\ + ), + .i386 => try w.writeAll( + \\#define i386 1 + \\#define __i386 1 + \\#define __i386__ 1 + \\ + ), + .mips, + .mipsel, + .mips64, + .mips64el, + => try w.writeAll( + \\#define __mips__ 1 + \\#define mips 1 + \\ + ), + .powerpc, + .powerpcle, + => try w.writeAll( + \\#define __powerpc__ 1 + \\#define __POWERPC__ 1 + \\#define __ppc__ 1 + \\#define __PPC__ 1 + \\#define _ARCH_PPC 1 + \\ + ), + .powerpc64, + .powerpc64le, + => try w.writeAll( + \\#define __powerpc 1 + \\#define __powerpc__ 1 + \\#define __powerpc64__ 1 + \\#define __POWERPC__ 1 + \\#define __ppc__ 1 + \\#define __ppc64__ 1 + \\#define __PPC__ 1 + \\#define __PPC64__ 1 + \\#define _ARCH_PPC 1 + \\#define _ARCH_PPC64 1 + \\ + ), + .sparcv9 => try w.writeAll( + \\#define __sparc__ 1 + \\#define __sparc 1 + \\#define __sparc_v9__ 1 + \\ + ), + .sparc, .sparcel => try w.writeAll( + \\#define __sparc__ 1 + \\#define __sparc 1 + \\ + ), + .arm, .armeb => try w.writeAll( + \\#define __arm__ 1 + \\#define __arm 1 + \\ + ), + .thumb, .thumbeb => try w.writeAll( + \\#define __arm__ 1 + \\#define __arm 1 + \\#define __thumb__ 1 + \\ + ), + .aarch64, .aarch64_be => try w.writeAll("#define __aarch64__ 1\n"), + else => {}, + } + + if (comp.target.os.tag != .windows) switch (comp.target.cpu.arch.ptrBitWidth()) { + 64 => try w.writeAll( + \\#define _LP64 1 + 
\\#define __LP64__ 1
+        \\
+    ),
+    32 => try w.writeAll("#define _ILP32 1\n"),
+    else => {},
+};
+
+    try w.writeAll(
+        \\#define __ORDER_LITTLE_ENDIAN__ 1234
+        \\#define __ORDER_BIG_ENDIAN__ 4321
+        \\#define __ORDER_PDP_ENDIAN__ 3412
+        \\
+    );
+    if (comp.target.cpu.arch.endian() == .Little) try w.writeAll(
+        \\#define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__
+        \\#define __LITTLE_ENDIAN__ 1
+        \\
+    ) else try w.writeAll(
+        \\#define __BYTE_ORDER__ __ORDER_BIG_ENDIAN__
+        \\#define __BIG_ENDIAN__ 1
+        \\
+    );
+
+    // timestamps
+    try generateDateAndTime(w);
+
+    // types
+    if (Type.getCharSignedness(comp) == .unsigned) try w.writeAll("#define __CHAR_UNSIGNED__ 1\n");
+    try w.writeAll("#define __CHAR_BIT__ 8\n");
+
+    // int maxs
+    try comp.generateIntMax(w, "__SCHAR_MAX__", .{ .specifier = .schar });
+    try comp.generateIntMax(w, "__SHRT_MAX__", .{ .specifier = .short });
+    try comp.generateIntMax(w, "__INT_MAX__", .{ .specifier = .int });
+    try comp.generateIntMax(w, "__LONG_MAX__", .{ .specifier = .long });
+    try comp.generateIntMax(w, "__LONG_LONG_MAX__", .{ .specifier = .long_long });
+    try comp.generateIntMax(w, "__WCHAR_MAX__", comp.types.wchar);
+    // try comp.generateIntMax(w, "__WINT_MAX__", comp.types.wchar);
+    // try comp.generateIntMax(w, "__INTMAX_MAX__", comp.types.wchar);
+    try comp.generateIntMax(w, "__SIZE_MAX__", comp.types.size);
+    // try comp.generateIntMax(w, "__UINTMAX_MAX__", comp.types.wchar);
+    try comp.generateIntMax(w, "__PTRDIFF_MAX__", comp.types.ptrdiff);
+    // try comp.generateIntMax(w, "__INTPTR_MAX__", comp.types.wchar);
+    // try comp.generateIntMax(w, "__UINTPTR_MAX__", comp.types.size);
+
+    // sizeof types
+    try comp.generateSizeofType(w, "__SIZEOF_FLOAT__", .{ .specifier = .float });
+    try comp.generateSizeofType(w, "__SIZEOF_DOUBLE__", .{ .specifier = .double });
+    try comp.generateSizeofType(w, "__SIZEOF_LONG_DOUBLE__", .{ .specifier = .long_double });
+    try comp.generateSizeofType(w, "__SIZEOF_SHORT__", .{ .specifier = .short });
+    try comp.generateSizeofType(w, "__SIZEOF_INT__", .{ .specifier = .int });
+    try comp.generateSizeofType(w, "__SIZEOF_LONG__", .{ .specifier = .long });
+    try comp.generateSizeofType(w, "__SIZEOF_LONG_LONG__", .{ .specifier = .long_long });
+    try comp.generateSizeofType(w, "__SIZEOF_POINTER__", .{ .specifier = .pointer });
+    try comp.generateSizeofType(w, "__SIZEOF_PTRDIFF_T__", comp.types.ptrdiff);
+    try comp.generateSizeofType(w, "__SIZEOF_SIZE_T__", comp.types.size);
+    try comp.generateSizeofType(w, "__SIZEOF_WCHAR_T__", comp.types.wchar);
+    // try comp.generateSizeofType(w, "__SIZEOF_WINT_T__", .{ .specifier = .pointer });
+
+    // various int types
+    try generateTypeMacro(w, "__PTRDIFF_TYPE__", comp.types.ptrdiff);
+    try generateTypeMacro(w, "__SIZE_TYPE__", comp.types.size);
+    try generateTypeMacro(w, "__WCHAR_TYPE__", comp.types.wchar);
+
+    return comp.addSourceFromBuffer("<builtin>", buf.items);
+}
+
+fn generateTypeMacro(w: anytype, name: []const u8, ty: Type) !void {
+    try w.print("#define {s} ", .{name});
+    try ty.print(w);
+    try w.writeByte('\n');
+}
+
+fn generateBuiltinTypes(comp: *Compilation) !void {
+    const os = comp.target.os.tag;
+    const wchar: Type = switch (comp.target.cpu.arch) {
+        .xcore => .{ .specifier = .uchar },
+        .ve => .{ .specifier = .uint },
+        .arm, .armeb, .thumb, .thumbeb => .{
+            .specifier = if (os != .windows and os != .netbsd and os != .openbsd) .uint else .int,
+        },
+        .aarch64, .aarch64_be, .aarch64_32 => .{
+            .specifier = if (!os.isDarwin() and os != .netbsd) .uint else .int,
+        },
+        .x86_64, .i386 => .{
.specifier = if (os == .windows) .ushort else .int }, + else => .{ .specifier = .int }, + }; + + const ptrdiff = if (os == .windows and comp.target.cpu.arch.ptrBitWidth() == 64) + Type{ .specifier = .long_long } + else switch (comp.target.cpu.arch.ptrBitWidth()) { + 32 => Type{ .specifier = .int }, + 64 => Type{ .specifier = .long }, + else => unreachable, + }; + + const size = if (os == .windows and comp.target.cpu.arch.ptrBitWidth() == 64) + Type{ .specifier = .ulong_long } + else switch (comp.target.cpu.arch.ptrBitWidth()) { + 32 => Type{ .specifier = .uint }, + 64 => Type{ .specifier = .ulong }, + else => unreachable, + }; + + const va_list = try comp.generateVaListType(); + + comp.types = .{ + .wchar = wchar, + .ptrdiff = ptrdiff, + .size = size, + .va_list = va_list, + }; +} + +fn generateVaListType(comp: *Compilation) !Type { + const Kind = enum { char_ptr, void_ptr, aarch64_va_list, x86_64_va_list }; + const kind: Kind = switch (comp.target.cpu.arch) { + .aarch64 => switch (comp.target.os.tag) { + .windows => @as(Kind, .char_ptr), + .ios, .macos, .tvos, .watchos => .char_ptr, + else => .aarch64_va_list, + }, + .sparc, .wasm32, .wasm64, .bpfel, .bpfeb, .riscv32, .riscv64, .avr, .spirv32, .spirv64 => .void_ptr, + .powerpc => switch (comp.target.os.tag) { + .ios, .macos, .tvos, .watchos, .aix => @as(Kind, .char_ptr), + else => return Type{ .specifier = .void }, // unknown + }, + .i386 => .char_ptr, + .x86_64 => switch (comp.target.os.tag) { + .windows => @as(Kind, .char_ptr), + else => .x86_64_va_list, + }, + else => return Type{ .specifier = .void }, // unknown + }; + + // TODO this might be bad? + const arena = comp.diag.arena.allocator(); + + var ty: Type = undefined; + switch (kind) { + .char_ptr => ty = .{ .specifier = .char }, + .void_ptr => ty = .{ .specifier = .void }, + .aarch64_va_list => { + const record_ty = try arena.create(Type.Record); + record_ty.* = .{ + .name = "__va_list_tag", + .fields = try arena.alloc(Type.Record.Field, 5), + .size = 32, + .alignment = 8, + }; + const void_ty = try arena.create(Type); + void_ty.* = .{ .specifier = .void }; + const void_ptr = Type{ .specifier = .pointer, .data = .{ .sub_type = void_ty } }; + record_ty.fields[0] = .{ .name = "__stack", .ty = void_ptr }; + record_ty.fields[1] = .{ .name = "__gr_top", .ty = void_ptr }; + record_ty.fields[2] = .{ .name = "__vr_top", .ty = void_ptr }; + record_ty.fields[3] = .{ .name = "__gr_offs", .ty = .{ .specifier = .int } }; + record_ty.fields[4] = .{ .name = "__vr_offs", .ty = .{ .specifier = .int } }; + ty = .{ .specifier = .@"struct", .data = .{ .record = record_ty } }; + }, + .x86_64_va_list => { + const record_ty = try arena.create(Type.Record); + record_ty.* = .{ + .name = "__va_list_tag", + .fields = try arena.alloc(Type.Record.Field, 4), + .size = 24, + .alignment = 8, + }; + const void_ty = try arena.create(Type); + void_ty.* = .{ .specifier = .void }; + const void_ptr = Type{ .specifier = .pointer, .data = .{ .sub_type = void_ty } }; + record_ty.fields[0] = .{ .name = "gp_offset", .ty = .{ .specifier = .uint } }; + record_ty.fields[1] = .{ .name = "fp_offset", .ty = .{ .specifier = .uint } }; + record_ty.fields[2] = .{ .name = "overflow_arg_area", .ty = void_ptr }; + record_ty.fields[3] = .{ .name = "reg_save_area", .ty = void_ptr }; + ty = .{ .specifier = .@"struct", .data = .{ .record = record_ty } }; + }, + } + if (kind == .char_ptr or kind == .void_ptr) { + const elem_ty = try arena.create(Type); + elem_ty.* = ty; + ty = Type{ .specifier = .pointer, .data = .{ .sub_type = elem_ty } 
};
+    } else {
+        const arr_ty = try arena.create(Type.Array);
+        arr_ty.* = .{ .len = 1, .elem = ty };
+        ty = Type{ .specifier = .array, .data = .{ .array = arr_ty } };
+    }
+
+    return ty;
+}
+
+fn generateIntMax(comp: *Compilation, w: anytype, name: []const u8, ty: Type) !void {
+    const bit_count = @intCast(u8, ty.sizeof(comp).? * 8);
+    const unsigned = ty.isUnsignedInt(comp);
+    const max = if (bit_count == 128)
+        @as(u128, if (unsigned) std.math.maxInt(u128) else std.math.maxInt(i128))
+    else
+        (@as(u64, 1) << @truncate(u6, bit_count - @boolToInt(!unsigned))) - 1;
+    try w.print("#define {s} {d}\n", .{ name, max });
+}
+
+fn generateSizeofType(comp: *Compilation, w: anytype, name: []const u8, ty: Type) !void {
+    try w.print("#define {s} {d}\n", .{ name, ty.sizeof(comp).? });
+}
+
+pub fn defineSystemIncludes(comp: *Compilation) !void {
+    var buf: [std.fs.MAX_PATH_BYTES]u8 = undefined;
+    var search_path: []const u8 = std.fs.selfExePath(&buf) catch return error.SelfExeNotFound;
+    while (std.fs.path.dirname(search_path)) |dirname| : (search_path = dirname) {
+        var base_dir = std.fs.cwd().openDir(dirname, .{}) catch continue;
+        defer base_dir.close();
+
+        base_dir.access("include/stddef.h", .{}) catch continue;
+        const path = try std.fs.path.join(comp.gpa, &.{ dirname, "include" });
+        comp.builtin_header_path = path;
+        try comp.system_include_dirs.append(path);
+        break;
+    } else return error.AroIncludeNotFound;
+
+    try comp.system_include_dirs.append("/usr/include");
+}
+
+pub fn getSource(comp: *Compilation, id: Source.Id) Source {
+    if (id == .generated) return .{
+        .path = "<generated>",
+        .buf = comp.generated_buf.items,
+        .id = .generated,
+        .splice_locs = &.{},
+    };
+    return comp.sources.values()[@enumToInt(id) - 2];
+}
+
+/// Write bytes from `reader` into `contents`, performing newline splicing,
+/// line-ending normalization (converting line endings to \n), and UTF-8 validation.
+/// Creates a Source with `contents` as the buf and adds it to the Compilation.
+/// `contents` is assumed to be large enough to hold the entire content of `reader`.
+/// `contents` must have been allocated by `comp`'s allocator since it will be reallocated
+/// if splicing occurred.
+/// Compilation owns `contents` if and only if this call succeeds; the caller always retains
+/// ownership of `path`.
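+///
+/// For example, the five input bytes 'a', 'b', '\', '\n', 'c' are spliced down
+/// to a buf of "abc", with splice location 2 recorded in `splice_locs` (see
+/// the `addSourceFromReader` tests below).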
+pub fn addSourceFromReader(comp: *Compilation, reader: anytype, path: []const u8, contents: []u8) !Source {
+    const duped_path = try comp.gpa.dupe(u8, path);
+    errdefer comp.gpa.free(duped_path);
+
+    var splice_list = std.ArrayList(u32).init(comp.gpa);
+    defer splice_list.deinit();
+
+    const source_id = @intToEnum(Source.Id, comp.sources.count() + 2);
+
+    var i: u32 = 0;
+    var backslash_loc: u32 = undefined;
+    var state: enum { start, back_slash, cr, back_slash_cr, trailing_ws } = .start;
+    var line: u32 = 1;
+
+    while (true) {
+        const byte = reader.readByte() catch break;
+        contents[i] = byte;
+
+        switch (byte) {
+            '\r' => {
+                switch (state) {
+                    .start, .cr => {
+                        line += 1;
+                        state = .cr;
+                        contents[i] = '\n';
+                        i += 1;
+                    },
+                    .back_slash, .trailing_ws, .back_slash_cr => {
+                        i = backslash_loc;
+                        try splice_list.append(i);
+                        if (state == .trailing_ws) {
+                            try comp.diag.add(.{
+                                .tag = .backslash_newline_escape,
+                                .loc = .{ .id = source_id, .byte_offset = i, .line = line },
+                            }, &.{});
+                        }
+                        state = if (state == .back_slash_cr) .cr else .back_slash_cr;
+                    },
+                }
+            },
+            '\n' => {
+                switch (state) {
+                    .start => {
+                        line += 1;
+                        i += 1;
+                    },
+                    .cr, .back_slash_cr => {},
+                    .back_slash, .trailing_ws => {
+                        i = backslash_loc;
+                        if (state == .back_slash or state == .trailing_ws) {
+                            try splice_list.append(i);
+                        }
+                        if (state == .trailing_ws) {
+                            try comp.diag.add(.{
+                                .tag = .backslash_newline_escape,
+                                .loc = .{ .id = source_id, .byte_offset = i, .line = line },
+                            }, &.{});
+                        }
+                    },
+                }
+                state = .start;
+            },
+            '\\' => {
+                backslash_loc = i;
+                state = .back_slash;
+                i += 1;
+            },
+            '\t', '\x0B', '\x0C', ' ' => {
+                switch (state) {
+                    .start, .trailing_ws => {},
+                    .cr, .back_slash_cr => state = .start,
+                    .back_slash => state = .trailing_ws,
+                }
+                i += 1;
+            },
+            else => {
+                i += 1;
+                state = .start;
+            },
+        }
+    }
+
+    const splice_locs = splice_list.toOwnedSlice();
+    errdefer comp.gpa.free(splice_locs);
+
+    var source = Source{
+        .id = source_id,
+        .path = duped_path,
+        .buf = if (i == contents.len) contents else try comp.gpa.realloc(contents, i),
+        .splice_locs = splice_locs,
+    };
+
+    source.checkUtf8();
+    // Key the map with the duplicated path so the entry cannot dangle once
+    // the caller frees `path`.
+    try comp.sources.put(duped_path, source);
+    return source;
+}
+
+/// Caller retains ownership of `path` and `buf`.
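+/// The bytes of `buf` are copied into a freshly allocated buffer, so a
+/// temporary buffer is safe to pass; calling again with the same `path`
+/// returns the cached Source.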
+pub fn addSourceFromBuffer(comp: *Compilation, path: []const u8, buf: []const u8) !Source {
+    if (comp.sources.get(path)) |some| return some;
+
+    if (buf.len > std.math.maxInt(u32)) return error.StreamTooLong;
+
+    // Keep the stream in a local variable so the reader's pointer into it
+    // remains valid for the duration of the call.
+    var fbs = std.io.fixedBufferStream(buf);
+    const contents = try comp.gpa.alloc(u8, buf.len);
+    errdefer comp.gpa.free(contents);
+
+    return comp.addSourceFromReader(fbs.reader(), path, contents);
+}
+
+/// Caller retains ownership of `path`.
+pub fn addSourceFromPath(comp: *Compilation, path: []const u8) !Source {
+    if (comp.sources.get(path)) |some| return some;
+
+    if (mem.indexOfScalar(u8, path, 0) != null) {
+        return error.FileNotFound;
+    }
+
+    const file = try std.fs.cwd().openFile(path, .{});
+    defer file.close();
+
+    const size = std.math.cast(u32, try file.getEndPos()) catch return error.StreamTooLong;
+
+    // As above, the buffered reader must outlive the reader derived from it.
+    var buf_reader = std.io.bufferedReader(file.reader());
+    const contents = try comp.gpa.alloc(u8, size);
+    errdefer comp.gpa.free(contents);
+
+    return comp.addSourceFromReader(buf_reader.reader(), path, contents);
+}
+
+pub fn findInclude(comp: *Compilation, tok: Token, filename: []const u8, search_cwd: bool) !?Source {
+    var path_buf: [std.fs.MAX_PATH_BYTES]u8 = undefined;
+    var fib = std.heap.FixedBufferAllocator.init(&path_buf);
+    if (search_cwd) blk: {
+        const source = comp.getSource(tok.source);
+        const path = if (std.fs.path.dirname(source.path)) |some|
+            std.fs.path.join(fib.allocator(), &.{ some, filename }) catch break :blk
+        else
+            std.fs.path.join(fib.allocator(), &.{ ".", filename }) catch break :blk;
+        if (comp.addSourceFromPath(path)) |some|
+            return some
+        else |err| switch (err) {
+            error.OutOfMemory => return error.OutOfMemory,
+            else => {},
+        }
+    }
+    for (comp.include_dirs.items) |dir| {
+        fib.end_index = 0;
+        const path = std.fs.path.join(fib.allocator(), &.{ dir, filename }) catch continue;
+        if (comp.addSourceFromPath(path)) |some|
+            return some
+        else |err| switch (err) {
+            error.OutOfMemory => return error.OutOfMemory,
+            else => {},
+        }
+    }
+    for (comp.system_include_dirs.items) |dir| {
+        fib.end_index = 0;
+        const path = std.fs.path.join(fib.allocator(), &.{ dir, filename }) catch continue;
+        if (comp.addSourceFromPath(path)) |some|
+            return some
+        else |err| switch (err) {
+            error.OutOfMemory => return error.OutOfMemory,
+            else => {},
+        }
+    }
+    return null;
+}
+
+pub fn addPragmaHandler(comp: *Compilation, name: []const u8, handler: *Pragma) Allocator.Error!void {
+    try comp.pragma_handlers.putNoClobber(name, handler);
+}
+
+pub fn addDefaultPragmaHandlers(comp: *Compilation) Allocator.Error!void {
+    const GCC = @import("pragmas/gcc.zig");
+    var gcc = try GCC.init(comp.gpa);
+    errdefer gcc.deinit(gcc, comp);
+
+    const Once = @import("pragmas/once.zig");
+    var once = try Once.init(comp.gpa);
+    errdefer once.deinit(once, comp);
+
+    const Message = @import("pragmas/message.zig");
+    var message = try Message.init(comp.gpa);
+    errdefer message.deinit(message, comp);
+
+    try comp.addPragmaHandler("GCC", gcc);
+    try comp.addPragmaHandler("once", once);
+    try comp.addPragmaHandler("message", message);
+}
+
+pub fn getPragma(comp: *Compilation, name: []const u8) ?*Pragma {
+    return comp.pragma_handlers.get(name);
+}
+
+const PragmaEvent = enum {
+    before_preprocess,
+    before_parse,
+    after_parse,
+};
+
+pub fn pragmaEvent(comp: *Compilation, event: PragmaEvent) void {
+    for (comp.pragma_handlers.values()) |pragma| {
+        const maybe_func = switch (event) {
+            .before_preprocess => pragma.beforePreprocess,
+            .before_parse => pragma.beforeParse,
+            .after_parse
=> pragma.afterParse,
+        };
+        if (maybe_func) |func| func(pragma, comp);
+    }
+}
+
+pub const renderErrors = Diagnostics.render;
+
+pub fn isTlsSupported(comp: *Compilation) bool {
+    if (comp.target.isDarwin()) {
+        var supported = false;
+        switch (comp.target.os.tag) {
+            .macos => supported = !(comp.target.os.isAtLeast(.macos, .{ .major = 10, .minor = 7 }) orelse false),
+            else => {},
+        }
+        return supported;
+    }
+    return switch (comp.target.cpu.arch) {
+        .tce, .tcele, .bpfel, .bpfeb, .msp430, .nvptx, .nvptx64, .i386, .arm, .armeb, .thumb, .thumbeb => false,
+        else => true,
+    };
+}
+
+/// Default alignment (in bytes) for __attribute__((aligned)) when no alignment is specified
+pub fn defaultAlignment(comp: *const Compilation) u29 {
+    switch (comp.target.cpu.arch) {
+        .avr => return 1,
+        .arm,
+        .armeb,
+        .thumb,
+        .thumbeb,
+        => switch (comp.target.abi) {
+            .gnueabi, .gnueabihf, .eabi, .eabihf, .musleabi, .musleabihf => return 8,
+            else => {},
+        },
+        else => {},
+    }
+    return 16;
+}
+
+test "addSourceFromReader" {
+    const Test = struct {
+        fn addSourceFromReader(str: []const u8, expected: []const u8, warning_count: u32, splices: []const u32) !void {
+            var comp = Compilation.init(std.testing.allocator);
+            defer comp.deinit();
+
+            const contents = try comp.gpa.alloc(u8, 1024);
+
+            // Keep the stream in a local so the reader taken from it stays valid.
+            var fbs = std.io.fixedBufferStream(str);
+            const source = try comp.addSourceFromReader(fbs.reader(), "path", contents);
+
+            try std.testing.expectEqualStrings(expected, source.buf);
+            try std.testing.expectEqual(warning_count, @intCast(u32, comp.diag.list.items.len));
+            try std.testing.expectEqualSlices(u32, splices, source.splice_locs);
+        }
+    };
+    try Test.addSourceFromReader("ab\\\nc", "abc", 0, &.{2});
+    try Test.addSourceFromReader("ab\\\rc", "abc", 0, &.{2});
+    try Test.addSourceFromReader("ab\\\r\nc", "abc", 0, &.{2});
+    try Test.addSourceFromReader("ab\\ \nc", "abc", 1, &.{2});
+    try Test.addSourceFromReader("ab\\\t\nc", "abc", 1, &.{2});
+    try Test.addSourceFromReader("ab\\ \t\nc", "abc", 1, &.{2});
+    try Test.addSourceFromReader("ab\\\r \nc", "ab \nc", 0, &.{2});
+    try Test.addSourceFromReader("ab\\\\\nc", "ab\\c", 0, &.{3});
+    try Test.addSourceFromReader("ab\\ \r\nc", "abc", 1, &.{2});
+    try Test.addSourceFromReader("ab\\ \\\nc", "ab\\ c", 0, &.{4});
+    try Test.addSourceFromReader("ab\\\r\\\nc", "abc", 0, &.{ 2, 2 });
+    try Test.addSourceFromReader("ab\\ \rc", "abc", 1, &.{2});
+    try Test.addSourceFromReader("ab\\", "ab\\", 0, &.{});
+    try Test.addSourceFromReader("ab\\\\", "ab\\\\", 0, &.{});
+    try Test.addSourceFromReader("ab\\ ", "ab\\ ", 0, &.{});
+    try Test.addSourceFromReader("ab\\\n", "ab", 0, &.{2});
+    try Test.addSourceFromReader("ab\\\r\n", "ab", 0, &.{2});
+    try Test.addSourceFromReader("ab\\\r", "ab", 0, &.{2});
+
+    // carriage return normalization
+    try Test.addSourceFromReader("ab\r", "ab\n", 0, &.{});
+    try Test.addSourceFromReader("ab\r\r", "ab\n\n", 0, &.{});
+    try Test.addSourceFromReader("ab\r\r\n", "ab\n\n", 0, &.{});
+    try Test.addSourceFromReader("ab\r\r\n\r", "ab\n\n\n", 0, &.{});
+    try Test.addSourceFromReader("\r\\", "\n\\", 0, &.{});
+    try Test.addSourceFromReader("\\\r\\", "\\", 0, &.{0});
+}
+
+test "addSourceFromReader - exhaustive check for carriage return elimination" {
+    const alphabet = [_]u8{ '\r', '\n', ' ', '\\', 'a' };
+    const alen = alphabet.len;
+    var buf: [alphabet.len]u8 = [1]u8{alphabet[0]} ** alen;
+
+    var comp = Compilation.init(std.testing.allocator);
+    defer comp.deinit();
+
+    var source_count: u32 = 0;
+
+    while (true) {
+        const source = try
comp.addSourceFromBuffer(&buf, &buf); + source_count += 1; + try std.testing.expect(std.mem.indexOfScalar(u8, source.buf, '\r') == null); + + if (std.mem.allEqual(u8, &buf, alphabet[alen - 1])) break; + + var idx = std.mem.indexOfScalar(u8, &alphabet, buf[buf.len - 1]).?; + buf[buf.len - 1] = alphabet[(idx + 1) % alen]; + var j = buf.len - 1; + while (j > 0) : (j -= 1) { + idx = std.mem.indexOfScalar(u8, &alphabet, buf[j - 1]).?; + if (buf[j] == alphabet[0]) buf[j - 1] = alphabet[(idx + 1) % alen] else break; + } + } + try std.testing.expect(source_count == std.math.powi(usize, alen, alen) catch unreachable); +} diff --git a/src/aro/Diagnostics.zig b/src/aro/Diagnostics.zig new file mode 100644 index 000000000000..e619ec7de855 --- /dev/null +++ b/src/aro/Diagnostics.zig @@ -0,0 +1,1943 @@ +const std = @import("std"); +const mem = std.mem; +const Allocator = mem.Allocator; +const Source = @import("Source.zig"); +const Compilation = @import("Compilation.zig"); +const Attribute = @import("Attribute.zig"); +const Tree = @import("Tree.zig"); +const util = @import("util.zig"); +const is_windows = @import("builtin").os.tag == .windows; + +const Diagnostics = @This(); + +pub const Message = struct { + tag: Tag, + kind: Kind = undefined, + loc: Source.Location = .{}, + extra: Extra = .{ .none = {} }, + + pub const Extra = union { + str: []const u8, + tok_id: struct { + expected: Tree.Token.Id, + actual: Tree.Token.Id, + }, + tok_id_expected: Tree.Token.Id, + arguments: struct { + expected: u32, + actual: u32, + }, + codepoints: struct { + actual: u21, + resembles: u21, + }, + attr_arg_count: struct { + attribute: Attribute.Tag, + expected: u32, + }, + attr_arg_type: struct { + expected: Attribute.ArgumentType, + actual: Attribute.ArgumentType, + }, + attr_enum: struct { + tag: Attribute.Tag, + }, + ignored_record_attr: struct { + tag: Attribute.Tag, + specifier: enum { @"struct", @"union", @"enum" }, + }, + actual_codepoint: u21, + unsigned: u64, + signed: i64, + none: void, + }; +}; + +pub const Tag = std.meta.DeclEnum(messages); + +// u4 to avoid any possible packed struct issues +pub const Kind = enum(u4) { @"fatal error", @"error", note, warning, off, default }; + +pub const Options = packed struct { + // do not directly use these, instead add `const NAME = true;` + all: Kind = .default, + extra: Kind = .default, + pedantic: Kind = .default, + + @"unsupported-pragma": Kind = .default, + @"c99-extensions": Kind = .default, + @"implicit-int": Kind = .default, + @"duplicate-decl-specifier": Kind = .default, + @"missing-declaration": Kind = .default, + @"extern-initializer": Kind = .default, + @"implicit-function-declaration": Kind = .default, + @"unused-value": Kind = .default, + @"unreachable-code": Kind = .default, + @"unknown-warning-option": Kind = .default, + @"gnu-empty-struct": Kind = .default, + @"gnu-alignof-expression": Kind = .default, + @"macro-redefined": Kind = .default, + @"generic-qual-type": Kind = .default, + multichar: Kind = .default, + @"pointer-integer-compare": Kind = .default, + @"compare-distinct-pointer-types": Kind = .default, + @"literal-conversion": Kind = .default, + @"cast-qualifiers": Kind = .default, + @"array-bounds": Kind = .default, + @"int-conversion": Kind = .default, + @"pointer-type-mismatch": Kind = .default, + @"c2x-extensions": Kind = .default, + @"incompatible-pointer-types": Kind = .default, + @"excess-initializers": Kind = .default, + @"division-by-zero": Kind = .default, + @"initializer-overrides": Kind = .default, + 
@"incompatible-pointer-types-discards-qualifiers": Kind = .default, + @"unknown-attributes": Kind = .default, + @"ignored-attributes": Kind = .default, + @"builtin-macro-redefined": Kind = .default, + @"gnu-label-as-value": Kind = .default, + @"malformed-warning-check": Kind = .default, + @"#pragma-messages": Kind = .default, + @"newline-eof": Kind = .default, + @"empty-translation-unit": Kind = .default, + @"implicitly-unsigned-literal": Kind = .default, + @"c99-compat": Kind = .default, + @"unicode-zero-width": Kind = .default, + @"unicode-homoglyph": Kind = .default, + @"return-type": Kind = .default, + @"dollar-in-identifier-extension": Kind = .default, + @"unknown-pragmas": Kind = .default, + @"predefined-identifier-outside-function": Kind = .default, + @"many-braces-around-scalar-init": Kind = .default, + uninitialized: Kind = .default, + @"gnu-statement-expression": Kind = .default, + @"gnu-imaginary-constant": Kind = .default, + @"ignored-qualifiers": Kind = .default, + @"integer-overflow": Kind = .default, + @"extra-semi": Kind = .default, + @"gnu-binary-literal": Kind = .default, + @"variadic-macros": Kind = .default, + varargs: Kind = .default, + @"#warnings": Kind = .default, + @"deprecated-declarations": Kind = .default, + @"backslash-newline-escape": Kind = .default, +}; + +const messages = struct { + const todo = struct { // Maybe someday this will no longer be needed. + const msg = "TODO: {s}"; + const extra = .str; + const kind = .@"error"; + }; + const error_directive = struct { + const msg = "{s}"; + const extra = .str; + const kind = .@"error"; + }; + const warning_directive = struct { + const msg = "{s}"; + const opt = "#warnings"; + const extra = .str; + const kind = .@"warning"; + }; + const elif_without_if = struct { + const msg = "#elif without #if"; + const kind = .@"error"; + }; + const elif_after_else = struct { + const msg = "#elif after #else"; + const kind = .@"error"; + }; + const else_without_if = struct { + const msg = "#else without #if"; + const kind = .@"error"; + }; + const else_after_else = struct { + const msg = "#else after #else"; + const kind = .@"error"; + }; + const endif_without_if = struct { + const msg = "#endif without #if"; + const kind = .@"error"; + }; + const unknown_pragma = struct { + const msg = "unknown pragma ignored"; + const opt = "unknown-pragmas"; + const kind = .off; + const all = true; + }; + const line_simple_digit = struct { + const msg = "#line directive requires a simple digit sequence"; + const kind = .@"error"; + }; + const line_invalid_filename = struct { + const msg = "invalid filename for #line directive"; + const kind = .@"error"; + }; + const unterminated_conditional_directive = struct { + const msg = "unterminated conditional directive"; + const kind = .@"error"; + }; + const invalid_preprocessing_directive = struct { + const msg = "invalid preprocessing directive"; + const kind = .@"error"; + }; + const macro_name_missing = struct { + const msg = "macro name missing"; + const kind = .@"error"; + }; + const extra_tokens_directive_end = struct { + const msg = "extra tokens at end of macro directive"; + const kind = .@"error"; + }; + const expected_value_in_expr = struct { + const msg = "expected value in expression"; + const kind = .@"error"; + }; + const closing_paren = struct { + const msg = "expected closing ')'"; + const kind = .@"error"; + }; + const to_match_paren = struct { + const msg = "to match this '('"; + const kind = .note; + }; + const to_match_brace = struct { + const msg = "to match this '{'"; + 
const kind = .note;
+    };
+    const to_match_bracket = struct {
+        const msg = "to match this '['";
+        const kind = .note;
+    };
+    const header_str_closing = struct {
+        const msg = "expected closing '>'";
+        const kind = .@"error";
+    };
+    const header_str_match = struct {
+        const msg = "to match this '<'";
+        const kind = .note;
+    };
+    const string_literal_in_pp_expr = struct {
+        const msg = "string literal in preprocessor expression";
+        const kind = .@"error";
+    };
+    const float_literal_in_pp_expr = struct {
+        const msg = "floating point literal in preprocessor expression";
+        const kind = .@"error";
+    };
+    const defined_as_macro_name = struct {
+        const msg = "'defined' cannot be used as a macro name";
+        const kind = .@"error";
+    };
+    const macro_name_must_be_identifier = struct {
+        const msg = "macro name must be an identifier";
+        const kind = .@"error";
+    };
+    const whitespace_after_macro_name = struct {
+        const msg = "ISO C99 requires whitespace after the macro name";
+        const opt = "c99-extensions";
+        const kind = .warning;
+    };
+    const hash_hash_at_start = struct {
+        const msg = "'##' cannot appear at the start of a macro expansion";
+        const kind = .@"error";
+    };
+    const hash_hash_at_end = struct {
+        const msg = "'##' cannot appear at the end of a macro expansion";
+        const kind = .@"error";
+    };
+    const pasting_formed_invalid = struct {
+        const msg = "pasting formed '{s}', an invalid preprocessing token";
+        const extra = .str;
+        const kind = .@"error";
+    };
+    const missing_paren_param_list = struct {
+        const msg = "missing ')' in macro parameter list";
+        const kind = .@"error";
+    };
+    const unterminated_macro_param_list = struct {
+        const msg = "unterminated macro param list";
+        const kind = .@"error";
+    };
+    const invalid_token_param_list = struct {
+        const msg = "invalid token in macro parameter list";
+        const kind = .@"error";
+    };
+    const expected_comma_param_list = struct {
+        const msg = "expected comma in macro parameter list";
+        const kind = .@"error";
+    };
+    const hash_not_followed_param = struct {
+        const msg = "'#' is not followed by a macro parameter";
+        const kind = .@"error";
+    };
+    const expected_filename = struct {
+        const msg = "expected \"FILENAME\" or <FILENAME>";
+        const kind = .@"error";
+    };
+    const empty_filename = struct {
+        const msg = "empty filename";
+        const kind = .@"error";
+    };
+    const expected_invalid = struct {
+        const msg = "expected '{s}', found invalid bytes";
+        const extra = .tok_id_expected;
+        const kind = .@"error";
+    };
+    const expected_eof = struct {
+        const msg = "expected '{s}' before end of file";
+        const extra = .tok_id_expected;
+        const kind = .@"error";
+    };
+    const expected_token = struct {
+        const msg = "expected '{s}', found '{s}'";
+        const extra = .tok_id;
+        const kind = .@"error";
+    };
+    const expected_expr = struct {
+        const msg = "expected expression";
+        const kind = .@"error";
+    };
+    const expected_integer_constant_expr = struct {
+        const msg = "expression is not an integer constant expression";
+        const kind = .@"error";
+    };
+    const missing_type_specifier = struct {
+        const msg = "type specifier missing, defaults to 'int'";
+        const opt = "implicit-int";
+        const kind = .warning;
+        const all = true;
+    };
+    const multiple_storage_class = struct {
+        const msg = "cannot combine with previous '{s}' declaration specifier";
+        const extra = .str;
+        const kind = .@"error";
+    };
+    const static_assert_failure = struct {
+        const msg = "static assertion failed";
+        const kind = .@"error";
+    };
+    const static_assert_failure_message = struct {
+        const msg = "static 
assertion failed {s}"; + const extra = .str; + const kind = .@"error"; + }; + const expected_type = struct { + const msg = "expected a type"; + const kind = .@"error"; + }; + const cannot_combine_spec = struct { + const msg = "cannot combine with previous '{s}' specifier"; + const extra = .str; + const kind = .@"error"; + }; + const duplicate_decl_spec = struct { + const msg = "duplicate '{s}' declaration specifier"; + const extra = .str; + const opt = "duplicate-decl-specifier"; + const kind = .warning; + const all = true; + }; + const restrict_non_pointer = struct { + const msg = "restrict requires a pointer or reference ('{s}' is invalid)"; + const extra = .str; + const kind = .@"error"; + }; + const expected_external_decl = struct { + const msg = "expected external declaration"; + const kind = .@"error"; + }; + const expected_ident_or_l_paren = struct { + const msg = "expected identifier or '('"; + const kind = .@"error"; + }; + const missing_declaration = struct { + const msg = "declaration does not declare anything"; + const opt = "missing-declaration"; + const kind = .warning; + }; + const func_not_in_root = struct { + const msg = "function definition is not allowed here"; + const kind = .@"error"; + }; + const illegal_initializer = struct { + const msg = "illegal initializer (only variables can be initialized)"; + const kind = .@"error"; + }; + const extern_initializer = struct { + const msg = "extern variable has initializer"; + const opt = "extern-initializer"; + const kind = .warning; + }; + const spec_from_typedef = struct { + const msg = "'{s}' came from typedef"; + const extra = .str; + const kind = .note; + }; + const type_is_invalid = struct { + const msg = "'{s}' is invalid"; + const extra = .str; + const kind = .@"error"; + }; + const param_before_var_args = struct { + const msg = "ISO C requires a named parameter before '...'"; + const kind = .@"error"; + }; + const void_only_param = struct { + const msg = "'void' must be the only parameter if specified"; + const kind = .@"error"; + }; + const void_param_qualified = struct { + const msg = "'void' parameter cannot be qualified"; + const kind = .@"error"; + }; + const void_must_be_first_param = struct { + const msg = "'void' must be the first parameter if specified"; + const kind = .@"error"; + }; + const invalid_storage_on_param = struct { + const msg = "invalid storage class on function parameter"; + const kind = .@"error"; + }; + const threadlocal_non_var = struct { + const msg = "_Thread_local only allowed on variables"; + const kind = .@"error"; + }; + const func_spec_non_func = struct { + const msg = "'{s}' can only appear on functions"; + const extra = .str; + const kind = .@"error"; + }; + const illegal_storage_on_func = struct { + const msg = "illegal storage class on function"; + const kind = .@"error"; + }; + const illegal_storage_on_global = struct { + const msg = "illegal storage class on global variable"; + const kind = .@"error"; + }; + const expected_stmt = struct { + const msg = "expected statement"; + const kind = .@"error"; + }; + const func_cannot_return_func = struct { + const msg = "function cannot return a function"; + const kind = .@"error"; + }; + const func_cannot_return_array = struct { + const msg = "function cannot return an array"; + const kind = .@"error"; + }; + const undeclared_identifier = struct { + const msg = "use of undeclared identifier '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const not_callable = struct { + const msg = "cannot call non function type '{s}'"; + 
const extra = .str; + const kind = .@"error"; + }; + const unsupported_str_cat = struct { + const msg = "unsupported string literal concatenation"; + const kind = .@"error"; + }; + const static_func_not_global = struct { + const msg = "static functions must be global"; + const kind = .@"error"; + }; + const implicit_func_decl = struct { + const msg = "implicit declaration of function '{s}' is invalid in C99"; + const extra = .str; + const opt = "implicit-function-declaration"; + const kind = .warning; + const all = true; + }; + const unknown_builtin = struct { + const msg = "use of unknown builtin '{s}'"; + const extra = .str; + const opt = "implicit-function-declaration"; + const kind = .@"error"; + const all = true; + }; + const expected_param_decl = struct { + const msg = "expected parameter declaration"; + const kind = .@"error"; + }; + const invalid_old_style_params = struct { + const msg = "identifier parameter lists are only allowed in function definitions"; + const kind = .@"error"; + }; + const expected_fn_body = struct { + const msg = "expected function body after function declaration"; + const kind = .@"error"; + }; + const invalid_void_param = struct { + const msg = "parameter cannot have void type"; + const kind = .@"error"; + }; + const unused_value = struct { + const msg = "expression result unused"; + const opt = "unused-value"; + const kind = .warning; + const all = true; + }; + const continue_not_in_loop = struct { + const msg = "'continue' statement not in a loop"; + const kind = .@"error"; + }; + const break_not_in_loop_or_switch = struct { + const msg = "'break' statement not in a loop or a switch"; + const kind = .@"error"; + }; + const unreachable_code = struct { + const msg = "unreachable code"; + const opt = "unreachable-code"; + const kind = .warning; + const all = true; + }; + const duplicate_label = struct { + const msg = "duplicate label '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const previous_label = struct { + const msg = "previous definition of label '{s}' was here"; + const extra = .str; + const kind = .note; + }; + const undeclared_label = struct { + const msg = "use of undeclared label '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const case_not_in_switch = struct { + const msg = "'{s}' statement not in a switch statement"; + const extra = .str; + const kind = .@"error"; + }; + const duplicate_switch_case_signed = struct { + const msg = "duplicate case value '{d}'"; + const extra = .signed; + const kind = .@"error"; + }; + const duplicate_switch_case_unsigned = struct { + const msg = "duplicate case value '{d}'"; + const extra = .unsigned; + const kind = .@"error"; + }; + const multiple_default = struct { + const msg = "multiple default cases in the same switch"; + const kind = .@"error"; + }; + const previous_case = struct { + const msg = "previous case defined here"; + const kind = .note; + }; + const expected_arguments = struct { + const msg = "expected {d} argument(s) got {d}"; + const extra = .arguments; + const kind = .@"error"; + }; + const expected_arguments_old = struct { + const msg = expected_arguments.msg; + const extra = .arguments; + const kind = .warning; + }; + const expected_at_least_arguments = struct { + const msg = "expected at least {d} argument(s) got {d}"; + const extra = .arguments; + const kind = .warning; + }; + const invalid_static_star = struct { + const msg = "'static' may not be used with an unspecified variable length array size"; + const kind = .@"error"; + }; + const static_non_param = 
struct { + const msg = "'static' used outside of function parameters"; + const kind = .@"error"; + }; + const array_qualifiers = struct { + const msg = "type qualifier in non parameter array type"; + const kind = .@"error"; + }; + const star_non_param = struct { + const msg = "star modifier used outside of function parameters"; + const kind = .@"error"; + }; + const variable_len_array_file_scope = struct { + const msg = "variable length arrays not allowed at file scope"; + const kind = .@"error"; + }; + const useless_static = struct { + const msg = "'static' useless without a constant size"; + const kind = .warning; + const w_extra = true; + }; + const negative_array_size = struct { + const msg = "array size must be 0 or greater"; + const kind = .@"error"; + }; + const array_incomplete_elem = struct { + const msg = "array has incomplete element type '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const array_func_elem = struct { + const msg = "arrays cannot have functions as their element type"; + const kind = .@"error"; + }; + const static_non_outermost_array = struct { + const msg = "'static' used in non-outermost array type"; + const kind = .@"error"; + }; + const qualifier_non_outermost_array = struct { + const msg = "type qualifier used in non-outermost array type"; + const kind = .@"error"; + }; + const unterminated_macro_arg_list = struct { + const msg = "unterminated function macro argument list"; + const kind = .@"error"; + }; + const unknown_warning = struct { + const msg = "unknown warning '{s}'"; + const extra = .str; + const opt = "unknown-warning-option"; + const kind = .warning; + }; + const overflow_signed = struct { + const msg = "overflow in expression; result is '{d}'"; + const extra = .signed; + const opt = "integer-overflow"; + const kind = .warning; + }; + const overflow_unsigned = struct { + const msg = overflow_signed.msg; + const extra = .unsigned; + const opt = "integer-overflow"; + const kind = .warning; + }; + const int_literal_too_big = struct { + const msg = "integer literal is too large to be represented in any integer type"; + const kind = .@"error"; + }; + const indirection_ptr = struct { + const msg = "indirection requires pointer operand"; + const kind = .@"error"; + }; + const addr_of_rvalue = struct { + const msg = "cannot take the address of an rvalue"; + const kind = .@"error"; + }; + const not_assignable = struct { + const msg = "expression is not assignable"; + const kind = .@"error"; + }; + const ident_or_l_brace = struct { + const msg = "expected identifier or '{'"; + const kind = .@"error"; + }; + const empty_enum = struct { + const msg = "empty enum is invalid"; + const kind = .@"error"; + }; + const redefinition = struct { + const msg = "redefinition of '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const previous_definition = struct { + const msg = "previous definition is here"; + const kind = .note; + }; + const expected_identifier = struct { + const msg = "expected identifier"; + const kind = .@"error"; + }; + const expected_str_literal = struct { + const msg = "expected string literal for diagnostic message in static_assert"; + const kind = .@"error"; + }; + const expected_str_literal_in = struct { + const msg = "expected string literal in '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const parameter_missing = struct { + const msg = "parameter named '{s}' is missing"; + const extra = .str; + const kind = .@"error"; + }; + const empty_record = struct { + const msg = "empty {s} is a GNU 
extension"; + const extra = .str; + const opt = "gnu-empty-struct"; + const kind = .off; + const pedantic = true; + }; + const wrong_tag = struct { + const msg = "use of '{s}' with tag type that does not match previous definition"; + const extra = .str; + const kind = .@"error"; + }; + const expected_parens_around_typename = struct { + const msg = "expected parentheses around type name"; + const kind = .@"error"; + }; + const alignof_expr = struct { + const msg = "'_Alignof' applied to an expression is a GNU extension"; + const opt = "gnu-alignof-expression"; + const kind = .warning; + const suppress_gnu = true; + }; + const invalid_sizeof = struct { + const msg = "invalid application of 'sizeof' to an incomplete type '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const macro_redefined = struct { + const msg = "'{s}' macro redefined"; + const extra = .str; + const opt = "macro-redefined"; + const kind = .warning; + }; + const generic_qual_type = struct { + const msg = "generic association with qualifiers cannot be matched with"; + const opt = "generic-qual-type"; + const kind = .warning; + }; + const generic_duplicate = struct { + const msg = "type '{s}' in generic association compatible with previously specified type"; + const extra = .str; + const kind = .@"error"; + }; + const generic_duplicate_default = struct { + const msg = "duplicate default generic association"; + const kind = .@"error"; + }; + const generic_no_match = struct { + const msg = "controlling expression type '{s}' not compatible with any generic association type"; + const extra = .str; + const kind = .@"error"; + }; + const escape_sequence_overflow = struct { + const msg = "escape sequence out of range"; + const kind = .@"error"; + }; + const invalid_universal_character = struct { + const msg = "invalid universal character"; + const kind = .@"error"; + }; + const multichar_literal = struct { + const msg = "multi-character character constant"; + const opt = "multichar"; + const kind = .warning; + const all = true; + }; + const unicode_multichar_literal = struct { + const msg = "Unicode character literals may not contain multiple characters"; + const kind = .@"error"; + }; + const wide_multichar_literal = struct { + const msg = "extraneous characters in character constant ignored"; + const kind = .warning; + }; + const char_lit_too_wide = struct { + const msg = "character constant too long for its type"; + const kind = .warning; + const all = true; + }; + const char_too_large = struct { + const msg = "character too large for enclosing character literal type"; + const kind = .@"error"; + }; + const must_use_struct = struct { + const msg = "must use 'struct' tag to refer to type '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const must_use_union = struct { + const msg = "must use 'union' tag to refer to type '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const must_use_enum = struct { + const msg = "must use 'enum' tag to refer to type '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const redefinition_different_sym = struct { + const msg = "redefinition of '{s}' as different kind of symbol"; + const extra = .str; + const kind = .@"error"; + }; + const redefinition_incompatible = struct { + const msg = "redefinition of '{s}' with a different type"; + const extra = .str; + const kind = .@"error"; + }; + const redefinition_of_parameter = struct { + const msg = "redefinition of parameter '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const 
invalid_bin_types = struct { + const msg = "invalid operands to binary expression ({s})"; + const extra = .str; + const kind = .@"error"; + }; + const comparison_ptr_int = struct { + const msg = "comparison between pointer and integer ({s})"; + const extra = .str; + const opt = "pointer-integer-compare"; + const kind = .warning; + }; + const comparison_distinct_ptr = struct { + const msg = "comparison of distinct pointer types ({s})"; + const extra = .str; + const opt = "compare-distinct-pointer-types"; + const kind = .warning; + }; + const incompatible_pointers = struct { + const msg = "incompatible pointer types ({s})"; + const extra = .str; + const kind = .@"error"; + }; + const invalid_argument_un = struct { + const msg = "invalid argument type '{s}' to unary expression"; + const extra = .str; + const kind = .@"error"; + }; + const incompatible_assign = struct { + const msg = "assignment to {s}"; + const extra = .str; + const kind = .@"error"; + }; + const implicit_ptr_to_int = struct { + const msg = "implicit pointer to integer conversion from {s}"; + const extra = .str; + const opt = "int-conversion"; + const kind = .warning; + }; + const invalid_cast_to_float = struct { + const msg = "pointer cannot be cast to type '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const invalid_cast_to_pointer = struct { + const msg = "operand of type '{s}' cannot be cast to a pointer type"; + const extra = .str; + const kind = .@"error"; + }; + const invalid_cast_type = struct { + const msg = "cannot cast to non arithmetic or pointer type '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const qual_cast = struct { + const msg = "cast to type '{s}' will not preserve qualifiers"; + const extra = .str; + const opt = "cast-qualifiers"; + const kind = .warning; + }; + const invalid_index = struct { + const msg = "array subscript is not an integer"; + const kind = .@"error"; + }; + const invalid_subscript = struct { + const msg = "subscripted value is not an array or pointer"; + const kind = .@"error"; + }; + const array_after = struct { + const msg = "array index {d} is past the end of the array"; + const extra = .unsigned; + const opt = "array-bounds"; + const kind = .warning; + }; + const array_before = struct { + const msg = "array index {d} is before the beginning of the array"; + const extra = .signed; + const opt = "array-bounds"; + const kind = .warning; + }; + const statement_int = struct { + const msg = "statement requires expression with integer type ('{s}' invalid)"; + const extra = .str; + const kind = .@"error"; + }; + const statement_scalar = struct { + const msg = "statement requires expression with scalar type ('{s}' invalid)"; + const extra = .str; + const kind = .@"error"; + }; + const func_should_return = struct { + const msg = "non-void function '{s}' should return a value"; + const extra = .str; + const opt = "return-type"; + const kind = .@"error"; + const all = true; + }; + const incompatible_return = struct { + const msg = "returning '{s}' from a function with incompatible result type"; + const extra = .str; + const kind = .@"error"; + }; + const implicit_int_to_ptr = struct { + const msg = "implicit integer to pointer conversion from {s}"; + const extra = .str; + const opt = "int-conversion"; + const kind = .warning; + }; + const func_does_not_return = struct { + const msg = "non-void function '{s}' does not return a value"; + const extra = .str; + const opt = "return-type"; + const kind = .warning; + const all = true; + }; + const 
void_func_returns_value = struct { + const msg = "void function '{s}' should not return a value"; + const extra = .str; + const opt = "return-type"; + const kind = .@"error"; + const all = true; + }; + const incompatible_param = struct { + const msg = "passing '{s}' to parameter of incompatible type"; + const extra = .str; + const kind = .@"error"; + }; + const parameter_here = struct { + const msg = "passing argument to parameter here"; + const kind = .note; + }; + const atomic_array = struct { + const msg = "atomic cannot be applied to array type '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const atomic_func = struct { + const msg = "atomic cannot be applied to function type '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const atomic_incomplete = struct { + const msg = "atomic cannot be applied to incomplete type '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const addr_of_register = struct { + const msg = "address of register variable requested"; + const kind = .@"error"; + }; + const variable_incomplete_ty = struct { + const msg = "variable has incomplete type '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const parameter_incomplete_ty = struct { + const msg = "parameter has incomplete type '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const deref_incomplete_ty_ptr = struct { + const msg = "dereferencing pointer to incomplete type '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const alignas_on_func = struct { + const msg = "'_Alignas' attribute only applies to variables and fields"; + const kind = .@"error"; + }; + const alignas_on_param = struct { + const msg = "'_Alignas' attribute cannot be applied to a function parameter"; + const kind = .@"error"; + }; + const minimum_alignment = struct { + const msg = "requested alignment is less than minimum alignment of {d}"; + const extra = .unsigned; + const kind = .@"error"; + }; + const maximum_alignment = struct { + const msg = "requested alignment of {d} is too large"; + const extra = .unsigned; + const kind = .@"error"; + }; + const negative_alignment = struct { + const msg = "requested negative alignment of {d} is invalid"; + const extra = .signed; + const kind = .@"error"; + }; + const align_ignored = struct { + const msg = "'_Alignas' attribute is ignored here"; + const kind = .warning; + }; + const zero_align_ignored = struct { + const msg = "requested alignment of zero is ignored"; + const kind = .warning; + }; + const non_pow2_align = struct { + const msg = "requested alignment is not a power of 2"; + const kind = .@"error"; + }; + const pointer_mismatch = struct { + const msg = "pointer type mismatch ({s})"; + const extra = .str; + const opt = "pointer-type-mismatch"; + const kind = .warning; + }; + const static_assert_not_constant = struct { + const msg = "static_assert expression is not an integral constant expression"; + const kind = .@"error"; + }; + const static_assert_missing_message = struct { + const msg = "static_assert with no message is a C2X extension"; + const opt = "c2x-extensions"; + const kind = .warning; + const suppress_version = .c2x; + }; + const unbound_vla = struct { + const msg = "variable length array must be bound in function definition"; + const kind = .@"error"; + }; + const array_too_large = struct { + const msg = "array is too large"; + const kind = .@"error"; + }; + const incompatible_ptr_init = struct { + const msg = "incompatible pointer types initializing {s}"; + const extra = .str; + const opt = 
"incompatible-pointer-types"; + const kind = .warning; + }; + const incompatible_ptr_assign = struct { + const msg = "incompatible pointer types assigning to {s}"; + const extra = .str; + const opt = "incompatible-pointer-types"; + const kind = .warning; + }; + const vla_init = struct { + const msg = "variable-sized object may not be initialized"; + const kind = .@"error"; + }; + const func_init = struct { + const msg = "illegal initializer type"; + const kind = .@"error"; + }; + const incompatible_init = struct { + const msg = "initializing {s}"; + const extra = .str; + const kind = .@"error"; + }; + const empty_scalar_init = struct { + const msg = "scalar initializer cannot be empty"; + const kind = .@"error"; + }; + const excess_scalar_init = struct { + const msg = "excess elements in scalar initializer"; + const opt = "excess-initializers"; + const kind = .warning; + }; + const excess_str_init = struct { + const msg = "excess elements in string initializer"; + const opt = "excess-initializers"; + const kind = .warning; + }; + const excess_struct_init = struct { + const msg = "excess elements in struct initializer"; + const opt = "excess-initializers"; + const kind = .warning; + }; + const excess_array_init = struct { + const msg = "excess elements in array initializer"; + const opt = "excess-initializers"; + const kind = .warning; + }; + const str_init_too_long = struct { + const msg = "initializer-string for char array is too long"; + const opt = "excess-initializers"; + const kind = .warning; + }; + const arr_init_too_long = struct { + const msg = "cannot initialize type ({s})"; + const extra = .str; + const kind = .@"error"; + }; + const invalid_typeof = struct { + const msg = "'{s} typeof' is invalid"; + const extra = .str; + const kind = .@"error"; + }; + const division_by_zero = struct { + const msg = "{s} by zero is undefined"; + const extra = .str; + const opt = "division-by-zero"; + const kind = .warning; + }; + const division_by_zero_macro = struct { + const msg = "{s} by zero in preprocessor expression"; + const extra = .str; + const kind = .@"error"; + }; + const builtin_choose_cond = struct { + const msg = "'__builtin_choose_expr' requires a constant expression"; + const kind = .@"error"; + }; + const alignas_unavailable = struct { + const msg = "'_Alignas' attribute requires integer constant expression"; + const kind = .@"error"; + }; + const case_val_unavailable = struct { + const msg = "case value must be an integer constant expression"; + const kind = .@"error"; + }; + const enum_val_unavailable = struct { + const msg = "enum value must be an integer constant expression"; + const kind = .@"error"; + }; + const incompatible_array_init = struct { + const msg = "cannot initialize array of type {s}"; + const extra = .str; + const kind = .@"error"; + }; + const array_init_str = struct { + const msg = "array initializer must be an initializer list or wide string literal"; + const kind = .@"error"; + }; + const initializer_overrides = struct { + const msg = "initializer overrides previous initialization"; + const opt = "initializer-overrides"; + const kind = .warning; + const w_extra = true; + }; + const previous_initializer = struct { + const msg = "previous initialization"; + const kind = .note; + }; + const invalid_array_designator = struct { + const msg = "array designator used for non-array type '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const negative_array_designator = struct { + const msg = "array designator value {d} is negative"; + const extra = 
.signed; + const kind = .@"error"; + }; + const oob_array_designator = struct { + const msg = "array designator index {d} exceeds array bounds"; + const extra = .unsigned; + const kind = .@"error"; + }; + const invalid_field_designator = struct { + const msg = "field designator used for non-record type '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const no_such_field_designator = struct { + const msg = "record type has no field named '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const empty_aggregate_init_braces = struct { + const msg = "initializer for aggregate with no elements requires explicit braces"; + const kind = .@"error"; + }; + const ptr_init_discards_quals = struct { + const msg = "initializing {s} discards qualifiers"; + const extra = .str; + const opt = "incompatible-pointer-types-discards-qualifiers"; + const kind = .warning; + }; + const ptr_assign_discards_quals = struct { + const msg = "assigning to {s} discards qualifiers"; + const extra = .str; + const opt = "incompatible-pointer-types-discards-qualifiers"; + const kind = .warning; + }; + const unknown_attribute = struct { + const msg = "unknown attribute '{s}' ignored"; + const extra = .str; + const opt = "unknown-attributes"; + const kind = .warning; + }; + const ignored_attribute = struct { + const msg = "{s}"; + const extra = .str; + const opt = "ignored-attributes"; + const kind = .warning; + }; + const invalid_fallthrough = struct { + const msg = "fallthrough annotation does not directly precede switch label"; + const kind = .@"error"; + }; + const cannot_apply_attribute_to_statement = struct { + const msg = "attribute cannot be applied to a statement"; + const kind = .@"error"; + }; + const builtin_macro_redefined = struct { + const msg = "redefining builtin macro"; + const opt = "builtin-macro-redefined"; + const kind = .warning; + }; + const feature_check_requires_identifier = struct { + const msg = "builtin feature check macro requires a parenthesized identifier"; + const kind = .@"error"; + }; + const missing_tok_builtin = struct { + const msg = "missing '{s}', after builtin feature-check macro"; + const extra = .tok_id_expected; + const kind = .@"error"; + }; + const gnu_label_as_value = struct { + const msg = "use of GNU address-of-label extension"; + const opt = "gnu-label-as-value"; + const kind = .off; + const pedantic = true; + }; + const expected_record_ty = struct { + const msg = "member reference base type '{s}' is not a structure or union"; + const extra = .str; + const kind = .@"error"; + }; + const member_expr_not_ptr = struct { + const msg = "member reference type '{s}' is not a pointer; did you mean to use '.'?"; + const extra = .str; + const kind = .@"error"; + }; + const member_expr_ptr = struct { + const msg = "member reference type '{s}' is a pointer; did you mean to use '->'?"; + const extra = .str; + const kind = .@"error"; + }; + const no_such_member = struct { + const msg = "no member named {s}"; + const extra = .str; + const kind = .@"error"; + }; + const malformed_warning_check = struct { + const msg = "{s} expected option name (e.g. 
\"-Wundef\")"; + const extra = .str; + const opt = "malformed-warning-check"; + const kind = .warning; + const all = true; + }; + const invalid_computed_goto = struct { + const msg = "computed goto in function with no address-of-label expressions"; + const kind = .@"error"; + }; + const pragma_warning_message = struct { + const msg = "{s}"; + const extra = .str; + const opt = "#pragma-messages"; + const kind = .warning; + }; + const pragma_error_message = struct { + const msg = "{s}"; + const extra = .str; + const kind = .@"error"; + }; + const pragma_message = struct { + const msg = "#pragma message: {s}"; + const extra = .str; + const kind = .note; + }; + const pragma_requires_string_literal = struct { + const msg = "pragma {s} requires string literal"; + const extra = .str; + const kind = .@"error"; + }; + const poisoned_identifier = struct { + const msg = "attempt to use a poisoned identifier"; + const kind = .@"error"; + }; + const pragma_poison_identifier = struct { + const msg = "can only poison identifier tokens"; + const kind = .@"error"; + }; + const pragma_poison_macro = struct { + const msg = "poisoning existing macro"; + const kind = .warning; + }; + const newline_eof = struct { + const msg = "no newline at end of file"; + const opt = "newline-eof"; + const kind = .off; + const pedantic = true; + }; + const empty_translation_unit = struct { + const msg = "ISO C requires a translation unit to contain at least one declaration"; + const opt = "empty-translation-unit"; + const kind = .off; + const pedantic = true; + }; + const omitting_parameter_name = struct { + const msg = "omitting the parameter name in a function definition is a C2x extension"; + const opt = "c2x-extensions"; + const kind = .warning; + const suppress_version = .c2x; + }; + const non_int_bitfield = struct { + const msg = "bit-field has non-integer type '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const negative_bitwidth = struct { + const msg = "bit-field has negative width ({d})"; + const extra = .signed; + const kind = .@"error"; + }; + const zero_width_named_field = struct { + const msg = "named bit-field has zero width"; + const kind = .@"error"; + }; + const bitfield_too_big = struct { + const msg = "width of bit-field exceeds width of its type"; + const kind = .@"error"; + }; + const invalid_utf8 = struct { + const msg = "source file is not valid UTF-8"; + const kind = .@"error"; + }; + const implicitly_unsigned_literal = struct { + const msg = "integer literal is too large to be represented in a signed integer type, interpreting as unsigned"; + const opt = "implicitly-unsigned-literal"; + const kind = .warning; + }; + const invalid_preproc_operator = struct { + const msg = "token is not a valid binary operator in a preprocessor subexpression"; + const kind = .@"error"; + }; + const invalid_preproc_expr_start = struct { + const msg = "invalid token at start of a preprocessor expression"; + const kind = .@"error"; + }; + const c99_compat = struct { + const msg = "using this character in an identifier is incompatible with C99"; + const opt = "c99-compat"; + const kind = .off; + }; + const unicode_zero_width = struct { + const msg = "identifier contains Unicode character 4}> that is invisible in some environments"; + const opt = "unicode-homoglyph"; + const extra = .actual_codepoint; + const kind = .warning; + }; + const unicode_homoglyph = struct { + const msg = "treating Unicode character 4}> as identifier character rather than as '{u}' symbol"; + const extra = .codepoints; + const opt = 
"unicode-homoglyph"; + const kind = .warning; + }; + const meaningless_asm_qual = struct { + const msg = "meaningless '{s}' on assembly outside function"; + const extra = .str; + const kind = .@"error"; + }; + const duplicate_asm_qual = struct { + const msg = "duplicate asm qualifier '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const invalid_asm_str = struct { + const msg = "cannot use {s} string literal in assembly"; + const extra = .str; + const kind = .@"error"; + }; + const dollar_in_identifier_extension = struct { + const msg = "'$' in identifier"; + const opt = "dollar-in-identifier-extension"; + const kind = .off; + const suppress_language_option = "dollars_in_identifiers"; + const pedantic = true; + }; + const dollars_in_identifiers = struct { + const msg = "illegal character '$' in identifier"; + const kind = .@"error"; + }; + const expanded_from_here = struct { + const msg = "expanded from here"; + const kind = .note; + }; + const skipping_macro_backtrace = struct { + const msg = "(skipping {d} expansions in backtrace; use -fmacro-backtrace-limit=0 to see all)"; + const extra = .unsigned; + const kind = .note; + }; + const pragma_operator_string_literal = struct { + const msg = "_Pragma requires exactly one string literal token"; + const kind = .@"error"; + }; + const unknown_gcc_pragma = struct { + const msg = "pragma GCC expected 'error', 'warning', 'diagnostic', 'poison'"; + const opt = "unknown-pragmas"; + const kind = .off; + const all = true; + }; + const unknown_gcc_pragma_directive = struct { + const msg = "pragma GCC diagnostic expected 'error', 'warning', 'ignored', 'fatal', 'push', or 'pop'"; + const opt = "unknown-pragmas"; + const kind = .off; + const all = true; + }; + const predefined_top_level = struct { + const msg = "predefined identifier is only valid inside function"; + const opt = "predefined-identifier-outside-function"; + const kind = .warning; + }; + const incompatible_va_arg = struct { + const msg = "first argument to va_arg, is of type '{s}' and not 'va_list'"; + const extra = .str; + const kind = .@"error"; + }; + const too_many_scalar_init_braces = struct { + const msg = "too many braces around scalar initializer"; + const opt = "many-braces-around-scalar-init"; + const kind = .warning; + }; + const uninitialized_in_own_init = struct { + const msg = "variable '{s}' is uninitialized when used within its own initialization"; + const extra = .str; + const opt = "uninitialized"; + const kind = .off; + const all = true; + }; + const gnu_statement_expression = struct { + const msg = "use of GNU statement expression extension"; + const opt = "gnu-statement-expression"; + const kind = .off; + const suppress_gnu = true; + const pedantic = true; + }; + const stmt_expr_not_allowed_file_scope = struct { + const msg = "statement expression not allowed at file scope"; + const kind = .@"error"; + }; + const gnu_imaginary_constant = struct { + const msg = "imaginary constants are a GNU extension"; + const opt = "gnu-imaginary-constant"; + const kind = .off; + const suppress_gnu = true; + const pedantic = true; + }; + const plain_complex = struct { + const msg = "plain '_Complex' requires a type specifier; assuming '_Complex double'"; + const kind = .warning; + }; + const qual_on_ret_type = struct { + const msg = "'{s}' type qualifier on return type has no effect"; + const opt = "ignored-qualifiers"; + const extra = .str; + const kind = .off; + const all = true; + }; + const cli_invalid_standard = struct { + const msg = "invalid standard '{s}'"; + 
const extra = .str; + const kind = .@"error"; + }; + const cli_invalid_target = struct { + const msg = "invalid target '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const cli_unknown_arg = struct { + const msg = "unknown argument '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const cli_error = struct { + const msg = "{s}"; + const extra = .str; + const kind = .@"error"; + }; + const extra_semi = struct { + const msg = "extra ';' outside of a function"; + const opt = "extra-semi"; + const kind = .off; + const pedantic = true; + }; + const func_field = struct { + const msg = "field declared as a function"; + const kind = .@"error"; + }; + const vla_field = struct { + const msg = "variable length array fields extension is not supported"; + const kind = .@"error"; + }; + const field_incomplete_ty = struct { + const msg = "field has incomplete type '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const flexible_in_union = struct { + const msg = "flexible array member in union is not allowed"; + const kind = .@"error"; + }; + const flexible_non_final = struct { + const msg = "flexible array member is not at the end of struct"; + const kind = .@"error"; + }; + const flexible_in_empty = struct { + const msg = "flexible array member in otherwise empty struct"; + const kind = .@"error"; + }; + const duplicate_member = struct { + const msg = "duplicate member '{s}'"; + const extra = .str; + const kind = .@"error"; + }; + const binary_integer_literal = struct { + const msg = "binary integer literals are a GNU extension"; + const kind = .off; + const opt = "gnu-binary-literal"; + const pedantic = true; + }; + const gnu_va_macro = struct { + const msg = "named variadic macros are a GNU extension"; + const opt = "variadic-macros"; + const kind = .off; + const pedantic = true; + }; + const builtin_must_be_called = struct { + const msg = "builtin function must be directly called"; + const kind = .@"error"; + }; + const va_start_not_in_func = struct { + const msg = "'va_start' cannot be used outside a function"; + const kind = .@"error"; + }; + const va_start_fixed_args = struct { + const msg = "'va_start' used in a function with fixed args"; + const kind = .@"error"; + }; + const va_start_not_last_param = struct { + const msg = "second argument to 'va_start' is not the last named parameter"; + const opt = "varargs"; + const kind = .warning; + }; + const attribute_not_enough_args = struct { + const msg = "'{s}' attribute takes at least {d} argument(s)"; + const kind = .@"error"; + const extra = .attr_arg_count; + }; + const attribute_too_many_args = struct { + const msg = "'{s}' attribute takes at most {d} argument(s)"; + const kind = .@"error"; + const extra = .attr_arg_count; + }; + const attribute_arg_invalid = struct { + const msg = "Attribute argument is invalid, expected {s} but got {s}"; + const kind = .@"error"; + const extra = .attr_arg_type; + }; + const unknown_attr_enum = struct { + const msg = "Unknown `{s}` argument. 
Possible values are: {s}"; + const kind = .@"error"; + const extra = .attr_enum; + }; + const attribute_requires_identifier = struct { + const msg = "'{s}' attribute requires an identifier"; + const kind = .@"error"; + const extra = .str; + }; + const declspec_not_enabled = struct { + const msg = "'__declspec' attributes are not enabled; use '-fdeclspec' or '-fms-extensions' to enable support for __declspec attributes"; + const kind = .@"error"; + }; + const declspec_attr_not_supported = struct { + const msg = "__declspec attribute '{s}' is not supported"; + const extra = .str; + const opt = "ignored-attributes"; + const kind = .warning; + }; + const deprecated_declarations = struct { + const msg = "{s}"; + const extra = .str; + const opt = "deprecated-declarations"; + const kind = .warning; + }; + const deprecated_note = struct { + const msg = "'{s}' has been explicitly marked deprecated here"; + const extra = .str; + const opt = "deprecated-declarations"; + const kind = .note; + }; + const unavailable = struct { + const msg = "{s}"; + const extra = .str; + const kind = .@"error"; + }; + const unavailable_note = struct { + const msg = "'{s}' has been explicitly marked unavailable here"; + const extra = .str; + const kind = .note; + }; + const ignored_record_attr = struct { + const msg = "attribute '{s}' is ignored, place it after \"{s}\" to apply attribute to type declaration"; + const extra = .ignored_record_attr; + const kind = .warning; + const opt = "ignored-attributes"; + }; + const backslash_newline_escape = struct { + const msg = "backslash and newline separated by space"; + const kind = .warning; + const opt = "backslash-newline-escape"; + }; + const array_size_non_int = struct { + const msg = "size of array has non-integer type '{s}'"; + const extra = .str; + const kind = .@"error"; + }; +}; + +list: std.ArrayListUnmanaged(Message) = .{}, +arena: std.heap.ArenaAllocator, +color: bool = true, +fatal_errors: bool = false, +options: Options = .{}, +errors: u32 = 0, +macro_backtrace_limit: u32 = 6, + +pub fn warningExists(name: []const u8) bool { + inline for (std.meta.fields(Options)) |f| { + if (mem.eql(u8, f.name, name)) return true; + } + return false; +} + +pub fn set(diag: *Diagnostics, name: []const u8, to: Kind) !void { + inline for (std.meta.fields(Options)) |f| { + if (mem.eql(u8, f.name, name)) { + @field(diag.options, f.name) = to; + return; + } + } + try diag.add(.{ + .tag = .unknown_warning, + .extra = .{ .str = name }, + }, &.{}); +} + +pub fn init(gpa: Allocator) Diagnostics { + return .{ + .color = std.io.getStdErr().supportsAnsiEscapeCodes() or (is_windows and std.io.getStdErr().isTty()), + .arena = std.heap.ArenaAllocator.init(gpa), + }; +} + +pub fn deinit(diag: *Diagnostics) void { + diag.list.deinit(diag.arena.allocator()); + diag.arena.deinit(); +} + +pub fn add(diag: *Diagnostics, msg: Message, expansion_locs: []const Source.Location) Compilation.Error!void { + const kind = diag.tagKind(msg.tag); + if (kind == .off) return; + var copy = msg; + copy.kind = kind; + + if (expansion_locs.len != 0) copy.loc = expansion_locs[expansion_locs.len - 1]; + try diag.list.append(diag.arena.allocator(), copy); + if (expansion_locs.len != 0) { + // Add macro backtrace notes in reverse order omitting from the middle if needed. 
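+ // If the expansion chain is longer than macro_backtrace_limit, only about half of the locations from each end of the chain are printed, separated by a skipping_macro_backtrace note that counts the omitted expansions.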
+ var i = expansion_locs.len - 1; + const half = diag.macro_backtrace_limit / 2; + const limit = if (i < diag.macro_backtrace_limit) 0 else i - half; + try diag.list.ensureUnusedCapacity( + diag.arena.allocator(), + if (limit == 0) expansion_locs.len else diag.macro_backtrace_limit + 1, + ); + while (i > limit) { + i -= 1; + diag.list.appendAssumeCapacity(.{ + .tag = .expanded_from_here, + .kind = .note, + .loc = expansion_locs[i], + }); + } + if (limit != 0) { + diag.list.appendAssumeCapacity(.{ + .tag = .skipping_macro_backtrace, + .kind = .note, + .extra = .{ .unsigned = expansion_locs.len - diag.macro_backtrace_limit }, + }); + i = half - 1; + while (i > 0) { + i -= 1; + diag.list.appendAssumeCapacity(.{ + .tag = .expanded_from_here, + .kind = .note, + .loc = expansion_locs[i], + }); + } + } + + diag.list.appendAssumeCapacity(.{ + .tag = .expanded_from_here, + .kind = .note, + .loc = msg.loc, + }); + } + if (kind == .@"fatal error" or (kind == .@"error" and diag.fatal_errors)) + return error.FatalError; +} + +pub fn fatal( + diag: *Diagnostics, + path: []const u8, + line: []const u8, + line_no: u32, + col: u32, + comptime fmt: []const u8, + args: anytype, +) Compilation.Error { + var m = MsgWriter.init(diag.color); + defer m.deinit(); + + m.location(path, line_no, col); + m.start(.@"fatal error"); + m.print(fmt, args); + m.end(line, col, false); + return error.FatalError; +} + +pub fn fatalNoSrc(diag: *Diagnostics, comptime fmt: []const u8, args: anytype) error{FatalError} { + if (!diag.color) { + std.debug.print("fatal error: " ++ fmt ++ "\n", args); + } else { + const std_err = std.io.getStdErr().writer(); + util.setColor(.red, std_err); + std_err.writeAll("fatal error: ") catch {}; + util.setColor(.white, std_err); + std_err.print(fmt ++ "\n", args) catch {}; + util.setColor(.reset, std_err); + } + return error.FatalError; +} + +pub fn render(comp: *Compilation) void { + if (comp.diag.list.items.len == 0) return; + var m = MsgWriter.init(comp.diag.color); + defer m.deinit(); + + renderExtra(comp, &m); +} + +pub fn renderExtra(comp: *Compilation, m: anytype) void { + var errors: u32 = 0; + var warnings: u32 = 0; + for (comp.diag.list.items) |msg| { + switch (msg.kind) { + .@"fatal error", .@"error" => errors += 1, + .warning => warnings += 1, + .note => {}, + .off => continue, // happens if an error is added before it is disabled + .default => unreachable, + } + + var line: ?[]const u8 = null; + var col = switch (msg.tag) { + .escape_sequence_overflow, + .invalid_universal_character, + // use msg.extra.unsigned for index into string literal + => @truncate(u32, msg.extra.unsigned), + else => 0, + }; + var width = col; + var end_with_splice = false; + if (msg.loc.id != .unused) { + const source = comp.getSource(msg.loc.id); + var line_col = source.lineCol(msg.loc); + line = line_col.line; + col += line_col.col; + width += line_col.width; + end_with_splice = line_col.end_with_splice; + if (msg.tag == .backslash_newline_escape) { + line = line_col.line[0 .. 
col - 1]; + col += 1; + width += 1; + } + m.location(source.path, line_col.line_no, col); + } + + m.start(msg.kind); + inline for (std.meta.fields(Tag)) |field| { + if (field.value == @enumToInt(msg.tag)) { + const info = @field(messages, field.name); + if (@hasDecl(info, "extra")) { + switch (info.extra) { + .str => m.print(info.msg, .{msg.extra.str}), + .tok_id => m.print(info.msg, .{ + msg.extra.tok_id.expected.symbol(), + msg.extra.tok_id.actual.symbol(), + }), + .tok_id_expected => m.print(info.msg, .{msg.extra.tok_id_expected.symbol()}), + .arguments => m.print(info.msg, .{ msg.extra.arguments.expected, msg.extra.arguments.actual }), + .codepoints => m.print(info.msg, .{ + msg.extra.codepoints.actual, + msg.extra.codepoints.resembles, + }), + .attr_arg_count => m.print(info.msg, .{ + @tagName(msg.extra.attr_arg_count.attribute), + msg.extra.attr_arg_count.expected, + }), + .attr_arg_type => m.print(info.msg, .{ + msg.extra.attr_arg_type.expected.toString(), + msg.extra.attr_arg_type.actual.toString(), + }), + .actual_codepoint => m.print(info.msg, .{msg.extra.actual_codepoint}), + .unsigned => m.print(info.msg, .{msg.extra.unsigned}), + .signed => m.print(info.msg, .{msg.extra.signed}), + .attr_enum => m.print(info.msg, .{ + @tagName(msg.extra.attr_enum.tag), + Attribute.Formatting.choices(msg.extra.attr_enum.tag), + }), + .ignored_record_attr => m.print(info.msg, .{ + @tagName(msg.extra.ignored_record_attr.tag), + @tagName(msg.extra.ignored_record_attr.specifier), + }), + else => unreachable, + } + } else { + m.write(info.msg); + } + + if (@hasDecl(info, "opt")) { + if (msg.kind == .@"error" and info.kind != .@"error") { + m.print(" [-Werror,-W{s}]", .{info.opt}); + } else if (msg.kind != .note) { + m.print(" [-W{s}]", .{info.opt}); + } + } + } + } + + m.end(line, width, end_with_splice); + } + const w_s: []const u8 = if (warnings == 1) "" else "s"; + const e_s: []const u8 = if (errors == 1) "" else "s"; + if (errors != 0 and warnings != 0) { + m.print("{d} warning{s} and {d} error{s} generated.\n", .{ warnings, w_s, errors, e_s }); + } else if (warnings != 0) { + m.print("{d} warning{s} generated.\n", .{ warnings, w_s }); + } else if (errors != 0) { + m.print("{d} error{s} generated.\n", .{ errors, e_s }); + } + + comp.diag.list.items.len = 0; + comp.diag.errors += errors; +} + +fn tagKind(diag: *Diagnostics, tag: Tag) Kind { + // XXX: horrible hack, do not do this + const comp = @fieldParentPtr(Compilation, "diag", diag); + + var kind: Kind = undefined; + inline for (std.meta.fields(Tag)) |field| { + if (field.value == @enumToInt(tag)) { + const info = @field(messages, field.name); + kind = info.kind; + + // stage1 doesn't like when I combine these ifs + if (@hasDecl(info, "all")) { + if (diag.options.all != .default) kind = diag.options.all; + } + if (@hasDecl(info, "w_extra")) { + if (diag.options.extra != .default) kind = diag.options.extra; + } + if (@hasDecl(info, "pedantic")) { + if (diag.options.pedantic != .default) kind = diag.options.pedantic; + } + if (@hasDecl(info, "opt")) { + if (@field(diag.options, info.opt) != .default) kind = @field(diag.options, info.opt); + } + if (@hasDecl(info, "suppress_version")) if (comp.langopts.standard.atLeast(info.suppress_version)) return .off; + if (@hasDecl(info, "suppress_gnu")) if (comp.langopts.standard.isExplicitGNU()) return .off; + if (@hasDecl(info, "suppress_language_option")) if (!@field(comp.langopts, info.suppress_language_option)) return .off; + if (kind == .@"error" and diag.fatal_errors) kind = .@"fatal error"; + 
return kind; + } + } + unreachable; +} + +const MsgWriter = struct { + w: std.io.BufferedWriter(4096, std.fs.File.Writer), + color: bool, + + fn init(color: bool) MsgWriter { + std.debug.getStderrMutex().lock(); + return .{ + .w = std.io.bufferedWriter(std.io.getStdErr().writer()), + .color = color, + }; + } + + fn deinit(m: *MsgWriter) void { + m.w.flush() catch {}; + std.debug.getStderrMutex().unlock(); + } + + fn print(m: *MsgWriter, comptime fmt: []const u8, args: anytype) void { + m.w.writer().print(fmt, args) catch {}; + } + + fn write(m: *MsgWriter, msg: []const u8) void { + m.w.writer().writeAll(msg) catch {}; + } + + fn setColor(m: *MsgWriter, color: util.Color) void { + util.setColor(color, m.w.writer()); + } + + fn location(m: *MsgWriter, path: []const u8, line: u32, col: u32) void { + const prefix = if (std.fs.path.dirname(path) == null and path[0] != '<') "." ++ std.fs.path.sep_str else ""; + if (!m.color) { + m.print("{s}{s}:{d}:{d}: ", .{ prefix, path, line, col }); + } else { + m.setColor(.white); + m.print("{s}{s}:{d}:{d}: ", .{ prefix, path, line, col }); + } + } + + fn start(m: *MsgWriter, kind: Kind) void { + if (!m.color) { + m.print("{s}: ", .{@tagName(kind)}); + } else { + switch (kind) { + .@"fatal error", .@"error" => m.setColor(.red), + .note => m.setColor(.cyan), + .warning => m.setColor(.purple), + .off, .default => unreachable, + } + m.write(switch (kind) { + .@"fatal error" => "fatal error: ", + .@"error" => "error: ", + .note => "note: ", + .warning => "warning: ", + .off, .default => unreachable, + }); + m.setColor(.white); + } + } + + fn end(m: *MsgWriter, maybe_line: ?[]const u8, col: u32, end_with_splice: bool) void { + const line = maybe_line orelse { + m.write("\n"); + return; + }; + const trailer = if (end_with_splice) "\\ " else ""; + if (!m.color) { + m.print("\n{s}{s}\n", .{ line, trailer }); + m.print("{s: >[1]}^\n", .{ "", col }); + } else { + m.setColor(.reset); + m.print("\n{s}{s}\n{s: >[3]}", .{ line, trailer, "", col }); + m.setColor(.green); + m.write("^\n"); + m.setColor(.reset); + } + } +}; diff --git a/src/aro/InitList.zig b/src/aro/InitList.zig new file mode 100644 index 000000000000..0e68e16d142b --- /dev/null +++ b/src/aro/InitList.zig @@ -0,0 +1,153 @@ +//! Sparsely populated list of used indexes. +//! Used for detecting duplicate initializers. +const std = @import("std"); +const Allocator = std.mem.Allocator; +const testing = std.testing; +const Tree = @import("Tree.zig"); +const Token = Tree.Token; +const TokenIndex = Tree.TokenIndex; +const NodeIndex = Tree.NodeIndex; +const Type = @import("Type.zig"); +const Diagnostics = @import("Diagnostics.zig"); +const NodeList = std.ArrayList(NodeIndex); +const Parser = @import("Parser.zig"); + +const InitList = @This(); + +const Item = struct { + list: InitList = .{}, + index: u64, + + fn order(_: void, a: Item, b: Item) std.math.Order { + return std.math.order(a.index, b.index); + } +}; + +list: std.ArrayListUnmanaged(Item) = .{}, +node: NodeIndex = .none, +tok: TokenIndex = 0, + +/// Deinitialize freeing all memory. +pub fn deinit(il: *InitList, gpa: Allocator) void { + for (il.list.items) |*item| item.list.deinit(gpa); + il.list.deinit(gpa); + il.* = undefined; +} + +/// Insert initializer at index, returning previous entry if one exists. 
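+/// Entries are kept sorted by index, so lookup is a binary search and insertion keeps the list ordered.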
+pub fn put(il: *InitList, gpa: Allocator, index: usize, node: NodeIndex, tok: TokenIndex) !?TokenIndex { + const items = il.list.items; + var left: usize = 0; + var right: usize = items.len; + + // Append new value to empty list + if (left == right) { + const item = try il.list.addOne(gpa); + item.* = .{ + .list = .{ .node = node, .tok = tok }, + .index = index, + }; + return null; + } + + while (left < right) { + // Avoid overflowing in the midpoint calculation + const mid = left + (right - left) / 2; + // Compare the key with the midpoint element + switch (std.math.order(index, items[mid].index)) { + .eq => { + // Replace previous entry. + const prev = items[mid].list.tok; + items[mid].list.deinit(gpa); + items[mid] = .{ + .list = .{ .node = node, .tok = tok }, + .index = index, + }; + return prev; + }, + .gt => left = mid + 1, + .lt => right = mid, + } + } + + // Insert a new value into a sorted position. + try il.list.insert(gpa, left, .{ + .list = .{ .node = node, .tok = tok }, + .index = index, + }); + return null; +} + +/// Find item at index, create new if one does not exist. +pub fn find(il: *InitList, gpa: Allocator, index: usize) !*InitList { + const items = il.list.items; + var left: usize = 0; + var right: usize = items.len; + + // Append new value to empty list + if (left == right) { + const item = try il.list.addOne(gpa); + item.* = .{ + .list = .{ .node = .none, .tok = 0 }, + .index = index, + }; + return &item.list; + } + + while (left < right) { + // Avoid overflowing in the midpoint calculation + const mid = left + (right - left) / 2; + // Compare the key with the midpoint element + switch (std.math.order(index, items[mid].index)) { + .eq => return &items[mid].list, + .gt => left = mid + 1, + .lt => right = mid, + } + } + + // Insert a new value into a sorted position. 
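+ // Unlike put, a missing entry is created empty (node == .none, tok == 0) so the caller can fill it in.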
+ try il.list.insert(gpa, left, .{ + .list = .{ .node = .none, .tok = 0 }, + .index = index, + }); + return &il.list.items[left].list; +} + +test "basic usage" { + const gpa = testing.allocator; + var il: InitList = .{}; + defer il.deinit(gpa); + + { + var i: usize = 0; + while (i < 5) : (i += 1) { + const prev = try il.put(gpa, i, .none, 0); + try testing.expect(prev == null); + } + } + + { + const failing = testing.failing_allocator; + var i: usize = 0; + while (i < 5) : (i += 1) { + _ = try il.find(failing, i); + } + } + + { + var item = try il.find(gpa, 0); + var i: usize = 1; + while (i < 5) : (i += 1) { + item = try item.find(gpa, i); + } + } + + { + const failing = testing.failing_allocator; + var item = try il.find(failing, 0); + var i: usize = 1; + while (i < 5) : (i += 1) { + item = try item.find(failing, i); + } + } +} diff --git a/src/aro/LangOpts.zig b/src/aro/LangOpts.zig new file mode 100644 index 000000000000..9fb582d41903 --- /dev/null +++ b/src/aro/LangOpts.zig @@ -0,0 +1,88 @@ +const std = @import("std"); +const DiagnosticTag = @import("Diagnostics.zig").Tag; + +const LangOpts = @This(); + +const Standard = enum { + /// ISO C 1990 + c89, + /// ISO C 1990 with amendment 1 + iso9899, + /// ISO C 1990 with GNU extensions + gnu89, + /// ISO C 1999 + c99, + /// ISO C 1999 with GNU extensions + gnu99, + /// ISO C 2011 + c11, + /// ISO C 2011 with GNU extensions + gnu11, + /// ISO C 2017 + c17, + /// Default value if nothing specified; adds the GNU keywords to + /// C17 but does not suppress warnings about using GNU extensions + default, + /// ISO C 2017 with GNU extensions + gnu17, + /// Working Draft for ISO C2x + c2x, + /// Working Draft for ISO C2x with GNU extensions + gnu2x, + + const NameMap = std.ComptimeStringMap(Standard, .{ + .{ "c89", .c89 }, .{ "c90", .c89 }, .{ "iso9899:1990", .c89 }, + .{ "iso9899:199409", .iso9899 }, .{ "gnu89", .gnu89 }, .{ "gnu90", .gnu89 }, + .{ "c99", .c99 }, .{ "iso9899:1999", .c99 }, .{ "gnu99", .gnu99 }, + .{ "c11", .c11 }, .{ "iso9899:2011", .c11 }, .{ "gnu11", .gnu11 }, + .{ "c17", .c17 }, .{ "iso9899:2017", .c17 }, .{ "c18", .c17 }, + .{ "iso9899:2018", .c17 }, .{ "gnu17", .gnu17 }, .{ "gnu18", .gnu17 }, + .{ "c2x", .c2x }, .{ "gnu2x", .gnu2x }, + }); + + pub fn atLeast(self: Standard, other: Standard) bool { + return @enumToInt(self) >= @enumToInt(other); + } + + pub fn isGNU(standard: Standard) bool { + return switch (standard) { + .gnu89, .gnu99, .gnu11, .default, .gnu17, .gnu2x => true, + else => false, + }; + } + + pub fn isExplicitGNU(standard: Standard) bool { + return standard.isGNU() and standard != .default; + } + + /// Value reported by __STDC_VERSION__ macro + pub fn StdCVersionMacro(standard: Standard) ?[]const u8 { + return switch (standard) { + .c89, .gnu89 => null, + .iso9899 => "199409L", + .c99, .gnu99 => "199901L", + .c11, .gnu11 => "201112L", + .default, .c17, .gnu17 => "201710L", + // todo: update once finalized; this currently matches clang + .c2x, .gnu2x => "201710L", + }; + } +}; + +standard: Standard = .default, +/// -fshort-enums option, makes enums only take up as much space as they need to hold all the values. 
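+/// Matches the behavior of the GCC/Clang flag of the same name.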
+short_enums: bool = false, +dollars_in_identifiers: bool = true, +declspec_attrs: bool = false, + +pub fn setStandard(self: *LangOpts, name: []const u8) error{InvalidStandard}!void { + self.standard = Standard.NameMap.get(name) orelse return error.InvalidStandard; +} + +pub fn enableMSExtensions(self: *LangOpts) void { + self.declspec_attrs = true; +} + +pub fn disableMSExtensions(self: *LangOpts) void { + self.declspec_attrs = false; +} diff --git a/src/aro/Parser.zig b/src/aro/Parser.zig new file mode 100644 index 000000000000..96a14739ee22 --- /dev/null +++ b/src/aro/Parser.zig @@ -0,0 +1,6269 @@ +const std = @import("std"); +const mem = std.mem; +const Allocator = mem.Allocator; +const assert = std.debug.assert; +const Compilation = @import("Compilation.zig"); +const Source = @import("Source.zig"); +const Tokenizer = @import("Tokenizer.zig"); +const Preprocessor = @import("Preprocessor.zig"); +const Tree = @import("Tree.zig"); +const Token = Tree.Token; +const TokenIndex = Tree.TokenIndex; +const NodeIndex = Tree.NodeIndex; +const Type = @import("Type.zig"); +const Diagnostics = @import("Diagnostics.zig"); +const NodeList = std.ArrayList(NodeIndex); +const InitList = @import("InitList.zig"); +const Attribute = @import("Attribute.zig"); +const CharInfo = @import("CharInfo.zig"); +const Value = @import("Value.zig"); + +const Parser = @This(); + +const Scope = union(enum) { + typedef: Symbol, + @"struct": Symbol, + @"union": Symbol, + @"enum": Symbol, + decl: Symbol, + def: Symbol, + param: Symbol, + enumeration: Enumeration, + loop, + @"switch": *Switch, + block, + + const Symbol = struct { + name: []const u8, + ty: Type, + name_tok: TokenIndex, + }; + + const Enumeration = struct { + name: []const u8, + value: Result, + name_tok: TokenIndex, + }; + + const Switch = struct { + cases: CaseMap, + default: ?Case = null, + + const ResultContext = struct { + ty: Type, + comp: *Compilation, + + pub fn eql(ctx: ResultContext, a: Result, b: Result) bool { + return a.val.compare(.eq, b.val, ctx.ty, ctx.comp); + } + pub fn hash(_: ResultContext, a: Result) u64 { + return a.val.hash(); + } + }; + const CaseMap = std.HashMap(Result, Case, ResultContext, std.hash_map.default_max_load_percentage); + const Case = struct { + node: NodeIndex, + tok: TokenIndex, + }; + }; +}; + +const Label = union(enum) { + unresolved_goto: TokenIndex, + label: TokenIndex, +}; + +pub const Error = Compilation.Error || error{ParsingFailed}; + +/// An attribute that has been parsed but not yet validated in its context +const TentativeAttribute = struct { + attr: Attribute, + tok: TokenIndex, +}; + +// values from preprocessor +pp: *Preprocessor, +tok_ids: []const Token.Id, +tok_i: TokenIndex = 0, + +// values of the incomplete Tree +arena: Allocator, +nodes: Tree.Node.List = .{}, +data: NodeList, +strings: std.ArrayList(u8), +value_map: Tree.ValueMap, + +// buffers used during compilation +scopes: std.ArrayList(Scope), +labels: std.ArrayList(Label), +list_buf: NodeList, +decl_buf: NodeList, +param_buf: std.ArrayList(Type.Func.Param), +enum_buf: std.ArrayList(Type.Enum.Field), +record_buf: std.ArrayList(Type.Record.Field), +attr_buf: std.MultiArrayList(TentativeAttribute) = .{}, + +// configuration and miscellaneous info +no_eval: bool = false, +in_macro: bool = false, +extension_suppressed: bool = false, +contains_address_of_label: bool = false, +label_count: u32 = 0, +/// location of first computed goto in function currently being parsed +/// if a computed goto is used, the function must contain an +/// address-of-label 
expression (tracked with contains_address_of_label) +computed_goto_tok: ?TokenIndex = null, + +/// Various variables that are different for each function. +func: struct { + /// null if not in function, will always be plain func, var_args_func or old_style_func + ty: ?Type = null, + name: TokenIndex = 0, + ident: ?Result = null, + pretty_ident: ?Result = null, +} = .{}, +/// Various variables that are different for each record. +record: struct { + // invalid means we're not parsing a record + kind: Token.Id = .invalid, + flexible_field: ?TokenIndex = null, + scopes_top: usize = undefined, + + fn addField(r: @This(), p: *Parser, name_tok: TokenIndex) Error!void { + const name = p.tokSlice(name_tok); + var i = p.scopes.items.len; + while (i > r.scopes_top) { + i -= 1; + switch (p.scopes.items[i]) { + .def => |d| if (mem.eql(u8, d.name, name)) { + try p.errStr(.duplicate_member, name_tok, name); + try p.errTok(.previous_definition, d.name_tok); + break; + }, + else => {}, + } + } + try p.scopes.append(.{ + .def = .{ + .name = name, + .name_tok = name_tok, + .ty = undefined, // unused + }, + }); + } + + fn addFieldsFromAnonymous(r: @This(), p: *Parser, ty: Type) Error!void { + for (ty.data.record.fields) |f| { + if (f.isAnonymousRecord()) { + try r.addFieldsFromAnonymous(p, f.ty.canonicalize(.standard)); + } else if (f.name_tok != 0) { + try r.addField(p, f.name_tok); + } + } + } +} = .{}, + +fn checkIdentifierCodepoint(comp: *Compilation, codepoint: u21, loc: Source.Location) Compilation.Error!bool { + if (codepoint <= 0x7F) return false; + var diagnosed = false; + if (!CharInfo.isC99IdChar(codepoint)) { + try comp.diag.add(.{ + .tag = .c99_compat, + .loc = loc, + }, &.{}); + diagnosed = true; + } + if (CharInfo.isInvisible(codepoint)) { + try comp.diag.add(.{ + .tag = .unicode_zero_width, + .loc = loc, + .extra = .{ .actual_codepoint = codepoint }, + }, &.{}); + diagnosed = true; + } + if (CharInfo.homoglyph(codepoint)) |resembles| { + try comp.diag.add(.{ + .tag = .unicode_homoglyph, + .loc = loc, + .extra = .{ .codepoints = .{ .actual = codepoint, .resembles = resembles } }, + }, &.{}); + diagnosed = true; + } + return diagnosed; +} + +fn eatIdentifier(p: *Parser) !?TokenIndex { + switch (p.tok_ids[p.tok_i]) { + .identifier => {}, + .extended_identifier => { + const slice = p.tokSlice(p.tok_i); + var it = std.unicode.Utf8View.initUnchecked(slice).iterator(); + var loc = p.pp.tokens.items(.loc)[p.tok_i]; + + if (mem.indexOfScalar(u8, slice, '$')) |i| { + loc.byte_offset += @intCast(u32, i); + try p.pp.comp.diag.add(.{ + .tag = .dollar_in_identifier_extension, + .loc = loc, + }, &.{}); + loc = p.pp.tokens.items(.loc)[p.tok_i]; + } + + while (it.nextCodepoint()) |c| { + if (try checkIdentifierCodepoint(p.pp.comp, c, loc)) break; + loc.byte_offset += std.unicode.utf8CodepointSequenceLength(c) catch unreachable; + } + }, + else => return null, + } + p.tok_i += 1; + + // Handle illegal '$' characters in identifiers + if (!p.pp.comp.langopts.dollars_in_identifiers) { + if (p.tok_ids[p.tok_i] == .invalid and p.tokSlice(p.tok_i)[0] == '$') { + try p.err(.dollars_in_identifiers); + p.tok_i += 1; + return error.ParsingFailed; + } + } + + return p.tok_i - 1; +} + +fn expectIdentifier(p: *Parser) Error!TokenIndex { + const actual = p.tok_ids[p.tok_i]; + if (actual != .identifier and actual != .extended_identifier) { + return p.errExpectedToken(.identifier, actual); + } + + return (try p.eatIdentifier()) orelse unreachable; +} + +fn eatToken(p: *Parser, id: Token.Id) ?TokenIndex { + assert(id != 
.identifier and id != .extended_identifier); // use eatIdentifier + if (p.tok_ids[p.tok_i] == id) { + defer p.tok_i += 1; + return p.tok_i; + } else return null; +} + +fn expectToken(p: *Parser, expected: Token.Id) Error!TokenIndex { + assert(expected != .identifier and expected != .extended_identifier); // use expectIdentifier + const actual = p.tok_ids[p.tok_i]; + if (actual != expected) return p.errExpectedToken(expected, actual); + defer p.tok_i += 1; + return p.tok_i; +} + +fn tokSlice(p: *Parser, tok: TokenIndex) []const u8 { + if (p.tok_ids[tok].lexeme()) |some| return some; + const loc = p.pp.tokens.items(.loc)[tok]; + var tmp_tokenizer = Tokenizer{ + .buf = p.pp.comp.getSource(loc.id).buf, + .comp = p.pp.comp, + .index = loc.byte_offset, + .source = .generated, + }; + const res = tmp_tokenizer.next(); + return tmp_tokenizer.buf[res.start..res.end]; +} + +fn expectClosing(p: *Parser, opening: TokenIndex, id: Token.Id) Error!void { + _ = p.expectToken(id) catch |e| { + if (e == error.ParsingFailed) { + try p.errTok(switch (id) { + .r_paren => .to_match_paren, + .r_brace => .to_match_brace, + .r_bracket => .to_match_brace, + else => unreachable, + }, opening); + } + return e; + }; +} + +fn errOverflow(p: *Parser, op_tok: TokenIndex, res: Result) !void { + if (res.ty.isUnsignedInt(p.pp.comp)) { + try p.errExtra(.overflow_unsigned, op_tok, .{ .unsigned = res.val.data.int }); + } else { + try p.errExtra(.overflow_signed, op_tok, .{ .signed = res.val.signExtend(res.ty, p.pp.comp) }); + } +} + +fn errExpectedToken(p: *Parser, expected: Token.Id, actual: Token.Id) Error { + switch (actual) { + .invalid => try p.errExtra(.expected_invalid, p.tok_i, .{ .tok_id_expected = expected }), + .eof => try p.errExtra(.expected_eof, p.tok_i, .{ .tok_id_expected = expected }), + else => try p.errExtra(.expected_token, p.tok_i, .{ .tok_id = .{ + .expected = expected, + .actual = actual, + } }), + } + return error.ParsingFailed; +} + +pub fn errStr(p: *Parser, tag: Diagnostics.Tag, tok_i: TokenIndex, str: []const u8) Compilation.Error!void { + @setCold(true); + return p.errExtra(tag, tok_i, .{ .str = str }); +} + +pub fn errExtra(p: *Parser, tag: Diagnostics.Tag, tok_i: TokenIndex, extra: Diagnostics.Message.Extra) Compilation.Error!void { + @setCold(true); + const tok = p.pp.tokens.get(tok_i); + var loc = tok.loc; + if (tok_i != 0 and tok.id == .eof) { + // if the token is EOF, point at the end of the previous token instead + const prev = p.pp.tokens.get(tok_i - 1); + loc = prev.loc; + loc.byte_offset += @intCast(u32, p.tokSlice(tok_i - 1).len); + } + try p.pp.comp.diag.add(.{ + .tag = tag, + .loc = loc, + .extra = extra, + }, tok.expansionSlice()); +} + +pub fn errTok(p: *Parser, tag: Diagnostics.Tag, tok_i: TokenIndex) Compilation.Error!void { + @setCold(true); + return p.errExtra(tag, tok_i, .{ .none = {} }); +} + +pub fn err(p: *Parser, tag: Diagnostics.Tag) Compilation.Error!void { + @setCold(true); + return p.errTok(tag, p.tok_i); +} + +pub fn todo(p: *Parser, msg: []const u8) Error { + try p.errStr(.todo, p.tok_i, msg); + return error.ParsingFailed; +} + +pub fn ignoredAttrStr(p: *Parser, attr: Attribute.Tag, context: Attribute.ParseContext) ![]const u8 { + const strings_top = p.strings.items.len; + defer p.strings.items.len = strings_top; + + try p.strings.writer().print("Attribute '{s}' ignored in {s} context", .{ @tagName(attr), @tagName(context) }); + return try p.pp.comp.diag.arena.allocator().dupe(u8, p.strings.items[strings_top..]); +} + +pub fn typeStr(p: *Parser, ty: Type) ![]const u8 { 
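+ // p.strings is used as scratch space below; the formatted name is duped into the diagnostics arena so it remains valid after the scratch buffer is reset on return.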
+ if (Type.Builder.fromType(ty).str()) |str| return str; + const strings_top = p.strings.items.len; + defer p.strings.items.len = strings_top; + + try ty.print(p.strings.writer()); + return try p.pp.comp.diag.arena.allocator().dupe(u8, p.strings.items[strings_top..]); +} + +pub fn typePairStr(p: *Parser, a: Type, b: Type) ![]const u8 { + return p.typePairStrExtra(a, " and ", b); +} + +pub fn typePairStrExtra(p: *Parser, a: Type, msg: []const u8, b: Type) ![]const u8 { + const strings_top = p.strings.items.len; + defer p.strings.items.len = strings_top; + + try p.strings.append('\''); + try a.print(p.strings.writer()); + try p.strings.append('\''); + try p.strings.appendSlice(msg); + try p.strings.append('\''); + try b.print(p.strings.writer()); + try p.strings.append('\''); + return try p.pp.comp.diag.arena.allocator().dupe(u8, p.strings.items[strings_top..]); +} + +fn checkDeprecatedUnavailable(p: *Parser, ty: Type, usage_tok: TokenIndex, decl_tok: TokenIndex) !void { + if (ty.getAttribute(.unavailable)) |unavailable| { + try p.errDeprecated(.unavailable, usage_tok, unavailable.msg); + try p.errStr(.unavailable_note, unavailable.__name_tok, p.tokSlice(decl_tok)); + return error.ParsingFailed; + } else if (ty.getAttribute(.deprecated)) |deprecated| { + try p.errDeprecated(.deprecated_declarations, usage_tok, deprecated.msg); + try p.errStr(.deprecated_note, deprecated.__name_tok, p.tokSlice(decl_tok)); + } +} + +fn errDeprecated(p: *Parser, tag: Diagnostics.Tag, tok_i: TokenIndex, msg: ?[]const u8) Compilation.Error!void { + const strings_top = p.strings.items.len; + defer p.strings.items.len = strings_top; + + const w = p.strings.writer(); + try w.print("'{s}' is ", .{p.tokSlice(tok_i)}); + const reason: []const u8 = switch (tag) { + .unavailable => "unavailable", + .deprecated_declarations => "deprecated", + else => unreachable, + }; + try w.writeAll(reason); + if (msg) |m| { + try w.print(": {s}", .{m}); + } + const str = try p.pp.comp.diag.arena.allocator().dupe(u8, p.strings.items[strings_top..]); + return p.errStr(tag, tok_i, str); +} + +fn addNode(p: *Parser, node: Tree.Node) Allocator.Error!NodeIndex { + if (p.in_macro) return .none; + const res = p.nodes.len; + try p.nodes.append(p.pp.comp.gpa, node); + return @intToEnum(NodeIndex, res); +} + +fn addList(p: *Parser, nodes: []const NodeIndex) Allocator.Error!Tree.Node.Range { + if (p.in_macro) return Tree.Node.Range{ .start = 0, .end = 0 }; + const start = @intCast(u32, p.data.items.len); + try p.data.appendSlice(nodes); + const end = @intCast(u32, p.data.items.len); + return Tree.Node.Range{ .start = start, .end = end }; +} + +fn findTypedef(p: *Parser, name_tok: TokenIndex, no_type_yet: bool) !?Scope.Symbol { + const name = p.tokSlice(name_tok); + var i = p.scopes.items.len; + while (i > 0) { + i -= 1; + switch (p.scopes.items[i]) { + .typedef => |t| if (mem.eql(u8, t.name, name)) return t, + .@"struct" => |s| if (mem.eql(u8, s.name, name)) { + if (no_type_yet) return null; + try p.errStr(.must_use_struct, name_tok, name); + return s; + }, + .@"union" => |u| if (mem.eql(u8, u.name, name)) { + if (no_type_yet) return null; + try p.errStr(.must_use_union, name_tok, name); + return u; + }, + .@"enum" => |e| if (mem.eql(u8, e.name, name)) { + if (no_type_yet) return null; + try p.errStr(.must_use_enum, name_tok, name); + return e; + }, + .def, .decl => |d| if (mem.eql(u8, d.name, name)) return null, + else => {}, + } + } + return null; +} + +fn findSymbol(p: *Parser, name_tok: TokenIndex, ref_kind: enum { reference, definition }) 
?Scope { + const name = p.tokSlice(name_tok); + var i = p.scopes.items.len; + while (i > 0) { + i -= 1; + const sym = p.scopes.items[i]; + switch (sym) { + .def, .decl, .param => |s| if (mem.eql(u8, s.name, name)) return sym, + .enumeration => |e| if (mem.eql(u8, e.name, name)) return sym, + .block => if (ref_kind == .definition) return null, + else => {}, + } + } + return null; +} + +fn findTag(p: *Parser, kind: Token.Id, name_tok: TokenIndex, ref_kind: enum { reference, definition }) !?Scope.Symbol { + const name = p.tokSlice(name_tok); + var i = p.scopes.items.len; + var saw_block = false; + while (i > 0) { + i -= 1; + const sym = p.scopes.items[i]; + switch (sym) { + .@"enum" => |e| if (mem.eql(u8, e.name, name)) { + if (kind == .keyword_enum) return e; + if (saw_block) return null; + try p.errStr(.wrong_tag, name_tok, name); + try p.errTok(.previous_definition, e.name_tok); + return null; + }, + .@"struct" => |s| if (mem.eql(u8, s.name, name)) { + if (kind == .keyword_struct) return s; + if (saw_block) return null; + try p.errStr(.wrong_tag, name_tok, name); + try p.errTok(.previous_definition, s.name_tok); + return null; + }, + .@"union" => |u| if (mem.eql(u8, u.name, name)) { + if (kind == .keyword_union) return u; + if (saw_block) return null; + try p.errStr(.wrong_tag, name_tok, name); + try p.errTok(.previous_definition, u.name_tok); + return null; + }, + .block => if (ref_kind == .reference) { + saw_block = true; + } else return null, + else => {}, + } + } + return null; +} + +fn inLoop(p: *Parser) bool { + var i = p.scopes.items.len; + while (i > 0) { + i -= 1; + switch (p.scopes.items[i]) { + .loop => return true, + else => {}, + } + } + return false; +} + +fn inLoopOrSwitch(p: *Parser) bool { + var i = p.scopes.items.len; + while (i > 0) { + i -= 1; + switch (p.scopes.items[i]) { + .loop, .@"switch" => return true, + else => {}, + } + } + return false; +} + +fn findLabel(p: *Parser, name: []const u8) ?TokenIndex { + for (p.labels.items) |item| { + switch (item) { + .label => |l| if (mem.eql(u8, p.tokSlice(l), name)) return l, + .unresolved_goto => {}, + } + } + return null; +} + +fn findSwitch(p: *Parser) ?*Scope.Switch { + var i = p.scopes.items.len; + while (i > 0) { + i -= 1; + switch (p.scopes.items[i]) { + .@"switch" => |s| return s, + else => {}, + } + } + return null; +} + +fn nodeIs(p: *Parser, node: NodeIndex, tag: Tree.Tag) bool { + return p.getNode(node, tag) != null; +} + +fn getNode(p: *Parser, node: NodeIndex, tag: Tree.Tag) ?NodeIndex { + var cur = node; + const tags = p.nodes.items(.tag); + const data = p.nodes.items(.data); + while (true) { + const cur_tag = tags[@enumToInt(cur)]; + if (cur_tag == .paren_expr) { + cur = data[@enumToInt(cur)].un; + } else if (cur_tag == tag) { + return cur; + } else { + return null; + } + } +} + +fn pragma(p: *Parser) Compilation.Error!bool { + var found_pragma = false; + while (p.eatToken(.keyword_pragma)) |_| { + found_pragma = true; + + const name_tok = p.tok_i; + const name = p.tokSlice(name_tok); + + const end_idx = mem.indexOfScalarPos(Token.Id, p.tok_ids, p.tok_i, .nl).?; + const pragma_len = @intCast(TokenIndex, end_idx) - p.tok_i; + defer p.tok_i += pragma_len + 1; // skip past .nl as well + if (p.pp.comp.getPragma(name)) |prag| { + try prag.parserCB(p, p.tok_i); + } + } + return found_pragma; +} + +/// root : (decl | assembly ';' | staticAssert)* +pub fn parse(pp: *Preprocessor) Compilation.Error!Tree { + pp.comp.pragmaEvent(.before_parse); + + var arena = std.heap.ArenaAllocator.init(pp.comp.gpa); + errdefer 
arena.deinit(); + var p = Parser{ + .pp = pp, + .arena = arena.allocator(), + .tok_ids = pp.tokens.items(.id), + .strings = std.ArrayList(u8).init(pp.comp.gpa), + .value_map = Tree.ValueMap.init(pp.comp.gpa), + .data = NodeList.init(pp.comp.gpa), + .labels = std.ArrayList(Label).init(pp.comp.gpa), + .scopes = std.ArrayList(Scope).init(pp.comp.gpa), + .list_buf = NodeList.init(pp.comp.gpa), + .decl_buf = NodeList.init(pp.comp.gpa), + .param_buf = std.ArrayList(Type.Func.Param).init(pp.comp.gpa), + .enum_buf = std.ArrayList(Type.Enum.Field).init(pp.comp.gpa), + .record_buf = std.ArrayList(Type.Record.Field).init(pp.comp.gpa), + }; + errdefer { + p.nodes.deinit(pp.comp.gpa); + p.strings.deinit(); + p.value_map.deinit(); + } + defer { + p.data.deinit(); + p.labels.deinit(); + p.scopes.deinit(); + p.list_buf.deinit(); + p.decl_buf.deinit(); + p.param_buf.deinit(); + p.enum_buf.deinit(); + p.record_buf.deinit(); + p.attr_buf.deinit(pp.comp.gpa); + } + + // NodeIndex 0 must be invalid + _ = try p.addNode(.{ .tag = .invalid, .ty = undefined, .data = undefined }); + + { + const ty = &pp.comp.types.va_list; + const sym = Scope.Symbol{ .name = "__builtin_va_list", .ty = ty.*, .name_tok = 0 }; + try p.scopes.append(.{ .typedef = sym }); + + if (ty.isArray()) ty.decayArray(); + } + + while (p.eatToken(.eof) == null) { + if (try p.pragma()) continue; + if (try p.parseOrNextDecl(staticAssert)) continue; + if (try p.parseOrNextDecl(decl)) continue; + if (p.eatToken(.keyword_extension)) |_| { + const saved_extension = p.extension_suppressed; + defer p.extension_suppressed = saved_extension; + p.extension_suppressed = true; + + if (try p.parseOrNextDecl(decl)) continue; + switch (p.tok_ids[p.tok_i]) { + .semicolon => p.tok_i += 1, + .keyword_static_assert, + .keyword_pragma, + .keyword_extension, + .keyword_asm, + .keyword_asm1, + .keyword_asm2, + => {}, + else => try p.err(.expected_external_decl), + } + continue; + } + if (p.assembly(.global) catch |er| switch (er) { + error.ParsingFailed => { + p.nextExternDecl(); + continue; + }, + else => |e| return e, + }) |_| continue; + if (p.eatToken(.semicolon)) |tok| { + try p.errTok(.extra_semi, tok); + continue; + } + try p.err(.expected_external_decl); + p.tok_i += 1; + } + const root_decls = p.decl_buf.toOwnedSlice(); + if (root_decls.len == 0) { + try p.errTok(.empty_translation_unit, p.tok_i - 1); + } + pp.comp.pragmaEvent(.after_parse); + return Tree{ + .comp = pp.comp, + .tokens = pp.tokens.slice(), + .arena = arena, + .generated = pp.comp.generated_buf.items, + .nodes = p.nodes.toOwnedSlice(), + .data = p.data.toOwnedSlice(), + .root_decls = root_decls, + .strings = p.strings.toOwnedSlice(), + .value_map = p.value_map, + }; +} + +fn skipToPragmaSentinel(p: *Parser) void { + while (true) : (p.tok_i += 1) { + if (p.tok_ids[p.tok_i] == .nl) return; + if (p.tok_ids[p.tok_i] == .eof) { + p.tok_i -= 1; + return; + } + } +} + +fn parseOrNextDecl(p: *Parser, comptime func: fn (*Parser) Error!bool) Compilation.Error!bool { + return func(p) catch |er| switch (er) { + error.ParsingFailed => { + p.nextExternDecl(); + return true; + }, + else => |e| return e, + }; +} + +fn nextExternDecl(p: *Parser) void { + var parens: u32 = 0; + while (true) : (p.tok_i += 1) { + switch (p.tok_ids[p.tok_i]) { + .l_paren, .l_brace, .l_bracket => parens += 1, + .r_paren, .r_brace, .r_bracket => if (parens != 0) { + parens -= 1; + }, + .keyword_typedef, + .keyword_extern, + .keyword_static, + .keyword_auto, + .keyword_register, + .keyword_thread_local, + .keyword_inline, + 
.keyword_inline1, + .keyword_inline2, + .keyword_noreturn, + .keyword_void, + .keyword_bool, + .keyword_char, + .keyword_short, + .keyword_int, + .keyword_long, + .keyword_signed, + .keyword_unsigned, + .keyword_float, + .keyword_double, + .keyword_complex, + .keyword_atomic, + .keyword_enum, + .keyword_struct, + .keyword_union, + .keyword_alignas, + .identifier, + .extended_identifier, + .keyword_typeof, + .keyword_typeof1, + .keyword_typeof2, + .keyword_extension, + => if (parens == 0) return, + .keyword_pragma => p.skipToPragmaSentinel(), + .eof => return, + .semicolon => if (parens == 0) { + p.tok_i += 1; + return; + }, + else => {}, + } + } +} + +fn skipTo(p: *Parser, id: Token.Id) void { + var parens: u32 = 0; + while (true) : (p.tok_i += 1) { + if (p.tok_ids[p.tok_i] == id and parens == 0) { + p.tok_i += 1; + return; + } + switch (p.tok_ids[p.tok_i]) { + .l_paren, .l_brace, .l_bracket => parens += 1, + .r_paren, .r_brace, .r_bracket => if (parens != 0) { + parens -= 1; + }, + .keyword_pragma => p.skipToPragmaSentinel(), + .eof => return, + else => {}, + } + } +} + +pub fn withAttributes(p: *Parser, ty: Type, attr_buf_start: usize) !Type { + const attrs = p.attr_buf.items(.attr)[attr_buf_start..]; + return ty.withAttributes(p.arena, attrs); +} + +// ====== declarations ====== + +/// decl +/// : declSpec (initDeclarator ( ',' initDeclarator)*)? ';' +/// | declSpec declarator decl* compoundStmt +fn decl(p: *Parser) Error!bool { + _ = try p.pragma(); + const first_tok = p.tok_i; + const attr_buf_top = p.attr_buf.len; + defer p.attr_buf.len = attr_buf_top; + + try p.attributeSpecifier(); + + var decl_spec = if (try p.declSpec(false)) |some| some else blk: { + if (p.func.ty != null) { + p.tok_i = first_tok; + return false; + } + switch (p.tok_ids[first_tok]) { + .asterisk, .l_paren, .identifier, .extended_identifier => {}, + else => if (p.tok_i != first_tok) { + try p.err(.expected_ident_or_l_paren); + return error.ParsingFailed; + } else return false, + } + var spec: Type.Builder = .{}; + break :blk DeclSpec{ .ty = try spec.finish(p, p.attr_buf.len) }; + }; + if (decl_spec.@"noreturn") |tok| { + const attr = Attribute{ .tag = .noreturn, .args = .{ .noreturn = {} } }; + try p.attr_buf.append(p.pp.comp.gpa, .{ .attr = attr, .tok = tok }); + } + try decl_spec.warnIgnoredAttrs(p, attr_buf_top); + var init_d = (try p.initDeclarator(&decl_spec)) orelse { + _ = try p.expectToken(.semicolon); + if (decl_spec.ty.is(.@"enum") or + (decl_spec.ty.isRecord() and !decl_spec.ty.isAnonymousRecord() and + !decl_spec.ty.isTypeof())) // we follow GCC and clang's behavior here + return true; + + try p.errTok(.missing_declaration, first_tok); + return true; + }; + + init_d.d.ty = try p.withAttributes(init_d.d.ty, attr_buf_top); + try p.validateAlignas(init_d.d.ty, null); + + // Check for function definition. 
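+    // A definition is distinguished from a declaration here by a function
+    // declarator with no initializer, followed by something other than ',' or
+    // ';' (e.g. `int f(int x) { return x; }` vs. `int f(int x);`).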
+ if (init_d.d.func_declarator != null and init_d.initializer == .none and init_d.d.ty.isFunc()) fn_def: { + switch (p.tok_ids[p.tok_i]) { + .comma, .semicolon => break :fn_def, + .l_brace => {}, + else => if (init_d.d.old_style_func == null) { + try p.err(.expected_fn_body); + return true; + }, + } + if (p.func.ty != null) try p.err(.func_not_in_root); + + if (p.findSymbol(init_d.d.name, .definition)) |sym| { + if (sym == .def) { + try p.errStr(.redefinition, init_d.d.name, p.tokSlice(init_d.d.name)); + try p.errTok(.previous_definition, sym.def.name_tok); + } + } + try p.scopes.append(.{ .def = .{ + .name = p.tokSlice(init_d.d.name), + .ty = init_d.d.ty, + .name_tok = init_d.d.name, + } }); + + const func = p.func; + p.func = .{ + .ty = init_d.d.ty, + .name = init_d.d.name, + }; + defer p.func = func; + + const scopes_top = p.scopes.items.len; + defer p.scopes.items.len = scopes_top; + + // findSymbol stops the search at .block + try p.scopes.append(.block); + + // Collect old style parameter declarations. + if (init_d.d.old_style_func != null) { + const attrs = init_d.d.ty.getAttributes(); + var base_ty = if (init_d.d.ty.specifier == .attributed) init_d.d.ty.elemType() else init_d.d.ty; + base_ty.specifier = .func; + init_d.d.ty = try base_ty.withAttributes(p.arena, attrs); + + const param_buf_top = p.param_buf.items.len; + defer p.param_buf.items.len = param_buf_top; + + param_loop: while (true) { + const param_decl_spec = (try p.declSpec(true)) orelse break; + if (p.eatToken(.semicolon)) |semi| { + try p.errTok(.missing_declaration, semi); + continue :param_loop; + } + + while (true) { + var d = (try p.declarator(param_decl_spec.ty, .normal)) orelse { + try p.errTok(.missing_declaration, first_tok); + _ = try p.expectToken(.semicolon); + continue :param_loop; + }; + if (d.ty.hasIncompleteSize() and !d.ty.is(.void)) try p.errStr(.parameter_incomplete_ty, d.name, try p.typeStr(d.ty)); + if (d.ty.isFunc()) { + // Params declared as functions are converted to function pointers. 
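+                    // (C11 6.7.6.3p8): e.g. in `int f(g) int g(void); { ... }` the
+                    // parameter `g` is adjusted to type `int (*)(void)`.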
+ const elem_ty = try p.arena.create(Type); + elem_ty.* = d.ty; + d.ty = Type{ + .specifier = .pointer, + .data = .{ .sub_type = elem_ty }, + }; + } else if (d.ty.isArray()) { + // params declared as arrays are converted to pointers + d.ty.decayArray(); + } else if (d.ty.is(.void)) { + try p.errTok(.invalid_void_param, d.name); + } + + // find and correct parameter types + // TODO check for missing declarations and redefinitions + const name_str = p.tokSlice(d.name); + for (init_d.d.ty.params()) |*param| { + if (mem.eql(u8, param.name, name_str)) { + param.ty = d.ty; + break; + } + } else { + try p.errStr(.parameter_missing, d.name, name_str); + } + + try p.scopes.append(.{ .param = .{ + .name = name_str, + .name_tok = d.name, + .ty = d.ty, + } }); + if (p.eatToken(.comma) == null) break; + } + _ = try p.expectToken(.semicolon); + } + } else { + for (init_d.d.ty.params()) |param| { + if (param.ty.hasUnboundVLA()) try p.errTok(.unbound_vla, param.name_tok); + if (param.ty.hasIncompleteSize() and !param.ty.is(.void)) try p.errStr(.parameter_incomplete_ty, param.name_tok, try p.typeStr(param.ty)); + + if (param.name.len == 0) { + try p.errTok(.omitting_parameter_name, param.name_tok); + continue; + } + + try p.scopes.append(.{ + .param = .{ + .name = param.name, + .ty = param.ty, + .name_tok = param.name_tok, + }, + }); + } + } + + const body = (try p.compoundStmt(true, null)) orelse { + assert(init_d.d.old_style_func != null); + try p.err(.expected_fn_body); + return true; + }; + const node = try p.addNode(.{ + .ty = init_d.d.ty, + .tag = try decl_spec.validateFnDef(p), + .data = .{ .decl = .{ .name = init_d.d.name, .node = body } }, + }); + try p.decl_buf.append(node); + + // check gotos + if (func.ty == null) { + for (p.labels.items) |item| { + if (item == .unresolved_goto) + try p.errStr(.undeclared_label, item.unresolved_goto, p.tokSlice(item.unresolved_goto)); + } + if (p.computed_goto_tok) |goto_tok| { + if (!p.contains_address_of_label) try p.errTok(.invalid_computed_goto, goto_tok); + } + p.labels.items.len = 0; + p.label_count = 0; + p.contains_address_of_label = false; + p.computed_goto_tok = null; + } + return true; + } + + // Declare all variable/typedef declarators. 
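+    // Not a function definition; validate and declare each init-declarator in
+    // the comma-separated list (e.g. `int a = 1, *b, c[2];`), registering it in
+    // the current scope as a typedef, definition, or declaration.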
+    while (true) {
+        if (init_d.d.old_style_func) |tok_i| try p.errTok(.invalid_old_style_params, tok_i);
+        const tag = try decl_spec.validate(p, &init_d.d.ty, init_d.initializer != .none);
+        // const attrs = p.attr_buf.items(.attr)[attr_buf_top..];
+        // init_d.d.ty = try init_d.d.ty.withAttributes(p.arena, attrs);
+
+        const node = try p.addNode(.{ .ty = init_d.d.ty, .tag = tag, .data = .{
+            .decl = .{ .name = init_d.d.name, .node = init_d.initializer },
+        } });
+        try p.decl_buf.append(node);
+
+        const sym = Scope.Symbol{
+            .name = p.tokSlice(init_d.d.name),
+            .ty = init_d.d.ty,
+            .name_tok = init_d.d.name,
+        };
+        if (decl_spec.storage_class == .typedef) {
+            try p.scopes.append(.{ .typedef = sym });
+        } else if (init_d.initializer != .none) {
+            try p.scopes.append(.{ .def = sym });
+        } else {
+            try p.scopes.append(.{ .decl = sym });
+        }
+
+        if (p.eatToken(.comma) == null) break;
+
+        init_d = (try p.initDeclarator(&decl_spec)) orelse {
+            try p.err(.expected_ident_or_l_paren);
+            continue;
+        };
+    }
+
+    _ = try p.expectToken(.semicolon);
+    return true;
+}
+
+/// staticAssert : keyword_static_assert '(' constExpr (',' STRING_LITERAL)? ')' ';'
+fn staticAssert(p: *Parser) Error!bool {
+    const static_assert = p.eatToken(.keyword_static_assert) orelse return false;
+    const l_paren = try p.expectToken(.l_paren);
+    const res_token = p.tok_i;
+    const res = try p.constExpr();
+    const str = if (p.eatToken(.comma) != null)
+        switch (p.tok_ids[p.tok_i]) {
+            .string_literal,
+            .string_literal_utf_16,
+            .string_literal_utf_8,
+            .string_literal_utf_32,
+            .string_literal_wide,
+            => try p.stringLiteral(),
+            else => {
+                try p.err(.expected_str_literal);
+                return error.ParsingFailed;
+            },
+        }
+    else
+        Result{};
+    try p.expectClosing(l_paren, .r_paren);
+    _ = try p.expectToken(.semicolon);
+    if (str.node == .none) try p.errTok(.static_assert_missing_message, static_assert);
+
+    if (res.val.tag == .unavailable) {
+        // an unavailable sizeof expression is already a compile error, so we don't emit
This matches the behavior + // of gcc/clang + if (!p.nodeIs(res.node, .sizeof_expr)) try p.errTok(.static_assert_not_constant, res_token); + } else if (!res.val.getBool()) { + if (str.node != .none) { + var buf = std.ArrayList(u8).init(p.pp.comp.gpa); + defer buf.deinit(); + + const data = str.val.data.bytes; + try buf.ensureUnusedCapacity(data.len); + try Tree.dumpStr( + data, + p.nodes.items(.tag)[@enumToInt(str.node)], + buf.writer(), + ); + try p.errStr( + .static_assert_failure_message, + static_assert, + try p.pp.comp.diag.arena.allocator().dupe(u8, buf.items), + ); + } else try p.errTok(.static_assert_failure, static_assert); + } + const node = try p.addNode(.{ + .tag = .static_assert, + .data = .{ .bin = .{ + .lhs = res.node, + .rhs = str.node, + } }, + }); + try p.decl_buf.append(node); + return true; +} + +pub const DeclSpec = struct { + storage_class: union(enum) { + auto: TokenIndex, + @"extern": TokenIndex, + register: TokenIndex, + static: TokenIndex, + typedef: TokenIndex, + none, + } = .none, + thread_local: ?TokenIndex = null, + @"inline": ?TokenIndex = null, + @"noreturn": ?TokenIndex = null, + ty: Type, + + fn validateParam(d: DeclSpec, p: *Parser, ty: *Type) Error!void { + switch (d.storage_class) { + .none => {}, + .register => ty.qual.register = true, + .auto, .@"extern", .static, .typedef => |tok_i| try p.errTok(.invalid_storage_on_param, tok_i), + } + if (d.thread_local) |tok_i| try p.errTok(.threadlocal_non_var, tok_i); + if (d.@"inline") |tok_i| try p.errStr(.func_spec_non_func, tok_i, "inline"); + if (d.@"noreturn") |tok_i| try p.errStr(.func_spec_non_func, tok_i, "_Noreturn"); + } + + fn validateFnDef(d: DeclSpec, p: *Parser) Error!Tree.Tag { + switch (d.storage_class) { + .none, .@"extern", .static => {}, + .auto, .register, .typedef => |tok_i| try p.errTok(.illegal_storage_on_func, tok_i), + } + if (d.thread_local) |tok_i| try p.errTok(.threadlocal_non_var, tok_i); + + const is_static = d.storage_class == .static; + const is_inline = d.@"inline" != null; + if (is_static) { + if (is_inline) return .inline_static_fn_def; + return .static_fn_def; + } else { + if (is_inline) return .inline_fn_def; + return .fn_def; + } + } + + fn validate(d: DeclSpec, p: *Parser, ty: *Type, has_init: bool) Error!Tree.Tag { + const is_static = d.storage_class == .static; + if (ty.isFunc() and d.storage_class != .typedef) { + switch (d.storage_class) { + .none, .@"extern" => {}, + .static => |tok_i| if (p.func.ty != null) try p.errTok(.static_func_not_global, tok_i), + .typedef => unreachable, + .auto, .register => |tok_i| try p.errTok(.illegal_storage_on_func, tok_i), + } + if (d.thread_local) |tok_i| try p.errTok(.threadlocal_non_var, tok_i); + + const is_inline = d.@"inline" != null; + if (is_static) { + if (is_inline) return .inline_static_fn_proto; + return .static_fn_proto; + } else { + if (is_inline) return .inline_fn_proto; + return .fn_proto; + } + } else { + if (d.@"inline") |tok_i| try p.errStr(.func_spec_non_func, tok_i, "inline"); + // TODO move to attribute validation + if (d.@"noreturn") |tok_i| try p.errStr(.func_spec_non_func, tok_i, "_Noreturn"); + switch (d.storage_class) { + .auto, .register => if (p.func.ty == null) try p.err(.illegal_storage_on_global), + .typedef => return .typedef, + else => {}, + } + ty.qual.register = d.storage_class == .register; + + const is_extern = d.storage_class == .@"extern" and !has_init; + if (d.thread_local != null) { + if (is_static) return .threadlocal_static_var; + if (is_extern) return .threadlocal_extern_var; + return 
.threadlocal_var; + } else { + if (is_static) return .static_var; + if (is_extern) return .extern_var; + return .@"var"; + } + } + } + + fn warnIgnoredAttrs(d: DeclSpec, p: *Parser, attr_buf_start: usize) !void { + if (!d.ty.isEnumOrRecord()) return; + + var i = attr_buf_start; + while (i < p.attr_buf.len) : (i += 1) { + const ignored_attr = p.attr_buf.get(i); + try p.errExtra(.ignored_record_attr, ignored_attr.tok, .{ + .ignored_record_attr = .{ .tag = ignored_attr.attr.tag, .specifier = switch (d.ty.specifier) { + .@"enum" => .@"enum", + .@"struct" => .@"struct", + .@"union" => .@"union", + else => continue, + } }, + }); + } + } +}; + +/// typeof +/// : keyword_typeof '(' typeName ')' +/// | keyword_typeof '(' expr ')' +fn typeof(p: *Parser) Error!?Type { + switch (p.tok_ids[p.tok_i]) { + .keyword_typeof, .keyword_typeof1, .keyword_typeof2 => p.tok_i += 1, + else => return null, + } + const l_paren = try p.expectToken(.l_paren); + if (try p.typeName()) |ty| { + try p.expectClosing(l_paren, .r_paren); + const typeof_ty = try p.arena.create(Type); + typeof_ty.* = .{ + .data = ty.data, + .qual = ty.qual.inheritFromTypeof(), + .specifier = ty.specifier, + }; + + return Type{ + .data = .{ .sub_type = typeof_ty }, + .specifier = .typeof_type, + }; + } + const typeof_expr = try p.parseNoEval(expr); + try typeof_expr.expect(p); + try p.expectClosing(l_paren, .r_paren); + + const inner = try p.arena.create(Type.Expr); + inner.* = .{ + .node = typeof_expr.node, + .ty = .{ + .data = typeof_expr.ty.data, + .qual = typeof_expr.ty.qual.inheritFromTypeof(), + .specifier = typeof_expr.ty.specifier, + }, + }; + + return Type{ + .data = .{ .expr = inner }, + .specifier = .typeof_expr, + }; +} + +/// declSpec: (storageClassSpec | typeSpec | typeQual | funcSpec | alignSpec)+ +/// storageClassSpec: +/// : keyword_typedef +/// | keyword_extern +/// | keyword_static +/// | keyword_threadlocal +/// | keyword_auto +/// | keyword_register +/// funcSpec : keyword_inline | keyword_noreturn +fn declSpec(p: *Parser, is_param: bool) Error!?DeclSpec { + var d: DeclSpec = .{ .ty = .{ .specifier = undefined } }; + var spec: Type.Builder = .{}; + const attr_buf_top = p.attr_buf.len; + defer p.attr_buf.len = attr_buf_top; + + const start = p.tok_i; + while (true) { + if (try p.typeSpec(&spec)) continue; + const id = p.tok_ids[p.tok_i]; + switch (id) { + .keyword_typedef, + .keyword_extern, + .keyword_static, + .keyword_auto, + .keyword_register, + => { + if (d.storage_class != .none) { + try p.errStr(.multiple_storage_class, p.tok_i, @tagName(d.storage_class)); + return error.ParsingFailed; + } + if (d.thread_local != null) { + switch (id) { + .keyword_typedef, + .keyword_auto, + .keyword_register, + => try p.errStr(.cannot_combine_spec, p.tok_i, id.lexeme().?), + else => {}, + } + } + switch (id) { + .keyword_typedef => d.storage_class = .{ .typedef = p.tok_i }, + .keyword_extern => d.storage_class = .{ .@"extern" = p.tok_i }, + .keyword_static => d.storage_class = .{ .static = p.tok_i }, + .keyword_auto => d.storage_class = .{ .auto = p.tok_i }, + .keyword_register => d.storage_class = .{ .register = p.tok_i }, + else => unreachable, + } + }, + .keyword_thread_local => { + if (d.thread_local != null) { + try p.errStr(.duplicate_decl_spec, p.tok_i, "_Thread_local"); + } + switch (d.storage_class) { + .@"extern", .none, .static => {}, + else => try p.errStr(.cannot_combine_spec, p.tok_i, @tagName(d.storage_class)), + } + d.thread_local = p.tok_i; + }, + .keyword_inline, .keyword_inline1, .keyword_inline2 => { + if 
(d.@"inline" != null) { + try p.errStr(.duplicate_decl_spec, p.tok_i, "inline"); + } + d.@"inline" = p.tok_i; + }, + .keyword_noreturn => { + if (d.@"noreturn" != null) { + try p.errStr(.duplicate_decl_spec, p.tok_i, "_Noreturn"); + } + d.@"noreturn" = p.tok_i; + }, + else => break, + } + p.tok_i += 1; + } + + if (p.tok_i == start) return null; + + d.ty = try spec.finish(p, attr_buf_top); + if (is_param) try p.validateAlignas(d.ty, .alignas_on_param); + return d; +} + +fn validateAlignas(p: *Parser, ty: Type, tag: ?Diagnostics.Tag) !void { + const base = ty.canonicalize(.standard); + const default_align = base.alignof(p.pp.comp); + for (ty.getAttributes()) |attr| { + if (attr.tag != .aligned) continue; + if (attr.args.aligned.alignment) |alignment| { + if (!alignment.alignas) continue; + + const align_tok = attr.args.aligned.__name_tok; + if (tag) |t| try p.errTok(t, align_tok); + if (ty.isFunc()) { + try p.errTok(.alignas_on_func, align_tok); + } else if (alignment.requested < default_align) { + try p.errExtra(.minimum_alignment, align_tok, .{ .unsigned = default_align }); + } + } + } +} + +const InitDeclarator = struct { d: Declarator, initializer: NodeIndex = .none }; + +/// attribute +/// : attrIdentifier +/// | attrIdentifier '(' identifier ')' +/// | attrIdentifier '(' identifier (',' expr)+ ')' +/// | attrIdentifier '(' (expr (',' expr)*)? ')' +fn attribute(p: *Parser, kind: Attribute.Kind, namespace: ?[]const u8) Error!?TentativeAttribute { + const name_tok = p.tok_i; + switch (p.tok_ids[p.tok_i]) { + .keyword_const, .keyword_const1, .keyword_const2 => p.tok_i += 1, + else => _ = try p.expectIdentifier(), + } + const name = p.tokSlice(name_tok); + + const attr = Attribute.fromString(kind, namespace, name) orelse { + const tag: Diagnostics.Tag = if (kind == .declspec) .declspec_attr_not_supported else .unknown_attribute; + try p.errStr(tag, name_tok, name); + if (p.eatToken(.l_paren)) |_| p.skipTo(.r_paren); + return null; + }; + + const required_count = Attribute.requiredArgCount(attr); + var arguments = Attribute.initArguments(attr, name_tok); + var arg_idx: u32 = 0; + + switch (p.tok_ids[p.tok_i]) { + .comma, .r_paren => {}, // will be consumed in attributeList + .l_paren => blk: { + p.tok_i += 1; + if (p.eatToken(.r_paren)) |_| break :blk; + + if (Attribute.wantsIdentEnum(attr)) { + if (try p.eatIdentifier()) |ident| { + if (Attribute.diagnoseIdent(attr, &arguments, p.tokSlice(ident))) |msg| { + try p.errExtra(msg.tag, ident, msg.extra); + p.skipTo(.r_paren); + return error.ParsingFailed; + } + } else { + try p.errExtra(.attribute_requires_identifier, name_tok, .{ .str = name }); + return error.ParsingFailed; + } + } else { + const arg_start = p.tok_i; + var first_expr = try p.assignExpr(); + try first_expr.expect(p); + if (p.diagnose(attr, &arguments, arg_idx, first_expr)) |msg| { + try p.errExtra(msg.tag, arg_start, msg.extra); + p.skipTo(.r_paren); + return error.ParsingFailed; + } + } + arg_idx += 1; + while (p.eatToken(.r_paren) == null) : (arg_idx += 1) { + _ = try p.expectToken(.comma); + + const arg_start = p.tok_i; + var arg_expr = try p.assignExpr(); + try arg_expr.expect(p); + if (p.diagnose(attr, &arguments, arg_idx, arg_expr)) |msg| { + try p.errExtra(msg.tag, arg_start, msg.extra); + p.skipTo(.r_paren); + return error.ParsingFailed; + } + } + }, + else => {}, + } + if (arg_idx < required_count) { + try p.errExtra(.attribute_not_enough_args, name_tok, .{ .attr_arg_count = .{ .attribute = attr, .expected = required_count } }); + return error.ParsingFailed; + } + 
return TentativeAttribute{ .attr = .{ .tag = attr, .args = arguments }, .tok = name_tok };
+}
+
+fn diagnose(p: *Parser, attr: Attribute.Tag, arguments: *Attribute.Arguments, arg_idx: u32, res: Result) ?Diagnostics.Message {
+    if (Attribute.wantsAlignment(attr, arg_idx)) {
+        return Attribute.diagnoseAlignment(attr, arguments, arg_idx, res.val, res.ty, p.pp.comp);
+    }
+    const node = p.nodes.get(@enumToInt(res.node));
+    return Attribute.diagnose(attr, arguments, arg_idx, res.val, node);
+}
+
+/// attributeList : (attribute (',' attribute)*)?
+fn gnuAttributeList(p: *Parser) Error!void {
+    if (p.tok_ids[p.tok_i] == .r_paren) return;
+
+    if (try p.attribute(.gnu, null)) |attr| try p.attr_buf.append(p.pp.comp.gpa, attr);
+    while (p.tok_ids[p.tok_i] != .r_paren) {
+        _ = try p.expectToken(.comma);
+        if (try p.attribute(.gnu, null)) |attr| try p.attr_buf.append(p.pp.comp.gpa, attr);
+    }
+}
+
+fn c2xAttributeList(p: *Parser) Error!void {
+    while (p.tok_ids[p.tok_i] != .r_bracket) {
+        var namespace_tok = try p.expectIdentifier();
+        var namespace: ?[]const u8 = null;
+        if (p.eatToken(.colon_colon)) |_| {
+            namespace = p.tokSlice(namespace_tok);
+        } else {
+            p.tok_i -= 1;
+        }
+        if (try p.attribute(.c2x, namespace)) |attr| try p.attr_buf.append(p.pp.comp.gpa, attr);
+        _ = p.eatToken(.comma);
+    }
+}
+
+fn msvcAttributeList(p: *Parser) Error!void {
+    while (p.tok_ids[p.tok_i] != .r_paren) {
+        if (try p.attribute(.declspec, null)) |attr| try p.attr_buf.append(p.pp.comp.gpa, attr);
+        _ = p.eatToken(.comma);
+    }
+}
+
+fn c2xAttribute(p: *Parser) !bool {
+    if (!p.pp.comp.langopts.standard.atLeast(.c2x)) return false;
+    const bracket1 = p.eatToken(.l_bracket) orelse return false;
+    const bracket2 = p.eatToken(.l_bracket) orelse {
+        p.tok_i -= 1;
+        return false;
+    };
+
+    try p.c2xAttributeList();
+
+    _ = try p.expectClosing(bracket2, .r_bracket);
+    _ = try p.expectClosing(bracket1, .r_bracket);
+
+    return true;
+}
+
+fn msvcAttribute(p: *Parser) !bool {
+    const declspec_tok = p.eatToken(.keyword_declspec) orelse return false;
+    if (!p.pp.comp.langopts.declspec_attrs) {
+        try p.errTok(.declspec_not_enabled, declspec_tok);
+        return error.ParsingFailed;
+    }
+    const l_paren = try p.expectToken(.l_paren);
+    try p.msvcAttributeList();
+    _ = try p.expectClosing(l_paren, .r_paren);
+
+    return true;
+}
+
+fn gnuAttribute(p: *Parser) !bool {
+    switch (p.tok_ids[p.tok_i]) {
+        .keyword_attribute1, .keyword_attribute2 => p.tok_i += 1,
+        else => return false,
+    }
+    const paren1 = try p.expectToken(.l_paren);
+    const paren2 = try p.expectToken(.l_paren);
+
+    try p.gnuAttributeList();
+
+    _ = try p.expectClosing(paren2, .r_paren);
+    _ = try p.expectClosing(paren1, .r_paren);
+    return true;
+}
+
+/// alignAs : keyword_alignas '(' (typeName | constExpr ) ')'
+fn alignAs(p: *Parser) !bool {
+    const align_tok = p.eatToken(.keyword_alignas) orelse return false;
+    const l_paren = try p.expectToken(.l_paren);
+    if (try p.typeName()) |inner_ty| {
+        const alignment = Attribute.Alignment{ .requested = inner_ty.alignof(p.pp.comp), .alignas = true };
+        const attr = Attribute{ .tag = .aligned, .args = .{ .aligned = .{ .alignment = alignment, .__name_tok = align_tok } } };
+        try p.attr_buf.append(p.pp.comp.gpa, .{ .attr = attr, .tok = align_tok });
+    } else {
+        const arg_start = p.tok_i;
+        const res = try p.constExpr();
+        if (!res.val.isZero()) {
+            var args = Attribute.initArguments(.aligned, align_tok);
+            if (p.diagnose(.aligned, &args, 0, res)) |msg| {
+                try p.errExtra(msg.tag, arg_start, msg.extra);
+                p.skipTo(.r_paren);
+                return
error.ParsingFailed;
+            }
+            args.aligned.alignment.?.node = res.node;
+            args.aligned.alignment.?.alignas = true;
+            try p.attr_buf.append(p.pp.comp.gpa, .{ .attr = .{ .tag = .aligned, .args = args }, .tok = align_tok });
+        }
+    }
+    try p.expectClosing(l_paren, .r_paren);
+    return true;
+}
+
+/// attributeSpecifier : (keyword_attribute '(' '(' attributeList ')' ')')*
+fn attributeSpecifier(p: *Parser) Error!void {
+    while (true) {
+        if (try p.alignAs()) continue;
+        if (try p.gnuAttribute()) continue;
+        if (try p.c2xAttribute()) continue;
+        if (try p.msvcAttribute()) continue;
+        break;
+    }
+}
+
+/// initDeclarator : declarator assembly? attributeSpecifier? ('=' initializer)?
+fn initDeclarator(p: *Parser, decl_spec: *DeclSpec) Error!?InitDeclarator {
+    var init_d = InitDeclarator{
+        .d = (try p.declarator(decl_spec.ty, .normal)) orelse return null,
+    };
+    _ = try p.assembly(.decl_label);
+    try p.attributeSpecifier(); // if (init_d.d.ty.isFunc()) .function else .variable
+    if (p.eatToken(.equal)) |eq| init: {
+        if (decl_spec.storage_class == .typedef or init_d.d.func_declarator != null) {
+            try p.errTok(.illegal_initializer, eq);
+        } else if (init_d.d.ty.is(.variable_len_array)) {
+            try p.errTok(.vla_init, eq);
+        } else if (decl_spec.storage_class == .@"extern") {
+            try p.err(.extern_initializer);
+            decl_spec.storage_class = .none;
+        }
+
+        if (init_d.d.ty.hasIncompleteSize() and !init_d.d.ty.is(.incomplete_array)) {
+            try p.errStr(.variable_incomplete_ty, init_d.d.name, try p.typeStr(init_d.d.ty));
+            return error.ParsingFailed;
+        }
+
+        const scopes_len = p.scopes.items.len;
+        defer p.scopes.items.len = scopes_len;
+        try p.scopes.append(.{ .decl = .{
+            .name = p.tokSlice(init_d.d.name),
+            .ty = init_d.d.ty,
+            .name_tok = init_d.d.name,
+        } });
+        var init_list_expr = try p.initializer(init_d.d.ty);
+        init_d.initializer = init_list_expr.node;
+        if (!init_list_expr.ty.isArray()) break :init;
+        if (init_d.d.ty.specifier == .incomplete_array) {
+            // Modifying .data is exceptionally allowed for .incomplete_array.
+            init_d.d.ty.data.array.len = init_list_expr.ty.arrayLen() orelse break :init;
+            init_d.d.ty.specifier = .array;
+        } else if (init_d.d.ty.is(.incomplete_array)) {
+            const attrs = init_d.d.ty.getAttributes();
+
+            const arr_ty = try p.arena.create(Type.Array);
+            arr_ty.* = .{ .elem = init_d.d.ty.elemType(), .len = init_list_expr.ty.arrayLen().?
}; + const ty = Type{ + .specifier = .array, + .data = .{ .array = arr_ty }, + }; + init_d.d.ty = try ty.withAttributes(p.arena, attrs); + } + } + const name = init_d.d.name; + if (decl_spec.storage_class != .typedef and init_d.d.ty.hasIncompleteSize()) incomplete: { + const specifier = init_d.d.ty.canonicalize(.standard).specifier; + if (decl_spec.storage_class == .@"extern") switch (specifier) { + .@"struct", .@"union", .@"enum" => break :incomplete, + .incomplete_array => { + init_d.d.ty.decayArray(); + break :incomplete; + }, + else => {}, + }; + // if there was an initializer expression it must have contained an error + if (init_d.initializer != .none) break :incomplete; + try p.errStr(.variable_incomplete_ty, name, try p.typeStr(init_d.d.ty)); + return init_d; + } + if (p.findSymbol(name, .definition)) |scope| switch (scope) { + .enumeration => { + try p.errStr(.redefinition_different_sym, name, p.tokSlice(name)); + try p.errTok(.previous_definition, scope.enumeration.name_tok); + }, + .decl => |s| if (!s.ty.eql(init_d.d.ty, p.pp.comp, true)) { + try p.errStr(.redefinition_incompatible, name, p.tokSlice(name)); + try p.errTok(.previous_definition, s.name_tok); + }, + .def => |s| if (!s.ty.eql(init_d.d.ty, p.pp.comp, true)) { + try p.errStr(.redefinition_incompatible, name, p.tokSlice(name)); + try p.errTok(.previous_definition, s.name_tok); + } else if (init_d.initializer != .none) { + try p.errStr(.redefinition, name, p.tokSlice(name)); + try p.errTok(.previous_definition, s.name_tok); + }, + .param => |s| { + try p.errStr(.redefinition, name, p.tokSlice(name)); + try p.errTok(.previous_definition, s.name_tok); + }, + else => unreachable, + }; + return init_d; +} + +/// typeSpec +/// : keyword_void +/// | keyword_char +/// | keyword_short +/// | keyword_int +/// | keyword_long +/// | keyword_float +/// | keyword_double +/// | keyword_signed +/// | keyword_unsigned +/// | keyword_bool +/// | keyword_complex +/// | atomicTypeSpec +/// | recordSpec +/// | enumSpec +/// | typedef // IDENTIFIER +/// | typeof +/// atomicTypeSpec : keyword_atomic '(' typeName ')' +/// alignSpec +/// : keyword_alignas '(' typeName ')' +/// | keyword_alignas '(' constExpr ')' +fn typeSpec(p: *Parser, ty: *Type.Builder) Error!bool { + const start = p.tok_i; + while (true) { + try p.attributeSpecifier(); // .typedef + + if (try p.typeof()) |inner_ty| { + try ty.combineFromTypeof(p, inner_ty, start); + continue; + } + if (try p.typeQual(&ty.qual)) continue; + switch (p.tok_ids[p.tok_i]) { + .keyword_void => try ty.combine(p, .void, p.tok_i), + .keyword_bool => try ty.combine(p, .bool, p.tok_i), + .keyword_char => try ty.combine(p, .char, p.tok_i), + .keyword_short => try ty.combine(p, .short, p.tok_i), + .keyword_int => try ty.combine(p, .int, p.tok_i), + .keyword_long => try ty.combine(p, .long, p.tok_i), + .keyword_signed => try ty.combine(p, .signed, p.tok_i), + .keyword_unsigned => try ty.combine(p, .unsigned, p.tok_i), + .keyword_float => try ty.combine(p, .float, p.tok_i), + .keyword_double => try ty.combine(p, .double, p.tok_i), + .keyword_complex => try ty.combine(p, .complex, p.tok_i), + .keyword_atomic => { + const atomic_tok = p.tok_i; + p.tok_i += 1; + const l_paren = p.eatToken(.l_paren) orelse { + // _Atomic qualifier not _Atomic(typeName) + p.tok_i = atomic_tok; + break; + }; + const inner_ty = (try p.typeName()) orelse { + try p.err(.expected_type); + return error.ParsingFailed; + }; + try p.expectClosing(l_paren, .r_paren); + + const new_spec = Type.Builder.fromType(inner_ty); + try 
ty.combine(p, new_spec, atomic_tok); + + if (ty.qual.atomic != null) + try p.errStr(.duplicate_decl_spec, atomic_tok, "atomic") + else + ty.qual.atomic = atomic_tok; + continue; + }, + .keyword_struct => { + const tag_tok = p.tok_i; + try ty.combine(p, .{ .@"struct" = try p.recordSpec() }, tag_tok); + continue; + }, + .keyword_union => { + const tag_tok = p.tok_i; + try ty.combine(p, .{ .@"union" = try p.recordSpec() }, tag_tok); + continue; + }, + .keyword_enum => { + const tag_tok = p.tok_i; + try ty.combine(p, .{ .@"enum" = try p.enumSpec() }, tag_tok); + continue; + }, + .identifier, .extended_identifier => { + const typedef = (try p.findTypedef(p.tok_i, ty.specifier != .none)) orelse break; + if (!ty.combineTypedef(p, typedef.ty, typedef.name_tok)) break; + }, + else => break, + } + // consume single token specifiers here + p.tok_i += 1; + } + return p.tok_i != start; +} + +fn getAnonymousName(p: *Parser, kind_tok: TokenIndex) ![]const u8 { + const loc = p.pp.tokens.items(.loc)[kind_tok]; + const source = p.pp.comp.getSource(loc.id); + const line_col = source.lineCol(loc); + + const kind_str = switch (p.tok_ids[kind_tok]) { + .keyword_struct, .keyword_union, .keyword_enum => p.tokSlice(kind_tok), + else => "record field", + }; + + return std.fmt.allocPrint( + p.arena, + "(anonymous {s} at {s}:{d}:{d})", + .{ kind_str, source.path, line_col.line_no, line_col.col }, + ); +} + +/// recordSpec +/// : (keyword_struct | keyword_union) IDENTIFIER? { recordDecl* } +/// | (keyword_struct | keyword_union) IDENTIFIER +fn recordSpec(p: *Parser) Error!*Type.Record { + const kind_tok = p.tok_i; + const is_struct = p.tok_ids[kind_tok] == .keyword_struct; + p.tok_i += 1; + const attr_buf_top = p.attr_buf.len; + defer p.attr_buf.len = attr_buf_top; + try p.attributeSpecifier(); // .record + + const maybe_ident = try p.eatIdentifier(); + const l_brace = p.eatToken(.l_brace) orelse { + const ident = maybe_ident orelse { + try p.err(.ident_or_l_brace); + return error.ParsingFailed; + }; + // check if this is a reference to a previous type + if (try p.findTag(p.tok_ids[kind_tok], ident, .reference)) |prev| { + return prev.ty.data.record; + } else { + // this is a forward declaration, create a new record Type. 
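+            // e.g. `struct Foo;` introduces an incomplete record type that a
+            // later `struct Foo { ... };` in the same scope may complete.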
+ const record_ty = try Type.Record.create(p.arena, p.tokSlice(ident)); + const ty = Type{ + .specifier = if (is_struct) .@"struct" else .@"union", + .data = .{ .record = record_ty }, + }; + const sym = Scope.Symbol{ .name = record_ty.name, .ty = ty, .name_tok = ident }; + try p.scopes.append(if (is_struct) .{ .@"struct" = sym } else .{ .@"union" = sym }); + return record_ty; + } + }; + + // Get forward declared type or create a new one + var defined = false; + const record_ty: *Type.Record = if (maybe_ident) |ident| record_ty: { + if (try p.findTag(p.tok_ids[kind_tok], ident, .definition)) |prev| { + if (!prev.ty.data.record.isIncomplete()) { + // if the record isn't incomplete, this is a redefinition + try p.errStr(.redefinition, ident, p.tokSlice(ident)); + try p.errTok(.previous_definition, prev.name_tok); + } else { + defined = true; + break :record_ty prev.ty.data.record; + } + } + break :record_ty try Type.Record.create(p.arena, p.tokSlice(ident)); + } else try Type.Record.create(p.arena, try p.getAnonymousName(kind_tok)); + const ty = Type{ + .specifier = if (is_struct) .@"struct" else .@"union", + .data = .{ .record = record_ty }, + }; + + // declare a symbol for the type + if (maybe_ident != null and !defined) { + const sym = Scope.Symbol{ .name = record_ty.name, .ty = ty, .name_tok = maybe_ident.? }; + try p.scopes.append(if (is_struct) .{ .@"struct" = sym } else .{ .@"union" = sym }); + } + + // reserve space for this record + try p.decl_buf.append(.none); + const decl_buf_top = p.decl_buf.items.len; + const record_buf_top = p.record_buf.items.len; + const scopes_top = p.scopes.items.len; + errdefer p.decl_buf.items.len = decl_buf_top - 1; + defer { + p.decl_buf.items.len = decl_buf_top; + p.record_buf.items.len = record_buf_top; + p.scopes.items.len = scopes_top; + } + + const old_record = p.record; + defer p.record = old_record; + p.record = .{ + .kind = p.tok_ids[kind_tok], + .scopes_top = scopes_top, + }; + + try p.recordDecls(); + + if (p.record.flexible_field) |some| { + if (p.record_buf.items[record_buf_top..].len == 1 and is_struct) { + try p.errTok(.flexible_in_empty, some); + } + } + + record_ty.fields = try p.arena.dupe(Type.Record.Field, p.record_buf.items[record_buf_top..]); + // TODO actually calculate + record_ty.size = 1; + record_ty.alignment = 1; + + if (p.record_buf.items.len == record_buf_top) try p.errStr(.empty_record, kind_tok, p.tokSlice(kind_tok)); + try p.expectClosing(l_brace, .r_brace); + try p.attributeSpecifier(); // .record + + // finish by creating a node + var node: Tree.Node = .{ + .tag = if (is_struct) .struct_decl_two else .union_decl_two, + .ty = ty, + .data = .{ .bin = .{ .lhs = .none, .rhs = .none } }, + }; + const record_decls = p.decl_buf.items[decl_buf_top..]; + switch (record_decls.len) { + 0 => {}, + 1 => node.data = .{ .bin = .{ .lhs = record_decls[0], .rhs = .none } }, + 2 => node.data = .{ .bin = .{ .lhs = record_decls[0], .rhs = record_decls[1] } }, + else => { + node.tag = if (is_struct) .struct_decl else .union_decl; + node.data = .{ .range = try p.addList(record_decls) }; + }, + } + p.decl_buf.items[decl_buf_top - 1] = try p.addNode(node); + return record_ty; +} + +/// recordDecl +/// : specQual (recordDeclarator (',' recordDeclarator)*)? 
; +/// | staticAssert +fn recordDecls(p: *Parser) Error!void { + while (true) { + if (try p.pragma()) continue; + if (try p.parseOrNextDecl(staticAssert)) continue; + if (p.eatToken(.keyword_extension)) |_| { + const saved_extension = p.extension_suppressed; + defer p.extension_suppressed = saved_extension; + p.extension_suppressed = true; + + if (try p.parseOrNextDecl(recordDeclarator)) continue; + try p.err(.expected_type); + p.nextExternDecl(); + continue; + } + if (try p.parseOrNextDecl(recordDeclarator)) continue; + break; + } +} + +/// recordDeclarator : keyword_extension? declarator (':' constExpr)? +fn recordDeclarator(p: *Parser) Error!bool { + const attr_buf_top = p.attr_buf.len; + defer p.attr_buf.len = attr_buf_top; + const base_ty = (try p.specQual()) orelse return false; + + while (true) { + const this_decl_top = p.attr_buf.len; + defer p.attr_buf.len = this_decl_top; + + try p.attributeSpecifier(); // .record + + // 0 means unnamed + var name_tok: TokenIndex = 0; + var ty = base_ty; + var bits_node: NodeIndex = .none; + var bits: u32 = 0; + const first_tok = p.tok_i; + if (try p.declarator(ty, .record)) |d| { + name_tok = d.name; + ty = d.ty; + } + try p.attributeSpecifier(); // .record + ty = try p.withAttributes(ty, attr_buf_top); + + if (p.eatToken(.colon)) |_| bits: { + const res = try p.constExpr(); + if (!ty.isInt()) { + try p.errStr(.non_int_bitfield, first_tok, try p.typeStr(ty)); + break :bits; + } + + if (res.val.tag == .unavailable) { + try p.errTok(.expected_integer_constant_expr, first_tok); + break :bits; + } else if (res.val.compare(.lt, Value.int(0), res.ty, p.pp.comp)) { + try p.errExtra(.negative_bitwidth, first_tok, .{ + .signed = res.val.signExtend(res.ty, p.pp.comp), + }); + break :bits; + } + + // incomplete size error is reported later + const bit_size = ty.bitSizeof(p.pp.comp) orelse break :bits; + if (res.val.compare(.gt, Value.int(bit_size), res.ty, p.pp.comp)) { + try p.errTok(.bitfield_too_big, name_tok); + break :bits; + } else if (res.val.isZero() and name_tok != 0) { + try p.errTok(.zero_width_named_field, name_tok); + break :bits; + } + + bits = res.val.getInt(u32); + bits_node = res.node; + } + + if (name_tok == 0 and bits_node == .none) unnamed: { + if (ty.is(.@"enum")) break :unnamed; + if (ty.isAnonymousRecord()) { + // An anonymous record appears as indirect fields on the parent + try p.record_buf.append(.{ + .name = try p.getAnonymousName(first_tok), + .ty = ty, + .bit_width = 0, + }); + const node = try p.addNode(.{ + .tag = .indirect_record_field_decl, + .ty = ty, + .data = undefined, + }); + try p.decl_buf.append(node); + try p.record.addFieldsFromAnonymous(p, ty); + break; // must be followed by a semicolon + } + try p.err(.missing_declaration); + } else { + try p.record_buf.append(.{ + .name = if (name_tok != 0) p.tokSlice(name_tok) else try p.getAnonymousName(first_tok), + .ty = ty, + .name_tok = name_tok, + .bit_width = bits, + }); + if (name_tok != 0) try p.record.addField(p, name_tok); + const node = try p.addNode(.{ + .tag = .record_field_decl, + .ty = ty, + .data = .{ .decl = .{ .name = name_tok, .node = bits_node } }, + }); + try p.decl_buf.append(node); + } + + if (ty.isFunc()) { + try p.errTok(.func_field, first_tok); + } else if (ty.is(.variable_len_array)) { + try p.errTok(.vla_field, first_tok); + } else if (ty.is(.incomplete_array)) { + if (p.record.kind == .keyword_union) { + try p.errTok(.flexible_in_union, first_tok); + } + if (p.record.flexible_field) |some| { + try p.errTok(.flexible_non_final, some); + } + 
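+            // Remember the flexible array member so that any field declared
+            // after it is diagnosed as flexible_non_final.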
p.record.flexible_field = first_tok;
+        } else if (ty.hasIncompleteSize()) {
+            try p.errStr(.field_incomplete_ty, first_tok, try p.typeStr(ty));
+        } else if (p.record.flexible_field) |some| {
+            if (some != first_tok) try p.errTok(.flexible_non_final, some);
+        }
+        if (p.eatToken(.comma) == null) break;
+    }
+    _ = try p.expectToken(.semicolon);
+    return true;
+}
+
+fn checkAlignasUsage(p: *Parser, tag: Diagnostics.Tag, attr_buf_start: usize) !void {
+    var i = attr_buf_start;
+    while (i < p.attr_buf.len) : (i += 1) {
+        const tentative_attr = p.attr_buf.get(i);
+        if (tentative_attr.attr.tag != .aligned) continue;
+        if (tentative_attr.attr.args.aligned.alignment) |alignment| {
+            if (alignment.alignas) try p.errTok(tag, tentative_attr.tok);
+        }
+    }
+}
+
+/// specQual : (typeSpec | typeQual | alignSpec)+
+fn specQual(p: *Parser) Error!?Type {
+    var spec: Type.Builder = .{};
+    const attr_buf_top = p.attr_buf.len;
+    defer p.attr_buf.len = attr_buf_top;
+    if (try p.typeSpec(&spec)) {
+        const ty = try spec.finish(p, attr_buf_top);
+        try p.validateAlignas(ty, .align_ignored);
+        return ty;
+    }
+    return null;
+}
+
+/// enumSpec
+/// : keyword_enum IDENTIFIER? { enumerator (',' enumerator)* ','? }
+/// | keyword_enum IDENTIFIER
+fn enumSpec(p: *Parser) Error!*Type.Enum {
+    const enum_tok = p.tok_i;
+    p.tok_i += 1;
+    const attr_buf_top = p.attr_buf.len;
+    defer p.attr_buf.len = attr_buf_top;
+    try p.attributeSpecifier(); // record
+
+    const maybe_ident = try p.eatIdentifier();
+    const l_brace = p.eatToken(.l_brace) orelse {
+        const ident = maybe_ident orelse {
+            try p.err(.ident_or_l_brace);
+            return error.ParsingFailed;
+        };
+        // check if this is a reference to a previous type
+        if (try p.findTag(.keyword_enum, ident, .reference)) |prev| {
+            return prev.ty.data.@"enum";
+        } else {
+            // this is a forward declaration, create a new enum Type.
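+            // e.g. `enum Color;` introduces an incomplete enum type that a
+            // later `enum Color { ... };` may complete.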
+ const enum_ty = try Type.Enum.create(p.arena, p.tokSlice(ident)); + const ty = Type{ .specifier = .@"enum", .data = .{ .@"enum" = enum_ty } }; + const sym = Scope.Symbol{ .name = enum_ty.name, .ty = ty, .name_tok = ident }; + try p.scopes.append(.{ .@"enum" = sym }); + return enum_ty; + } + }; + + // Get forward declared type or create a new one + var defined = false; + const enum_ty: *Type.Enum = if (maybe_ident) |ident| enum_ty: { + if (try p.findTag(.keyword_enum, ident, .definition)) |prev| { + if (!prev.ty.data.@"enum".isIncomplete()) { + // if the enum isn't incomplete, this is a redefinition + try p.errStr(.redefinition, ident, p.tokSlice(ident)); + try p.errTok(.previous_definition, prev.name_tok); + } else { + defined = true; + break :enum_ty prev.ty.data.@"enum"; + } + } + break :enum_ty try Type.Enum.create(p.arena, p.tokSlice(ident)); + } else try Type.Enum.create(p.arena, try p.getAnonymousName(enum_tok)); + const ty = Type{ + .specifier = .@"enum", + .data = .{ .@"enum" = enum_ty }, + }; + + // declare a symbol for the type + if (maybe_ident != null and !defined) { + try p.scopes.append(.{ .@"enum" = .{ + .name = enum_ty.name, + .ty = ty, + .name_tok = maybe_ident.?, + } }); + } + + // reserve space for this enum + try p.decl_buf.append(.none); + const decl_buf_top = p.decl_buf.items.len; + const list_buf_top = p.list_buf.items.len; + const enum_buf_top = p.enum_buf.items.len; + errdefer p.decl_buf.items.len = decl_buf_top - 1; + defer { + p.decl_buf.items.len = decl_buf_top; + p.list_buf.items.len = list_buf_top; + p.enum_buf.items.len = enum_buf_top; + } + + var e = Enumerator.init(p); + while (try p.enumerator(&e)) |field_and_node| { + try p.enum_buf.append(field_and_node.field); + try p.list_buf.append(field_and_node.node); + if (p.eatToken(.comma) == null) break; + } + enum_ty.fields = try p.arena.dupe(Type.Enum.Field, p.enum_buf.items[enum_buf_top..]); + enum_ty.tag_ty = e.res.ty; + + if (p.enum_buf.items.len == enum_buf_top) try p.err(.empty_enum); + try p.expectClosing(l_brace, .r_brace); + try p.attributeSpecifier(); // record + + // finish by creating a node + var node: Tree.Node = .{ .tag = .enum_decl_two, .ty = ty, .data = .{ + .bin = .{ .lhs = .none, .rhs = .none }, + } }; + const field_nodes = p.list_buf.items[list_buf_top..]; + switch (field_nodes.len) { + 0 => {}, + 1 => node.data = .{ .bin = .{ .lhs = field_nodes[0], .rhs = .none } }, + 2 => node.data = .{ .bin = .{ .lhs = field_nodes[0], .rhs = field_nodes[1] } }, + else => { + node.tag = .enum_decl; + node.data = .{ .range = try p.addList(field_nodes) }; + }, + } + p.decl_buf.items[decl_buf_top - 1] = try p.addNode(node); + return enum_ty; +} + +const Enumerator = struct { + res: Result, + + fn init(p: *Parser) Enumerator { + return .{ .res = .{ + .ty = .{ .specifier = if (p.pp.comp.langopts.short_enums) .schar else .int }, + .val = Value.int(0), + } }; + } + + /// Increment enumerator value adjusting type if needed. + fn incr(e: *Enumerator, p: *Parser) !void { + e.res.node = .none; + _ = p; + _ = e.res.val.add(e.res.val, Value.int(1), e.res.ty, p.pp.comp); + // TODO adjust type if value does not fit current + } + + /// Set enumerator value to specified value, adjusting type if needed. 
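+    /// e.g. for `enum { A = 5, B }`, `set` records 5 for A and the following
+    /// `incr` yields 6 for B.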
+    fn set(e: *Enumerator, p: *Parser, res: Result) !void {
+        _ = p;
+        e.res = res;
+        // TODO adjust res type to try to fit with the previous type
+    }
+};
+
+const EnumFieldAndNode = struct { field: Type.Enum.Field, node: NodeIndex };
+
+/// enumerator : IDENTIFIER ('=' constExpr)?
+fn enumerator(p: *Parser, e: *Enumerator) Error!?EnumFieldAndNode {
+    _ = try p.pragma();
+    const name_tok = (try p.eatIdentifier()) orelse {
+        if (p.tok_ids[p.tok_i] == .r_brace) return null;
+        try p.err(.expected_identifier);
+        p.skipTo(.r_brace);
+        return error.ParsingFailed;
+    };
+    const name = p.tokSlice(name_tok);
+    const attr_buf_top = p.attr_buf.len;
+    defer p.attr_buf.len = attr_buf_top;
+    try p.attributeSpecifier();
+
+    if (p.eatToken(.equal)) |_| {
+        const specified = try p.constExpr();
+        if (specified.val.tag == .unavailable) {
+            try p.errTok(.enum_val_unavailable, name_tok + 2);
+            try e.incr(p);
+        } else {
+            try e.set(p, specified);
+        }
+    } else {
+        try e.incr(p);
+    }
+
+    if (p.findSymbol(name_tok, .definition)) |scope| switch (scope) {
+        .enumeration => |sym| {
+            try p.errStr(.redefinition, name_tok, name);
+            try p.errTok(.previous_definition, sym.name_tok);
+        },
+        .decl, .def, .param => |sym| {
+            try p.errStr(.redefinition_different_sym, name_tok, name);
+            try p.errTok(.previous_definition, sym.name_tok);
+        },
+        else => unreachable,
+    };
+
+    var res = e.res;
+    res.ty = try p.withAttributes(res.ty, attr_buf_top);
+
+    try p.scopes.append(.{ .enumeration = .{
+        .name = name,
+        .value = res,
+        .name_tok = name_tok,
+    } });
+    const node = try p.addNode(.{
+        .tag = .enum_field_decl,
+        .ty = res.ty,
+        .data = .{ .decl = .{
+            .name = name_tok,
+            .node = res.node,
+        } },
+    });
+    return EnumFieldAndNode{ .field = .{
+        .name = name,
+        .ty = res.ty,
+        .name_tok = name_tok,
+        .node = res.node,
+    }, .node = node };
+}
+
+/// typeQual : keyword_const | keyword_restrict | keyword_volatile | keyword_atomic
+fn typeQual(p: *Parser, b: *Type.Qualifiers.Builder) Error!bool {
+    var any = false;
+    while (true) {
+        switch (p.tok_ids[p.tok_i]) {
+            .keyword_restrict, .keyword_restrict1, .keyword_restrict2 => {
+                if (b.restrict != null)
+                    try p.errStr(.duplicate_decl_spec, p.tok_i, "restrict")
+                else
+                    b.restrict = p.tok_i;
+            },
+            .keyword_const, .keyword_const1, .keyword_const2 => {
+                if (b.@"const" != null)
+                    try p.errStr(.duplicate_decl_spec, p.tok_i, "const")
+                else
+                    b.@"const" = p.tok_i;
+            },
+            .keyword_volatile, .keyword_volatile1, .keyword_volatile2 => {
+                if (b.@"volatile" != null)
+                    try p.errStr(.duplicate_decl_spec, p.tok_i, "volatile")
+                else
+                    b.@"volatile" = p.tok_i;
+            },
+            .keyword_atomic => {
+                // _Atomic(typeName) instead of just _Atomic
+                if (p.tok_ids[p.tok_i + 1] == .l_paren) break;
+                if (b.atomic != null)
+                    try p.errStr(.duplicate_decl_spec, p.tok_i, "atomic")
+                else
+                    b.atomic = p.tok_i;
+            },
+            else => break,
+        }
+        p.tok_i += 1;
+        any = true;
+    }
+    return any;
+}
+
+const Declarator = struct {
+    name: TokenIndex,
+    ty: Type,
+    func_declarator: ?TokenIndex = null,
+    old_style_func: ?TokenIndex = null,
+};
+const DeclaratorKind = enum { normal, abstract, param, record };
+
+/// declarator : pointer? (IDENTIFIER | '(' declarator ')') directDeclarator*
+/// abstractDeclarator
+/// : pointer? ('(' abstractDeclarator ')')?
directAbstractDeclarator*
+fn declarator(
+    p: *Parser,
+    base_type: Type,
+    kind: DeclaratorKind,
+) Error!?Declarator {
+    const start = p.tok_i;
+    var d = Declarator{ .name = 0, .ty = try p.pointer(base_type) };
+
+    const attr_buf_top = p.attr_buf.len;
+    defer p.attr_buf.len = attr_buf_top;
+
+    const maybe_ident = p.tok_i;
+    if (kind != .abstract and (try p.eatIdentifier()) != null) {
+        d.name = maybe_ident;
+        const combine_tok = p.tok_i;
+        d.ty = try p.directDeclarator(d.ty, &d, kind);
+        try d.ty.validateCombinedType(p, combine_tok);
+        d.ty = try p.withAttributes(d.ty, attr_buf_top);
+        return d;
+    } else if (p.eatToken(.l_paren)) |l_paren| blk: {
+        var res = (try p.declarator(.{ .specifier = .void }, kind)) orelse {
+            p.tok_i = l_paren;
+            break :blk;
+        };
+        try p.expectClosing(l_paren, .r_paren);
+        const suffix_start = p.tok_i;
+        const outer = try p.directDeclarator(d.ty, &d, kind);
+        try res.ty.combine(outer, p, res.func_declarator orelse suffix_start);
+        try res.ty.validateCombinedType(p, suffix_start);
+        res.old_style_func = d.old_style_func;
+        return res;
+    }
+
+    const expected_ident = p.tok_i;
+
+    d.ty = try p.directDeclarator(d.ty, &d, kind);
+
+    if (kind == .normal and !d.ty.isEnumOrRecord()) {
+        try p.errTok(.expected_ident_or_l_paren, expected_ident);
+        return error.ParsingFailed;
+    }
+    try d.ty.validateCombinedType(p, expected_ident);
+    d.ty = try p.withAttributes(d.ty, attr_buf_top);
+    if (start == p.tok_i) return null;
+    return d;
+}
+
+/// directDeclarator
+/// : '[' typeQual* assignExpr? ']' directDeclarator?
+/// | '[' keyword_static typeQual* assignExpr ']' directDeclarator?
+/// | '[' typeQual+ keyword_static assignExpr ']' directDeclarator?
+/// | '[' typeQual* '*' ']' directDeclarator?
+/// | '(' paramDecls ')' directDeclarator?
+/// | '(' (IDENTIFIER (',' IDENTIFIER)*)? ')' directDeclarator?
+/// directAbstractDeclarator
+/// : '[' typeQual* assignExpr? ']'
+/// | '[' keyword_static typeQual* assignExpr ']'
+/// | '[' typeQual+ keyword_static assignExpr ']'
+/// | '[' '*' ']'
+/// | '(' paramDecls? ')'
+fn directDeclarator(p: *Parser, base_type: Type, d: *Declarator, kind: DeclaratorKind) Error!Type {
+    try p.attributeSpecifier();
+    if (p.eatToken(.l_bracket)) |l_bracket| {
+        var res_ty = Type{
+            // so that we can get any restrict type that might be present
+            .specifier = .pointer,
+        };
+        var quals = Type.Qualifiers.Builder{};
+
+        var got_quals = try p.typeQual(&quals);
+        var static = p.eatToken(.keyword_static);
+        if (static != null and !got_quals) got_quals = try p.typeQual(&quals);
+        var star = p.eatToken(.asterisk);
+        const size_tok = p.tok_i;
+        const size = if (star) |_| Result{} else try p.assignExpr();
+        try p.expectClosing(l_bracket, .r_bracket);
+
+        if (star != null and static != null) {
+            try p.errTok(.invalid_static_star, static.?);
+            static = null;
+        }
+        if (kind != .param) {
+            if (static != null)
+                try p.errTok(.static_non_param, l_bracket)
+            else if (got_quals)
+                try p.errTok(.array_qualifiers, l_bracket);
+            if (star) |some| try p.errTok(.star_non_param, some);
+            static = null;
+            quals = .{};
+            star = null;
+        } else {
+            try quals.finish(p, &res_ty);
+        }
+        if (static) |_| try size.expect(p);
+
+        const outer = try p.directDeclarator(base_type, d, kind);
+        var max_bits = p.pp.comp.target.cpu.arch.ptrBitWidth();
+        if (max_bits > 61) max_bits = 61;
+        const max_bytes = (@as(u64, 1) << @truncate(u6, max_bits)) - 1;
+        // `outer` is validated later so it may be invalid here
+        const outer_size = if (outer.hasIncompleteSize()) 1 else outer.sizeof(p.pp.comp);
+        const max_elems = max_bytes / std.math.max(1, outer_size orelse 1);
+
+        if (size.val.tag == .unavailable) {
+            if (size.node != .none) {
+                if (p.func.ty == null and kind != .param and p.record.kind == .invalid) {
+                    try p.errTok(.variable_len_array_file_scope, l_bracket);
+                }
+                const expr_ty = try p.arena.create(Type.Expr);
+                expr_ty.node = size.node;
+                res_ty.data = .{ .expr = expr_ty };
+                res_ty.specifier = .variable_len_array;
+
+                if (static) |some| try p.errTok(.useless_static, some);
+            } else if (star) |_| {
+                const elem_ty = try p.arena.create(Type);
+                res_ty.data = .{ .sub_type = elem_ty };
+                res_ty.specifier = .unspecified_variable_len_array;
+            } else {
+                const arr_ty = try p.arena.create(Type.Array);
+                arr_ty.len = 0;
+                res_ty.data = .{ .array = arr_ty };
+                res_ty.specifier = .incomplete_array;
+            }
+        } else if (!size.ty.isInt() and !size.ty.isFloat()) {
+            try p.errStr(.array_size_non_int, size_tok, try p.typeStr(size.ty));
+            return error.ParsingFailed;
+        } else {
+            var size_val = size.val;
+            const size_t = p.pp.comp.types.size;
+            if (size_val.tag == .float) {
+                size_val.floatToInt(size.ty, size_t, p.pp.comp);
+            }
+            if (size_val.compare(.lt, Value.int(0), size_t, p.pp.comp)) {
+                try p.errTok(.negative_array_size, l_bracket);
+            }
+            const arr_ty = try p.arena.create(Type.Array);
+            if (size_val.compare(.gt, Value.int(max_elems), size_t, p.pp.comp)) {
+                try p.errTok(.array_too_large, l_bracket);
+                arr_ty.len = max_elems;
+            } else {
+                arr_ty.len = size_val.getInt(u64);
+            }
+            res_ty.data = .{ .array = arr_ty };
+            res_ty.specifier = .array;
+        }
+
+        try res_ty.combine(outer, p, l_bracket);
+        return res_ty;
+    } else if (p.eatToken(.l_paren)) |l_paren| {
+        d.func_declarator = l_paren;
+
+        const func_ty = try p.arena.create(Type.Func);
+        func_ty.params = &.{};
+        var specifier: Type.Specifier = .func;
+
+        if (p.eatToken(.ellipsis)) |_| {
+            try p.err(.param_before_var_args);
+            try p.expectClosing(l_paren, .r_paren);
+            var res_ty = Type{ .specifier = .func, .data = .{ .func = func_ty } };
+
+            const outer = try p.directDeclarator(base_type, d, kind);
+            try res_ty.combine(outer, p, l_paren);
+            return res_ty;
+        }
+
+        if (try p.paramDecls()) |params| {
+            func_ty.params = params;
+            if (p.eatToken(.ellipsis)) |_| specifier = .var_args_func;
+        } else if (p.tok_ids[p.tok_i] == .r_paren) {
+            specifier = .old_style_func;
+        } else if (p.tok_ids[p.tok_i] == .identifier or p.tok_ids[p.tok_i] == .extended_identifier) {
+            d.old_style_func = p.tok_i;
+            const param_buf_top = p.param_buf.items.len;
+            const scopes_top = p.scopes.items.len;
+            defer {
+                p.param_buf.items.len = param_buf_top;
+                p.scopes.items.len = scopes_top;
+            }
+
+            // findSymbol stops the search at .block
+            try p.scopes.append(.block);
+
+            specifier = .old_style_func;
+            while (true) {
+                const name_tok = try p.expectIdentifier();
+                if (p.findSymbol(name_tok, .definition)) |scope| {
+                    try p.errStr(.redefinition_of_parameter, name_tok, p.tokSlice(name_tok));
+                    try p.errTok(.previous_definition, scope.param.name_tok);
+                }
+                try p.scopes.append(.{ .param = .{
+                    .name = p.tokSlice(name_tok),
+                    .ty = undefined,
+                    .name_tok = name_tok,
+                } });
+                try p.param_buf.append(.{
+                    .name = p.tokSlice(name_tok),
+                    .name_tok = name_tok,
+                    .ty = .{ .specifier = .int },
+                });
+                if (p.eatToken(.comma) == null) break;
+            }
+            func_ty.params = try p.arena.dupe(Type.Func.Param, p.param_buf.items[param_buf_top..]);
+        } else {
+            try p.err(.expected_param_decl);
+        }
+
+        try p.expectClosing(l_paren, .r_paren);
+        var res_ty = Type{
+            .specifier = specifier,
+            .data = .{ .func = func_ty },
+        };
+
+        const outer = try p.directDeclarator(base_type, d, kind);
+        try res_ty.combine(outer, p, l_paren);
+        return res_ty;
+    } else return base_type;
+}
+
+/// pointer : '*' typeQual* pointer?
+fn pointer(p: *Parser, base_ty: Type) Error!Type {
+    var ty = base_ty;
+    while (p.eatToken(.asterisk)) |_| {
+        const elem_ty = try p.arena.create(Type);
+        elem_ty.* = ty;
+        ty = Type{
+            .specifier = .pointer,
+            .data = .{ .sub_type = elem_ty },
+        };
+        var quals = Type.Qualifiers.Builder{};
+        _ = try p.typeQual(&quals);
+        try quals.finish(p, &ty);
+    }
+    return ty;
+}
+
+/// paramDecls : paramDecl (',' paramDecl)* (',' '...')?
+/// paramDecl : declSpec (declarator | abstractDeclarator)
+fn paramDecls(p: *Parser) Error!?[]Type.Func.Param {
+    // TODO warn about visibility of types declared here
+    const param_buf_top = p.param_buf.items.len;
+    const scopes_top = p.scopes.items.len;
+    defer {
+        p.param_buf.items.len = param_buf_top;
+        p.scopes.items.len = scopes_top;
+    }
+
+    // findSymbol stops the search at .block
+    try p.scopes.append(.block);
+
+    while (true) {
+        const param_decl_spec = if (try p.declSpec(true)) |some|
+            some
+        else if (p.param_buf.items.len == param_buf_top)
+            return null
+        else blk: {
+            var spec: Type.Builder = .{};
+            break :blk DeclSpec{ .ty = try spec.finish(p, p.attr_buf.len) };
+        };
+
+        var name_tok: TokenIndex = 0;
+        const first_tok = p.tok_i;
+        var param_ty = param_decl_spec.ty;
+        if (try p.declarator(param_decl_spec.ty, .param)) |some| {
+            if (some.old_style_func) |tok_i| try p.errTok(.invalid_old_style_params, tok_i);
+
+            const attr_buf_top = p.attr_buf.len;
+            defer p.attr_buf.len = attr_buf_top;
+            try p.attributeSpecifier();
+
+            name_tok = some.name;
+            param_ty = try p.withAttributes(some.ty, attr_buf_top);
+            if (some.name != 0) {
+                if (p.findSymbol(name_tok, .definition)) |scope| {
+                    if (scope == .enumeration) {
+                        try p.errStr(.redefinition_of_parameter, name_tok, p.tokSlice(name_tok));
+                        try p.errTok(.previous_definition, scope.enumeration.name_tok);
+                    } else {
+                        try p.errStr(.redefinition_of_parameter, name_tok, p.tokSlice(name_tok));
+                        try p.errTok(.previous_definition, scope.param.name_tok);
+                    }
+                }
+                try p.scopes.append(.{ .param = .{
+                    .name = p.tokSlice(name_tok),
+                    .ty = param_ty,
+                    .name_tok = name_tok,
+                } });
+            }
+        }
+
+        if (param_ty.isFunc()) {
+            // params declared as functions are converted to function pointers
+            const elem_ty = try p.arena.create(Type);
+            elem_ty.* = param_ty;
+            param_ty = Type{
+                .specifier = .pointer,
+                .data = .{ .sub_type = elem_ty },
+            };
+        } else if (param_ty.isArray()) {
+            // params declared as arrays are converted to pointers
+            param_ty.decayArray();
+        } else if (param_ty.is(.void)) {
+            // validate void parameters
+            if (p.param_buf.items.len == param_buf_top) {
+                if (p.tok_ids[p.tok_i] != .r_paren) {
+                    try p.err(.void_only_param);
+                    if (param_ty.anyQual()) try p.err(.void_param_qualified);
+                    return error.ParsingFailed;
+                }
+                return &[0]Type.Func.Param{};
+            }
+            try p.err(.void_must_be_first_param);
+            return error.ParsingFailed;
+        }
+
+        try param_decl_spec.validateParam(p, &param_ty);
+        try p.param_buf.append(.{
+            .name = if (name_tok == 0) "" else p.tokSlice(name_tok),
+            .name_tok = if (name_tok == 0) first_tok else name_tok,
+            .ty = param_ty,
+        });
+
+        if (p.eatToken(.comma) == null) break;
+        if (p.tok_ids[p.tok_i] == .ellipsis) break;
+    }
+    return try p.arena.dupe(Type.Func.Param, p.param_buf.items[param_buf_top..]);
+}
+
+/// typeName : specQual abstractDeclarator
+fn typeName(p: *Parser) Error!?Type {
+    var ty = (try p.specQual()) orelse return null;
+    if (try p.declarator(ty, .abstract)) |some| {
+        if (some.old_style_func) |tok_i| try p.errTok(.invalid_old_style_params, tok_i);
+        return some.ty;
+    } else return ty;
+}
+
+/// initializer
+/// : assignExpr
+/// | '{' initializerItems '}'
+fn initializer(p: *Parser, init_ty: Type) Error!Result {
+    // fast path for non-braced initializers
+    if (p.tok_ids[p.tok_i] != .l_brace) {
+        const tok = p.tok_i;
+        var res = try p.assignExpr();
+        try res.expect(p);
+        if (try p.coerceArrayInit(&res, tok, init_ty)) return res;
+        try p.coerceInit(&res, tok, init_ty);
+        return res;
+    }
+
+    var il: InitList = .{};
+    defer il.deinit(p.pp.comp.gpa);
+
+    _ = try p.initializerItem(&il, init_ty);
+
+    const res = try p.convertInitList(il, init_ty);
+    var res_ty = p.nodes.items(.ty)[@enumToInt(res)];
+    res_ty.qual = init_ty.qual;
+    return Result{ .ty = res_ty, .node = res };
+}
+
+/// initializerItems : designation? initializer (',' designation? initializer)* ','?
+/// designation : designator+ '='
+/// designator
+/// : '[' constExpr ']'
+/// | '.' identifier
+fn initializerItem(p: *Parser, il: *InitList, init_ty: Type) Error!bool {
+    const l_brace = p.eatToken(.l_brace) orelse {
+        const tok = p.tok_i;
+        var res = try p.assignExpr();
+        if (res.empty(p)) return false;
+
+        const arr = try p.coerceArrayInit(&res, tok, init_ty);
+        if (!arr) try p.coerceInit(&res, tok, init_ty);
+        if (il.tok != 0) {
+            try p.errTok(.initializer_overrides, tok);
+            try p.errTok(.previous_initializer, il.tok);
+        }
+        il.node = res.node;
+        il.tok = tok;
+        return true;
+    };
+
+    const is_scalar = init_ty.isInt() or init_ty.isFloat() or init_ty.isPtr();
+    if (p.eatToken(.r_brace)) |_| {
+        if (is_scalar) try p.errTok(.empty_scalar_init, l_brace);
+        if (il.tok != 0) {
+            try p.errTok(.initializer_overrides, l_brace);
+            try p.errTok(.previous_initializer, il.tok);
+        }
+        il.node = .none;
+        il.tok = l_brace;
+        return true;
+    }
+
+    var count: u64 = 0;
+    var warned_excess = false;
+    var is_str_init = false;
+    var index_hint: ?usize = null;
+    while (true) : (count += 1) {
+        errdefer p.skipTo(.r_brace);
+
+        const first_tok = p.tok_i;
+        var cur_ty = init_ty;
+        var cur_il = il;
+        var designation = false;
+        var cur_index_hint: ?usize = null;
+        while (true) {
+            if (p.eatToken(.l_bracket)) |l_bracket| {
+                if (!cur_ty.isArray()) {
+                    try p.errStr(.invalid_array_designator, l_bracket, try p.typeStr(cur_ty));
+                    return error.ParsingFailed;
+                }
+                const expr_tok = p.tok_i;
+                const index_res = try p.constExpr();
+                try p.expectClosing(l_bracket, .r_bracket);
+
+                if (index_res.val.tag == .unavailable) {
+                    try p.errTok(.expected_integer_constant_expr, expr_tok);
+                    return error.ParsingFailed;
+                } else if (index_res.val.compare(.lt, index_res.val.zero(), index_res.ty, p.pp.comp)) {
+                    try p.errExtra(.negative_array_designator, l_bracket + 1, .{
+                        .signed = index_res.val.signExtend(index_res.ty, p.pp.comp),
+                    });
+                    return error.ParsingFailed;
+                }
+
+                const max_len = cur_ty.arrayLen() orelse std.math.maxInt(usize);
+                if (index_res.val.data.int >= max_len) {
+                    try p.errExtra(.oob_array_designator, l_bracket + 1, .{ .unsigned = index_res.val.data.int });
+                    return error.ParsingFailed;
+                }
+                const checked = index_res.val.getInt(u64);
+                cur_index_hint = cur_index_hint orelse checked;
+
+                cur_il = try cur_il.find(p.pp.comp.gpa, checked);
+                cur_ty = cur_ty.elemType();
+                designation = true;
+            } else if (p.eatToken(.period)) |period| {
+                const field_name = p.tokSlice(try p.expectIdentifier());
+                cur_ty = cur_ty.canonicalize(.standard);
+                if (!cur_ty.isRecord()) {
+                    try p.errStr(.invalid_field_designator, period, try p.typeStr(cur_ty));
+                    return error.ParsingFailed;
+                } else if (!cur_ty.hasField(field_name)) {
+                    try p.errStr(.no_such_field_designator, period, field_name);
+                    return error.ParsingFailed;
+                }
+
+                // TODO check if union already has field set
+                outer: while (true) {
+                    for (cur_ty.data.record.fields) |f, i| {
+                        if (f.isAnonymousRecord()) {
+                            // Recurse into anonymous field if it has a field by the name.
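+                            // e.g. in `struct { struct { int x; }; } s = { .x = 1 };`
+                            // the designator `.x` names a member of the anonymous
+                            // inner struct, so descend into its init list and
+                            // continue the search from there.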
+                            if (!f.ty.hasField(field_name)) continue;
+                            cur_ty = f.ty.canonicalize(.standard);
+                            cur_il = try il.find(p.pp.comp.gpa, i);
+                            cur_index_hint = cur_index_hint orelse i;
+                            continue :outer;
+                        }
+                        if (std.mem.eql(u8, field_name, f.name)) {
+                            cur_il = try cur_il.find(p.pp.comp.gpa, i);
+                            cur_ty = f.ty;
+                            cur_index_hint = cur_index_hint orelse i;
+                            break :outer;
+                        }
+                    }
+                    unreachable; // we already checked that the starting type has this field
+                }
+                designation = true;
+            } else break;
+        }
+        if (designation) index_hint = null;
+        defer index_hint = cur_index_hint orelse null;
+
+        if (designation) _ = try p.expectToken(.equal);
+
+        var saw = false;
+        if (is_str_init and p.isStringInit(init_ty)) {
+            // discard further strings
+            var tmp_il = InitList{};
+            defer tmp_il.deinit(p.pp.comp.gpa);
+            saw = try p.initializerItem(&tmp_il, .{ .specifier = .void });
+        } else if (count == 0 and p.isStringInit(init_ty)) {
+            is_str_init = true;
+            saw = try p.initializerItem(il, init_ty);
+        } else if (is_scalar and count != 0) {
+            // discard further scalars
+            var tmp_il = InitList{};
+            defer tmp_il.deinit(p.pp.comp.gpa);
+            saw = try p.initializerItem(&tmp_il, .{ .specifier = .void });
+        } else if (p.tok_ids[p.tok_i] == .l_brace) {
+            if (designation) {
+                // designation overrides previous value, let existing mechanism handle it
+                saw = try p.initializerItem(cur_il, cur_ty);
+            } else if (try p.findAggregateInitializer(&cur_il, &cur_ty, &index_hint)) {
+                saw = try p.initializerItem(cur_il, cur_ty);
+            } else {
+                // discard further values
+                var tmp_il = InitList{};
+                defer tmp_il.deinit(p.pp.comp.gpa);
+                saw = try p.initializerItem(&tmp_il, .{ .specifier = .void });
+                if (!warned_excess) try p.errTok(if (init_ty.isArray()) .excess_array_init else .excess_struct_init, first_tok);
+                warned_excess = true;
+            }
+        } else if (index_hint != null and try p.findScalarInitializerAt(&cur_il, &cur_ty, &index_hint.?)) {
+            saw = try p.initializerItem(cur_il, cur_ty);
+        } else if (try p.findScalarInitializer(&cur_il, &cur_ty)) {
+            saw = try p.initializerItem(cur_il, cur_ty);
+        } else if (designation) {
+            // designation overrides previous value, let existing mechanism handle it
+            saw = try p.initializerItem(cur_il, cur_ty);
+        } else {
+            // discard further values
+            var tmp_il = InitList{};
+            defer tmp_il.deinit(p.pp.comp.gpa);
+            saw = try p.initializerItem(&tmp_il, .{ .specifier = .void });
+            if (!warned_excess and saw) try p.errTok(if (init_ty.isArray()) .excess_array_init else .excess_struct_init, first_tok);
+            warned_excess = true;
+        }
+
+        if (!saw) {
+            if (designation) {
+                try p.err(.expected_expr);
+                return error.ParsingFailed;
+            }
+            break;
+        } else if (count == 1) {
+            if (is_str_init) try p.errTok(.excess_str_init, first_tok);
+            if (is_scalar) try p.errTok(.excess_scalar_init, first_tok);
+        }
+
+        if (p.eatToken(.comma) == null) break;
+    }
+    try p.expectClosing(l_brace, .r_brace);
+
+    if (is_scalar or is_str_init) return true;
+    if (il.tok != 0) {
+        try p.errTok(.initializer_overrides, l_brace);
+        try p.errTok(.previous_initializer, il.tok);
+    }
+    il.node = .none;
+    il.tok = l_brace;
+    return true;
+}
+
+/// Returns true if the value is unused.
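+/// Starting from `start_index` (set when a designator already fixed the
+/// position), descends into nested arrays and structs looking for the next
+/// scalar slot; returns false once this level is exhausted.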
+fn findScalarInitializerAt(p: *Parser, il: **InitList, ty: *Type, start_index: *usize) Error!bool {
+    if (ty.isArray()) {
+        start_index.* += 1;
+
+        const arr_ty = ty.*;
+        const elem_count = arr_ty.arrayLen() orelse std.math.maxInt(usize);
+        if (elem_count == 0) {
+            if (p.tok_ids[p.tok_i] != .l_brace) {
+                try p.err(.empty_aggregate_init_braces);
+                return error.ParsingFailed;
+            }
+            return false;
+        }
+        const elem_ty = arr_ty.elemType();
+        const arr_il = il.*;
+        if (start_index.* < elem_count) {
+            ty.* = elem_ty;
+            il.* = try arr_il.find(p.pp.comp.gpa, start_index.*);
+            _ = try p.findScalarInitializer(il, ty);
+            return true;
+        }
+        return false;
+    } else if (ty.get(.@"struct")) |struct_ty| {
+        start_index.* += 1;
+
+        const field_count = struct_ty.data.record.fields.len;
+        if (field_count == 0) {
+            if (p.tok_ids[p.tok_i] != .l_brace) {
+                try p.err(.empty_aggregate_init_braces);
+                return error.ParsingFailed;
+            }
+            return false;
+        }
+        const struct_il = il.*;
+        if (start_index.* < field_count) {
+            const field = struct_ty.data.record.fields[start_index.*];
+            ty.* = field.ty;
+            il.* = try struct_il.find(p.pp.comp.gpa, start_index.*);
+            _ = try p.findScalarInitializer(il, ty);
+            return true;
+        }
+        return false;
+    } else if (ty.get(.@"union")) |_| {
+        return false;
+    }
+    return il.*.node == .none;
+}
+
+/// Returns true if the value is unused.
+fn findScalarInitializer(p: *Parser, il: **InitList, ty: *Type) Error!bool {
+    if (ty.isArray()) {
+        var index = il.*.list.items.len;
+        if (index != 0) index = il.*.list.items[index - 1].index;
+
+        const arr_ty = ty.*;
+        const elem_count = arr_ty.arrayLen() orelse std.math.maxInt(usize);
+        if (elem_count == 0) {
+            if (p.tok_ids[p.tok_i] != .l_brace) {
+                try p.err(.empty_aggregate_init_braces);
+                return error.ParsingFailed;
+            }
+            return false;
+        }
+        const elem_ty = arr_ty.elemType();
+        const arr_il = il.*;
+        while (index < elem_count) : (index += 1) {
+            ty.* = elem_ty;
+            il.* = try arr_il.find(p.pp.comp.gpa, index);
+            if (try p.findScalarInitializer(il, ty)) return true;
+        }
+        return false;
+    } else if (ty.get(.@"struct")) |struct_ty| {
+        var index = il.*.list.items.len;
+        if (index != 0) index = il.*.list.items[index - 1].index + 1;
+
+        const field_count = struct_ty.data.record.fields.len;
+        if (field_count == 0) {
+            if (p.tok_ids[p.tok_i] != .l_brace) {
+                try p.err(.empty_aggregate_init_braces);
+                return error.ParsingFailed;
+            }
+            return false;
+        }
+        const struct_il = il.*;
+        while (index < field_count) : (index += 1) {
+            const field = struct_ty.data.record.fields[index];
+            ty.* = field.ty;
+            il.* = try struct_il.find(p.pp.comp.gpa, index);
+            if (try p.findScalarInitializer(il, ty)) return true;
+        }
+        return false;
+    } else if (ty.get(.@"union")) |union_ty| {
+        if (union_ty.data.record.fields.len == 0) {
+            if (p.tok_ids[p.tok_i] != .l_brace) {
+                try p.err(.empty_aggregate_init_braces);
+                return error.ParsingFailed;
+            }
+            return false;
+        }
+        ty.* = union_ty.data.record.fields[0].ty;
+        il.* = try il.*.find(p.pp.comp.gpa, 0);
+        if (try p.findScalarInitializer(il, ty)) return true;
+        return false;
+    }
+    return il.*.node == .none;
+}
+
+fn findAggregateInitializer(p: *Parser, il: **InitList, ty: *Type, start_index: *?usize) Error!bool {
+    if (ty.isArray()) {
+        var index = il.*.list.items.len;
+        if (index != 0) index = il.*.list.items[index - 1].index + 1;
+        if (start_index.*) |*some| {
+            some.* += 1;
+            index = some.*;
+        }
+
+        const arr_ty = ty.*;
+        const elem_count = arr_ty.arrayLen() orelse std.math.maxInt(usize);
+        const elem_ty = arr_ty.elemType();
+        if (index < elem_count) {
+            ty.* = elem_ty;
+            il.* = try il.*.find(p.pp.comp.gpa, index);
+            return true;
+        }
+        return false;
+    } else if (ty.get(.@"struct")) |struct_ty| {
+        var index = il.*.list.items.len;
+        if (index != 0) index = il.*.list.items[index - 1].index + 1;
+        if (start_index.*) |*some| {
+            some.* += 1;
+            index = some.*;
+        }
+
+        const field_count = struct_ty.data.record.fields.len;
+        if (index < field_count) {
+            ty.* = struct_ty.data.record.fields[index].ty;
+            il.* = try il.*.find(p.pp.comp.gpa, index);
+            return true;
+        }
+        return false;
+    } else if (ty.get(.@"union")) |union_ty| {
+        if (start_index.*) |_| return false; // overrides
+
+        ty.* = union_ty.data.record.fields[0].ty;
+        il.* = try il.*.find(p.pp.comp.gpa, 0);
+        return true;
+    } else {
+        try p.err(.too_many_scalar_init_braces);
+        return il.*.node == .none;
+    }
+}
+
+fn coerceArrayInit(p: *Parser, item: *Result, tok: TokenIndex, target: Type) !bool {
+    if (!target.isArray()) return false;
+
+    const is_str_lit = p.nodeIs(item.node, .string_literal_expr);
+    if (!is_str_lit and !p.nodeIs(item.node, .compound_literal_expr)) {
+        try p.errTok(.array_init_str, tok);
+        return true; // do not do further coercion
+    }
+
+    const target_spec = target.elemType().canonicalize(.standard).specifier;
+    const item_spec = item.ty.elemType().canonicalize(.standard).specifier;
+
+    const compatible = target.elemType().eql(item.ty.elemType(), p.pp.comp, false) or
+        (is_str_lit and item_spec == .char and (target_spec == .uchar or target_spec == .schar));
+    if (!compatible) {
+        const e_msg = " with array of type ";
+        try p.errStr(.incompatible_array_init, tok, try p.typePairStrExtra(target, e_msg, item.ty));
+        return true; // do not do further coercion
+    }
+
+    if (target.get(.array)) |arr_ty| {
+        assert(item.ty.specifier == .array);
+        var len = item.ty.arrayLen().?;
+        const array_len = arr_ty.arrayLen().?;
+        if (is_str_lit) {
+            // the null byte of a string can be dropped
+            if (len - 1 > array_len)
+                try p.errTok(.str_init_too_long, tok);
+        } else if (len > array_len) {
+            try p.errStr(
+                .arr_init_too_long,
+                tok,
+                try p.typePairStrExtra(target, " with array of type ", item.ty),
+            );
+        }
+    }
+    return true;
+}
+
+fn coerceInit(p: *Parser, item: *Result, tok: TokenIndex, target: Type) !void {
+    if (target.is(.void)) return; // Do not do type coercion on excess items
+
+    // item does not need to be qualified
+    var unqual_ty = target.canonicalize(.standard);
+    unqual_ty.qual = .{};
+    const e_msg = " from incompatible type ";
+    try item.lvalConversion(p);
+    if (unqual_ty.is(.bool)) {
+        // this is ridiculous but it's what clang does
+        if (item.ty.isInt() or item.ty.isFloat() or item.ty.isPtr()) {
+            try item.boolCast(p, unqual_ty);
+        } else {
+            try p.errStr(.incompatible_init, tok, try p.typePairStrExtra(target, e_msg, item.ty));
+        }
+    } else if (unqual_ty.isInt()) {
+        if (item.ty.isInt() or item.ty.isFloat()) {
+            try item.intCast(p, unqual_ty);
+        } else if (item.ty.isPtr()) {
+            try p.errStr(.implicit_ptr_to_int, tok, try p.typePairStrExtra(item.ty, " to ", target));
+            try item.intCast(p, unqual_ty);
+        } else {
+            try p.errStr(.incompatible_init, tok, try p.typePairStrExtra(target, e_msg, item.ty));
+        }
+    } else if (unqual_ty.isFloat()) {
+        if (item.ty.isInt() or item.ty.isFloat()) {
+            try item.floatCast(p, unqual_ty);
+        } else {
+            try p.errStr(.incompatible_init, tok, try p.typePairStrExtra(target, e_msg, item.ty));
+        }
+    } else if (unqual_ty.isPtr()) {
+        if (item.val.isZero()) {
+            try item.nullCast(p, target);
+        } else if (item.ty.isInt()) {
+            try p.errStr(.implicit_int_to_ptr, tok, try p.typePairStrExtra(item.ty, " to ", target));
+            try item.ptrCast(p, unqual_ty);
+        } else if (item.ty.isPtr()) {
+            if (!item.ty.isVoidStar() and !unqual_ty.isVoidStar() and !unqual_ty.eql(item.ty, p.pp.comp, false)) {
+                try p.errStr(.incompatible_ptr_init, tok, try p.typePairStrExtra(target, e_msg, item.ty));
+                try item.ptrCast(p, unqual_ty);
+            } else if (!unqual_ty.eql(item.ty, p.pp.comp, true)) {
+                if (!unqual_ty.elemType().qual.hasQuals(item.ty.elemType().qual)) {
+                    try p.errStr(.ptr_init_discards_quals, tok, try p.typePairStrExtra(target, e_msg, item.ty));
+                }
+                try item.ptrCast(p, unqual_ty);
+            }
+        } else {
+            try p.errStr(.incompatible_init, tok, try p.typePairStrExtra(target, e_msg, item.ty));
+        }
+    } else if (unqual_ty.isRecord()) {
+        if (!unqual_ty.eql(item.ty, p.pp.comp, false))
+            try p.errStr(.incompatible_init, tok, try p.typePairStrExtra(target, e_msg, item.ty));
+    } else if (unqual_ty.isArray() or unqual_ty.isFunc()) {
+        // we have already issued an error for this
+    } else {
+        try p.errStr(.incompatible_init, tok, try p.typePairStrExtra(target, e_msg, item.ty));
+    }
+}
+
+fn isStringInit(p: *Parser, ty: Type) bool {
+    if (!ty.isArray() or !ty.elemType().isInt()) return false;
+    var i = p.tok_i;
+    while (true) : (i += 1) {
+        switch (p.tok_ids[i]) {
+            .l_paren => {},
+            .string_literal,
+            .string_literal_utf_16,
+            .string_literal_utf_8,
+            .string_literal_utf_32,
+            .string_literal_wide,
+            => return true,
+            else => return false,
+        }
+    }
+}
+
+/// Convert InitList into an AST
+fn convertInitList(p: *Parser, il: InitList, init_ty: Type) Error!NodeIndex {
+    if (init_ty.isInt() or init_ty.isFloat() or init_ty.isPtr()) {
+        if (il.node == .none) {
+            return p.addNode(.{ .tag = .default_init_expr, .ty = init_ty, .data = undefined });
+        }
+        return il.node;
+    } else if (init_ty.is(.variable_len_array)) {
+        return error.ParsingFailed; // vla invalid, reported earlier
+    } else if (init_ty.isArray()) {
+        if (il.node != .none) {
+            return il.node;
+        }
+        const list_buf_top = p.list_buf.items.len;
+        defer p.list_buf.items.len = list_buf_top;
+
+        const elem_ty = init_ty.elemType();
+
+        const max_items = init_ty.arrayLen() orelse std.math.maxInt(usize);
+        var start: u64 = 0;
+        for (il.list.items) |*init| {
+            if (init.index > start) {
+                const elem = try p.addNode(.{
+                    .tag = .array_filler_expr,
+                    .ty = elem_ty,
+                    .data = .{ .int = init.index - start },
+                });
+                try p.list_buf.append(elem);
+            }
+            start = init.index + 1;
+
+            const elem = try p.convertInitList(init.list, elem_ty);
+            try p.list_buf.append(elem);
+        }
+
+        var arr_init_node: Tree.Node = .{
+            .tag = .array_init_expr_two,
+            .ty = init_ty,
+            .data = .{ .bin = .{ .lhs = .none, .rhs = .none } },
+        };
+
+        if (init_ty.specifier == .incomplete_array) {
+            arr_init_node.ty.specifier = .array;
+            arr_init_node.ty.data.array.len = start;
+        } else if (init_ty.is(.incomplete_array)) {
+            const arr_ty = try p.arena.create(Type.Array);
+            arr_ty.* = .{ .elem = init_ty.elemType(), .len = start };
+            arr_init_node.ty = .{
+                .specifier = .array,
+                .data = .{ .array = arr_ty },
+            };
+            const attrs = init_ty.getAttributes();
+            arr_init_node.ty = try arr_init_node.ty.withAttributes(p.arena, attrs);
+        } else if (start < max_items) {
+            const elem = try p.addNode(.{
+                .tag = .array_filler_expr,
+                .ty = elem_ty,
+                .data = .{ .int = max_items - start },
+            });
+            try p.list_buf.append(elem);
+        }
+
+        const items = p.list_buf.items[list_buf_top..];
+        switch (items.len) {
+            0 => {},
+            1 => arr_init_node.data.bin.lhs = items[0],
+            2 => arr_init_node.data.bin = .{ .lhs = items[0], .rhs = items[1] },
+            else => {
+                arr_init_node.tag = .array_init_expr;
+                arr_init_node.data = .{ .range = try p.addList(items) };
+            },
+        }
+        return try p.addNode(arr_init_node);
+    } else if (init_ty.get(.@"struct")) |struct_ty| {
+        assert(!struct_ty.hasIncompleteSize());
+
+        const list_buf_top = p.list_buf.items.len;
+        defer p.list_buf.items.len = list_buf_top;
+
+        var init_index: usize = 0;
+        for (struct_ty.data.record.fields) |f, i| {
+            if (init_index < il.list.items.len and il.list.items[init_index].index == i) {
+                const item = try p.convertInitList(il.list.items[init_index].list, f.ty);
+                try p.list_buf.append(item);
+                init_index += 1;
+            } else {
+                const item = try p.addNode(.{ .tag = .default_init_expr, .ty = f.ty, .data = undefined });
+                try p.list_buf.append(item);
+            }
+        }
+
+        var struct_init_node: Tree.Node = .{
+            .tag = .struct_init_expr_two,
+            .ty = init_ty,
+            .data = .{ .bin = .{ .lhs = .none, .rhs = .none } },
+        };
+        const items = p.list_buf.items[list_buf_top..];
+        switch (items.len) {
+            0 => {},
+            1 => struct_init_node.data.bin.lhs = items[0],
+            2 => struct_init_node.data.bin = .{ .lhs = items[0], .rhs = items[1] },
+            else => {
+                struct_init_node.tag = .struct_init_expr;
+                struct_init_node.data = .{ .range = try p.addList(items) };
+            },
+        }
+        return try p.addNode(struct_init_node);
+    } else if (init_ty.get(.@"union")) |union_ty| {
+        var union_init_node: Tree.Node = .{
+            .tag = .union_init_expr,
+            .ty = init_ty,
+            .data = .{ .union_init = .{ .field_index = 0, .node = .none } },
+        };
+        if (union_ty.data.record.fields.len == 0) {
+            // do nothing for empty unions
+        } else if (il.list.items.len == 0) {
+            union_init_node.data.union_init.node = try p.addNode(.{
+                .tag = .default_init_expr,
+                .ty = init_ty,
+                .data = undefined,
+            });
+        } else {
+            const init = il.list.items[0];
+            const field_ty = union_ty.data.record.fields[init.index].ty;
+            union_init_node.data.union_init = .{
+                .field_index = @truncate(u32, init.index),
+                .node = try p.convertInitList(init.list, field_ty),
+            };
+        }
+        return try p.addNode(union_init_node);
+    } else {
+        return error.ParsingFailed; // initializer target is invalid, reported earlier
+    }
+}
+
+/// assembly : keyword_asm asmQual* '(' asmStr ')'
+fn assembly(p: *Parser, kind: enum { global, decl_label, stmt }) Error!?NodeIndex {
+    const asm_tok = p.tok_i;
+    switch (p.tok_ids[p.tok_i]) {
+        .keyword_asm, .keyword_asm1, .keyword_asm2 => p.tok_i += 1,
+        else => return null,
+    }
+
+    var @"volatile" = false;
+    var @"inline" = false;
+    var goto = false;
+    while (true) : (p.tok_i += 1) switch (p.tok_ids[p.tok_i]) {
+        .keyword_volatile, .keyword_volatile1, .keyword_volatile2 => {
+            if (kind != .stmt) try p.errStr(.meaningless_asm_qual, p.tok_i, "volatile");
+            if (@"volatile") try p.errStr(.duplicate_asm_qual, p.tok_i, "volatile");
+            @"volatile" = true;
+        },
+        .keyword_inline, .keyword_inline1, .keyword_inline2 => {
+            if (kind != .stmt) try p.errStr(.meaningless_asm_qual, p.tok_i, "inline");
+            if (@"inline") try p.errStr(.duplicate_asm_qual, p.tok_i, "inline");
+            @"inline" = true;
+        },
+        .keyword_goto => {
+            if (kind != .stmt) try p.errStr(.meaningless_asm_qual, p.tok_i, "goto");
+            if (goto) try p.errStr(.duplicate_asm_qual, p.tok_i, "goto");
+            goto = true;
+        },
+        else => break,
+    };
+
+    const l_paren = try p.expectToken(.l_paren);
+    switch (kind) {
+        .decl_label => {
+            const str = (try p.asmStr()).val.data.bytes;
+            const attr = Attribute{ .tag = .asm_label, .args = .{ .asm_label = .{ .name = str[0 .. str.len - 1] } } };
+            try p.attr_buf.append(p.pp.comp.gpa, .{ .attr = attr, .tok = asm_tok });
+        },
+        .global => _ = try p.asmStr(),
+        .stmt => return p.todo("assembly statements"),
+    }
+    try p.expectClosing(l_paren, .r_paren);
+
+    if (kind != .decl_label) _ = try p.expectToken(.semicolon);
+    return .none;
+}
+
+/// Same as stringLiteral but errors on unicode and wide string literals
+fn asmStr(p: *Parser) Error!Result {
+    var i = p.tok_i;
+    while (true) : (i += 1) switch (p.tok_ids[i]) {
+        .string_literal => {},
+        .string_literal_utf_16, .string_literal_utf_8, .string_literal_utf_32 => {
+            try p.errStr(.invalid_asm_str, p.tok_i, "unicode");
+            return error.ParsingFailed;
+        },
+        .string_literal_wide => {
+            try p.errStr(.invalid_asm_str, p.tok_i, "wide");
+            return error.ParsingFailed;
+        },
+        else => break,
+    };
+    return try p.stringLiteral();
+}
+
+// ====== statements ======
+
+/// stmt
+/// : labeledStmt
+/// | compoundStmt
+/// | keyword_if '(' expr ')' stmt (keyword_else stmt)?
+/// | keyword_switch '(' expr ')' stmt
+/// | keyword_while '(' expr ')' stmt
+/// | keyword_do stmt while '(' expr ')' ';'
+/// | keyword_for '(' (decl | expr? ';') expr? ';' expr? ')' stmt
+/// | keyword_goto (IDENTIFIER | ('*' expr)) ';'
+/// | keyword_continue ';'
+/// | keyword_break ';'
+/// | keyword_return expr? ';'
+/// | assembly ';'
+/// | expr? ';'
+fn stmt(p: *Parser) Error!NodeIndex {
+    if (try p.labeledStmt()) |some| return some;
+    if (try p.compoundStmt(false, null)) |some| return some;
+    if (p.eatToken(.keyword_if)) |_| {
+        const start_scopes_len = p.scopes.items.len;
+        defer p.scopes.items.len = start_scopes_len;
+
+        const l_paren = try p.expectToken(.l_paren);
+        var cond = try p.expr();
+        try cond.expect(p);
+        try cond.lvalConversion(p);
+        if (cond.ty.isInt())
+            try cond.intCast(p, cond.ty.integerPromotion(p.pp.comp))
+        else if (!cond.ty.isFloat() and !cond.ty.isPtr())
+            try p.errStr(.statement_scalar, l_paren + 1, try p.typeStr(cond.ty));
+        try cond.saveValue(p);
+        try p.expectClosing(l_paren, .r_paren);
+
+        const then = try p.stmt();
+        const @"else" = if (p.eatToken(.keyword_else)) |_| try p.stmt() else .none;
+
+        if (then != .none and @"else" != .none)
+            return try p.addNode(.{
+                .tag = .if_then_else_stmt,
+                .data = .{ .if3 = .{ .cond = cond.node, .body = (try p.addList(&.{ then, @"else" })).start } },
+            })
+        else if (then == .none and @"else" != .none)
+            return try p.addNode(.{
+                .tag = .if_else_stmt,
+                .data = .{ .bin = .{ .lhs = cond.node, .rhs = @"else" } },
+            })
+        else
+            return try p.addNode(.{
+                .tag = .if_then_stmt,
+                .data = .{ .bin = .{ .lhs = cond.node, .rhs = then } },
+            });
+    }
+    if (p.eatToken(.keyword_switch)) |_| {
+        const start_scopes_len = p.scopes.items.len;
+        defer p.scopes.items.len = start_scopes_len;
+
+        const l_paren = try p.expectToken(.l_paren);
+        var cond = try p.expr();
+        try cond.expect(p);
+        try cond.lvalConversion(p);
+        if (cond.ty.isInt())
+            try cond.intCast(p, cond.ty.integerPromotion(p.pp.comp))
+        else
+            try p.errStr(.statement_int, l_paren + 1, try p.typeStr(cond.ty));
+        try cond.saveValue(p);
+        try p.expectClosing(l_paren, .r_paren);
+
+        var switch_scope = Scope.Switch{
+            .cases = Scope.Switch.CaseMap.initContext(
+                p.pp.comp.gpa,
+                .{ .ty = cond.ty, .comp = p.pp.comp },
+            ),
+        };
+        defer switch_scope.cases.deinit();
+        try p.scopes.append(.{ .@"switch" = &switch_scope });
+        const body = try p.stmt();
+
+        return try p.addNode(.{
+            .tag = .switch_stmt,
+            .data = .{ .bin = .{ .lhs = cond.node, .rhs = body } },
+        });
+    }
+    if (p.eatToken(.keyword_while)) |_| {
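+        // Same shape as `if`: the condition undergoes lvalue conversion and
+        // integer promotion, and must otherwise be of scalar type.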
+        const start_scopes_len = p.scopes.items.len;
+        defer p.scopes.items.len = start_scopes_len;
+
+        const l_paren = try p.expectToken(.l_paren);
+        var cond = try p.expr();
+        try cond.expect(p);
+        try cond.lvalConversion(p);
+        if (cond.ty.isInt())
+            try cond.intCast(p, cond.ty.integerPromotion(p.pp.comp))
+        else if (!cond.ty.isFloat() and !cond.ty.isPtr())
+            try p.errStr(.statement_scalar, l_paren + 1, try p.typeStr(cond.ty));
+        try cond.saveValue(p);
+        try p.expectClosing(l_paren, .r_paren);
+
+        try p.scopes.append(.loop);
+        const body = try p.stmt();
+
+        return try p.addNode(.{
+            .tag = .while_stmt,
+            .data = .{ .bin = .{ .rhs = cond.node, .lhs = body } },
+        });
+    }
+    if (p.eatToken(.keyword_do)) |_| {
+        const start_scopes_len = p.scopes.items.len;
+        defer p.scopes.items.len = start_scopes_len;
+
+        try p.scopes.append(.loop);
+        const body = try p.stmt();
+        p.scopes.items.len = start_scopes_len;
+
+        _ = try p.expectToken(.keyword_while);
+        const l_paren = try p.expectToken(.l_paren);
+        var cond = try p.expr();
+        try cond.expect(p);
+        try cond.lvalConversion(p);
+        if (cond.ty.isInt())
+            try cond.intCast(p, cond.ty.integerPromotion(p.pp.comp))
+        else if (!cond.ty.isFloat() and !cond.ty.isPtr())
+            try p.errStr(.statement_scalar, l_paren + 1, try p.typeStr(cond.ty));
+        try cond.saveValue(p);
+        try p.expectClosing(l_paren, .r_paren);
+
+        _ = try p.expectToken(.semicolon);
+        return try p.addNode(.{
+            .tag = .do_while_stmt,
+            .data = .{ .bin = .{ .rhs = cond.node, .lhs = body } },
+        });
+    }
+    if (p.eatToken(.keyword_for)) |_| {
+        const start_scopes_len = p.scopes.items.len;
+        defer p.scopes.items.len = start_scopes_len;
+        const decl_buf_top = p.decl_buf.items.len;
+        defer p.decl_buf.items.len = decl_buf_top;
+
+        const l_paren = try p.expectToken(.l_paren);
+        const got_decl = try p.decl();
+
+        // for (init
+        const init_start = p.tok_i;
+        var err_start = p.pp.comp.diag.list.items.len;
+        var init = if (!got_decl) try p.expr() else Result{};
+        try init.saveValue(p);
+        try init.maybeWarnUnused(p, init_start, err_start);
+        if (!got_decl) _ = try p.expectToken(.semicolon);
+
+        // for (init; cond
+        var cond = try p.expr();
+        if (cond.node != .none) {
+            try cond.lvalConversion(p);
+            if (cond.ty.isInt())
+                try cond.intCast(p, cond.ty.integerPromotion(p.pp.comp))
+            else if (!cond.ty.isFloat() and !cond.ty.isPtr())
+                try p.errStr(.statement_scalar, l_paren + 1, try p.typeStr(cond.ty));
+        }
+        try cond.saveValue(p);
+        _ = try p.expectToken(.semicolon);
+
+        // for (init; cond; incr
+        const incr_start = p.tok_i;
+        err_start = p.pp.comp.diag.list.items.len;
+        var incr = try p.expr();
+        try incr.maybeWarnUnused(p, incr_start, err_start);
+        try incr.saveValue(p);
+        try p.expectClosing(l_paren, .r_paren);
+
+        try p.scopes.append(.loop);
+        const body = try p.stmt();
+
+        if (got_decl) {
+            const start = (try p.addList(p.decl_buf.items[decl_buf_top..])).start;
+            const end = (try p.addList(&.{ cond.node, incr.node, body })).end;
+
+            return try p.addNode(.{
+                .tag = .for_decl_stmt,
+                .data = .{ .range = .{ .start = start, .end = end } },
+            });
+        } else if (init.node == .none and cond.node == .none and incr.node == .none) {
+            return try p.addNode(.{
+                .tag = .forever_stmt,
+                .data = .{ .un = body },
+            });
+        } else return try p.addNode(.{ .tag = .for_stmt, .data = .{ .if3 = .{
+            .cond = body,
+            .body = (try p.addList(&.{ init.node, cond.node, incr.node })).start,
+        } } });
+    }
+    if (p.eatToken(.keyword_goto)) |goto_tok| {
+        if (p.eatToken(.asterisk)) |_| {
+            const expr_tok = p.tok_i;
+            var e = try p.expr();
+            try e.expect(p);
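+            // GNU computed goto: `goto *expr;`. The operand must be a pointer;
+            // a nonzero integer is diagnosed and cast to `const void *`, while
+            // a zero integer becomes a null pointer constant.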
+            try e.lvalConversion(p);
+            p.computed_goto_tok = p.computed_goto_tok orelse goto_tok;
+            if (!e.ty.isPtr()) {
+                if (!e.ty.isInt()) {
+                    try p.errStr(.incompatible_param, expr_tok, try p.typeStr(e.ty));
+                    return error.ParsingFailed;
+                }
+                const elem_ty = try p.arena.create(Type);
+                elem_ty.* = .{ .specifier = .void, .qual = .{ .@"const" = true } };
+                const result_ty = Type{
+                    .specifier = .pointer,
+                    .data = .{ .sub_type = elem_ty },
+                };
+                if (e.val.isZero()) {
+                    try e.nullCast(p, result_ty);
+                } else {
+                    try p.errStr(.implicit_int_to_ptr, expr_tok, try p.typePairStrExtra(e.ty, " to ", result_ty));
+                    try e.ptrCast(p, result_ty);
+                }
+            }
+
+            try e.un(p, .computed_goto_stmt);
+            _ = try p.expectToken(.semicolon);
+            return e.node;
+        }
+        const name_tok = try p.expectIdentifier();
+        const str = p.tokSlice(name_tok);
+        if (p.findLabel(str) == null) {
+            try p.labels.append(.{ .unresolved_goto = name_tok });
+        }
+        _ = try p.expectToken(.semicolon);
+        return try p.addNode(.{
+            .tag = .goto_stmt,
+            .data = .{ .decl_ref = name_tok },
+        });
+    }
+    if (p.eatToken(.keyword_continue)) |cont| {
+        if (!p.inLoop()) try p.errTok(.continue_not_in_loop, cont);
+        _ = try p.expectToken(.semicolon);
+        return try p.addNode(.{ .tag = .continue_stmt, .data = undefined });
+    }
+    if (p.eatToken(.keyword_break)) |br| {
+        if (!p.inLoopOrSwitch()) try p.errTok(.break_not_in_loop_or_switch, br);
+        _ = try p.expectToken(.semicolon);
+        return try p.addNode(.{ .tag = .break_stmt, .data = undefined });
+    }
+    if (try p.returnStmt()) |some| return some;
+    if (try p.assembly(.stmt)) |some| return some;
+
+    const expr_start = p.tok_i;
+    const err_start = p.pp.comp.diag.list.items.len;
+
+    const e = try p.expr();
+    if (e.node != .none) {
+        _ = try p.expectToken(.semicolon);
+        try e.maybeWarnUnused(p, expr_start, err_start);
+        return e.node;
+    }
+
+    const attr_buf_top = p.attr_buf.len;
+    defer p.attr_buf.len = attr_buf_top;
+    try p.attributeSpecifier(); // statement
+
+    if (p.eatToken(.semicolon)) |_| {
+        var null_node: Tree.Node = .{ .tag = .null_stmt, .data = undefined };
+        null_node.ty = try p.withAttributes(null_node.ty, attr_buf_top);
+        if (null_node.ty.getAttribute(.fallthrough) != null) {
+            if (p.tok_ids[p.tok_i] != .keyword_case and p.tok_ids[p.tok_i] != .keyword_default) {
+                // TODO: this condition is not completely correct; the last statement of a compound
+                // statement is also valid if it precedes a switch label (so intervening '}' are ok,
+                // but only if they close a compound statement)
+                try p.errTok(.invalid_fallthrough, expr_start);
+            }
+        }
+        return p.addNode(null_node);
+    }
+
+    try p.err(.expected_stmt);
+    return error.ParsingFailed;
+}
+
+/// labeledStmt
+/// : IDENTIFIER ':' stmt
+/// | keyword_case constExpr ':' stmt
+/// | keyword_default ':' stmt
+fn labeledStmt(p: *Parser) Error!?NodeIndex {
+    if ((p.tok_ids[p.tok_i] == .identifier or p.tok_ids[p.tok_i] == .extended_identifier) and p.tok_ids[p.tok_i + 1] == .colon) {
+        const name_tok = p.expectIdentifier() catch unreachable;
+        const str = p.tokSlice(name_tok);
+        if (p.findLabel(str)) |some| {
+            try p.errStr(.duplicate_label, name_tok, str);
+            try p.errStr(.previous_label, some, str);
+        } else {
+            p.label_count += 1;
+            try p.labels.append(.{ .label = name_tok });
+            var i: usize = 0;
+            while (i < p.labels.items.len) {
+                if (p.labels.items[i] == .unresolved_goto and
+                    mem.eql(u8, p.tokSlice(p.labels.items[i].unresolved_goto), str))
+                {
+                    _ = p.labels.swapRemove(i);
+                } else i += 1;
+            }
+        }
+
+        p.tok_i += 1;
+        const attr_buf_top = p.attr_buf.len;
+        defer p.attr_buf.len = attr_buf_top;
+        try p.attributeSpecifier(); // label
+
+        return try p.addNode(.{
+            .tag = .labeled_stmt,
+            .data = .{ .decl = .{ .name = name_tok, .node = try p.stmt() } },
+        });
+    } else if (p.eatToken(.keyword_case)) |case| {
+        const val = try p.constExpr();
+        _ = try p.expectToken(.colon);
+        const s = try p.stmt();
+        const node = try p.addNode(.{
+            .tag = .case_stmt,
+            .data = .{ .bin = .{ .lhs = val.node, .rhs = s } },
+        });
+        if (p.findSwitch()) |some| {
+            if (val.val.tag == .unavailable) {
+                try p.errTok(.case_val_unavailable, case + 1);
+                return node;
+            }
+            // TODO cast to target type
+            const gop = try some.cases.getOrPut(val);
+            if (gop.found_existing) {
+                if (some.cases.ctx.ty.isUnsignedInt(p.pp.comp)) {
+                    try p.errExtra(.duplicate_switch_case_unsigned, case, .{
+                        .unsigned = val.val.data.int,
+                    });
+                } else {
+                    try p.errExtra(.duplicate_switch_case_signed, case, .{
+                        .signed = val.val.signExtend(val.ty, p.pp.comp),
+                    });
+                }
+                try p.errTok(.previous_case, gop.value_ptr.tok);
+            } else {
+                gop.value_ptr.* = .{
+                    .tok = case,
+                    .node = node,
+                };
+            }
+        } else {
+            try p.errStr(.case_not_in_switch, case, "case");
+        }
+        return node;
+    } else if (p.eatToken(.keyword_default)) |default| {
+        _ = try p.expectToken(.colon);
+        const s = try p.stmt();
+        const node = try p.addNode(.{
+            .tag = .default_stmt,
+            .data = .{ .un = s },
+        });
+        if (p.findSwitch()) |some| {
+            if (some.default) |previous| {
+                try p.errTok(.multiple_default, default);
+                try p.errTok(.previous_case, previous.tok);
+            } else {
+                some.default = .{
+                    .tok = default,
+                    .node = node,
+                };
+            }
+        } else {
+            try p.errStr(.case_not_in_switch, default, "default");
+        }
+        return node;
+    } else return null;
+}
+
+const StmtExprState = struct {
+    last_expr_tok: TokenIndex = 0,
+    last_expr_res: Result = .{ .ty = .{ .specifier = .void } },
+};
+
+/// compoundStmt : '{' ( decl | keyword_extension decl | staticAssert | stmt)* '}'
+fn compoundStmt(p: *Parser, is_fn_body: bool, stmt_expr_state: ?*StmtExprState) Error!?NodeIndex {
+    const l_brace = p.eatToken(.l_brace) orelse return null;
+
+    const decl_buf_top = p.decl_buf.items.len;
+    defer p.decl_buf.items.len = decl_buf_top;
+
+    const scopes_top = p.scopes.items.len;
+    defer p.scopes.items.len = scopes_top;
+    // the parameters of a function are in the same scope as the body
+    if (!is_fn_body) try p.scopes.append(.block);
+
+    var noreturn_index: ?TokenIndex = null;
+    var noreturn_label_count: u32 = 0;
+
+    while (p.eatToken(.r_brace) == null) : (_ = try p.pragma()) {
+        if (stmt_expr_state) |state| state.* = .{};
+        if (try p.parseOrNextStmt(staticAssert, l_brace)) continue;
+        if (try p.parseOrNextStmt(decl, l_brace)) continue;
+        if (p.eatToken(.keyword_extension)) |ext| {
+            const saved_extension = p.extension_suppressed;
+            defer p.extension_suppressed = saved_extension;
+            p.extension_suppressed = true;
+
+            if (try p.parseOrNextStmt(decl, l_brace)) continue;
+            p.tok_i = ext;
+        }
+        const stmt_tok = p.tok_i;
+        const s = p.stmt() catch |er| switch (er) {
+            error.ParsingFailed => {
+                try p.nextStmt(l_brace);
+                continue;
+            },
+            else => |e| return e,
+        };
+        if (s == .none) continue;
+        if (stmt_expr_state) |state| {
+            state.* = .{
+                .last_expr_tok = stmt_tok,
+                .last_expr_res = .{
+                    .node = s,
+                    .ty = p.nodes.items(.ty)[@enumToInt(s)],
+                },
+            };
+        }
+        try p.decl_buf.append(s);
+
+        if (noreturn_index == null and p.nodeIsNoreturn(s)) {
+            noreturn_index = p.tok_i;
+            noreturn_label_count = p.label_count;
+        }
+        switch (p.nodes.items(.tag)[@enumToInt(s)]) {
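+            // these are jump targets, so code following them is reachable again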
+            .case_stmt, .default_stmt, .labeled_stmt => noreturn_index = null,
+            else => {},
+        }
+    }
+
+    if (noreturn_index) |some| {
+        // if new labels were defined we cannot be certain that the code is unreachable
+        if (some != p.tok_i - 1 and noreturn_label_count == p.label_count) try p.errTok(.unreachable_code, some);
+    }
+    if (is_fn_body and (p.decl_buf.items.len == decl_buf_top or !p.nodeIsNoreturn(p.decl_buf.items[p.decl_buf.items.len - 1]))) {
+        if (!p.func.ty.?.returnType().is(.void)) try p.errStr(.func_does_not_return, p.tok_i - 1, p.tokSlice(p.func.name));
+        try p.decl_buf.append(try p.addNode(.{ .tag = .implicit_return, .ty = p.func.ty.?.returnType(), .data = undefined }));
+    }
+    if (is_fn_body) {
+        if (p.func.ident) |some| try p.decl_buf.insert(decl_buf_top, some.node);
+        if (p.func.pretty_ident) |some| try p.decl_buf.insert(decl_buf_top, some.node);
+    }
+
+    var node: Tree.Node = .{
+        .tag = .compound_stmt_two,
+        .data = .{ .bin = .{ .lhs = .none, .rhs = .none } },
+    };
+    const statements = p.decl_buf.items[decl_buf_top..];
+    switch (statements.len) {
+        0 => {},
+        1 => node.data = .{ .bin = .{ .lhs = statements[0], .rhs = .none } },
+        2 => node.data = .{ .bin = .{ .lhs = statements[0], .rhs = statements[1] } },
+        else => {
+            node.tag = .compound_stmt;
+            node.data = .{ .range = try p.addList(statements) };
+        },
+    }
+    return try p.addNode(node);
+}
+
+fn nodeIsNoreturn(p: *Parser, node: NodeIndex) bool {
+    switch (p.nodes.items(.tag)[@enumToInt(node)]) {
+        .break_stmt, .continue_stmt, .return_stmt => return true,
+        .if_then_else_stmt => {
+            const data = p.data.items[p.nodes.items(.data)[@enumToInt(node)].if3.body..];
+            return p.nodeIsNoreturn(data[0]) and p.nodeIsNoreturn(data[1]);
+        },
+        .compound_stmt_two => {
+            const data = p.nodes.items(.data)[@enumToInt(node)];
+            if (data.bin.rhs != .none) return p.nodeIsNoreturn(data.bin.rhs);
+            if (data.bin.lhs != .none) return p.nodeIsNoreturn(data.bin.lhs);
+            return false;
+        },
+        .compound_stmt => {
+            const data = p.nodes.items(.data)[@enumToInt(node)];
+            return p.nodeIsNoreturn(p.data.items[data.range.end - 1]);
+        },
+        .labeled_stmt => {
+            const data = p.nodes.items(.data)[@enumToInt(node)];
+            return p.nodeIsNoreturn(data.decl.node);
+        },
+        else => return false,
+    }
+}
+
+fn parseOrNextStmt(p: *Parser, comptime func: fn (*Parser) Error!bool, l_brace: TokenIndex) !bool {
+    return func(p) catch |er| switch (er) {
+        error.ParsingFailed => {
+            try p.nextStmt(l_brace);
+            return true;
+        },
+        else => |e| return e,
+    };
+}
+
+fn nextStmt(p: *Parser, l_brace: TokenIndex) !void {
+    var parens: u32 = 0;
+    while (p.tok_i < p.tok_ids.len) : (p.tok_i += 1) {
+        switch (p.tok_ids[p.tok_i]) {
+            .l_paren, .l_brace, .l_bracket => parens += 1,
+            .r_paren, .r_bracket => if (parens != 0) {
+                parens -= 1;
+            },
+            .r_brace => if (parens == 0)
+                return
+            else {
+                parens -= 1;
+            },
+            .semicolon,
+            .keyword_for,
+            .keyword_while,
+            .keyword_do,
+            .keyword_if,
+            .keyword_goto,
+            .keyword_switch,
+            .keyword_case,
+            .keyword_default,
+            .keyword_continue,
+            .keyword_break,
+            .keyword_return,
+            .keyword_typedef,
+            .keyword_extern,
+            .keyword_static,
+            .keyword_auto,
+            .keyword_register,
+            .keyword_thread_local,
+            .keyword_inline,
+            .keyword_inline1,
+            .keyword_inline2,
+            .keyword_noreturn,
+            .keyword_void,
+            .keyword_bool,
+            .keyword_char,
+            .keyword_short,
+            .keyword_int,
+            .keyword_long,
+            .keyword_signed,
+            .keyword_unsigned,
+            .keyword_float,
+            .keyword_double,
+            .keyword_complex,
+            .keyword_atomic,
+            .keyword_enum,
+            .keyword_struct,
+            .keyword_union,
+            .keyword_alignas,
+            .keyword_typeof,
+            .keyword_typeof1,
+            .keyword_typeof2,
+            .keyword_extension,
+            => if (parens == 0) return,
+            .keyword_pragma => p.skipToPragmaSentinel(),
+            else => {},
+        }
+    }
+    p.tok_i -= 1; // So we can consume EOF
+    try p.expectClosing(l_brace, .r_brace);
+    unreachable;
+}
+
+fn returnStmt(p: *Parser) Error!?NodeIndex {
+    const ret_tok = p.eatToken(.keyword_return) orelse return null;
+
+    const e_tok = p.tok_i;
+    var e = try p.expr();
+    _ = try p.expectToken(.semicolon);
+    const ret_ty = p.func.ty.?.returnType();
+
+    if (e.node == .none) {
+        if (!ret_ty.is(.void)) try p.errStr(.func_should_return, ret_tok, p.tokSlice(p.func.name));
+        return try p.addNode(.{ .tag = .return_stmt, .data = .{ .un = e.node } });
+    } else if (ret_ty.is(.void)) {
+        try p.errStr(.void_func_returns_value, e_tok, p.tokSlice(p.func.name));
+        return try p.addNode(.{ .tag = .return_stmt, .data = .{ .un = e.node } });
+    }
+
+    try e.lvalConversion(p);
+    // Return type conversion is done as if it was assignment
+    if (ret_ty.is(.bool)) {
+        // this is ridiculous but it's what clang does
+        if (e.ty.isInt() or e.ty.isFloat() or e.ty.isPtr()) {
+            try e.boolCast(p, ret_ty);
+        } else {
+            try p.errStr(.incompatible_return, e_tok, try p.typeStr(e.ty));
+        }
+    } else if (ret_ty.isInt()) {
+        if (e.ty.isInt() or e.ty.isFloat()) {
+            try e.intCast(p, ret_ty);
+        } else if (e.ty.isPtr()) {
+            try p.errStr(.implicit_ptr_to_int, e_tok, try p.typePairStrExtra(e.ty, " to ", ret_ty));
+            try e.intCast(p, ret_ty);
+        } else {
+            try p.errStr(.incompatible_return, e_tok, try p.typeStr(e.ty));
+        }
+    } else if (ret_ty.isFloat()) {
+        if (e.ty.isInt() or e.ty.isFloat()) {
+            try e.floatCast(p, ret_ty);
+        } else {
+            try p.errStr(.incompatible_return, e_tok, try p.typeStr(e.ty));
+        }
+    } else if (ret_ty.isPtr()) {
+        if (e.val.isZero()) {
+            try e.nullCast(p, ret_ty);
+        } else if (e.ty.isInt()) {
+            try p.errStr(.implicit_int_to_ptr, e_tok, try p.typePairStrExtra(e.ty, " to ", ret_ty));
+            try e.intCast(p, ret_ty);
+        } else if (!e.ty.isVoidStar() and !ret_ty.isVoidStar() and !ret_ty.eql(e.ty, p.pp.comp, false)) {
+            try p.errStr(.incompatible_return, e_tok, try p.typeStr(e.ty));
+        }
+    } else if (ret_ty.isRecord()) {
+        if (!ret_ty.eql(e.ty, p.pp.comp, false)) {
+            try p.errStr(.incompatible_return, e_tok, try p.typeStr(e.ty));
+        }
+    } else if (ret_ty.isFunc()) {
+        // Syntax error reported earlier; just let this return as-is since it is a parse failure anyway
+    } else unreachable;
+
+    try e.saveValue(p);
+    return try p.addNode(.{ .tag = .return_stmt, .data = .{ .un = e.node } });
+}
+
+// ====== expressions ======
+
+pub fn macroExpr(p: *Parser) Compilation.Error!bool {
+    const res = p.condExpr() catch |e| switch (e) {
+        error.OutOfMemory => return error.OutOfMemory,
+        error.FatalError => return error.FatalError,
+        error.ParsingFailed => return false,
+    };
+    if (res.val.tag == .unavailable) {
+        try p.errTok(.expected_expr, p.tok_i);
+        return false;
+    }
+    return res.val.getBool();
+}
+
+const Result = struct {
+    node: NodeIndex = .none,
+    ty: Type = .{ .specifier = .int },
+    val: Value = .{},
+
+    fn expect(res: Result, p: *Parser) Error!void {
+        if (p.in_macro) {
+            if (res.val.tag == .unavailable) {
+                try p.errTok(.expected_expr, p.tok_i);
+                return error.ParsingFailed;
+            }
+            return;
+        }
+        if (res.node == .none) {
+            try p.errTok(.expected_expr, p.tok_i);
+            return error.ParsingFailed;
+        }
+    }
+
+    fn empty(res: Result, p: *Parser) bool {
+        if (p.in_macro) return res.val.tag == .unavailable;
+        return res.node == .none;
+    }
+
+    fn maybeWarnUnused(res: Result, p: *Parser, expr_start: TokenIndex, err_start: usize) Error!void {
+        if (res.ty.is(.void) or res.node == .none) return;
+        // don't warn about unused result if the expression contained errors besides other unused results
+        var i = err_start;
+        while (i < p.pp.comp.diag.list.items.len) : (i += 1) {
+            if (p.pp.comp.diag.list.items[i].tag != .unused_value) return;
+        }
+        var cur_node = res.node;
+        while (true) switch (p.nodes.items(.tag)[@enumToInt(cur_node)]) {
+            .invalid, // So that we don't need to check for node == 0
+            .assign_expr,
+            .mul_assign_expr,
+            .div_assign_expr,
+            .mod_assign_expr,
+            .add_assign_expr,
+            .sub_assign_expr,
+            .shl_assign_expr,
+            .shr_assign_expr,
+            .bit_and_assign_expr,
+            .bit_xor_assign_expr,
+            .bit_or_assign_expr,
+            .call_expr,
+            .call_expr_one,
+            .pre_inc_expr,
+            .pre_dec_expr,
+            .post_inc_expr,
+            .post_dec_expr,
+            => return,
+            .stmt_expr => {
+                const body = p.nodes.items(.data)[@enumToInt(cur_node)].un;
+                switch (p.nodes.items(.tag)[@enumToInt(body)]) {
+                    .compound_stmt_two => {
+                        const body_stmt = p.nodes.items(.data)[@enumToInt(body)].bin;
+                        cur_node = if (body_stmt.rhs != .none) body_stmt.rhs else body_stmt.lhs;
+                    },
+                    .compound_stmt => {
+                        const data = p.nodes.items(.data)[@enumToInt(body)];
+                        cur_node = p.data.items[data.range.end - 1];
+                    },
+                    else => unreachable,
+                }
+            },
+            .comma_expr => cur_node = p.nodes.items(.data)[@enumToInt(cur_node)].bin.rhs,
+            .paren_expr => cur_node = p.nodes.items(.data)[@enumToInt(cur_node)].un,
+            else => break,
+        };
+        try p.errTok(.unused_value, expr_start);
+    }
+
+    fn bin(lhs: *Result, p: *Parser, tag: Tree.Tag, rhs: Result) !void {
+        lhs.node = try p.addNode(.{
+            .tag = tag,
+            .ty = lhs.ty,
+            .data = .{ .bin = .{ .lhs = lhs.node, .rhs = rhs.node } },
+        });
+    }
+
+    fn un(operand: *Result, p: *Parser, tag: Tree.Tag) Error!void {
+        operand.node = try p.addNode(.{
+            .tag = tag,
+            .ty = operand.ty,
+            .data = .{ .un = operand.node },
+        });
+    }
+
+    fn qualCast(res: *Result, p: *Parser, elem_ty: *Type) Error!void {
+        res.ty = .{
+            .data = .{ .sub_type = elem_ty },
+            .specifier = .pointer,
+        };
+        try res.un(p, .qual_cast);
+    }
+
+    fn adjustCondExprPtrs(a: *Result, tok: TokenIndex, b: *Result, p: *Parser) !bool {
+        assert(a.ty.isPtr() and b.ty.isPtr());
+
+        const a_elem = a.ty.elemType();
+        const b_elem = b.ty.elemType();
+        if (a_elem.eql(b_elem, p.pp.comp, true)) return true;
+
+        var adjusted_elem_ty = try p.arena.create(Type);
+        adjusted_elem_ty.* = a_elem;
+
+        const has_void_star_branch = a.ty.isVoidStar() or b.ty.isVoidStar();
+        const only_quals_differ = a_elem.eql(b_elem, p.pp.comp, false);
+        const pointers_compatible = only_quals_differ or has_void_star_branch;
+
+        if (!pointers_compatible or has_void_star_branch) {
+            if (!pointers_compatible) {
+                try p.errStr(.pointer_mismatch, tok, try p.typePairStrExtra(a.ty, " and ", b.ty));
+            }
+            adjusted_elem_ty.* = .{ .specifier = .void };
+        }
+        if (pointers_compatible) {
+            adjusted_elem_ty.qual = a_elem.qual.mergeCV(b_elem.qual);
+        }
+        if (!adjusted_elem_ty.eql(a_elem, p.pp.comp, true)) try a.qualCast(p, adjusted_elem_ty);
+        if (!adjusted_elem_ty.eql(b_elem, p.pp.comp, true)) try b.qualCast(p, adjusted_elem_ty);
+        return true;
+    }
+
+    /// Adjust types for binary operation, returns true if the result can and should be evaluated.
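+    /// `kind` selects what the operator accepts: `.integer` requires two
+    /// integer operands, `.arithmetic` also allows floats, `.relational` and
+    /// `.equality` additionally allow pointers, and `.add`/`.sub` implement
+    /// pointer arithmetic.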
+ fn adjustTypes(a: *Result, tok: TokenIndex, b: *Result, p: *Parser, kind: enum { + integer, + arithmetic, + boolean_logic, + relational, + equality, + conditional, + add, + sub, + }) !bool { + try a.lvalConversion(p); + try b.lvalConversion(p); + + const a_int = a.ty.isInt(); + const b_int = b.ty.isInt(); + if (a_int and b_int) { + try a.usualArithmeticConversion(b, p); + return a.shouldEval(b, p); + } + if (kind == .integer) return a.invalidBinTy(tok, b, p); + + const a_float = a.ty.isFloat(); + const b_float = b.ty.isFloat(); + const a_arithmetic = a_int or a_float; + const b_arithmetic = b_int or b_float; + if (a_arithmetic and b_arithmetic) { + // <, <=, >, >= only work on real types + if (kind == .relational and (!a.ty.isReal() or !b.ty.isReal())) + return a.invalidBinTy(tok, b, p); + + try a.usualArithmeticConversion(b, p); + return a.shouldEval(b, p); + } + if (kind == .arithmetic) return a.invalidBinTy(tok, b, p); + + const a_ptr = a.ty.isPtr(); + const b_ptr = b.ty.isPtr(); + const a_scalar = a_arithmetic or a_ptr; + const b_scalar = b_arithmetic or b_ptr; + switch (kind) { + .boolean_logic => { + if (!a_scalar or !b_scalar) return a.invalidBinTy(tok, b, p); + + // Do integer promotions but nothing else + if (a_int) try a.intCast(p, a.ty.integerPromotion(p.pp.comp)); + if (b_int) try b.intCast(p, b.ty.integerPromotion(p.pp.comp)); + return a.shouldEval(b, p); + }, + .relational, .equality => { + // comparisons between floats and pointes not allowed + if (!a_scalar or !b_scalar or (a_float and b_ptr) or (b_float and a_ptr)) + return a.invalidBinTy(tok, b, p); + + if ((a_int or b_int) and !(a.val.isZero() or b.val.isZero())) { + try p.errStr(.comparison_ptr_int, tok, try p.typePairStr(a.ty, b.ty)); + } else if (a_ptr and b_ptr) { + if (!a.ty.isVoidStar() and !b.ty.isVoidStar() and !a.ty.eql(b.ty, p.pp.comp, false)) + try p.errStr(.comparison_distinct_ptr, tok, try p.typePairStr(a.ty, b.ty)); + } else if (a_ptr) { + try b.ptrCast(p, a.ty); + } else { + assert(b_ptr); + try a.ptrCast(p, b.ty); + } + + return a.shouldEval(b, p); + }, + .conditional => { + // doesn't matter what we return here, as the result is ignored + if (a.ty.is(.void) or b.ty.is(.void)) { + try a.toVoid(p); + try b.toVoid(p); + return true; + } + if ((a_ptr and b_int) or (a_int and b_ptr)) { + if (a.val.isZero() or b.val.isZero()) { + try a.nullCast(p, b.ty); + try b.nullCast(p, a.ty); + return true; + } + const int_ty = if (a_int) a else b; + const ptr_ty = if (a_ptr) a else b; + try p.errStr(.implicit_int_to_ptr, tok, try p.typePairStrExtra(int_ty.ty, " to ", ptr_ty.ty)); + try int_ty.ptrCast(p, ptr_ty.ty); + + return true; + } + if (a_ptr and b_ptr) return a.adjustCondExprPtrs(tok, b, p); + if (a.ty.isRecord() and b.ty.isRecord() and a.ty.eql(b.ty, p.pp.comp, false)) { + return true; + } + return a.invalidBinTy(tok, b, p); + }, + .add => { + // if both aren't arithmetic one should be pointer and the other an integer + if (a_ptr == b_ptr or a_int == b_int) return a.invalidBinTy(tok, b, p); + + // Do integer promotions but nothing else + if (a_int) try a.intCast(p, a.ty.integerPromotion(p.pp.comp)); + if (b_int) try b.intCast(p, b.ty.integerPromotion(p.pp.comp)); + + // The result type is the type of the pointer operand + if (a_int) a.ty = b.ty else b.ty = a.ty; + return a.shouldEval(b, p); + }, + .sub => { + // if both aren't arithmetic then either both should be pointers or just a + if (!a_ptr or !(b_ptr or b_int)) return a.invalidBinTy(tok, b, p); + + if (a_ptr and b_ptr) { + if (!a.ty.eql(b.ty, p.pp.comp, 
false)) try p.errStr(.incompatible_pointers, tok, try p.typePairStr(a.ty, b.ty)); + a.ty = p.pp.comp.types.ptrdiff; + } + + // Do integer promotion on b if needed + if (b_int) try b.intCast(p, b.ty.integerPromotion(p.pp.comp)); + return a.shouldEval(b, p); + }, + else => return a.invalidBinTy(tok, b, p), + } + } + + fn lvalConversion(res: *Result, p: *Parser) Error!void { + if (res.ty.isFunc()) { + var elem_ty = try p.arena.create(Type); + elem_ty.* = res.ty; + res.ty.specifier = .pointer; + res.ty.data = .{ .sub_type = elem_ty }; + try res.un(p, .function_to_pointer); + } else if (res.ty.isArray()) { + res.val.tag = .unavailable; + res.ty.decayArray(); + try res.un(p, .array_to_pointer); + } else if (!p.in_macro and Tree.isLval(p.nodes.slice(), p.data.items, p.value_map, res.node)) { + res.val.tag = .unavailable; + res.ty.qual = .{}; + try res.un(p, .lval_to_rval); + } + } + + fn boolCast(res: *Result, p: *Parser, bool_ty: Type) Error!void { + if (res.ty.isPtr()) { + res.val.toBool(); + res.ty = bool_ty; + try res.un(p, .pointer_to_bool); + } else if (res.ty.isInt() and !res.ty.is(.bool)) { + res.val.toBool(); + res.ty = bool_ty; + try res.un(p, .int_to_bool); + } else if (res.ty.isFloat()) { + res.val.floatToInt(res.ty, bool_ty, p.pp.comp); + res.ty = bool_ty; + try res.un(p, .float_to_bool); + } + } + + fn intCast(res: *Result, p: *Parser, int_ty: Type) Error!void { + if (res.ty.is(.bool)) { + res.ty = int_ty; + try res.un(p, .bool_to_int); + } else if (res.ty.isPtr()) { + res.ty = int_ty; + try res.un(p, .pointer_to_int); + } else if (res.ty.isFloat()) { + res.val.floatToInt(res.ty, int_ty, p.pp.comp); + res.ty = int_ty; + try res.un(p, .float_to_int); + } else if (!res.ty.eql(int_ty, p.pp.comp, true)) { + if (int_ty.hasIncompleteSize()) return error.ParsingFailed; // Diagnostic already issued + res.val.intCast(res.ty, int_ty, p.pp.comp); + res.ty = int_ty; + try res.un(p, .int_cast); + } + } + + fn floatCast(res: *Result, p: *Parser, float_ty: Type) Error!void { + if (res.ty.is(.bool)) { + res.val.intToFloat(res.ty, float_ty, p.pp.comp); + res.ty = float_ty; + try res.un(p, .bool_to_float); + } else if (res.ty.isInt()) { + res.val.intToFloat(res.ty, float_ty, p.pp.comp); + res.ty = float_ty; + try res.un(p, .int_to_float); + } else if (!res.ty.eql(float_ty, p.pp.comp, true)) { + res.val.floatCast(res.ty, float_ty, p.pp.comp); + res.ty = float_ty; + try res.un(p, .float_cast); + } + } + + fn ptrCast(res: *Result, p: *Parser, ptr_ty: Type) Error!void { + if (res.ty.is(.bool)) { + res.ty = ptr_ty; + try res.un(p, .bool_to_pointer); + } else if (res.ty.isInt()) { + res.val.intCast(res.ty, ptr_ty, p.pp.comp); + res.ty = ptr_ty; + try res.un(p, .int_to_pointer); + } + } + + fn toVoid(res: *Result, p: *Parser) Error!void { + if (!res.ty.is(.void)) { + res.ty = .{ .specifier = .void }; + res.node = try p.addNode(.{ + .tag = .to_void, + .ty = res.ty, + .data = .{ .un = res.node }, + }); + } + } + + fn nullCast(res: *Result, p: *Parser, ptr_ty: Type) Error!void { + if (!res.val.isZero()) return; + res.ty = ptr_ty; + try res.un(p, .null_to_pointer); + } + + fn usualArithmeticConversion(a: *Result, b: *Result, p: *Parser) Error!void { + // if either is a float cast to that type + const float_types = [3][2]Type.Specifier{ + .{ .complex_long_double, .long_double }, + .{ .complex_double, .double }, + .{ .complex_float, .float }, + }; + const a_spec = a.ty.canonicalize(.standard).specifier; + const b_spec = b.ty.canonicalize(.standard).specifier; + for (float_types) |pair| { + if (a_spec == pair[0] 
or a_spec == pair[1] or + b_spec == pair[0] or b_spec == pair[1]) + { + const both_real = a.ty.isReal() and b.ty.isReal(); + const res_spec = pair[@boolToInt(both_real)]; + const ty = Type{ .specifier = res_spec }; + try a.floatCast(p, ty); + try b.floatCast(p, ty); + return; + } + } + + // Do integer promotion on both operands + const a_promoted = a.ty.integerPromotion(p.pp.comp); + const b_promoted = b.ty.integerPromotion(p.pp.comp); + if (a_promoted.eql(b_promoted, p.pp.comp, true)) { + // cast to promoted type + try a.intCast(p, a_promoted); + try b.intCast(p, a_promoted); + return; + } + + const a_unsigned = a_promoted.isUnsignedInt(p.pp.comp); + const b_unsigned = b_promoted.isUnsignedInt(p.pp.comp); + if (a_unsigned == b_unsigned) { + // cast to greater signed or unsigned type + const res_spec = std.math.max(@enumToInt(a_promoted.specifier), @enumToInt(b_promoted.specifier)); + const res_ty = Type{ .specifier = @intToEnum(Type.Specifier, res_spec) }; + try a.intCast(p, res_ty); + try b.intCast(p, res_ty); + return; + } + + // cast to the unsigned type with greater rank + const a_larger = @enumToInt(a_promoted.specifier) > @enumToInt(b_promoted.specifier); + const b_larger = @enumToInt(b_promoted.specifier) > @enumToInt(a_promoted.specifier); + if (a_unsigned) { + const target = if (a_larger) a_promoted else b_promoted; + try a.intCast(p, target); + try b.intCast(p, target); + } else { + assert(b_unsigned); + const target = if (b_larger) b_promoted else a_promoted; + try a.intCast(p, target); + try b.intCast(p, target); + } + } + + fn invalidBinTy(a: *Result, tok: TokenIndex, b: *Result, p: *Parser) Error!bool { + try p.errStr(.invalid_bin_types, tok, try p.typePairStr(a.ty, b.ty)); + return false; + } + + fn shouldEval(a: *Result, b: *Result, p: *Parser) Error!bool { + if (p.no_eval) return false; + if (a.val.tag != .unavailable and b.val.tag != .unavailable) + return true; + + try a.saveValue(p); + try b.saveValue(p); + return p.no_eval; + } + + /// Saves value and replaces it with `.unavailable`. 
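+ /// A hypothetical usage sketch (illustrative only, not part of the original change): + /// var res = try p.assignExpr(); + /// try res.saveValue(p); + /// After this, any known constant value lives in `p.value_map` keyed by + /// `res.node`, and `res.val.tag` is `.unavailable`.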
+ fn saveValue(res: *Result, p: *Parser) !void { + assert(!p.in_macro); + if (res.val.tag == .unavailable) return; + if (!p.in_macro) try p.value_map.put(res.node, res.val); + res.val.tag = .unavailable; + } +}; + +/// expr : assignExpr (',' assignExpr)* +fn expr(p: *Parser) Error!Result { + var expr_start = p.tok_i; + var err_start = p.pp.comp.diag.list.items.len; + var lhs = try p.assignExpr(); + if (p.tok_ids[p.tok_i] == .comma) try lhs.expect(p); + while (p.eatToken(.comma)) |_| { + try lhs.maybeWarnUnused(p, expr_start, err_start); + expr_start = p.tok_i; + err_start = p.pp.comp.diag.list.items.len; + + const rhs = try p.assignExpr(); + try rhs.expect(p); + lhs.val = rhs.val; + lhs.ty = rhs.ty; + try lhs.bin(p, .comma_expr, rhs); + } + return lhs; +} + +fn tokToTag(p: *Parser, tok: TokenIndex) Tree.Tag { + return switch (p.tok_ids[tok]) { + .equal => .assign_expr, + .asterisk_equal => .mul_assign_expr, + .slash_equal => .div_assign_expr, + .percent_equal => .mod_assign_expr, + .plus_equal => .add_assign_expr, + .minus_equal => .sub_assign_expr, + .angle_bracket_angle_bracket_left_equal => .shl_assign_expr, + .angle_bracket_angle_bracket_right_equal => .shr_assign_expr, + .ampersand_equal => .bit_and_assign_expr, + .caret_equal => .bit_xor_assign_expr, + .pipe_equal => .bit_or_assign_expr, + .equal_equal => .equal_expr, + .bang_equal => .not_equal_expr, + .angle_bracket_left => .less_than_expr, + .angle_bracket_left_equal => .less_than_equal_expr, + .angle_bracket_right => .greater_than_expr, + .angle_bracket_right_equal => .greater_than_equal_expr, + .angle_bracket_angle_bracket_left => .shl_expr, + .angle_bracket_angle_bracket_right => .shr_expr, + .plus => .add_expr, + .minus => .sub_expr, + .asterisk => .mul_expr, + .slash => .div_expr, + .percent => .mod_expr, + else => unreachable, + }; +} + +/// assignExpr +/// : condExpr +/// | unExpr ('=' | '*=' | '/=' | '%=' | '+=' | '-=' | '<<=' | '>>=' | '&=' | '^=' | '|=') assignExpr +fn assignExpr(p: *Parser) Error!Result { + var lhs = try p.condExpr(); + if (lhs.empty(p)) return lhs; + + const tok = p.tok_i; + const eq = p.eatToken(.equal); + const mul = eq orelse p.eatToken(.asterisk_equal); + const div = mul orelse p.eatToken(.slash_equal); + const mod = div orelse p.eatToken(.percent_equal); + const add = mod orelse p.eatToken(.plus_equal); + const sub = add orelse p.eatToken(.minus_equal); + const shl = sub orelse p.eatToken(.angle_bracket_angle_bracket_left_equal); + const shr = shl orelse p.eatToken(.angle_bracket_angle_bracket_right_equal); + const bit_and = shr orelse p.eatToken(.ampersand_equal); + const bit_xor = bit_and orelse p.eatToken(.caret_equal); + const bit_or = bit_xor orelse p.eatToken(.pipe_equal); + + const tag = p.tokToTag(bit_or orelse return lhs); + var rhs = try p.assignExpr(); + try rhs.expect(p); + try rhs.lvalConversion(p); + + var is_const: bool = undefined; + if (!Tree.isLvalExtra(p.nodes.slice(), p.data.items, p.value_map, lhs.node, &is_const) or is_const) { + try p.errTok(.not_assignable, tok); + return error.ParsingFailed; + } + + // adjustTypes will do lvalue conversion but we do not want that + var lhs_copy = lhs; + switch (tag) { + .assign_expr => {}, // handle plain assignment separately + .mul_assign_expr, + .div_assign_expr, + .mod_assign_expr, + => { + if (rhs.val.isZero()) { + switch (tag) { + .div_assign_expr => try p.errStr(.division_by_zero, div.?, "division"), + .mod_assign_expr => try p.errStr(.division_by_zero, mod.?, "remainder"), + else => {}, + } + } + _ = try 
lhs_copy.adjustTypes(tok, &rhs, p, .arithmetic); + try lhs.bin(p, tag, rhs); + return lhs; + }, + .sub_assign_expr, + .add_assign_expr, + => { + if (lhs.ty.isPtr() and rhs.ty.isInt()) { + try rhs.ptrCast(p, lhs.ty); + } else { + _ = try lhs_copy.adjustTypes(tok, &rhs, p, .arithmetic); + } + try lhs.bin(p, tag, rhs); + return lhs; + }, + .shl_assign_expr, + .shr_assign_expr, + .bit_and_assign_expr, + .bit_xor_assign_expr, + .bit_or_assign_expr, + => { + _ = try lhs_copy.adjustTypes(tok, &rhs, p, .integer); + try lhs.bin(p, tag, rhs); + return lhs; + }, + else => unreachable, + } + + // rhs does not need to be qualified + var unqual_ty = lhs.ty.canonicalize(.standard); + unqual_ty.qual = .{}; + const e_msg = " from incompatible type "; + if (lhs.ty.is(.bool)) { + // this is ridiculous but it's what clang does + if (rhs.ty.isInt() or rhs.ty.isFloat() or rhs.ty.isPtr()) { + try rhs.boolCast(p, unqual_ty); + } else { + try p.errStr(.incompatible_assign, tok, try p.typePairStrExtra(lhs.ty, e_msg, rhs.ty)); + } + } else if (unqual_ty.isInt()) { + if (rhs.ty.isInt() or rhs.ty.isFloat()) { + try rhs.intCast(p, unqual_ty); + } else if (rhs.ty.isPtr()) { + try p.errStr(.implicit_ptr_to_int, tok, try p.typePairStrExtra(rhs.ty, " to ", lhs.ty)); + try rhs.intCast(p, unqual_ty); + } else { + try p.errStr(.incompatible_assign, tok, try p.typePairStrExtra(lhs.ty, e_msg, rhs.ty)); + } + } else if (unqual_ty.isFloat()) { + if (rhs.ty.isInt() or rhs.ty.isFloat()) { + try rhs.floatCast(p, unqual_ty); + } else { + try p.errStr(.incompatible_assign, tok, try p.typePairStrExtra(lhs.ty, e_msg, rhs.ty)); + } + } else if (unqual_ty.isPtr()) { + if (rhs.val.isZero()) { + try rhs.nullCast(p, lhs.ty); + } else if (rhs.ty.isInt()) { + try p.errStr(.implicit_int_to_ptr, tok, try p.typePairStrExtra(rhs.ty, " to ", lhs.ty)); + try rhs.ptrCast(p, unqual_ty); + } else if (rhs.ty.isPtr()) { + if (!unqual_ty.isVoidStar() and !rhs.ty.isVoidStar() and !unqual_ty.eql(rhs.ty, p.pp.comp, false)) { + try p.errStr(.incompatible_ptr_assign, tok, try p.typePairStrExtra(lhs.ty, e_msg, rhs.ty)); + try rhs.ptrCast(p, unqual_ty); + } else if (!unqual_ty.eql(rhs.ty, p.pp.comp, true)) { + if (!unqual_ty.elemType().qual.hasQuals(rhs.ty.elemType().qual)) { + try p.errStr(.ptr_assign_discards_quals, tok, try p.typePairStrExtra(lhs.ty, e_msg, rhs.ty)); + } + try rhs.ptrCast(p, unqual_ty); + } + } else { + try p.errStr(.incompatible_assign, tok, try p.typePairStrExtra(lhs.ty, e_msg, rhs.ty)); + } + } else if (unqual_ty.isRecord()) { + if (!unqual_ty.eql(rhs.ty, p.pp.comp, false)) + try p.errStr(.incompatible_assign, tok, try p.typePairStrExtra(lhs.ty, e_msg, rhs.ty)); + } else if (unqual_ty.isArray() or unqual_ty.isFunc()) { + try p.errTok(.not_assignable, tok); + } else { + try p.errStr(.incompatible_assign, tok, try p.typePairStrExtra(lhs.ty, e_msg, rhs.ty)); + } + + try lhs.bin(p, tag, rhs); + return lhs; +} + +/// constExpr : condExpr +fn constExpr(p: *Parser) Error!Result { + const start = p.tok_i; + const res = try p.condExpr(); + try res.expect(p); + if (!res.ty.isInt()) { + try p.errTok(.expected_integer_constant_expr, start); + return error.ParsingFailed; + } + // saveValue sets val to unavailable + var copy = res; + try copy.saveValue(p); + return res; +} + +/// condExpr : lorExpr ('?' expression? ':' condExpr)? 
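+/// For example, in `1 ? f() : g()` only the `f()` side is evaluated at +/// compile time; the unselected branch is parsed with `no_eval` set (a sketch +/// of the behavior implemented below).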
+fn condExpr(p: *Parser) Error!Result { + var cond = try p.lorExpr(); + if (cond.empty(p) or p.eatToken(.question_mark) == null) return cond; + const saved_eval = p.no_eval; + + // Depending on the value of the condition, avoid evaluating unreachable branches. + var then_expr = blk: { + defer p.no_eval = saved_eval; + if (cond.val.tag != .unavailable and !cond.val.getBool()) p.no_eval = true; + break :blk try p.expr(); + }; + try then_expr.expect(p); // TODO binary cond expr + const colon = try p.expectToken(.colon); + var else_expr = blk: { + defer p.no_eval = saved_eval; + if (cond.val.tag != .unavailable and cond.val.getBool()) p.no_eval = true; + break :blk try p.condExpr(); + }; + try else_expr.expect(p); + + _ = try then_expr.adjustTypes(colon, &else_expr, p, .conditional); + + if (cond.val.tag != .unavailable) { + cond.val = if (cond.val.getBool()) then_expr.val else else_expr.val; + } else { + try then_expr.saveValue(p); + try else_expr.saveValue(p); + } + cond.ty = then_expr.ty; + cond.node = try p.addNode(.{ + .tag = .cond_expr, + .ty = cond.ty, + .data = .{ .if3 = .{ .cond = cond.node, .body = (try p.addList(&.{ then_expr.node, else_expr.node })).start } }, + }); + return cond; +} + +/// lorExpr : landExpr ('||' landExpr)* +fn lorExpr(p: *Parser) Error!Result { + var lhs = try p.landExpr(); + if (lhs.empty(p)) return lhs; + const saved_eval = p.no_eval; + defer p.no_eval = saved_eval; + + while (p.eatToken(.pipe_pipe)) |tok| { + if (lhs.val.tag != .unavailable and lhs.val.getBool()) p.no_eval = true; + var rhs = try p.landExpr(); + try rhs.expect(p); + + if (try lhs.adjustTypes(tok, &rhs, p, .boolean_logic)) { + const res = @boolToInt(lhs.val.getBool() or rhs.val.getBool()); + lhs.val = Value.int(res); + } + lhs.ty = .{ .specifier = .int }; + try lhs.bin(p, .bool_or_expr, rhs); + } + return lhs; +} + +/// landExpr : orExpr ('&&' orExpr)* +fn landExpr(p: *Parser) Error!Result { + var lhs = try p.orExpr(); + if (lhs.empty(p)) return lhs; + const saved_eval = p.no_eval; + defer p.no_eval = saved_eval; + + while (p.eatToken(.ampersand_ampersand)) |tok| { + if (lhs.val.tag != .unavailable and !lhs.val.getBool()) p.no_eval = true; + var rhs = try p.orExpr(); + try rhs.expect(p); + + if (try lhs.adjustTypes(tok, &rhs, p, .boolean_logic)) { + const res = @boolToInt(lhs.val.getBool() and rhs.val.getBool()); + lhs.val = Value.int(res); + } + lhs.ty = .{ .specifier = .int }; + try lhs.bin(p, .bool_and_expr, rhs); + } + return lhs; +} + +/// orExpr : xorExpr ('|' xorExpr)* +fn orExpr(p: *Parser) Error!Result { + var lhs = try p.xorExpr(); + if (lhs.empty(p)) return lhs; + while (p.eatToken(.pipe)) |tok| { + var rhs = try p.xorExpr(); + try rhs.expect(p); + + if (try lhs.adjustTypes(tok, &rhs, p, .integer)) { + lhs.val = lhs.val.bitOr(rhs.val, lhs.ty, p.pp.comp); + } + try lhs.bin(p, .bit_or_expr, rhs); + } + return lhs; +} + +/// xorExpr : andExpr ('^' andExpr)* +fn xorExpr(p: *Parser) Error!Result { + var lhs = try p.andExpr(); + if (lhs.empty(p)) return lhs; + while (p.eatToken(.caret)) |tok| { + var rhs = try p.andExpr(); + try rhs.expect(p); + + if (try lhs.adjustTypes(tok, &rhs, p, .integer)) { + lhs.val = lhs.val.bitXor(rhs.val, lhs.ty, p.pp.comp); + } + try lhs.bin(p, .bit_xor_expr, rhs); + } + return lhs; +} + +/// andExpr : eqExpr ('&' eqExpr)* +fn andExpr(p: *Parser) Error!Result { + var lhs = try p.eqExpr(); + if (lhs.empty(p)) return lhs; + while (p.eatToken(.ampersand)) |tok| { + var rhs = try p.eqExpr(); + try rhs.expect(p); + + if (try lhs.adjustTypes(tok, &rhs, p, .integer)) 
{ + lhs.val = lhs.val.bitAnd(rhs.val, lhs.ty, p.pp.comp); + } + try lhs.bin(p, .bit_and_expr, rhs); + } + return lhs; +} + +/// eqExpr : compExpr (('==' | '!=') compExpr)* +fn eqExpr(p: *Parser) Error!Result { + var lhs = try p.compExpr(); + if (lhs.empty(p)) return lhs; + while (true) { + const eq = p.eatToken(.equal_equal); + const ne = eq orelse p.eatToken(.bang_equal); + const tag = p.tokToTag(ne orelse break); + var rhs = try p.compExpr(); + try rhs.expect(p); + + if (try lhs.adjustTypes(ne.?, &rhs, p, .equality)) { + const op: std.math.CompareOperator = if (tag == .equal_expr) .eq else .neq; + const res = lhs.val.compare(op, rhs.val, lhs.ty, p.pp.comp); + lhs.val = Value.int(@boolToInt(res)); + } + lhs.ty = .{ .specifier = .int }; + try lhs.bin(p, tag, rhs); + } + return lhs; +} + +/// compExpr : shiftExpr (('<' | '<=' | '>' | '>=') shiftExpr)* +fn compExpr(p: *Parser) Error!Result { + var lhs = try p.shiftExpr(); + if (lhs.empty(p)) return lhs; + while (true) { + const lt = p.eatToken(.angle_bracket_left); + const le = lt orelse p.eatToken(.angle_bracket_left_equal); + const gt = le orelse p.eatToken(.angle_bracket_right); + const ge = gt orelse p.eatToken(.angle_bracket_right_equal); + const tag = p.tokToTag(ge orelse break); + var rhs = try p.shiftExpr(); + try rhs.expect(p); + + if (try lhs.adjustTypes(ge.?, &rhs, p, .relational)) { + const op: std.math.CompareOperator = switch (tag) { + .less_than_expr => .lt, + .less_than_equal_expr => .lte, + .greater_than_expr => .gt, + .greater_than_equal_expr => .gte, + else => unreachable, + }; + const res = lhs.val.compare(op, rhs.val, lhs.ty, p.pp.comp); + lhs.val = Value.int(@boolToInt(res)); + } + lhs.ty = .{ .specifier = .int }; + try lhs.bin(p, tag, rhs); + } + return lhs; +} + +/// shiftExpr : addExpr (('<<' | '>>') addExpr)* +fn shiftExpr(p: *Parser) Error!Result { + var lhs = try p.addExpr(); + if (lhs.empty(p)) return lhs; + while (true) { + const shl = p.eatToken(.angle_bracket_angle_bracket_left); + const shr = shl orelse p.eatToken(.angle_bracket_angle_bracket_right); + const tag = p.tokToTag(shr orelse break); + var rhs = try p.addExpr(); + try rhs.expect(p); + + if (try lhs.adjustTypes(shr.?, &rhs, p, .integer)) { + if (shl != null) { + lhs.val = lhs.val.shl(rhs.val, lhs.ty, p.pp.comp); + } else { + lhs.val = lhs.val.shr(rhs.val, lhs.ty, p.pp.comp); + } + } + try lhs.bin(p, tag, rhs); + } + return lhs; +} + +/// addExpr : mulExpr (('+' | '-') mulExpr)* +fn addExpr(p: *Parser) Error!Result { + var lhs = try p.mulExpr(); + if (lhs.empty(p)) return lhs; + while (true) { + const plus = p.eatToken(.plus); + const minus = plus orelse p.eatToken(.minus); + const tag = p.tokToTag(minus orelse break); + var rhs = try p.mulExpr(); + try rhs.expect(p); + + if (try lhs.adjustTypes(minus.?, &rhs, p, if (plus != null) .add else .sub)) { + if (plus != null) { + if (lhs.val.add(lhs.val, rhs.val, lhs.ty, p.pp.comp)) try p.errOverflow(plus.?, lhs); + } else { + if (lhs.val.sub(lhs.val, rhs.val, lhs.ty, p.pp.comp)) try p.errOverflow(minus.?, lhs); + } + } + try lhs.bin(p, tag, rhs); + } + return lhs; +} + +/// mulExpr : castExpr (('*' | '/' | '%') castExpr)* +fn mulExpr(p: *Parser) Error!Result { + var lhs = try p.castExpr(); + if (lhs.empty(p)) return lhs; + while (true) { + const mul = p.eatToken(.asterisk); + const div = mul orelse p.eatToken(.slash); + const percent = div orelse p.eatToken(.percent); + const tag = p.tokToTag(percent orelse break); + var rhs = try p.castExpr(); + try rhs.expect(p); + + if (rhs.val.isZero() and mul == null 
and !p.no_eval) { + const err_tag: Diagnostics.Tag = if (p.in_macro) .division_by_zero_macro else .division_by_zero; + lhs.val.tag = .unavailable; + if (div != null) { + try p.errStr(err_tag, div.?, "division"); + } else { + try p.errStr(err_tag, percent.?, "remainder"); + } + if (p.in_macro) return error.ParsingFailed; + } + + if (try lhs.adjustTypes(percent.?, &rhs, p, if (tag == .mod_expr) .integer else .arithmetic)) { + if (mul != null) { + if (lhs.val.mul(lhs.val, rhs.val, lhs.ty, p.pp.comp)) try p.errOverflow(mul.?, lhs); + } else if (div != null) { + lhs.val = Value.div(lhs.val, rhs.val, lhs.ty, p.pp.comp); + } else { + var res = Value.rem(lhs.val, rhs.val, lhs.ty, p.pp.comp); + if (res.tag == .unavailable) { + if (p.in_macro) { + // match clang behavior by defining invalid remainder to be zero in macros + res = Value.int(0); + } else { + try lhs.saveValue(p); + try rhs.saveValue(p); + } + } + lhs.val = res; + } + } + + try lhs.bin(p, tag, rhs); + } + return lhs; +} + +/// The `unused_value` warning for `last_expr_tok`, if present, will always be the last diagnostic; remove it. +fn removeUnusedWarningForTok(p: *Parser, last_expr_tok: TokenIndex) void { + if (last_expr_tok == 0) return; + if (p.pp.comp.diag.list.items.len == 0) return; + + const last_expr_loc = p.pp.tokens.items(.loc)[last_expr_tok]; + const last_msg = p.pp.comp.diag.list.items[p.pp.comp.diag.list.items.len - 1]; + + if (last_msg.tag == .unused_value and last_msg.loc.eql(last_expr_loc)) { + p.pp.comp.diag.list.items.len = p.pp.comp.diag.list.items.len - 1; + } +} + +/// castExpr +/// : '(' compoundStmt ')' +/// | '(' typeName ')' castExpr +/// | '(' typeName ')' '{' initializerItems '}' +/// | __builtin_choose_expr '(' constExpr ',' assignExpr ',' assignExpr ')' +/// | __builtin_va_arg '(' assignExpr ',' typeName ')' +/// | unExpr +fn castExpr(p: *Parser) Error!Result { + if (p.eatToken(.l_paren)) |l_paren| cast_expr: { + if (p.tok_ids[p.tok_i] == .l_brace) { + try p.err(.gnu_statement_expression); + if (p.func.ty == null) { + try p.err(.stmt_expr_not_allowed_file_scope); + return error.ParsingFailed; + } + var stmt_expr_state: StmtExprState = .{}; + const body_node = (try p.compoundStmt(false, &stmt_expr_state)).?; // compoundStmt only returns null if .l_brace isn't the first token + p.removeUnusedWarningForTok(stmt_expr_state.last_expr_tok); + + var res = Result{ + .node = body_node, + .ty = stmt_expr_state.last_expr_res.ty, + .val = stmt_expr_state.last_expr_res.val, + }; + try p.expectClosing(l_paren, .r_paren); + try res.un(p, .stmt_expr); + return res; + } + const ty = (try p.typeName()) orelse { + p.tok_i -= 1; + break :cast_expr; + }; + try p.expectClosing(l_paren, .r_paren); + + if (p.tok_ids[p.tok_i] == .l_brace) { + // compound literal + if (ty.isFunc()) { + try p.err(.func_init); + } else if (ty.is(.variable_len_array)) { + try p.err(.vla_init); + } else if (ty.hasIncompleteSize() and !ty.is(.incomplete_array)) { + try p.errStr(.variable_incomplete_ty, p.tok_i, try p.typeStr(ty)); + return error.ParsingFailed; + } + var init_list_expr = try p.initializer(ty); + try init_list_expr.un(p, .compound_literal_expr); + return init_list_expr; + } + + var operand = try p.castExpr(); + try operand.expect(p); + if (ty.is(.void)) { + // everything can cast to void + operand.val.tag = .unavailable; + } else if (ty.isInt() or ty.isFloat() or ty.isPtr()) cast: { + const old_float = operand.ty.isFloat(); + const new_float = ty.isFloat(); + + if (new_float and operand.ty.isPtr()) { + try p.errStr(.invalid_cast_to_float, l_paren, try p.typeStr(operand.ty)); + return 
error.ParsingFailed; + } else if (old_float and ty.isPtr()) { + try p.errStr(.invalid_cast_to_pointer, l_paren, try p.typeStr(operand.ty)); + return error.ParsingFailed; + } + if (operand.val.tag == .unavailable) break :cast; + + const old_int = operand.ty.isInt() or operand.ty.isPtr(); + const new_int = ty.isInt() or ty.isPtr(); + if (ty.is(.bool)) { + operand.val.toBool(); + } else if (old_float and new_int) { + operand.val.floatToInt(operand.ty, ty, p.pp.comp); + } else if (new_float and old_int) { + operand.val.intToFloat(operand.ty, ty, p.pp.comp); + } else if (new_float and old_float) { + operand.val.floatCast(operand.ty, ty, p.pp.comp); + } + } else { + try p.errStr(.invalid_cast_type, l_paren, try p.typeStr(operand.ty)); + return error.ParsingFailed; + } + if (ty.anyQual()) try p.errStr(.qual_cast, l_paren, try p.typeStr(ty)); + operand.ty = ty; + operand.ty.qual = .{}; + try operand.un(p, .cast_expr); + return operand; + } + switch (p.tok_ids[p.tok_i]) { + .builtin_choose_expr => return p.builtinChooseExpr(), + .builtin_va_arg => return p.builtinVaArg(), + // TODO: other special-cased builtins + else => {}, + } + return p.unExpr(); +} + +fn builtinChooseExpr(p: *Parser) Error!Result { + p.tok_i += 1; + const l_paren = try p.expectToken(.l_paren); + const cond_tok = p.tok_i; + var cond = try p.constExpr(); + if (cond.val.tag == .unavailable) { + try p.errTok(.builtin_choose_cond, cond_tok); + return error.ParsingFailed; + } + + _ = try p.expectToken(.comma); + + var then_expr = if (cond.val.getBool()) try p.assignExpr() else try p.parseNoEval(assignExpr); + try then_expr.expect(p); + + _ = try p.expectToken(.comma); + + var else_expr = if (!cond.val.getBool()) try p.assignExpr() else try p.parseNoEval(assignExpr); + try else_expr.expect(p); + + try p.expectClosing(l_paren, .r_paren); + + if (cond.val.getBool()) { + cond.val = then_expr.val; + cond.ty = then_expr.ty; + } else { + cond.val = else_expr.val; + cond.ty = else_expr.ty; + } + cond.node = try p.addNode(.{ + .tag = .builtin_choose_expr, + .ty = cond.ty, + .data = .{ .if3 = .{ .cond = cond.node, .body = (try p.addList(&.{ then_expr.node, else_expr.node })).start } }, + }); + return cond; +} + +fn builtinVaArg(p: *Parser) Error!Result { + const builtin_tok = p.tok_i; + p.tok_i += 1; + + const l_paren = try p.expectToken(.l_paren); + const va_list_tok = p.tok_i; + var va_list = try p.assignExpr(); + try va_list.expect(p); + try va_list.lvalConversion(p); + + _ = try p.expectToken(.comma); + + const ty = (try p.typeName()) orelse { + try p.err(.expected_type); + return error.ParsingFailed; + }; + try p.expectClosing(l_paren, .r_paren); + + if (!va_list.ty.eql(p.pp.comp.types.va_list, p.pp.comp, true)) { + try p.errStr(.incompatible_va_arg, va_list_tok, try p.typeStr(va_list.ty)); + return error.ParsingFailed; + } + + return Result{ .ty = ty, .node = try p.addNode(.{ + .tag = .builtin_call_expr_one, + .ty = ty, + .data = .{ .decl = .{ .name = builtin_tok, .node = va_list.node } }, + }) }; +} + +/// unExpr +/// : primaryExpr suffixExpr* +/// | '&&' IDENTIFIER +/// | ('&' | '*' | '+' | '-' | '~' | '!' 
| '++' | '--' | keyword_extension) castExpr +/// | keyword_sizeof unExpr +/// | keyword_sizeof '(' typeName ')' +/// | keyword_alignof '(' typeName ')' +fn unExpr(p: *Parser) Error!Result { + const tok = p.tok_i; + switch (p.tok_ids[tok]) { + .ampersand_ampersand => { + const address_tok = p.tok_i; + p.tok_i += 1; + const name_tok = try p.expectIdentifier(); + try p.errTok(.gnu_label_as_value, address_tok); + p.contains_address_of_label = true; + + const str = p.tokSlice(name_tok); + if (p.findLabel(str) == null) { + try p.labels.append(.{ .unresolved_goto = name_tok }); + } + const elem_ty = try p.arena.create(Type); + elem_ty.* = .{ .specifier = .void }; + const result_ty = Type{ .specifier = .pointer, .data = .{ .sub_type = elem_ty } }; + return Result{ + .node = try p.addNode(.{ + .tag = .addr_of_label, + .data = .{ .decl_ref = name_tok }, + .ty = result_ty, + }), + .ty = result_ty, + }; + }, + .ampersand => { + if (p.in_macro) { + try p.err(.invalid_preproc_operator); + return error.ParsingFailed; + } + p.tok_i += 1; + var operand = try p.castExpr(); + try operand.expect(p); + + const slice = p.nodes.slice(); + if (!Tree.isLval(slice, p.data.items, p.value_map, operand.node)) { + try p.errTok(.addr_of_rvalue, tok); + } + if (operand.ty.qual.register) try p.errTok(.addr_of_register, tok); + + const elem_ty = try p.arena.create(Type); + elem_ty.* = operand.ty; + operand.ty = Type{ + .specifier = .pointer, + .data = .{ .sub_type = elem_ty }, + }; + try operand.saveValue(p); + try operand.un(p, .addr_of_expr); + return operand; + }, + .asterisk => { + const asterisk_loc = p.tok_i; + p.tok_i += 1; + var operand = try p.castExpr(); + try operand.expect(p); + + if (operand.ty.isArray() or operand.ty.isPtr()) { + operand.ty = operand.ty.elemType(); + } else if (!operand.ty.isFunc()) { + try p.errTok(.indirection_ptr, tok); + } + if (operand.ty.hasIncompleteSize() and !operand.ty.is(.void)) { + try p.errStr(.deref_incomplete_ty_ptr, asterisk_loc, try p.typeStr(operand.ty)); + } + operand.ty.qual = .{}; + try operand.un(p, .deref_expr); + return operand; + }, + .plus => { + p.tok_i += 1; + + var operand = try p.castExpr(); + try operand.expect(p); + try operand.lvalConversion(p); + if (!operand.ty.isInt() and !operand.ty.isFloat()) + try p.errStr(.invalid_argument_un, tok, try p.typeStr(operand.ty)); + + if (operand.ty.isInt()) try operand.intCast(p, operand.ty.integerPromotion(p.pp.comp)); + return operand; + }, + .minus => { + p.tok_i += 1; + + var operand = try p.castExpr(); + try operand.expect(p); + try operand.lvalConversion(p); + if (!operand.ty.isInt() and !operand.ty.isFloat()) + try p.errStr(.invalid_argument_un, tok, try p.typeStr(operand.ty)); + + if (operand.ty.isInt()) try operand.intCast(p, operand.ty.integerPromotion(p.pp.comp)); + if (operand.val.tag != .unavailable) { + _ = operand.val.sub(operand.val.zero(), operand.val, operand.ty, p.pp.comp); + } + try operand.un(p, .negate_expr); + return operand; + }, + .plus_plus => { + p.tok_i += 1; + + var operand = try p.castExpr(); + try operand.expect(p); + if (!operand.ty.isInt() and !operand.ty.isFloat() and !operand.ty.isReal() and !operand.ty.isPtr()) + try p.errStr(.invalid_argument_un, tok, try p.typeStr(operand.ty)); + + if (!Tree.isLval(p.nodes.slice(), p.data.items, p.value_map, operand.node) or operand.ty.isConst()) { + try p.errTok(.not_assignable, tok); + return error.ParsingFailed; + } + if (operand.ty.isInt()) try operand.intCast(p, operand.ty.integerPromotion(p.pp.comp)); + + if (operand.val.tag != .unavailable) { + 
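// The prefix increment is folded at compile time when the operand value is + // known; `add` returns true on wrap-around, which triggers the overflow + // diagnostic below. + 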
if (operand.val.add(operand.val, operand.val.one(), operand.ty, p.pp.comp)) + try p.errOverflow(tok, operand); + } + + try operand.un(p, .pre_inc_expr); + return operand; + }, + .minus_minus => { + p.tok_i += 1; + + var operand = try p.castExpr(); + try operand.expect(p); + if (!operand.ty.isInt() and !operand.ty.isFloat() and !operand.ty.isReal() and !operand.ty.isPtr()) + try p.errStr(.invalid_argument_un, tok, try p.typeStr(operand.ty)); + + if (!Tree.isLval(p.nodes.slice(), p.data.items, p.value_map, operand.node) or operand.ty.isConst()) { + try p.errTok(.not_assignable, tok); + return error.ParsingFailed; + } + if (operand.ty.isInt()) try operand.intCast(p, operand.ty.integerPromotion(p.pp.comp)); + + if (operand.val.tag != .unavailable) { + if (operand.val.sub(operand.val, operand.val.one(), operand.ty, p.pp.comp)) + try p.errOverflow(tok, operand); + } + + try operand.un(p, .pre_dec_expr); + return operand; + }, + .tilde => { + p.tok_i += 1; + + var operand = try p.castExpr(); + try operand.expect(p); + try operand.lvalConversion(p); + if (!operand.ty.isInt()) try p.errStr(.invalid_argument_un, tok, try p.typeStr(operand.ty)); + if (operand.ty.isInt()) { + try operand.intCast(p, operand.ty.integerPromotion(p.pp.comp)); + if (operand.val.tag != .unavailable) { + operand.val = operand.val.bitNot(operand.ty, p.pp.comp); + } + } else { + operand.val.tag = .unavailable; + } + try operand.un(p, .bit_not_expr); + return operand; + }, + .bang => { + p.tok_i += 1; + + var operand = try p.castExpr(); + try operand.expect(p); + try operand.lvalConversion(p); + if (!operand.ty.isInt() and !operand.ty.isFloat() and !operand.ty.isPtr()) + try p.errStr(.invalid_argument_un, tok, try p.typeStr(operand.ty)); + + if (operand.ty.isInt()) try operand.intCast(p, operand.ty.integerPromotion(p.pp.comp)); + if (operand.val.tag != .unavailable) { + const res = Value.int(@boolToInt(!operand.val.getBool())); + operand.val = res; + } + operand.ty = .{ .specifier = .int }; + try operand.un(p, .bool_not_expr); + return operand; + }, + .keyword_sizeof => { + p.tok_i += 1; + const expected_paren = p.tok_i; + var res = Result{}; + if (try p.typeName()) |ty| { + res.ty = ty; + try p.errTok(.expected_parens_around_typename, expected_paren); + } else if (p.eatToken(.l_paren)) |l_paren| { + if (try p.typeName()) |ty| { + res.ty = ty; + try p.expectClosing(l_paren, .r_paren); + } else { + p.tok_i = expected_paren; + res = try p.parseNoEval(unExpr); + } + } else { + res = try p.parseNoEval(unExpr); + } + + if (res.ty.sizeof(p.pp.comp)) |size| { + res.val = .{ .tag = .int, .data = .{ .int = size } }; + } else { + res.val.tag = .unavailable; + try p.errStr(.invalid_sizeof, expected_paren - 1, try p.typeStr(res.ty)); + } + res.ty = p.pp.comp.types.size; + try res.un(p, .sizeof_expr); + return res; + }, + .keyword_alignof, .keyword_alignof1, .keyword_alignof2 => { + p.tok_i += 1; + const expected_paren = p.tok_i; + var res = Result{}; + if (try p.typeName()) |ty| { + res.ty = ty; + try p.errTok(.expected_parens_around_typename, expected_paren); + } else if (p.eatToken(.l_paren)) |l_paren| { + if (try p.typeName()) |ty| { + res.ty = ty; + try p.expectClosing(l_paren, .r_paren); + } else { + p.tok_i = expected_paren; + res = try p.parseNoEval(unExpr); + try p.errTok(.alignof_expr, expected_paren); + } + } else { + res = try p.parseNoEval(unExpr); + try p.errTok(.alignof_expr, expected_paren); + } + + res.val = Value.int(res.ty.alignof(p.pp.comp)); + res.ty = p.pp.comp.types.size; + try res.un(p, .alignof_expr); + return res; 
+ }, + .keyword_extension => { + p.tok_i += 1; + const saved_extension = p.extension_suppressed; + defer p.extension_suppressed = saved_extension; + p.extension_suppressed = true; + + var child = try p.castExpr(); + try child.expect(p); + return child; + }, + else => { + var lhs = try p.primaryExpr(); + if (lhs.empty(p)) return lhs; + while (true) { + const suffix = try p.suffixExpr(lhs); + if (suffix.empty(p)) break; + lhs = suffix; + } + return lhs; + }, + } +} + +/// suffixExpr +/// : '[' expr ']' +/// | '(' argumentExprList? ')' +/// | '.' IDENTIFIER +/// | '->' IDENTIFIER +/// | '++' +/// | '--' +/// argumentExprList : assignExpr (',' assignExpr)* +fn suffixExpr(p: *Parser, lhs: Result) Error!Result { + assert(!lhs.empty(p)); + switch (p.tok_ids[p.tok_i]) { + .l_paren => return p.callExpr(lhs), + .plus_plus => { + defer p.tok_i += 1; + + var operand = lhs; + if (!operand.ty.isInt() and !operand.ty.isFloat() and !operand.ty.isReal() and !operand.ty.isPtr()) + try p.errStr(.invalid_argument_un, p.tok_i, try p.typeStr(operand.ty)); + + if (!Tree.isLval(p.nodes.slice(), p.data.items, p.value_map, operand.node) or operand.ty.isConst()) { + try p.err(.not_assignable); + return error.ParsingFailed; + } + if (operand.ty.isInt()) try operand.intCast(p, operand.ty.integerPromotion(p.pp.comp)); + + try operand.un(p, .post_inc_expr); + return operand; + }, + .minus_minus => { + defer p.tok_i += 1; + + var operand = lhs; + if (!operand.ty.isInt() and !operand.ty.isFloat() and !operand.ty.isReal() and !operand.ty.isPtr()) + try p.errStr(.invalid_argument_un, p.tok_i, try p.typeStr(operand.ty)); + + if (!Tree.isLval(p.nodes.slice(), p.data.items, p.value_map, operand.node) or operand.ty.isConst()) { + try p.err(.not_assignable); + return error.ParsingFailed; + } + if (operand.ty.isInt()) try operand.intCast(p, operand.ty.integerPromotion(p.pp.comp)); + + try operand.un(p, .post_dec_expr); + return operand; + }, + .l_bracket => { + const l_bracket = p.tok_i; + p.tok_i += 1; + var index = try p.expr(); + try index.expect(p); + try p.expectClosing(l_bracket, .r_bracket); + + const l_ty = lhs.ty; + const r_ty = index.ty; + var ptr = lhs; + try ptr.lvalConversion(p); + try index.lvalConversion(p); + if (ptr.ty.isPtr()) { + ptr.ty = ptr.ty.elemType(); + if (!index.ty.isInt()) try p.errTok(.invalid_index, l_bracket); + try p.checkArrayBounds(index, l_ty, l_bracket); + } else if (index.ty.isPtr()) { + index.ty = index.ty.elemType(); + if (!ptr.ty.isInt()) try p.errTok(.invalid_index, l_bracket); + try p.checkArrayBounds(ptr, r_ty, l_bracket); + std.mem.swap(Result, &ptr, &index); + } else { + try p.errTok(.invalid_subscript, l_bracket); + } + + try ptr.saveValue(p); + try index.saveValue(p); + try ptr.bin(p, .array_access_expr, index); + return ptr; + }, + .period => { + p.tok_i += 1; + const name = try p.expectIdentifier(); + return p.fieldAccess(lhs, name, false); + }, + .arrow => { + p.tok_i += 1; + const name = try p.expectIdentifier(); + if (lhs.ty.isArray()) { + var copy = lhs; + copy.ty.decayArray(); + try copy.un(p, .array_to_pointer); + return p.fieldAccess(copy, name, true); + } + return p.fieldAccess(lhs, name, true); + }, + else => return Result{}, + } +} + +fn fieldAccess( + p: *Parser, + lhs: Result, + field_name_tok: TokenIndex, + is_arrow: bool, +) !Result { + const expr_ty = lhs.ty; + const is_ptr = expr_ty.isPtr(); + const expr_base_ty = if (is_ptr) expr_ty.elemType() else expr_ty; + const record_ty = expr_base_ty.canonicalize(.standard); + + switch (record_ty.specifier) { + .@"struct", 
.@"union" => {}, + else => { + try p.errStr(.expected_record_ty, field_name_tok, try p.typeStr(expr_ty)); + return error.ParsingFailed; + }, + } + if (record_ty.hasIncompleteSize()) { + try p.errStr(.deref_incomplete_ty_ptr, field_name_tok - 2, try p.typeStr(expr_base_ty)); + return error.ParsingFailed; + } + if (is_arrow and !is_ptr) try p.errStr(.member_expr_not_ptr, field_name_tok, try p.typeStr(expr_ty)); + if (!is_arrow and is_ptr) try p.errStr(.member_expr_ptr, field_name_tok, try p.typeStr(expr_ty)); + + const field_name = p.tokSlice(field_name_tok); + if (!record_ty.hasField(field_name)) { + p.strings.items.len = 0; + + try p.strings.writer().print("'{s}' in '", .{field_name}); + try expr_ty.print(p.strings.writer()); + try p.strings.append('\''); + + const duped = try p.pp.comp.diag.arena.allocator().dupe(u8, p.strings.items); + try p.errStr(.no_such_member, field_name_tok, duped); + return error.ParsingFailed; + } + return p.fieldAccessExtra(lhs.node, record_ty, field_name, is_arrow); +} + +fn fieldAccessExtra(p: *Parser, lhs: NodeIndex, record_ty: Type, field_name: []const u8, is_arrow: bool) Error!Result { + for (record_ty.data.record.fields) |f, i| { + if (f.isAnonymousRecord()) { + if (!f.ty.hasField(field_name)) continue; + const inner = try p.addNode(.{ + .tag = if (is_arrow) .member_access_ptr_expr else .member_access_expr, + .ty = f.ty, + .data = .{ .member = .{ .lhs = lhs, .index = @intCast(u32, i) } }, + }); + return p.fieldAccessExtra(inner, f.ty, field_name, false); + } + if (std.mem.eql(u8, field_name, f.name)) return Result{ + .ty = f.ty, + .node = try p.addNode(.{ + .tag = if (is_arrow) .member_access_ptr_expr else .member_access_expr, + .ty = f.ty, + .data = .{ .member = .{ .lhs = lhs, .index = @intCast(u32, i) } }, + }), + }; + } + // We already checked that this container has a field by the name. 
+ unreachable; +} + +fn callExpr(p: *Parser, lhs: Result) Error!Result { + const l_paren = p.tok_i; + p.tok_i += 1; + const ty = lhs.ty.isCallable() orelse { + try p.errStr(.not_callable, l_paren, try p.typeStr(lhs.ty)); + return error.ParsingFailed; + }; + const params = ty.params(); + var func = lhs; + try func.lvalConversion(p); + + const list_buf_top = p.list_buf.items.len; + defer p.list_buf.items.len = list_buf_top; + try p.list_buf.append(func.node); + var arg_count: u32 = 0; + + const builtin_node = p.getNode(lhs.node, .builtin_call_expr_one); + + var first_after = l_paren; + while (p.eatToken(.r_paren) == null) { + const param_tok = p.tok_i; + if (arg_count == params.len) first_after = p.tok_i; + var arg = try p.assignExpr(); + try arg.expect(p); + const raw_arg_node = arg.node; + try arg.lvalConversion(p); + if (arg.ty.hasIncompleteSize() and !arg.ty.is(.void)) return error.ParsingFailed; + + if (arg_count >= params.len) { + if (arg.ty.isInt()) try arg.intCast(p, arg.ty.integerPromotion(p.pp.comp)); + if (arg.ty.is(.float)) try arg.floatCast(p, .{ .specifier = .double }); + try arg.saveValue(p); + try p.list_buf.append(arg.node); + arg_count += 1; + + _ = p.eatToken(.comma) orelse { + try p.expectClosing(l_paren, .r_paren); + break; + }; + continue; + } + + const p_ty = params[arg_count].ty; + if (p_ty.is(.special_va_start)) va_start: { + const builtin_tok = p.nodes.items(.data)[@enumToInt(builtin_node.?)].decl.name; + var func_ty = p.func.ty orelse { + try p.errTok(.va_start_not_in_func, builtin_tok); + break :va_start; + }; + if (func_ty.specifier != .var_args_func) { + try p.errTok(.va_start_fixed_args, builtin_tok); + break :va_start; + } + const func_params = func_ty.params(); + const last_param_name = func_params[func_params.len - 1].name; + const decl_ref = p.getNode(raw_arg_node, .decl_ref_expr); + if (decl_ref == null or + !mem.eql(u8, p.tokSlice(p.nodes.items(.data)[@enumToInt(decl_ref.?)].decl_ref), last_param_name)) + { + try p.errTok(.va_start_not_last_param, param_tok); + } + } else if (p_ty.is(.bool)) { + // this is ridiculous but it's what clang does + if (arg.ty.isInt() or arg.ty.isFloat() or arg.ty.isPtr()) { + try arg.boolCast(p, p_ty); + } else { + try p.errStr(.incompatible_param, param_tok, try p.typeStr(arg.ty)); + try p.errTok(.parameter_here, params[arg_count].name_tok); + } + } else if (p_ty.isInt()) { + if (arg.ty.isInt() or arg.ty.isFloat()) { + try arg.intCast(p, p_ty); + } else if (arg.ty.isPtr()) { + try p.errStr( + .implicit_ptr_to_int, + param_tok, + try p.typePairStrExtra(arg.ty, " to ", p_ty), + ); + try p.errTok(.parameter_here, params[arg_count].name_tok); + try arg.intCast(p, p_ty); + } else { + try p.errStr(.incompatible_param, param_tok, try p.typeStr(arg.ty)); + try p.errTok(.parameter_here, params[arg_count].name_tok); + } + } else if (p_ty.isFloat()) { + if (arg.ty.isInt() or arg.ty.isFloat()) { + try arg.floatCast(p, p_ty); + } else { + try p.errStr(.incompatible_param, param_tok, try p.typeStr(arg.ty)); + try p.errTok(.parameter_here, params[arg_count].name_tok); + } + } else if (p_ty.isPtr()) { + if (arg.val.isZero()) { + try arg.nullCast(p, p_ty); + } else if (arg.ty.isInt()) { + try p.errStr( + .implicit_int_to_ptr, + param_tok, + try p.typePairStrExtra(arg.ty, " to ", p_ty), + ); + try p.errTok(.parameter_here, params[arg_count].name_tok); + try arg.intCast(p, p_ty); + } else if (!arg.ty.isVoidStar() and !p_ty.isVoidStar() and !p_ty.eql(arg.ty, p.pp.comp, false)) { + try p.errStr(.incompatible_param, param_tok, try 
p.typeStr(arg.ty)); + try p.errTok(.parameter_here, params[arg_count].name_tok); + } + } else if (p_ty.isRecord()) { + if (!p_ty.eql(arg.ty, p.pp.comp, false)) { + try p.errStr(.incompatible_param, param_tok, try p.typeStr(arg.ty)); + try p.errTok(.parameter_here, params[arg_count].name_tok); + } + } else { + // should be unreachable + try p.errStr(.incompatible_param, param_tok, try p.typeStr(arg.ty)); + try p.errTok(.parameter_here, params[arg_count].name_tok); + } + + try arg.saveValue(p); + try p.list_buf.append(arg.node); + arg_count += 1; + + _ = p.eatToken(.comma) orelse { + try p.expectClosing(l_paren, .r_paren); + break; + }; + } + + const extra = Diagnostics.Message.Extra{ .arguments = .{ + .expected = @intCast(u32, params.len), + .actual = @intCast(u32, arg_count), + } }; + if (ty.is(.func) and params.len != arg_count) { + try p.errExtra(.expected_arguments, first_after, extra); + } + if (ty.is(.old_style_func) and params.len != arg_count) { + try p.errExtra(.expected_arguments_old, first_after, extra); + } + if (ty.is(.var_args_func) and arg_count < params.len) { + try p.errExtra(.expected_at_least_arguments, first_after, extra); + } + + if (builtin_node) |some| { + const index = @enumToInt(some); + var call_node = p.nodes.get(index); + defer p.nodes.set(index, call_node); + const args = p.list_buf.items[list_buf_top..]; + switch (arg_count) { + 0 => {}, + 1 => call_node.data.decl.node = args[1], // args[0] == func.node + else => { + call_node.tag = .builtin_call_expr; + args[0] = @intToEnum(NodeIndex, call_node.data.decl.name); + call_node.data = .{ .range = try p.addList(args) }; + }, + } + return Result{ .node = some, .ty = call_node.ty.returnType() }; + } + + var call_node: Tree.Node = .{ + .tag = .call_expr_one, + .ty = ty.returnType(), + .data = .{ .bin = .{ .lhs = func.node, .rhs = .none } }, + }; + const args = p.list_buf.items[list_buf_top..]; + switch (arg_count) { + 0 => {}, + 1 => call_node.data.bin.rhs = args[1], // args[0] == func.node + else => { + call_node.tag = .call_expr; + call_node.data = .{ .range = try p.addList(args) }; + }, + } + return Result{ .node = try p.addNode(call_node), .ty = call_node.ty }; +} + +fn checkArrayBounds(p: *Parser, index: Result, arr_ty: Type, tok: TokenIndex) !void { + if (index.val.tag == .unavailable) return; + const len = Value.int(arr_ty.arrayLen() orelse return); + + if (index.ty.isUnsignedInt(p.pp.comp)) { + if (index.val.compare(.gte, len, p.pp.comp.types.size, p.pp.comp)) + try p.errExtra(.array_after, tok, .{ .unsigned = index.val.data.int }); + } else { + if (index.val.compare(.lt, Value.int(0), index.ty, p.pp.comp)) { + try p.errExtra(.array_before, tok, .{ + .signed = index.val.signExtend(index.ty, p.pp.comp), + }); + } else if (index.val.compare(.gte, len, p.pp.comp.types.size, p.pp.comp)) { + try p.errExtra(.array_after, tok, .{ .unsigned = index.val.data.int }); + } + } +} + +/// primaryExpr +/// : IDENTIFIER +/// | INTEGER_LITERAL +/// | FLOAT_LITERAL +/// | IMAGINARY_LITERAL +/// | CHAR_LITERAL +/// | STRING_LITERAL +/// | '(' expr ')' +/// | genericSelection +fn primaryExpr(p: *Parser) Error!Result { + if (p.eatToken(.l_paren)) |l_paren| { + var e = try p.expr(); + try e.expect(p); + try p.expectClosing(l_paren, .r_paren); + try e.un(p, .paren_expr); + return e; + } + switch (p.tok_ids[p.tok_i]) { + .identifier, .extended_identifier => { + const name_tok = p.expectIdentifier() catch unreachable; + const name = p.tokSlice(name_tok); + if (p.pp.comp.builtins.get(name)) |some| { + for (p.tok_ids[p.tok_i..]) |id| 
switch (id) { + .r_paren => {}, // closing grouped expr + .l_paren => break, // beginning of a call + else => { + try p.errTok(.builtin_must_be_called, name_tok); + return error.ParsingFailed; + }, + }; + return Result{ + .ty = some, + .node = try p.addNode(.{ + .tag = .builtin_call_expr_one, + .ty = some, + .data = .{ .decl = .{ .name = name_tok, .node = .none } }, + }), + }; + } + const sym = p.findSymbol(name_tok, .reference) orelse { + if (p.tok_ids[p.tok_i] == .l_paren) { + // allow implicitly declaring functions before C99 like `puts("foo")` + if (mem.startsWith(u8, name, "__builtin_")) + try p.errStr(.unknown_builtin, name_tok, name) + else + try p.errStr(.implicit_func_decl, name_tok, name); + + const func_ty = try p.arena.create(Type.Func); + func_ty.* = .{ .return_type = .{ .specifier = .int }, .params = &.{} }; + const ty: Type = .{ .specifier = .old_style_func, .data = .{ .func = func_ty } }; + const node = try p.addNode(.{ + .ty = ty, + .tag = .fn_proto, + .data = .{ .decl = .{ .name = name_tok } }, + }); + + try p.decl_buf.append(node); + try p.scopes.append(.{ .decl = .{ + .name = name, + .ty = ty, + .name_tok = name_tok, + } }); + + return Result{ + .ty = ty, + .node = try p.addNode(.{ + .tag = .decl_ref_expr, + .ty = ty, + .data = .{ .decl_ref = name_tok }, + }), + }; + } + try p.errStr(.undeclared_identifier, name_tok, p.tokSlice(name_tok)); + return error.ParsingFailed; + }; + switch (sym) { + .enumeration => |e| { + var res = e.value; + try p.checkDeprecatedUnavailable(res.ty, name_tok, e.name_tok); + res.node = try p.addNode(.{ + .tag = .enumeration_ref, + .ty = res.ty, + .data = .{ .decl_ref = name_tok }, + }); + return res; + }, + .def, .decl, .param => |s| { + try p.checkDeprecatedUnavailable(s.ty, name_tok, s.name_tok); + return Result{ + .ty = s.ty, + .node = try p.addNode(.{ + .tag = .decl_ref_expr, + .ty = s.ty, + .data = .{ .decl_ref = name_tok }, + }), + }; + }, + else => unreachable, + } + }, + .macro_func, .macro_function => { + defer p.tok_i += 1; + var ty: Type = undefined; + var tok = p.tok_i; + if (p.func.ident) |some| { + ty = some.ty; + tok = p.nodes.items(.data)[@enumToInt(some.node)].decl.name; + } else if (p.func.ty) |_| { + p.strings.items.len = 0; + try p.strings.appendSlice(p.tokSlice(p.func.name)); + try p.strings.append(0); + const predef = try p.makePredefinedIdentifier(); + ty = predef.ty; + p.func.ident = predef; + } else { + p.strings.items.len = 0; + try p.strings.append(0); + const predef = try p.makePredefinedIdentifier(); + ty = predef.ty; + p.func.ident = predef; + try p.decl_buf.append(predef.node); + } + if (p.func.ty == null) try p.err(.predefined_top_level); + return Result{ + .ty = ty, + .node = try p.addNode(.{ + .tag = .decl_ref_expr, + .ty = ty, + .data = .{ .decl_ref = tok }, + }), + }; + }, + .macro_pretty_func => { + defer p.tok_i += 1; + var ty: Type = undefined; + if (p.func.pretty_ident) |some| { + ty = some.ty; + } else if (p.func.ty) |func_ty| { + p.strings.items.len = 0; + try Type.printNamed(func_ty, p.tokSlice(p.func.name), p.strings.writer()); + try p.strings.append(0); + const predef = try p.makePredefinedIdentifier(); + ty = predef.ty; + p.func.pretty_ident = predef; + } else { + p.strings.items.len = 0; + try p.strings.appendSlice("top level\x00"); + const predef = try p.makePredefinedIdentifier(); + ty = predef.ty; + p.func.pretty_ident = predef; + try p.decl_buf.append(predef.node); + } + if (p.func.ty == null) try p.err(.predefined_top_level); + return Result{ + .ty = ty, + .node = try p.addNode(.{ + .tag = 
.decl_ref_expr, + .ty = ty, + .data = .{ .decl_ref = p.tok_i }, + }), + }; + }, + .string_literal, + .string_literal_utf_16, + .string_literal_utf_8, + .string_literal_utf_32, + .string_literal_wide, + => return p.stringLiteral(), + .char_literal, + .char_literal_utf_16, + .char_literal_utf_32, + .char_literal_wide, + => return p.charLiteral(), + .float_literal, .imaginary_literal => |tag| { + defer p.tok_i += 1; + const ty = Type{ .specifier = .double }; + const d_val = try p.parseFloat(p.tok_i, f64); + var res = Result{ + .ty = ty, + .node = try p.addNode(.{ .tag = .double_literal, .ty = ty, .data = undefined }), + .val = Value.float(d_val), + }; + if (!p.in_macro) try p.value_map.put(res.node, res.val); + if (tag == .imaginary_literal) { + try p.err(.gnu_imaginary_constant); + res.ty = .{ .specifier = .complex_double }; + res.val.tag = .unavailable; + try res.un(p, .imaginary_literal); + } + return res; + }, + .float_literal_f, .imaginary_literal_f => |tag| { + defer p.tok_i += 1; + const ty = Type{ .specifier = .float }; + const f_val = try p.parseFloat(p.tok_i, f64); + var res = Result{ + .ty = ty, + .node = try p.addNode(.{ .tag = .float_literal, .ty = ty, .data = undefined }), + .val = Value.float(f_val), + }; + if (!p.in_macro) try p.value_map.put(res.node, res.val); + if (tag == .imaginary_literal_f) { + try p.err(.gnu_imaginary_constant); + res.ty = .{ .specifier = .complex_float }; + res.val.tag = .unavailable; + try res.un(p, .imaginary_literal); + } + return res; + }, + .float_literal_l => return p.todo("long double literals"), + .imaginary_literal_l => { + try p.err(.gnu_imaginary_constant); + return p.todo("long double imaginary literals"); + }, + .zero => { + p.tok_i += 1; + var res: Result = .{ .val = Value.int(0) }; + res.node = try p.addNode(.{ .tag = .int_literal, .ty = res.ty, .data = undefined }); + if (!p.in_macro) try p.value_map.put(res.node, res.val); + return res; + }, + .one => { + p.tok_i += 1; + var res: Result = .{ .val = Value.int(1) }; + res.node = try p.addNode(.{ .tag = .int_literal, .ty = res.ty, .data = undefined }); + if (!p.in_macro) try p.value_map.put(res.node, res.val); + return res; + }, + .integer_literal, + .integer_literal_u, + .integer_literal_l, + .integer_literal_lu, + .integer_literal_ll, + .integer_literal_llu, + => return p.integerLiteral(), + .keyword_generic => return p.genericSelection(), + else => return Result{}, + } +} + +fn makePredefinedIdentifier(p: *Parser) !Result { + const slice = p.strings.items; + const elem_ty = .{ .specifier = .char, .qual = .{ .@"const" = true } }; + const arr_ty = try p.arena.create(Type.Array); + arr_ty.* = .{ .elem = elem_ty, .len = slice.len }; + const ty: Type = .{ .specifier = .array, .data = .{ .array = arr_ty } }; + + const val = Value.bytes(try p.arena.dupe(u8, slice)); + const str_lit = try p.addNode(.{ .tag = .string_literal_expr, .ty = ty, .data = undefined }); + if (!p.in_macro) try p.value_map.put(str_lit, val); + + return Result{ .ty = ty, .node = try p.addNode(.{ + .tag = .implicit_static_var, + .ty = ty, + .data = .{ .decl = .{ .name = p.tok_i, .node = str_lit } }, + }) }; +} + +fn stringLiteral(p: *Parser) Error!Result { + var start = p.tok_i; + // use 1 for wchar_t + var width: ?u8 = null; + while (true) { + switch (p.tok_ids[p.tok_i]) { + .string_literal => {}, + .string_literal_utf_16 => if (width) |some| { + if (some != 16) try p.err(.unsupported_str_cat); + } else { + width = 16; + }, + .string_literal_utf_8 => if (width) |some| { + if (some != 8) try p.err(.unsupported_str_cat); + 
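// e.g. `u8"a" U"b"` mixes encodings; every piece of a concatenated string + // literal must share one character width. + 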
} else { + width = 8; + }, + .string_literal_utf_32 => if (width) |some| { + if (some != 32) try p.err(.unsupported_str_cat); + } else { + width = 32; + }, + .string_literal_wide => if (width) |some| { + if (some != 1) try p.err(.unsupported_str_cat); + } else { + width = 1; + }, + else => break, + } + p.tok_i += 1; + } + if (width == null) width = 8; + if (width.? != 8) return p.todo("unicode string literals"); + p.strings.items.len = 0; + while (start < p.tok_i) : (start += 1) { + var slice = p.tokSlice(start); + slice = slice[0 .. slice.len - 1]; + var i = mem.indexOf(u8, slice, "\"").? + 1; + try p.strings.ensureUnusedCapacity(slice.len); + while (i < slice.len) : (i += 1) { + switch (slice[i]) { + '\\' => { + i += 1; + switch (slice[i]) { + '\n' => i += 1, + '\r' => i += 2, + '\'', '\"', '\\', '?' => |c| p.strings.appendAssumeCapacity(c), + 'n' => p.strings.appendAssumeCapacity('\n'), + 'r' => p.strings.appendAssumeCapacity('\r'), + 't' => p.strings.appendAssumeCapacity('\t'), + 'a' => p.strings.appendAssumeCapacity(0x07), + 'b' => p.strings.appendAssumeCapacity(0x08), + 'e' => p.strings.appendAssumeCapacity(0x1B), + 'f' => p.strings.appendAssumeCapacity(0x0C), + 'v' => p.strings.appendAssumeCapacity(0x0B), + 'x' => p.strings.appendAssumeCapacity(try p.parseNumberEscape(start, 16, slice, &i)), + '0'...'7' => p.strings.appendAssumeCapacity(try p.parseNumberEscape(start, 8, slice, &i)), + 'u' => try p.parseUnicodeEscape(start, 4, slice, &i), + 'U' => try p.parseUnicodeEscape(start, 8, slice, &i), + else => unreachable, + } + }, + else => |c| p.strings.appendAssumeCapacity(c), + } + } + } + try p.strings.append(0); + const slice = p.strings.items; + + const arr_ty = try p.arena.create(Type.Array); + arr_ty.* = .{ .elem = .{ .specifier = .char }, .len = slice.len }; + var res: Result = .{ + .ty = .{ + .specifier = .array, + .data = .{ .array = arr_ty }, + }, + .val = Value.bytes(try p.arena.dupe(u8, slice)), + }; + res.node = try p.addNode(.{ .tag = .string_literal_expr, .ty = res.ty, .data = undefined }); + if (!p.in_macro) try p.value_map.put(res.node, res.val); + return res; +} + +fn parseNumberEscape(p: *Parser, tok: TokenIndex, base: u8, slice: []const u8, i: *usize) !u8 { + if (base == 16) i.* += 1; // skip x + var char: u8 = 0; + var reported = false; + while (i.* < slice.len) : (i.* += 1) { + const val = std.fmt.charToDigit(slice[i.*], base) catch break; // validated by Tokenizer + if (@mulWithOverflow(u8, char, base, &char) and !reported) { + try p.errExtra(.escape_sequence_overflow, tok, .{ .unsigned = i.* }); + reported = true; + } + char += val; + } + i.* -= 1; + return char; +} + +fn parseUnicodeEscape(p: *Parser, tok: TokenIndex, count: u8, slice: []const u8, i: *usize) !void { + const c = std.fmt.parseInt(u21, slice[i.* + 1 ..][0..count], 16) catch 0x110000; // count validated by tokenizer + i.* += count + 1; + if (!std.unicode.utf8ValidCodepoint(c) or (c < 0xa0 and c != '$' and c != '@' and c != '`')) { + try p.errExtra(.invalid_universal_character, tok, .{ .unsigned = i.* - count - 2 }); + return; + } + var buf: [4]u8 = undefined; + const to_write = std.unicode.utf8Encode(c, &buf) catch unreachable; // validated above + p.strings.appendSliceAssumeCapacity(buf[0..to_write]); +} + +fn charLiteral(p: *Parser) Error!Result { + defer p.tok_i += 1; + const ty: Type = switch (p.tok_ids[p.tok_i]) { + .char_literal => .{ .specifier = .int }, + .char_literal_wide => p.pp.comp.types.wchar, + .char_literal_utf_16 => .{ .specifier = .ushort }, + .char_literal_utf_32 => .{ .specifier = 
.ulong }, + else => unreachable, + }; + const max: u32 = switch (p.tok_ids[p.tok_i]) { + .char_literal => std.math.maxInt(u8), + .char_literal_wide => std.math.maxInt(u32), // TODO correct + .char_literal_utf_16 => std.math.maxInt(u16), + .char_literal_utf_32 => std.math.maxInt(u32), + else => unreachable, + }; + var multichar: u8 = switch (p.tok_ids[p.tok_i]) { + .char_literal => 0, + .char_literal_wide => 4, + .char_literal_utf_16 => 2, + .char_literal_utf_32 => 2, + else => unreachable, + }; + + var val: u32 = 0; + var overflow_reported = false; + var slice = p.tokSlice(p.tok_i); + slice = slice[0 .. slice.len - 1]; + var i = mem.indexOf(u8, slice, "\'").? + 1; + while (i < slice.len) : (i += 1) { + var c: u32 = slice[i]; + switch (c) { + '\\' => { + i += 1; + switch (slice[i]) { + '\n' => i += 1, + '\r' => i += 2, + '\'', '\"', '\\', '?' => c = slice[i], + 'n' => c = '\n', + 'r' => c = '\r', + 't' => c = '\t', + 'a' => c = 0x07, + 'b' => c = 0x08, + 'e' => c = 0x1B, + 'f' => c = 0x0C, + 'v' => c = 0x0B, + 'x' => c = try p.parseNumberEscape(p.tok_i, 16, slice, &i), + '0'...'7' => c = try p.parseNumberEscape(p.tok_i, 8, slice, &i), + 'u', 'U' => return p.todo("unicode escapes in char literals"), + else => unreachable, + } + }, + // These are safe since the source is checked to be valid utf8. + 0b1100_0000...0b1101_1111 => { + c &= 0b00011111; + c <<= 6; + c |= slice[i + 1] & 0b00111111; + i += 1; + }, + 0b1110_0000...0b1110_1111 => { + c &= 0b00001111; + c <<= 6; + c |= slice[i + 1] & 0b00111111; + c <<= 6; + c |= slice[i + 2] & 0b00111111; + i += 2; + }, + 0b1111_0000...0b1111_0111 => { + c &= 0b00000111; + c <<= 6; + c |= slice[i + 1] & 0b00111111; + c <<= 6; + c |= slice[i + 2] & 0b00111111; + c <<= 6; + c |= slice[i + 3] & 0b00111111; + i += 3; + }, + else => {}, + } + if (c > max) try p.err(.char_too_large); + switch (multichar) { + 0, 2, 4 => multichar += 1, + 1 => { + multichar = 99; + try p.err(.multichar_literal); + }, + 3 => { + try p.err(.unicode_multichar_literal); + return error.ParsingFailed; + }, + 5 => { + try p.err(.wide_multichar_literal); + val = 0; + multichar = 6; + }, + 6 => val = 0, + else => {}, + } + if (@mulWithOverflow(u32, val, max, &val) and !overflow_reported) { + try p.errExtra(.char_lit_too_wide, p.tok_i, .{ .unsigned = i }); + overflow_reported = true; + } + val += c; + } + + var res = Result{ + .ty = ty, + .val = Value.int(val), + .node = try p.addNode(.{ .tag = .char_literal, .ty = ty, .data = undefined }), + }; + if (!p.in_macro) try p.value_map.put(res.node, res.val); + return res; +} + +fn parseFloat(p: *Parser, tok: TokenIndex, comptime T: type) Error!T { + var bytes = p.tokSlice(tok); + switch (p.tok_ids[tok]) { + .float_literal => {}, + .imaginary_literal, .float_literal_f, .float_literal_l => bytes = bytes[0 .. bytes.len - 1], + .imaginary_literal_f, .imaginary_literal_l => bytes = bytes[0 .. 
bytes.len - 2], + else => unreachable, + } + if (bytes.len > 2 and (bytes[1] == 'x' or bytes[1] == 'X')) { + assert(bytes[0] == '0'); // validated by Tokenizer + return std.fmt.parseHexFloat(T, bytes) catch |e| switch (e) { + error.InvalidCharacter => unreachable, // validated by Tokenizer + error.Overflow => p.todo("what to do with hex floats too big"), + }; + } else { + return std.fmt.parseFloat(T, bytes) catch |e| switch (e) { + error.InvalidCharacter => unreachable, // validated by Tokenizer + }; + } +} + +fn integerLiteral(p: *Parser) Error!Result { + const id = p.tok_ids[p.tok_i]; + var slice = p.tokSlice(p.tok_i); + defer p.tok_i += 1; + var base: u8 = 10; + if (std.ascii.startsWithIgnoreCase(slice, "0x")) { + slice = slice[2..]; + base = 16; + } else if (std.ascii.startsWithIgnoreCase(slice, "0b")) { + try p.err(.binary_integer_literal); + slice = slice[2..]; + base = 2; + } else if (slice[0] == '0') { + base = 8; + } + switch (id) { + .integer_literal_u, .integer_literal_l => slice = slice[0 .. slice.len - 1], + .integer_literal_lu, .integer_literal_ll => slice = slice[0 .. slice.len - 2], + .integer_literal_llu => slice = slice[0 .. slice.len - 3], + else => {}, + } + + var val: u64 = 0; + var overflow = false; + for (slice) |c| { + const digit: u64 = switch (c) { + '0'...'9' => c - '0', + 'A'...'Z' => c - 'A' + 10, + 'a'...'z' => c - 'a' + 10, + else => unreachable, + }; + + if (val != 0 and @mulWithOverflow(u64, val, base, &val)) overflow = true; + if (@addWithOverflow(u64, val, digit, &val)) overflow = true; + } + if (overflow) { + try p.err(.int_literal_too_big); + var res: Result = .{ .ty = .{ .specifier = .ulong_long }, .val = Value.int(val) }; + res.node = try p.addNode(.{ .tag = .int_literal, .ty = res.ty, .data = undefined }); + if (!p.in_macro) try p.value_map.put(res.node, res.val); + return res; + } + switch (id) { + .integer_literal, .integer_literal_l, .integer_literal_ll => { + if (val > std.math.maxInt(i64)) { + try p.err(.implicitly_unsigned_literal); + } + }, + else => {}, + } + + if (base == 10) { + switch (id) { + .integer_literal => return p.castInt(val, &.{ .int, .long, .long_long }), + .integer_literal_u => return p.castInt(val, &.{ .uint, .ulong, .ulong_long }), + .integer_literal_l => return p.castInt(val, &.{ .long, .long_long }), + .integer_literal_lu => return p.castInt(val, &.{ .ulong, .ulong_long }), + .integer_literal_ll => return p.castInt(val, &.{.long_long}), + .integer_literal_llu => return p.castInt(val, &.{.ulong_long}), + else => unreachable, + } + } else { + switch (id) { + .integer_literal => return p.castInt(val, &.{ .int, .uint, .long, .ulong, .long_long, .ulong_long }), + .integer_literal_u => return p.castInt(val, &.{ .uint, .ulong, .ulong_long }), + .integer_literal_l => return p.castInt(val, &.{ .long, .ulong, .long_long, .ulong_long }), + .integer_literal_lu => return p.castInt(val, &.{ .ulong, .ulong_long }), + .integer_literal_ll => return p.castInt(val, &.{ .long_long, .ulong_long }), + .integer_literal_llu => return p.castInt(val, &.{.ulong_long}), + else => unreachable, + } + } +} + +fn castInt(p: *Parser, val: u64, specs: []const Type.Specifier) Error!Result { + var res: Result = .{ .val = Value.int(val) }; + for (specs) |spec| { + const ty = Type{ .specifier = spec }; + const unsigned = ty.isUnsignedInt(p.pp.comp); + const size = ty.sizeof(p.pp.comp).?; + res.ty = ty; + + if (unsigned) { + switch (size) { + 2 => if (val <= std.math.maxInt(u16)) break, + 4 => if (val <= std.math.maxInt(u32)) break, + 8 => if (val <= 
std.math.maxInt(u64)) break, + else => unreachable, + } + } else { + switch (size) { + 2 => if (val <= std.math.maxInt(i16)) break, + 4 => if (val <= std.math.maxInt(i32)) break, + 8 => if (val <= std.math.maxInt(i64)) break, + else => unreachable, + } + } + } else { + res.ty = .{ .specifier = .ulong_long }; + } + res.node = try p.addNode(.{ .tag = .int_literal, .ty = res.ty, .data = .{ .int = val } }); + if (!p.in_macro) try p.value_map.put(res.node, res.val); + return res; +} + +/// Run a parser function but do not evaluate the result +fn parseNoEval(p: *Parser, func: fn (*Parser) Error!Result) Error!Result { + const no_eval = p.no_eval; + defer p.no_eval = no_eval; + p.no_eval = true; + const parsed = try func(p); + try parsed.expect(p); + return parsed; +} + +/// genericSelection : keyword_generic '(' assignExpr ',' genericAssoc (',' genericAssoc)* ')' +/// genericAssoc +/// : typeName ':' assignExpr +/// | keyword_default ':' assignExpr +fn genericSelection(p: *Parser) Error!Result { + p.tok_i += 1; + const l_paren = try p.expectToken(.l_paren); + const controlling = try p.parseNoEval(assignExpr); + _ = try p.expectToken(.comma); + + const list_buf_top = p.list_buf.items.len; + defer p.list_buf.items.len = list_buf_top; + try p.list_buf.append(controlling.node); + + var default_tok: ?TokenIndex = null; + // TODO actually choose + var chosen: Result = .{}; + while (true) { + const start = p.tok_i; + if (try p.typeName()) |ty| { + if (ty.anyQual()) { + try p.errTok(.generic_qual_type, start); + } + _ = try p.expectToken(.colon); + chosen = try p.assignExpr(); + try chosen.expect(p); + try chosen.saveValue(p); + try p.list_buf.append(try p.addNode(.{ + .tag = .generic_association_expr, + .ty = ty, + .data = .{ .un = chosen.node }, + })); + } else if (p.eatToken(.keyword_default)) |tok| { + if (default_tok) |prev| { + try p.errTok(.generic_duplicate_default, tok); + try p.errTok(.previous_case, prev); + } + default_tok = tok; + _ = try p.expectToken(.colon); + chosen = try p.assignExpr(); + try chosen.expect(p); + try chosen.saveValue(p); + try p.list_buf.append(try p.addNode(.{ + .tag = .generic_default_expr, + .data = .{ .un = chosen.node }, + })); + } else { + if (p.list_buf.items.len == list_buf_top + 1) { + try p.err(.expected_type); + return error.ParsingFailed; + } + break; + } + if (p.eatToken(.comma) == null) break; + } + try p.expectClosing(l_paren, .r_paren); + + var generic_node: Tree.Node = .{ + .tag = .generic_expr_one, + .ty = chosen.ty, + .data = .{ .bin = .{ .lhs = controlling.node, .rhs = chosen.node } }, + }; + const associations = p.list_buf.items[list_buf_top..]; + if (associations.len > 2) { // associations[0] == controlling.node + generic_node.tag = .generic_expr; + generic_node.data = .{ .range = try p.addList(associations) }; + } + chosen.node = try p.addNode(generic_node); + return chosen; +} diff --git a/src/aro/Pragma.zig b/src/aro/Pragma.zig new file mode 100644 index 000000000000..e4ce42ac38ad --- /dev/null +++ b/src/aro/Pragma.zig @@ -0,0 +1,83 @@ +const std = @import("std"); +const Compilation = @import("Compilation.zig"); +const Preprocessor = @import("Preprocessor.zig"); +const Parser = @import("Parser.zig"); +const TokenIndex = @import("Tree.zig").TokenIndex; + +const Pragma = @This(); + +pub const Error = Compilation.Error || error{ UnknownPragma, StopPreprocessing }; + +/// Called during Preprocessor.init +beforePreprocess: ?fn (*Pragma, *Compilation) void = null, + +/// Called at the beginning of Parser.parse +beforeParse: ?fn (*Pragma, *Compilation) 
void = null, + +/// Called at the end of Parser.parse if a Tree was successfully parsed +afterParse: ?fn (*Pragma, *Compilation) void = null, + +/// Called during Compilation.deinit +deinit: fn (*Pragma, *Compilation) void, + +/// Called whenever the preprocessor encounters this pragma. `start_idx` is the index +/// within `pp.tokens` of the pragma name token. The pragma end is indicated by a +/// .nl token (which may be generated if the source ends with a pragma with no newline) +/// As an example, given the following line: +/// #pragma GCC diagnostic error "-Wnewline-eof" \n +/// Then pp.tokens.get(start_idx) will return the `GCC` token. +/// Return error.UnknownPragma to emit an `unknown_pragma` diagnostic +/// Return error.StopPreprocessing to stop preprocessing the current file (see once.zig) +preprocessorHandler: ?fn (*Pragma, *Preprocessor, start_idx: TokenIndex) Error!void = null, + +/// Called during token pretty-printing (`-E` option). If this returns true, the pragma will +/// be printed; otherwise it will be omitted. start_idx is the index of the pragma name token +preserveTokens: ?fn (*Pragma, *Preprocessor, start_idx: TokenIndex) bool = null, + +/// Same as preprocessorHandler except called during parsing +/// The parser's `p.tok_i` field must not be changed +parserHandler: ?fn (*Pragma, *Parser, start_idx: TokenIndex) Compilation.Error!void = null, + +pub fn pasteTokens(pp: *Preprocessor, start_idx: TokenIndex) ![]const u8 { + if (pp.tokens.get(start_idx).id == .nl) return error.ExpectedStringLiteral; + + const char_top = pp.char_buf.items.len; + defer pp.char_buf.items.len = char_top; + var i: usize = 0; + var lparen_count: u32 = 0; + var rparen_count: u32 = 0; + while (true) : (i += 1) { + const tok = pp.tokens.get(start_idx + i); + if (tok.id == .nl) break; + switch (tok.id) { + .l_paren => { + if (lparen_count != i) return error.ExpectedStringLiteral; + lparen_count += 1; + }, + .r_paren => rparen_count += 1, + .string_literal => { + if (rparen_count != 0) return error.ExpectedStringLiteral; + const str = pp.expandedSlice(tok); + try pp.char_buf.appendSlice(str[1 .. 
str.len - 1]); + }, + else => return error.ExpectedStringLiteral, + } + } + if (lparen_count != rparen_count) return error.ExpectedStringLiteral; + return pp.char_buf.items[char_top..]; +} + +pub fn shouldPreserveTokens(self: *Pragma, pp: *Preprocessor, start_idx: TokenIndex) bool { + if (self.preserveTokens) |func| return func(self, pp, start_idx); + return false; +} + +pub fn preprocessorCB(self: *Pragma, pp: *Preprocessor, start_idx: TokenIndex) Error!void { + if (self.preprocessorHandler) |func| return func(self, pp, start_idx); +} + +pub fn parserCB(self: *Pragma, p: *Parser, start_idx: TokenIndex) Compilation.Error!void { + const tok_index = p.tok_i; + defer std.debug.assert(tok_index == p.tok_i); + if (self.parserHandler) |func| return func(self, p, start_idx); +} diff --git a/src/aro/Preprocessor.zig b/src/aro/Preprocessor.zig new file mode 100644 index 000000000000..cda8b964ac99 --- /dev/null +++ b/src/aro/Preprocessor.zig @@ -0,0 +1,1945 @@ +const std = @import("std"); +const mem = std.mem; +const Allocator = mem.Allocator; +const assert = std.debug.assert; +const Compilation = @import("Compilation.zig"); +const Error = Compilation.Error; +const Source = @import("Source.zig"); +const Tokenizer = @import("Tokenizer.zig"); +const RawToken = Tokenizer.Token; +const Parser = @import("Parser.zig"); +const Diagnostics = @import("Diagnostics.zig"); +const Token = @import("Tree.zig").Token; +const Attribute = @import("Attribute.zig"); +const features = @import("features.zig"); + +const Preprocessor = @This(); +const DefineMap = std.StringHashMap(Macro); +const RawTokenList = std.ArrayList(RawToken); +const max_include_depth = 200; + +/// Errors that can be returned when expanding a macro. +/// error.UnknownPragma can occur within Preprocessor.pragma() but +/// it is handled there and doesn't escape that function. +const MacroError = Error || error{StopPreprocessing}; + +const Macro = struct { + /// Parameters of the function type macro + params: []const []const u8, + + /// Tokens constituting the macro body + tokens: []const RawToken, + + /// If the function type macro has a variable number of arguments + var_args: bool, + + /// Is a function type macro + is_func: bool, + + /// Is a predefined macro + is_builtin: bool = false, + + /// Location of macro in the source + /// `byte_offset` and `line` are used to define the range of tokens included + /// in the macro. 
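+ /// (Illustrative note, based on `shouldExpand` below: `byte_offset` marks the
+ /// start of the body and `line` holds its end index, so for `#define FOO 1 + 2`
+ /// an occurrence of `FOO` located inside that range is not expanded again.)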
+ loc: Source.Location, + + fn eql(a: Macro, b: Macro, pp: *Preprocessor) bool { + if (a.tokens.len != b.tokens.len) return false; + if (a.is_builtin != b.is_builtin) return false; + for (a.tokens) |t, i| if (!tokEql(pp, t, b.tokens[i])) return false; + + if (a.is_func and b.is_func) { + if (a.var_args != b.var_args) return false; + if (a.params.len != b.params.len) return false; + for (a.params) |p, i| if (!mem.eql(u8, p, b.params[i])) return false; + } + + return true; + } + + fn tokEql(pp: *Preprocessor, a: RawToken, b: RawToken) bool { + return mem.eql(u8, pp.tokSlice(a), pp.tokSlice(b)); + } +}; + +comp: *Compilation, +arena: std.heap.ArenaAllocator, +defines: DefineMap, +tokens: Token.List = .{}, +token_buf: RawTokenList, +char_buf: std.ArrayList(u8), +/// Counter that is incremented each time preprocess() is called +/// Can be used to distinguish multiple preprocessings of the same file +preprocess_count: u32 = 0, +generated_line: u32 = 1, +add_expansion_nl: u32 = 0, +include_depth: u8 = 0, +counter: u32 = 0, +expansion_source_loc: Source.Location = undefined, +poisoned_identifiers: std.StringHashMap(void), +/// Memory is retained to avoid allocation on every single token. +top_expansion_buf: ExpandBuf, + +pub fn init(comp: *Compilation) Preprocessor { + const pp = Preprocessor{ + .comp = comp, + .arena = std.heap.ArenaAllocator.init(comp.gpa), + .defines = DefineMap.init(comp.gpa), + .token_buf = RawTokenList.init(comp.gpa), + .char_buf = std.ArrayList(u8).init(comp.gpa), + .poisoned_identifiers = std.StringHashMap(void).init(comp.gpa), + .top_expansion_buf = ExpandBuf.init(comp.gpa), + }; + comp.pragmaEvent(.before_preprocess); + return pp; +} + +const builtin_macros = struct { + const args = [1][]const u8{"X"}; + + const has_attribute = [1]RawToken{.{ + .id = .macro_param_has_attribute, + .source = .generated, + }}; + const has_warning = [1]RawToken{.{ + .id = .macro_param_has_warning, + .source = .generated, + }}; + const has_feature = [1]RawToken{.{ + .id = .macro_param_has_feature, + .source = .generated, + }}; + const has_extension = [1]RawToken{.{ + .id = .macro_param_has_extension, + .source = .generated, + }}; + const has_builtin = [1]RawToken{.{ + .id = .macro_param_has_builtin, + .source = .generated, + }}; + + const is_identifier = [1]RawToken{.{ + .id = .macro_param_is_identifier, + .source = .generated, + }}; + + const pragma_operator = [1]RawToken{.{ + .id = .macro_param_pragma_operator, + .source = .generated, + }}; + + const file = [1]RawToken{.{ + .id = .macro_file, + .source = .generated, + }}; + const line = [1]RawToken{.{ + .id = .macro_line, + .source = .generated, + }}; + const counter = [1]RawToken{.{ + .id = .macro_counter, + .source = .generated, + }}; +}; + +fn addBuiltinMacro(pp: *Preprocessor, name: []const u8, is_func: bool, tokens: []const RawToken) !void { + try pp.defines.put(name, .{ + .params = &builtin_macros.args, + .tokens = tokens, + .var_args = false, + .is_func = is_func, + .loc = .{ .id = .generated }, + .is_builtin = true, + }); +} + +pub fn addBuiltinMacros(pp: *Preprocessor) !void { + try pp.addBuiltinMacro("__has_attribute", true, &builtin_macros.has_attribute); + try pp.addBuiltinMacro("__has_warning", true, &builtin_macros.has_warning); + try pp.addBuiltinMacro("__has_feature", true, &builtin_macros.has_feature); + try pp.addBuiltinMacro("__has_extension", true, &builtin_macros.has_extension); + try pp.addBuiltinMacro("__has_builtin", true, &builtin_macros.has_builtin); + try pp.addBuiltinMacro("__is_identifier", true, 
&builtin_macros.is_identifier); + try pp.addBuiltinMacro("_Pragma", true, &builtin_macros.pragma_operator); + + try pp.addBuiltinMacro("__FILE__", false, &builtin_macros.file); + try pp.addBuiltinMacro("__LINE__", false, &builtin_macros.line); + try pp.addBuiltinMacro("__COUNTER__", false, &builtin_macros.counter); +} + +pub fn deinit(pp: *Preprocessor) void { + pp.defines.deinit(); + for (pp.tokens.items(.expansion_locs)) |loc| Token.free(loc, pp.comp.gpa); + pp.tokens.deinit(pp.comp.gpa); + pp.arena.deinit(); + pp.token_buf.deinit(); + pp.char_buf.deinit(); + pp.poisoned_identifiers.deinit(); + pp.top_expansion_buf.deinit(); +} + +/// Preprocess a source file, returns eof token. +pub fn preprocess(pp: *Preprocessor, source: Source) Error!Token { + return pp.preprocessExtra(source) catch |err| switch (err) { + // This cannot occur in the main file and is handled in `include`. + error.StopPreprocessing => unreachable, + else => |e| return e, + }; +} + +fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!Token { + if (source.invalid_utf8_loc) |loc| { + try pp.comp.diag.add(.{ + .tag = .invalid_utf8, + .loc = loc, + }, &.{}); + return error.FatalError; + } + + pp.preprocess_count += 1; + var tokenizer = Tokenizer{ + .buf = source.buf, + .comp = pp.comp, + .source = source.id, + }; + + // Estimate how many new tokens this source will contain. + const estimated_token_count = source.buf.len / 8; + try pp.tokens.ensureTotalCapacity(pp.comp.gpa, pp.tokens.len + estimated_token_count); + + var if_level: u8 = 0; + var if_kind = std.PackedIntArray(u2, 256).init([1]u2{0} ** 256); + const until_else = 0; + const until_endif = 1; + const until_endif_seen_else = 2; + + var start_of_line = true; + while (true) { + var tok = tokenizer.next(); + switch (tok.id) { + .hash => if (start_of_line) { + const directive = tokenizer.nextNoWS(); + switch (directive.id) { + .keyword_error, .keyword_warning => { + // #error tokens.. + pp.top_expansion_buf.items.len = 0; + const char_top = pp.char_buf.items.len; + defer pp.char_buf.items.len = char_top; + + while (true) { + tok = tokenizer.next(); + if (tok.id == .nl or tok.id == .eof) break; + if (tok.id == .whitespace) tok.id = .macro_ws; + try pp.top_expansion_buf.append(tokFromRaw(tok)); + } + try pp.stringify(pp.top_expansion_buf.items); + const slice = pp.char_buf.items[char_top + 1 .. 
pp.char_buf.items.len - 2]; + const duped = try pp.comp.diag.arena.allocator().dupe(u8, slice); + + try pp.comp.diag.add(.{ + .tag = if (directive.id == .keyword_error) .error_directive else .warning_directive, + .loc = .{ .id = tok.source, .byte_offset = directive.start, .line = directive.line }, + .extra = .{ .str = duped }, + }, &.{}); + }, + .keyword_if => { + if (@addWithOverflow(u8, if_level, 1, &if_level)) + return pp.fatal(directive, "too many #if nestings", .{}); + + if (try pp.expr(&tokenizer)) { + if_kind.set(if_level, until_endif); + } else { + if_kind.set(if_level, until_else); + try pp.skip(&tokenizer, .until_else); + } + }, + .keyword_ifdef => { + if (@addWithOverflow(u8, if_level, 1, &if_level)) + return pp.fatal(directive, "too many #if nestings", .{}); + + const macro_name = (try pp.expectMacroName(&tokenizer)) orelse continue; + try pp.expectNl(&tokenizer); + if (pp.defines.get(macro_name) != null) { + if_kind.set(if_level, until_endif); + } else { + if_kind.set(if_level, until_else); + try pp.skip(&tokenizer, .until_else); + } + }, + .keyword_ifndef => { + if (@addWithOverflow(u8, if_level, 1, &if_level)) + return pp.fatal(directive, "too many #if nestings", .{}); + + const macro_name = (try pp.expectMacroName(&tokenizer)) orelse continue; + try pp.expectNl(&tokenizer); + if (pp.defines.get(macro_name) == null) { + if_kind.set(if_level, until_endif); + } else { + if_kind.set(if_level, until_else); + try pp.skip(&tokenizer, .until_else); + } + }, + .keyword_elif => { + if (if_level == 0) { + try pp.err(directive, .elif_without_if); + if_level += 1; + if_kind.set(if_level, until_else); + } + switch (if_kind.get(if_level)) { + until_else => if (try pp.expr(&tokenizer)) { + if_kind.set(if_level, until_endif); + } else { + try pp.skip(&tokenizer, .until_else); + }, + until_endif => try pp.skip(&tokenizer, .until_endif), + until_endif_seen_else => { + try pp.err(directive, .elif_after_else); + skipToNl(&tokenizer); + }, + else => unreachable, + } + }, + .keyword_else => { + try pp.expectNl(&tokenizer); + if (if_level == 0) { + try pp.err(directive, .else_without_if); + continue; + } + switch (if_kind.get(if_level)) { + until_else => if_kind.set(if_level, until_endif_seen_else), + until_endif => try pp.skip(&tokenizer, .until_endif_seen_else), + until_endif_seen_else => { + try pp.err(directive, .else_after_else); + skipToNl(&tokenizer); + }, + else => unreachable, + } + }, + .keyword_endif => { + try pp.expectNl(&tokenizer); + if (if_level == 0) { + try pp.err(directive, .endif_without_if); + continue; + } + if_level -= 1; + }, + .keyword_define => try pp.define(&tokenizer), + .keyword_undef => { + const macro_name = (try pp.expectMacroName(&tokenizer)) orelse continue; + + _ = pp.defines.remove(macro_name); + try pp.expectNl(&tokenizer); + }, + .keyword_include => try pp.include(&tokenizer), + .keyword_pragma => try pp.pragma(&tokenizer, directive, null, &.{}), + .keyword_line => { + // #line number "file" + const digits = tokenizer.nextNoWS(); + if (digits.id != .integer_literal) try pp.err(digits, .line_simple_digit); + if (digits.id == .eof or digits.id == .nl) continue; + const name = tokenizer.nextNoWS(); + if (name.id == .eof or name.id == .nl) continue; + if (name.id != .string_literal) try pp.err(name, .line_invalid_filename); + try pp.expectNl(&tokenizer); + }, + .integer_literal => { + // # number "file" flags + const name = tokenizer.nextNoWS(); + if (name.id == .eof or name.id == .nl) continue; + if (name.id != .string_literal) try pp.err(name, 
.line_invalid_filename); + + const flag_1 = tokenizer.nextNoWS(); + if (flag_1.id == .eof or flag_1.id == .nl) continue; + const flag_2 = tokenizer.nextNoWS(); + if (flag_2.id == .eof or flag_2.id == .nl) continue; + const flag_3 = tokenizer.nextNoWS(); + if (flag_3.id == .eof or flag_3.id == .nl) continue; + const flag_4 = tokenizer.nextNoWS(); + if (flag_4.id == .eof or flag_4.id == .nl) continue; + try pp.expectNl(&tokenizer); + }, + .nl => {}, + .eof => { + if (if_level != 0) try pp.err(tok, .unterminated_conditional_directive); + return tokFromRaw(directive); + }, + else => { + try pp.err(tok, .invalid_preprocessing_directive); + skipToNl(&tokenizer); + }, + } + }, + .whitespace => if (pp.comp.only_preprocess) try pp.tokens.append(pp.comp.gpa, tokFromRaw(tok)), + .nl => { + start_of_line = true; + if (pp.comp.only_preprocess) try pp.tokens.append(pp.comp.gpa, tokFromRaw(tok)); + }, + .eof => { + if (if_level != 0) try pp.err(tok, .unterminated_conditional_directive); + // The following check needs to occur here and not at the top of the function + // because a pragma may change the level during preprocessing + if (source.buf.len > 0 and source.buf[source.buf.len - 1] != '\n') { + try pp.err(tok, .newline_eof); + } + return tokFromRaw(tok); + }, + else => { + if (tok.id.isMacroIdentifier() and pp.poisoned_identifiers.get(pp.tokSlice(tok)) != null) { + try pp.err(tok, .poisoned_identifier); + } + // Add the token to the buffer doing any necessary expansions. + start_of_line = false; + try pp.expandMacro(&tokenizer, tok); + }, + } + } +} + +/// Get raw token source string. +/// Returned slice is invalidated when comp.generated_buf is updated. +pub fn tokSlice(pp: *Preprocessor, token: RawToken) []const u8 { + if (token.id.lexeme()) |some| return some; + const source = pp.comp.getSource(token.source); + return source.buf[token.start..token.end]; +} + +/// Convert a token from the Tokenizer into a token used by the parser. +fn tokFromRaw(raw: RawToken) Token { + return .{ + .id = raw.id, + .loc = .{ + .id = raw.source, + .byte_offset = raw.start, + .line = raw.line, + }, + }; +} + +fn err(pp: *Preprocessor, raw: RawToken, tag: Diagnostics.Tag) !void { + try pp.comp.diag.add(.{ + .tag = tag, + .loc = .{ + .id = raw.source, + .byte_offset = raw.start, + .line = raw.line, + }, + }, &.{}); +} + +fn fatal(pp: *Preprocessor, raw: RawToken, comptime fmt: []const u8, args: anytype) Compilation.Error { + const source = pp.comp.getSource(raw.source); + const line_col = source.lineCol(.{ .id = raw.source, .line = raw.line, .byte_offset = raw.start }); + return pp.comp.diag.fatal(source.path, line_col.line, raw.line, line_col.col, fmt, args); +} + +/// Consume next token, error if it is not an identifier. +fn expectMacroName(pp: *Preprocessor, tokenizer: *Tokenizer) Error!?[]const u8 { + const macro_name = tokenizer.nextNoWS(); + if (!macro_name.id.isMacroIdentifier()) { + try pp.err(macro_name, .macro_name_missing); + skipToNl(tokenizer); + return null; + } + return pp.tokSlice(macro_name); +} + +/// Skip until after a newline, error if extra tokens before it. +fn expectNl(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void { + var sent_err = false; + while (true) { + const tok = tokenizer.next(); + if (tok.id == .nl or tok.id == .eof) return; + if (tok.id == .whitespace) continue; + if (!sent_err) { + sent_err = true; + try pp.err(tok, .extra_tokens_directive_end); + } + } +} + +/// Consume all tokens until a newline and parse the result into a boolean. 
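+/// For example (an illustrative walk through the code below): given
+/// `#if defined(FOO) && FOO > 1`, `defined(FOO)` is rewritten to `1` or `0`,
+/// any macro identifier still unexpanded afterwards is replaced with `0`, and
+/// the resulting tokens are evaluated via `Parser.macroExpr`.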
+fn expr(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!bool { + const start = pp.tokens.len; + defer { + for (pp.tokens.items(.expansion_locs)[start..]) |loc| Token.free(loc, pp.comp.gpa); + pp.tokens.len = start; + } + + while (true) { + var tok = tokenizer.next(); + switch (tok.id) { + .nl, .eof => { + if (pp.tokens.len == start) { + try pp.err(tok, .expected_value_in_expr); + try pp.expectNl(tokenizer); + return false; + } + tok.id = .eof; + try pp.tokens.append(pp.comp.gpa, tokFromRaw(tok)); + break; + }, + .keyword_defined => { + const first = tokenizer.nextNoWS(); + const macro_tok = if (first.id == .l_paren) tokenizer.nextNoWS() else first; + if (!macro_tok.id.isMacroIdentifier()) try pp.err(macro_tok, .macro_name_missing); + if (first.id == .l_paren) { + const r_paren = tokenizer.nextNoWS(); + if (r_paren.id != .r_paren) { + try pp.err(r_paren, .closing_paren); + try pp.err(first, .to_match_paren); + } + } + tok.id = if (pp.defines.get(pp.tokSlice(macro_tok)) != null) .one else .zero; + }, + .whitespace => continue, + else => {}, + } + try pp.expandMacro(tokenizer, tok); + } + + if (!pp.tokens.items(.id)[start].validPreprocessorExprStart()) { + const tok = pp.tokens.get(start); + try pp.comp.diag.add(.{ + .tag = .invalid_preproc_expr_start, + .loc = tok.loc, + }, tok.expansionSlice()); + return false; + } + // validate the tokens in the expression + for (pp.tokens.items(.id)[start..]) |*id, i| { + switch (id.*) { + .string_literal, + .string_literal_utf_16, + .string_literal_utf_8, + .string_literal_utf_32, + .string_literal_wide, + => { + const tok = pp.tokens.get(start + i); + try pp.comp.diag.add(.{ + .tag = .string_literal_in_pp_expr, + .loc = tok.loc, + }, tok.expansionSlice()); + return false; + }, + .float_literal, + .float_literal_f, + .float_literal_l, + .imaginary_literal, + .imaginary_literal_f, + .imaginary_literal_l, + => { + const tok = pp.tokens.get(start + i); + try pp.comp.diag.add(.{ + .tag = .float_literal_in_pp_expr, + .loc = tok.loc, + }, tok.expansionSlice()); + return false; + }, + .plus_plus, + .minus_minus, + .plus_equal, + .minus_equal, + .asterisk_equal, + .slash_equal, + .percent_equal, + .angle_bracket_angle_bracket_left_equal, + .angle_bracket_angle_bracket_right_equal, + .ampersand_equal, + .caret_equal, + .pipe_equal, + .l_bracket, + .r_bracket, + .l_brace, + .r_brace, + .ellipsis, + .semicolon, + .hash, + .hash_hash, + .equal, + .arrow, + .period, + => { + const tok = pp.tokens.get(start + i); + try pp.comp.diag.add(.{ + .tag = .invalid_preproc_operator, + .loc = tok.loc, + }, tok.expansionSlice()); + return false; + }, + else => if (id.isMacroIdentifier()) { + id.* = .zero; // undefined macro + }, + } + } + + // Actually parse it. + var parser = Parser{ + .pp = pp, + .tok_ids = pp.tokens.items(.id), + .tok_i = @intCast(u32, start), + .arena = pp.arena.allocator(), + .in_macro = true, + .data = undefined, + .strings = undefined, + .value_map = undefined, + .scopes = undefined, + .labels = undefined, + .decl_buf = undefined, + .list_buf = undefined, + .param_buf = undefined, + .enum_buf = undefined, + .record_buf = undefined, + .attr_buf = undefined, + }; + return parser.macroExpr(); +} + +/// Skip until #else #elif #endif, return last directive token id. +/// Also skips nested #if ... #endifs. 
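+/// For example (illustrative): while skipping the dead branch of `#if 0`, a
+/// nested `#if BAR ... #endif` only adjusts `ifs_seen`, so only the matching
+/// outer `#else`, `#elif` or `#endif` terminates the skip.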
+fn skip( + pp: *Preprocessor, + tokenizer: *Tokenizer, + cont: enum { until_else, until_endif, until_endif_seen_else }, +) Error!void { + var ifs_seen: u32 = 0; + var line_start = true; + while (tokenizer.index < tokenizer.buf.len) { + if (line_start) { + const saved_tokenizer = tokenizer.*; + const hash = tokenizer.nextNoWS(); + if (hash.id == .nl) continue; + line_start = false; + if (hash.id != .hash) continue; + const directive = tokenizer.nextNoWS(); + switch (directive.id) { + .keyword_else => { + if (ifs_seen != 0) continue; + if (cont == .until_endif_seen_else) { + try pp.err(directive, .else_after_else); + continue; + } + tokenizer.* = saved_tokenizer; + return; + }, + .keyword_elif => { + if (ifs_seen != 0 or cont == .until_endif) continue; + if (cont == .until_endif_seen_else) { + try pp.err(directive, .elif_after_else); + continue; + } + tokenizer.* = saved_tokenizer; + return; + }, + .keyword_endif => { + if (ifs_seen == 0) { + tokenizer.* = saved_tokenizer; + return; + } + ifs_seen -= 1; + }, + .keyword_if, .keyword_ifdef, .keyword_ifndef => ifs_seen += 1, + else => {}, + } + } else if (tokenizer.buf[tokenizer.index] == '\n') { + line_start = true; + tokenizer.index += 1; + tokenizer.line += 1; + } else { + line_start = false; + tokenizer.index += 1; + } + } else { + const eof = tokenizer.next(); + return pp.err(eof, .unterminated_conditional_directive); + } +} + +// Skip until newline, ignore other tokens. +fn skipToNl(tokenizer: *Tokenizer) void { + while (true) { + const tok = tokenizer.next(); + if (tok.id == .nl or tok.id == .eof) return; + } +} + +const ExpandBuf = std.ArrayList(Token); +const MacroArguments = std.ArrayList([]const Token); +fn deinitMacroArguments(allocator: Allocator, args: *const MacroArguments) void { + for (args.items) |item| { + for (item) |tok| Token.free(tok.expansion_locs, allocator); + allocator.free(item); + } + args.deinit(); +} + +fn expandObjMacro(pp: *Preprocessor, simple_macro: *const Macro) Error!ExpandBuf { + var buf = ExpandBuf.init(pp.comp.gpa); + try buf.ensureTotalCapacity(simple_macro.tokens.len); + + // Add all of the simple_macro's tokens to the new buffer, handling any concats. 
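+ // For example (illustrative): for `#define GLUE a ## b`, the `.hash_hash` arm
+ // below pops `a` back off `buf` and pastes it together with `b` via
+ // `pasteTokens`, skipping any intervening whitespace tokens.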
+ var i: usize = 0; + while (i < simple_macro.tokens.len) : (i += 1) { + const raw = simple_macro.tokens[i]; + const tok = tokFromRaw(raw); + switch (raw.id) { + .hash_hash => { + var rhs = tokFromRaw(simple_macro.tokens[i + 1]); + i += 1; + while (rhs.id == .whitespace) { + rhs = tokFromRaw(simple_macro.tokens[i + 1]); + i += 1; + } + try pp.pasteTokens(&buf, &.{rhs}); + }, + .whitespace => if (pp.comp.only_preprocess) buf.appendAssumeCapacity(tok), + .macro_file => { + const start = pp.comp.generated_buf.items.len; + const source = pp.comp.getSource(pp.expansion_source_loc.id); + try pp.comp.generated_buf.writer().print("\"{s}\"\n", .{source.path}); + + buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .string_literal, tok)); + }, + .macro_line => { + const start = pp.comp.generated_buf.items.len; + const source = pp.comp.getSource(pp.expansion_source_loc.id); + try pp.comp.generated_buf.writer().print("{d}\n", .{source.physicalLine(pp.expansion_source_loc)}); + + buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .integer_literal, tok)); + }, + .macro_counter => { + defer pp.counter += 1; + const start = pp.comp.generated_buf.items.len; + try pp.comp.generated_buf.writer().print("{d}\n", .{pp.counter}); + + buf.appendAssumeCapacity(try pp.makeGeneratedToken(start, .integer_literal, tok)); + }, + else => buf.appendAssumeCapacity(tok), + } + } + + return buf; +} + +/// Join a possibly-parenthesized series of string literal tokens into a single string without +/// leading or trailing quotes. The returned slice is invalidated if pp.char_buf changes. +/// Returns error.ExpectedStringLiteral if parentheses are not balanced, a non-string-literal +/// is encountered, or if no string literals are encountered +/// TODO: destringize (replace all '\\' with a single `\` and all '\"' with a '"') +fn pasteStringsUnsafe(pp: *Preprocessor, toks: []const Token) ![]const u8 { + const char_top = pp.char_buf.items.len; + defer pp.char_buf.items.len = char_top; + var unwrapped = toks; + if (toks.len >= 2 and toks[0].id == .l_paren and toks[toks.len - 1].id == .r_paren) { + unwrapped = toks[1 .. toks.len - 1]; + } + if (unwrapped.len == 0) return error.ExpectedStringLiteral; + + for (unwrapped) |tok| { + if (tok.id == .macro_ws) continue; + if (tok.id != .string_literal) return error.ExpectedStringLiteral; + const str = pp.expandedSlice(tok); + try pp.char_buf.appendSlice(str[1 .. str.len - 1]); + } + return pp.char_buf.items[char_top..]; +} + +/// Handle the _Pragma operator (implemented as a builtin macro) +fn pragmaOperator(pp: *Preprocessor, arg_tok: Token, operator_loc: Source.Location) !void { + const arg_slice = pp.expandedSlice(arg_tok); + const content = arg_slice[1 .. 
arg_slice.len - 1]; + const directive = "#pragma "; + + pp.char_buf.clearRetainingCapacity(); + const total_len = directive.len + content.len + 1; // destringify can never grow the string, + 1 for newline + try pp.char_buf.ensureUnusedCapacity(total_len); + pp.char_buf.appendSliceAssumeCapacity(directive); + pp.destringify(content); + pp.char_buf.appendAssumeCapacity('\n'); + + const start = pp.comp.generated_buf.items.len; + try pp.comp.generated_buf.appendSlice(pp.char_buf.items); + var tmp_tokenizer = Tokenizer{ + .buf = pp.comp.generated_buf.items, + .comp = pp.comp, + .index = @intCast(u32, start), + .source = .generated, + .line = pp.generated_line, + }; + pp.generated_line += 1; + const hash_tok = tmp_tokenizer.next(); + assert(hash_tok.id == .hash); + const pragma_tok = tmp_tokenizer.next(); + assert(pragma_tok.id == .keyword_pragma); + try pp.pragma(&tmp_tokenizer, pragma_tok, operator_loc, arg_tok.expansionSlice()); +} + +/// Inverts the output of the preprocessor stringify (#) operation +/// (except all whitespace is condensed to a single space) +/// writes output to pp.char_buf; assumes capacity is sufficient +/// backslash backslash -> backslash +/// backslash doublequote -> doublequote +/// All other characters remain the same +fn destringify(pp: *Preprocessor, str: []const u8) void { + var state: enum { start, backslash_seen } = .start; + for (str) |c| { + switch (c) { + '\\' => { + if (state == .backslash_seen) pp.char_buf.appendAssumeCapacity(c); + state = if (state == .start) .backslash_seen else .start; + }, + else => { + if (state == .backslash_seen and c != '"') pp.char_buf.appendAssumeCapacity('\\'); + pp.char_buf.appendAssumeCapacity(c); + state = .start; + }, + } + } +} + +/// Stringify `tokens` into pp.char_buf. +/// See https://gcc.gnu.org/onlinedocs/gcc-11.2.0/cpp/Stringizing.html#Stringizing +fn stringify(pp: *Preprocessor, tokens: []const Token) !void { + try pp.char_buf.append('"'); + var ws_state: enum { start, need, not_needed } = .start; + for (tokens) |tok| { + if (tok.id == .macro_ws) { + if (ws_state == .start) continue; + ws_state = .need; + continue; + } + if (ws_state == .need) try pp.char_buf.append(' '); + ws_state = .not_needed; + + // backslashes not inside strings are not escaped + const is_str = switch (tok.id) { + .string_literal, + .string_literal_utf_16, + .string_literal_utf_8, + .string_literal_utf_32, + .string_literal_wide, + .char_literal, + .char_literal_utf_16, + .char_literal_utf_32, + .char_literal_wide, + => true, + else => false, + }; + + for (pp.expandedSlice(tok)) |c| { + if (c == '"') + try pp.char_buf.appendSlice("\\\"") + else if (c == '\\' and is_str) + try pp.char_buf.appendSlice("\\\\") + else + try pp.char_buf.append(c); + } + } + try pp.char_buf.appendSlice("\"\n"); +} + +fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []const Token, src_loc: Source.Location) Error!bool { + switch (builtin) { + .macro_param_has_attribute, + .macro_param_has_feature, + .macro_param_has_extension, + .macro_param_has_builtin, + => { + var invalid: ?Token = null; + var identifier: ?Token = null; + for (param_toks) |tok| switch (tok.id) { + .identifier, .extended_identifier, .builtin_choose_expr, .builtin_va_arg => { + if (identifier) |_| invalid = tok else identifier = tok; + }, + .macro_ws => continue, + else => { + invalid = tok; + break; + }, + }; + if (identifier == null and invalid == null) invalid = .{ .id = .eof, .loc = src_loc }; + if (invalid) |some| { + try pp.comp.diag.add( + .{ .tag = 
.feature_check_requires_identifier, .loc = some.loc }, + some.expansionSlice(), + ); + return false; + } + + const ident_str = pp.expandedSlice(identifier.?); + return switch (builtin) { + .macro_param_has_attribute => Attribute.fromString(.gnu, null, ident_str) != null, + .macro_param_has_feature => features.hasFeature(pp.comp, ident_str), + .macro_param_has_extension => features.hasExtension(pp.comp, ident_str), + .macro_param_has_builtin => pp.comp.builtins.hasBuiltin(ident_str), + else => unreachable, + }; + }, + .macro_param_has_warning => { + const actual_param = pp.pasteStringsUnsafe(param_toks) catch |err| switch (err) { + error.ExpectedStringLiteral => { + try pp.comp.diag.add(.{ + .tag = .expected_str_literal_in, + .loc = param_toks[0].loc, + .extra = .{ .str = "__has_warning" }, + }, param_toks[0].expansionSlice()); + return false; + }, + else => |e| return e, + }; + if (!mem.startsWith(u8, actual_param, "-W")) { + try pp.comp.diag.add(.{ + .tag = .malformed_warning_check, + .loc = param_toks[0].loc, + .extra = .{ .str = "__has_warning" }, + }, param_toks[0].expansionSlice()); + return false; + } + const warning_name = actual_param[2..]; + return Diagnostics.warningExists(warning_name); + }, + .macro_param_is_identifier => { + var invalid: ?Token = null; + var identifier: ?Token = null; + for (param_toks) |tok| switch (tok.id) { + .macro_ws => continue, + else => { + if (identifier) |_| invalid = tok else identifier = tok; + }, + }; + if (identifier == null and invalid == null) invalid = .{ .id = .eof, .loc = src_loc }; + if (invalid) |some| { + try pp.comp.diag.add(.{ + .tag = .missing_tok_builtin, + .loc = some.loc, + .extra = .{ .tok_id_expected = .r_paren }, + }, some.expansionSlice()); + return false; + } + + const id = identifier.?.id; + return id == .identifier or id == .extended_identifier; + }, + else => unreachable, + } +} + +fn expandFuncMacro( + pp: *Preprocessor, + loc: Source.Location, + func_macro: *const Macro, + args: *const MacroArguments, + expanded_args: *const MacroArguments, +) MacroError!ExpandBuf { + var buf = ExpandBuf.init(pp.comp.gpa); + try buf.ensureTotalCapacity(func_macro.tokens.len); + errdefer buf.deinit(); + + var expanded_variable_arguments = ExpandBuf.init(pp.comp.gpa); + defer expanded_variable_arguments.deinit(); + var variable_arguments = ExpandBuf.init(pp.comp.gpa); + defer variable_arguments.deinit(); + + if (func_macro.var_args) { + var i: usize = func_macro.params.len; + while (i < expanded_args.items.len) : (i += 1) { + try variable_arguments.appendSlice(args.items[i]); + try expanded_variable_arguments.appendSlice(expanded_args.items[i]); + if (i != expanded_args.items.len - 1) { + const comma = Token{ .id = .comma, .loc = .{ .id = .generated } }; + try variable_arguments.append(comma); + try expanded_variable_arguments.append(comma); + } + } + } + + // token concatenation and expansion phase + var tok_i: usize = 0; + while (tok_i < func_macro.tokens.len) : (tok_i += 1) { + const raw = func_macro.tokens[tok_i]; + switch (raw.id) { + .hash_hash => while (tok_i + 1 < func_macro.tokens.len) { + const raw_next = func_macro.tokens[tok_i + 1]; + tok_i += 1; + + const next = switch (raw_next.id) { + .macro_ws => continue, + .hash_hash => continue, + .macro_param, .macro_param_no_expand => args.items[raw_next.end], + .keyword_va_args => variable_arguments.items, + else => &[1]Token{tokFromRaw(raw_next)}, + }; + + try pp.pasteTokens(&buf, next); + if (next.len != 0) break; + }, + .macro_param_no_expand => { + const slice = args.items[raw.end]; 
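+ // (Illustrative note: for `.macro_param*` tokens, `raw.end` is reused as the
+ // parameter index, assigned while the body was collected in `defineFn`; the
+ // unexpanded argument is used here because this occurrence is a `##` operand.)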
+ const raw_loc = Source.Location{ .id = raw.source, .byte_offset = raw.start, .line = raw.line }; + try bufCopyTokens(&buf, slice, &.{raw_loc}); + }, + .macro_param => { + const arg = expanded_args.items[raw.end]; + const raw_loc = Source.Location{ .id = raw.source, .byte_offset = raw.start, .line = raw.line }; + try bufCopyTokens(&buf, arg, &.{raw_loc}); + }, + .keyword_va_args => { + const raw_loc = Source.Location{ .id = raw.source, .byte_offset = raw.start, .line = raw.line }; + try bufCopyTokens(&buf, expanded_variable_arguments.items, &.{raw_loc}); + }, + .stringify_param, .stringify_va_args => { + const arg = if (raw.id == .stringify_va_args) + variable_arguments.items + else + args.items[raw.end]; + + pp.char_buf.clearRetainingCapacity(); + try pp.stringify(arg); + + const start = pp.comp.generated_buf.items.len; + try pp.comp.generated_buf.appendSlice(pp.char_buf.items); + + try buf.append(try pp.makeGeneratedToken(start, .string_literal, tokFromRaw(raw))); + }, + .macro_param_has_attribute, + .macro_param_has_warning, + .macro_param_has_feature, + .macro_param_has_extension, + .macro_param_has_builtin, + .macro_param_is_identifier, + => { + const arg = expanded_args.items[0]; + const result = if (arg.len == 0) blk: { + const extra = Diagnostics.Message.Extra{ .arguments = .{ .expected = 1, .actual = 0 } }; + try pp.comp.diag.add(.{ .tag = .expected_arguments, .loc = loc, .extra = extra }, &.{}); + break :blk false; + } else try pp.handleBuiltinMacro(raw.id, arg, loc); + const start = pp.comp.generated_buf.items.len; + try pp.comp.generated_buf.writer().print("{}\n", .{@boolToInt(result)}); + try buf.append(try pp.makeGeneratedToken(start, .integer_literal, tokFromRaw(raw))); + }, + .macro_param_pragma_operator => { + const param_toks = expanded_args.items[0]; + // Clang and GCC require exactly one token (so, no parentheses or string pasting) + // even though their error messages indicate otherwise. Ours is slightly more + // descriptive. 
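+ // For example (illustrative): `_Pragma("GCC diagnostic push")` is accepted,
+ // while `_Pragma(("x"))` or `_Pragma("a" "b")` is diagnosed below because the
+ // expanded argument must be exactly one string literal.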
+ var invalid: ?Token = null; + var string: ?Token = null; + for (param_toks) |tok| switch (tok.id) { + .string_literal => { + if (string) |_| invalid = tok else string = tok; + }, + .macro_ws => continue, + else => { + invalid = tok; + break; + }, + }; + if (string == null and invalid == null) invalid = .{ .loc = loc, .id = .eof }; + if (invalid) |some| try pp.comp.diag.add( + .{ .tag = .pragma_operator_string_literal, .loc = some.loc }, + some.expansionSlice(), + ) else try pp.pragmaOperator(string.?, loc); + }, + else => try buf.append(tokFromRaw(raw)), + } + } + + return buf; +} + +fn shouldExpand(tok: Token, macro: *Macro) bool { + // macro.loc.line contains the macros end index + if (tok.loc.id == macro.loc.id and + tok.loc.byte_offset >= macro.loc.byte_offset and + tok.loc.byte_offset <= macro.loc.line) + return false; + for (tok.expansionSlice()) |loc| { + if (loc.id == macro.loc.id and + loc.byte_offset >= macro.loc.byte_offset and + loc.byte_offset <= macro.loc.line) + return false; + } + + return true; +} + +fn bufCopyTokens(buf: *ExpandBuf, tokens: []const Token, src: []const Source.Location) !void { + try buf.ensureUnusedCapacity(tokens.len); + for (tokens) |tok| { + var copy = try tok.dupe(buf.allocator); + try copy.addExpansionLocation(buf.allocator, src); + buf.appendAssumeCapacity(copy); + } +} + +fn nextBufToken( + pp: *Preprocessor, + tokenizer: *Tokenizer, + buf: *ExpandBuf, + start_idx: *usize, + end_idx: *usize, + extend_buf: bool, +) Error!Token { + start_idx.* += 1; + if (start_idx.* == buf.items.len and start_idx.* >= end_idx.*) { + if (extend_buf) { + const raw_tok = tokenizer.next(); + if (raw_tok.id.isMacroIdentifier() and + pp.poisoned_identifiers.get(pp.tokSlice(raw_tok)) != null) + try pp.err(raw_tok, .poisoned_identifier); + + if (raw_tok.id == .nl) pp.add_expansion_nl += 1; + + const new_tok = tokFromRaw(raw_tok); + end_idx.* += 1; + try buf.append(new_tok); + return new_tok; + } else { + return Token{ .id = .eof, .loc = .{ .id = .generated } }; + } + } else { + return buf.items[start_idx.*]; + } +} + +fn collectMacroFuncArguments( + pp: *Preprocessor, + tokenizer: *Tokenizer, + buf: *ExpandBuf, + start_idx: *usize, + end_idx: *usize, + extend_buf: bool, + is_builtin: bool, +) Error!(?MacroArguments) { + const name_tok = buf.items[start_idx.*]; + const saved_tokenizer = tokenizer.*; + const old_end = end_idx.*; + + while (true) { + const tok = try nextBufToken(pp, tokenizer, buf, start_idx, end_idx, extend_buf); + switch (tok.id) { + .nl, .whitespace, .macro_ws => {}, + .l_paren => break, + else => { + if (is_builtin) { + try pp.comp.diag.add(.{ + .tag = .missing_tok_builtin, + .loc = tok.loc, + .extra = .{ .tok_id_expected = .l_paren }, + }, tok.expansionSlice()); + } + // Not a macro function call, go over normal identifier, rewind + tokenizer.* = saved_tokenizer; + end_idx.* = old_end; + return null; + }, + } + } + + // collect the arguments. 
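+ // For example (illustrative): in `M(a, (b, c), d)` the comma inside the inner
+ // parentheses does not split arguments, so three arguments are collected;
+ // `parens` tracks that nesting.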
+ var parens: u32 = 0; + var args = MacroArguments.init(pp.comp.gpa); + errdefer deinitMacroArguments(pp.comp.gpa, &args); + var curArgument = std.ArrayList(Token).init(pp.comp.gpa); + defer curArgument.deinit(); + while (true) { + var tok = try nextBufToken(pp, tokenizer, buf, start_idx, end_idx, extend_buf); + switch (tok.id) { + .comma => { + if (parens == 0) { + try args.append(curArgument.toOwnedSlice()); + } else { + try curArgument.append(try tok.dupe(pp.comp.gpa)); + } + }, + .l_paren => { + try curArgument.append(try tok.dupe(pp.comp.gpa)); + parens += 1; + }, + .r_paren => { + if (parens == 0) { + try args.append(curArgument.toOwnedSlice()); + break; + } else { + try curArgument.append(try tok.dupe(pp.comp.gpa)); + parens -= 1; + } + }, + .eof => { + deinitMacroArguments(pp.comp.gpa, &args); + tokenizer.* = saved_tokenizer; + end_idx.* = old_end; + try pp.comp.diag.add( + .{ .tag = .unterminated_macro_arg_list, .loc = name_tok.loc }, + name_tok.expansionSlice(), + ); + return null; + }, + .nl, .whitespace => { + try curArgument.append(.{ .id = .macro_ws, .loc = .{ .id = .generated } }); + }, + else => { + try curArgument.append(try tok.dupe(pp.comp.gpa)); + }, + } + } + + return args; +} + +fn expandMacroExhaustive( + pp: *Preprocessor, + tokenizer: *Tokenizer, + buf: *ExpandBuf, + start_idx: usize, + end_idx: usize, + extend_buf: bool, +) MacroError!void { + var moving_end_idx = end_idx; + var advance_index: usize = 0; + // rescan loop + var do_rescan = true; + while (do_rescan) { + do_rescan = false; + // expansion loop + var idx: usize = start_idx + advance_index; + while (idx < moving_end_idx) { + const macro_tok = buf.items[idx]; + const macro_entry = pp.defines.getPtr(pp.expandedSlice(macro_tok)); + if (macro_entry == null or !shouldExpand(buf.items[idx], macro_entry.?)) { + idx += 1; + continue; + } + if (macro_entry) |macro| macro_handler: { + if (macro.is_func) { + var macro_scan_idx = idx; + // to be saved in case this doesn't turn out to be a call + const args = (try pp.collectMacroFuncArguments( + tokenizer, + buf, + ¯o_scan_idx, + &moving_end_idx, + extend_buf, + macro.is_builtin, + )) orelse { + idx += 1; + break :macro_handler; + }; + defer { + for (args.items) |item| { + pp.comp.gpa.free(item); + } + args.deinit(); + } + + var args_count = @intCast(u32, args.items.len); + // if the macro has zero arguments g() args_count is still 1 + if (args_count == 1 and macro.params.len == 0) args_count = 0; + + // Validate argument count. 
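+ // For example (illustrative): `#define F(a, b)` invoked as `F(1)` reports
+ // expected 2 / actual 1, while a variadic macro only requires at least as
+ // many arguments as it has named parameters.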
+ const extra = Diagnostics.Message.Extra{ + .arguments = .{ .expected = @intCast(u32, macro.params.len), .actual = args_count }, + }; + if (macro.var_args and args_count < macro.params.len) { + try pp.comp.diag.add( + .{ .tag = .expected_at_least_arguments, .loc = buf.items[idx].loc, .extra = extra }, + buf.items[idx].expansionSlice(), + ); + idx += 1; + continue; + } + if (!macro.var_args and args_count != macro.params.len) { + try pp.comp.diag.add( + .{ .tag = .expected_arguments, .loc = buf.items[idx].loc, .extra = extra }, + buf.items[idx].expansionSlice(), + ); + idx += 1; + continue; + } + var expanded_args = MacroArguments.init(pp.comp.gpa); + defer deinitMacroArguments(pp.comp.gpa, &expanded_args); + try expanded_args.ensureTotalCapacity(args.items.len); + for (args.items) |arg| { + var expand_buf = ExpandBuf.init(pp.comp.gpa); + try expand_buf.appendSlice(arg); + + try pp.expandMacroExhaustive(tokenizer, &expand_buf, 0, expand_buf.items.len, false); + + expanded_args.appendAssumeCapacity(expand_buf.toOwnedSlice()); + } + + var res = try pp.expandFuncMacro(macro_tok.loc, macro, &args, &expanded_args); + defer res.deinit(); + + const macro_expansion_locs = macro_tok.expansionSlice(); + for (res.items) |*tok| { + try tok.addExpansionLocation(pp.comp.gpa, &.{macro_tok.loc}); + try tok.addExpansionLocation(pp.comp.gpa, macro_expansion_locs); + } + + const count = macro_scan_idx - idx + 1; + for (buf.items[idx .. idx + count]) |tok| Token.free(tok.expansion_locs, pp.comp.gpa); + try buf.replaceRange(idx, count, res.items); + // TODO: moving_end_idx += res.items.len - (macro_scan_idx-idx+1) + // doesn't work when the RHS is negative (unsigned!) + moving_end_idx = moving_end_idx + res.items.len - count; + idx += res.items.len; + do_rescan = true; + } else { + const res = try pp.expandObjMacro(macro); + defer res.deinit(); + + const macro_expansion_locs = macro_tok.expansionSlice(); + for (res.items) |*tok| { + try tok.addExpansionLocation(pp.comp.gpa, &.{macro_tok.loc}); + try tok.addExpansionLocation(pp.comp.gpa, macro_expansion_locs); + } + + Token.free(buf.items[idx].expansion_locs, pp.comp.gpa); + try buf.replaceRange(idx, 1, res.items); + idx += res.items.len; + moving_end_idx = moving_end_idx + res.items.len - 1; + do_rescan = true; + } + } + if (idx - start_idx == advance_index + 1 and !do_rescan) { + advance_index += 1; + } + } // end of replacement phase + } + // end of scanning phase + + // trim excess buffer + for (buf.items[moving_end_idx..]) |item| { + Token.free(item.expansion_locs, pp.comp.gpa); + } + buf.items.len = moving_end_idx; +} + +/// Try to expand a macro after a possible candidate has been read from the `tokenizer` +/// into the `raw` token passed as argument +fn expandMacro(pp: *Preprocessor, tokenizer: *Tokenizer, raw: RawToken) MacroError!void { + var source_tok = tokFromRaw(raw); + if (!raw.id.isMacroIdentifier()) { + source_tok.id.simplifyMacroKeyword(); + return pp.tokens.append(pp.comp.gpa, source_tok); + } + pp.top_expansion_buf.items.len = 0; + try pp.top_expansion_buf.append(source_tok); + pp.expansion_source_loc = source_tok.loc; + + try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, 1, true); + try pp.tokens.ensureUnusedCapacity(pp.comp.gpa, pp.top_expansion_buf.items.len); + for (pp.top_expansion_buf.items) |*tok| { + if (tok.id == .macro_ws and !pp.comp.only_preprocess) { + Token.free(tok.expansion_locs, pp.comp.gpa); + continue; + } + tok.id.simplifyMacroKeyword(); + pp.tokens.appendAssumeCapacity(tok.*); + } + if 
(pp.comp.only_preprocess) { + try pp.tokens.ensureUnusedCapacity(pp.comp.gpa, pp.add_expansion_nl); + while (pp.add_expansion_nl > 0) : (pp.add_expansion_nl -= 1) { + pp.tokens.appendAssumeCapacity(.{ .id = .nl, .loc = .{ .id = .generated } }); + } + } +} + +/// Get expanded token source string. +pub fn expandedSlice(pp: *Preprocessor, tok: Token) []const u8 { + if (tok.id.lexeme()) |some| return some; + var tmp_tokenizer = Tokenizer{ + .buf = pp.comp.getSource(tok.loc.id).buf, + .comp = pp.comp, + .index = tok.loc.byte_offset, + .source = .generated, + }; + if (tok.id == .macro_string) { + while (true) : (tmp_tokenizer.index += 1) { + if (tmp_tokenizer.buf[tmp_tokenizer.index] == '>') break; + } + return tmp_tokenizer.buf[tok.loc.byte_offset .. tmp_tokenizer.index + 1]; + } + const res = tmp_tokenizer.next(); + return tmp_tokenizer.buf[res.start..res.end]; +} + +/// Concat two tokens and add the result to pp.generated +fn pasteTokens(pp: *Preprocessor, lhs_toks: *ExpandBuf, rhs_toks: []const Token) Error!void { + const lhs = while (lhs_toks.popOrNull()) |lhs| { + if (lhs.id == .macro_ws) + Token.free(lhs.expansion_locs, pp.comp.gpa) + else + break lhs; + } else { + return bufCopyTokens(lhs_toks, rhs_toks, &.{}); + }; + + var rhs_rest: u32 = 1; + const rhs = for (rhs_toks) |rhs| { + if (rhs.id != .macro_ws) break rhs; + rhs_rest += 1; + } else { + return lhs_toks.appendAssumeCapacity(lhs); + }; + defer Token.free(lhs.expansion_locs, pp.comp.gpa); + + const start = pp.comp.generated_buf.items.len; + const end = start + pp.expandedSlice(lhs).len + pp.expandedSlice(rhs).len; + try pp.comp.generated_buf.ensureTotalCapacity(end + 1); // +1 for a newline + // We cannot use the same slices here since they might be invalidated by `ensureCapacity` + pp.comp.generated_buf.appendSliceAssumeCapacity(pp.expandedSlice(lhs)); + pp.comp.generated_buf.appendSliceAssumeCapacity(pp.expandedSlice(rhs)); + pp.comp.generated_buf.appendAssumeCapacity('\n'); + + // Try to tokenize the result. 
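+ // For example (illustrative): pasting `12` and `34` forms the single token
+ // `1234`, while pasting `x` and `+` yields `x+`, which re-tokenizes as two
+ // tokens and is diagnosed as `.pasting_formed_invalid`.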
+ var tmp_tokenizer = Tokenizer{ + .buf = pp.comp.generated_buf.items, + .comp = pp.comp, + .index = @intCast(u32, start), + .source = .generated, + }; + const pasted_token = tmp_tokenizer.nextNoWS(); + const next = tmp_tokenizer.nextNoWS().id; + if (next != .nl and next != .eof) { + try pp.comp.diag.add(.{ + .tag = .pasting_formed_invalid, + .loc = lhs.loc, + .extra = .{ .str = try pp.comp.diag.arena.allocator().dupe( + u8, + pp.comp.generated_buf.items[start..end], + ) }, + }, lhs.expansionSlice()); + } + + try lhs_toks.append(try pp.makeGeneratedToken(start, pasted_token.id, lhs)); + try bufCopyTokens(lhs_toks, rhs_toks[rhs_rest..], &.{}); +} + +fn makeGeneratedToken(pp: *Preprocessor, start: usize, id: Token.Id, source: Token) !Token { + var pasted_token = Token{ .id = id, .loc = .{ + .id = .generated, + .byte_offset = @intCast(u32, start), + .line = pp.generated_line, + } }; + pp.generated_line += 1; + try pasted_token.addExpansionLocation(pp.comp.gpa, &.{source.loc}); + try pasted_token.addExpansionLocation(pp.comp.gpa, source.expansionSlice()); + return pasted_token; +} + +/// Defines a new macro and warns if it is a duplicate +fn defineMacro(pp: *Preprocessor, name_tok: RawToken, macro: Macro) Error!void { + const name_str = pp.tokSlice(name_tok); + const gop = try pp.defines.getOrPut(name_str); + if (gop.found_existing and !gop.value_ptr.eql(macro, pp)) { + try pp.comp.diag.add(.{ + .tag = if (gop.value_ptr.is_builtin) .builtin_macro_redefined else .macro_redefined, + .loc = .{ .id = name_tok.source, .byte_offset = name_tok.start, .line = name_tok.line }, + .extra = .{ .str = name_str }, + }, &.{}); + // TODO add a previous definition note + } + gop.value_ptr.* = macro; +} + +/// Handle a #define directive. +fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void { + // Get macro name and validate it. + const macro_name = tokenizer.nextNoWS(); + if (macro_name.id == .keyword_defined) { + try pp.err(macro_name, .defined_as_macro_name); + return skipToNl(tokenizer); + } + if (!macro_name.id.isMacroIdentifier()) { + try pp.err(macro_name, .macro_name_must_be_identifier); + return skipToNl(tokenizer); + } + + // Check for function macros and empty defines. + var first = tokenizer.next(); + switch (first.id) { + .nl, .eof => return pp.defineMacro(macro_name, .{ + .params = undefined, + .tokens = undefined, + .var_args = false, + .loc = undefined, + .is_func = false, + }), + .whitespace => first = tokenizer.next(), + .l_paren => return pp.defineFn(tokenizer, macro_name, first), + else => try pp.err(first, .whitespace_after_macro_name), + } + if (first.id == .hash_hash) { + try pp.err(first, .hash_hash_at_start); + return skipToNl(tokenizer); + } + first.id.simplifyMacroKeyword(); + + pp.token_buf.items.len = 0; // Safe to use since we can only be in one directive at a time. + + var need_ws = false; + // Collect the token body and validate any ## found. 
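+ // For example (illustrative): `#define X a ## b` stores the raw tokens `a`,
+ // `##`, `b`; a `##` at the very start or end of the body is rejected with
+ // `.hash_hash_at_start` / `.hash_hash_at_end`.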
+ var tok = first; + const end_index = while (true) { + tok.id.simplifyMacroKeyword(); + switch (tok.id) { + .hash_hash => { + const next = tokenizer.nextNoWS(); + switch (next.id) { + .nl, .eof => { + try pp.err(tok, .hash_hash_at_end); + return; + }, + .hash_hash => { + try pp.err(next, .hash_hash_at_end); + return; + }, + else => {}, + } + try pp.token_buf.append(tok); + try pp.token_buf.append(next); + }, + .nl, .eof => break tok.start, + .whitespace => need_ws = true, + else => { + if (tok.id != .whitespace and need_ws) { + need_ws = false; + try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated }); + } + try pp.token_buf.append(tok); + }, + } + tok = tokenizer.next(); + } else unreachable; + + const list = try pp.arena.allocator().dupe(RawToken, pp.token_buf.items); + try pp.defineMacro(macro_name, .{ + .loc = .{ + .id = macro_name.source, + .byte_offset = first.start, + .line = end_index, + }, + .tokens = list, + .params = undefined, + .is_func = false, + .var_args = false, + }); +} + +/// Handle a function like #define directive. +fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, macro_name: RawToken, l_paren: RawToken) Error!void { + assert(macro_name.id.isMacroIdentifier()); + var params = std.ArrayList([]const u8).init(pp.comp.gpa); + defer params.deinit(); + + // Parse the parameter list. + var gnu_var_args: []const u8 = ""; + var var_args = false; + const start_index = while (true) { + var tok = tokenizer.nextNoWS(); + if (tok.id == .r_paren) break tok.end; + if (tok.id == .eof) return pp.err(tok, .unterminated_macro_param_list); + if (tok.id == .ellipsis) { + var_args = true; + const r_paren = tokenizer.nextNoWS(); + if (r_paren.id != .r_paren) { + try pp.err(r_paren, .missing_paren_param_list); + try pp.err(l_paren, .to_match_paren); + return skipToNl(tokenizer); + } + break r_paren.end; + } + if (!tok.id.isMacroIdentifier()) { + try pp.err(tok, .invalid_token_param_list); + return skipToNl(tokenizer); + } + + try params.append(pp.tokSlice(tok)); + + tok = tokenizer.nextNoWS(); + if (tok.id == .ellipsis) { + try pp.err(tok, .gnu_va_macro); + gnu_var_args = params.pop(); + const r_paren = tokenizer.nextNoWS(); + if (r_paren.id != .r_paren) { + try pp.err(r_paren, .missing_paren_param_list); + try pp.err(l_paren, .to_match_paren); + return skipToNl(tokenizer); + } + break r_paren.end; + } else if (tok.id == .r_paren) { + break tok.end; + } else if (tok.id != .comma) { + try pp.err(tok, .expected_comma_param_list); + return skipToNl(tokenizer); + } + } else unreachable; + + var need_ws = false; + // Collect the body tokens and validate # and ##'s found. + pp.token_buf.items.len = 0; // Safe to use since we can only be in one directive at a time. 
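+ // For example (illustrative): in `#define S(x) #x`, the `#` below becomes a
+ // `.stringify_param` token carrying the parameter index in `end`, so `S(1 + 2)`
+ // later expands to the string literal "1 + 2".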
+ const end_index = tok_loop: while (true) { + var tok = tokenizer.next(); + switch (tok.id) { + .nl, .eof => break tok.start, + .whitespace => need_ws = pp.token_buf.items.len != 0, + .hash => { + if (tok.id != .whitespace and need_ws) { + need_ws = false; + try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated }); + } + const param = tokenizer.nextNoWS(); + blk: { + if (var_args and param.id == .keyword_va_args) { + tok.id = .stringify_va_args; + try pp.token_buf.append(tok); + continue :tok_loop; + } + if (!param.id.isMacroIdentifier()) break :blk; + const s = pp.tokSlice(param); + if (mem.eql(u8, s, gnu_var_args)) { + tok.id = .stringify_va_args; + try pp.token_buf.append(tok); + continue :tok_loop; + } + for (params.items) |p, i| { + if (mem.eql(u8, p, s)) { + tok.id = .stringify_param; + tok.end = @intCast(u32, i); + try pp.token_buf.append(tok); + continue :tok_loop; + } + } + } + try pp.err(param, .hash_not_followed_param); + return skipToNl(tokenizer); + }, + .hash_hash => { + need_ws = false; + // if ## appears at the beginning, the token buf is still empty + // in this case, error out + if (pp.token_buf.items.len == 0) { + try pp.err(tok, .hash_hash_at_start); + return skipToNl(tokenizer); + } + const saved_tokenizer = tokenizer.*; + const next = tokenizer.nextNoWS(); + if (next.id == .nl or next.id == .eof) { + try pp.err(tok, .hash_hash_at_end); + return; + } + tokenizer.* = saved_tokenizer; + // convert the previous token to .macro_param_no_expand if it was .macro_param + if (pp.token_buf.items[pp.token_buf.items.len - 1].id == .macro_param) { + pp.token_buf.items[pp.token_buf.items.len - 1].id = .macro_param_no_expand; + } + try pp.token_buf.append(tok); + }, + else => { + if (tok.id != .whitespace and need_ws) { + need_ws = false; + try pp.token_buf.append(.{ .id = .macro_ws, .source = .generated }); + } + if (var_args and tok.id == .keyword_va_args) { + // do nothing + } else if (tok.id.isMacroIdentifier()) { + tok.id.simplifyMacroKeyword(); + const s = pp.tokSlice(tok); + if (mem.eql(u8, gnu_var_args, s)) { + tok.id = .keyword_va_args; + } else for (params.items) |param, i| { + if (mem.eql(u8, param, s)) { + // NOTE: it doesn't matter to assign .macro_param_no_expand + // here in case a ## was the previous token, because + // ## processing will eat this token with the same semantics + tok.id = .macro_param; + tok.end = @intCast(u32, i); + break; + } + } + } + try pp.token_buf.append(tok); + }, + } + } else unreachable; + + const param_list = try pp.arena.allocator().dupe([]const u8, params.items); + const token_list = try pp.arena.allocator().dupe(RawToken, pp.token_buf.items); + try pp.defineMacro(macro_name, .{ + .is_func = true, + .params = param_list, + .var_args = var_args or gnu_var_args.len != 0, + .tokens = token_list, + .loc = .{ + .id = macro_name.source, + .byte_offset = start_index, + .line = end_index, + }, + }); +} + +// Handle a #include directive. +fn include(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void { + const new_source = findIncludeSource(pp, tokenizer) catch |er| switch (er) { + error.InvalidInclude => return, + else => |e| return e, + }; + + // Prevent stack overflow + pp.include_depth += 1; + defer pp.include_depth -= 1; + if (pp.include_depth > max_include_depth) return; + + _ = pp.preprocessExtra(new_source) catch |err| switch (err) { + error.StopPreprocessing => {}, + else => |e| return e, + }; +} + +/// tokens that are part of a pragma directive can happen in 3 ways: +/// 1. directly in the text via `#pragma ...` +/// 2. 
Via a string literal argument to `_Pragma` +/// 3. Via a stringified macro argument which is used as an argument to `_Pragma` +/// operator_loc: Location of `_Pragma`; null if this is from #pragma +/// arg_locs: expansion locations of the argument to _Pragma. empty if #pragma or a raw string literal was used +fn makePragmaToken(pp: *Preprocessor, raw: RawToken, operator_loc: ?Source.Location, arg_locs: []const Source.Location) !Token { + var tok = tokFromRaw(raw); + if (operator_loc) |loc| { + try tok.addExpansionLocation(pp.comp.gpa, &.{loc}); + } + try tok.addExpansionLocation(pp.comp.gpa, arg_locs); + return tok; +} + +/// Handle a pragma directive +fn pragma(pp: *Preprocessor, tokenizer: *Tokenizer, pragma_tok: RawToken, operator_loc: ?Source.Location, arg_locs: []const Source.Location) !void { + const name_tok = tokenizer.nextNoWS(); + if (name_tok.id == .nl or name_tok.id == .eof) return; + + const name = pp.tokSlice(name_tok); + try pp.tokens.append(pp.comp.gpa, try pp.makePragmaToken(pragma_tok, operator_loc, arg_locs)); + const pragma_start = @intCast(u32, pp.tokens.len); + + const pragma_name_tok = try pp.makePragmaToken(name_tok, operator_loc, arg_locs); + try pp.tokens.append(pp.comp.gpa, pragma_name_tok); + while (true) { + const next_tok = tokenizer.next(); + if (next_tok.id == .whitespace) continue; + if (next_tok.id == .eof) { + try pp.tokens.append(pp.comp.gpa, .{ + .id = .nl, + .loc = .{ .id = .generated }, + }); + break; + } + try pp.tokens.append(pp.comp.gpa, try pp.makePragmaToken(next_tok, operator_loc, arg_locs)); + if (next_tok.id == .nl) break; + } + if (pp.comp.getPragma(name)) |prag| unknown: { + return prag.preprocessorCB(pp, pragma_start) catch |err| switch (err) { + error.UnknownPragma => break :unknown, + else => |e| return e, + }; + } + return pp.comp.diag.add(.{ + .tag = .unknown_pragma, + .loc = pragma_name_tok.loc, + }, pragma_name_tok.expansionSlice()); +} + +fn findIncludeSource(pp: *Preprocessor, tokenizer: *Tokenizer) !Source { + const start = pp.tokens.len; + defer pp.tokens.len = start; + + var first = tokenizer.nextNoWS(); + if (first.id == .angle_bracket_left) to_end: { + // The tokenizer does not handle include strings so do it here. + while (tokenizer.index < tokenizer.buf.len) : (tokenizer.index += 1) { + switch (tokenizer.buf[tokenizer.index]) { + '>' => { + tokenizer.index += 1; + first.end = tokenizer.index; + first.id = .macro_string; + break :to_end; + }, + '\n' => break, + else => {}, + } + } + try pp.comp.diag.add(.{ + .tag = .header_str_closing, + .loc = .{ .id = first.source, .byte_offset = first.start }, + }, &.{}); + try pp.err(first, .header_str_match); + } + // Try to expand if the argument is a macro. + try pp.expandMacro(tokenizer, first); + + // Check that we actually got a string. + const filename_tok = pp.tokens.get(start); + switch (filename_tok.id) { + .string_literal, .macro_string => {}, + else => { + try pp.err(first, .expected_filename); + try pp.expectNl(tokenizer); + return error.InvalidInclude; + }, + } + // Error on extra tokens. + const nl = tokenizer.nextNoWS(); + if ((nl.id != .nl and nl.id != .eof) or pp.tokens.len > start + 1) { + skipToNl(tokenizer); + try pp.err(first, .extra_tokens_directive_end); + } + + // Check for empty filename. + const tok_slice = pp.expandedSlice(filename_tok); + if (tok_slice.len < 3) { + try pp.err(first, .empty_filename); + return error.InvalidInclude; + } + + // Find the file. + const filename = tok_slice[1 .. 
tok_slice.len - 1]; + return (try pp.comp.findInclude(first, filename, filename_tok.id == .string_literal)) orelse + pp.fatal(first, "'{s}' not found", .{filename}); +} + +/// Pretty print tokens and try to preserve whitespace. +pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype) !void { + var i: u32 = 0; + while (true) : (i += 1) { + var cur: Token = pp.tokens.get(i); + switch (cur.id) { + .eof => { + if (pp.tokens.len > 1 and pp.tokens.items(.id)[i - 1] != .nl) try w.writeByte('\n'); + break; + }, + .nl => try w.writeAll("\n"), + .keyword_pragma => { + const pragma_name = pp.expandedSlice(pp.tokens.get(i + 1)); + const end_idx = mem.indexOfScalarPos(Token.Id, pp.tokens.items(.id), i, .nl) orelse i + 1; + const pragma_len = @intCast(u32, end_idx) - i; + + if (pp.comp.getPragma(pragma_name)) |prag| { + if (!prag.shouldPreserveTokens(pp, i + 1)) { + i += pragma_len; + cur = pp.tokens.get(i); + continue; + } + } + try w.writeAll("#pragma"); + i += 1; + while (true) : (i += 1) { + cur = pp.tokens.get(i); + if (cur.id == .nl) { + try w.writeByte('\n'); + break; + } + try w.writeByte(' '); + const slice = pp.expandedSlice(cur); + try w.writeAll(slice); + } + }, + .whitespace => { + var slice = pp.expandedSlice(cur); + while (mem.indexOfScalar(u8, slice, '\n')) |some| { + try w.writeByte('\n'); + slice = slice[some + 1 ..]; + } + for (slice) |_| try w.writeByte(' '); + }, + else => { + const slice = pp.expandedSlice(cur); + try w.writeAll(slice); + }, + } + } +} + +test "Preserve pragma tokens sometimes" { + const allocator = std.testing.allocator; + const Test = struct { + fn runPreprocessor(source_text: []const u8) ![]const u8 { + var buf = std.ArrayList(u8).init(allocator); + defer buf.deinit(); + + var comp = Compilation.init(allocator); + defer comp.deinit(); + comp.only_preprocess = true; + + try comp.addDefaultPragmaHandlers(); + + var pp = Preprocessor.init(&comp); + defer pp.deinit(); + + const test_runner_macros = try comp.addSourceFromBuffer("", source_text); + const eof = try pp.preprocess(test_runner_macros); + try pp.tokens.append(pp.comp.gpa, eof); + try pp.prettyPrintTokens(buf.writer()); + return allocator.dupe(u8, buf.items); + } + + fn check(source_text: []const u8, expected: []const u8) !void { + const output = try runPreprocessor(source_text); + defer allocator.free(output); + + try std.testing.expectEqualStrings(expected, output); + } + }; + const preserve_gcc_diagnostic = + \\#pragma GCC diagnostic error "-Wnewline-eof" + \\#pragma GCC warning error "-Wnewline-eof" + \\int x; + \\#pragma GCC ignored error "-Wnewline-eof" + \\ + ; + try Test.check(preserve_gcc_diagnostic, preserve_gcc_diagnostic); + + const omit_once = + \\#pragma once + \\int x; + \\#pragma once + \\ + ; + try Test.check(omit_once, "int x;\n"); + + const omit_poison = + \\#pragma GCC poison foobar + \\ + ; + try Test.check(omit_poison, ""); +} + +test "destringify" { + const allocator = std.testing.allocator; + const Test = struct { + fn testDestringify(pp: *Preprocessor, stringified: []const u8, destringified: []const u8) !void { + pp.char_buf.clearRetainingCapacity(); + try pp.char_buf.ensureUnusedCapacity(stringified.len); + pp.destringify(stringified); + try std.testing.expectEqualStrings(destringified, pp.char_buf.items); + } + }; + var comp = Compilation.init(allocator); + defer comp.deinit(); + var pp = Preprocessor.init(&comp); + defer pp.deinit(); + + try Test.testDestringify(&pp, "hello\tworld\n", "hello\tworld\n"); + try Test.testDestringify(&pp, + \\ \"FOO BAR BAZ\" + , + \\ "FOO BAR BAZ" + 
);
+    try Test.testDestringify(&pp,
+        \\ \\t\\n
+        \\
+    ,
+        \\ \t\n
+        \\
+    );
+}
diff --git a/src/aro/Source.zig b/src/aro/Source.zig
new file mode 100644
index 000000000000..d2efadb5551b
--- /dev/null
+++ b/src/aro/Source.zig
@@ -0,0 +1,131 @@
+const std = @import("std");
+const Source = @This();
+
+pub const Id = enum(u32) {
+    unused = 0,
+    generated = 1,
+    _,
+};
+
+pub const Location = struct {
+    id: Id = .unused,
+    byte_offset: u32 = 0,
+    line: u32 = 0,
+
+    pub fn eql(a: Location, b: Location) bool {
+        return a.id == b.id and a.byte_offset == b.byte_offset and a.line == b.line;
+    }
+};
+
+path: []const u8,
+buf: []const u8,
+id: Id,
+invalid_utf8_loc: ?Location = null,
+/// Each entry represents a byte position within `buf` where a backslash+newline was deleted
+/// from the original raw buffer. The same position can appear multiple times if multiple
+/// consecutive splices happened. Guaranteed to be non-decreasing.
+splice_locs: []const u32,
+
+/// TODO: binary search instead of scanning entire `splice_locs`.
+pub fn numSplicesBefore(source: Source, byte_offset: u32) u32 {
+    for (source.splice_locs) |splice_offset, i| {
+        if (splice_offset > byte_offset) return @intCast(u32, i);
+    }
+    return @intCast(u32, source.splice_locs.len);
+}
+
+/// Returns the actual line number (before newline splicing) of a Location.
+/// This corresponds to what the user would actually see in their text editor.
+pub fn physicalLine(source: Source, loc: Location) u32 {
+    return loc.line + source.numSplicesBefore(loc.byte_offset);
+}
+
+const LineCol = struct { line: []const u8, line_no: u32, col: u32, width: u32, end_with_splice: bool };
+
+pub fn lineCol(source: Source, loc: Location) LineCol {
+    var start: usize = 0;
+    // Find the start of the line, which is either a newline or a splice.
+    if (std.mem.lastIndexOfScalar(u8, source.buf[0..loc.byte_offset], '\n')) |some| start = some + 1;
+    const splice_index = for (source.splice_locs) |splice_offset, i| {
+        if (splice_offset > start) {
+            if (splice_offset < loc.byte_offset) {
+                start = splice_offset;
+                break @intCast(u32, i) + 1;
+            }
+            break @intCast(u32, i);
+        }
+    } else @intCast(u32, source.splice_locs.len);
+    var i: usize = start;
+    var col: u32 = 1;
+    var width: u32 = 0;
+
+    while (i < loc.byte_offset) : (col += 1) { // TODO this is still incorrect, but better
+        const len = std.unicode.utf8ByteSequenceLength(source.buf[i]) catch unreachable;
+        const cp = std.unicode.utf8Decode(source.buf[i..][0..len]) catch unreachable;
+        width += codepointWidth(cp);
+        i += len;
+    }
+
+    // Find the end of the line, which is either a newline, EOF, or a splice.
+    var nl = source.buf.len;
+    var end_with_splice = false;
+    if (std.mem.indexOfScalar(u8, source.buf[start..], '\n')) |some| nl = some + start;
+    if (source.splice_locs.len > splice_index and nl > source.splice_locs[splice_index] and source.splice_locs[splice_index] > start) {
+        end_with_splice = true;
+        nl = source.splice_locs[splice_index];
+    }
+    return .{
+        .line = source.buf[start..nl],
+        .line_no = loc.line + splice_index,
+        .col = col,
+        .width = width,
+        .end_with_splice = end_with_splice,
+    };
+}
+
+fn codepointWidth(cp: u32) u32 {
+    return switch (cp) {
+        0x1100...0x115F,
+        0x2329,
+        0x232A,
+        0x2E80...0x303F,
+        0x3040...0x3247,
+        0x3250...0x4DBF,
+        0x4E00...0xA4C6,
+        0xA960...0xA97C,
+        0xAC00...0xD7A3,
+        0xF900...0xFAFF,
+        0xFE10...0xFE19,
+        0xFE30...0xFE6B,
+        0xFF01...0xFF60,
+        0xFFE0...0xFFE6,
+        0x1B000...0x1B001,
+        0x1F200...0x1F251,
+        0x20000...0x3FFFD,
+        0x1F300...0x1F5FF,
+        0x1F900...0x1F9FF,
+        => 2,
+        else 
=> 1, + }; +} + +/// Returns the first offset, if any, in buf where an invalid utf8 sequence +/// is found. Code adapted from std.unicode.utf8ValidateSlice +fn offsetOfInvalidUtf8(buf: []const u8) ?u32 { + std.debug.assert(buf.len <= std.math.maxInt(u32)); + var i: u32 = 0; + while (i < buf.len) { + if (std.unicode.utf8ByteSequenceLength(buf[i])) |cp_len| { + if (i + cp_len > buf.len) return i; + if (std.meta.isError(std.unicode.utf8Decode(buf[i .. i + cp_len]))) return i; + i += cp_len; + } else |_| return i; + } + return null; +} + +pub fn checkUtf8(source: *Source) void { + if (offsetOfInvalidUtf8(source.buf)) |offset| { + source.invalid_utf8_loc = Location{ .id = source.id, .byte_offset = offset }; + } +} diff --git a/src/aro/Tokenizer.zig b/src/aro/Tokenizer.zig new file mode 100644 index 000000000000..dd848c443646 --- /dev/null +++ b/src/aro/Tokenizer.zig @@ -0,0 +1,1995 @@ +const std = @import("std"); +const assert = std.debug.assert; +const Compilation = @import("Compilation.zig"); +const Source = @import("Source.zig"); +const LangOpts = @import("LangOpts.zig"); +const CharInfo = @import("CharInfo.zig"); + +const Tokenizer = @This(); + +pub const Token = struct { + id: Id, + source: Source.Id, + start: u32 = 0, + end: u32 = 0, + line: u32 = 0, + + pub const Id = enum(u8) { + invalid, + nl, + whitespace, + eof, + /// identifier containing solely basic character set characters + identifier, + /// identifier with at least one extended character + extended_identifier, + + // string literals with prefixes + string_literal, + string_literal_utf_16, + string_literal_utf_8, + string_literal_utf_32, + string_literal_wide, + + // only generated by preprocessor + macro_string, + + // char literals with prefixes + char_literal, + char_literal_utf_16, + char_literal_utf_32, + char_literal_wide, + + // float literals with suffixes + float_literal, + float_literal_f, + float_literal_l, + + // imaginary literals + imaginary_literal, + imaginary_literal_f, + imaginary_literal_l, + + // integer literals with suffixes + integer_literal, + integer_literal_u, + integer_literal_l, + integer_literal_lu, + integer_literal_ll, + integer_literal_llu, + + /// Integer literal tokens generated by preprocessor. + one, + zero, + + bang, + bang_equal, + pipe, + pipe_pipe, + pipe_equal, + equal, + equal_equal, + l_paren, + r_paren, + l_brace, + r_brace, + l_bracket, + r_bracket, + period, + ellipsis, + caret, + caret_equal, + plus, + plus_plus, + plus_equal, + minus, + minus_minus, + minus_equal, + asterisk, + asterisk_equal, + percent, + percent_equal, + arrow, + colon, + colon_colon, + semicolon, + slash, + slash_equal, + comma, + ampersand, + ampersand_ampersand, + ampersand_equal, + question_mark, + angle_bracket_left, + angle_bracket_left_equal, + angle_bracket_angle_bracket_left, + angle_bracket_angle_bracket_left_equal, + angle_bracket_right, + angle_bracket_right_equal, + angle_bracket_angle_bracket_right, + angle_bracket_angle_bracket_right_equal, + tilde, + hash, + hash_hash, + + /// Special token to speed up preprocessing, `loc.end` will be an index to the param list. + macro_param, + /// Special token to signal that the argument must be replaced without expansion (e.g. in concatenation) + macro_param_no_expand, + /// Special token to speed up preprocessing, `loc.end` will be an index to the param list. 
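+        /// e.g. in `#define STR(x) #x` (an illustrative macro), the `#x` becomes
+        /// this single token, with the index of `x` in the parameter list stored
+        /// in place of the end offset (see the `.hash` handling during defines above).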
+ stringify_param, + /// Same as stringify_param, but for var args + stringify_va_args, + /// Special macro whitespace, always equal to a single space + macro_ws, + /// Special token for implementing __has_attribute + macro_param_has_attribute, + /// Special token for implementing __has_warning + macro_param_has_warning, + /// Special token for implementing __has_feature + macro_param_has_feature, + /// Special token for implementing __has_extension + macro_param_has_extension, + /// Special token for implementing __has_builtin + macro_param_has_builtin, + /// Special token for implementing __is_identifier + macro_param_is_identifier, + /// Special token for implementing __FILE__ + macro_file, + /// Special token for implementing __LINE__ + macro_line, + /// Special token for implementing __COUNTER__ + macro_counter, + /// Special token for implementing _Pragma + macro_param_pragma_operator, + + /// Special identifier for implementing __func__ + macro_func, + /// Special identifier for implementing __FUNCTION__ + macro_function, + /// Special identifier for implementing __PRETTY_FUNCTION__ + macro_pretty_func, + + keyword_auto, + keyword_break, + keyword_case, + keyword_char, + keyword_const, + keyword_continue, + keyword_default, + keyword_do, + keyword_double, + keyword_else, + keyword_enum, + keyword_extern, + keyword_float, + keyword_for, + keyword_goto, + keyword_if, + keyword_int, + keyword_long, + keyword_register, + keyword_return, + keyword_short, + keyword_signed, + keyword_sizeof, + keyword_static, + keyword_struct, + keyword_switch, + keyword_typedef, + keyword_typeof1, + keyword_typeof2, + keyword_union, + keyword_unsigned, + keyword_void, + keyword_volatile, + keyword_while, + + // ISO C99 + keyword_bool, + keyword_complex, + keyword_imaginary, + keyword_inline, + keyword_restrict, + + // ISO C11 + keyword_alignas, + keyword_alignof, + keyword_atomic, + keyword_generic, + keyword_noreturn, + keyword_static_assert, + keyword_thread_local, + + // Preprocessor directives + keyword_include, + keyword_define, + keyword_defined, + keyword_undef, + keyword_ifdef, + keyword_ifndef, + keyword_elif, + keyword_endif, + keyword_error, + keyword_warning, + keyword_pragma, + keyword_line, + keyword_va_args, + + // gcc keywords + keyword_const1, + keyword_const2, + keyword_inline1, + keyword_inline2, + keyword_volatile1, + keyword_volatile2, + keyword_restrict1, + keyword_restrict2, + keyword_alignof1, + keyword_alignof2, + keyword_typeof, + keyword_attribute1, + keyword_attribute2, + keyword_extension, + keyword_asm, + keyword_asm1, + keyword_asm2, + + // ms keywords + keyword_declspec, + + // builtins that require special parsing + builtin_choose_expr, + builtin_va_arg, + + /// Return true if token is identifier or keyword. 
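+        /// Keywords count as macro identifiers because C allows them as macro names;
+        /// e.g. the common compatibility shim `#define inline __inline` must be
+        /// accepted, so `keyword_inline` is in the list below.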
+ pub fn isMacroIdentifier(id: Id) bool { + switch (id) { + .keyword_include, + .keyword_define, + .keyword_defined, + .keyword_undef, + .keyword_ifdef, + .keyword_ifndef, + .keyword_elif, + .keyword_endif, + .keyword_error, + .keyword_warning, + .keyword_pragma, + .keyword_line, + .keyword_va_args, + .macro_func, + .macro_function, + .macro_pretty_func, + .keyword_auto, + .keyword_break, + .keyword_case, + .keyword_char, + .keyword_const, + .keyword_continue, + .keyword_default, + .keyword_do, + .keyword_double, + .keyword_else, + .keyword_enum, + .keyword_extern, + .keyword_float, + .keyword_for, + .keyword_goto, + .keyword_if, + .keyword_int, + .keyword_long, + .keyword_register, + .keyword_return, + .keyword_short, + .keyword_signed, + .keyword_sizeof, + .keyword_static, + .keyword_struct, + .keyword_switch, + .keyword_typedef, + .keyword_union, + .keyword_unsigned, + .keyword_void, + .keyword_volatile, + .keyword_while, + .keyword_bool, + .keyword_complex, + .keyword_imaginary, + .keyword_inline, + .keyword_restrict, + .keyword_alignas, + .keyword_alignof, + .keyword_atomic, + .keyword_generic, + .keyword_noreturn, + .keyword_static_assert, + .keyword_thread_local, + .identifier, + .extended_identifier, + .keyword_typeof, + .keyword_typeof1, + .keyword_typeof2, + .keyword_const1, + .keyword_const2, + .keyword_inline1, + .keyword_inline2, + .keyword_volatile1, + .keyword_volatile2, + .keyword_restrict1, + .keyword_restrict2, + .keyword_alignof1, + .keyword_alignof2, + .builtin_choose_expr, + .builtin_va_arg, + .keyword_attribute1, + .keyword_attribute2, + .keyword_extension, + .keyword_asm, + .keyword_asm1, + .keyword_asm2, + .keyword_declspec, + => return true, + else => return false, + } + } + + /// Turn macro keywords into identifiers. + pub fn simplifyMacroKeyword(id: *Id) void { + switch (id.*) { + .keyword_include, + .keyword_define, + .keyword_defined, + .keyword_undef, + .keyword_ifdef, + .keyword_ifndef, + .keyword_elif, + .keyword_endif, + .keyword_error, + .keyword_warning, + .keyword_pragma, + .keyword_line, + .keyword_va_args, + => id.* = .identifier, + else => {}, + } + } + + pub fn lexeme(id: Id) ?[]const u8 { + return switch (id) { + .invalid, + .identifier, + .extended_identifier, + .string_literal, + .string_literal_utf_16, + .string_literal_utf_8, + .string_literal_utf_32, + .string_literal_wide, + .char_literal, + .char_literal_utf_16, + .char_literal_utf_32, + .char_literal_wide, + .float_literal, + .float_literal_f, + .float_literal_l, + .imaginary_literal, + .imaginary_literal_f, + .imaginary_literal_l, + .integer_literal, + .integer_literal_u, + .integer_literal_l, + .integer_literal_lu, + .integer_literal_ll, + .integer_literal_llu, + .macro_string, + .whitespace, + => null, + + .zero => "0", + .one => "1", + + .nl, + .eof, + .macro_param, + .macro_param_no_expand, + .stringify_param, + .stringify_va_args, + .macro_param_has_attribute, + .macro_param_has_warning, + .macro_param_has_feature, + .macro_param_has_extension, + .macro_param_has_builtin, + .macro_param_is_identifier, + .macro_file, + .macro_line, + .macro_counter, + .macro_param_pragma_operator, + => "", + .macro_ws => " ", + + .macro_func => "__func__", + .macro_function => "__FUNCTION__", + .macro_pretty_func => "__PRETTY_FUNCTION__", + + .bang => "!", + .bang_equal => "!=", + .pipe => "|", + .pipe_pipe => "||", + .pipe_equal => "|=", + .equal => "=", + .equal_equal => "==", + .l_paren => "(", + .r_paren => ")", + .l_brace => "{", + .r_brace => "}", + .l_bracket => "[", + .r_bracket => "]", + 
.period => ".", + .ellipsis => "...", + .caret => "^", + .caret_equal => "^=", + .plus => "+", + .plus_plus => "++", + .plus_equal => "+=", + .minus => "-", + .minus_minus => "--", + .minus_equal => "-=", + .asterisk => "*", + .asterisk_equal => "*=", + .percent => "%", + .percent_equal => "%=", + .arrow => "->", + .colon => ":", + .colon_colon => "::", + .semicolon => ";", + .slash => "/", + .slash_equal => "/=", + .comma => ",", + .ampersand => "&", + .ampersand_ampersand => "&&", + .ampersand_equal => "&=", + .question_mark => "?", + .angle_bracket_left => "<", + .angle_bracket_left_equal => "<=", + .angle_bracket_angle_bracket_left => "<<", + .angle_bracket_angle_bracket_left_equal => "<<=", + .angle_bracket_right => ">", + .angle_bracket_right_equal => ">=", + .angle_bracket_angle_bracket_right => ">>", + .angle_bracket_angle_bracket_right_equal => ">>=", + .tilde => "~", + .hash => "#", + .hash_hash => "##", + + .keyword_auto => "auto", + .keyword_break => "break", + .keyword_case => "case", + .keyword_char => "char", + .keyword_const => "const", + .keyword_continue => "continue", + .keyword_default => "default", + .keyword_do => "do", + .keyword_double => "double", + .keyword_else => "else", + .keyword_enum => "enum", + .keyword_extern => "extern", + .keyword_float => "float", + .keyword_for => "for", + .keyword_goto => "goto", + .keyword_if => "if", + .keyword_int => "int", + .keyword_long => "long", + .keyword_register => "register", + .keyword_return => "return", + .keyword_short => "short", + .keyword_signed => "signed", + .keyword_sizeof => "sizeof", + .keyword_static => "static", + .keyword_struct => "struct", + .keyword_switch => "switch", + .keyword_typedef => "typedef", + .keyword_typeof => "typeof", + .keyword_union => "union", + .keyword_unsigned => "unsigned", + .keyword_void => "void", + .keyword_volatile => "volatile", + .keyword_while => "while", + .keyword_bool => "_Bool", + .keyword_complex => "_Complex", + .keyword_imaginary => "_Imaginary", + .keyword_inline => "inline", + .keyword_restrict => "restrict", + .keyword_alignas => "_Alignas", + .keyword_alignof => "_Alignof", + .keyword_atomic => "_Atomic", + .keyword_generic => "_Generic", + .keyword_noreturn => "_Noreturn", + .keyword_static_assert => "_Static_assert", + .keyword_thread_local => "_Thread_local", + .keyword_include => "include", + .keyword_define => "define", + .keyword_defined => "defined", + .keyword_undef => "undef", + .keyword_ifdef => "ifdef", + .keyword_ifndef => "ifndef", + .keyword_elif => "elif", + .keyword_endif => "endif", + .keyword_error => "error", + .keyword_warning => "warning", + .keyword_pragma => "pragma", + .keyword_line => "line", + .keyword_va_args => "__VA_ARGS__", + .keyword_const1 => "__const", + .keyword_const2 => "__const__", + .keyword_inline1 => "__inline", + .keyword_inline2 => "__inline__", + .keyword_volatile1 => "__volatile", + .keyword_volatile2 => "__volatile__", + .keyword_restrict1 => "__restrict", + .keyword_restrict2 => "__restrict__", + .keyword_alignof1 => "__alignof", + .keyword_alignof2 => "__alignof__", + .keyword_typeof1 => "__typeof", + .keyword_typeof2 => "__typeof__", + .builtin_choose_expr => "__builtin_choose_expr", + .builtin_va_arg => "__builtin_va_arg", + .keyword_attribute1 => "__attribute", + .keyword_attribute2 => "__attribute__", + .keyword_extension => "__extension__", + .keyword_asm => "asm", + .keyword_asm1 => "__asm", + .keyword_asm2 => "__asm__", + .keyword_declspec => "__declspec", + }; + } + + pub fn symbol(id: Id) []const u8 { + return 
switch (id) { + .macro_string, .invalid => unreachable, + .identifier, + .extended_identifier, + .macro_func, + .macro_function, + .macro_pretty_func, + .builtin_choose_expr, + .builtin_va_arg, + => "an identifier", + .string_literal, + .string_literal_utf_16, + .string_literal_utf_8, + .string_literal_utf_32, + .string_literal_wide, + => "a string literal", + .char_literal, + .char_literal_utf_16, + .char_literal_utf_32, + .char_literal_wide, + => "a character literal", + .float_literal, + .float_literal_f, + .float_literal_l, + => "a float literal", + .imaginary_literal, + .imaginary_literal_f, + .imaginary_literal_l, + => "an imaginary literal", + .integer_literal, + .integer_literal_u, + .integer_literal_l, + .integer_literal_lu, + .integer_literal_ll, + .integer_literal_llu, + => "an integer literal", + else => id.lexeme().?, + }; + } + + /// tokens that can start an expression parsed by Preprocessor.expr + /// Note that eof, r_paren, and string literals cannot actually start a + /// preprocessor expression, but we include them here so that a nicer + /// error message can be generated by the parser. + pub fn validPreprocessorExprStart(id: Id) bool { + return switch (id) { + .eof, + .r_paren, + .string_literal, + .string_literal_utf_16, + .string_literal_utf_8, + .string_literal_utf_32, + .string_literal_wide, + + .integer_literal, + .integer_literal_u, + .integer_literal_l, + .integer_literal_lu, + .integer_literal_ll, + .integer_literal_llu, + .float_literal, + .float_literal_f, + .float_literal_l, + .imaginary_literal, + .imaginary_literal_f, + .imaginary_literal_l, + .char_literal, + .char_literal_utf_16, + .char_literal_utf_32, + .char_literal_wide, + .l_paren, + .plus, + .minus, + .tilde, + .bang, + .identifier, + .extended_identifier, + .one, + .zero, + => true, + else => false, + }; + } + }; + + /// double underscore and underscore + capital letter identifiers + /// belong to the implementation namespace, so we always convert them + /// to keywords. 
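+    /// For example: `__asm__` always maps to `keyword_asm2`, while plain `asm`
+    /// only maps to `keyword_asm` when the selected standard is GNU (see the
+    /// switch below); otherwise it is returned as a plain `identifier`.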
+ pub fn getTokenId(comp: *const Compilation, str: []const u8) Token.Id { + const kw = all_kws.get(str) orelse return .identifier; + const standard = comp.langopts.standard; + return switch (kw) { + .keyword_inline => if (standard.isGNU() or standard.atLeast(.c99)) kw else .identifier, + .keyword_restrict => if (standard.atLeast(.c99)) kw else .identifier, + .keyword_typeof => if (standard.isGNU()) kw else .identifier, + .keyword_asm => if (standard.isGNU()) kw else .identifier, + else => kw, + }; + } + + /// Check if codepoint may appear in specified context + /// does not check basic character set chars because the tokenizer handles them separately to keep the common + /// case on the fast path + pub fn mayAppearInIdent(comp: *const Compilation, codepoint: u21, where: enum { start, inside }) bool { + if (codepoint == '$') return comp.langopts.dollars_in_identifiers; + if (codepoint <= 0x7F) return false; + return switch (where) { + .start => if (comp.langopts.standard.atLeast(.c11)) + CharInfo.isC11IdChar(codepoint) and !CharInfo.isC11DisallowedInitialIdChar(codepoint) + else + CharInfo.isC99IdChar(codepoint) and !CharInfo.isC99DisallowedInitialIDChar(codepoint), + .inside => if (comp.langopts.standard.atLeast(.c11)) + CharInfo.isC11IdChar(codepoint) + else + CharInfo.isC99IdChar(codepoint), + }; + } + + const all_kws = std.ComptimeStringMap(Id, .{ + .{ "auto", .keyword_auto }, + .{ "break", .keyword_break }, + .{ "case", .keyword_case }, + .{ "char", .keyword_char }, + .{ "const", .keyword_const }, + .{ "continue", .keyword_continue }, + .{ "default", .keyword_default }, + .{ "do", .keyword_do }, + .{ "double", .keyword_double }, + .{ "else", .keyword_else }, + .{ "enum", .keyword_enum }, + .{ "extern", .keyword_extern }, + .{ "float", .keyword_float }, + .{ "for", .keyword_for }, + .{ "goto", .keyword_goto }, + .{ "if", .keyword_if }, + .{ "int", .keyword_int }, + .{ "long", .keyword_long }, + .{ "register", .keyword_register }, + .{ "return", .keyword_return }, + .{ "short", .keyword_short }, + .{ "signed", .keyword_signed }, + .{ "sizeof", .keyword_sizeof }, + .{ "static", .keyword_static }, + .{ "struct", .keyword_struct }, + .{ "switch", .keyword_switch }, + .{ "typedef", .keyword_typedef }, + .{ "union", .keyword_union }, + .{ "unsigned", .keyword_unsigned }, + .{ "void", .keyword_void }, + .{ "volatile", .keyword_volatile }, + .{ "while", .keyword_while }, + .{ "__typeof__", .keyword_typeof2 }, + .{ "__typeof", .keyword_typeof1 }, + + // ISO C99 + .{ "_Bool", .keyword_bool }, + .{ "_Complex", .keyword_complex }, + .{ "_Imaginary", .keyword_imaginary }, + .{ "inline", .keyword_inline }, + .{ "restrict", .keyword_restrict }, + + // ISO C11 + .{ "_Alignas", .keyword_alignas }, + .{ "_Alignof", .keyword_alignof }, + .{ "_Atomic", .keyword_atomic }, + .{ "_Generic", .keyword_generic }, + .{ "_Noreturn", .keyword_noreturn }, + .{ "_Static_assert", .keyword_static_assert }, + .{ "_Thread_local", .keyword_thread_local }, + + // Preprocessor directives + .{ "include", .keyword_include }, + .{ "define", .keyword_define }, + .{ "defined", .keyword_defined }, + .{ "undef", .keyword_undef }, + .{ "ifdef", .keyword_ifdef }, + .{ "ifndef", .keyword_ifndef }, + .{ "elif", .keyword_elif }, + .{ "endif", .keyword_endif }, + .{ "error", .keyword_error }, + .{ "warning", .keyword_warning }, + .{ "pragma", .keyword_pragma }, + .{ "line", .keyword_line }, + .{ "__VA_ARGS__", .keyword_va_args }, + .{ "__func__", .macro_func }, + .{ "__FUNCTION__", .macro_function }, + .{ "__PRETTY_FUNCTION__", 
.macro_pretty_func }, + + // gcc keywords + .{ "__const", .keyword_const1 }, + .{ "__const__", .keyword_const2 }, + .{ "__inline", .keyword_inline1 }, + .{ "__inline__", .keyword_inline2 }, + .{ "__volatile", .keyword_volatile1 }, + .{ "__volatile__", .keyword_volatile2 }, + .{ "__restrict", .keyword_restrict1 }, + .{ "__restrict__", .keyword_restrict2 }, + .{ "__alignof", .keyword_alignof1 }, + .{ "__alignof__", .keyword_alignof2 }, + .{ "typeof", .keyword_typeof }, + .{ "__attribute", .keyword_attribute1 }, + .{ "__attribute__", .keyword_attribute2 }, + .{ "__extension__", .keyword_extension }, + .{ "asm", .keyword_asm }, + .{ "__asm", .keyword_asm1 }, + .{ "__asm__", .keyword_asm2 }, + + // ms keywords + .{ "__declspec", .keyword_declspec }, + + // builtins that require special parsing + .{ "__builtin_choose_expr", .builtin_choose_expr }, + .{ "__builtin_va_arg", .builtin_va_arg }, + }); +}; + +buf: []const u8, +index: u32 = 0, +source: Source.Id, +comp: *const Compilation, +line: u32 = 1, + +pub fn next(self: *Tokenizer) Token { + var state: enum { + start, + whitespace, + u, + u8, + U, + L, + string_literal, + char_literal_start, + char_literal, + escape_sequence, + octal_escape, + hex_escape, + unicode_escape, + identifier, + extended_identifier, + equal, + bang, + pipe, + colon, + percent, + asterisk, + plus, + angle_bracket_left, + angle_bracket_angle_bracket_left, + angle_bracket_right, + angle_bracket_angle_bracket_right, + caret, + period, + period2, + minus, + slash, + ampersand, + hash, + line_comment, + multi_line_comment, + multi_line_comment_asterisk, + multi_line_comment_done, + zero, + integer_literal_oct, + integer_literal_binary, + integer_literal_binary_first, + integer_literal_hex, + integer_literal_hex_first, + integer_literal, + integer_suffix, + integer_suffix_u, + integer_suffix_l, + integer_suffix_ll, + integer_suffix_ul, + float_fraction, + float_fraction_hex, + float_exponent, + float_exponent_digits, + float_suffix, + float_suffix_f, + float_suffix_i, + float_suffix_l, + } = .start; + + var start = self.index; + var id: Token.Id = .eof; + + var return_state = state; + var counter: u32 = 0; + var codepoint_len: u3 = undefined; + while (self.index < self.buf.len) : (self.index += codepoint_len) { + codepoint_len = std.unicode.utf8ByteSequenceLength(self.buf[self.index]) catch unreachable; + const c = std.unicode.utf8Decode(self.buf[self.index .. self.index + codepoint_len]) catch unreachable; + switch (state) { + .start => switch (c) { + '\n' => { + id = .nl; + self.index += 1; + self.line += 1; + break; + }, + '"' => { + id = .string_literal; + state = .string_literal; + }, + '\'' => { + id = .char_literal; + state = .char_literal_start; + }, + 'u' => state = .u, + 'U' => state = .U, + 'L' => state = .L, + 'a'...'t', 'v'...'z', 'A'...'K', 'M'...'T', 'V'...'Z', '_' => state = .identifier, + '=' => state = .equal, + '!' => state = .bang, + '|' => state = .pipe, + '(' => { + id = .l_paren; + self.index += 1; + break; + }, + ')' => { + id = .r_paren; + self.index += 1; + break; + }, + '[' => { + id = .l_bracket; + self.index += 1; + break; + }, + ']' => { + id = .r_bracket; + self.index += 1; + break; + }, + ';' => { + id = .semicolon; + self.index += 1; + break; + }, + ',' => { + id = .comma; + self.index += 1; + break; + }, + '?' 
=> { + id = .question_mark; + self.index += 1; + break; + }, + ':' => if (self.comp.langopts.standard.atLeast(.c2x)) { + state = .colon; + } else { + id = .colon; + self.index += 1; + break; + }, + '%' => state = .percent, + '*' => state = .asterisk, + '+' => state = .plus, + '<' => state = .angle_bracket_left, + '>' => state = .angle_bracket_right, + '^' => state = .caret, + '{' => { + id = .l_brace; + self.index += 1; + break; + }, + '}' => { + id = .r_brace; + self.index += 1; + break; + }, + '~' => { + id = .tilde; + self.index += 1; + break; + }, + '.' => state = .period, + '-' => state = .minus, + '/' => state = .slash, + '&' => state = .ampersand, + '#' => state = .hash, + '0' => state = .zero, + '1'...'9' => state = .integer_literal, + '\t', '\x0B', '\x0C', ' ' => state = .whitespace, + else => if (Token.mayAppearInIdent(self.comp, c, .start)) { + state = .extended_identifier; + } else { + id = .invalid; + self.index += codepoint_len; + break; + }, + }, + .whitespace => switch (c) { + '\t', '\x0B', '\x0C', ' ' => {}, + else => { + id = .whitespace; + break; + }, + }, + .u => switch (c) { + '8' => { + state = .u8; + }, + '\'' => { + id = .char_literal_utf_16; + state = .char_literal_start; + }, + '\"' => { + id = .string_literal_utf_16; + state = .string_literal; + }, + else => { + codepoint_len = 0; + state = .identifier; + }, + }, + .u8 => switch (c) { + '\"' => { + id = .string_literal_utf_8; + state = .string_literal; + }, + else => { + codepoint_len = 0; + state = .identifier; + }, + }, + .U => switch (c) { + '\'' => { + id = .char_literal_utf_32; + state = .char_literal_start; + }, + '\"' => { + id = .string_literal_utf_32; + state = .string_literal; + }, + else => { + codepoint_len = 0; + state = .identifier; + }, + }, + .L => switch (c) { + '\'' => { + id = .char_literal_wide; + state = .char_literal_start; + }, + '\"' => { + id = .string_literal_wide; + state = .string_literal; + }, + else => { + codepoint_len = 0; + state = .identifier; + }, + }, + .string_literal => switch (c) { + '\\' => { + return_state = .string_literal; + state = .escape_sequence; + }, + '"' => { + self.index += 1; + break; + }, + '\n' => { + id = .invalid; + break; + }, + '\r' => unreachable, + else => {}, + }, + .char_literal_start => switch (c) { + '\\' => { + return_state = .char_literal; + state = .escape_sequence; + }, + + '\'', '\n' => { + id = .invalid; + break; + }, + else => { + state = .char_literal; + }, + }, + .char_literal => switch (c) { + '\\' => { + return_state = .char_literal; + state = .escape_sequence; + }, + '\'' => { + self.index += 1; + break; + }, + '\n' => { + id = .invalid; + break; + }, + else => {}, + }, + .escape_sequence => switch (c) { + '\'', '"', '?', '\\', 'a', 'b', 'e', 'f', 'n', 'r', 't', 'v' => { + state = return_state; + }, + '\n' => { + state = return_state; + self.line += 1; + }, + '0'...'7' => { + counter = 1; + state = .octal_escape; + }, + 'x' => state = .hex_escape, + 'u' => { + counter = 4; + state = .unicode_escape; + }, + 'U' => { + counter = 8; + state = .unicode_escape; + }, + else => { + id = .invalid; + break; + }, + }, + .octal_escape => switch (c) { + '0'...'7' => { + counter += 1; + if (counter == 3) state = return_state; + }, + else => { + codepoint_len = 0; + state = return_state; + }, + }, + .hex_escape => switch (c) { + '0'...'9', 'a'...'f', 'A'...'F' => {}, + else => { + codepoint_len = 0; + state = return_state; + }, + }, + .unicode_escape => switch (c) { + '0'...'9', 'a'...'f', 'A'...'F' => { + counter -= 1; + if (counter == 0) state = 
return_state; + }, + else => { + id = .invalid; + break; + }, + }, + .identifier, .extended_identifier => switch (c) { + 'a'...'z', 'A'...'Z', '_', '0'...'9' => {}, + else => { + if (!Token.mayAppearInIdent(self.comp, c, .inside)) { + id = if (state == .identifier) Token.getTokenId(self.comp, self.buf[start..self.index]) else .extended_identifier; + break; + } + state = .extended_identifier; + }, + }, + .equal => switch (c) { + '=' => { + id = .equal_equal; + self.index += 1; + break; + }, + else => { + id = .equal; + break; + }, + }, + .bang => switch (c) { + '=' => { + id = .bang_equal; + self.index += 1; + break; + }, + else => { + id = .bang; + break; + }, + }, + .pipe => switch (c) { + '=' => { + id = .pipe_equal; + self.index += 1; + break; + }, + '|' => { + id = .pipe_pipe; + self.index += 1; + break; + }, + else => { + id = .pipe; + break; + }, + }, + .colon => switch (c) { + ':' => { + id = .colon_colon; + self.index += 1; + break; + }, + else => { + id = .colon; + break; + }, + }, + .percent => switch (c) { + '=' => { + id = .percent_equal; + self.index += 1; + break; + }, + else => { + id = .percent; + break; + }, + }, + .asterisk => switch (c) { + '=' => { + id = .asterisk_equal; + self.index += 1; + break; + }, + else => { + id = .asterisk; + break; + }, + }, + .plus => switch (c) { + '=' => { + id = .plus_equal; + self.index += 1; + break; + }, + '+' => { + id = .plus_plus; + self.index += 1; + break; + }, + else => { + id = .plus; + break; + }, + }, + .angle_bracket_left => switch (c) { + '<' => state = .angle_bracket_angle_bracket_left, + '=' => { + id = .angle_bracket_left_equal; + self.index += 1; + break; + }, + else => { + id = .angle_bracket_left; + break; + }, + }, + .angle_bracket_angle_bracket_left => switch (c) { + '=' => { + id = .angle_bracket_angle_bracket_left_equal; + self.index += 1; + break; + }, + else => { + id = .angle_bracket_angle_bracket_left; + break; + }, + }, + .angle_bracket_right => switch (c) { + '>' => state = .angle_bracket_angle_bracket_right, + '=' => { + id = .angle_bracket_right_equal; + self.index += 1; + break; + }, + else => { + id = .angle_bracket_right; + break; + }, + }, + .angle_bracket_angle_bracket_right => switch (c) { + '=' => { + id = .angle_bracket_angle_bracket_right_equal; + self.index += 1; + break; + }, + else => { + id = .angle_bracket_angle_bracket_right; + break; + }, + }, + .caret => switch (c) { + '=' => { + id = .caret_equal; + self.index += 1; + break; + }, + else => { + id = .caret; + break; + }, + }, + .period => switch (c) { + '.' => state = .period2, + '0'...'9' => state = .float_fraction, + else => { + id = .period; + break; + }, + }, + .period2 => switch (c) { + '.' 
=> { + id = .ellipsis; + self.index += 1; + break; + }, + else => { + id = .period; + self.index -= 1; + break; + }, + }, + .minus => switch (c) { + '>' => { + id = .arrow; + self.index += 1; + break; + }, + '=' => { + id = .minus_equal; + self.index += 1; + break; + }, + '-' => { + id = .minus_minus; + self.index += 1; + break; + }, + else => { + id = .minus; + break; + }, + }, + .ampersand => switch (c) { + '&' => { + id = .ampersand_ampersand; + self.index += 1; + break; + }, + '=' => { + id = .ampersand_equal; + self.index += 1; + break; + }, + else => { + id = .ampersand; + break; + }, + }, + .hash => switch (c) { + '#' => { + id = .hash_hash; + self.index += 1; + break; + }, + else => { + id = .hash; + break; + }, + }, + .slash => switch (c) { + '/' => state = .line_comment, + '*' => state = .multi_line_comment, + '=' => { + id = .slash_equal; + self.index += 1; + break; + }, + else => { + id = .slash; + break; + }, + }, + .line_comment => switch (c) { + '\n' => { + self.index -= 1; + state = .start; + }, + else => {}, + }, + .multi_line_comment => switch (c) { + '*' => state = .multi_line_comment_asterisk, + '\n' => self.line += 1, + else => {}, + }, + .multi_line_comment_asterisk => switch (c) { + '/' => state = .multi_line_comment_done, + '\n' => { + self.line += 1; + state = .multi_line_comment; + }, + '*' => {}, + else => state = .multi_line_comment, + }, + .multi_line_comment_done => switch (c) { + '\n' => { + start = self.index; + id = .nl; + self.index += 1; + self.line += 1; + break; + }, + '\r' => unreachable, + '\t', '\x0B', '\x0C', ' ' => { + start = self.index; + state = .whitespace; + }, + else => { + id = .whitespace; + break; + }, + }, + .zero => switch (c) { + '0'...'9' => state = .integer_literal_oct, + 'b', 'B' => state = .integer_literal_binary_first, + 'x', 'X' => state = .integer_literal_hex_first, + '.' => state = .float_fraction, + else => { + if (c <= 0x7F) { + state = .integer_suffix; + self.index -= 1; + } else { + id = .integer_literal; + break; + } + }, + }, + .integer_literal_oct => switch (c) { + '0'...'7' => {}, + else => if (c <= 0x7F) { + state = .integer_suffix; + self.index -= 1; + } else { + id = .integer_literal; + break; + }, + }, + .integer_literal_binary_first => switch (c) { + '0', '1' => state = .integer_literal_binary, + else => { + id = .invalid; + break; + }, + }, + .integer_literal_binary => switch (c) { + '0', '1' => {}, + else => if (c <= 0x7F) { + state = .integer_suffix; + self.index -= 1; + } else { + id = .integer_literal; + break; + }, + }, + .integer_literal_hex_first => switch (c) { + '0'...'9', 'a'...'f', 'A'...'F' => state = .integer_literal_hex, + '.' => state = .float_fraction_hex, + 'p', 'P' => state = .float_exponent, + else => { + id = .invalid; + break; + }, + }, + .integer_literal_hex => switch (c) { + '0'...'9', 'a'...'f', 'A'...'F' => {}, + '.' => state = .float_fraction_hex, + 'p', 'P' => state = .float_exponent, + else => if (c <= 0x7F) { + state = .integer_suffix; + self.index -= 1; + } else { + id = .integer_literal; + break; + }, + }, + .integer_literal => switch (c) { + '0'...'9' => {}, + '.' 
=> state = .float_fraction, + 'e', 'E' => state = .float_exponent, + else => if (c <= 0x7F) { + state = .integer_suffix; + self.index -= 1; + } else { + id = .integer_literal; + break; + }, + }, + .integer_suffix => switch (c) { + 'u', 'U' => state = .integer_suffix_u, + 'l', 'L' => state = .integer_suffix_l, + else => { + id = .integer_literal; + break; + }, + }, + .integer_suffix_u => switch (c) { + 'l', 'L' => state = .integer_suffix_ul, + else => { + id = .integer_literal_u; + break; + }, + }, + .integer_suffix_l => switch (c) { + 'l', 'L' => state = .integer_suffix_ll, + 'u', 'U' => { + id = .integer_literal_lu; + self.index += 1; + break; + }, + else => { + id = .integer_literal_l; + break; + }, + }, + .integer_suffix_ll => switch (c) { + 'u', 'U' => { + id = .integer_literal_llu; + self.index += 1; + break; + }, + else => { + id = .integer_literal_ll; + break; + }, + }, + .integer_suffix_ul => switch (c) { + 'l', 'L' => { + id = .integer_literal_llu; + self.index += 1; + break; + }, + else => { + id = .integer_literal_lu; + break; + }, + }, + .float_fraction => switch (c) { + '0'...'9' => {}, + 'e', 'E' => state = .float_exponent, + else => if (c <= 0x7F) { + self.index -= 1; + state = .float_suffix; + } else { + id = .float_literal; + break; + }, + }, + .float_fraction_hex => switch (c) { + '0'...'9', 'a'...'f', 'A'...'F' => {}, + 'p', 'P' => state = .float_exponent, + else => { + id = .invalid; + break; + }, + }, + .float_exponent => switch (c) { + '+', '-' => state = .float_exponent_digits, + else => { + codepoint_len = 0; + state = .float_exponent_digits; + }, + }, + .float_exponent_digits => switch (c) { + '0'...'9' => counter += 1, + else => { + if (counter == 0) { + id = .invalid; + break; + } + codepoint_len = 0; + state = .float_suffix; + }, + }, + .float_suffix => switch (c) { + 'f', 'F' => state = .float_suffix_f, + 'i', 'I' => state = .float_suffix_i, + 'l', 'L' => state = .float_suffix_l, + else => { + id = .float_literal; + break; + }, + }, + .float_suffix_f => switch (c) { + 'i', 'I' => { + id = .imaginary_literal_f; + self.index += 1; + break; + }, + else => { + id = .float_literal_f; + break; + }, + }, + .float_suffix_i => switch (c) { + 'f', 'F' => { + id = .imaginary_literal_f; + self.index += 1; + break; + }, + 'l', 'L' => { + id = .imaginary_literal_l; + self.index += 1; + break; + }, + else => { + id = .imaginary_literal; + break; + }, + }, + .float_suffix_l => switch (c) { + 'i', 'I' => { + id = .imaginary_literal_l; + self.index += 1; + break; + }, + else => { + id = .float_literal_l; + break; + }, + }, + } + } else if (self.index == self.buf.len) { + switch (state) { + .start, .line_comment => {}, + .u, .u8, .U, .L, .identifier => id = Token.getTokenId(self.comp, self.buf[start..self.index]), + .extended_identifier => id = .extended_identifier, + .period2, + .string_literal, + .char_literal_start, + .char_literal, + .escape_sequence, + .octal_escape, + .hex_escape, + .unicode_escape, + .multi_line_comment, + .multi_line_comment_asterisk, + .float_exponent, + .integer_literal_binary_first, + .integer_literal_hex_first, + => id = .invalid, + + .whitespace => id = .whitespace, + .multi_line_comment_done => id = .whitespace, + .float_exponent_digits => id = if (counter == 0) .invalid else .float_literal, + .float_fraction, + .float_fraction_hex, + => id = .float_literal, + .integer_literal_oct, + .integer_literal_binary, + .integer_literal_hex, + .integer_literal, + .integer_suffix, + .zero, + => id = .integer_literal, + .integer_suffix_u => id = 
.integer_literal_u, + .integer_suffix_l => id = .integer_literal_l, + .integer_suffix_ll => id = .integer_literal_ll, + .integer_suffix_ul => id = .integer_literal_lu, + + .float_suffix => id = .float_literal, + .float_suffix_f => id = .float_literal_f, + .float_suffix_i => id = .imaginary_literal, + .float_suffix_l => id = .float_literal_l, + .equal => id = .equal, + .bang => id = .bang, + .minus => id = .minus, + .slash => id = .slash, + .ampersand => id = .ampersand, + .hash => id = .hash, + .period => id = .period, + .pipe => id = .pipe, + .angle_bracket_angle_bracket_right => id = .angle_bracket_angle_bracket_right, + .angle_bracket_right => id = .angle_bracket_right, + .angle_bracket_angle_bracket_left => id = .angle_bracket_angle_bracket_left, + .angle_bracket_left => id = .angle_bracket_left, + .plus => id = .plus, + .colon => id = .colon, + .percent => id = .percent, + .caret => id = .caret, + .asterisk => id = .asterisk, + } + } + + return .{ + .id = id, + .start = start, + .end = self.index, + .line = self.line, + .source = self.source, + }; +} + +pub fn nextNoWS(self: *Tokenizer) Token { + var tok = self.next(); + while (tok.id == .whitespace) tok = self.next(); + return tok; +} + +test "operators" { + try expectTokens( + \\ ! != | || |= = == + \\ ( ) { } [ ] . .. ... + \\ ^ ^= + ++ += - -- -= + \\ * *= % %= -> : ; / /= + \\ , & && &= ? < <= << + \\ <<= > >= >> >>= ~ # ## + \\ + , &.{ + .bang, + .bang_equal, + .pipe, + .pipe_pipe, + .pipe_equal, + .equal, + .equal_equal, + .nl, + .l_paren, + .r_paren, + .l_brace, + .r_brace, + .l_bracket, + .r_bracket, + .period, + .period, + .period, + .ellipsis, + .nl, + .caret, + .caret_equal, + .plus, + .plus_plus, + .plus_equal, + .minus, + .minus_minus, + .minus_equal, + .nl, + .asterisk, + .asterisk_equal, + .percent, + .percent_equal, + .arrow, + .colon, + .semicolon, + .slash, + .slash_equal, + .nl, + .comma, + .ampersand, + .ampersand_ampersand, + .ampersand_equal, + .question_mark, + .angle_bracket_left, + .angle_bracket_left_equal, + .angle_bracket_angle_bracket_left, + .nl, + .angle_bracket_angle_bracket_left_equal, + .angle_bracket_right, + .angle_bracket_right_equal, + .angle_bracket_angle_bracket_right, + .angle_bracket_angle_bracket_right_equal, + .tilde, + .hash, + .hash_hash, + .nl, + }); +} + +test "keywords" { + try expectTokens( + \\auto break case char const continue default do + \\double else enum extern float for goto if int + \\long register return short signed sizeof static + \\struct switch typedef union unsigned void volatile + \\while _Bool _Complex _Imaginary inline restrict _Alignas + \\_Alignof _Atomic _Generic _Noreturn _Static_assert _Thread_local + \\__attribute __attribute__ __declspec + \\ + , &.{ + .keyword_auto, + .keyword_break, + .keyword_case, + .keyword_char, + .keyword_const, + .keyword_continue, + .keyword_default, + .keyword_do, + .nl, + .keyword_double, + .keyword_else, + .keyword_enum, + .keyword_extern, + .keyword_float, + .keyword_for, + .keyword_goto, + .keyword_if, + .keyword_int, + .nl, + .keyword_long, + .keyword_register, + .keyword_return, + .keyword_short, + .keyword_signed, + .keyword_sizeof, + .keyword_static, + .nl, + .keyword_struct, + .keyword_switch, + .keyword_typedef, + .keyword_union, + .keyword_unsigned, + .keyword_void, + .keyword_volatile, + .nl, + .keyword_while, + .keyword_bool, + .keyword_complex, + .keyword_imaginary, + .keyword_inline, + .keyword_restrict, + .keyword_alignas, + .nl, + .keyword_alignof, + .keyword_atomic, + .keyword_generic, + .keyword_noreturn, + 
.keyword_static_assert, + .keyword_thread_local, + .nl, + .keyword_attribute1, + .keyword_attribute2, + .keyword_declspec, + .nl, + }); +} + +test "preprocessor keywords" { + try expectTokens( + \\#include + \\#define + \\#ifdef + \\#ifndef + \\#error + \\#pragma + \\ + , &.{ + .hash, + .keyword_include, + .nl, + .hash, + .keyword_define, + .nl, + .hash, + .keyword_ifdef, + .nl, + .hash, + .keyword_ifndef, + .nl, + .hash, + .keyword_error, + .nl, + .hash, + .keyword_pragma, + .nl, + }); +} + +test "line continuation" { + try expectTokens( + \\#define foo \ + \\ bar + \\"foo\ + \\ bar" + \\#define "foo" + \\ "bar" + \\#define "foo" \ + \\ "bar" + , &.{ + .hash, + .keyword_define, + .identifier, + .identifier, + .nl, + .string_literal, + .nl, + .hash, + .keyword_define, + .string_literal, + .nl, + .string_literal, + .nl, + .hash, + .keyword_define, + .string_literal, + .string_literal, + }); +} + +test "string prefix" { + try expectTokens( + \\"foo" + \\u"foo" + \\u8"foo" + \\U"foo" + \\L"foo" + \\'foo' + \\u'foo' + \\U'foo' + \\L'foo' + \\ + , &.{ + .string_literal, + .nl, + .string_literal_utf_16, + .nl, + .string_literal_utf_8, + .nl, + .string_literal_utf_32, + .nl, + .string_literal_wide, + .nl, + .char_literal, + .nl, + .char_literal_utf_16, + .nl, + .char_literal_utf_32, + .nl, + .char_literal_wide, + .nl, + }); +} + +test "num suffixes" { + try expectTokens( + \\ 1.0f 1.0L 1.0 .0 1. 0x1p0f 0X1p0 + \\ 0l 0lu 0ll 0llu 0 + \\ 1u 1ul 1ull 1 + \\ 1.0i 1.0I + \\ 1.0if 1.0If 1.0fi 1.0fI + \\ 1.0il 1.0Il 1.0li 1.0lI + \\ + , &.{ + .float_literal_f, + .float_literal_l, + .float_literal, + .float_literal, + .float_literal, + .float_literal_f, + .float_literal, + .nl, + .integer_literal_l, + .integer_literal_lu, + .integer_literal_ll, + .integer_literal_llu, + .integer_literal, + .nl, + .integer_literal_u, + .integer_literal_lu, + .integer_literal_llu, + .integer_literal, + .nl, + .imaginary_literal, + .imaginary_literal, + .nl, + .imaginary_literal_f, + .imaginary_literal_f, + .imaginary_literal_f, + .imaginary_literal_f, + .nl, + .imaginary_literal_l, + .imaginary_literal_l, + .imaginary_literal_l, + .imaginary_literal_l, + .nl, + }); +} + +test "comments" { + try expectTokens( + \\//foo + \\#foo + , &.{ + .nl, + .hash, + .identifier, + }); +} + +test "extended identifiers" { + try expectTokens("𝓪𝓻𝓸𝓬𝓬", &.{.extended_identifier}); + try expectTokens("u𝓪𝓻𝓸𝓬𝓬", &.{.extended_identifier}); + try expectTokens("u8𝓪𝓻𝓸𝓬𝓬", &.{.extended_identifier}); + try expectTokens("U𝓪𝓻𝓸𝓬𝓬", &.{.extended_identifier}); + try expectTokens("L𝓪𝓻𝓸𝓬𝓬", &.{.extended_identifier}); + try expectTokens("1™", &.{ .integer_literal, .extended_identifier }); + try expectTokens("1.™", &.{ .float_literal, .extended_identifier }); + try expectTokens("..™", &.{ .period, .period, .extended_identifier }); + try expectTokens("0™", &.{ .integer_literal, .extended_identifier }); + try expectTokens("0b\u{E0000}", &.{ .invalid, .extended_identifier }); + try expectTokens("0b0\u{E0000}", &.{ .integer_literal, .extended_identifier }); + try expectTokens("01\u{E0000}", &.{ .integer_literal, .extended_identifier }); + try expectTokens("010\u{E0000}", &.{ .integer_literal, .extended_identifier }); + try expectTokens("0x\u{E0000}", &.{ .invalid, .extended_identifier }); + try expectTokens("0x0\u{E0000}", &.{ .integer_literal, .extended_identifier }); + try expectTokens("\"\\0\u{E0000}\"", &.{.string_literal}); + try expectTokens("\"\\x\u{E0000}\"", &.{.string_literal}); + try expectTokens("\"\\u\u{E0000}\"", &.{ .invalid, .extended_identifier, 
.invalid }); + try expectTokens("1e\u{E0000}", &.{ .invalid, .extended_identifier }); + try expectTokens("1e1\u{E0000}", &.{ .float_literal, .extended_identifier }); +} + +fn expectTokens(contents: []const u8, expected_tokens: []const Token.Id) !void { + var comp = Compilation.init(std.testing.allocator); + defer comp.deinit(); + const source = try comp.addSourceFromBuffer("path", contents); + var tokenizer = Tokenizer{ + .buf = source.buf, + .source = source.id, + .comp = &comp, + }; + var i: usize = 0; + while (i < expected_tokens.len) { + const token = tokenizer.next(); + if (token.id == .whitespace) continue; + const expected_token_id = expected_tokens[i]; + i += 1; + if (!std.meta.eql(token.id, expected_token_id)) { + std.debug.print("expected {s}, found {s}\n", .{ @tagName(expected_token_id), @tagName(token.id) }); + return error.TokensDoNotEqual; + } + } + const last_token = tokenizer.next(); + try std.testing.expect(last_token.id == .eof); +} diff --git a/src/aro/Tree.zig b/src/aro/Tree.zig new file mode 100644 index 000000000000..94b283f88665 --- /dev/null +++ b/src/aro/Tree.zig @@ -0,0 +1,1148 @@ +const std = @import("std"); +const Type = @import("Type.zig"); +const Tokenizer = @import("Tokenizer.zig"); +const Compilation = @import("Compilation.zig"); +const Source = @import("Source.zig"); +const Attribute = @import("Attribute.zig"); +const Value = @import("Value.zig"); + +const Tree = @This(); + +pub const Token = struct { + id: Id, + /// This location contains the actual token slice which might be generated. + /// If it is generated then there is guaranteed to be at least one + /// expansion location. + loc: Source.Location, + expansion_locs: ?[*]Source.Location = null, + + pub fn expansionSlice(tok: Token) []const Source.Location { + const locs = tok.expansion_locs orelse return &[0]Source.Location{}; + var i: usize = 0; + while (locs[i].id != .unused) : (i += 1) {} + return locs[0..i]; + } + + pub fn addExpansionLocation(tok: *Token, gpa: std.mem.Allocator, new: []const Source.Location) !void { + if (new.len == 0 or tok.id == .whitespace) return; + var list = std.ArrayList(Source.Location).init(gpa); + defer { + std.mem.set(Source.Location, list.items.ptr[list.items.len..list.capacity], .{}); + // add a sentinel since the allocator is not guaranteed + // to return the exact desired size + list.items.ptr[list.capacity - 1].byte_offset = 1; + tok.expansion_locs = list.items.ptr; + } + + if (tok.expansion_locs) |locs| { + var i: usize = 0; + while (locs[i].id != .unused) : (i += 1) {} + list.items = locs[0..i]; + while (locs[i].byte_offset != 1) : (i += 1) {} + list.capacity = i + 1; + } + + const min_len = std.math.max(list.items.len + new.len + 1, 4); + const wanted_len = std.math.ceilPowerOfTwo(usize, min_len) catch + return error.OutOfMemory; + try list.ensureTotalCapacity(wanted_len); + + for (new) |new_loc| { + if (new_loc.id == .generated) continue; + list.appendAssumeCapacity(new_loc); + } + } + + pub fn free(expansion_locs: ?[*]Source.Location, gpa: std.mem.Allocator) void { + const locs = expansion_locs orelse return; + var i: usize = 0; + while (locs[i].id != .unused) : (i += 1) {} + while (locs[i].byte_offset != 1) : (i += 1) {} + gpa.free(locs[0 .. 
i + 1]); + } + + pub fn dupe(tok: Token, gpa: std.mem.Allocator) !Token { + var copy = tok; + copy.expansion_locs = null; + try copy.addExpansionLocation(gpa, tok.expansionSlice()); + return copy; + } + + pub const List = std.MultiArrayList(Token); + pub const Id = Tokenizer.Token.Id; +}; + +pub const TokenIndex = u32; +pub const NodeIndex = enum(u32) { none, _ }; +pub const ValueMap = std.AutoHashMap(NodeIndex, Value); + +comp: *Compilation, +arena: std.heap.ArenaAllocator, +generated: []const u8, +tokens: Token.List.Slice, +nodes: Node.List.Slice, +data: []const NodeIndex, +root_decls: []const NodeIndex, +strings: []const u8, +value_map: ValueMap, + +pub fn deinit(tree: *Tree) void { + tree.comp.gpa.free(tree.root_decls); + tree.comp.gpa.free(tree.data); + tree.comp.gpa.free(tree.strings); + tree.nodes.deinit(tree.comp.gpa); + tree.arena.deinit(); + tree.value_map.deinit(); +} + +pub const Node = struct { + tag: Tag, + ty: Type = .{ .specifier = .void }, + data: Data, + + pub const Range = struct { start: u32, end: u32 }; + + pub const Data = union { + decl: struct { + name: TokenIndex, + node: NodeIndex = .none, + }, + decl_ref: TokenIndex, + range: Range, + if3: struct { + cond: NodeIndex, + body: u32, + }, + un: NodeIndex, + bin: struct { + lhs: NodeIndex, + rhs: NodeIndex, + }, + member: struct { + lhs: NodeIndex, + index: u32, + }, + union_init: struct { + field_index: u32, + node: NodeIndex, + }, + int: u64, + + pub fn forDecl(data: Data, tree: Tree) struct { + decls: []const NodeIndex, + cond: NodeIndex, + incr: NodeIndex, + body: NodeIndex, + } { + const items = tree.data[data.range.start..data.range.end]; + const decls = items[0 .. items.len - 3]; + + return .{ + .decls = decls, + .cond = items[items.len - 3], + .incr = items[items.len - 2], + .body = items[items.len - 1], + }; + } + + pub fn forStmt(data: Data, tree: Tree) struct { + init: NodeIndex, + cond: NodeIndex, + incr: NodeIndex, + body: NodeIndex, + } { + const items = tree.data[data.if3.body..]; + + return .{ + .init = items[0], + .cond = items[1], + .incr = items[2], + .body = data.if3.cond, + }; + } + }; + + pub const List = std.MultiArrayList(Node); +}; + +pub const Tag = enum(u8) { + /// Only appears at index 0 and reaching it is always a result of a bug. 
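+    /// (`NodeIndex.none` is backed by 0, so dereferencing an absent node index
+    /// would land on this tag.)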
+ invalid, + + // ====== Decl ====== + + // _Static_assert + static_assert, + + // function prototype + fn_proto, + static_fn_proto, + inline_fn_proto, + inline_static_fn_proto, + + // function definition + fn_def, + static_fn_def, + inline_fn_def, + inline_static_fn_def, + + // variable declaration + @"var", + extern_var, + static_var, + // same as static_var, used for __func__, __FUNCTION__ and __PRETTY_FUNCTION__ + implicit_static_var, + threadlocal_var, + threadlocal_extern_var, + threadlocal_static_var, + + // typedef declaration + typedef, + + // container declarations + /// { lhs; rhs; } + struct_decl_two, + /// { lhs; rhs; } + union_decl_two, + /// { lhs, rhs, } + enum_decl_two, + /// { range } + struct_decl, + /// { range } + union_decl, + /// { range } + enum_decl, + + /// name = node + enum_field_decl, + /// ty name : node + /// name == 0 means unnamed + record_field_decl, + /// Used when a record has an unnamed record as a field + indirect_record_field_decl, + + // ====== Stmt ====== + + labeled_stmt, + /// { first; second; } first and second may be null + compound_stmt_two, + /// { data } + compound_stmt, + /// if (first) data[second] else data[second+1]; + if_then_else_stmt, + /// if (first); else second; + if_else_stmt, + /// if (first) second; second may be null + if_then_stmt, + /// switch (first) second + switch_stmt, + /// case first: second + case_stmt, + /// default: first + default_stmt, + /// while (first) second + while_stmt, + /// do second while (first); + do_while_stmt, + /// for (data[..]; data[len-3]; data[len-2]) data[len-1] + for_decl_stmt, + /// for (;;) first + forever_stmt, + /// for (data[first]; data[first+1]; data[first+2]) second + for_stmt, + /// goto first; + goto_stmt, + /// goto *un; + computed_goto_stmt, + // continue; first and second unused + continue_stmt, + // break; first and second unused + break_stmt, + // null statement (just a semicolon); first and second unused + null_stmt, + /// return first; first may be null + return_stmt, + + // ====== Expr ====== + + /// lhs , rhs + comma_expr, + /// lhs ?: rhs + binary_cond_expr, + /// lhs ? 
data[0] : data[1] + cond_expr, + /// lhs = rhs + assign_expr, + /// lhs *= rhs + mul_assign_expr, + /// lhs /= rhs + div_assign_expr, + /// lhs %= rhs + mod_assign_expr, + /// lhs += rhs + add_assign_expr, + /// lhs -= rhs + sub_assign_expr, + /// lhs <<= rhs + shl_assign_expr, + /// lhs >>= rhs + shr_assign_expr, + /// lhs &= rhs + bit_and_assign_expr, + /// lhs ^= rhs + bit_xor_assign_expr, + /// lhs |= rhs + bit_or_assign_expr, + /// lhs || rhs + bool_or_expr, + /// lhs && rhs + bool_and_expr, + /// lhs | rhs + bit_or_expr, + /// lhs ^ rhs + bit_xor_expr, + /// lhs & rhs + bit_and_expr, + /// lhs == rhs + equal_expr, + /// lhs != rhs + not_equal_expr, + /// lhs < rhs + less_than_expr, + /// lhs <= rhs + less_than_equal_expr, + /// lhs > rhs + greater_than_expr, + /// lhs >= rhs + greater_than_equal_expr, + /// lhs << rhs + shl_expr, + /// lhs >> rhs + shr_expr, + /// lhs + rhs + add_expr, + /// lhs - rhs + sub_expr, + /// lhs * rhs + mul_expr, + /// lhs / rhs + div_expr, + /// lhs % rhs + mod_expr, + /// Explicit (type)un + cast_expr, + /// &un + addr_of_expr, + /// &&decl_ref + addr_of_label, + /// *un + deref_expr, + /// +un + plus_expr, + /// -un + negate_expr, + /// ~un + bit_not_expr, + /// !un + bool_not_expr, + /// ++un + pre_inc_expr, + /// --un + pre_dec_expr, + /// lhs[rhs] lhs is pointer/array type, rhs is integer type + array_access_expr, + /// first(second) second may be 0 + call_expr_one, + /// data[0](data[1..]) + call_expr, + /// decl + builtin_call_expr_one, + builtin_call_expr, + /// lhs.member + member_access_expr, + /// lhs->member + member_access_ptr_expr, + /// un++ + post_inc_expr, + /// un-- + post_dec_expr, + /// (un) + paren_expr, + /// decl_ref + decl_ref_expr, + /// decl_ref + enumeration_ref, + /// integer literal, always unsigned + int_literal, + /// Same as int_literal, but originates from a char literal + char_literal, + /// f32 literal + float_literal, + /// f64 literal + double_literal, + /// wraps a float or double literal: un + imaginary_literal, + /// tree.str[index..][0..len] + string_literal_expr, + /// sizeof(un?) + sizeof_expr, + /// _Alignof(un?) 
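+ /// un is .none when the operand is a type name rather than an expression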
+ alignof_expr, + /// _Generic(controlling lhs, chosen rhs) + generic_expr_one, + /// _Generic(controlling range[0], chosen range[1], rest range[2..]) + generic_expr, + /// ty: un + generic_association_expr, + /// default: un + generic_default_expr, + /// __builtin_choose_expr(lhs, data[0], data[1]) + builtin_choose_expr, + /// ({ un }) + stmt_expr, + + // ====== Initializer expressions ====== + + /// { lhs, rhs } + array_init_expr_two, + /// { range } + array_init_expr, + /// { lhs, rhs } + struct_init_expr_two, + /// { range } + struct_init_expr, + /// { union_init } + union_init_expr, + /// (ty){ un } + compound_literal_expr, + + // ====== Implicit casts ====== + + /// Convert T[] to T * + array_to_pointer, + /// Converts an lvalue to an rvalue + lval_to_rval, + /// Convert a function type to a pointer to a function + function_to_pointer, + /// Convert a pointer type to a _Bool + pointer_to_bool, + /// Convert a pointer type to an integer type + pointer_to_int, + /// Convert _Bool to an integer type + bool_to_int, + /// Convert _Bool to a floating type + bool_to_float, + /// Convert a _Bool to a pointer; will cause a warning + bool_to_pointer, + /// Convert an integer type to _Bool + int_to_bool, + /// Convert an integer to a floating type + int_to_float, + /// Convert an integer type to a pointer type + int_to_pointer, + /// Convert a floating type to a _Bool + float_to_bool, + /// Convert a floating type to an integer + float_to_int, + /// Convert one integer type to another + int_cast, + /// Convert one floating type to another + float_cast, + /// Convert pointer to one with same child type but more CV-quals, + /// OR to appropriately-qualified void * + /// only appears on the branches of a conditional expr + qual_cast, + /// Convert type to void; only appears on the branches of a conditional expr + to_void, + + /// Convert a literal 0 to a null pointer + null_to_pointer, + + /// Inserted at the end of a function body if no return stmt is found. + /// ty is the function's return type + implicit_return, + + /// Inserted in array_init_expr to represent unspecified elements. + /// data.int contains the number of elements. + array_filler_expr, + /// Inserted in record and scalar initializers for unspecified elements. 
+ default_init_expr, + + /// attribute argument identifier (see `mode` attribute) + attr_arg_ident, + /// rhs can be none + attr_params_two, + /// range + attr_params, + + pub fn isImplicit(tag: Tag) bool { + return switch (tag) { + .array_to_pointer, + .lval_to_rval, + .function_to_pointer, + .pointer_to_bool, + .pointer_to_int, + .bool_to_int, + .bool_to_float, + .bool_to_pointer, + .int_to_bool, + .int_to_float, + .int_to_pointer, + .float_to_bool, + .float_to_int, + .int_cast, + .float_cast, + .to_void, + .implicit_return, + .qual_cast, + .null_to_pointer, + .array_filler_expr, + .default_init_expr, + .implicit_static_var, + => true, + else => false, + }; + } +}; + +pub fn isLval(nodes: Node.List.Slice, extra: []const NodeIndex, value_map: ValueMap, node: NodeIndex) bool { + var is_const: bool = undefined; + return isLvalExtra(nodes, extra, value_map, node, &is_const); +} + +pub fn isLvalExtra(nodes: Node.List.Slice, extra: []const NodeIndex, value_map: ValueMap, node: NodeIndex, is_const: *bool) bool { + is_const.* = false; + switch (nodes.items(.tag)[@enumToInt(node)]) { + .compound_literal_expr => { + is_const.* = nodes.items(.ty)[@enumToInt(node)].isConst(); + return true; + }, + .string_literal_expr => return true, + .member_access_ptr_expr => { + const lhs_expr = nodes.items(.data)[@enumToInt(node)].member.lhs; + const ptr_ty = nodes.items(.ty)[@enumToInt(lhs_expr)]; + if (ptr_ty.isPtr()) is_const.* = ptr_ty.elemType().isConst(); + return true; + }, + .array_access_expr => { + const lhs_expr = nodes.items(.data)[@enumToInt(node)].bin.lhs; + if (lhs_expr != .none) { + const array_ty = nodes.items(.ty)[@enumToInt(lhs_expr)]; + if (array_ty.isPtr() or array_ty.isArray()) is_const.* = array_ty.elemType().isConst(); + } + return true; + }, + .decl_ref_expr => { + const decl_ty = nodes.items(.ty)[@enumToInt(node)]; + is_const.* = decl_ty.isConst(); + return true; + }, + .deref_expr => { + const data = nodes.items(.data)[@enumToInt(node)]; + const operand_ty = nodes.items(.ty)[@enumToInt(data.un)]; + if (operand_ty.isFunc()) return false; + if (operand_ty.isPtr() or operand_ty.isArray()) is_const.* = operand_ty.elemType().isConst(); + return true; + }, + .member_access_expr => { + const data = nodes.items(.data)[@enumToInt(node)]; + return isLvalExtra(nodes, extra, value_map, data.member.lhs, is_const); + }, + .paren_expr => { + const data = nodes.items(.data)[@enumToInt(node)]; + return isLvalExtra(nodes, extra, value_map, data.un, is_const); + }, + .builtin_choose_expr => { + const data = nodes.items(.data)[@enumToInt(node)]; + + if (value_map.get(data.if3.cond)) |val| { + const offset = @boolToInt(val.isZero()); + return isLvalExtra(nodes, extra, value_map, extra[data.if3.body + offset], is_const); + } + return false; + }, + else => return false, + } +} + +pub fn dumpStr(bytes: []const u8, tag: Tag, writer: anytype) !void { + switch (tag) { + .string_literal_expr => try writer.print("\"{}\"", .{std.zig.fmtEscapes(bytes[0 .. 
bytes.len - 1])}), + else => unreachable, + } +} + +pub fn tokSlice(tree: Tree, tok_i: TokenIndex) []const u8 { + if (tree.tokens.items(.id)[tok_i].lexeme()) |some| return some; + const loc = tree.tokens.items(.loc)[tok_i]; + var tmp_tokenizer = Tokenizer{ + .buf = tree.comp.getSource(loc.id).buf, + .comp = tree.comp, + .index = loc.byte_offset, + .source = .generated, + }; + const tok = tmp_tokenizer.next(); + return tmp_tokenizer.buf[tok.start..tok.end]; +} + +pub fn dump(tree: Tree, writer: anytype) @TypeOf(writer).Error!void { + for (tree.root_decls) |i| { + try tree.dumpNode(i, 0, writer); + try writer.writeByte('\n'); + } +} + +fn dumpAttribute(attr: Attribute, writer: anytype) !void { + inline for (std.meta.fields(Attribute.Tag)) |e| { + if (e.value == @enumToInt(attr.tag)) { + const args = @field(attr.args, e.name); + if (@TypeOf(args) == void) { + try writer.writeByte('\n'); + return; + } + inline for (@typeInfo(@TypeOf(args)).Struct.fields) |f, i| { + if (comptime std.mem.eql(u8, f.name, "__name_tok")) continue; + if (i != 0) { + try writer.writeAll(", "); + } + try writer.writeAll(f.name); + try writer.writeAll(": "); + switch (f.field_type) { + []const u8, ?[]const u8 => try writer.print("\"{s}\"", .{@field(args, f.name)}), + else => switch (@typeInfo(f.field_type)) { + .Enum => try writer.writeAll(@tagName(@field(args, f.name))), + else => try writer.print("{}", .{@field(args, f.name)}), + }, + } + } + try writer.writeByte('\n'); + return; + } + } +} + +fn dumpNode(tree: Tree, node: NodeIndex, level: u32, w: anytype) @TypeOf(w).Error!void { + const delta = 2; + const half = delta / 2; + const util = @import("util.zig"); + const TYPE = util.Color.purple; + const TAG = util.Color.cyan; + const IMPLICIT = util.Color.blue; + const NAME = util.Color.red; + const LITERAL = util.Color.green; + const ATTRIBUTE = util.Color.yellow; + std.debug.assert(node != .none); + + const tag = tree.nodes.items(.tag)[@enumToInt(node)]; + const data = tree.nodes.items(.data)[@enumToInt(node)]; + const ty = tree.nodes.items(.ty)[@enumToInt(node)]; + try w.writeByteNTimes(' ', level); + + util.setColor(if (tag.isImplicit()) IMPLICIT else TAG, w); + try w.print("{s}: ", .{@tagName(tag)}); + util.setColor(TYPE, w); + try w.writeByte('\''); + try ty.dump(w); + try w.writeByte('\''); + + if (isLval(tree.nodes, tree.data, tree.value_map, node)) { + util.setColor(ATTRIBUTE, w); + try w.writeAll(" lvalue"); + } + if (tree.value_map.get(node)) |val| { + util.setColor(LITERAL, w); + try w.writeAll(" (value: "); + try val.dump(ty, tree.comp, w); + try w.writeByte(')'); + } + try w.writeAll("\n"); + util.setColor(.reset, w); + + if (ty.specifier == .attributed) { + util.setColor(ATTRIBUTE, w); + for (ty.data.attributed.attributes) |attr| { + try w.writeByteNTimes(' ', level + half); + try w.print("attr: {s} ", .{@tagName(attr.tag)}); + try dumpAttribute(attr, w); + } + util.setColor(.reset, w); + } + + switch (tag) { + .invalid => unreachable, + .static_assert => { + try w.writeByteNTimes(' ', level + 1); + try w.writeAll("condition:\n"); + try tree.dumpNode(data.bin.lhs, level + delta, w); + if (data.bin.rhs != .none) { + try w.writeByteNTimes(' ', level + 1); + try w.writeAll("diagnostic:\n"); + try tree.dumpNode(data.bin.rhs, level + delta, w); + } + }, + .fn_proto, + .static_fn_proto, + .inline_fn_proto, + .inline_static_fn_proto, + => { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("name: "); + util.setColor(NAME, w); + try w.print("{s}\n", .{tree.tokSlice(data.decl.name)}); + 
util.setColor(.reset, w); + }, + .fn_def, + .static_fn_def, + .inline_fn_def, + .inline_static_fn_def, + => { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("name: "); + util.setColor(NAME, w); + try w.print("{s}\n", .{tree.tokSlice(data.decl.name)}); + util.setColor(.reset, w); + try w.writeByteNTimes(' ', level + half); + try w.writeAll("body:\n"); + try tree.dumpNode(data.decl.node, level + delta, w); + }, + .typedef, + .@"var", + .extern_var, + .static_var, + .implicit_static_var, + .threadlocal_var, + .threadlocal_extern_var, + .threadlocal_static_var, + => { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("name: "); + util.setColor(NAME, w); + try w.print("{s}\n", .{tree.tokSlice(data.decl.name)}); + util.setColor(.reset, w); + if (data.decl.node != .none) { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("init:\n"); + try tree.dumpNode(data.decl.node, level + delta, w); + } + }, + .enum_field_decl => { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("name: "); + util.setColor(NAME, w); + try w.print("{s}\n", .{tree.tokSlice(data.decl.name)}); + util.setColor(.reset, w); + if (data.decl.node != .none) { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("value:\n"); + try tree.dumpNode(data.decl.node, level + delta, w); + } + }, + .record_field_decl => { + if (data.decl.name != 0) { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("name: "); + util.setColor(NAME, w); + try w.print("{s}\n", .{tree.tokSlice(data.decl.name)}); + util.setColor(.reset, w); + } + if (data.decl.node != .none) { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("bits:\n"); + try tree.dumpNode(data.decl.node, level + delta, w); + } + }, + .indirect_record_field_decl => {}, + .compound_stmt, + .array_init_expr, + .struct_init_expr, + .enum_decl, + .struct_decl, + .union_decl, + .attr_params, + => { + for (tree.data[data.range.start..data.range.end]) |stmt, i| { + if (i != 0) try w.writeByte('\n'); + try tree.dumpNode(stmt, level + delta, w); + } + }, + .compound_stmt_two, + .array_init_expr_two, + .struct_init_expr_two, + .enum_decl_two, + .struct_decl_two, + .union_decl_two, + .attr_params_two, + => { + if (data.bin.lhs != .none) try tree.dumpNode(data.bin.lhs, level + delta, w); + if (data.bin.rhs != .none) try tree.dumpNode(data.bin.rhs, level + delta, w); + }, + .union_init_expr => { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("field index: "); + util.setColor(LITERAL, w); + try w.print("{d}\n", .{data.union_init.field_index}); + util.setColor(.reset, w); + if (data.union_init.node != .none) { + try tree.dumpNode(data.union_init.node, level + delta, w); + } + }, + .compound_literal_expr => { + try tree.dumpNode(data.un, level + half, w); + }, + .labeled_stmt => { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("label: "); + util.setColor(LITERAL, w); + try w.print("{s}\n", .{tree.tokSlice(data.decl.name)}); + util.setColor(.reset, w); + if (data.decl.node != .none) { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("stmt:\n"); + try tree.dumpNode(data.decl.node, level + delta, w); + } + }, + .case_stmt => { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("value:\n"); + try tree.dumpNode(data.bin.lhs, level + delta, w); + if (data.bin.rhs != .none) { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("stmt:\n"); + try tree.dumpNode(data.bin.rhs, level + delta, w); + } + }, + .default_stmt => { + if (data.un != .none) { + try w.writeByteNTimes(' ', level + 
half); + try w.writeAll("stmt:\n"); + try tree.dumpNode(data.un, level + delta, w); + } + }, + .cond_expr, .if_then_else_stmt, .builtin_choose_expr => { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("cond:\n"); + try tree.dumpNode(data.if3.cond, level + delta, w); + + try w.writeByteNTimes(' ', level + half); + try w.writeAll("then:\n"); + try tree.dumpNode(tree.data[data.if3.body], level + delta, w); + + try w.writeByteNTimes(' ', level + half); + try w.writeAll("else:\n"); + try tree.dumpNode(tree.data[data.if3.body + 1], level + delta, w); + }, + .if_else_stmt => { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("cond:\n"); + try tree.dumpNode(data.bin.lhs, level + delta, w); + + try w.writeByteNTimes(' ', level + half); + try w.writeAll("else:\n"); + try tree.dumpNode(data.bin.rhs, level + delta, w); + }, + .if_then_stmt => { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("cond:\n"); + try tree.dumpNode(data.bin.lhs, level + delta, w); + + if (data.bin.rhs != .none) { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("then:\n"); + try tree.dumpNode(data.bin.rhs, level + delta, w); + } + }, + .switch_stmt, .while_stmt, .do_while_stmt => { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("cond:\n"); + try tree.dumpNode(data.bin.lhs, level + delta, w); + + if (data.bin.rhs != .none) { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("body:\n"); + try tree.dumpNode(data.bin.rhs, level + delta, w); + } + }, + .for_decl_stmt => { + const for_decl = data.forDecl(tree); + + try w.writeByteNTimes(' ', level + half); + try w.writeAll("decl:\n"); + for (for_decl.decls) |decl| { + try tree.dumpNode(decl, level + delta, w); + try w.writeByte('\n'); + } + if (for_decl.cond != .none) { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("cond:\n"); + try tree.dumpNode(for_decl.cond, level + delta, w); + } + if (for_decl.incr != .none) { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("incr:\n"); + try tree.dumpNode(for_decl.incr, level + delta, w); + } + if (for_decl.body != .none) { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("body:\n"); + try tree.dumpNode(for_decl.body, level + delta, w); + } + }, + .forever_stmt => { + if (data.un != .none) { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("body:\n"); + try tree.dumpNode(data.un, level + delta, w); + } + }, + .for_stmt => { + const for_stmt = data.forStmt(tree); + + if (for_stmt.init != .none) { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("init:\n"); + try tree.dumpNode(for_stmt.init, level + delta, w); + } + if (for_stmt.cond != .none) { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("cond:\n"); + try tree.dumpNode(for_stmt.cond, level + delta, w); + } + if (for_stmt.incr != .none) { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("incr:\n"); + try tree.dumpNode(for_stmt.incr, level + delta, w); + } + if (for_stmt.body != .none) { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("body:\n"); + try tree.dumpNode(for_stmt.body, level + delta, w); + } + }, + .goto_stmt, .addr_of_label => { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("label: "); + util.setColor(LITERAL, w); + try w.print("{s}\n", .{tree.tokSlice(data.decl_ref)}); + util.setColor(.reset, w); + }, + .continue_stmt, .break_stmt, .implicit_return, .null_stmt => {}, + .return_stmt => { + if (data.un != .none) { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("expr:\n"); + try 
tree.dumpNode(data.un, level + delta, w); + } + }, + .attr_arg_ident => { + try w.writeByteNTimes(' ', level + half); + util.setColor(ATTRIBUTE, w); + try w.print("name: {s}\n", .{tree.tokSlice(data.decl_ref)}); + util.setColor(.reset, w); + }, + .call_expr => { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("lhs:\n"); + try tree.dumpNode(tree.data[data.range.start], level + delta, w); + + try w.writeByteNTimes(' ', level + half); + try w.writeAll("args:\n"); + for (tree.data[data.range.start + 1 .. data.range.end]) |arg| try tree.dumpNode(arg, level + delta, w); + }, + .call_expr_one => { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("lhs:\n"); + try tree.dumpNode(data.bin.lhs, level + delta, w); + if (data.bin.rhs != .none) { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("arg:\n"); + try tree.dumpNode(data.bin.rhs, level + delta, w); + } + }, + .builtin_call_expr => { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("name: "); + util.setColor(NAME, w); + try w.print("{s}\n", .{tree.tokSlice(@enumToInt(tree.data[data.range.start]))}); + util.setColor(.reset, w); + + try w.writeByteNTimes(' ', level + half); + try w.writeAll("args:\n"); + for (tree.data[data.range.start + 1 .. data.range.end]) |arg| try tree.dumpNode(arg, level + delta, w); + }, + .builtin_call_expr_one => { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("name: "); + util.setColor(NAME, w); + try w.print("{s}\n", .{tree.tokSlice(data.decl.name)}); + util.setColor(.reset, w); + if (data.decl.node != .none) { + try w.writeByteNTimes(' ', level + half); + try w.writeAll("arg:\n"); + try tree.dumpNode(data.decl.node, level + delta, w); + } + }, + .comma_expr, + .binary_cond_expr, + .assign_expr, + .mul_assign_expr, + .div_assign_expr, + .mod_assign_expr, + .add_assign_expr, + .sub_assign_expr, + .shl_assign_expr, + .shr_assign_expr, + .bit_and_assign_expr, + .bit_xor_assign_expr, + .bit_or_assign_expr, + .bool_or_expr, + .bool_and_expr, + .bit_or_expr, + .bit_xor_expr, + .bit_and_expr, + .equal_expr, + .not_equal_expr, + .less_than_expr, + .less_than_equal_expr, + .greater_than_expr, + .greater_than_equal_expr, + .shl_expr, + .shr_expr, + .add_expr, + .sub_expr, + .mul_expr, + .div_expr, + .mod_expr, + => { + try w.writeByteNTimes(' ', level + 1); + try w.writeAll("lhs:\n"); + try tree.dumpNode(data.bin.lhs, level + delta, w); + try w.writeByteNTimes(' ', level + 1); + try w.writeAll("rhs:\n"); + try tree.dumpNode(data.bin.rhs, level + delta, w); + }, + .cast_expr, + .addr_of_expr, + .computed_goto_stmt, + .deref_expr, + .plus_expr, + .negate_expr, + .bit_not_expr, + .bool_not_expr, + .pre_inc_expr, + .pre_dec_expr, + .post_inc_expr, + .post_dec_expr, + .paren_expr, + => { + try w.writeByteNTimes(' ', level + 1); + try w.writeAll("operand:\n"); + try tree.dumpNode(data.un, level + delta, w); + }, + .decl_ref_expr => { + try w.writeByteNTimes(' ', level + 1); + try w.writeAll("name: "); + util.setColor(NAME, w); + try w.print("{s}\n", .{tree.tokSlice(data.decl_ref)}); + util.setColor(.reset, w); + }, + .enumeration_ref => { + try w.writeByteNTimes(' ', level + 1); + try w.writeAll("name: "); + util.setColor(NAME, w); + try w.print("{s}\n", .{tree.tokSlice(data.decl_ref)}); + util.setColor(.reset, w); + }, + .int_literal, + .char_literal, + .float_literal, + .double_literal, + .string_literal_expr, + => {}, + .member_access_expr, .member_access_ptr_expr => { + try w.writeByteNTimes(' ', level + 1); + try w.writeAll("lhs:\n"); + try 
tree.dumpNode(data.member.lhs, level + delta, w); + + var lhs_ty = tree.nodes.items(.ty)[@enumToInt(data.member.lhs)]; + if (lhs_ty.isPtr()) lhs_ty = lhs_ty.elemType(); + lhs_ty = lhs_ty.canonicalize(.standard); + + try w.writeByteNTimes(' ', level + 1); + try w.writeAll("name: "); + util.setColor(NAME, w); + try w.print("{s}\n", .{lhs_ty.data.record.fields[data.member.index].name}); + util.setColor(.reset, w); + }, + .array_access_expr => { + if (data.bin.lhs != .none) { + try w.writeByteNTimes(' ', level + 1); + try w.writeAll("lhs:\n"); + try tree.dumpNode(data.bin.lhs, level + delta, w); + } + try w.writeByteNTimes(' ', level + 1); + try w.writeAll("index:\n"); + try tree.dumpNode(data.bin.rhs, level + delta, w); + }, + .sizeof_expr, .alignof_expr => { + if (data.un != .none) { + try w.writeByteNTimes(' ', level + 1); + try w.writeAll("expr:\n"); + try tree.dumpNode(data.un, level + delta, w); + } + }, + .generic_expr_one => { + try w.writeByteNTimes(' ', level + 1); + try w.writeAll("controlling:\n"); + try tree.dumpNode(data.bin.lhs, level + delta, w); + try w.writeByteNTimes(' ', level + 1); + try w.writeAll("chosen:\n"); + try tree.dumpNode(data.bin.rhs, level + delta, w); + }, + .generic_expr => { + const nodes = tree.data[data.range.start..data.range.end]; + try w.writeByteNTimes(' ', level + 1); + try w.writeAll("controlling:\n"); + try tree.dumpNode(nodes[0], level + delta, w); + try w.writeByteNTimes(' ', level + 1); + try w.writeAll("chosen:\n"); + try tree.dumpNode(nodes[1], level + delta, w); + try w.writeByteNTimes(' ', level + 1); + try w.writeAll("rest:\n"); + for (nodes[2..]) |expr| { + try tree.dumpNode(expr, level + delta, w); + } + }, + .generic_association_expr, .generic_default_expr, .stmt_expr, .imaginary_literal => { + try tree.dumpNode(data.un, level + delta, w); + }, + .array_to_pointer, + .lval_to_rval, + .function_to_pointer, + .pointer_to_bool, + .pointer_to_int, + .bool_to_int, + .bool_to_float, + .bool_to_pointer, + .int_to_bool, + .int_to_float, + .int_to_pointer, + .float_to_bool, + .float_to_int, + .int_cast, + .float_cast, + .to_void, + .qual_cast, + .null_to_pointer, + => { + try tree.dumpNode(data.un, level + delta, w); + }, + .array_filler_expr => { + try w.writeByteNTimes(' ', level + 1); + try w.writeAll("count: "); + util.setColor(LITERAL, w); + try w.print("{d}\n", .{data.int}); + util.setColor(.reset, w); + }, + .default_init_expr => {}, + } +} diff --git a/src/aro/Type.zig b/src/aro/Type.zig new file mode 100644 index 000000000000..2f65046e1ecc --- /dev/null +++ b/src/aro/Type.zig @@ -0,0 +1,1676 @@ +const std = @import("std"); +const Tree = @import("Tree.zig"); +const TokenIndex = Tree.TokenIndex; +const NodeIndex = Tree.NodeIndex; +const Parser = @import("Parser.zig"); +const Compilation = @import("Compilation.zig"); +const Attribute = @import("Attribute.zig"); + +const Type = @This(); + +pub const Qualifiers = packed struct { + @"const": bool = false, + atomic: bool = false, + @"volatile": bool = false, + restrict: bool = false, + + // for function parameters only, stored here since it fits in the padding + register: bool = false, + + pub fn any(quals: Qualifiers) bool { + return quals.@"const" or quals.restrict or quals.@"volatile" or quals.atomic; + } + + pub fn dump(quals: Qualifiers, w: anytype) !void { + if (quals.@"const") try w.writeAll("const "); + if (quals.atomic) try w.writeAll("_Atomic "); + if (quals.@"volatile") try w.writeAll("volatile "); + if (quals.restrict) try w.writeAll("restrict "); + if (quals.register) try 
w.writeAll("register "); + } + + /// Merge the const/volatile qualifiers, used by type resolution + /// of the conditional operator + pub fn mergeCV(a: Qualifiers, b: Qualifiers) Qualifiers { + return .{ + .@"const" = a.@"const" or b.@"const", + .@"volatile" = a.@"volatile" or b.@"volatile", + }; + } + + /// Merge all qualifiers, used by typeof() + fn mergeAll(a: Qualifiers, b: Qualifiers) Qualifiers { + return .{ + .@"const" = a.@"const" or b.@"const", + .atomic = a.atomic or b.atomic, + .@"volatile" = a.@"volatile" or b.@"volatile", + .restrict = a.restrict or b.restrict, + .register = a.register or b.register, + }; + } + + /// Checks if a has all the qualifiers of b + pub fn hasQuals(a: Qualifiers, b: Qualifiers) bool { + if (b.@"const" and !a.@"const") return false; + if (b.@"volatile" and !a.@"volatile") return false; + if (b.atomic and !a.atomic) return false; + return true; + } + + /// register is a storage class and not actually a qualifier + /// so it is not preserved by typeof() + pub fn inheritFromTypeof(quals: Qualifiers) Qualifiers { + var res = quals; + res.register = false; + return res; + } + + pub const Builder = struct { + @"const": ?TokenIndex = null, + atomic: ?TokenIndex = null, + @"volatile": ?TokenIndex = null, + restrict: ?TokenIndex = null, + + pub fn finish(b: Qualifiers.Builder, p: *Parser, ty: *Type) !void { + if (ty.specifier != .pointer and b.restrict != null) { + try p.errStr(.restrict_non_pointer, b.restrict.?, try p.typeStr(ty.*)); + } + if (b.atomic) |some| { + if (ty.isArray()) try p.errStr(.atomic_array, some, try p.typeStr(ty.*)); + if (ty.isFunc()) try p.errStr(.atomic_func, some, try p.typeStr(ty.*)); + if (ty.hasIncompleteSize()) try p.errStr(.atomic_incomplete, some, try p.typeStr(ty.*)); + } + + ty.qual = .{ + .@"const" = b.@"const" != null, + .atomic = b.atomic != null, + .@"volatile" = b.@"volatile" != null, + .restrict = b.restrict != null, + }; + } + }; +}; + +// TODO improve memory usage +pub const Func = struct { + return_type: Type, + params: []Param, + + pub const Param = struct { + name: []const u8, + ty: Type, + name_tok: TokenIndex, + }; +}; + +pub const Array = struct { + len: u64, + elem: Type, +}; + +pub const Expr = struct { + node: NodeIndex, + ty: Type, +}; + +pub const Attributed = struct { + attributes: []Attribute, + base: Type, + + fn create(allocator: std.mem.Allocator, base: Type, attributes: []const Attribute) !*Attributed { + var attributed_type = try allocator.create(Attributed); + errdefer allocator.destroy(attributed_type); + + const existing = base.getAttributes(); + var all_attrs = try allocator.alloc(Attribute, existing.len + attributes.len); + std.mem.copy(Attribute, all_attrs, existing); + std.mem.copy(Attribute, all_attrs[existing.len..], attributes); + + attributed_type.* = .{ + .attributes = all_attrs, + .base = base, + }; + return attributed_type; + } +}; + +// TODO improve memory usage +pub const Enum = struct { + name: []const u8, + tag_ty: Type, + fields: []Field, + + pub const Field = struct { + name: []const u8, + ty: Type, + name_tok: TokenIndex, + node: NodeIndex, + }; + + pub fn isIncomplete(e: Enum) bool { + return e.fields.len == std.math.maxInt(usize); + } + + pub fn create(allocator: std.mem.Allocator, name: []const u8) !*Enum { + var e = try allocator.create(Enum); + e.name = name; + e.fields.len = std.math.maxInt(usize); + return e; + } +}; + +// TODO improve memory usage +pub const Record = struct { + name: []const u8, + fields: []Field, + size: u64, + alignment: u29, + + pub const Field = struct 
{ + name: []const u8, + ty: Type, + /// zero for anonymous fields + name_tok: TokenIndex = 0, + bit_width: u32 = 0, + + pub fn isAnonymousRecord(f: Field) bool { + return f.name_tok == 0 and f.ty.isRecord(); + } + }; + + pub fn isIncomplete(r: Record) bool { + return r.fields.len == std.math.maxInt(usize); + } + + pub fn create(allocator: std.mem.Allocator, name: []const u8) !*Record { + var r = try allocator.create(Record); + r.name = name; + r.fields.len = std.math.maxInt(usize); + return r; + } +}; + +pub const Specifier = enum { + void, + bool, + + // integers + char, + schar, + uchar, + short, + ushort, + int, + uint, + long, + ulong, + long_long, + ulong_long, + + // floating point numbers + float, + double, + long_double, + complex_float, + complex_double, + complex_long_double, + + // data.sub_type + pointer, + unspecified_variable_len_array, + decayed_unspecified_variable_len_array, + // data.func + /// int foo(int bar, char baz) and int (void) + func, + /// int foo(int bar, char baz, ...) + var_args_func, + /// int foo(bar, baz) and int foo() + /// is also var args, but we can give warnings about incorrect amounts of parameters + old_style_func, + + // data.array + array, + decayed_array, + static_array, + decayed_static_array, + incomplete_array, + decayed_incomplete_array, + // data.expr + variable_len_array, + decayed_variable_len_array, + + // data.record + @"struct", + @"union", + + // data.enum + @"enum", + + /// typeof(type-name) + typeof_type, + /// decayed array created with typeof(type-name) + decayed_typeof_type, + + /// typeof(expression) + typeof_expr, + /// decayed array created with typeof(expression) + decayed_typeof_expr, + + /// data.attributed + attributed, + + /// special type used to implement __builtin_va_start + special_va_start, +}; + +/// All fields of Type except data may be mutated +data: union { + sub_type: *Type, + func: *Func, + array: *Array, + expr: *Expr, + @"enum": *Enum, + record: *Record, + attributed: *Attributed, + none: void, +} = .{ .none = {} }, +specifier: Specifier, +qual: Qualifiers = .{}, + +/// Determine if type matches the given specifier, recursing into typeof +/// types if necessary. 
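+ /// A minimal usage sketch (hypothetical values, not from the test suite):
+ ///   const int_ty: Type = .{ .specifier = .int };
+ ///   std.debug.assert(int_ty.is(.int));
+ /// The check also unwraps attributed types via `get`.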
+pub fn is(ty: Type, specifier: Specifier) bool { + std.debug.assert(specifier != .typeof_type and specifier != .typeof_expr); + return ty.get(specifier) != null; +} + +pub fn withAttributes(self: Type, allocator: std.mem.Allocator, attributes: []const Attribute) !Type { + if (attributes.len == 0) return self; + const attributed_type = try Type.Attributed.create(allocator, self, attributes); + return Type{ .specifier = .attributed, .data = .{ .attributed = attributed_type } }; +} + +pub fn isCallable(ty: Type) ?Type { + return switch (ty.specifier) { + .func, .var_args_func, .old_style_func => ty, + .pointer => if (ty.data.sub_type.isFunc()) ty.data.sub_type.* else null, + .typeof_type => ty.data.sub_type.isCallable(), + .typeof_expr => ty.data.expr.ty.isCallable(), + .attributed => ty.data.attributed.base.isCallable(), + else => null, + }; +} + +pub fn isFunc(ty: Type) bool { + return switch (ty.specifier) { + .func, .var_args_func, .old_style_func => true, + .typeof_type => ty.data.sub_type.isFunc(), + .typeof_expr => ty.data.expr.ty.isFunc(), + .attributed => ty.data.attributed.base.isFunc(), + else => false, + }; +} + +pub fn isArray(ty: Type) bool { + return switch (ty.specifier) { + .array, .static_array, .incomplete_array, .variable_len_array, .unspecified_variable_len_array => true, + .typeof_type => ty.data.sub_type.isArray(), + .typeof_expr => ty.data.expr.ty.isArray(), + .attributed => ty.data.attributed.base.isArray(), + else => false, + }; +} + +pub fn isPtr(ty: Type) bool { + return switch (ty.specifier) { + .pointer, + .decayed_array, + .decayed_static_array, + .decayed_incomplete_array, + .decayed_variable_len_array, + .decayed_unspecified_variable_len_array, + .decayed_typeof_type, + .decayed_typeof_expr, + => true, + .typeof_type => ty.data.sub_type.isPtr(), + .typeof_expr => ty.data.expr.ty.isPtr(), + .attributed => ty.data.attributed.base.isPtr(), + else => false, + }; +} + +pub fn isInt(ty: Type) bool { + return switch (ty.specifier) { + .@"enum", .bool, .char, .schar, .uchar, .short, .ushort, .int, .uint, .long, .ulong, .long_long, .ulong_long => true, + .typeof_type => ty.data.sub_type.isInt(), + .typeof_expr => ty.data.expr.ty.isInt(), + .attributed => ty.data.attributed.base.isInt(), + else => false, + }; +} + +pub fn isFloat(ty: Type) bool { + return switch (ty.specifier) { + .float, .double, .long_double, .complex_float, .complex_double, .complex_long_double => true, + .typeof_type => ty.data.sub_type.isFloat(), + .typeof_expr => ty.data.expr.ty.isFloat(), + .attributed => ty.data.attributed.base.isFloat(), + else => false, + }; +} + +pub fn isReal(ty: Type) bool { + return switch (ty.specifier) { + .complex_float, .complex_double, .complex_long_double => false, + .typeof_type => ty.data.sub_type.isReal(), + .typeof_expr => ty.data.expr.ty.isReal(), + .attributed => ty.data.attributed.base.isReal(), + else => true, + }; +} + +pub fn isVoidStar(ty: Type) bool { + return switch (ty.specifier) { + .pointer => ty.data.sub_type.specifier == .void, + .typeof_type => ty.data.sub_type.isVoidStar(), + .typeof_expr => ty.data.expr.ty.isVoidStar(), + .attributed => ty.data.attributed.base.isVoidStar(), + else => false, + }; +} + +pub fn isTypeof(ty: Type) bool { + return switch (ty.specifier) { + .typeof_type, .typeof_expr, .decayed_typeof_type, .decayed_typeof_expr => true, + else => false, + }; +} + +pub fn isConst(ty: Type) bool { + return switch (ty.specifier) { + .typeof_type, .decayed_typeof_type => ty.qual.@"const" or ty.data.sub_type.isConst(), + .typeof_expr, 
.decayed_typeof_expr => ty.qual.@"const" or ty.data.expr.ty.isConst(), + .attributed => ty.data.attributed.base.isConst(), + else => ty.qual.@"const", + }; +} + +pub fn isUnsignedInt(ty: Type, comp: *Compilation) bool { + return switch (ty.specifier) { + .char => return getCharSignedness(comp) == .unsigned, + .uchar, .ushort, .uint, .ulong, .ulong_long, .bool => true, + .typeof_type => ty.data.sub_type.isUnsignedInt(comp), + .typeof_expr => ty.data.expr.ty.isUnsignedInt(comp), + .attributed => ty.data.attributed.base.isUnsignedInt(comp), + else => false, + }; +} + +pub fn isEnumOrRecord(ty: Type) bool { + return switch (ty.specifier) { + .@"enum", .@"struct", .@"union" => true, + .typeof_type => ty.data.sub_type.isEnumOrRecord(), + .typeof_expr => ty.data.expr.ty.isEnumOrRecord(), + .attributed => ty.data.attributed.base.isEnumOrRecord(), + else => false, + }; +} + +pub fn isRecord(ty: Type) bool { + return switch (ty.specifier) { + .@"struct", .@"union" => true, + .typeof_type => ty.data.sub_type.isRecord(), + .typeof_expr => ty.data.expr.ty.isRecord(), + .attributed => ty.data.attributed.base.isRecord(), + else => false, + }; +} + +pub fn isAnonymousRecord(ty: Type) bool { + return switch (ty.specifier) { + // anonymous records can be recognized by their names which are in + // the format "(anonymous TAG at path:line:col)". + .@"struct", .@"union" => ty.data.record.name[0] == '(', + .typeof_type => ty.data.sub_type.isAnonymousRecord(), + .typeof_expr => ty.data.expr.ty.isAnonymousRecord(), + .attributed => ty.data.attributed.base.isAnonymousRecord(), + else => false, + }; +} + +pub fn elemType(ty: Type) Type { + return switch (ty.specifier) { + .pointer, .unspecified_variable_len_array, .decayed_unspecified_variable_len_array => ty.data.sub_type.*, + .array, .static_array, .incomplete_array, .decayed_array, .decayed_static_array, .decayed_incomplete_array => ty.data.array.elem, + .variable_len_array, .decayed_variable_len_array => ty.data.expr.ty, + .typeof_type, .decayed_typeof_type, .typeof_expr, .decayed_typeof_expr => { + const unwrapped = ty.canonicalize(.preserve_quals); + var elem = unwrapped.elemType(); + elem.qual = elem.qual.mergeAll(unwrapped.qual); + return elem; + }, + .attributed => ty.data.attributed.base, + else => unreachable, + }; +} + +pub fn returnType(ty: Type) Type { + return switch (ty.specifier) { + .func, .var_args_func, .old_style_func => ty.data.func.return_type, + .typeof_type, .decayed_typeof_type => ty.data.sub_type.returnType(), + .typeof_expr, .decayed_typeof_expr => ty.data.expr.ty.returnType(), + .attributed => ty.data.attributed.base.returnType(), + else => unreachable, + }; +} + +pub fn params(ty: Type) []Func.Param { + return switch (ty.specifier) { + .func, .var_args_func, .old_style_func => ty.data.func.params, + .typeof_type, .decayed_typeof_type => ty.data.sub_type.params(), + .typeof_expr, .decayed_typeof_expr => ty.data.expr.ty.params(), + .attributed => ty.data.attributed.base.params(), + else => unreachable, + }; +} + +pub fn arrayLen(ty: Type) ?usize { + return switch (ty.specifier) { + .array, .static_array, .decayed_array, .decayed_static_array => ty.data.array.len, + .typeof_type, .decayed_typeof_type => ty.data.sub_type.arrayLen(), + .typeof_expr, .decayed_typeof_expr => ty.data.expr.ty.arrayLen(), + .attributed => ty.data.attributed.base.arrayLen(), + else => null, + }; +} + +pub fn anyQual(ty: Type) bool { + return switch (ty.specifier) { + .typeof_type => ty.qual.any() or ty.data.sub_type.anyQual(), + .typeof_expr => ty.qual.any() or 
ty.data.expr.ty.anyQual(), + else => ty.qual.any(), + }; +} + +pub fn getAttributes(ty: Type) []const Attribute { + return switch (ty.specifier) { + .attributed => ty.data.attributed.attributes, + .typeof_type, .decayed_typeof_type => ty.data.sub_type.getAttributes(), + .typeof_expr, .decayed_typeof_expr => ty.data.expr.ty.getAttributes(), + else => &.{}, + }; +} + +pub fn integerPromotion(ty: Type, comp: *Compilation) Type { + var specifier = ty.specifier; + if (specifier == .@"enum") { + if (ty.hasIncompleteSize()) return .{ .specifier = .int }; + specifier = ty.data.@"enum".tag_ty.specifier; + } + return .{ + .specifier = switch (specifier) { + .bool, .char, .schar, .uchar, .short => .int, + .ushort => if (ty.sizeof(comp).? == sizeof(.{ .specifier = .int }, comp)) Specifier.uint else .int, + .int => .int, + .uint => .uint, + .long => .long, + .ulong => .ulong, + .long_long => .long_long, + .ulong_long => .ulong_long, + .typeof_type => return ty.data.sub_type.integerPromotion(comp), + .typeof_expr => return ty.data.expr.ty.integerPromotion(comp), + .attributed => return ty.data.attributed.base.integerPromotion(comp), + else => unreachable, // not an integer type + }, + }; +} + +pub fn hasIncompleteSize(ty: Type) bool { + return switch (ty.specifier) { + .void, .incomplete_array => true, + .@"enum" => ty.data.@"enum".isIncomplete(), + .@"struct", .@"union" => ty.data.record.isIncomplete(), + .array, .static_array => ty.data.array.elem.hasIncompleteSize(), + .typeof_type => ty.data.sub_type.hasIncompleteSize(), + .typeof_expr => ty.data.expr.ty.hasIncompleteSize(), + .attributed => ty.data.attributed.base.hasIncompleteSize(), + else => false, + }; +} + +pub fn hasUnboundVLA(ty: Type) bool { + var cur = ty; + while (true) { + switch (cur.specifier) { + .unspecified_variable_len_array, + .decayed_unspecified_variable_len_array, + => return true, + .array, + .static_array, + .incomplete_array, + .variable_len_array, + .decayed_array, + .decayed_static_array, + .decayed_incomplete_array, + .decayed_variable_len_array, + => cur = cur.elemType(), + .typeof_type, .decayed_typeof_type => cur = cur.data.sub_type.*, + .typeof_expr, .decayed_typeof_expr => cur = cur.data.expr.ty, + .attributed => cur = cur.data.attributed.base, + else => return false, + } + } +} + +pub fn hasField(ty: Type, name: []const u8) bool { + switch (ty.specifier) { + .@"struct" => { + std.debug.assert(!ty.data.record.isIncomplete()); + for (ty.data.record.fields) |f| { + if (f.isAnonymousRecord() and f.ty.hasField(name)) return true; + if (std.mem.eql(u8, name, f.name)) return true; + } + }, + .@"union" => { + std.debug.assert(!ty.data.record.isIncomplete()); + for (ty.data.record.fields) |f| { + if (f.isAnonymousRecord() and f.ty.hasField(name)) return true; + if (std.mem.eql(u8, name, f.name)) return true; + } + }, + .typeof_type => return ty.data.sub_type.hasField(name), + .typeof_expr => return ty.data.expr.ty.hasField(name), + .attributed => return ty.data.attributed.base.hasField(name), + else => unreachable, + } + return false; +} + +pub fn getCharSignedness(comp: *Compilation) std.builtin.Signedness { + switch (comp.target.cpu.arch) { + .aarch64, + .aarch64_32, + .aarch64_be, + .arm, + .armeb, + .thumb, + .thumbeb, + => return if (comp.target.os.tag.isDarwin() or comp.target.os.tag == .windows) .signed else .unsigned, + .powerpc, .powerpc64 => return if (comp.target.os.tag.isDarwin()) .signed else .unsigned, + .powerpc64le, + .s390x, + .xcore, + .arc, + => return .unsigned, + else => return .signed, + } +} + +/// 
Size of type as reported by sizeof +pub fn sizeof(ty: Type, comp: *Compilation) ?u64 { + // TODO get target from compilation + return switch (ty.specifier) { + .variable_len_array, .unspecified_variable_len_array, .incomplete_array => return null, + .func, .var_args_func, .old_style_func, .void, .bool => 1, + .char, .schar, .uchar => 1, + .short, .ushort => 2, + .int, .uint => 4, + .long, .ulong => switch (comp.target.os.tag) { + .linux, + .macos, + .freebsd, + .netbsd, + .dragonfly, + .openbsd, + .wasi, + .emscripten, + => comp.target.cpu.arch.ptrBitWidth() >> 3, + .windows, .uefi => 4, + else => 4, + }, + .long_long, .ulong_long => 8, + .float => 4, + .double => 8, + .long_double => 16, + .complex_float => 8, + .complex_double => 16, + .complex_long_double => 32, + .pointer, + .decayed_array, + .decayed_static_array, + .decayed_incomplete_array, + .decayed_variable_len_array, + .decayed_unspecified_variable_len_array, + .decayed_typeof_type, + .decayed_typeof_expr, + .static_array, + => comp.target.cpu.arch.ptrBitWidth() >> 3, + .array => ty.data.array.elem.sizeof(comp).? * ty.data.array.len, + .@"struct", .@"union" => if (ty.data.record.isIncomplete()) null else ty.data.record.size, + .@"enum" => if (ty.data.@"enum".isIncomplete()) null else ty.data.@"enum".tag_ty.sizeof(comp), + .typeof_type => ty.data.sub_type.sizeof(comp), + .typeof_expr => ty.data.expr.ty.sizeof(comp), + .attributed => ty.data.attributed.base.sizeof(comp), + else => unreachable, + }; +} + +pub fn bitSizeof(ty: Type, comp: *Compilation) ?u64 { + return switch (ty.specifier) { + .bool => 1, + .typeof_type, .decayed_typeof_type => ty.data.sub_type.bitSizeof(comp), + .typeof_expr, .decayed_typeof_expr => ty.data.expr.ty.bitSizeof(comp), + .attributed => ty.data.attributed.base.bitSizeof(comp), + else => 8 * (ty.sizeof(comp) orelse return null), + }; +} + +/// Get the alignment of a type +pub fn alignof(ty: Type, comp: *const Compilation) u29 { + if (ty.requestedAlignment(comp)) |requested| return requested; + + // TODO get target from compilation + return switch (ty.specifier) { + .unspecified_variable_len_array => unreachable, // must be bound in function definition + .variable_len_array, .incomplete_array => ty.elemType().alignof(comp), + .func, .var_args_func, .old_style_func => 4, // TODO check target + .char, .schar, .uchar, .void, .bool => 1, + .short, .ushort => 2, + .int, .uint => 4, + .long, .ulong => switch (comp.target.os.tag) { + .linux, + .macos, + .freebsd, + .netbsd, + .dragonfly, + .openbsd, + .wasi, + .emscripten, + => comp.target.cpu.arch.ptrBitWidth() >> 3, + .windows, .uefi => 4, + else => 4, + }, + .long_long, .ulong_long => 8, + .float, .complex_float => 4, + .double, .complex_double => 8, + .long_double, .complex_long_double => 16, + .pointer, + .decayed_array, + .decayed_static_array, + .decayed_incomplete_array, + .decayed_variable_len_array, + .decayed_unspecified_variable_len_array, + .static_array, + => comp.target.cpu.arch.ptrBitWidth() >> 3, + .array => ty.data.array.elem.alignof(comp), + .@"struct", .@"union" => if (ty.data.record.isIncomplete()) 0 else ty.data.record.alignment, + .@"enum" => if (ty.data.@"enum".isIncomplete()) 0 else ty.data.@"enum".tag_ty.alignof(comp), + .typeof_type, .decayed_typeof_type => ty.data.sub_type.alignof(comp), + .typeof_expr, .decayed_typeof_expr => ty.data.expr.ty.alignof(comp), + .attributed => ty.data.attributed.base.alignof(comp), + else => unreachable, + }; +} + +/// Canonicalize a possibly-typeof() type. 
If the type is not a typeof() type, simply +/// return it. Otherwise, determine the actual qualified type. +/// The `qual_handling` parameter can be used to return the full set of qualifiers +/// added by typeof() operations, which is useful when determining the elemType of +/// arrays and pointers. +pub fn canonicalize(ty: Type, qual_handling: enum { standard, preserve_quals }) Type { + var cur = ty; + if (cur.specifier == .attributed) cur = cur.data.attributed.base; + if (!cur.isTypeof()) return cur; + + var qual = cur.qual; + while (true) { + switch (cur.specifier) { + .typeof_type => cur = cur.data.sub_type.*, + .typeof_expr => cur = cur.data.expr.ty, + .decayed_typeof_type => { + cur = cur.data.sub_type.*; + cur.decayArray(); + }, + .decayed_typeof_expr => { + cur = cur.data.expr.ty; + cur.decayArray(); + }, + else => break, + } + qual = qual.mergeAll(cur.qual); + } + if ((cur.isArray() or cur.isPtr()) and qual_handling == .standard) { + cur.qual = .{}; + } else { + cur.qual = qual; + } + return cur; +} + +pub fn get(ty: *const Type, specifier: Specifier) ?*const Type { + std.debug.assert(specifier != .typeof_type and specifier != .typeof_expr); + return switch (ty.specifier) { + .typeof_type => ty.data.sub_type.get(specifier), + .typeof_expr => ty.data.expr.ty.get(specifier), + .attributed => ty.data.attributed.base.get(specifier), + else => if (ty.specifier == specifier) ty else null, + }; +} + +fn requestedAlignment(ty: Type, comp: *const Compilation) ?u29 { + return switch (ty.specifier) { + .typeof_type, .decayed_typeof_type => ty.data.sub_type.requestedAlignment(comp), + .typeof_expr, .decayed_typeof_expr => ty.data.expr.ty.requestedAlignment(comp), + .attributed => { + var max_requested: ?u29 = null; + for (ty.data.attributed.attributes) |attribute| { + if (attribute.tag != .aligned) continue; + const requested = if (attribute.args.aligned.alignment) |alignment| + alignment.requested + else + comp.defaultAlignment(); + + if (max_requested == null or max_requested.? 
< requested) { + max_requested = requested; + } + } + return max_requested; + }, + else => null, + }; +} + +pub fn eql(a_param: Type, b_param: Type, comp: *const Compilation, check_qualifiers: bool) bool { + const a = a_param.canonicalize(.standard); + const b = b_param.canonicalize(.standard); + + if (a.alignof(comp) != b.alignof(comp)) return false; + if (a.isPtr()) { + if (!b.isPtr()) return false; + } else if (a.isFunc()) { + if (!b.isFunc()) return false; + } else if (a.isArray()) { + if (!b.isArray()) return false; + } else if (a.specifier != b.specifier) return false; + + if (a.qual.atomic != b.qual.atomic) return false; + if (check_qualifiers) { + if (a.qual.@"const" != b.qual.@"const") return false; + if (a.qual.@"volatile" != b.qual.@"volatile") return false; + } + + switch (a.specifier) { + .pointer, + .decayed_array, + .decayed_static_array, + .decayed_incomplete_array, + .decayed_variable_len_array, + .decayed_unspecified_variable_len_array, + => if (!a_param.elemType().eql(b_param.elemType(), comp, check_qualifiers)) return false, + + .func, + .var_args_func, + .old_style_func, + => { + // TODO validate this + if (a.data.func.params.len != b.data.func.params.len) return false; + // return type cannot have qualifiers + if (!a.returnType().eql(b.returnType(), comp, false)) return false; + for (a.data.func.params) |param, i| { + var a_unqual = param.ty; + a_unqual.qual.@"const" = false; + a_unqual.qual.@"volatile" = false; + var b_unqual = b.data.func.params[i].ty; + b_unqual.qual.@"const" = false; + b_unqual.qual.@"volatile" = false; + if (!a_unqual.eql(b_unqual, comp, check_qualifiers)) return false; + } + }, + + .array, + .static_array, + .incomplete_array, + => { + if (!std.meta.eql(a.arrayLen(), b.arrayLen())) return false; + if (!a.elemType().eql(b.elemType(), comp, check_qualifiers)) return false; + }, + .variable_len_array => if (!a.elemType().eql(b.elemType(), comp, check_qualifiers)) return false, + + .@"struct", .@"union" => if (a.data.record != b.data.record) return false, + .@"enum" => if (a.data.@"enum" != b.data.@"enum") return false, + + else => {}, + } + return true; +} + +/// Decays an array to a pointer +pub fn decayArray(ty: *Type) void { + // the decayed array type is the current specifier +1 + ty.specifier = @intToEnum(Type.Specifier, @enumToInt(ty.specifier) + 1); +} + +pub fn combine(inner: *Type, outer: Type, p: *Parser, source_tok: TokenIndex) Parser.Error!void { + switch (inner.specifier) { + .pointer => return inner.data.sub_type.combine(outer, p, source_tok), + .unspecified_variable_len_array => { + try inner.data.sub_type.combine(outer, p, source_tok); + }, + .variable_len_array => { + try inner.data.expr.ty.combine(outer, p, source_tok); + }, + .array, .static_array, .incomplete_array => { + try inner.data.array.elem.combine(outer, p, source_tok); + }, + .func, .var_args_func, .old_style_func => { + try inner.data.func.return_type.combine(outer, p, source_tok); + }, + .decayed_array, + .decayed_static_array, + .decayed_incomplete_array, + .decayed_variable_len_array, + .decayed_unspecified_variable_len_array, + .decayed_typeof_type, + .decayed_typeof_expr, + => unreachable, // type should not be able to decay before being combined + else => inner.* = outer, + } +} + +pub fn validateCombinedType(ty: Type, p: *Parser, source_tok: TokenIndex) Parser.Error!void { + switch (ty.specifier) { + .pointer => return ty.data.sub_type.validateCombinedType(p, source_tok), + .unspecified_variable_len_array, + .variable_len_array, + .array, + .static_array, + 
.incomplete_array, + => { + const elem_ty = ty.elemType(); + if (elem_ty.hasIncompleteSize()) { + try p.errStr(.array_incomplete_elem, source_tok, try p.typeStr(elem_ty)); + return error.ParsingFailed; + } + if (elem_ty.isFunc()) { + try p.errTok(.array_func_elem, source_tok); + return error.ParsingFailed; + } + if (elem_ty.specifier == .static_array and elem_ty.isArray()) { + try p.errTok(.static_non_outermost_array, source_tok); + } + if (elem_ty.anyQual() and elem_ty.isArray()) { + try p.errTok(.qualifier_non_outermost_array, source_tok); + } + }, + .func, .var_args_func, .old_style_func => { + const ret_ty = &ty.data.func.return_type; + if (ret_ty.isArray()) try p.errTok(.func_cannot_return_array, source_tok); + if (ret_ty.isFunc()) try p.errTok(.func_cannot_return_func, source_tok); + if (ret_ty.qual.@"const") { + try p.errStr(.qual_on_ret_type, source_tok, "const"); + ret_ty.qual.@"const" = false; + } + if (ret_ty.qual.@"volatile") { + try p.errStr(.qual_on_ret_type, source_tok, "volatile"); + ret_ty.qual.@"volatile" = false; + } + if (ret_ty.qual.atomic) { + try p.errStr(.qual_on_ret_type, source_tok, "atomic"); + ret_ty.qual.atomic = false; + } + }, + .typeof_type, .decayed_typeof_type => return ty.data.sub_type.validateCombinedType(p, source_tok), + .typeof_expr, .decayed_typeof_expr => return ty.data.expr.ty.validateCombinedType(p, source_tok), + .attributed => return ty.data.attributed.base.validateCombinedType(p, source_tok), + else => {}, + } +} + +/// An unfinished Type +pub const Builder = struct { + typedef: ?struct { + tok: TokenIndex, + ty: Type, + } = null, + specifier: Builder.Specifier = .none, + qual: Qualifiers.Builder = .{}, + typeof: ?Type = null, + /// When true an error is returned instead of adding a diagnostic message. + /// Used for trying to combine typedef types. 
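+ /// (see combineTypedef below, which sets this and treats error.CannotCombine as a soft failure)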
+ error_on_invalid: bool = false, + + pub const Specifier = union(enum) { + none, + void, + bool, + char, + schar, + uchar, + + unsigned, + signed, + short, + sshort, + ushort, + short_int, + sshort_int, + ushort_int, + int, + sint, + uint, + long, + slong, + ulong, + long_int, + slong_int, + ulong_int, + long_long, + slong_long, + ulong_long, + long_long_int, + slong_long_int, + ulong_long_int, + + float, + double, + long_double, + complex, + complex_long, + complex_float, + complex_double, + complex_long_double, + + pointer: *Type, + unspecified_variable_len_array: *Type, + decayed_unspecified_variable_len_array: *Type, + func: *Func, + var_args_func: *Func, + old_style_func: *Func, + array: *Array, + decayed_array: *Array, + static_array: *Array, + decayed_static_array: *Array, + incomplete_array: *Array, + decayed_incomplete_array: *Array, + variable_len_array: *Expr, + decayed_variable_len_array: *Expr, + @"struct": *Record, + @"union": *Record, + @"enum": *Enum, + typeof_type: *Type, + decayed_typeof_type: *Type, + typeof_expr: *Expr, + decayed_typeof_expr: *Expr, + + attributed: *Attributed, + + pub fn str(spec: Builder.Specifier) ?[]const u8 { + return switch (spec) { + .none => unreachable, + .void => "void", + .bool => "_Bool", + .char => "char", + .schar => "signed char", + .uchar => "unsigned char", + .unsigned => "unsigned", + .signed => "signed", + .short => "short", + .ushort => "unsigned short", + .sshort => "signed short", + .short_int => "short int", + .sshort_int => "signed short int", + .ushort_int => "unsigned short int", + .int => "int", + .sint => "signed int", + .uint => "unsigned int", + .long => "long", + .slong => "signed long", + .ulong => "unsigned long", + .long_int => "long int", + .slong_int => "signed long int", + .ulong_int => "unsigned long int", + .long_long => "long long", + .slong_long => "signed long long", + .ulong_long => "unsigned long long", + .long_long_int => "long long int", + .slong_long_int => "signed long long int", + .ulong_long_int => "unsigned long long int", + + .float => "float", + .double => "double", + .long_double => "long double", + .complex => "_Complex", + .complex_long => "_Complex long", + .complex_float => "_Complex float", + .complex_double => "_Complex double", + .complex_long_double => "_Complex long double", + + .attributed => |attributed| Builder.fromType(attributed.base).str(), + + else => null, + }; + } + }; + + pub fn finish(b: Builder, p: *Parser, attr_buf_start: usize) Parser.Error!Type { + var ty: Type = .{ .specifier = undefined }; + switch (b.specifier) { + .none => { + if (b.typeof) |typeof| { + ty = typeof; + } else { + ty.specifier = .int; + try p.err(.missing_type_specifier); + } + }, + .void => ty.specifier = .void, + .bool => ty.specifier = .bool, + .char => ty.specifier = .char, + .schar => ty.specifier = .schar, + .uchar => ty.specifier = .uchar, + + .unsigned => ty.specifier = .uint, + .signed => ty.specifier = .int, + .short_int, .sshort_int, .short, .sshort => ty.specifier = .short, + .ushort, .ushort_int => ty.specifier = .ushort, + .int, .sint => ty.specifier = .int, + .uint => ty.specifier = .uint, + .long, .slong, .long_int, .slong_int => ty.specifier = .long, + .ulong, .ulong_int => ty.specifier = .ulong, + .long_long, .slong_long, .long_long_int, .slong_long_int => ty.specifier = .long_long, + .ulong_long, .ulong_long_int => ty.specifier = .ulong_long, + + .float => ty.specifier = .float, + .double => ty.specifier = .double, + .long_double => ty.specifier = .long_double, + .complex_float => 
+
+    pub fn finish(b: Builder, p: *Parser, attr_buf_start: usize) Parser.Error!Type {
+        var ty: Type = .{ .specifier = undefined };
+        switch (b.specifier) {
+            .none => {
+                if (b.typeof) |typeof| {
+                    ty = typeof;
+                } else {
+                    ty.specifier = .int;
+                    try p.err(.missing_type_specifier);
+                }
+            },
+            .void => ty.specifier = .void,
+            .bool => ty.specifier = .bool,
+            .char => ty.specifier = .char,
+            .schar => ty.specifier = .schar,
+            .uchar => ty.specifier = .uchar,
+
+            .unsigned => ty.specifier = .uint,
+            .signed => ty.specifier = .int,
+            .short_int, .sshort_int, .short, .sshort => ty.specifier = .short,
+            .ushort, .ushort_int => ty.specifier = .ushort,
+            .int, .sint => ty.specifier = .int,
+            .uint => ty.specifier = .uint,
+            .long, .slong, .long_int, .slong_int => ty.specifier = .long,
+            .ulong, .ulong_int => ty.specifier = .ulong,
+            .long_long, .slong_long, .long_long_int, .slong_long_int => ty.specifier = .long_long,
+            .ulong_long, .ulong_long_int => ty.specifier = .ulong_long,
+
+            .float => ty.specifier = .float,
+            .double => ty.specifier = .double,
+            .long_double => ty.specifier = .long_double,
+            .complex_float => ty.specifier = .complex_float,
+            .complex_double => ty.specifier = .complex_double,
+            .complex_long_double => ty.specifier = .complex_long_double,
+            .complex => {
+                try p.errTok(.plain_complex, p.tok_i - 1);
+                ty.specifier = .complex_double;
+            },
+            .complex_long => {
+                try p.errExtra(.type_is_invalid, p.tok_i, .{ .str = b.specifier.str().? });
+                return error.ParsingFailed;
+            },
+
+            .pointer => |data| {
+                ty.specifier = .pointer;
+                ty.data = .{ .sub_type = data };
+            },
+            .unspecified_variable_len_array => |data| {
+                ty.specifier = .unspecified_variable_len_array;
+                ty.data = .{ .sub_type = data };
+            },
+            .decayed_unspecified_variable_len_array => |data| {
+                ty.specifier = .decayed_unspecified_variable_len_array;
+                ty.data = .{ .sub_type = data };
+            },
+            .func => |data| {
+                ty.specifier = .func;
+                ty.data = .{ .func = data };
+            },
+            .var_args_func => |data| {
+                ty.specifier = .var_args_func;
+                ty.data = .{ .func = data };
+            },
+            .old_style_func => |data| {
+                ty.specifier = .old_style_func;
+                ty.data = .{ .func = data };
+            },
+            .array => |data| {
+                ty.specifier = .array;
+                ty.data = .{ .array = data };
+            },
+            .decayed_array => |data| {
+                ty.specifier = .decayed_array;
+                ty.data = .{ .array = data };
+            },
+            .static_array => |data| {
+                ty.specifier = .static_array;
+                ty.data = .{ .array = data };
+            },
+            .decayed_static_array => |data| {
+                ty.specifier = .decayed_static_array;
+                ty.data = .{ .array = data };
+            },
+            .incomplete_array => |data| {
+                ty.specifier = .incomplete_array;
+                ty.data = .{ .array = data };
+            },
+            .decayed_incomplete_array => |data| {
+                ty.specifier = .decayed_incomplete_array;
+                ty.data = .{ .array = data };
+            },
+            .variable_len_array => |data| {
+                ty.specifier = .variable_len_array;
+                ty.data = .{ .expr = data };
+            },
+            .decayed_variable_len_array => |data| {
+                ty.specifier = .decayed_variable_len_array;
+                ty.data = .{ .expr = data };
+            },
+            .@"struct" => |data| {
+                ty.specifier = .@"struct";
+                ty.data = .{ .record = data };
+            },
+            .@"union" => |data| {
+                ty.specifier = .@"union";
+                ty.data = .{ .record = data };
+            },
+            .@"enum" => |data| {
+                ty.specifier = .@"enum";
+                ty.data = .{ .@"enum" = data };
+            },
+            .typeof_type => |data| {
+                ty.specifier = .typeof_type;
+                ty.data = .{ .sub_type = data };
+            },
+            .decayed_typeof_type => |data| {
+                ty.specifier = .decayed_typeof_type;
+                ty.data = .{ .sub_type = data };
+            },
+            .typeof_expr => |data| {
+                ty.specifier = .typeof_expr;
+                ty.data = .{ .expr = data };
+            },
+            .decayed_typeof_expr => |data| {
+                ty.specifier = .decayed_typeof_expr;
+                ty.data = .{ .expr = data };
+            },
+            .attributed => |data| {
+                ty.specifier = .attributed;
+                ty.data = .{ .attributed = data };
+            },
+        }
+        try b.qual.finish(p, &ty);
+
+        return p.withAttributes(ty, attr_buf_start);
+    }
+
+    fn cannotCombine(b: Builder, p: *Parser, source_tok: TokenIndex) !void {
+        if (b.error_on_invalid) return error.CannotCombine;
+        const ty_str = b.specifier.str() orelse try p.typeStr(try b.finish(p, p.attr_buf.len));
+        try p.errExtra(.cannot_combine_spec, source_tok, .{ .str = ty_str });
+        if (b.typedef) |some| try p.errStr(.spec_from_typedef, some.tok, try p.typeStr(some.ty));
+    }
+
+    fn duplicateSpec(b: *Builder, p: *Parser, spec: []const u8) !void {
+        if (b.error_on_invalid) return error.CannotCombine;
+        try p.errStr(.duplicate_decl_spec, p.tok_i, spec);
+    }
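
The `Builder` accumulates C declaration specifiers one token at a time and only materializes a concrete `Type` in `finish`. A minimal sketch of the flow for `unsigned long long x;` (hypothetical call site: `p` is assumed to be a valid `*Parser`, and `tok0`..`tok2` are hypothetical token indices):

```zig
var b: Type.Builder = .{};
try b.combine(p, .unsigned, tok0); // .none -> .unsigned
try b.combine(p, .long, tok1); // .unsigned -> .ulong
try b.combine(p, .long, tok2); // .ulong -> .ulong_long
const ty = try b.finish(p, p.attr_buf.len); // ty.specifier == .ulong_long
```

Note that an out-of-order spelling like `long unsigned long` reaches the same specifier, which matches C's unordered specifier rules.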
+
+    pub fn combineFromTypeof(b: *Builder, p: *Parser, new: Type, source_tok: TokenIndex) Compilation.Error!void {
+        if (b.typeof != null) return p.errStr(.cannot_combine_spec, source_tok, "typeof");
+        if (b.specifier != .none) return p.errStr(.invalid_typeof, source_tok, @tagName(b.specifier));
+        const inner = switch (new.specifier) {
+            .typeof_type => new.data.sub_type.*,
+            .typeof_expr => new.data.expr.ty,
+            else => unreachable,
+        };
+
+        b.typeof = switch (inner.specifier) {
+            .attributed => inner.data.attributed.base,
+            else => new,
+        };
+    }
+
+    /// Try to combine the type from a typedef; returns true if successful.
+    pub fn combineTypedef(b: *Builder, p: *Parser, typedef_ty: Type, name_tok: TokenIndex) bool {
+        b.error_on_invalid = true;
+        defer b.error_on_invalid = false;
+
+        const new_spec = fromType(typedef_ty);
+        b.combineExtra(p, new_spec, 0) catch |err| switch (err) {
+            error.FatalError => unreachable, // we do not add any diagnostics
+            error.OutOfMemory => unreachable, // we do not add any diagnostics
+            error.ParsingFailed => unreachable, // we do not add any diagnostics
+            error.CannotCombine => return false,
+        };
+        b.typedef = .{ .tok = name_tok, .ty = typedef_ty };
+        return true;
+    }
+
+    pub fn combine(b: *Builder, p: *Parser, new: Builder.Specifier, source_tok: TokenIndex) !void {
+        b.combineExtra(p, new, source_tok) catch |err| switch (err) {
+            error.CannotCombine => unreachable,
+            else => |e| return e,
+        };
+    }
+
+    fn combineExtra(b: *Builder, p: *Parser, new: Builder.Specifier, source_tok: TokenIndex) !void {
+        if (b.typeof != null) {
+            if (b.error_on_invalid) return error.CannotCombine;
+            try p.errStr(.invalid_typeof, source_tok, @tagName(new));
+        }
+
+        switch (new) {
+            else => switch (b.specifier) {
+                .none => b.specifier = new,
+                else => return b.cannotCombine(p, source_tok),
+            },
+            .signed => b.specifier = switch (b.specifier) {
+                .none => .signed,
+                .char => .schar,
+                .short => .sshort,
+                .short_int => .sshort_int,
+                .int => .sint,
+                .long => .slong,
+                .long_int => .slong_int,
+                .long_long => .slong_long,
+                .long_long_int => .slong_long_int,
+                .sshort,
+                .sshort_int,
+                .sint,
+                .slong,
+                .slong_int,
+                .slong_long,
+                .slong_long_int,
+                => return b.duplicateSpec(p, "signed"),
+                else => return b.cannotCombine(p, source_tok),
+            },
+            .unsigned => b.specifier = switch (b.specifier) {
+                .none => .unsigned,
+                .char => .uchar,
+                .short => .ushort,
+                .short_int => .ushort_int,
+                .int => .uint,
+                .long => .ulong,
+                .long_int => .ulong_int,
+                .long_long => .ulong_long,
+                .long_long_int => .ulong_long_int,
+                .ushort,
+                .ushort_int,
+                .uint,
+                .ulong,
+                .ulong_int,
+                .ulong_long,
+                .ulong_long_int,
+                => return b.duplicateSpec(p, "unsigned"),
+                else => return b.cannotCombine(p, source_tok),
+            },
+            .char => b.specifier = switch (b.specifier) {
+                .none => .char,
+                .unsigned => .uchar,
+                .signed => .schar,
+                .char, .schar, .uchar => return b.duplicateSpec(p, "char"),
+                else => return b.cannotCombine(p, source_tok),
+            },
+            .short => b.specifier = switch (b.specifier) {
+                .none => .short,
+                .unsigned => .ushort,
+                .signed => .sshort,
+                else => return b.cannotCombine(p, source_tok),
+            },
+            .int => b.specifier = switch (b.specifier) {
+                .none => .int,
+                .signed => .sint,
+                .unsigned => .uint,
+                .short => .short_int,
+                .sshort => .sshort_int,
+                .ushort => .ushort_int,
+                .long => .long_int,
+                .slong => .slong_int,
+                .ulong => .ulong_int,
+                .long_long => .long_long_int,
+                .slong_long => .slong_long_int,
+                .ulong_long => .ulong_long_int,
+                .int,
+                .sint,
+                .uint,
+                .short_int,
+                .sshort_int,
+                .ushort_int,
+                .long_int,
+                .slong_int,
+                .ulong_int,
+                .long_long_int,
+                .slong_long_int,
+                .ulong_long_int,
+                => return b.duplicateSpec(p, "int"),
+                else => return b.cannotCombine(p, source_tok),
+            },
+            .long => b.specifier = switch (b.specifier) {
+                .none => .long,
+                .long => .long_long,
+                .unsigned => .ulong,
+                .signed => .slong,
+                .int => .long_int,
+                .sint => .slong_int,
+                .ulong => .ulong_long,
+                .long_long, .ulong_long => return b.duplicateSpec(p, "long"),
+                .complex => .complex_long,
+                else => return b.cannotCombine(p, source_tok),
+            },
+            .float => b.specifier = switch (b.specifier) {
+                .none => .float,
+                .complex => .complex_float,
+                .complex_float, .float => return b.duplicateSpec(p, "float"),
+                else => return b.cannotCombine(p, source_tok),
+            },
+            .double => b.specifier = switch (b.specifier) {
+                .none => .double,
+                .long => .long_double,
+                .complex_long => .complex_long_double,
+                .complex => .complex_double,
+                .long_double,
+                .complex_long_double,
+                .complex_double,
+                .double,
+                => return b.duplicateSpec(p, "double"),
+                else => return b.cannotCombine(p, source_tok),
+            },
+            .complex => b.specifier = switch (b.specifier) {
+                .none => .complex,
+                .long => .complex_long,
+                .float => .complex_float,
+                .double => .complex_double,
+                .long_double => .complex_long_double,
+                .complex,
+                .complex_long,
+                .complex_float,
+                .complex_double,
+                .complex_long_double,
+                => return b.duplicateSpec(p, "_Complex"),
+                else => return b.cannotCombine(p, source_tok),
+            },
+        }
+    }
+
+    pub fn fromType(ty: Type) Builder.Specifier {
+        return switch (ty.specifier) {
+            .void => .void,
+            .bool => .bool,
+            .char => .char,
+            .schar => .schar,
+            .uchar => .uchar,
+            .short => .short,
+            .ushort => .ushort,
+            .int => .int,
+            .uint => .uint,
+            .long => .long,
+            .ulong => .ulong,
+            .long_long => .long_long,
+            .ulong_long => .ulong_long,
+            .float => .float,
+            .double => .double,
+            .long_double => .long_double,
+            .complex_float => .complex_float,
+            .complex_double => .complex_double,
+            .complex_long_double => .complex_long_double,
+
+            .pointer => .{ .pointer = ty.data.sub_type },
+            .unspecified_variable_len_array => .{ .unspecified_variable_len_array = ty.data.sub_type },
+            .decayed_unspecified_variable_len_array => .{ .decayed_unspecified_variable_len_array = ty.data.sub_type },
+            .func => .{ .func = ty.data.func },
+            .var_args_func => .{ .var_args_func = ty.data.func },
+            .old_style_func => .{ .old_style_func = ty.data.func },
+            .array => .{ .array = ty.data.array },
+            .decayed_array => .{ .decayed_array = ty.data.array },
+            .static_array => .{ .static_array = ty.data.array },
+            .decayed_static_array => .{ .decayed_static_array = ty.data.array },
+            .incomplete_array => .{ .incomplete_array = ty.data.array },
+            .decayed_incomplete_array => .{ .decayed_incomplete_array = ty.data.array },
+            .variable_len_array => .{ .variable_len_array = ty.data.expr },
+            .decayed_variable_len_array => .{ .decayed_variable_len_array = ty.data.expr },
+            .@"struct" => .{ .@"struct" = ty.data.record },
+            .@"union" => .{ .@"union" = ty.data.record },
+            .@"enum" => .{ .@"enum" = ty.data.@"enum" },
+
+            .typeof_type => .{ .typeof_type = ty.data.sub_type },
+            .decayed_typeof_type => .{ .decayed_typeof_type = ty.data.sub_type },
+            .typeof_expr => .{ .typeof_expr = ty.data.expr },
+            .decayed_typeof_expr => .{ .decayed_typeof_expr = ty.data.expr },
+
+            .attributed => .{ .attributed = ty.data.attributed },
+            else => unreachable,
+        };
+    }
+};
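
`combineTypedef` is the only caller that sets `error_on_invalid`, so a merge that would normally emit `cannot_combine_spec` instead surfaces as `error.CannotCombine` and becomes a plain `false` return. A hedged sketch of the call-site shape (`typedef_ty` and `name_tok` are hypothetical parser state):

```zig
// Inside the declaration-specifier loop, on seeing a typedef name:
if (b.combineTypedef(p, typedef_ty, name_tok)) {
    // Merged; `b.typedef` now remembers the token for later diagnostics,
    // and the loop keeps collecting further specifiers.
} else {
    // Not combinable with the specifiers gathered so far, e.g. the second
    // identifier in `my_int my_int;`; the parser can instead treat it as
    // the declared name (hypothetical handling, not shown in this diff).
}
```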
+
+pub fn getAttribute(ty: Type, comptime tag: Attribute.Tag) ?Attribute.ArgumentsForTag(tag) {
+    switch (ty.specifier) {
+        .typeof_type => return ty.data.sub_type.getAttribute(tag),
+        .typeof_expr => return ty.data.expr.ty.getAttribute(tag),
+        .attributed => {
+            for (ty.data.attributed.attributes) |attribute| {
+                if (attribute.tag == tag) return @field(attribute.args, @tagName(tag));
+            }
+            return null;
+        },
+        else => return null,
+    }
+}
+
+/// Print type in C style
+pub fn print(ty: Type, w: anytype) @TypeOf(w).Error!void {
+    _ = try ty.printPrologue(w);
+    try ty.printEpilogue(w);
+}
+
+pub fn printNamed(ty: Type, name: []const u8, w: anytype) @TypeOf(w).Error!void {
+    const simple = try ty.printPrologue(w);
+    if (simple) try w.writeByte(' ');
+    try w.writeAll(name);
+    try ty.printEpilogue(w);
+}
+
+/// return true if `ty` is simple
+fn printPrologue(ty: Type, w: anytype) @TypeOf(w).Error!bool {
+    if (ty.qual.atomic) {
+        var non_atomic_ty = ty;
+        non_atomic_ty.qual.atomic = false;
+        try w.writeAll("_Atomic(");
+        try non_atomic_ty.print(w);
+        try w.writeAll(")");
+        return true;
+    }
+    switch (ty.specifier) {
+        .pointer,
+        .decayed_array,
+        .decayed_static_array,
+        .decayed_incomplete_array,
+        .decayed_variable_len_array,
+        .decayed_unspecified_variable_len_array,
+        .decayed_typeof_type,
+        .decayed_typeof_expr,
+        => {
+            const elem_ty = ty.elemType();
+            const simple = try elem_ty.printPrologue(w);
+            if (simple) try w.writeByte(' ');
+            if (elem_ty.isFunc() or elem_ty.isArray()) try w.writeByte('(');
+            try w.writeByte('*');
+            try ty.qual.dump(w);
+            return false;
+        },
+        .func, .var_args_func, .old_style_func => {
+            const ret_ty = ty.data.func.return_type;
+            const simple = try ret_ty.printPrologue(w);
+            if (simple) try w.writeByte(' ');
+            return false;
+        },
+        .array, .static_array, .incomplete_array, .unspecified_variable_len_array, .variable_len_array => {
+            const elem_ty = ty.elemType();
+            const simple = try elem_ty.printPrologue(w);
+            if (simple) try w.writeByte(' ');
+            return false;
+        },
+        .typeof_type, .typeof_expr => {
+            const actual = ty.canonicalize(.standard);
+            return actual.printPrologue(w);
+        },
+        .attributed => {
+            const actual = ty.canonicalize(.standard);
+            return actual.printPrologue(w);
+        },
+        else => {},
+    }
+    try ty.qual.dump(w);
+
+    switch (ty.specifier) {
+        .@"enum" => try w.print("enum {s}", .{ty.data.@"enum".name}),
+        .@"struct" => try w.print("struct {s}", .{ty.data.record.name}),
+        .@"union" => try w.print("union {s}", .{ty.data.record.name}),
+        else => try w.writeAll(Builder.fromType(ty).str().?),
+    }
+    return true;
+}
+
+fn printEpilogue(ty: Type, w: anytype) @TypeOf(w).Error!void {
+    if (ty.qual.atomic) return;
+    switch (ty.specifier) {
+        .pointer,
+        .decayed_array,
+        .decayed_static_array,
+        .decayed_incomplete_array,
+        .decayed_variable_len_array,
+        .decayed_unspecified_variable_len_array,
+        .decayed_typeof_type,
+        .decayed_typeof_expr,
+        => {
+            const elem_ty = ty.elemType();
+            if (elem_ty.isFunc() or elem_ty.isArray()) try w.writeByte(')');
+            try elem_ty.printEpilogue(w);
+        },
+        .func, .var_args_func, .old_style_func => {
+            try w.writeByte('(');
+            for (ty.data.func.params) |param, i| {
+                if (i != 0) try w.writeAll(", ");
+                _ = try param.ty.printPrologue(w);
+                try param.ty.printEpilogue(w);
+            }
+            if (ty.specifier != .func) {
+                if (ty.data.func.params.len != 0) try w.writeAll(", ");
+                try w.writeAll("...");
+            } else if (ty.data.func.params.len == 0) {
+                try w.writeAll("void");
+            }
+            try w.writeByte(')');
+            try ty.data.func.return_type.printEpilogue(w);
+        },
+        .array, .static_array => {
+            try w.writeByte('[');
+            if (ty.specifier == .static_array) try w.writeAll("static ");
+            try ty.qual.dump(w);
+            try w.print("{d}]", .{ty.data.array.len});
+            try ty.data.array.elem.printEpilogue(w);
+        },
+        .incomplete_array => {
+            try w.writeByte('[');
+            try ty.qual.dump(w);
+            try w.writeByte(']');
+            try ty.data.array.elem.printEpilogue(w);
+        },
+        .unspecified_variable_len_array => {
+            try w.writeByte('[');
+            try ty.qual.dump(w);
+            try w.writeAll("*]");
+            try ty.data.sub_type.printEpilogue(w);
+        },
+        .variable_len_array => {
+            try w.writeByte('[');
+            try ty.qual.dump(w);
+            try w.writeAll("]");
+            try ty.data.expr.ty.printEpilogue(w);
+        },
+        else => {},
+    }
+}
+
+/// Useful for debugging, too noisy to be enabled by default.
+const dump_detailed_containers = false;
+
+// Print as Zig types since those are actually readable
+pub fn dump(ty: Type, w: anytype) @TypeOf(w).Error!void {
+    try ty.qual.dump(w);
+    switch (ty.specifier) {
+        .pointer => {
+            try w.writeAll("*");
+            try ty.data.sub_type.dump(w);
+        },
+        .func, .var_args_func, .old_style_func => {
+            try w.writeAll("fn (");
+            for (ty.data.func.params) |param, i| {
+                if (i != 0) try w.writeAll(", ");
+                if (param.name.len != 0) try w.print("{s}: ", .{param.name});
+                try param.ty.dump(w);
+            }
+            if (ty.specifier != .func) {
+                if (ty.data.func.params.len != 0) try w.writeAll(", ");
+                try w.writeAll("...");
+            }
+            try w.writeAll(") ");
+            try ty.data.func.return_type.dump(w);
+        },
+        .array, .static_array, .decayed_array, .decayed_static_array => {
+            if (ty.specifier == .decayed_array or ty.specifier == .decayed_static_array) try w.writeByte('d');
+            try w.writeByte('[');
+            if (ty.specifier == .static_array or ty.specifier == .decayed_static_array) try w.writeAll("static ");
+            try w.print("{d}]", .{ty.data.array.len});
+            try ty.data.array.elem.dump(w);
+        },
+        .incomplete_array, .decayed_incomplete_array => {
+            if (ty.specifier == .decayed_incomplete_array) try w.writeByte('d');
+            try w.writeAll("[]");
+            try ty.data.array.elem.dump(w);
+        },
+        .@"enum" => {
+            try w.print("enum {s}", .{ty.data.@"enum".name});
+            if (dump_detailed_containers) try dumpEnum(ty.data.@"enum", w);
+        },
+        .@"struct" => {
+            try w.print("struct {s}", .{ty.data.record.name});
+            if (dump_detailed_containers) try dumpRecord(ty.data.record, w);
+        },
+        .@"union" => {
+            try w.print("union {s}", .{ty.data.record.name});
+            if (dump_detailed_containers) try dumpRecord(ty.data.record, w);
+        },
+        .unspecified_variable_len_array, .decayed_unspecified_variable_len_array => {
+            if (ty.specifier == .decayed_unspecified_variable_len_array) try w.writeByte('d');
+            try w.writeAll("[*]");
+            try ty.data.sub_type.dump(w);
+        },
+        .variable_len_array, .decayed_variable_len_array => {
+            if (ty.specifier == .decayed_variable_len_array) try w.writeByte('d');
+            try w.writeAll("[]");
+            try ty.data.expr.ty.dump(w);
+        },
+        .typeof_type, .decayed_typeof_type => {
+            try w.writeAll("typeof(");
+            try ty.data.sub_type.dump(w);
+            try w.writeAll(")");
+        },
+        .typeof_expr, .decayed_typeof_expr => {
+            try w.writeAll("typeof(: ");
+            try ty.data.expr.ty.dump(w);
+            try w.writeAll(")");
+        },
+        .attributed => {
+            try w.writeAll("attributed(");
+            try ty.data.attributed.base.dump(w);
+            try w.writeAll(")");
+        },
+        .special_va_start => try w.writeAll("(va start param)"),
+        else => try w.writeAll(Builder.fromType(ty).str().?),
+    }
+}
+
+fn dumpEnum(@"enum": *Enum, w: anytype) @TypeOf(w).Error!void {
+    try w.writeAll(" {");
+    for (@"enum".fields) |field| {
+        try w.print(" {s} = {d},", .{ field.name, field.value });
+    }
+    try w.writeAll(" }");
+}
+
+fn dumpRecord(record: *Record, w: anytype) @TypeOf(w).Error!void {
+    try w.writeAll(" {");
+    for (record.fields) |field| {
+        try w.writeByte(' ');
+        try field.ty.dump(w);
+        try w.print(" {s}: {d};", .{ field.name, field.bit_width });
+    }
+    try w.writeAll(" }");
+}
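
The prologue/epilogue split mirrors how C type syntax wraps around the declarator: `printPrologue` emits everything left of the name, `printEpilogue` everything right of it, which is what lets the `*` of a pointer-to-array land inside parentheses. A sketch of the expected rendering, assuming `ty` is the `Type` for a pointer to an array of 4 `int` and `std` is in scope:

```zig
var buf: [64]u8 = undefined;
var fbs = std.io.fixedBufferStream(&buf);
try ty.printNamed("p", fbs.writer());
// fbs.getWritten() should read: int (*p)[4]
```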
diff --git a/src/aro/Value.zig b/src/aro/Value.zig
new file mode 100644
index 000000000000..e1c3fa1031c7
--- /dev/null
+++ b/src/aro/Value.zig
@@ -0,0 +1,445 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const Compilation = @import("Compilation.zig");
+const Type = @import("Type.zig");
+
+const Value = @This();
+
+tag: Tag = .unavailable,
+data: union {
+    none: void,
+    int: u64,
+    float: f64,
+    array: []Value,
+    bytes: []u8,
+} = .{ .none = {} },
+
+const Tag = enum {
+    unavailable,
+    /// int is used to store integer, boolean and pointer values
+    int,
+    float,
+    array,
+    bytes,
+};
+
+pub fn zero(v: Value) Value {
+    return switch (v.tag) {
+        .int => int(0),
+        .float => float(0),
+        else => unreachable,
+    };
+}
+
+pub fn one(v: Value) Value {
+    return switch (v.tag) {
+        .int => int(1),
+        .float => float(1),
+        else => unreachable,
+    };
+}
+
+pub fn int(v: anytype) Value {
+    if (@TypeOf(v) == comptime_int or @typeInfo(@TypeOf(v)).Int.signedness == .unsigned)
+        return .{ .tag = .int, .data = .{ .int = v } }
+    else
+        return .{ .tag = .int, .data = .{ .int = @bitCast(u64, @as(i64, v)) } };
+}
+
+pub fn float(v: anytype) Value {
+    return .{ .tag = .float, .data = .{ .float = v } };
+}
+
+pub fn bytes(v: anytype) Value {
+    return .{ .tag = .bytes, .data = .{ .bytes = v } };
+}
+
+pub fn signExtend(v: Value, old_ty: Type, comp: *Compilation) i64 {
+    const size = old_ty.sizeof(comp).?;
+    return switch (size) {
+        4 => v.getInt(i32),
+        8 => v.getInt(i64),
+        else => unreachable,
+    };
+}
+
+/// Converts the stored value from a float to an integer.
+/// `.unavailable` value remains unchanged.
+pub fn floatToInt(v: *Value, old_ty: Type, new_ty: Type, comp: *Compilation) void {
+    assert(old_ty.isFloat());
+    if (v.tag == .unavailable) return;
+    if (new_ty.isUnsignedInt(comp) and v.data.float < 0) {
+        v.* = int(0);
+        return;
+    } else if (!std.math.isFinite(v.data.float)) {
+        v.tag = .unavailable;
+        return;
+    }
+    const size = old_ty.sizeof(comp).?;
+    v.* = int(switch (size) {
+        4 => @floatToInt(i32, v.getFloat(f32)),
+        8 => @floatToInt(i64, v.getFloat(f64)),
+        else => unreachable,
+    });
+}
+
+/// Converts the stored value from an integer to a float.
+/// `.unavailable` value remains unchanged.
+pub fn intToFloat(v: *Value, old_ty: Type, new_ty: Type, comp: *Compilation) void {
+    assert(old_ty.isInt());
+    if (v.tag == .unavailable) return;
+    if (!new_ty.isReal() or new_ty.sizeof(comp).? > 8) {
+        v.tag = .unavailable;
+    } else if (old_ty.isUnsignedInt(comp)) {
+        v.* = float(@intToFloat(f64, v.data.int));
+    } else {
+        v.* = float(@intToFloat(f64, @bitCast(i64, v.data.int)));
+    }
+}
+
+/// Truncates or extends bits based on type.
+/// old_ty is only used for size.
+pub fn intCast(v: *Value, old_ty: Type, new_ty: Type, comp: *Compilation) void {
+    // assert(old_ty.isInt() and new_ty.isInt());
+    if (v.tag == .unavailable) return;
+    if (new_ty.is(.bool)) return v.toBool();
+    if (!old_ty.isUnsignedInt(comp)) {
+        const size = new_ty.sizeof(comp).?;
+        switch (size) {
+            1 => v.* = int(@bitCast(u8, v.getInt(i8))),
+            2 => v.* = int(@bitCast(u16, v.getInt(i16))),
+            4 => v.* = int(@bitCast(u32, v.getInt(i32))),
+            8 => return,
+            else => unreachable,
+        }
+    }
+}
+
+/// Converts the stored value from one float type to another.
+/// `.unavailable` value remains unchanged.
+pub fn floatCast(v: *Value, old_ty: Type, new_ty: Type, comp: *Compilation) void {
+    assert(old_ty.isFloat() and new_ty.isFloat());
+    if (v.tag == .unavailable) return;
+    const size = new_ty.sizeof(comp).?;
+    if (!new_ty.isReal() or size > 8) {
+        v.tag = .unavailable;
+    } else if (size == 4) {
+        v.* = float(@floatCast(f32, v.data.float));
+    }
+}
+
+/// Truncates data.int to one bit
+pub fn toBool(v: *Value) void {
+    if (v.tag == .unavailable) return;
+    const res = v.getBool();
+    v.* = int(@boolToInt(res));
+}
+
+pub fn isZero(v: Value) bool {
+    return switch (v.tag) {
+        .unavailable => false,
+        .int => v.data.int == 0,
+        .float => v.data.float == 0,
+        .array => false,
+        .bytes => false,
+    };
+}
+
+pub fn getBool(v: Value) bool {
+    return switch (v.tag) {
+        .unavailable => unreachable,
+        .int => v.data.int != 0,
+        .float => v.data.float != 0,
+        .array => true,
+        .bytes => true,
+    };
+}
+
+pub fn getInt(v: Value, comptime T: type) T {
+    if (T == u64) return v.data.int;
+    return if (@typeInfo(T).Int.signedness == .unsigned)
+        @truncate(T, v.data.int)
+    else
+        @truncate(T, @bitCast(i64, v.data.int));
+}
+
+pub fn getFloat(v: Value, comptime T: type) T {
+    if (T == f64) return v.data.float;
+    return @floatCast(T, v.data.float);
+}
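
These conversion helpers mutate a `Value` in place, mirroring C's conversion rules on constants. A minimal sketch under stated assumptions (`comp` is a valid `*Compilation`, and `double_ty`, `int_ty`, `uint_ty` are `Type` values for C `double`, `int`, and `unsigned int`):

```zig
var v = Value.float(3.75);
v.floatToInt(double_ty, int_ty, comp); // C truncates toward zero
std.debug.assert(v.getInt(i64) == 3);

var neg = Value.float(-1.5);
neg.floatToInt(double_ty, uint_ty, comp); // negative -> unsigned clamps to 0
std.debug.assert(neg.isZero());
```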
+
+const bin_overflow = struct {
+    inline fn addInt(comptime T: type, out: *Value, a: Value, b: Value) bool {
+        const a_val = a.getInt(T);
+        const b_val = b.getInt(T);
+        var c: T = undefined;
+        const overflow = @addWithOverflow(T, a_val, b_val, &c);
+        out.* = int(c);
+        return overflow;
+    }
+    inline fn addFloat(comptime T: type, aa: Value, bb: Value) Value {
+        const a_val = aa.getFloat(T);
+        const b_val = bb.getFloat(T);
+        return float(a_val + b_val);
+    }
+
+    inline fn subInt(comptime T: type, out: *Value, a: Value, b: Value) bool {
+        const a_val = a.getInt(T);
+        const b_val = b.getInt(T);
+        var c: T = undefined;
+        const overflow = @subWithOverflow(T, a_val, b_val, &c);
+        out.* = int(c);
+        return overflow;
+    }
+    inline fn subFloat(comptime T: type, aa: Value, bb: Value) Value {
+        const a_val = aa.getFloat(T);
+        const b_val = bb.getFloat(T);
+        return float(a_val - b_val);
+    }
+
+    inline fn mulInt(comptime T: type, out: *Value, a: Value, b: Value) bool {
+        const a_val = a.getInt(T);
+        const b_val = b.getInt(T);
+        var c: T = undefined;
+        const overflow = @mulWithOverflow(T, a_val, b_val, &c);
+        out.* = int(c);
+        return overflow;
+    }
+    inline fn mulFloat(comptime T: type, aa: Value, bb: Value) Value {
+        const a_val = aa.getFloat(T);
+        const b_val = bb.getFloat(T);
+        return float(a_val * b_val);
+    }
+
+    const FT = fn (*Value, Value, Value, Type, *Compilation) bool;
+    fn getOp(intFunc: anytype, floatFunc: anytype) FT {
+        return struct {
+            fn op(res: *Value, a: Value, b: Value, ty: Type, comp: *Compilation) bool {
+                const size = ty.sizeof(comp).?;
+                if (@TypeOf(floatFunc) != @TypeOf(null) and ty.isFloat()) {
+                    res.* = switch (size) {
+                        4 => floatFunc(f32, a, b),
+                        8 => floatFunc(f64, a, b),
+                        else => unreachable,
+                    };
+                    return false;
+                }
+
+                if (ty.isUnsignedInt(comp)) switch (size) {
+                    1 => unreachable, // promoted to int
+                    2 => unreachable, // promoted to int
+                    4 => return intFunc(u32, res, a, b),
+                    8 => return intFunc(u64, res, a, b),
+                    else => unreachable,
+                } else switch (size) {
+                    1 => unreachable, // promoted to int
+                    2 => unreachable, // promoted to int
+                    4 => return intFunc(i32, res, a, b),
+                    8 => return intFunc(i64, res, a, b),
+                    else => unreachable,
+                }
+            }
+        }.op;
+    }
+};
+
+pub const add = bin_overflow.getOp(bin_overflow.addInt, bin_overflow.addFloat);
+pub const sub = bin_overflow.getOp(bin_overflow.subInt, bin_overflow.subFloat);
+pub const mul = bin_overflow.getOp(bin_overflow.mulInt, bin_overflow.mulFloat);
+
+const bin_ops = struct {
+    inline fn divInt(comptime T: type, aa: Value, bb: Value) Value {
+        const a_val = aa.getInt(T);
+        const b_val = bb.getInt(T);
+        return int(@divTrunc(a_val, b_val));
+    }
+    inline fn divFloat(comptime T: type, aa: Value, bb: Value) Value {
+        const a_val = aa.getFloat(T);
+        const b_val = bb.getFloat(T);
+        return float(a_val / b_val);
+    }
+
+    inline fn remInt(comptime T: type, a: Value, b: Value) Value {
+        const a_val = a.getInt(T);
+        const b_val = b.getInt(T);
+
+        if (@typeInfo(T).Int.signedness == .signed) {
+            if (a_val == std.math.minInt(T) and b_val == -1) {
+                return Value{ .tag = .unavailable, .data = .{ .none = {} } };
+            } else {
+                if (b_val > 0) return int(@rem(a_val, b_val));
+                return int(a_val - @divTrunc(a_val, b_val) * b_val);
+            }
+        } else {
+            return int(a_val % b_val);
+        }
+    }
+
+    inline fn orInt(comptime T: type, a: Value, b: Value) Value {
+        const a_val = a.getInt(T);
+        const b_val = b.getInt(T);
+        return int(a_val | b_val);
+    }
+    inline fn xorInt(comptime T: type, a: Value, b: Value) Value {
+        const a_val = a.getInt(T);
+        const b_val = b.getInt(T);
+        return int(a_val ^ b_val);
+    }
+    inline fn andInt(comptime T: type, a: Value, b: Value) Value {
+        const a_val = a.getInt(T);
+        const b_val = b.getInt(T);
+        return int(a_val & b_val);
+    }
+
+    inline fn shl(comptime T: type, a: Value, b: Value) Value {
+        const ShiftT = std.math.Log2Int(T);
+        const info = @typeInfo(T).Int;
+        const UT = std.meta.Int(.unsigned, info.bits);
+        const b_val = b.getInt(T);
+
+        if (b_val > std.math.maxInt(ShiftT)) {
+            return if (info.signedness == .unsigned)
+                int(@as(UT, std.math.maxInt(UT)))
+            else
+                int(@as(T, std.math.minInt(T)));
+        }
+        const amt = @truncate(ShiftT, @bitCast(UT, b_val));
+        const a_val = a.getInt(T);
+        return int(a_val << amt);
+    }
+    inline fn shr(comptime T: type, a: Value, b: Value) Value {
+        const ShiftT = std.math.Log2Int(T);
+        const UT = std.meta.Int(.unsigned, @typeInfo(T).Int.bits);
+
+        const b_val = b.getInt(T);
+        if (b_val > std.math.maxInt(ShiftT)) return Value.int(0);
+
+        const amt = @truncate(ShiftT, @intCast(UT, b_val));
+        const a_val = a.getInt(T);
+        return int(a_val >> amt);
+    }
+
+    const FT = fn (Value, Value, Type, *Compilation) Value;
+    fn getOp(intFunc: anytype, floatFunc: anytype) FT {
+        return struct {
+            fn op(a: Value, b: Value, ty: Type, comp: *Compilation) Value {
+                const size = ty.sizeof(comp).?;
+                if (@TypeOf(floatFunc) != @TypeOf(null) and ty.isFloat()) {
+                    switch (size) {
+                        4 => return floatFunc(f32, a, b),
+                        8 => return floatFunc(f64, a, b),
+                        else => unreachable,
+                    }
+                }
+
+                if (ty.isUnsignedInt(comp)) switch (size) {
+                    1 => unreachable, // promoted to int
+                    2 => unreachable, // promoted to int
+                    4 => return intFunc(u32, a, b),
+                    8 => return intFunc(u64, a, b),
+                    else => unreachable,
+                } else switch (size) {
+                    1 => unreachable, // promoted to int
+                    2 => unreachable, // promoted to int
+                    4 => return intFunc(i32, a, b),
+                    8 => return intFunc(i64, a, b),
+                    else => unreachable,
+                }
+            }
+        }.op;
+    }
+};
+
+/// caller guarantees rhs != 0
+pub const div = bin_ops.getOp(bin_ops.divInt, bin_ops.divFloat);
+/// caller guarantees rhs != 0
+/// caller guarantees lhs != std.math.minInt(T) OR rhs != -1
+pub const rem = bin_ops.getOp(bin_ops.remInt, null);
+
+pub const bitOr = bin_ops.getOp(bin_ops.orInt, null);
+pub const bitXor = bin_ops.getOp(bin_ops.xorInt, null);
+pub const bitAnd = bin_ops.getOp(bin_ops.andInt, null);
+
+pub const shl = bin_ops.getOp(bin_ops.shl, null);
+pub const shr = bin_ops.getOp(bin_ops.shr, null);
+
+pub fn bitNot(v: Value, ty: Type, comp: *Compilation) Value {
+    const size = ty.sizeof(comp).?;
+    var out: Value = undefined;
+    if (ty.isUnsignedInt(comp)) switch (size) {
+        1 => unreachable, // promoted to int
+        2 => unreachable, // promoted to int
+        4 => out = int(~v.getInt(u32)),
+        8 => out = int(~v.getInt(u64)),
+        else => unreachable,
+    } else switch (size) {
+        1 => unreachable, // promoted to int
+        2 => unreachable, // promoted to int
+        4 => out = int(~v.getInt(i32)),
+        8 => out = int(~v.getInt(i64)),
+        else => unreachable,
+    }
+    return out;
+}
+
+pub fn compare(a: Value, op: std.math.CompareOperator, b: Value, ty: Type, comp: *Compilation) bool {
+    assert(a.tag == b.tag);
+    const S = struct {
+        inline fn doICompare(comptime T: type, aa: Value, opp: std.math.CompareOperator, bb: Value) bool {
+            const a_val = aa.getInt(T);
+            const b_val = bb.getInt(T);
+            return std.math.compare(a_val, opp, b_val);
+        }
+        inline fn doFCompare(comptime T: type, aa: Value, opp: std.math.CompareOperator, bb: Value) bool {
+            const a_val = aa.getFloat(T);
+            const b_val = bb.getFloat(T);
+            return std.math.compare(a_val, opp, b_val);
+        }
+    };
+    const size = ty.sizeof(comp).?;
+    switch (a.tag) {
+        .unavailable => return true,
+        .int => if (ty.isUnsignedInt(comp)) switch (size) {
+            1 => unreachable, // promoted to int
+            2 => unreachable, // promoted to int
+            4 => return S.doICompare(u32, a, op, b),
+            8 => return S.doICompare(u64, a, op, b),
+            else => unreachable,
+        } else switch (size) {
+            1 => unreachable, // promoted to int
+            2 => unreachable, // promoted to int
+            4 => return S.doICompare(i32, a, op, b),
+            8 => return S.doICompare(i64, a, op, b),
+            else => unreachable,
+        },
+        .float => switch (size) {
+            4 => return S.doFCompare(f32, a, op, b),
+            8 => return S.doFCompare(f64, a, op, b),
+            else => unreachable,
+        },
+        else => @panic("TODO"),
+    }
+    return false;
+}
+
+pub fn hash(v: Value) u64 {
+    switch (v.tag) {
+        .unavailable => unreachable,
+        .int => return std.hash.Wyhash.hash(0, std.mem.asBytes(&v.data.int)),
+        else => @panic("TODO"),
+    }
+}
+
+pub fn dump(v: Value, ty: Type, comp: *Compilation, w: anytype) !void {
+    switch (v.tag) {
+        .unavailable => try w.writeAll("unavailable"),
+        .int => if (ty.isUnsignedInt(comp))
+            try w.print("{d}", .{v.data.int})
+        else {
+            try w.print("{d}", .{v.signExtend(ty, comp)});
+        },
+        // std.fmt does @as instead of @floatCast
+        .float => try w.print("{d}", .{@floatCast(f64, v.data.float)}),
+        else => try w.print("({s})", .{@tagName(v.tag)}),
+    }
+}
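
`getOp` builds each operator at comptime from an integer kernel and an optional float kernel; passing `null` for the float kernel (as `rem`, the bit ops, and the shifts do) makes the `@TypeOf(floatFunc) != @TypeOf(null)` condition comptime-false, so the float branch is never analyzed. A standalone sketch of the same factory pattern (simplified signatures, not the diff's actual kernels):

```zig
const std = @import("std");

fn getOp(comptime intFunc: anytype, comptime floatFunc: anytype) fn (u64, u64, bool) u64 {
    return struct {
        fn op(a: u64, b: u64, is_float: bool) u64 {
            // With floatFunc == null the `and` short-circuits at comptime,
            // so this branch (and the call in it) is compiled away.
            if (@TypeOf(floatFunc) != @TypeOf(null) and is_float) {
                return floatFunc(a, b);
            }
            return intFunc(a, b);
        }
    }.op;
}

fn addInt(a: u64, b: u64) u64 {
    return a +% b; // wrapping add stands in for the overflow-checked kernel
}

pub const add = getOp(addInt, null);
```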
diff --git a/src/aro/features.zig b/src/aro/features.zig
new file mode 100644
index 000000000000..556a61ae2bb2
--- /dev/null
+++ b/src/aro/features.zig
@@ -0,0 +1,75 @@
+const std = @import("std");
+const Compilation = @import("Compilation.zig");
+
+/// Used to implement the __has_feature macro.
+pub fn hasFeature(comp: *Compilation, ext: []const u8) bool {
+    const list = .{
+        .assume_nonnull = true,
+        .attribute_analyzer_noreturn = true,
+        .attribute_availability = true,
+        .attribute_availability_with_message = true,
+        .attribute_availability_app_extension = true,
+        .attribute_availability_with_version_underscores = true,
+        .attribute_availability_tvos = true,
+        .attribute_availability_watchos = true,
+        .attribute_availability_with_strict = true,
+        .attribute_availability_with_replacement = true,
+        .attribute_availability_in_templates = true,
+        .attribute_availability_swift = true,
+        .attribute_cf_returns_not_retained = true,
+        .attribute_cf_returns_retained = true,
+        .attribute_cf_returns_on_parameters = true,
+        .attribute_deprecated_with_message = true,
+        .attribute_deprecated_with_replacement = true,
+        .attribute_ext_vector_type = true,
+        .attribute_ns_returns_not_retained = true,
+        .attribute_ns_returns_retained = true,
+        .attribute_ns_consumes_self = true,
+        .attribute_ns_consumed = true,
+        .attribute_cf_consumed = true,
+        .attribute_overloadable = true,
+        .attribute_unavailable_with_message = true,
+        .attribute_unused_on_fields = true,
+        .attribute_diagnose_if_objc = true,
+        .blocks = false, // TODO
+        .c_thread_safety_attributes = true,
+        .enumerator_attributes = true,
+        .nullability = true,
+        .nullability_on_arrays = true,
+        .nullability_nullable_result = true,
+        .c_alignas = comp.langopts.standard.atLeast(.c11),
+        .c_alignof = comp.langopts.standard.atLeast(.c11),
+        .c_atomic = comp.langopts.standard.atLeast(.c11),
+        .c_generic_selections = comp.langopts.standard.atLeast(.c11),
+        .c_static_assert = comp.langopts.standard.atLeast(.c11),
+        .c_thread_local = comp.langopts.standard.atLeast(.c11) and comp.isTlsSupported(),
+    };
+    inline for (std.meta.fields(@TypeOf(list))) |f| {
+        if (std.mem.eql(u8, f.name, ext)) return @field(list, f.name);
+    }
+    return false;
+}
+
+/// Used to implement the __has_extension macro.
+pub fn hasExtension(comp: *Compilation, ext: []const u8) bool {
+    const list = .{
+        // C11 features
+        .c_alignas = true,
+        .c_alignof = true,
+        .c_atomic = false, // TODO
+        .c_generic_selections = true,
+        .c_static_assert = true,
+        .c_thread_local = comp.isTlsSupported(),
+        // misc
+        .overloadable_unmarked = false, // TODO
+        .statement_attributes_with_gnu_syntax = false, // TODO
+        .gnu_asm = true,
+        .gnu_asm_goto_with_outputs = true,
+        .matrix_types = false, // TODO
+        .matrix_types_scalar_division = false, // TODO
+    };
+    inline for (std.meta.fields(@TypeOf(list))) |f| {
+        if (std.mem.eql(u8, f.name, ext)) return @field(list, f.name);
+    }
+    return false;
+}
diff --git a/src/aro/lib.zig b/src/aro/lib.zig
new file mode 100644
index 000000000000..8004441c106b
--- /dev/null
+++ b/src/aro/lib.zig
@@ -0,0 +1,13 @@
+pub const Codegen = @import("Codegen.zig");
+pub const Compilation = @import("Compilation.zig");
+pub const Diagnostics = @import("Diagnostics.zig");
+pub const Parser = @import("Parser.zig");
+pub const Preprocessor = @import("Preprocessor.zig");
+pub const Source = @import("Source.zig");
+pub const Tokenizer = @import("Tokenizer.zig");
+pub const Tree = @import("Tree.zig");
+pub const Type = @import("Type.zig");
+pub const Value = @import("Value.zig");
+
+pub const version_str = "0.0.0-dev";
+pub const version = @import("std").SemanticVersion.parse(version_str) catch unreachable;
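
`lib.zig` is the package root: consumers reach every Aro component through these re-exports. A minimal sketch of standalone use, assuming the package is importable under the name `aro` and that `Compilation.init` takes a single allocator as it does elsewhere in this diff:

```zig
const std = @import("std");
const aro = @import("aro");

pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();

    var comp = aro.Compilation.init(gpa.allocator());
    defer comp.deinit();

    std.debug.print("aro version {s}\n", .{aro.version_str});
}
```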
diff --git a/src/aro/pragmas/gcc.zig b/src/aro/pragmas/gcc.zig
new file mode 100644
index 000000000000..22bef7cfb447
--- /dev/null
+++ b/src/aro/pragmas/gcc.zig
@@ -0,0 +1,199 @@
+const std = @import("std");
+const mem = std.mem;
+const Compilation = @import("../Compilation.zig");
+const Pragma = @import("../Pragma.zig");
+const Diagnostics = @import("../Diagnostics.zig");
+const Preprocessor = @import("../Preprocessor.zig");
+const Parser = @import("../Parser.zig");
+const TokenIndex = @import("../Tree.zig").TokenIndex;
+
+const GCC = @This();
+
+pragma: Pragma = .{
+    .beforeParse = beforeParse,
+    .beforePreprocess = beforePreprocess,
+    .afterParse = afterParse,
+    .deinit = deinit,
+    .preprocessorHandler = preprocessorHandler,
+    .parserHandler = parserHandler,
+    .preserveTokens = preserveTokens,
+},
+original_options: Diagnostics.Options = .{},
+options_stack: std.ArrayListUnmanaged(Diagnostics.Options) = .{},
+
+const Directive = enum {
+    warning,
+    @"error",
+    diagnostic,
+    poison,
+    const Diagnostics = enum {
+        ignored,
+        warning,
+        @"error",
+        fatal,
+        push,
+        pop,
+    };
+};
+
+fn beforePreprocess(pragma: *Pragma, comp: *Compilation) void {
+    var self = @fieldParentPtr(GCC, "pragma", pragma);
+    self.original_options = comp.diag.options;
+}
+
+fn beforeParse(pragma: *Pragma, comp: *Compilation) void {
+    var self = @fieldParentPtr(GCC, "pragma", pragma);
+    comp.diag.options = self.original_options;
+    self.options_stack.items.len = 0;
+}
+
+fn afterParse(pragma: *Pragma, comp: *Compilation) void {
+    var self = @fieldParentPtr(GCC, "pragma", pragma);
+    comp.diag.options = self.original_options;
+    self.options_stack.items.len = 0;
+}
+
+pub fn init(allocator: mem.Allocator) !*Pragma {
+    var gcc = try allocator.create(GCC);
+    gcc.* = .{};
+    return &gcc.pragma;
+}
+
+fn deinit(pragma: *Pragma, comp: *Compilation) void {
+    var self = @fieldParentPtr(GCC, "pragma", pragma);
+    self.options_stack.deinit(comp.gpa);
+    comp.gpa.destroy(self);
+}
+
+fn diagnosticHandler(self: *GCC, pp: *Preprocessor, start_idx: TokenIndex) Pragma.Error!void {
+    const diagnostic_tok = pp.tokens.get(start_idx);
+    if (diagnostic_tok.id == .nl) return;
+
+    const diagnostic = std.meta.stringToEnum(Directive.Diagnostics, pp.expandedSlice(diagnostic_tok)) orelse
+        return error.UnknownPragma;
+
+    switch (diagnostic) {
+        .ignored, .warning, .@"error", .fatal => {
+            const str = Pragma.pasteTokens(pp, start_idx + 1) catch |err| switch (err) {
+                error.ExpectedStringLiteral => {
+                    return pp.comp.diag.add(.{
+                        .tag = .pragma_requires_string_literal,
+                        .loc = diagnostic_tok.loc,
+                        .extra = .{ .str = "GCC diagnostic" },
+                    }, diagnostic_tok.expansionSlice());
+                },
+                else => |e| return e,
+            };
+            if (!mem.startsWith(u8, str, "-W")) {
+                const next = pp.tokens.get(start_idx + 1);
+                return pp.comp.diag.add(.{
+                    .tag = .malformed_warning_check,
+                    .loc = next.loc,
+                    .extra = .{ .str = "GCC diagnostic" },
+                }, next.expansionSlice());
+            }
+            const new_kind = switch (diagnostic) {
+                .ignored => Diagnostics.Kind.off,
+                .warning => Diagnostics.Kind.warning,
+                .@"error" => Diagnostics.Kind.@"error",
+                .fatal => Diagnostics.Kind.@"fatal error",
+                else => unreachable,
+            };
+
+            try pp.comp.diag.set(str[2..], new_kind);
+        },
+        .push => try self.options_stack.append(pp.comp.gpa, pp.comp.diag.options),
+        .pop => pp.comp.diag.options = self.options_stack.popOrNull() orelse self.original_options,
+    }
+}
+
+fn preprocessorHandler(pragma: *Pragma, pp: *Preprocessor, start_idx: TokenIndex) Pragma.Error!void {
+    var self = @fieldParentPtr(GCC, "pragma", pragma);
+    const directive_tok = pp.tokens.get(start_idx + 1);
+    if (directive_tok.id == .nl) return;
+
+    const gcc_pragma = std.meta.stringToEnum(Directive, pp.expandedSlice(directive_tok)) orelse
+        return pp.comp.diag.add(.{
+        .tag = .unknown_gcc_pragma,
+        .loc = directive_tok.loc,
+    }, directive_tok.expansionSlice());
+
+    switch (gcc_pragma) {
+        .warning, .@"error" => {
+            const text = Pragma.pasteTokens(pp, start_idx + 2) catch |err| switch (err) {
+                error.ExpectedStringLiteral => {
+                    return pp.comp.diag.add(.{
+                        .tag = .pragma_requires_string_literal,
+                        .loc = directive_tok.loc,
+                        .extra = .{ .str = @tagName(gcc_pragma) },
+                    }, directive_tok.expansionSlice());
+                },
+                else => |e| return e,
+            };
+            const extra = Diagnostics.Message.Extra{ .str = try pp.comp.diag.arena.allocator().dupe(u8, text) };
+            const diagnostic_tag: Diagnostics.Tag = if (gcc_pragma == .warning) .pragma_warning_message else .pragma_error_message;
+            return pp.comp.diag.add(
+                .{ .tag = diagnostic_tag, .loc = directive_tok.loc, .extra = extra },
+                directive_tok.expansionSlice(),
+            );
+        },
+        .diagnostic => return self.diagnosticHandler(pp, start_idx + 2) catch |err| switch (err) {
+            error.UnknownPragma => {
+                const tok = pp.tokens.get(start_idx + 2);
+                return pp.comp.diag.add(.{
+                    .tag = .unknown_gcc_pragma_directive,
+                    .loc = tok.loc,
+                }, tok.expansionSlice());
+            },
+            else => |e| return e,
+        },
+        .poison => {
+            var i: usize = 2;
+            while (true) : (i += 1) {
+                const tok = pp.tokens.get(start_idx + i);
+                if (tok.id == .nl) break;
+
+                if (!tok.id.isMacroIdentifier()) {
+                    return pp.comp.diag.add(.{
+                        .tag = .pragma_poison_identifier,
+                        .loc = tok.loc,
+                    }, tok.expansionSlice());
+                }
+                const str = pp.expandedSlice(tok);
+                if (pp.defines.get(str) != null) {
+                    try pp.comp.diag.add(.{
+                        .tag = .pragma_poison_macro,
+                        .loc = tok.loc,
+                    }, tok.expansionSlice());
+                }
+                try pp.poisoned_identifiers.put(str, {});
+            }
+            return;
+        },
+    }
+}
+
+fn parserHandler(pragma: *Pragma, p: *Parser, start_idx: TokenIndex) Compilation.Error!void {
+    var self = @fieldParentPtr(GCC, "pragma", pragma);
+    const directive_tok = p.pp.tokens.get(start_idx + 1);
+    if (directive_tok.id == .nl) return;
+    const name = p.pp.expandedSlice(directive_tok);
+    if (mem.eql(u8, name, "diagnostic")) {
+        return self.diagnosticHandler(p.pp, start_idx + 2) catch |err| switch (err) {
+            error.UnknownPragma => {}, // handled during preprocessing
+            error.StopPreprocessing => unreachable, // Only used by #pragma once
+            else => |e| return e,
+        };
+    }
+}
+
+fn preserveTokens(_: *Pragma, pp: *Preprocessor, start_idx: TokenIndex) bool {
+    const next = pp.tokens.get(start_idx + 1);
+    if (next.id != .nl) {
+        const name = pp.expandedSlice(next);
+        if (mem.eql(u8, name, "poison")) {
+            return false;
+        }
+    }
+    return true;
+}
diff --git a/src/aro/pragmas/message.zig b/src/aro/pragmas/message.zig
new file mode 100644
index 000000000000..42752516c3c5
--- /dev/null
+++ b/src/aro/pragmas/message.zig
@@ -0,0 +1,50 @@
+const std = @import("std");
+const mem = std.mem;
+const Compilation = @import("../Compilation.zig");
+const Pragma = @import("../Pragma.zig");
+const Diagnostics = @import("../Diagnostics.zig");
+const Preprocessor = @import("../Preprocessor.zig");
+const Parser = @import("../Parser.zig");
+const TokenIndex = @import("../Tree.zig").TokenIndex;
+const Source = @import("../Source.zig");
+
+const Message = @This();
+
+pragma: Pragma = .{
+    .deinit = deinit,
+    .preprocessorHandler = preprocessorHandler,
+},
+
+pub fn init(allocator: mem.Allocator) !*Pragma {
+    var once = try allocator.create(Message);
+    once.* = .{};
+    return &once.pragma;
+}
+
+fn deinit(pragma: *Pragma, comp: *Compilation) void {
+    var self = @fieldParentPtr(Message, "pragma", pragma);
+    comp.gpa.destroy(self);
+}
+
+fn preprocessorHandler(_: *Pragma, pp: *Preprocessor, start_idx: TokenIndex) Pragma.Error!void {
+    const message_tok = pp.tokens.get(start_idx);
+    const message_expansion_locs = message_tok.expansionSlice();
+
+    const str = Pragma.pasteTokens(pp, start_idx + 1) catch |err| switch (err) {
+        error.ExpectedStringLiteral => {
+            return pp.comp.diag.add(.{
+                .tag = .pragma_requires_string_literal,
+                .loc = message_tok.loc,
+                .extra = .{ .str = "message" },
+            }, message_expansion_locs);
+        },
+        else => |e| return e,
+    };
+
+    const loc = if (message_expansion_locs.len != 0)
+        message_expansion_locs[message_expansion_locs.len - 1]
+    else
+        message_tok.loc;
+    const extra = Diagnostics.Message.Extra{ .str = try pp.comp.diag.arena.allocator().dupe(u8, str) };
+    return pp.comp.diag.add(.{ .tag = .pragma_message, .loc = loc, .extra = extra }, &.{});
+}
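
Each pragma implementation embeds a `Pragma` vtable struct and recovers its outer object with `@fieldParentPtr`, the same intrusive pattern `Once` uses below. A reduced, self-contained sketch of the pattern (hypothetical `Counter` type; uses the same era-appropriate `@fieldParentPtr(T, "field", ptr)` builtin form as this diff):

```zig
const std = @import("std");

const Pragma = struct {
    afterParse: fn (*Pragma) void,
};

const Counter = struct {
    pragma: Pragma = .{ .afterParse = afterParse },
    hits: u32 = 0,

    fn afterParse(pragma: *Pragma) void {
        // Walk back from the embedded field to the containing struct.
        var self = @fieldParentPtr(Counter, "pragma", pragma);
        self.hits = 0; // reset per-parse state, like Once.pragma_once
    }
};

test "intrusive vtable dispatch" {
    var c = Counter{ .hits = 3 };
    c.pragma.afterParse(&c.pragma);
    try std.testing.expect(c.hits == 0);
}
```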
diff --git a/src/aro/pragmas/once.zig b/src/aro/pragmas/once.zig
new file mode 100644
index 000000000000..c2de2efc1da3
--- /dev/null
+++ b/src/aro/pragmas/once.zig
@@ -0,0 +1,56 @@
+const std = @import("std");
+const mem = std.mem;
+const Compilation = @import("../Compilation.zig");
+const Pragma = @import("../Pragma.zig");
+const Diagnostics = @import("../Diagnostics.zig");
+const Preprocessor = @import("../Preprocessor.zig");
+const Parser = @import("../Parser.zig");
+const TokenIndex = @import("../Tree.zig").TokenIndex;
+const Source = @import("../Source.zig");
+
+const Once = @This();
+
+pragma: Pragma = .{
+    .afterParse = afterParse,
+    .deinit = deinit,
+    .preprocessorHandler = preprocessorHandler,
+},
+pragma_once: std.AutoHashMap(Source.Id, void),
+preprocess_count: u32 = 0,
+
+pub fn init(allocator: mem.Allocator) !*Pragma {
+    var once = try allocator.create(Once);
+    once.* = .{
+        .pragma_once = std.AutoHashMap(Source.Id, void).init(allocator),
+    };
+    return &once.pragma;
+}
+
+fn afterParse(pragma: *Pragma, _: *Compilation) void {
+    var self = @fieldParentPtr(Once, "pragma", pragma);
+    self.pragma_once.clearRetainingCapacity();
+}
+
+fn deinit(pragma: *Pragma, comp: *Compilation) void {
+    var self = @fieldParentPtr(Once, "pragma", pragma);
+    self.pragma_once.deinit();
+    comp.gpa.destroy(self);
+}
+
+fn preprocessorHandler(pragma: *Pragma, pp: *Preprocessor, start_idx: TokenIndex) Pragma.Error!void {
+    var self = @fieldParentPtr(Once, "pragma", pragma);
+    const name_tok = pp.tokens.get(start_idx);
+    const next = pp.tokens.get(start_idx + 1);
+    if (next.id != .nl) {
+        try pp.comp.diag.add(.{
+            .tag = .extra_tokens_directive_end,
+            .loc = name_tok.loc,
+        }, next.expansionSlice());
+    }
+    const seen = self.preprocess_count == pp.preprocess_count;
+    const prev = try self.pragma_once.fetchPut(name_tok.loc.id, {});
+    if (prev != null and !seen) {
+        return error.StopPreprocessing;
+    }
+    self.preprocess_count = pp.preprocess_count;
+}
diff --git a/src/aro/util.zig b/src/aro/util.zig
new file mode 100644
index 000000000000..3fd82dcfe09f
--- /dev/null
+++ b/src/aro/util.zig
@@ -0,0 +1,56 @@
+const std = @import("std");
+const is_windows = @import("builtin").os.tag == .windows;
+
+pub const Color = enum {
+    reset,
+    red,
+    green,
+    blue,
+    cyan,
+    purple,
+    yellow,
+    white,
+};
+
+pub fn setColor(color: Color, w: anytype) void {
+    if (is_windows) {
+        const stderr_file = std.io.getStdErr();
+        if (!stderr_file.isTty()) return;
+        const windows = std.os.windows;
+        const S = struct {
+            var attrs: windows.WORD = undefined;
+            var init_attrs = false;
+        };
+        if (!S.init_attrs) {
+            S.init_attrs = true;
+            var info: windows.CONSOLE_SCREEN_BUFFER_INFO = undefined;
+            _ = windows.kernel32.GetConsoleScreenBufferInfo(stderr_file.handle, &info);
+            S.attrs = info.wAttributes;
+            _ = windows.kernel32.SetConsoleOutputCP(65001);
+        }
+
+        // need to flush bufferedWriter
+        const T = if (@typeInfo(@TypeOf(w.context)) == .Pointer) @TypeOf(w.context.*) else @TypeOf(w.context);
+        if (T != void and @hasDecl(T, "flush")) w.context.flush() catch {};
+
+        switch (color) {
+            .reset => _ = windows.SetConsoleTextAttribute(stderr_file.handle, S.attrs) catch {},
+            .red => _ = windows.SetConsoleTextAttribute(stderr_file.handle, windows.FOREGROUND_RED | windows.FOREGROUND_INTENSITY) catch {},
+            .green => _ = windows.SetConsoleTextAttribute(stderr_file.handle, windows.FOREGROUND_GREEN | windows.FOREGROUND_INTENSITY) catch {},
+            .blue => _ = windows.SetConsoleTextAttribute(stderr_file.handle, windows.FOREGROUND_BLUE | windows.FOREGROUND_INTENSITY) catch {},
+            .cyan => _ = windows.SetConsoleTextAttribute(stderr_file.handle, windows.FOREGROUND_GREEN | windows.FOREGROUND_BLUE | windows.FOREGROUND_INTENSITY) catch {},
+            .purple => _ = windows.SetConsoleTextAttribute(stderr_file.handle, windows.FOREGROUND_RED | windows.FOREGROUND_BLUE | windows.FOREGROUND_INTENSITY) catch {},
+            .yellow => _ = windows.SetConsoleTextAttribute(stderr_file.handle, windows.FOREGROUND_RED | windows.FOREGROUND_GREEN | windows.FOREGROUND_INTENSITY) catch {},
+            .white => _ = windows.SetConsoleTextAttribute(stderr_file.handle, windows.FOREGROUND_RED | windows.FOREGROUND_GREEN | windows.FOREGROUND_BLUE | windows.FOREGROUND_INTENSITY) catch {},
+        }
+    } else switch (color) {
+        .reset => w.writeAll("\x1b[0m") catch {},
+        .red => w.writeAll("\x1b[31;1m") catch {},
+        .green => w.writeAll("\x1b[32;1m") catch {},
+        .blue => w.writeAll("\x1b[34;1m") catch {},
+        .cyan => w.writeAll("\x1b[36;1m") catch {},
+        .purple => w.writeAll("\x1b[35;1m") catch {},
+        .yellow => w.writeAll("\x1b[93;1m") catch {},
+        .white => w.writeAll("\x1b[0m\x1b[1m") catch {},
+    }
+}