Skip to content

Commit cb467c9

Browse files
authored
Merge pull request #1173 from karkhaz/kk-synthesise-linker-symbols
Synthesise definitions of linker script symbols
2 parents c8f69b5 + 85521b0 commit cb467c9

File tree

18 files changed

+1189
-11
lines changed

18 files changed

+1189
-11
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
extern char src_start[];
2+
extern char src_end[];
3+
extern char dst_start[];
4+
5+
void *memcpy(void *dest, void *src, unsigned n){
6+
return (void *)0;
7+
}
8+
9+
int main(){
10+
memcpy(dst_start, src_start, (unsigned)src_end - (unsigned)src_start);
11+
return 0;
12+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
MEMORY {
2+
RAM : ORIGIN = 0x0, LENGTH = 25M
3+
}
4+
5+
SECTIONS
6+
{
7+
/* GCC insists on having these */
8+
.note.gnu.build-id : { } > RAM
9+
.text : { } > RAM
10+
11+
.src_section : {
12+
src_start = .;
13+
*(.text*)
14+
src_end = .;
15+
} > RAM
16+
17+
.dst_section : {
18+
dst_start = .;
19+
*(.text*)
20+
dst_end = .;
21+
} > RAM
22+
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
CORE
2+
main.c
3+
-o out.gb -T script.ld -nostdlib
4+
^EXIT=0$
5+
^SIGNAL=0$
6+
--
7+
^warning: ignoring
8+
^CONVERSION ERROR$
9+
--
10+
Tesing the functionality of goto-cc's linker script parsing
11+
functionality, ensuring that it can get the values of symbols that are
12+
defined in linker scripts.
13+
14+
This test ensures that goto-cc and ls-parse can:
15+
16+
- get the value of a symbol whose value indicates the start of a section;
17+
- get the value of a symbol whose value indicates the end of a section.
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
extern char src_start[];
2+
extern char src_size[];
3+
extern char dst_start[];
4+
5+
void *memcpy(void *dest, void *src, unsigned n){
6+
return (void *)0;
7+
}
8+
9+
int main(){
10+
memcpy(dst_start, src_start, (unsigned)src_size);
11+
return 0;
12+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
2+
MEMORY {
3+
RAM : ORIGIN = 0x0, LENGTH = 25M
4+
}
5+
6+
SECTIONS
7+
{
8+
/* GCC insists on having these */
9+
.note.gnu.build-id : { } > RAM
10+
.text : { } > RAM
11+
12+
.src_section : {
13+
src_start = .;
14+
*(.text*)
15+
src_end = .;
16+
} > RAM
17+
18+
src_size = src_end - src_start;
19+
20+
.dst_section : {
21+
dst_start = .;
22+
*(.text*)
23+
dst_end = .;
24+
} > RAM
25+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
CORE
2+
main.c
3+
-o out.gb -T script.ld -nostdlib
4+
^EXIT=0$
5+
^SIGNAL=0$
6+
--
7+
^warning: ignoring
8+
^CONVERSION ERROR$
9+
--
10+
Tesing the functionality of goto-cc's linker script parsing
11+
functionality, ensuring that it can get the values of symbols that are
12+
defined in linker scripts.
13+
14+
This test ensures that goto-cc and ls-parse can:
15+
16+
- get the value of a symbol whose value indicates the start of a section;
17+
- get the value of a symbol whose value indicates the size of a section,
18+
and whose value has been generated through a basic arithmetic
19+
expression in the linker script.
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
extern char sym[];
2+
3+
int main(){
4+
int foo = (int)sym;
5+
return 0;
6+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
MEMORY {
2+
RAM : ORIGIN = 0x0, LENGTH = 25M
3+
}
4+
5+
SECTIONS
6+
{
7+
/* GCC insists on having these */
8+
.note.gnu.build-id : { } > RAM
9+
.text : { } > RAM
10+
11+
.src_section : {
12+
sym = .;
13+
*(.text*)
14+
} > RAM
15+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
CORE
2+
main.c
3+
-o out.gb -T script.ld -nostdlib
4+
^EXIT=0$
5+
^SIGNAL=0$
6+
--
7+
^warning: ignoring
8+
^CONVERSION ERROR$
9+
--
10+
Tesing the functionality of goto-cc's linker script parsing
11+
functionality, ensuring that it can get the values of symbols that are
12+
defined in linker scripts.
13+
14+
This test ensures that goto-cc and ls-parse can get the value of a
15+
symbol whose value indicates the start of a section.

scripts/ls_parse.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,10 @@ def get_linker_script_data(script):
7777
# script is a whitespace.
7878
text = " %s" % text
7979

80-
close_brace = re.compile(r"\s}")
80+
# Lex out comments
81+
text = re.sub(r"/\*.*?\*/", " ", text)
82+
83+
close_brace = re.compile(r"\s}(\s*>\s*\w+)?")
8184
memory_cmd = re.compile(r"\sMEMORY\s*{")
8285
sections_cmd = re.compile(r"\sSECTIONS\s*{")
8386
assign_current = re.compile(r"\s(?P<sym>\w+)\s*=\s*\.\s*;")
@@ -256,6 +259,7 @@ def close_brace_fun(state, _, buf):
256259
sym = state["end-valid"].group("sym")
257260
info("'%s' marks the end of section '%s'", sym, sec)
258261
state["sections"][sec]["end"] = sym
262+
state["end-valid"] = None
259263
else:
260264
# Linker script assigned end-of-section to a symbol, but not
261265
# the start. this is useless to us.
@@ -388,7 +392,7 @@ def symbols_from(object_file):
388392

389393
def match_up_addresses(script_data, symbol_table):
390394
ret = []
391-
for data in script_data["sections"].values():
395+
for name, data in script_data["sections"].items():
392396
ok = False
393397
if "size" in data and "start" in data:
394398
ok = True
@@ -404,6 +408,7 @@ def match_up_addresses(script_data, symbol_table):
404408
region["start"] = {"sym": sym, "val": value}
405409
if "end" in data and sym == data["end"]:
406410
region["end"] = {"sym": sym, "val": value}
411+
region["section"] = name
407412
append = False
408413
if "size" in region and "start" in region:
409414
append = True
@@ -424,6 +429,9 @@ def get_region_range(region):
424429
e_var = region["end"]["sym"]
425430
ret["start"] = start
426431
ret["size"] = size
432+
ret["start-symbol"] = s_var
433+
ret["end-symbol"] = e_var
434+
ret["has-end-symbol"] = True
427435
ret["annot"] = "__CPROVER_allocated_memory(%s, %d);" % (hex(start), size)
428436
ret["commt"] = "from %s to %s" % (s_var, e_var)
429437
elif "size" in region:
@@ -433,10 +441,14 @@ def get_region_range(region):
433441
z_var = region["size"]["sym"]
434442
ret["start"] = start
435443
ret["size"] = size
444+
ret["start-symbol"] = s_var
445+
ret["size-symbol"] = z_var
446+
ret["has-end-symbol"] = False
436447
ret["annot"] = "__CPROVER_allocated_memory(%s, %d);" % (hex(start), size)
437448
ret["commt"] = "from %s for %s bytes" % (s_var, z_var)
438449
else:
439450
raise "Malformatted region\n%s" % str(region)
451+
ret["section"] = region["section"]
440452
return ret
441453

442454

@@ -502,9 +514,9 @@ def main():
502514

503515
regions = match_up_addresses(script_data, symbol_table)
504516

505-
info(json.dumps(symbol_table, indent=2))
506-
info(json.dumps(script_data, indent=2))
507-
info(json.dumps(regions, indent=2))
517+
info("symbol table %s" % json.dumps(symbol_table, indent=2))
518+
info("script data %s" % json.dumps(script_data, indent=2))
519+
info("regions %s" % json.dumps(regions, indent=2))
508520

509521
final = json.dumps(final_json_output(regions, symbol_table),
510522
indent=2)

0 commit comments

Comments
 (0)