From 19ff1036043ae40ff3d8a2e1a6a793219e1ec378 Mon Sep 17 00:00:00 2001 From: Armin Rigo Date: Tue, 26 May 2020 15:51:56 +0200 Subject: [PATCH] Issue #454 Try harder to avoid #line directives confuse the rest of pre-parsing --- cffi/cparser.py | 37 ++++++++++++++++++++++++--- testing/cffi0/test_parsing.py | 48 ++++++++++++++++++++++++++++++++++- 2 files changed, 81 insertions(+), 4 deletions(-) diff --git a/cffi/cparser.py b/cffi/cparser.py index d7069a73..d9784655 100644 --- a/cffi/cparser.py +++ b/cffi/cparser.py @@ -29,6 +29,7 @@ _r_comment = re.compile(r"/\*.*?\*/|//([^\n\\]|\\.)*?$", _r_define = re.compile(r"^\s*#\s*define\s+([A-Za-z_][A-Za-z_0-9]*)" r"\b((?:[^\n\\]|\\.)*?)$", re.DOTALL | re.MULTILINE) +_r_line_directive = re.compile(r"^[ \t]*#[ \t]*line\b.*$", re.MULTILINE) _r_partial_enum = re.compile(r"=\s*\.\.\.\s*[,}]|\.\.\.\s*\}") _r_enum_dotdotdot = re.compile(r"__dotdotdot\d+__$") _r_partial_array = re.compile(r"\[\s*\.\.\.\s*\]") @@ -163,10 +164,37 @@ def _warn_for_non_extern_non_static_global_variable(decl): "with C it should have a storage class specifier " "(usually 'extern')" % (decl.name,)) +def _remove_line_directives(csource): + # _r_line_directive matches whole lines, without the final \n, if they + # start with '#line' with some spacing allowed. This function stores + # them away and replaces them with exactly the string '#line@N', where + # N is the index in the list 'line_directives'. + line_directives = [] + def replace(m): + i = len(line_directives) + line_directives.append(m.group()) + return '#line@%d' % i + csource = _r_line_directive.sub(replace, csource) + return csource, line_directives + +def _put_back_line_directives(csource, line_directives): + def replace(m): + s = m.group() + if not s.startswith('#line@'): + raise AssertionError("unexpected #line directive " + "(should have been processed and removed") + return line_directives[int(s[6:])] + return _r_line_directive.sub(replace, csource) + def _preprocess(csource): + # First, remove the lines of the form '#line N "filename"' because + # the "filename" part could confuse the rest + csource, line_directives = _remove_line_directives(csource) # Remove comments. NOTE: this only work because the cdef() section - # should not contain any string literal! - csource = _r_comment.sub(' ', csource) + # should not contain any string literals (except in line directives)! + def replace_keeping_newlines(m): + return ' ' + m.group().count('\n') * '\n' + csource = _r_comment.sub(replace_keeping_newlines, csource) # Remove the "#define FOO x" lines macros = {} for match in _r_define.finditer(csource): @@ -219,7 +247,10 @@ def _preprocess(csource): csource = _r_float_dotdotdot.sub(' __dotdotdotfloat__ ', csource) # Replace all remaining "..." with the same name, "__dotdotdot__", # which is declared with a typedef for the purpose of C parsing. - return csource.replace('...', ' __dotdotdot__ '), macros + csource = csource.replace('...', ' __dotdotdot__ ') + # Finally, put back the line directives + csource = _put_back_line_directives(csource, line_directives) + return csource, macros def _common_type_names(csource): # Look in the source for what looks like usages of types from the diff --git a/testing/cffi0/test_parsing.py b/testing/cffi0/test_parsing.py index 3fc3783a..5f2d7ec4 100644 --- a/testing/cffi0/test_parsing.py +++ b/testing/cffi0/test_parsing.py @@ -174,7 +174,7 @@ def test_remove_line_continuation_comments(): double // blah \\ more comments x(void); - double // blah\\\\ + double // blah // blah\\\\ y(void); double // blah\\ \ etc @@ -185,6 +185,52 @@ def test_remove_line_continuation_comments(): m.y m.z +def test_dont_remove_comment_in_line_directives(): + ffi = FFI(backend=FakeBackend()) + e = py.test.raises(CDefError, ffi.cdef, """ + \t # \t line \t 8 \t "baz.c" \t + + some syntax error here + """) + assert str(e.value) == "parse error\nbaz.c:9:14: before: syntax" + # + e = py.test.raises(CDefError, ffi.cdef, """ + #line 7 "foo//bar.c" + + some syntax error here + """) + assert str(e.value) == "parse error\nfoo//bar.c:8:14: before: syntax" + +def test_multiple_line_directives(): + ffi = FFI(backend=FakeBackend()) + e = py.test.raises(CDefError, ffi.cdef, + """ #line 5 "foo.c" + extern int xx; + #line 6 "bar.c" + extern int yy; + #line 7 "baz.c" + some syntax error here + #line 8 "yadda.c" + extern int zz; + """) + assert str(e.value) == "parse error\nbaz.c:7:14: before: syntax" + +def test_commented_line_directive(): + ffi = FFI(backend=FakeBackend()) + e = py.test.raises(CDefError, ffi.cdef, """ + /* + #line 5 "foo.c" + */ + void xx(void); + + #line 6 "bar.c" + /* + #line 35 "foo.c" + */ + some syntax error + """) + assert str(e.value) == "parse error\nbar.c:9:14: before: syntax" + def test_line_continuation_in_defines(): ffi = FFI(backend=FakeBackend()) ffi.cdef(""" -- 2.26.2 From 31249d786c833d4960bbbf4e0d7f7bcaecf92d1f Mon Sep 17 00:00:00 2001 From: Armin Rigo Date: Fri, 29 May 2020 10:27:40 +0200 Subject: [PATCH] #454 Second try with '# NUMBER' instead of '#line NUMBER', as gcc seems to output --- cffi/cparser.py | 8 +++---- testing/cffi0/test_parsing.py | 41 +++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/cffi/cparser.py b/cffi/cparser.py index d9784655..74830e91 100644 --- a/cffi/cparser.py +++ b/cffi/cparser.py @@ -29,7 +29,7 @@ _r_comment = re.compile(r"/\*.*?\*/|//([^\n\\]|\\.)*?$", _r_define = re.compile(r"^\s*#\s*define\s+([A-Za-z_][A-Za-z_0-9]*)" r"\b((?:[^\n\\]|\\.)*?)$", re.DOTALL | re.MULTILINE) -_r_line_directive = re.compile(r"^[ \t]*#[ \t]*line\b.*$", re.MULTILINE) +_r_line_directive = re.compile(r"^[ \t]*#[ \t]*(?:line|\d+)\b.*$", re.MULTILINE) _r_partial_enum = re.compile(r"=\s*\.\.\.\s*[,}]|\.\.\.\s*\}") _r_enum_dotdotdot = re.compile(r"__dotdotdot\d+__$") _r_partial_array = re.compile(r"\[\s*\.\.\.\s*\]") @@ -166,9 +166,9 @@ def _warn_for_non_extern_non_static_global_variable(decl): def _remove_line_directives(csource): # _r_line_directive matches whole lines, without the final \n, if they - # start with '#line' with some spacing allowed. This function stores - # them away and replaces them with exactly the string '#line@N', where - # N is the index in the list 'line_directives'. + # start with '#line' with some spacing allowed, or '#NUMBER'. This + # function stores them away and replaces them with exactly the string + # '#line@N', where N is the index in the list 'line_directives'. line_directives = [] def replace(m): i = len(line_directives) diff --git a/testing/cffi0/test_parsing.py b/testing/cffi0/test_parsing.py index 5f2d7ec4..a5e45874 100644 --- a/testing/cffi0/test_parsing.py +++ b/testing/cffi0/test_parsing.py @@ -199,6 +199,21 @@ def test_dont_remove_comment_in_line_directives(): some syntax error here """) + # + assert str(e.value) == "parse error\nfoo//bar.c:8:14: before: syntax" + ffi = FFI(backend=FakeBackend()) + e = py.test.raises(CDefError, ffi.cdef, """ + \t # \t 8 \t "baz.c" \t + + some syntax error here + """) + assert str(e.value) == "parse error\nbaz.c:9:14: before: syntax" + # + e = py.test.raises(CDefError, ffi.cdef, """ + # 7 "foo//bar.c" + + some syntax error here + """) assert str(e.value) == "parse error\nfoo//bar.c:8:14: before: syntax" def test_multiple_line_directives(): @@ -214,6 +229,18 @@ def test_multiple_line_directives(): extern int zz; """) assert str(e.value) == "parse error\nbaz.c:7:14: before: syntax" + # + e = py.test.raises(CDefError, ffi.cdef, + """ # 5 "foo.c" + extern int xx; + # 6 "bar.c" + extern int yy; + # 7 "baz.c" + some syntax error here + # 8 "yadda.c" + extern int zz; + """) + assert str(e.value) == "parse error\nbaz.c:7:14: before: syntax" def test_commented_line_directive(): ffi = FFI(backend=FakeBackend()) @@ -229,6 +256,20 @@ def test_commented_line_directive(): */ some syntax error """) + # + assert str(e.value) == "parse error\nbar.c:9:14: before: syntax" + e = py.test.raises(CDefError, ffi.cdef, """ + /* + # 5 "foo.c" + */ + void xx(void); + + # 6 "bar.c" + /* + # 35 "foo.c" + */ + some syntax error + """) assert str(e.value) == "parse error\nbar.c:9:14: before: syntax" def test_line_continuation_in_defines(): -- 2.26.2