From 19ff1036043ae40ff3d8a2e1a6a793219e1ec378 Mon Sep 17 00:00:00 2001
From: Armin Rigo <arigo@tunes.org>
Date: Tue, 26 May 2020 15:51:56 +0200
Subject: [PATCH] Issue #454

Try harder to avoid #line directives confuse the rest of pre-parsing
---
 cffi/cparser.py               | 37 ++++++++++++++++++++++++---
 testing/cffi0/test_parsing.py | 48 ++++++++++++++++++++++++++++++++++-
 2 files changed, 81 insertions(+), 4 deletions(-)

diff --git a/cffi/cparser.py b/cffi/cparser.py
index d7069a73..d9784655 100644
--- a/cffi/cparser.py
+++ b/cffi/cparser.py
@@ -29,6 +29,7 @@ _r_comment = re.compile(r"/\*.*?\*/|//([^\n\\]|\\.)*?$",
 _r_define  = re.compile(r"^\s*#\s*define\s+([A-Za-z_][A-Za-z_0-9]*)"
                         r"\b((?:[^\n\\]|\\.)*?)$",
                         re.DOTALL | re.MULTILINE)
+_r_line_directive = re.compile(r"^[ \t]*#[ \t]*line\b.*$", re.MULTILINE)
 _r_partial_enum = re.compile(r"=\s*\.\.\.\s*[,}]|\.\.\.\s*\}")
 _r_enum_dotdotdot = re.compile(r"__dotdotdot\d+__$")
 _r_partial_array = re.compile(r"\[\s*\.\.\.\s*\]")
@@ -163,10 +164,37 @@ def _warn_for_non_extern_non_static_global_variable(decl):
                       "with C it should have a storage class specifier "
                       "(usually 'extern')" % (decl.name,))
 
+def _remove_line_directives(csource):
+    # _r_line_directive matches whole lines, without the final \n, if they
+    # start with '#line' with some spacing allowed.  This function stores
+    # them away and replaces them with exactly the string '#line@N', where
+    # N is the index in the list 'line_directives'.
+    line_directives = []
+    def replace(m):
+        i = len(line_directives)
+        line_directives.append(m.group())
+        return '#line@%d' % i
+    csource = _r_line_directive.sub(replace, csource)
+    return csource, line_directives
+
+def _put_back_line_directives(csource, line_directives):
+    def replace(m):
+        s = m.group()
+        if not s.startswith('#line@'):
+            raise AssertionError("unexpected #line directive "
+                                 "(should have been processed and removed")
+        return line_directives[int(s[6:])]
+    return _r_line_directive.sub(replace, csource)
+
 def _preprocess(csource):
+    # First, remove the lines of the form '#line N "filename"' because
+    # the "filename" part could confuse the rest
+    csource, line_directives = _remove_line_directives(csource)
     # Remove comments.  NOTE: this only work because the cdef() section
-    # should not contain any string literal!
-    csource = _r_comment.sub(' ', csource)
+    # should not contain any string literals (except in line directives)!
+    def replace_keeping_newlines(m):
+        return ' ' + m.group().count('\n') * '\n'
+    csource = _r_comment.sub(replace_keeping_newlines, csource)
     # Remove the "#define FOO x" lines
     macros = {}
     for match in _r_define.finditer(csource):
@@ -219,7 +247,10 @@ def _preprocess(csource):
     csource = _r_float_dotdotdot.sub(' __dotdotdotfloat__ ', csource)
     # Replace all remaining "..." with the same name, "__dotdotdot__",
     # which is declared with a typedef for the purpose of C parsing.
-    return csource.replace('...', ' __dotdotdot__ '), macros
+    csource = csource.replace('...', ' __dotdotdot__ ')
+    # Finally, put back the line directives
+    csource = _put_back_line_directives(csource, line_directives)
+    return csource, macros
 
 def _common_type_names(csource):
     # Look in the source for what looks like usages of types from the
diff --git a/testing/cffi0/test_parsing.py b/testing/cffi0/test_parsing.py
index 3fc3783a..5f2d7ec4 100644
--- a/testing/cffi0/test_parsing.py
+++ b/testing/cffi0/test_parsing.py
@@ -174,7 +174,7 @@ def test_remove_line_continuation_comments():
         double // blah \\
                   more comments
         x(void);
-        double // blah\\\\
+        double // blah // blah\\\\
         y(void);
         double // blah\\ \
                   etc
@@ -185,6 +185,52 @@ def test_remove_line_continuation_comments():
     m.y
     m.z
 
+def test_dont_remove_comment_in_line_directives():
+    ffi = FFI(backend=FakeBackend())
+    e = py.test.raises(CDefError, ffi.cdef, """
+        \t # \t line \t 8 \t "baz.c" \t
+
+        some syntax error here
+    """)
+    assert str(e.value) == "parse error\nbaz.c:9:14: before: syntax"
+    #
+    e = py.test.raises(CDefError, ffi.cdef, """
+        #line 7 "foo//bar.c"
+
+        some syntax error here
+    """)
+    assert str(e.value) == "parse error\nfoo//bar.c:8:14: before: syntax"
+
+def test_multiple_line_directives():
+    ffi = FFI(backend=FakeBackend())
+    e = py.test.raises(CDefError, ffi.cdef,
+    """ #line 5 "foo.c"
+        extern int xx;
+        #line 6 "bar.c"
+        extern int yy;
+        #line 7 "baz.c"
+        some syntax error here
+        #line 8 "yadda.c"
+        extern int zz;
+    """)
+    assert str(e.value) == "parse error\nbaz.c:7:14: before: syntax"
+
+def test_commented_line_directive():
+    ffi = FFI(backend=FakeBackend())
+    e = py.test.raises(CDefError, ffi.cdef, """
+        /*
+        #line 5 "foo.c"
+        */
+        void xx(void);
+
+        #line 6 "bar.c"
+        /*
+        #line 35 "foo.c"
+        */
+        some syntax error
+    """)
+    assert str(e.value) == "parse error\nbar.c:9:14: before: syntax"
+
 def test_line_continuation_in_defines():
     ffi = FFI(backend=FakeBackend())
     ffi.cdef("""
-- 
2.26.2

From 31249d786c833d4960bbbf4e0d7f7bcaecf92d1f Mon Sep 17 00:00:00 2001
From: Armin Rigo <arigo@tunes.org>
Date: Fri, 29 May 2020 10:27:40 +0200
Subject: [PATCH] #454

Second try with '# NUMBER' instead of '#line NUMBER', as gcc seems to output
---
 cffi/cparser.py               |  8 +++----
 testing/cffi0/test_parsing.py | 41 +++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/cffi/cparser.py b/cffi/cparser.py
index d9784655..74830e91 100644
--- a/cffi/cparser.py
+++ b/cffi/cparser.py
@@ -29,7 +29,7 @@ _r_comment = re.compile(r"/\*.*?\*/|//([^\n\\]|\\.)*?$",
 _r_define  = re.compile(r"^\s*#\s*define\s+([A-Za-z_][A-Za-z_0-9]*)"
                         r"\b((?:[^\n\\]|\\.)*?)$",
                         re.DOTALL | re.MULTILINE)
-_r_line_directive = re.compile(r"^[ \t]*#[ \t]*line\b.*$", re.MULTILINE)
+_r_line_directive = re.compile(r"^[ \t]*#[ \t]*(?:line|\d+)\b.*$", re.MULTILINE)
 _r_partial_enum = re.compile(r"=\s*\.\.\.\s*[,}]|\.\.\.\s*\}")
 _r_enum_dotdotdot = re.compile(r"__dotdotdot\d+__$")
 _r_partial_array = re.compile(r"\[\s*\.\.\.\s*\]")
@@ -166,9 +166,9 @@ def _warn_for_non_extern_non_static_global_variable(decl):
 
 def _remove_line_directives(csource):
     # _r_line_directive matches whole lines, without the final \n, if they
-    # start with '#line' with some spacing allowed.  This function stores
-    # them away and replaces them with exactly the string '#line@N', where
-    # N is the index in the list 'line_directives'.
+    # start with '#line' with some spacing allowed, or '#NUMBER'.  This
+    # function stores them away and replaces them with exactly the string
+    # '#line@N', where N is the index in the list 'line_directives'.
     line_directives = []
     def replace(m):
         i = len(line_directives)
diff --git a/testing/cffi0/test_parsing.py b/testing/cffi0/test_parsing.py
index 5f2d7ec4..a5e45874 100644
--- a/testing/cffi0/test_parsing.py
+++ b/testing/cffi0/test_parsing.py
@@ -199,6 +199,21 @@ def test_dont_remove_comment_in_line_directives():
 
         some syntax error here
     """)
+    #
+    assert str(e.value) == "parse error\nfoo//bar.c:8:14: before: syntax"
+    ffi = FFI(backend=FakeBackend())
+    e = py.test.raises(CDefError, ffi.cdef, """
+        \t # \t 8 \t "baz.c" \t
+
+        some syntax error here
+    """)
+    assert str(e.value) == "parse error\nbaz.c:9:14: before: syntax"
+    #
+    e = py.test.raises(CDefError, ffi.cdef, """
+        # 7 "foo//bar.c"
+
+        some syntax error here
+    """)
     assert str(e.value) == "parse error\nfoo//bar.c:8:14: before: syntax"
 
 def test_multiple_line_directives():
@@ -214,6 +229,18 @@ def test_multiple_line_directives():
         extern int zz;
     """)
     assert str(e.value) == "parse error\nbaz.c:7:14: before: syntax"
+    #
+    e = py.test.raises(CDefError, ffi.cdef,
+    """ # 5 "foo.c"
+        extern int xx;
+        # 6 "bar.c"
+        extern int yy;
+        # 7 "baz.c"
+        some syntax error here
+        # 8 "yadda.c"
+        extern int zz;
+    """)
+    assert str(e.value) == "parse error\nbaz.c:7:14: before: syntax"
 
 def test_commented_line_directive():
     ffi = FFI(backend=FakeBackend())
@@ -229,6 +256,20 @@ def test_commented_line_directive():
         */
         some syntax error
     """)
+    #
+    assert str(e.value) == "parse error\nbar.c:9:14: before: syntax"
+    e = py.test.raises(CDefError, ffi.cdef, """
+        /*
+        # 5 "foo.c"
+        */
+        void xx(void);
+
+        # 6 "bar.c"
+        /*
+        # 35 "foo.c"
+        */
+        some syntax error
+    """)
     assert str(e.value) == "parse error\nbar.c:9:14: before: syntax"
 
 def test_line_continuation_in_defines():
-- 
2.26.2