1 files changed, 748 insertions, 163 deletions
diff --git a/support/cpp/libcpp/include/cpplib.h b/support/cpp/libcpp/include/cpplib.h
index 6882514d5..dc764a66e 100644
--- a/support/cpp/libcpp/include/cpplib.h
+++ b/support/cpp/libcpp/include/cpplib.h
@@ -1,7 +1,5 @@
 /* Definitions for CPP library.
-   Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
-   2004, 2005, 2007, 2008, 2009, 2010
-   Free Software Foundation, Inc.
+   Copyright (C) 1995-2022 Free Software Foundation, Inc.
    Written by Per Bothner, 1994-95.
 
 This program is free software; you can redistribute it and/or modify it
@@ -38,7 +36,6 @@ typedef struct cpp_macro cpp_macro;
 typedef struct cpp_callbacks cpp_callbacks;
 typedef struct cpp_dir cpp_dir;
 
-struct answer;
 struct _cpp_file;
 
 /* The first three groups, apart from '=', can appear in preprocessor
@@ -49,7 +46,7 @@ struct _cpp_file;
    '='.  The lexer needs operators ending in '=', like ">>=", to be in
    the same order as their counterparts without the '=', like ">>".
 
-   See the cpp_operator table optab in expr.c if you change the order or
+   See the cpp_operator table optab in expr.cc if you change the order or
    add or remove anything in the first group.  */
 
 #define TTYPE_TABLE							\
@@ -81,6 +78,7 @@ struct _cpp_file;
   OP(NOT_EQ,		"!=")						\
   OP(GREATER_EQ,	">=")						\
   OP(LESS_EQ,		"<=")						\
+  OP(SPACESHIP,		"<=>")						\
 									\
   /* These two are unary + / - in preprocessor expressions.  */		\
   OP(PLUS_EQ,		"+=")	/* math */				\
@@ -121,6 +119,7 @@ struct _cpp_file;
   TK(WCHAR,		LITERAL) /* L'char' */				\
   TK(CHAR16,		LITERAL) /* u'char' */				\
   TK(CHAR32,		LITERAL) /* U'char' */				\
+  TK(UTF8CHAR,		LITERAL) /* u8'char' */				\
   TK(OTHER,		LITERAL) /* stray punctuation */		\
 									\
   TK(STRING,		LITERAL) /* "string" */				\
@@ -131,6 +130,17 @@ struct _cpp_file;
   TK(OBJC_STRING,	LITERAL) /* @"string" - Objective-C */		\
   TK(HEADER_NAME,	LITERAL) /* <stdio.h> in #include */		\
 									\
+  TK(CHAR_USERDEF,	LITERAL) /* 'char'_suffix - C++-0x */		\
+  TK(WCHAR_USERDEF,	LITERAL) /* L'char'_suffix - C++-0x */		\
+  TK(CHAR16_USERDEF,	LITERAL) /* u'char'_suffix - C++-0x */		\
+  TK(CHAR32_USERDEF,	LITERAL) /* U'char'_suffix - C++-0x */		\
+  TK(UTF8CHAR_USERDEF,	LITERAL) /* u8'char'_suffix - C++-0x */		\
+  TK(STRING_USERDEF,	LITERAL) /* "string"_suffix - C++-0x */		\
+  TK(WSTRING_USERDEF,	LITERAL) /* L"string"_suffix - C++-0x */	\
+  TK(STRING16_USERDEF,	LITERAL) /* u"string"_suffix - C++-0x */	\
+  TK(STRING32_USERDEF,	LITERAL) /* U"string"_suffix - C++-0x */	\
+  TK(UTF8STRING_USERDEF,LITERAL) /* u8"string"_suffix - C++-0x */	\
+									\
   TK(COMMENT,		LITERAL) /* Only if output comments.  */	\
 				 /* SPELL_LITERAL happens to DTRT.  */	\
   TK(MACRO_ARG,		NONE)	 /* Macro argument.  */			\
@@ -148,6 +158,9 @@ enum cpp_ttype
   TTYPE_TABLE
   N_TTYPES,
 
+  /* A token type for keywords, as opposed to ordinary identifiers.  */
+  CPP_KEYWORD,
+
   /* Positions in the table.  */
   CPP_LAST_EQ        = CPP_LSHIFT,
   CPP_FIRST_DIGRAPH  = CPP_HASH,
@@ -158,9 +171,13 @@ enum cpp_ttype
 #undef TK
 
 /* C language kind, used when calling cpp_create_reader.  */
-enum c_lang {CLK_GNUC89 = 0, CLK_GNUC99, CLK_GNUC1X,
-	     CLK_STDC89, CLK_STDC94, CLK_STDC99, CLK_STDC1X,
-	     CLK_GNUCXX, CLK_CXX98, CLK_GNUCXX0X, CLK_CXX0X, CLK_ASM};
+enum c_lang {CLK_GNUC89 = 0, CLK_GNUC99, CLK_GNUC11, CLK_GNUC17, CLK_GNUC2X,
+	     CLK_STDC89, CLK_STDC94, CLK_STDC99, CLK_STDC11, CLK_STDC17,
+	     CLK_STDC2X,
+	     CLK_GNUCXX, CLK_CXX98, CLK_GNUCXX11, CLK_CXX11,
+	     CLK_GNUCXX14, CLK_CXX14, CLK_GNUCXX17, CLK_CXX17,
+	     CLK_GNUCXX20, CLK_CXX20, CLK_GNUCXX23, CLK_CXX23,
+	     CLK_ASM};
 
 /* Payload of a NUMBER, STRING, CHAR or COMMENT token.  */
 struct GTY(()) cpp_string {
@@ -174,17 +191,22 @@ struct GTY(()) cpp_string {
 #define STRINGIFY_ARG	(1 << 2) /* If macro argument to be stringified.  */
 #define PASTE_LEFT	(1 << 3) /* If on LHS of a ## operator.  */
 #define NAMED_OP	(1 << 4) /* C++ named operators.  */
-#define NO_EXPAND	(1 << 5) /* Do not macro-expand this token.  */
+#define PREV_FALLTHROUGH (1 << 5) /* On a token preceeded by FALLTHROUGH
+				     comment.  */
 #define BOL		(1 << 6) /* Token at beginning of line.  */
 #define PURE_ZERO	(1 << 7) /* Single 0 digit, used by the C++ frontend,
-				    set in c-lex.c.  */
+				    set in c-lex.cc.  */
 #define SP_DIGRAPH	(1 << 8) /* # or ## token was a digraph.  */
 #define SP_PREV_WHITE	(1 << 9) /* If whitespace before a ##
 				    operator, or before this token
 				    after a # operator.  */
-#define PREV_NL         (1 <<11) /* If a newline before this token.  */
-#define ENTER_ASM       (1 <<12) /* enter an __asm __endasm pair.  */
-#define EXIT_ASM        (1 <<13) /* exit an __asm __endasm pair.  */
+#define NO_EXPAND	(1 << 10) /* Do not macro-expand this token.  */
+#define PRAGMA_OP	(1 << 11) /* _Pragma token.  */
+
+/* sdcc specific */
+#define PREV_NL         (1 <<12) /* If a newline before this token.  */
+#define ENTER_ASM       (1 <<13) /* enter an __asm __endasm pair.  */
+#define EXIT_ASM        (1 <<14) /* exit an __asm __endasm pair.  */
 
 /* Specify which field, if any, of the cpp_token union is used.  */
 
@@ -202,6 +224,12 @@ enum cpp_token_fld_kind {
 struct GTY(()) cpp_macro_arg {
   /* Argument number.  */
   unsigned int arg_no;
+  /* The original spelling of the macro argument token.  */
+  cpp_hashnode *
+    GTY ((nested_ptr (union tree_node,
+		"%h ? CPP_HASHNODE (GCC_IDENT_TO_HT_IDENT (%h)) : NULL",
+			"%h ? HT_IDENT_TO_GCC_IDENT (HT_NODE (%h)) : NULL")))
+       spelling;
 };
 
 /* An identifier in the cpp_token union.  */
@@ -212,12 +240,21 @@ struct GTY(()) cpp_identifier {
 		"%h ? CPP_HASHNODE (GCC_IDENT_TO_HT_IDENT (%h)) : NULL",
 			"%h ? HT_IDENT_TO_GCC_IDENT (HT_NODE (%h)) : NULL")))
        node;
+  /* The original spelling of the identifier.  */
+  cpp_hashnode *
+    GTY ((nested_ptr (union tree_node,
+		"%h ? CPP_HASHNODE (GCC_IDENT_TO_HT_IDENT (%h)) : NULL",
+			"%h ? HT_IDENT_TO_GCC_IDENT (HT_NODE (%h)) : NULL")))
+       spelling;
 };
 
 /* A preprocessing token.  This has been carefully packed and should
    occupy 16 bytes on 32-bit hosts and 24 bytes on 64-bit hosts.  */
 struct GTY(()) cpp_token {
-  source_location src_loc;	/* Location of first char of token.  */
+
+  /* Location of first char of token, together with range of full token.  */
+  location_t src_loc;
+
   ENUM_BITFIELD(cpp_ttype) type : CHAR_BIT;  /* token type */
   unsigned short flags;		/* flags - see above */
 
@@ -232,7 +269,7 @@ struct GTY(()) cpp_token {
     /* A string, or number.  */
     struct cpp_string GTY ((tag ("CPP_TOKEN_FLD_STR"))) str;
 
-    /* Argument no. for a CPP_MACRO_ARG.  */
+    /* Argument no. (and original spelling) for a CPP_MACRO_ARG.  */
     struct cpp_macro_arg GTY ((tag ("CPP_TOKEN_FLD_ARG_NO"))) macro_arg;
 
     /* Original token no. for a CPP_PASTE (from a sequence of
@@ -245,7 +282,7 @@ struct GTY(()) cpp_token {
 };
 
 /* Say which field is in use.  */
-extern enum cpp_token_fld_kind cpp_token_val_index (cpp_token *tok);
+extern enum cpp_token_fld_kind cpp_token_val_index (const cpp_token *tok);
 
 /* A type wide enough to hold any multibyte source character.
    cpplib's character constant interpreter requires an unsigned type.
@@ -281,13 +318,31 @@ enum cpp_normalize_level {
   normalized_none
 };
 
+enum cpp_main_search 
+{
+  CMS_none,    /* A regular source file.  */
+  CMS_header,  /* Is a directly-specified header file (eg PCH or
+		  header-unit).  */
+  CMS_user,    /* Search the user INCLUDE path.  */
+  CMS_system,  /* Search the system INCLUDE path.  */
+};
+
+/* The possible bidirectional control characters checking levels.  */
+enum cpp_bidirectional_level {
+  /* No checking.  */
+  bidirectional_none = 0,
+  /* Only detect unpaired uses of bidirectional control characters.  */
+  bidirectional_unpaired = 1,
+  /* Detect any use of bidirectional control characters.  */
+  bidirectional_any = 2,
+  /* Also warn about UCNs.  */
+  bidirectional_ucn = 4
+};
+
 /* This structure is nested inside struct cpp_reader, and
    carries all the options visible to the command line.  */
 struct cpp_options
 {
-  /* Characters between tab stops.  */
-  unsigned int tabstop;
-
   /* The language we're preprocessing.  */
   enum c_lang lang;
 
@@ -321,6 +376,13 @@ struct cpp_options
   /* Nonzero means process u/U prefix literals (UTF-16/32).  */
   unsigned char uliterals;
 
+  /* Nonzero means process u8 prefixed character literals (UTF-8).  */
+  unsigned char utf8_char_literals;
+
+  /* Nonzero means process r/R raw strings.  If this is set, uliterals
+     must be set as well.  */
+  unsigned char rliterals;
+
   /* Nonzero means print names of header files (-H).  */
   unsigned char print_include_names;
 
@@ -330,6 +392,9 @@ struct cpp_options
   /* Nonzero means warn if slash-star appears in a comment.  */
   unsigned char warn_comments;
 
+  /* Nonzero means to warn about __DATA__, __TIME__ and __TIMESTAMP__ usage.   */
+  unsigned char warn_date_time;
+
   /* Nonzero means warn if a user-supplied include directory does not
      exist.  */
   unsigned char warn_missing_include_dirs;
@@ -362,6 +427,9 @@ struct cpp_options
      explicitly undefined.  */
   unsigned char warn_builtin_macro_redefined;
 
+  /* Different -Wimplicit-fallthrough= levels.  */
+  unsigned char cpp_warn_implicit_fallthrough;
+
   /* Nonzero means we should look for header.gcc files that remap file
      names.  */
   unsigned char remap;
@@ -379,6 +447,10 @@ struct cpp_options
   /* Nonzero means warn if undefined identifiers are evaluated in an #if.  */
   unsigned char warn_undef;
 
+  /* Nonzero means warn if "defined" is encountered in a place other than
+     an #if.  */
+  unsigned char warn_expansion_to_defined;
+
   /* Nonzero means warn of unused macros from the main file.  */
   unsigned char warn_unused_macros;
 
@@ -394,6 +466,18 @@ struct cpp_options
   /* Nonzero means we're looking at already preprocessed code, so don't
      bother trying to do macro expansion and whatnot.  */
   unsigned char preprocessed;
+  
+  /* Nonzero means we are going to emit debugging logs during
+     preprocessing.  */
+  unsigned char debug;
+
+  /* Nonzero means we are tracking locations of tokens involved in
+     macro expansion. 1 Means we track the location in degraded mode
+     where we do not track locations of tokens resulting from the
+     expansion of arguments of function-like macro.  2 Means we do
+     track all macro expansions. This last option is the one that
+     consumes the highest amount of memory.  */
+  unsigned char track_macro_expansion;
 
   /* Nonzero means handle C++ alternate operator names.  */
   unsigned char operator_names;
@@ -404,6 +488,45 @@ struct cpp_options
   /* True for traditional preprocessing.  */
   unsigned char traditional;
 
+  /* Nonzero for C++ 2011 Standard user-defined literals.  */
+  unsigned char user_literals;
+
+  /* Nonzero means warn when a string or character literal is followed by a
+     ud-suffix which does not beging with an underscore.  */
+  unsigned char warn_literal_suffix;
+
+  /* Nonzero means interpret imaginary, fixed-point, or other gnu extension
+     literal number suffixes as user-defined literal number suffixes.  */
+  unsigned char ext_numeric_literals;
+
+  /* Nonzero means extended identifiers allow the characters specified
+     in C11.  */
+  unsigned char c11_identifiers;
+
+  /* Nonzero for C++ 2014 Standard binary constants.  */
+  unsigned char binary_constants;
+
+  /* Nonzero for C++ 2014 Standard digit separators.  */
+  unsigned char digit_separators;
+
+  /* Nonzero for C2X decimal floating-point constants.  */
+  unsigned char dfp_constants;
+
+  /* Nonzero for C++20 __VA_OPT__ feature.  */
+  unsigned char va_opt;
+
+  /* Nonzero for the '::' token.  */
+  unsigned char scope;
+
+  /* Nonzero for the '#elifdef' and '#elifndef' directives.  */
+  unsigned char elifdef;
+
+  /* Nonzero means tokenize C++20 module directives.  */
+  unsigned char module_directives;
+
+  /* Nonzero for C++23 size_t literals.  */
+  unsigned char size_t_literals;
+
   /* Holds the name of the target (execution) character set.  */
   const char *narrow_charset;
 
@@ -414,8 +537,8 @@ struct cpp_options
   const char *input_charset;
 
   /* The minimum permitted level of normalization before a warning
-     is generated.  */
-  enum cpp_normalize_level warn_normalize;
+     is generated.  See enum cpp_normalize_level.  */
+  int warn_normalize;
 
   /* True to warn about precompiled header files we couldn't use.  */
   bool warn_invalid_pch;
@@ -423,6 +546,19 @@ struct cpp_options
   /* True if dependencies should be restored from a precompiled header.  */
   bool restore_pch_deps;
 
+  /* True if warn about differences between C90 and C99.  */
+  signed char cpp_warn_c90_c99_compat;
+
+  /* True if warn about differences between C11 and C2X.  */
+  signed char cpp_warn_c11_c2x_compat;
+
+  /* True if warn about differences between C++98 and C++11.  */
+  bool cpp_warn_cxx11_compat;
+
+  /* Nonzero if bidirectional control characters checking is on.  See enum
+     cpp_bidirectional_level.  */
+  unsigned char cpp_warn_bidirectional;
+
   /* SDCC abuse by Kevin: allow naked '#' characters in expanded macros
    * (see _cpp_create_definition in cppmacro.c)
    */
@@ -453,6 +589,9 @@ struct cpp_options
        one.  */
     bool phony_targets;
 
+    /* Generate dependency info for modules.  */
+    bool modules;
+
     /* If true, no dependency is generated on the main file.  */
     bool ignore_main_file;
 
@@ -479,6 +618,70 @@ struct cpp_options
 
   /* True disables tokenization outside of preprocessing directives. */
   bool directives_only;
+
+  /* True enables canonicalization of system header file paths. */
+  bool canonical_system_headers;
+
+  /* The maximum depth of the nested #include.  */
+  unsigned int max_include_depth;
+
+  cpp_main_search main_search : 8;
+};
+
+/* Diagnostic levels.  To get a diagnostic without associating a
+   position in the translation unit with it, use cpp_error_with_line
+   with a line number of zero.  */
+
+enum cpp_diagnostic_level {
+  /* Warning, an error with -Werror.  */
+  CPP_DL_WARNING = 0,
+  /* Same as CPP_DL_WARNING, except it is not suppressed in system headers.  */
+  CPP_DL_WARNING_SYSHDR,
+  /* Warning, an error with -pedantic-errors or -Werror.  */
+  CPP_DL_PEDWARN,
+  /* An error.  */
+  CPP_DL_ERROR,
+  /* An internal consistency check failed.  Prints "internal error: ",
+     otherwise the same as CPP_DL_ERROR.  */
+  CPP_DL_ICE,
+  /* An informative note following a warning.  */
+  CPP_DL_NOTE,
+  /* A fatal error.  */
+  CPP_DL_FATAL
+};
+
+/* Warning reason codes. Use a reason code of CPP_W_NONE for unclassified
+   warnings and diagnostics that are not warnings.  */
+
+enum cpp_warning_reason {
+  CPP_W_NONE = 0,
+  CPP_W_DEPRECATED,
+  CPP_W_COMMENTS,
+  CPP_W_MISSING_INCLUDE_DIRS,
+  CPP_W_TRIGRAPHS,
+  CPP_W_MULTICHAR,
+  CPP_W_TRADITIONAL,
+  CPP_W_LONG_LONG,
+  CPP_W_ENDIF_LABELS,
+  CPP_W_NUM_SIGN_CHANGE,
+  CPP_W_VARIADIC_MACROS,
+  CPP_W_BUILTIN_MACRO_REDEFINED,
+  CPP_W_DOLLARS,
+  CPP_W_UNDEF,
+  CPP_W_UNUSED_MACROS,
+  CPP_W_CXX_OPERATOR_NAMES,
+  CPP_W_NORMALIZE,
+  CPP_W_INVALID_PCH,
+  CPP_W_WARNING_DIRECTIVE,
+  CPP_W_LITERAL_SUFFIX,
+  CPP_W_SIZE_T_LITERALS,
+  CPP_W_DATE_TIME,
+  CPP_W_PEDANTIC,
+  CPP_W_C90_C99_COMPAT,
+  CPP_W_C11_C2X_COMPAT,
+  CPP_W_CXX11_COMPAT,
+  CPP_W_EXPANSION_TO_DEFINED,
+  CPP_W_BIDIRECTIONAL
 };
 
 /* Callback for header lookup for HEADER, which is the name of a
@@ -499,15 +702,15 @@ struct cpp_callbacks
      The line_map is for the new file.  It is NULL if there is no new file.
      (In C this happens when done with <built-in>+<command line> and also
      when done with a main file.)  This can be used for resource cleanup.  */
-  void (*file_change) (cpp_reader *, const struct line_map *);
+  void (*file_change) (cpp_reader *, const line_map_ordinary *);
 
   void (*dir_change) (cpp_reader *, const char *);
-  void (*include) (cpp_reader *, unsigned int, const unsigned char *,
+  void (*include) (cpp_reader *, location_t, const unsigned char *,
 		   const char *, int, const cpp_token **);
-  void (*define) (cpp_reader *, unsigned int, cpp_hashnode *);
-  void (*undef) (cpp_reader *, unsigned int, cpp_hashnode *);
-  void (*ident) (cpp_reader *, unsigned int, const cpp_string *);
-  void (*def_pragma) (cpp_reader *, unsigned int);
+  void (*define) (cpp_reader *, location_t, cpp_hashnode *);
+  void (*undef) (cpp_reader *, location_t, cpp_hashnode *);
+  void (*ident) (cpp_reader *, location_t, const cpp_string *);
+  void (*def_pragma) (cpp_reader *, location_t);
   int (*valid_pch) (cpp_reader *, const char *, int);
   void (*read_pch) (cpp_reader *, const char *, int, const char *);
   missing_header_cb missing_header;
@@ -518,31 +721,75 @@ struct cpp_callbacks
 
   /* Called to emit a diagnostic.  This callback receives the
      translated message.  */
-  bool (*error) (cpp_reader *, int, int, source_location, unsigned int,
-		 const char *, va_list *)
-       ATTRIBUTE_FPTR_PRINTF(6,0);
+  bool (*diagnostic) (cpp_reader *,
+		      enum cpp_diagnostic_level,
+		      enum cpp_warning_reason,
+		      rich_location *,
+		      const char *, va_list *)
+       ATTRIBUTE_FPTR_PRINTF(5,0);
 
   /* Callbacks for when a macro is expanded, or tested (whether
      defined or not at the time) in #ifdef, #ifndef or "defined".  */
-  void (*used_define) (cpp_reader *, unsigned int, cpp_hashnode *);
-  void (*used_undef) (cpp_reader *, unsigned int, cpp_hashnode *);
+  void (*used_define) (cpp_reader *, location_t, cpp_hashnode *);
+  void (*used_undef) (cpp_reader *, location_t, cpp_hashnode *);
   /* Called before #define and #undef or other macro definition
      changes are processed.  */
   void (*before_define) (cpp_reader *);
   /* Called whenever a macro is expanded or tested.
      Second argument is the location of the start of the current expansion.  */
-  void (*used) (cpp_reader *, source_location, cpp_hashnode *);
+  void (*used) (cpp_reader *, location_t, cpp_hashnode *);
+
+  /* Callback to identify whether an attribute exists.  */
+  int (*has_attribute) (cpp_reader *, bool);
+
+  /* Callback to determine whether a built-in function is recognized.  */
+  int (*has_builtin) (cpp_reader *);
+
+  /* Callback that can change a user lazy into normal macro.  */
+  void (*user_lazy_macro) (cpp_reader *, cpp_macro *, unsigned);
+
+  /* Callback to handle deferred cpp_macros.  */
+  cpp_macro *(*user_deferred_macro) (cpp_reader *, location_t, cpp_hashnode *);
 
-  /* Callback that can change a user builtin into normal macro.  */
-  bool (*user_builtin_macro) (cpp_reader *, cpp_hashnode *);
+  /* Callback to parse SOURCE_DATE_EPOCH from environment.  */
+  time_t (*get_source_date_epoch) (cpp_reader *);
+
+  /* Callback for providing suggestions for misspelled directives.  */
+  const char *(*get_suggestion) (cpp_reader *, const char *, const char *const *);
+
+  /* Callback for when a comment is encountered, giving the location
+     of the opening slash, a pointer to the content (which is not
+     necessarily 0-terminated), and the length of the content.
+     The content contains the opening slash-star (or slash-slash),
+     and for C-style comments contains the closing star-slash.  For
+     C++-style comments it does not include the terminating newline.  */
+  void (*comment) (cpp_reader *, location_t, const unsigned char *,
+		   size_t);
+
+  /* Callback for filename remapping in __FILE__ and __BASE_FILE__ macro
+     expansions.  */
+  const char *(*remap_filename) (const char*);
+
+  /* Maybe translate a #include into something else.  Return a
+     cpp_buffer containing the translation if translating.  */
+  char *(*translate_include) (cpp_reader *, line_maps *, location_t,
+			      const char *path);
 };
 
 #ifdef VMS
 #define INO_T_CPP ino_t ino[3]
+#elif defined (_AIX) && SIZEOF_INO_T == 4
+#define INO_T_CPP ino64_t ino
 #else
 #define INO_T_CPP ino_t ino
 #endif
 
+#if defined (_AIX) && SIZEOF_DEV_T == 4
+#define DEV_T_CPP dev64_t dev
+#else
+#define DEV_T_CPP dev_t dev
+#endif
+
 /* Chain of directories to look for include files in.  */
 struct cpp_dir
 {
@@ -577,17 +824,87 @@ struct cpp_dir
   /* The C front end uses these to recognize duplicated
      directories in the search path.  */
   INO_T_CPP;
-  dev_t dev;
+  DEV_T_CPP;
+};
+
+/* The kind of the cpp_macro.  */
+enum cpp_macro_kind {
+  cmk_macro,	/* An ISO macro (token expansion).  */
+  cmk_assert,   /* An assertion.  */
+  cmk_traditional	/* A traditional macro (text expansion).  */
+};
+
+/* Each macro definition is recorded in a cpp_macro structure.
+   Variadic macros cannot occur with traditional cpp.  */
+struct GTY(()) cpp_macro {
+  union cpp_parm_u 
+  {
+    /* Parameters, if any.  If parameter names use extended identifiers,
+       the original spelling of those identifiers, not the canonical
+       UTF-8 spelling, goes here.  */
+    cpp_hashnode ** GTY ((tag ("false"),
+			  nested_ptr (union tree_node,
+	"%h ? CPP_HASHNODE (GCC_IDENT_TO_HT_IDENT (%h)) : NULL",
+	"%h ? HT_IDENT_TO_GCC_IDENT (HT_NODE (%h)) : NULL"),
+			  length ("%1.paramc"))) params;
+
+    /* If this is an assertion, the next one in the chain.  */
+    cpp_macro *GTY ((tag ("true"))) next;
+  } GTY ((desc ("%1.kind == cmk_assert"))) parm;
+
+  /* Definition line number.  */
+  location_t line;
+
+  /* Number of tokens in body, or bytes for traditional macros.  */
+  /* Do we really need 2^32-1 range here?  */
+  unsigned int count;
+
+  /* Number of parameters.  */
+  unsigned short paramc;
+
+  /* Non-zero if this is a user-lazy macro, value provided by user.  */
+  unsigned char lazy;
+
+  /* The kind of this macro (ISO, trad or assert) */
+  unsigned kind : 2;
+
+  /* If a function-like macro.  */
+  unsigned int fun_like : 1;
+
+  /* If a variadic macro.  */
+  unsigned int variadic : 1;
+
+  /* If macro defined in system header.  */
+  unsigned int syshdr   : 1;
+
+  /* Nonzero if it has been expanded or had its existence tested.  */
+  unsigned int used     : 1;
+
+  /* Indicate whether the tokens include extra CPP_PASTE tokens at the
+     end to track invalid redefinitions with consecutive CPP_PASTE
+     tokens.  */
+  unsigned int extra_tokens : 1;
+
+  /* Imported C++20 macro (from a header unit).  */
+  unsigned int imported_p : 1;
+
+  /* 0 bits spare (32-bit). 32 on 64-bit target.  */
+
+  union cpp_exp_u
+  {
+    /* Trailing array of replacement tokens (ISO), or assertion body value.  */
+    cpp_token GTY ((tag ("false"), length ("%1.count"))) tokens[1];
+
+    /* Pointer to replacement text (traditional).  See comment at top
+       of cpptrad.c for how traditional function-like macros are
+       encoded.  */
+    const unsigned char *GTY ((tag ("true"))) text;
+  } GTY ((desc ("%1.kind == cmk_traditional"))) exp;
 };
 
-/* The structure of a node in the hash table.  The hash table has
-   entries for all identifiers: either macros defined by #define
-   commands (type NT_MACRO), assertions created with #assert
-   (NT_ASSERTION), or neither of the above (NT_VOID).  Builtin macros
-   like __LINE__ are flagged NODE_BUILTIN.  Poisoned identifiers are
-   flagged NODE_POISONED.  NODE_OPERATOR (C++ only) indicates an
-   identifier that behaves like an operator such as "xor".
-   NODE_DIAGNOSTIC is for speed in lex_token: it indicates a
+/* Poisoned identifiers are flagged NODE_POISONED.  NODE_OPERATOR (C++
+   only) indicates an identifier that behaves like an operator such as
+   "xor".  NODE_DIAGNOSTIC is for speed in lex_token: it indicates a
    diagnostic may be required for this node.  Currently this only
    applies to __VA_ARGS__, poisoned identifiers, and -Wc++-compat
    warnings about NODE_OPERATOR.  */
@@ -595,21 +912,22 @@ struct cpp_dir
 /* Hash node flags.  */
 #define NODE_OPERATOR	(1 << 0)	/* C++ named operator.  */
 #define NODE_POISONED	(1 << 1)	/* Poisoned identifier.  */
-#define NODE_BUILTIN	(1 << 2)	/* Builtin macro.  */
-#define NODE_DIAGNOSTIC (1 << 3)	/* Possible diagnostic when lexed.  */
-#define NODE_WARN	(1 << 4)	/* Warn if redefined or undefined.  */
-#define NODE_DISABLED	(1 << 5)	/* A disabled macro.  */
-#define NODE_MACRO_ARG	(1 << 6)	/* Used during #define processing.  */
-#define NODE_USED	(1 << 7)	/* Dumped with -dU.  */
-#define NODE_CONDITIONAL (1 << 8)	/* Conditional macro */
-#define NODE_WARN_OPERATOR (1 << 9)	/* Warn about C++ named operator.  */
+#define NODE_DIAGNOSTIC (1 << 2)	/* Possible diagnostic when lexed.  */
+#define NODE_WARN	(1 << 3)	/* Warn if redefined or undefined.  */
+#define NODE_DISABLED	(1 << 4)	/* A disabled macro.  */
+#define NODE_USED	(1 << 5)	/* Dumped with -dU.  */
+#define NODE_CONDITIONAL (1 << 6)	/* Conditional macro */
+#define NODE_WARN_OPERATOR (1 << 7)	/* Warn about C++ named operator.  */
+#define NODE_MODULE (1 << 8)		/* C++-20 module-related name.  */
 
 /* Different flavors of hash node.  */
 enum node_type
 {
-  NT_VOID = 0,	   /* No definition yet.  */
-  NT_MACRO,	   /* A macro of some form.  */
-  NT_ASSERTION	   /* Predicate for #assert.  */
+  NT_VOID = 0,	   /* Maybe an assert?  */
+  NT_MACRO_ARG,	   /* A macro arg.  */
+  NT_USER_MACRO,   /* A user macro.  */
+  NT_BUILTIN_MACRO, /* A builtin macro.  */
+  NT_MACRO_MASK = NT_USER_MACRO  /* Mask for either macro kind.  */
 };
 
 /* Different flavors of builtin macro.  _Pragma is an operator, but we
@@ -619,6 +937,7 @@ enum cpp_builtin_type
   BT_SPECLINE = 0,		/* `__LINE__' */
   BT_DATE,			/* `__DATE__' */
   BT_FILE,			/* `__FILE__' */
+  BT_FILE_NAME,			/* `__FILE_NAME__' */
   BT_BASE_FILE,			/* `__BASE_FILE__' */
   BT_INCLUDE_LEVEL,		/* `__INCLUDE_LEVEL__' */
   BT_TIME,			/* `__TIME__' */
@@ -626,46 +945,35 @@ enum cpp_builtin_type
   BT_PRAGMA,			/* `_Pragma' operator */
   BT_TIMESTAMP,			/* `__TIMESTAMP__' */
   BT_COUNTER,			/* `__COUNTER__' */
+  BT_HAS_ATTRIBUTE,		/* `__has_attribute(x)' */
+  BT_HAS_STD_ATTRIBUTE,		/* `__has_c_attribute(x)' */
+  BT_HAS_BUILTIN,		/* `__has_builtin(x)' */
+  BT_HAS_INCLUDE,		/* `__has_include(x)' */
+  BT_HAS_INCLUDE_NEXT,		/* `__has_include_next(x)' */
+/* sdcc specific */
   BT_FUNCTION,                  /* `__func__' */
   BT_FIRST_USER,		/* User defined builtin macros.  */
   BT_LAST_USER = BT_FIRST_USER + 31
 };
 
 #define CPP_HASHNODE(HNODE)	((cpp_hashnode *) (HNODE))
-#define HT_NODE(NODE)		((ht_identifier *) (NODE))
-#define NODE_LEN(NODE)		HT_LEN (&(NODE)->ident)
-#define NODE_NAME(NODE)		HT_STR (&(NODE)->ident)
-
-/* Specify which field, if any, of the union is used.  */
-
-enum {
-  NTV_MACRO,
-  NTV_ANSWER,
-  NTV_BUILTIN,
-  NTV_ARGUMENT,
-  NTV_NONE
-};
-
-#define CPP_HASHNODE_VALUE_IDX(HNODE)				\
-  ((HNODE.flags & NODE_MACRO_ARG) ? NTV_ARGUMENT		\
-   : HNODE.type == NT_MACRO ? ((HNODE.flags & NODE_BUILTIN) 	\
-			       ? NTV_BUILTIN : NTV_MACRO)	\
-   : HNODE.type == NT_ASSERTION ? NTV_ANSWER			\
-   : NTV_NONE)
+#define HT_NODE(NODE)		(&(NODE)->ident)
+#define NODE_LEN(NODE)		HT_LEN (HT_NODE (NODE))
+#define NODE_NAME(NODE)		HT_STR (HT_NODE (NODE))
 
 /* The common part of an identifier node shared amongst all 3 C front
    ends.  Also used to store CPP identifiers, which are a superset of
    identifiers in the grammatical sense.  */
 
 union GTY(()) _cpp_hashnode_value {
-  /* If a macro.  */
-  cpp_macro * GTY((tag ("NTV_MACRO"))) macro;
-  /* Answers to an assertion.  */
-  struct answer * GTY ((tag ("NTV_ANSWER"))) answers;
+  /* Assert (maybe NULL) */
+  cpp_macro * GTY((tag ("NT_VOID"))) answers;
+  /* Macro (maybe NULL) */
+  cpp_macro * GTY((tag ("NT_USER_MACRO"))) macro;
   /* Code for a builtin macro.  */
-  enum cpp_builtin_type GTY ((tag ("NTV_BUILTIN"))) builtin;
+  enum cpp_builtin_type GTY ((tag ("NT_BUILTIN_MACRO"))) builtin;
   /* Macro argument index.  */
-  unsigned short GTY ((tag ("NTV_ARGUMENT"))) arg_index;
+  unsigned short GTY ((tag ("NT_MACRO_ARG"))) arg_index;
 };
 
 struct GTY(()) cpp_hashnode {
@@ -674,11 +982,63 @@ struct GTY(()) cpp_hashnode {
   unsigned int directive_index : 7;	/* If is_directive,
 					   then index into directive table.
 					   Otherwise, a NODE_OPERATOR.  */
-  unsigned char rid_code;		/* Rid code - for front ends.  */
-  ENUM_BITFIELD(node_type) type : 6;	/* CPP node type.  */
-  unsigned int flags : 10;		/* CPP flags.  */
+  unsigned int rid_code : 8;		/* Rid code - for front ends.  */
+  unsigned int flags : 9;		/* CPP flags.  */
+  ENUM_BITFIELD(node_type) type : 2;	/* CPP node type.  */
+
+  /* 5 bits spare.  */
+
+  /* The deferred cookie is applicable to NT_USER_MACRO or NT_VOID.
+     The latter for when a macro had a prevailing undef.
+     On a 64-bit system there would be 32-bits of padding to the value
+     field.  So placing the deferred index here is not costly.   */
+  unsigned deferred;			/* Deferred cookie  */
+
+  union _cpp_hashnode_value GTY ((desc ("%1.type"))) value;
+};
+
+/* A class for iterating through the source locations within a
+   string token (before escapes are interpreted, and before
+   concatenation).  */
 
-  union _cpp_hashnode_value GTY ((desc ("CPP_HASHNODE_VALUE_IDX (%1)"))) value;
+class cpp_string_location_reader {
+ public:
+  cpp_string_location_reader (location_t src_loc,
+			      line_maps *line_table);
+
+  source_range get_next ();
+
+ private:
+  location_t m_loc;
+  int m_offset_per_column;
+};
+
+/* A class for storing the source ranges of all of the characters within
+   a string literal, after escapes are interpreted, and after
+   concatenation.
+
+   This is not GTY-marked, as instances are intended to be temporary.  */
+
+class cpp_substring_ranges
+{
+ public:
+  cpp_substring_ranges ();
+  ~cpp_substring_ranges ();
+
+  int get_num_ranges () const { return m_num_ranges; }
+  source_range get_range (int idx) const
+  {
+    linemap_assert (idx < m_num_ranges);
+    return m_ranges[idx];
+  }
+
+  void add_range (source_range range);
+  void add_n_ranges (int num, cpp_string_location_reader &loc_reader);
+
+ private:
+  source_range *m_ranges;
+  int m_num_ranges;
+  int m_alloc_ranges;
 };
 
 /* Call this first to get a handle to pass to other functions.
@@ -688,11 +1048,11 @@ struct GTY(()) cpp_hashnode {
    that cpplib will share; this technique is used by the C front
    ends.  */
 extern cpp_reader *cpp_create_reader (enum c_lang, struct ht *,
-				      struct line_maps *);
+				      class line_maps *);
 
 /* Reset the cpp_reader's line_map.  This is only used after reading a
    PCH file.  */
-extern void cpp_set_line_map (cpp_reader *, struct line_maps *);
+extern void cpp_set_line_map (cpp_reader *, class line_maps *);
 
 /* Call this to change the selected language standard (e.g. because of
    command line options).  */
@@ -706,17 +1066,30 @@ extern void cpp_set_include_chains (cpp_reader *, cpp_dir *, cpp_dir *, int);
    call cpp_finish on that reader.  You can either edit the callbacks
    through the pointer returned from cpp_get_callbacks, or set them
    with cpp_set_callbacks.  */
-extern cpp_options *cpp_get_options (cpp_reader *);
-extern cpp_callbacks *cpp_get_callbacks (cpp_reader *);
+extern cpp_options *cpp_get_options (cpp_reader *) ATTRIBUTE_PURE;
+extern cpp_callbacks *cpp_get_callbacks (cpp_reader *) ATTRIBUTE_PURE;
 extern void cpp_set_callbacks (cpp_reader *, cpp_callbacks *);
-extern struct deps *cpp_get_deps (cpp_reader *);
+extern class mkdeps *cpp_get_deps (cpp_reader *) ATTRIBUTE_PURE;
+
+extern const char *cpp_probe_header_unit (cpp_reader *, const char *file,
+					  bool angle_p,  location_t);
+
+/* Call these to get name data about the various compile-time
+   charsets.  */
+extern const char *cpp_get_narrow_charset_name (cpp_reader *) ATTRIBUTE_PURE;
+extern const char *cpp_get_wide_charset_name (cpp_reader *) ATTRIBUTE_PURE;
 
 /* This function reads the file, but does not start preprocessing.  It
    returns the name of the original file; this is the same as the
    input file, except for preprocessed input.  This will generate at
    least one file change callback, and possibly a line change callback
    too.  If there was an error opening the file, it returns NULL.  */
-extern const char *cpp_read_main_file (cpp_reader *, const char *);
+extern const char *cpp_read_main_file (cpp_reader *, const char *,
+				       bool injecting = false);
+extern location_t cpp_main_loc (const cpp_reader *);
+
+/* Adjust for the main file to be an include.  */
+extern void cpp_retrofit_as_include (cpp_reader *);
 
 /* Set up built-ins with special behavior.  Use cpp_init_builtins()
    instead unless your know what you are doing.  */
@@ -753,9 +1126,55 @@ extern int cpp_avoid_paste (cpp_reader *, const cpp_token *,
 			    const cpp_token *);
 extern const cpp_token *cpp_get_token (cpp_reader *);
 extern const cpp_token *cpp_get_token_with_location (cpp_reader *,
-						     source_location *);
-extern const unsigned char *cpp_macro_definition (cpp_reader *,
-						  cpp_hashnode *);
+						     location_t *);
+inline bool cpp_user_macro_p (const cpp_hashnode *node)
+{
+  return node->type == NT_USER_MACRO;
+}
+inline bool cpp_builtin_macro_p (const cpp_hashnode *node)
+{
+  return node->type == NT_BUILTIN_MACRO;
+}
+inline bool cpp_macro_p (const cpp_hashnode *node)
+{
+  return node->type & NT_MACRO_MASK;
+}
+inline cpp_macro *cpp_set_deferred_macro (cpp_hashnode *node,
+					  cpp_macro *forced = NULL)
+{
+  cpp_macro *old = node->value.macro;
+
+  node->value.macro = forced;
+  node->type = NT_USER_MACRO;
+  node->flags &= ~NODE_USED;
+
+  return old;
+}
+cpp_macro *cpp_get_deferred_macro (cpp_reader *, cpp_hashnode *, location_t);
+
+/* Returns true if NODE is a function-like user macro.  */
+inline bool cpp_fun_like_macro_p (cpp_hashnode *node)
+{
+  return cpp_user_macro_p (node) && node->value.macro->fun_like;
+}
+
+extern const unsigned char *cpp_macro_definition (cpp_reader *, cpp_hashnode *);
+extern const unsigned char *cpp_macro_definition (cpp_reader *, cpp_hashnode *,
+						  const cpp_macro *);
+inline location_t cpp_macro_definition_location (cpp_hashnode *node)
+{
+  const cpp_macro *macro = node->value.macro;
+  return macro ? macro->line : 0;
+}
+/* Return an idempotent time stamp (possibly from SOURCE_DATE_EPOCH).  */
+enum class CPP_time_kind 
+{
+  FIXED = -1,	/* Fixed time via source epoch.  */
+  DYNAMIC = -2,	/* Dynamic via time(2).  */
+  UNKNOWN = -3	/* Wibbly wobbly, timey wimey.  */
+};
+extern CPP_time_kind cpp_get_date (cpp_reader *, time_t *);
+
 extern void _cpp_backup_tokens (cpp_reader *, unsigned int);
 extern const cpp_token *cpp_peek_token (cpp_reader *, int);
 
@@ -766,6 +1185,12 @@ extern cppchar_t cpp_interpret_charconst (cpp_reader *, const cpp_token *,
 extern bool cpp_interpret_string (cpp_reader *,
 				  const cpp_string *, size_t,
 				  cpp_string *, enum cpp_ttype);
+extern const char *cpp_interpret_string_ranges (cpp_reader *pfile,
+						const cpp_string *from,
+						cpp_string_location_reader *,
+						size_t count,
+						cpp_substring_ranges *out,
+						enum cpp_ttype type);
 extern bool cpp_interpret_string_notranslate (cpp_reader *,
 					      const cpp_string *, size_t,
 					      cpp_string *, enum cpp_ttype);
@@ -776,12 +1201,19 @@ extern cppchar_t cpp_host_to_exec_charset (cpp_reader *, cppchar_t);
 /* Used to register macros and assertions, perhaps from the command line.
    The text is the same as the command line argument.  */
 extern void cpp_define (cpp_reader *, const char *);
+extern void cpp_define_unused (cpp_reader *, const char *);
 extern void cpp_define_formatted (cpp_reader *pfile, 
 				  const char *fmt, ...) ATTRIBUTE_PRINTF_2;
+extern void cpp_define_formatted_unused (cpp_reader *pfile,
+					 const char *fmt,
+					 ...) ATTRIBUTE_PRINTF_2;
 extern void cpp_assert (cpp_reader *, const char *);
 extern void cpp_undef (cpp_reader *, const char *);
 extern void cpp_unassert (cpp_reader *, const char *);
 
+/* Mark a node as a lazily defined macro.  */
+extern void cpp_define_lazily (cpp_reader *, cpp_hashnode *node, unsigned N);
+
 /* Undefine all macros and assertions.  */
 extern void cpp_undef_all (cpp_reader *);
 
@@ -791,7 +1223,10 @@ extern int cpp_defined (cpp_reader *, const unsigned char *, int);
 
 /* A preprocessing number.  Code assumes that any unused high bits of
    the double integer are set to zero.  */
-typedef unsigned HOST_WIDE_INT cpp_num_part;
+
+/* This type has to be equal to unsigned HOST_WIDE_INT, see
+   gcc/c-family/c-lex.cc.  */
+typedef uint64_t cpp_num_part;
 typedef struct cpp_num cpp_num;
 struct cpp_num
 {
@@ -814,7 +1249,7 @@ struct cpp_num
 #define CPP_N_FLOATING	0x0002
 
 #define CPP_N_WIDTH	0x00F0
-#define CPP_N_SMALL	0x0010	/* int, float, shrot _Fract/Accum  */
+#define CPP_N_SMALL	0x0010	/* int, float, short _Fract/Accum  */
 #define CPP_N_MEDIUM	0x0020	/* long, double, long _Fract/_Accum.  */
 #define CPP_N_LARGE	0x0040	/* long long, long double,
 				   long long _Fract/Accum.  */
@@ -836,100 +1271,105 @@ struct cpp_num
 
 #define CPP_N_FRACT	0x100000 /* Fract types.  */
 #define CPP_N_ACCUM	0x200000 /* Accum types.  */
+#define CPP_N_FLOATN	0x400000 /* _FloatN types.  */
+#define CPP_N_FLOATNX	0x800000 /* _FloatNx types.  */
+
+#define CPP_N_USERDEF	0x1000000 /* C++11 user-defined literal.  */
+
+#define CPP_N_SIZE_T	0x2000000 /* C++23 size_t literal.  */
+
+#define CPP_N_WIDTH_FLOATN_NX	0xF0000000 /* _FloatN / _FloatNx value
+					      of N, divided by 16.  */
+#define CPP_FLOATN_SHIFT	24
+#define CPP_FLOATN_MAX	0xF0
 
 /* Classify a CPP_NUMBER token.  The return value is a combination of
    the flags from the above sets.  */
-extern unsigned cpp_classify_number (cpp_reader *, const cpp_token *);
+extern unsigned cpp_classify_number (cpp_reader *, const cpp_token *,
+				     const char **, location_t);
+
+/* Return the classification flags for a float suffix.  */
+extern unsigned int cpp_interpret_float_suffix (cpp_reader *, const char *,
+						size_t);
+
+/* Return the classification flags for an int suffix.  */
+extern unsigned int cpp_interpret_int_suffix (cpp_reader *, const char *,
+					      size_t);
 
 /* Evaluate a token classified as category CPP_N_INTEGER.  */
 extern cpp_num cpp_interpret_integer (cpp_reader *, const cpp_token *,
-				      unsigned int type);
+				      unsigned int);
 
 /* Sign extend a number, with PRECISION significant bits and all
    others assumed clear, to fill out a cpp_num structure.  */
 cpp_num cpp_num_sign_extend (cpp_num, size_t);
 
-/* Diagnostic levels.  To get a diagnostic without associating a
-   position in the translation unit with it, use cpp_error_with_line
-   with a line number of zero.  */
-
-enum {
-  /* Warning, an error with -Werror.  */
-  CPP_DL_WARNING = 0,
-  /* Same as CPP_DL_WARNING, except it is not suppressed in system headers.  */
-  CPP_DL_WARNING_SYSHDR,
-  /* Warning, an error with -pedantic-errors or -Werror.  */
-  CPP_DL_PEDWARN,
-  /* An error.  */
-  CPP_DL_ERROR,
-  /* An internal consistency check failed.  Prints "internal error: ",
-     otherwise the same as CPP_DL_ERROR.  */
-  CPP_DL_ICE,
-  /* An informative note following a warning.  */
-  CPP_DL_NOTE,
-  /* A fatal error.  */
-  CPP_DL_FATAL
-};
-
-/* Warning reason codes. Use a reason code of zero for unclassified warnings
-   and errors that are not warnings.  */
-enum {
-  CPP_W_NONE = 0,
-  CPP_W_DEPRECATED,
-  CPP_W_COMMENTS,
-  CPP_W_MISSING_INCLUDE_DIRS,
-  CPP_W_TRIGRAPHS,
-  CPP_W_MULTICHAR,
-  CPP_W_TRADITIONAL,
-  CPP_W_LONG_LONG,
-  CPP_W_ENDIF_LABELS,
-  CPP_W_NUM_SIGN_CHANGE,
-  CPP_W_VARIADIC_MACROS,
-  CPP_W_BUILTIN_MACRO_REDEFINED,
-  CPP_W_DOLLARS,
-  CPP_W_UNDEF,
-  CPP_W_UNUSED_MACROS,
-  CPP_W_CXX_OPERATOR_NAMES,
-  CPP_W_NORMALIZE,
-  CPP_W_INVALID_PCH,
-  CPP_W_WARNING_DIRECTIVE
-};
-
 /* Output a diagnostic of some kind.  */
-extern bool cpp_error (cpp_reader *, int, const char *msgid, ...)
+extern bool cpp_error (cpp_reader *, enum cpp_diagnostic_level,
+		       const char *msgid, ...)
   ATTRIBUTE_PRINTF_3;
-extern bool cpp_warning (cpp_reader *, int, const char *msgid, ...)
+extern bool cpp_warning (cpp_reader *, enum cpp_warning_reason,
+			 const char *msgid, ...)
   ATTRIBUTE_PRINTF_3;
-extern bool cpp_pedwarning (cpp_reader *, int, const char *msgid, ...)
+extern bool cpp_pedwarning (cpp_reader *, enum cpp_warning_reason,
+			    const char *msgid, ...)
   ATTRIBUTE_PRINTF_3;
-extern bool cpp_warning_syshdr (cpp_reader *, int, const char *msgid, ...)
+extern bool cpp_warning_syshdr (cpp_reader *, enum cpp_warning_reason reason,
+				const char *msgid, ...)
   ATTRIBUTE_PRINTF_3;
 
+/* As their counterparts above, but use RICHLOC.  */
+extern bool cpp_warning_at (cpp_reader *, enum cpp_warning_reason,
+			    rich_location *richloc, const char *msgid, ...)
+  ATTRIBUTE_PRINTF_4;
+extern bool cpp_pedwarning_at (cpp_reader *, enum cpp_warning_reason,
+			       rich_location *richloc, const char *msgid, ...)
+  ATTRIBUTE_PRINTF_4;
+
 /* Output a diagnostic with "MSGID: " preceding the
    error string of errno.  No location is printed.  */
-extern bool cpp_errno (cpp_reader *, int, const char *msgid);
+extern bool cpp_errno (cpp_reader *, enum cpp_diagnostic_level,
+		       const char *msgid);
+/* Similarly, but with "FILENAME: " instead of "MSGID: ", where
+   the filename is not localized.  */
+extern bool cpp_errno_filename (cpp_reader *, enum cpp_diagnostic_level,
+				const char *filename, location_t loc);
 
 /* Same as cpp_error, except additionally specifies a position as a
    (translation unit) physical line and physical column.  If the line is
    zero, then no location is printed.  */
-extern bool cpp_error_with_line (cpp_reader *, int, source_location,
-                                 unsigned, const char *msgid, ...)
+extern bool cpp_error_with_line (cpp_reader *, enum cpp_diagnostic_level,
+				 location_t, unsigned,
+				 const char *msgid, ...)
   ATTRIBUTE_PRINTF_5;
-extern bool cpp_warning_with_line (cpp_reader *, int, source_location,
-                                   unsigned, const char *msgid, ...)
+extern bool cpp_warning_with_line (cpp_reader *, enum cpp_warning_reason,
+				   location_t, unsigned,
+				   const char *msgid, ...)
   ATTRIBUTE_PRINTF_5;
-extern bool cpp_pedwarning_with_line (cpp_reader *, int, source_location,
-                                      unsigned, const char *msgid, ...)
+extern bool cpp_pedwarning_with_line (cpp_reader *, enum cpp_warning_reason,
+				      location_t, unsigned,
+				      const char *msgid, ...)
   ATTRIBUTE_PRINTF_5;
-extern bool cpp_warning_with_line_syshdr (cpp_reader *, int, source_location,
-                                          unsigned, const char *msgid, ...)
+extern bool cpp_warning_with_line_syshdr (cpp_reader *, enum cpp_warning_reason,
+					  location_t, unsigned,
+					  const char *msgid, ...)
   ATTRIBUTE_PRINTF_5;
 
-/* In lex.c */
+extern bool cpp_error_at (cpp_reader * pfile, enum cpp_diagnostic_level,
+			  location_t src_loc, const char *msgid, ...)
+  ATTRIBUTE_PRINTF_4;
+
+extern bool cpp_error_at (cpp_reader * pfile, enum cpp_diagnostic_level,
+			  rich_location *richloc, const char *msgid, ...)
+  ATTRIBUTE_PRINTF_4;
+
+/* In lex.cc */
 extern int cpp_ideq (const cpp_token *, const char *);
 extern void cpp_output_line (cpp_reader *, FILE *);
 extern unsigned char *cpp_output_line_to_string (cpp_reader *,
 						 const unsigned char *);
+extern const unsigned char *cpp_alloc_token_string
+  (cpp_reader *, const unsigned char *, unsigned);
 extern void cpp_output_token (const cpp_token *, FILE *);
 extern const char *cpp_type2name (enum cpp_ttype, unsigned char flags);
 /* Returns the value of an escape sequence, truncated to the correct
@@ -949,7 +1389,7 @@ typedef struct
   char *comment;
 
   /* source location for the given comment.  */
-  source_location sloc;
+  location_t sloc;
 } cpp_comment;
 
 /* Structure holding all comments for a given cpp_reader.  */
@@ -980,17 +1420,20 @@ extern cpp_hashnode *cpp_lookup (cpp_reader *, const unsigned char *,
 typedef int (*cpp_cb) (cpp_reader *, cpp_hashnode *, void *);
 extern void cpp_forall_identifiers (cpp_reader *, cpp_cb, void *);
 
-/* In macro.c */
+/* In macro.cc */
 extern void cpp_scan_nooutput (cpp_reader *);
 extern int  cpp_sys_macro_p (cpp_reader *);
 extern unsigned char *cpp_quote_string (unsigned char *, const unsigned char *,
 					unsigned int);
+extern bool cpp_compare_macros (const cpp_macro *macro1,
+				const cpp_macro *macro2);
 
-/* In files.c */
+/* In files.cc */
 extern bool cpp_included (cpp_reader *, const char *);
-extern bool cpp_included_before (cpp_reader *, const char *, source_location);
+extern bool cpp_included_before (cpp_reader *, const char *, location_t);
 extern void cpp_make_system_header (cpp_reader *, int, int);
 extern bool cpp_push_include (cpp_reader *, const char *);
+extern bool cpp_push_default_include (cpp_reader *, const char *);
 extern void cpp_change_file (cpp_reader *, enum lc_reason, const char *);
 extern const char *cpp_get_path (struct _cpp_file *);
 extern cpp_dir *cpp_get_dir (struct _cpp_file *);
@@ -999,7 +1442,21 @@ extern struct _cpp_file *cpp_get_file (cpp_buffer *);
 extern cpp_buffer *cpp_get_prev (cpp_buffer *);
 extern void cpp_clear_file_cache (cpp_reader *);
 
-/* In pch.c */
+/* cpp_get_converted_source returns the contents of the given file, as it exists
+   after cpplib has read it and converted it from the input charset to the
+   source charset.  Return struct will be zero-filled if the data could not be
+   read for any reason.  The data starts at the DATA pointer, but the TO_FREE
+   pointer is what should be passed to free(), as there may be an offset.  */
+struct cpp_converted_source
+{
+  char *to_free;
+  char *data;
+  size_t len;
+};
+cpp_converted_source cpp_get_converted_source (const char *fname,
+					       const char *input_charset);
+
+/* In pch.cc */
 struct save_macro_data;
 extern int cpp_save_state (cpp_reader *, FILE *);
 extern int cpp_write_pch_deps (cpp_reader *, FILE *);
@@ -1009,4 +1466,132 @@ extern void cpp_prepare_state (cpp_reader *, struct save_macro_data **);
 extern int cpp_read_state (cpp_reader *, const char *, FILE *,
 			   struct save_macro_data *);
 
+/* In lex.cc */
+extern void cpp_force_token_locations (cpp_reader *, location_t);
+extern void cpp_stop_forcing_token_locations (cpp_reader *);
+enum CPP_DO_task
+{
+  CPP_DO_print,
+  CPP_DO_location,
+  CPP_DO_token
+};
+
+extern void cpp_directive_only_process (cpp_reader *pfile,
+					void *data,
+					void (*cb) (cpp_reader *,
+						    CPP_DO_task,
+						    void *data, ...));
+
+/* In expr.cc */
+extern enum cpp_ttype cpp_userdef_string_remove_type
+  (enum cpp_ttype type);
+extern enum cpp_ttype cpp_userdef_string_add_type
+  (enum cpp_ttype type);
+extern enum cpp_ttype cpp_userdef_char_remove_type
+  (enum cpp_ttype type);
+extern enum cpp_ttype cpp_userdef_char_add_type
+  (enum cpp_ttype type);
+extern bool cpp_userdef_string_p
+  (enum cpp_ttype type);
+extern bool cpp_userdef_char_p
+  (enum cpp_ttype type);
+extern const char * cpp_get_userdef_suffix
+  (const cpp_token *);
+
+/* In charset.cc */
+
+/* The result of attempting to decode a run of UTF-8 bytes.  */
+
+struct cpp_decoded_char
+{
+  const char *m_start_byte;
+  const char *m_next_byte;
+
+  bool m_valid_ch;
+  cppchar_t m_ch;
+};
+
+/* Information for mapping between code points and display columns.
+
+   This is a tabstop value, along with a callback for getting the
+   widths of characters.  Normally this callback is cpp_wcwidth, but we
+   support other schemes for escaping non-ASCII unicode as a series of
+   ASCII chars when printing the user's source code in diagnostic-show-locus.cc
+
+   For example, consider:
+   - the Unicode character U+03C0 "GREEK SMALL LETTER PI" (UTF-8: 0xCF 0x80)
+   - the Unicode character U+1F642 "SLIGHTLY SMILING FACE"
+     (UTF-8: 0xF0 0x9F 0x99 0x82)
+   - the byte 0xBF (a stray trailing byte of a UTF-8 character)
+   Normally U+03C0 would occupy one display column, U+1F642
+   would occupy two display columns, and the stray byte would be
+   printed verbatim as one display column.
+
+   However when escaping them as unicode code points as "<U+03C0>"
+   and "<U+1F642>" they occupy 8 and 9 display columns respectively,
+   and when escaping them as bytes as "<CF><80>" and "<F0><9F><99><82>"
+   they occupy 8 and 16 display columns respectively.  In both cases
+   the stray byte is escaped to <BF> as 4 display columns.  */
+
+struct cpp_char_column_policy
+{
+  cpp_char_column_policy (int tabstop,
+			  int (*width_cb) (cppchar_t c))
+  : m_tabstop (tabstop),
+    m_undecoded_byte_width (1),
+    m_width_cb (width_cb)
+  {}
+
+  int m_tabstop;
+  /* Width in display columns of a stray byte that isn't decodable
+     as UTF-8.  */
+  int m_undecoded_byte_width;
+  int (*m_width_cb) (cppchar_t c);
+};
+
+/* A class to manage the state while converting a UTF-8 sequence to cppchar_t
+   and computing the display width one character at a time.  */
+class cpp_display_width_computation {
+ public:
+  cpp_display_width_computation (const char *data, int data_length,
+				 const cpp_char_column_policy &policy);
+  const char *next_byte () const { return m_next; }
+  int bytes_processed () const { return m_next - m_begin; }
+  int bytes_left () const { return m_bytes_left; }
+  bool done () const { return !bytes_left (); }
+  int display_cols_processed () const { return m_display_cols; }
+
+  int process_next_codepoint (cpp_decoded_char *out);
+  int advance_display_cols (int n);
+
+ private:
+  const char *const m_begin;
+  const char *m_next;
+  size_t m_bytes_left;
+  const cpp_char_column_policy &m_policy;
+  int m_display_cols;
+};
+
+/* Convenience functions that are simple use cases for class
+   cpp_display_width_computation.  Tab characters will be expanded to spaces
+   as determined by POLICY.m_tabstop, and non-printable-ASCII characters
+   will be escaped as per POLICY.  */
+
+int cpp_byte_column_to_display_column (const char *data, int data_length,
+				       int column,
+				       const cpp_char_column_policy &policy);
+inline int cpp_display_width (const char *data, int data_length,
+			      const cpp_char_column_policy &policy)
+{
+  return cpp_byte_column_to_display_column (data, data_length, data_length,
+					    policy);
+}
+int cpp_display_column_to_byte_column (const char *data, int data_length,
+				       int display_col,
+				       const cpp_char_column_policy &policy);
+int cpp_wcwidth (cppchar_t c);
+
+bool cpp_input_conversion_is_trivial (const char *input_charset);
+int cpp_check_utf8_bom (const char *data, size_t data_length);
+
 #endif /* ! LIBCPP_CPPLIB_H */