From ef6a70de5d0bc252380ebcb21eb84913473ead5d Mon Sep 17 00:00:00 2001 From: Boris Kolpackov Date: Mon, 17 Jan 2022 10:45:06 +0200 Subject: Use UTF-8 as default input/source charset for C/C++ compilation --- NEWS | 13 +++++ bootstrap-clang.bat | 2 +- bootstrap-mingw.bat | 2 +- bootstrap-msvc.bat | 2 +- bootstrap.gmake | 6 +-- bootstrap.sh | 2 +- build/root.build | 2 +- libbuild2/cc/compile-rule.cxx | 116 ++++++++++++++++++++++++++++++++---------- 8 files changed, 110 insertions(+), 35 deletions(-) diff --git a/NEWS b/NEWS index a0c8694..d2c0090 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,18 @@ Version 0.15.0 + * UTF-8 is now the default input/source character set for C/C++ compilation. + + Specifically, the cc module now passes the appropriate compiler option + (/utf-8 for MSVC and -finput-charset=UTF-8 for GCC and Clang) unless a + custom value is already specified (with /{source,execution}-charset for + MSVC and -finput-charset for GCC and Clang). + + This change may trigger new compilation errors in your source code if + it's not valid UTF-8 (such errors most commonly point into comments). + For various ways to fix this, see: + + https://github.com/build2/HOWTO/blob/master/entries/convert-source-files-to-utf8.md + * Support for dynamic dependencies in ad hoc recipes. Specifically, the `depdb` builtin now has the new `dyndep` command that diff --git a/bootstrap-clang.bat b/bootstrap-clang.bat index 00302e9..5a06a70 100644 --- a/bootstrap-clang.bat +++ b/bootstrap-clang.bat @@ -88,7 +88,7 @@ rem worked around by passing an obscure internal option. Clang 9 doesn't rem have this problem. To keep things simple, let's just suppress this rem warning. 
rem -set "ops=-m64 -std=c++1y -D_MT -D_CRT_SECURE_NO_WARNINGS -Xlinker /ignore:4217" +set "ops=-finput-charset=UTF-8 -m64 -std=c++1y -D_MT -D_CRT_SECURE_NO_WARNINGS -Xlinker /ignore:4217" :ops_next shift if "_%1_" == "__" ( diff --git a/bootstrap-mingw.bat b/bootstrap-mingw.bat index df7e677..cfd9d7c 100644 --- a/bootstrap-mingw.bat +++ b/bootstrap-mingw.bat @@ -83,7 +83,7 @@ rem rem Note that for as long as we support GCC 4.9 we have to compile in the rem C++14 mode since 4.9 doesn't recognize c++1z. rem -set "ops=-std=c++1y" +set "ops=-finput-charset=UTF-8 -std=c++1y" :ops_next shift if "_%1_" == "__" ( diff --git a/bootstrap-msvc.bat b/bootstrap-msvc.bat index 3d74427..6a6fcbc 100644 --- a/bootstrap-msvc.bat +++ b/bootstrap-msvc.bat @@ -111,7 +111,7 @@ set "src=%src% %libbutl%\libbutl" rem Get the compile options. rem -set "ops=/nologo /EHsc /MT /MP" +set "ops=/nologo /utf-8 /EHsc /MT /MP" :ops_next shift if "_%1_" == "__" ( diff --git a/bootstrap.gmake b/bootstrap.gmake index 1e0e8e2..e5ab285 100644 --- a/bootstrap.gmake +++ b/bootstrap.gmake @@ -190,13 +190,13 @@ $(out_root)/build2/b-boot$(exe): $(build2_obj) $(libbuild2_obj) $(libbutl_obj) $(CXX) -std=c++1y $(CXXFLAGS) $(LDFLAGS) -o $@ $^ $(LIBS) $(out_root)/build2/%.b.o: $(src_root)/build2/%.cxx | $$(dir $$@). - $(CXX) -I$(libbutl) -I$(src_root) -DBUILD2_BOOTSTRAP -DBUILD2_HOST_TRIPLET=\"$(chost)\" $(CPPFLAGS) -std=c++1y $(CXXFLAGS) -o $@ -c $< + $(CXX) -I$(libbutl) -I$(src_root) -DBUILD2_BOOTSTRAP -DBUILD2_HOST_TRIPLET=\"$(chost)\" $(CPPFLAGS) -finput-charset=UTF-8 -std=c++1y $(CXXFLAGS) -o $@ -c $< $(out_root)/libbuild2/%.b.o: $(src_root)/libbuild2/%.cxx | $$(dir $$@). 
- $(CXX) -I$(libbutl) -I$(src_root) -DBUILD2_BOOTSTRAP -DBUILD2_HOST_TRIPLET=\"$(chost)\" $(CPPFLAGS) -std=c++1y $(CXXFLAGS) -o $@ -c $< + $(CXX) -I$(libbutl) -I$(src_root) -DBUILD2_BOOTSTRAP -DBUILD2_HOST_TRIPLET=\"$(chost)\" $(CPPFLAGS) -finput-charset=UTF-8 -std=c++1y $(CXXFLAGS) -o $@ -c $< $(libbutl_out)/%.b.o: $(libbutl)/libbutl/%.cxx | $$(dir $$@). - $(CXX) -I$(libbutl) -DBUILD2_BOOTSTRAP $(CPPFLAGS) -std=c++1y $(CXXFLAGS) -o $@ -c $< + $(CXX) -I$(libbutl) -DBUILD2_BOOTSTRAP $(CPPFLAGS) -finput-charset=UTF-8 -std=c++1y $(CXXFLAGS) -o $@ -c $< .PRECIOUS: %/. %/. : diff --git a/bootstrap.sh b/bootstrap.sh index 14e52cf..9bd13b4 100755 --- a/bootstrap.sh +++ b/bootstrap.sh @@ -147,4 +147,4 @@ done # mode since 4.9 doesn't recognize c++1z. # set -x -"$cxx" "-I$libbutl" -I. -DBUILD2_BOOTSTRAP '-DBUILD2_HOST_TRIPLET="'"$host"'"' -std=c++1y "$@" -o build2/b-boot $r -lpthread +"$cxx" "-I$libbutl" -I. -DBUILD2_BOOTSTRAP '-DBUILD2_HOST_TRIPLET="'"$host"'"' -finput-charset=UTF-8 -std=c++1y "$@" -o build2/b-boot $r -lpthread diff --git a/build/root.build b/build/root.build index 4925c19..3afdcf9 100644 --- a/build/root.build +++ b/build/root.build @@ -20,7 +20,7 @@ if ($cxx.target.system == 'win32-msvc') cxx.poptions += -D_CRT_SECURE_NO_WARNINGS -D_SCL_SECURE_NO_WARNINGS if ($cxx.class == 'msvc') - cxx.coptions += /wd4251 /wd4275 /wd4800 /wd4819 + cxx.coptions += /wd4251 /wd4275 /wd4800 elif ($cxx.id == 'gcc') cxx.coptions += -Wno-maybe-uninitialized -Wno-free-nonheap-object \ -Wno-stringop-overread # libbutl diff --git a/libbuild2/cc/compile-rule.cxx b/libbuild2/cc/compile-rule.cxx index 29b32c6..77d01c6 100644 --- a/libbuild2/cc/compile-rule.cxx +++ b/libbuild2/cc/compile-rule.cxx @@ -250,7 +250,7 @@ namespace build2 compile_rule:: compile_rule (data&& d) : common (move (d)), - rule_id (string (x) += ".compile 5") + rule_id (string (x) += ".compile 6") { static_assert (sizeof (match_data) <= target::data_size, "insufficient space"); @@ -1149,15 +1149,6 @@ namespace 
build2 append_options (cs, t, c_coptions); append_options (cs, t, x_coptions); - - if (ot == otype::s) - { - // On Darwin, Win32 -fPIC is the default. - // - if (tclass == "linux" || tclass == "bsd") - cs.append ("-fPIC"); - } - append_options (cs, cmode); if (md.pp != preprocessed::all) @@ -3015,8 +3006,8 @@ namespace build2 // // So seeing that it is hard to trigger a legitimate VC preprocessor // warning, for now, we will just treat them as errors by adding /WX. - // BTW, another example of a plausible preprocessor warning is C4819 - // (character unrepresentable in source charset). + // BTW, other examples of plausible preprocessor warnings are C4819 + // and C4828 (character unrepresentable in source charset). // // Finally, if we are using the module mapper, then all this mess falls // away: we only run the compiler once, we let the diagnostics through, @@ -3263,8 +3254,24 @@ namespace build2 append_options (args, cmode); append_sys_hdr_options (args); // Extra system header dirs (last). - // See perform_update() for details on /external:W0, /EHsc, /MD. + // See perform_update() for details on the choice of options. // + { + bool sc (find_option_prefix ("/source-charset:", args)); + bool ec (find_option_prefix ("/execution-charset:", args)); + + if (!sc && !ec) + args.push_back ("/utf-8"); + else + { + if (!sc) + args.push_back ("/source-charset:UTF-8"); + + if (!ec) + args.push_back ("/execution-charset:UTF-8"); + } + } + if (cvariant != "clang" && isystem (*this)) { if (find_option_prefix ("/external:I", args) && @@ -3305,8 +3312,15 @@ namespace build2 } case compiler_class::gcc: { + append_options (args, cmode, + cmode.size () - (modules && clang ? 1 : 0)); + append_sys_hdr_options (args); // Extra system header dirs (last). + // See perform_update() for details on the choice of options. 
// + if (!find_option_prefix ("-finput-charset=", args)) + args.push_back ("-finput-charset=UTF-8"); + if (ot == otype::s) { if (tclass == "linux" || tclass == "bsd") @@ -3335,10 +3349,6 @@ namespace build2 } } - append_options (args, cmode, - cmode.size () - (modules && clang ? 1 : 0)); - append_sys_hdr_options (args); // Extra system header dirs (last). - // Setup the dynamic module mapper if needed. // // Note that it's plausible in the future we will use it even if @@ -4609,8 +4619,24 @@ namespace build2 append_options (args, cmode); append_sys_hdr_options (args); - // See perform_update() for details on /external:W0, /EHsc, /MD. + // See perform_update() for details on the choice of options. // + { + bool sc (find_option_prefix ("/source-charset:", args)); + bool ec (find_option_prefix ("/execution-charset:", args)); + + if (!sc && !ec) + args.push_back ("/utf-8"); + else + { + if (!sc) + args.push_back ("/source-charset:UTF-8"); + + if (!ec) + args.push_back ("/execution-charset:UTF-8"); + } + } + if (cvariant != "clang" && isystem (*this)) { if (find_option_prefix ("/external:I", args) && @@ -4635,6 +4661,15 @@ namespace build2 } case compiler_class::gcc: { + append_options (args, cmode, + cmode.size () - (modules && clang ? 1 : 0)); + append_sys_hdr_options (args); + + // See perform_update() for details on the choice of options. + // + if (!find_option_prefix ("-finput-charset=", args)) + args.push_back ("-finput-charset=UTF-8"); + if (ot == otype::s) { if (tclass == "linux" || tclass == "bsd") @@ -4663,10 +4698,6 @@ namespace build2 } } - append_options (args, cmode, - cmode.size () - (modules && clang ? 1 : 0)); - append_sys_hdr_options (args); - args.push_back ("-E"); append_lang_options (args, md); @@ -6518,6 +6549,27 @@ namespace build2 if (md.pp != preprocessed::all) append_sys_hdr_options (args); // Extra system header dirs (last). + // Set source/execution charsets to UTF-8 unless a custom charset + // is specified. 
+ // + // Note that clang-cl supports /utf-8 and /*-charset. + // + { + bool sc (find_option_prefix ("/source-charset:", args)); + bool ec (find_option_prefix ("/execution-charset:", args)); + + if (!sc && !ec) + args.push_back ("/utf-8"); + else + { + if (!sc) + args.push_back ("/source-charset:UTF-8"); + + if (!ec) + args.push_back ("/execution-charset:UTF-8"); + } + } + // If we have any /external:I options but no /external:Wn, then add // /external:W0 to emulate the -isystem semantics. // @@ -6631,6 +6683,21 @@ namespace build2 } case compiler_class::gcc: { + append_options (args, cmode); + + if (md.pp != preprocessed::all) + append_sys_hdr_options (args); // Extra system header dirs (last). + + // Set the input charset to UTF-8 unless a custom one is specified. + // + // Note that the execution charset (-fexec-charset) is UTF-8 by + // default. + // + // Note that early versions of Clang only recognize uppercase UTF-8. + // + if (!find_option_prefix ("-finput-charset=", args)) + args.push_back ("-finput-charset=UTF-8"); + if (ot == otype::s) { // On Darwin, Win32 -fPIC is the default. @@ -6734,11 +6801,6 @@ namespace build2 } } - append_options (args, cmode); - - if (md.pp != preprocessed::all) - append_sys_hdr_options (args); // Extra system header dirs (last). - append_header_options (env, args, header_args, a, t, md, md.dd); append_module_options (env, args, module_args, a, t, md, md.dd); -- cgit v1.1